From 4b357ffdfac52ba1b8e38134e87e4e229edd7e83 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Fri, 19 Sep 2025 11:19:30 +0200
Subject: [PATCH 01/43] tests are green

---
 .../text_generation/parsed_output_sample.cpp  |  67 +++++++
 src/cpp/include/openvino/genai/parsers.hpp    | 102 +++++++++++
 src/cpp/src/parsers.cpp                       | 167 ++++++++++++++++++
 src/cpp/src/parsers.hpp                       |  48 +++++
 tests/cpp/CMakeLists.txt                      |   2 +-
 tests/cpp/parser.cpp                          | 111 ++++++++++++
 6 files changed, 496 insertions(+), 1 deletion(-)
 create mode 100644 samples/cpp/text_generation/parsed_output_sample.cpp
 create mode 100644 src/cpp/include/openvino/genai/parsers.hpp
 create mode 100644 src/cpp/src/parsers.cpp
 create mode 100644 src/cpp/src/parsers.hpp
 create mode 100644 tests/cpp/parser.cpp

diff --git a/samples/cpp/text_generation/parsed_output_sample.cpp b/samples/cpp/text_generation/parsed_output_sample.cpp
new file mode 100644
index 0000000000..ada4af6751
--- /dev/null
+++ b/samples/cpp/text_generation/parsed_output_sample.cpp
@@ -0,0 +1,67 @@
+// Copyright (C) 2023-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "openvino/genai/llm_pipeline.hpp"
+#include "openvino/genai/parsers.hpp"
+#include "openvino/genai/text_streamer.hpp"
+
+using ov::genai::ParsingState;
+
+class CurrentStreamer : public ov::genai::TextParserStreamer {
+private:
+    ParsingState m_previous_state = ParsingState::UNDEFINED;
+public:
+    CurrentStreamer(const ov::genai::Tokenizer& tokenizer)
+        : ov::genai::TextParserStreamer(tokenizer) {}
+    ov::genai::StreamingStatus write(const ov::genai::ParsedMessage& message) {
+
+        // if (m_previous_state == ParsingState::UNDEFINED && message["state"] == ParsingState::REASONING) {
+        //     std::cout << "Reasoning: " << std::endl;
+        //     std::cout << message["reasoning_content"].value();
+        // } else if (m_previous_state == ParsingState::REASONING && message["state"] == ParsingState::CONTENT) {
+        //     std::cout << std::endl << "Content: " << std::endl;
+        //     std::cout << message["content"].value();
+        // } else if (m_previous_state == ParsingState::REASONING && message["state"] == ParsingState::REASONING) {
+        //     std::cout << message["reasoning_content"].value();
+        // } else if (m_previous_state == ParsingState::CONTENT && message["state"] == ParsingState::CONTENT) {
+        //     std::cout << message["content"].value();
+        // }
+        std::cout << message.at("content");
+
+        return ov::genai::StreamingStatus::RUNNING;
+    }
+};
+
+
+int main(int argc, char* argv[]) try {
+    if (argc < 2 || argc > 3) {
+        throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> [DEVICE]");
+    }
+    // std::string prompt = "<|begin▁of▁sentence|><|User|>Please think of a difficult task to solve x**2 + y**2 = 1<|Assistant|>";
+    std::string prompt = "<|begin▁of▁sentence|><|User|>Why is the Sky blue?<|Assistant|>";
+    std::string models_path = argv[1];
+
+    // Default device is CPU; can be overridden by the second argument
+    std::string device = (argc == 3) ? argv[2] : "CPU";  // GPU, NPU can be used as well
+    ov::genai::LLMPipeline pipe(models_path, device);
+
+    ov::genai::GenerationConfig config;
+    config.max_new_tokens = 1000;
+
+    auto tok = pipe.get_tokenizer();
+    std::shared_ptr<CurrentStreamer> streamer = std::make_shared<CurrentStreamer>(tok);
+
+    pipe.generate(prompt, config, streamer);
+
+
+} catch (const std::exception& error) {
+    try {
+        std::cerr << error.what() << '\n';
+    } catch (const std::ios_base::failure&) {}
+    return EXIT_FAILURE;
+} catch (...) {
+    try {
+        std::cerr << "Non-exception object thrown\n";
+    } catch (const std::ios_base::failure&) {}
+    return EXIT_FAILURE;
+}
diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
new file mode 100644
index 0000000000..043141a73f
--- /dev/null
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -0,0 +1,102 @@
+// Copyright (C) 2023-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+#include "openvino/genai/text_streamer.hpp"
+
+namespace ov {
+namespace genai {
+
+enum class ParsingState {
+    CONTENT,
+    REASONING,
+    TOOL_CALLING,
+    UNDEFINED
+};
+
+
+using ParsedMessage = std::map<std::string, std::string>;
+
+class ParsedJSONMessage {
+public:
+    std::map<std::string, std::string> content;
+};
+
+
+// struct DeltaMessage {
+//     std::map<std::string, std::string> content;
+//     std::optional<std::string> content;
+//     std::optional<std::string> reasoning_content;
+//     ParsingState state = ParsingState::UNDEFINED;
+
+//     // std::vector<DeltaToolCall> tool_calls;
+
+//     DeltaMessage() = default;
+// };
+
+
+class IncrementalParserBase {
+public:
+    IncrementalParserBase() = default;
+
+    virtual ParsedMessage parse(
+        const std::string& previous_text,
+        const std::string& delta_text,
+        const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
+        const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
+    ) = 0;
+};
+
+class ParserBase {
+public:
+    ParserBase() = default;
+
+    virtual ParsedMessage parse(ParsedMessage& text) = 0;
+};
+
+
+
+class TextParserStreamer : public ov::genai::TextStreamer {
+public:
+    TextParserStreamer(const Tokenizer& tokenizer);
+
+    virtual StreamingStatus write(ParsedMessage& message) = 0;
+
+    ov::genai::CallbackTypeVariant write(std::string message);
+private:
+    std::string m_text_buffer;
+    std::shared_ptr<IncrementalParserBase> m_reasoning_parser;
+    std::shared_ptr<IncrementalParserBase> m_tool_calling_parser;
+};
+
+class Llama32PythonicParser : public ParserBase {
+// Does not modify original content, only extracts and adds tool calls
+public:
+    Llama32PythonicParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {}
+
+    ParsedMessage parse(ParsedMessage& input) override;
+
+private:
+    bool m_keep_original_content = true;
+};
+
+class BaseReasoningParser : public ParserBase {
+public:
+    BaseReasoningParser(bool expect_open_tag = true, bool keep_original_content = true, std::string open_tag = "<think>", std::string close_tag = "</think>") :
+        m_expect_open_tag(expect_open_tag),
+        m_keep_original_content(keep_original_content),
+        m_open_tag(open_tag),
+        m_close_tag(close_tag) {}
+
+    ParsedMessage parse(ParsedMessage& input) override;
+
+private:
+    bool m_expect_open_tag = true;
+    bool m_keep_original_content = true;
+    std::string m_open_tag = "<think>";
+    std::string m_close_tag = "</think>";
+};
+
+
+} // namespace genai
+} // namespace ov
diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
new file mode 100644
index 0000000000..8a932bbba7
--- /dev/null
+++ b/src/cpp/src/parsers.cpp
@@ -0,0 +1,167 @@
+#include "openvino/genai/parsers.hpp"
+#include <algorithm>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <optional>
+#include <regex>
+#include <string>
+#include <nlohmann/json.hpp>
+
+using namespace std;
+using json = nlohmann::json;
+
+namespace ov::genai {
+
+std::string state_to_string(const ParsingState state) {
+    switch (state) {
+        case ParsingState::CONTENT:
+            return "CONTENT";
+        case ParsingState::REASONING:
+            return "REASONING";
+        case ParsingState::TOOL_CALLING:
+            return "TOOL_CALLING";
+        case ParsingState::UNDEFINED:
+            return "UNDEFINED";
+        default:
+            return "UNKNOWN";
+    }
+}
+
+class DeepSeekR1Parser : public IncrementalParserBase {
+private:
+    bool m_starts_with_thinking = true;
+    ParsingState m_parsing_state = ParsingState::REASONING;
+public:
+    DeepSeekR1Parser() = default;
+    std::map<std::string, std::string> accumulated_parsed;
+
+    ParsedMessage parse(
+        const std::string& previous_text,
+        const std::string& delta_text,
+        const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
+        const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt) {
+        ParsedMessage msg;
+
+        if (!m_starts_with_thinking) {
+            m_parsing_state = ParsingState::UNDEFINED;
+        } else {
+            m_parsing_state = ParsingState::REASONING;
+        }
+
+        if (m_parsing_state == ParsingState::UNDEFINED && delta_text.find("<think>") != std::string::npos) {
+            m_parsing_state = ParsingState::REASONING;
+            auto think_idx = delta_text.find("<think>");
+            msg["reasoning_content"] = delta_text.substr(think_idx + std::string("<think>").size(), delta_text.size() - (think_idx + std::string("<think>").size()));
+        } else if (delta_text.find("</think>") != std::string::npos && m_parsing_state == ParsingState::REASONING) {
+            auto think_idx = delta_text.find("</think>");
+
+            msg["reasoning_content"] = delta_text.substr(0, think_idx);
+            msg["content"] = delta_text.substr(think_idx + std::string("</think>").size(), delta_text.size() - (think_idx + std::string("</think>").size()));
+
+            m_parsing_state = ParsingState::CONTENT;
+        } else if (m_parsing_state == ParsingState::REASONING) {
+            msg["reasoning_content"] = delta_text;
+        } else if (m_parsing_state == ParsingState::CONTENT) {
+            msg["content"] = delta_text;
+        } else {
+            throw std::runtime_error("Unexpected state in DeepSeekR1Parser");
+        }
+        msg["state"] = state_to_string(m_parsing_state);
+
+        // TODO: consider accumulating all fields and returning accumulated fields instead of parsing once more at the end.
+
+        // std::string accumulated_reasoning += msg["reasoning_content"];
+        accumulated_parsed["content"] += msg["content"];
+
+        // accumulated_parsed["reasoning_content"] = accumulated_reasoning;
+        // TODO: if thinking is closed, disable parsing and give content without cutting thinking.
+        return msg;
+    }
+};
+
+
+ParsedMessage Llama32PythonicParser::parse(ParsedMessage& input) {
+    // Input example
+    // string input = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>";
+
+    // Regex to capture the [...] part
+    smatch m;
+    const std::string& text = input.at("content");
+    regex r(R"(\[.*?\])");
+    if (regex_search(text, m, r)) {
+        // Strip outer [ ]
+        string call = m.str().substr(1, m.str().size() - 2);
+
+        // Split function name and arguments
+        size_t pos = call.find('(');
+        string name = call.substr(0, pos);
+        string args = call.substr(pos + 1, call.size() - pos - 2);  // inside (...)
+
+        // Parse arguments of the form key='value'
+        map<string, string> kv;
+        regex arg_re(R"((\w+)\s*=\s*'([^']*)')");
+        auto it = sregex_iterator(args.begin(), args.end(), arg_re);
+        for (; it != sregex_iterator(); ++it) {
+            kv[(*it)[1]] = (*it)[2];
+        }
+        json j = json::array({{
+            {"name", name},
+            {"arguments", kv}
+        }});
+        if (!m_keep_original_content) {
+            input["content"] = regex_replace(text, r, "");
+        }
+        input["tool_calls"] = j.dump();
+        return input;
+    }
+    return ParsedMessage{};
+}
+
+ParsedMessage BaseReasoningParser::parse(ParsedMessage& input) {
+    ParsedMessage res;
+    std::string reasoning_content;
+    const std::string& content = input.at("content");
+    res["content"] = content;
+
+    size_t start = content.find(m_open_tag);
+    size_t end = content.find(m_close_tag);
+
+    if (start != std::string::npos && end != std::string::npos && end > start) {
+        reasoning_content = content.substr(start + m_open_tag.size(), end - (start + m_open_tag.size()));
+        if (!m_keep_original_content) {
+            // Remove <think>...</think> from content
+            res["content"] = content.substr(0, start) + content.substr(end + m_close_tag.size());
+        }
+    } else {
+        reasoning_content = "";
+    }
+
+    res["reasoning_content"] = reasoning_content;
+    return res;
+}
+
+
+TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer)
+    : ov::genai::TextStreamer(tokenizer, [this](std::string s) -> ov::genai::CallbackTypeVariant {
+        return this->write(s);
+    }) {
+        m_reasoning_parser = std::make_shared<DeepSeekR1Parser>();
+    }
+
+StreamingStatus TextParserStreamer::write(ParsedMessage& message) {
+    return StreamingStatus::RUNNING;
+}
+
+ov::genai::CallbackTypeVariant TextParserStreamer::write(std::string message) {
+    // for (auto& parser: m_parsers) {
+    //     if (parser.is_active()) {
+    //         msg = parser.parse(m_text_buffer, message, msg);
+    //     }
+    // }
+
+    // m_text_buffer += message;
+    // return write(msg);
+    return StreamingStatus::RUNNING;
+}
+
+} // namespace ov::genai
diff --git a/src/cpp/src/parsers.hpp b/src/cpp/src/parsers.hpp
new file mode 100644
index 0000000000..21f474e089
--- /dev/null
+++ b/src/cpp/src/parsers.hpp
@@ -0,0 +1,48 @@
+// Copyright (C) 2023-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+#include "openvino/genai/text_streamer.hpp"
+
+namespace ov {
+namespace genai {
+
+struct DeltaToolCall;  // Forward declaration, define as needed
+
+struct DeltaMessage {
+    std::optional<std::string> role;
+    std::optional<std::string> content;
+    std::optional<std::string> reasoning_content;
+    // std::vector<DeltaToolCall> tool_calls;
+
+    DeltaMessage()
+        : role(std::nullopt),
+          content(std::nullopt),
+          reasoning_content(std::nullopt) {}
+};
+
+class TextParserStreamer : public ov::genai::TextStreamer {
+public:
+    TextParserStreamer(const Tokenizer& tokenizer);
+
+    StreamingStatus write(const DeltaMessage& message);
+
+    ov::genai::CallbackTypeVariant write(std::string message);
+};
+
+class ReasoningParserBase {
+public:
+    ReasoningParserBase() = default;
+
+    void parse(const std::string& text);
+};
+
+class ToolCallingParserBase {
+public:
+    ToolCallingParserBase() = default;
+
+    void parse(const std::string& text);
+};
+
+} // namespace genai
+} // namespace ov
diff --git a/tests/cpp/CMakeLists.txt b/tests/cpp/CMakeLists.txt
index bdf959eb5d..f708e00e55 100644
--- a/tests/cpp/CMakeLists.txt
+++ b/tests/cpp/CMakeLists.txt
@@ -26,7 +26,7 @@ set(TEST_TARGET_NAME "tests_continuous_batching")
 add_executable(${TEST_TARGET_NAME} ${tests_src}
     $)
 
-target_link_libraries(${TEST_TARGET_NAME} PRIVATE $ gtest_main gmock_main)
+target_link_libraries(${TEST_TARGET_NAME} PRIVATE $ gtest_main gmock_main nlohmann_json::nlohmann_json)
 
 target_include_directories(${TEST_TARGET_NAME} PRIVATE "${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src"
     $)
diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp
new file mode 100644
index 0000000000..35b49fde88
--- /dev/null
+++ b/tests/cpp/parser.cpp
@@ -0,0 +1,111 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gtest/gtest.h>
+#include "openvino/genai/generation_config.hpp"
+#include "openvino/genai/parsers.hpp"
+#include "nlohmann/json.hpp"
+#include "openvino/genai/llm_pipeline.hpp"
+
+using namespace ov::genai;
+
+nlohmann::json convert_to_json(const ParsedMessage& msg) {
+    nlohmann::json j;
+    for (const auto& [key, value] : msg) {
+        if (key == "tool_calls") {
+            j[key] = nlohmann::json::parse(value);
+            continue;
+        }
+        j[key] = value;
+    }
+    return j;
+}
+
+nlohmann::json run_parser_test(std::shared_ptr<ParserBase> parser, const std::string& prompt, const nlohmann::json& expected) {
+    ParsedMessage input;
+    input["content"] = prompt;
+    return convert_to_json(parser->parse(input));
+}
+
+
+TEST(ParserTest, test_llama32_parser_1) {
+    std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>)";
+    nlohmann::json expected;
+
+    // By default content should keep original values.
+    expected["content"] = prompt;
+
+    expected["tool_calls"] = nlohmann::json::array({
+        {
+            {"name", "get_weather"},
+            {"arguments", {
+                {"location", "New York, NY"},
+                {"unit", "celsius"}
+            }}
+        }
+    });
+    std::shared_ptr<ParserBase> parser = std::make_shared<Llama32PythonicParser>();
+
+    nlohmann::json res = run_parser_test(parser, prompt, expected);
+
+    ASSERT_EQ(res, expected);
+}
+
+TEST(ParserTest, test_llama32_parser_2) {
+    std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>)";
+    nlohmann::json expected;
+
+    // In this test tool calling part will be cut from the content after parsing.
+    expected["content"] = std::string(R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n<|eom_id|>)");
+
+    expected["tool_calls"] = nlohmann::json::array({
+        {
+            {"name", "get_weather"},
+            {"arguments", {
+                {"location", "New York, NY"},
+                {"unit", "celsius"}
+            }}
+        }
+    });
+    auto parser = std::make_shared<Llama32PythonicParser>(/*keep_original_content*/ false);
+
+    nlohmann::json res = run_parser_test(parser, prompt, expected);
+
+    ASSERT_EQ(res, expected);
+}
+
+TEST(ParserTest, test_reasoning_parser_1) {
+    std::string prompt = R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|><think>\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n</think>\n\n**Solution:**\n\nTo find the sum of 2 and 1, )";
+    nlohmann::json expected;
+
+    // In this test reasoning part will be cut from the content after parsing.
+    expected["content"] = std::string(R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|>\n\n**Solution:**\n\nTo find the sum of 2 and 1, )");
+
+    expected["reasoning_content"] = std::string(R"(\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n)");
+    auto parser = std::make_shared<BaseReasoningParser>(
+        /*expect_open_tag*/ true,
+        /*keep_original_content*/ false
+    );
+
+    nlohmann::json res = run_parser_test(parser, prompt, expected);
+
+    ASSERT_EQ(res, expected);
+}
+
+TEST(ParserTest, test_reasoning_parser_2) {
+    std::string prompt = R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|><think>\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n</think>\n\n**Solution:**\n\nTo find the sum of 2 and 1, )";
+    nlohmann::json expected;
+
+    // In this test content should keep original values.
+    expected["content"] = prompt;
+
+    expected["reasoning_content"] = std::string(R"(\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n)");
+    auto parser = std::make_shared<BaseReasoningParser>(
+        /*expect_open_tag*/ true,
+        /*keep_original_content*/ true
+    );
+
+    nlohmann::json res = run_parser_test(parser, prompt, expected);
+
+    ASSERT_EQ(res, expected);
+}
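+
+// A minimal usage sketch, not part of the test suite: how the two ParserBase
+// implementations above compose when chained by hand. The input string and
+// the chaining order are illustrative assumptions only.
+//
+//   ParsedMessage msg;
+//   msg["content"] = "<think>plan</think>[get_weather(location='NY')]";
+//   msg = BaseReasoningParser(true, false).parse(msg);  // moves "plan" into msg["reasoning_content"]
+//   msg = Llama32PythonicParser(false).parse(msg);      // fills msg["tool_calls"] with the call as JSON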

From 525a4d8167e56edf92f7a52f0c9e6f38348d3ce2 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Tue, 23 Sep 2025 22:12:09 +0200
Subject: [PATCH 02/43] python api added

---
 src/cpp/include/openvino/genai/parsers.hpp    |  58 ++++---
 src/cpp/src/parsers.cpp                       | 150 +++++++++---------
 src/python/openvino_genai/__init__.py         |   8 +-
 .../openvino_genai/py_openvino_genai.pyi      |  31 +++-
 src/python/py_openvino_genai.cpp              |   2 +
 src/python/py_parsers.cpp                     |  91 +++++++++++
 src/python/py_streamers.cpp                   |  35 +++-
 tests/cpp/parser.cpp                          |  29 ++++
 tests/python_tests/test_parsers.py            |  49 ++++++
 9 files changed, 351 insertions(+), 102 deletions(-)
 create mode 100644 src/python/py_parsers.cpp
 create mode 100644 tests/python_tests/test_parsers.py

diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index 043141a73f..291df915a6 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -3,17 +3,14 @@
 
 #pragma once
 #include "openvino/genai/text_streamer.hpp"
+#include <map>
+#include <string>
+#include <variant>
+#include <vector>
 
 namespace ov {
 namespace genai {
 
-enum class ParsingState {
-    CONTENT,
-    REASONING,
-    TOOL_CALLING,
-    UNDEFINED
-};
-
 
 using ParsedMessage = std::map<std::string, std::string>;
 
 class ParsedJSONMessage {
 public:
     std::map<std::string, std::string> content;
 };
 
-
-// struct DeltaMessage {
-//     std::map<std::string, std::string> content;
-//     std::optional<std::string> content;
-//     std::optional<std::string> reasoning_content;
-//     ParsingState state = ParsingState::UNDEFINED;
-
-//     // std::vector<DeltaToolCall> tool_calls;
-
-//     DeltaMessage() = default;
-// };
-
-
 class IncrementalParserBase {
 public:
     IncrementalParserBase() = default;
 
     virtual ParsedMessage parse(
+        ParsedMessage& msg,
         const std::string& previous_text,
         const std::string& delta_text,
         const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
         const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
     ) = 0;
+
+    virtual bool is_active() const = 0;
+};
+
+class DeepSeekR1ReasoningParser : public IncrementalParserBase {
+private:
+    bool m_starts_with_thinking = true;
+    bool m_think_tag_opened = false;
+    bool m_deactivated = false;
+    std::string m_open_tag = "<think>";
+    std::string m_close_tag = "</think>";
+public:
+    DeepSeekR1ReasoningParser() = default;
+    std::map<std::string, std::string> accumulated_parsed;
+
+    ParsedMessage parse(
+        ParsedMessage& msg,
+        const std::string& previous_text,
+        const std::string& delta_text,
+        const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
+        const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
+    ) override;
+    static std::string name() { return "DeepSeekR1ReasoningParser"; }
+    bool is_active() const override;
 };
 
 class ParserBase {
@@ -54,19 +63,20 @@ class ParserBase {
     ParserBase() = default;
 
     virtual ParsedMessage parse(ParsedMessage& text) = 0;
 };
 
+using ParserVariant = std::variant<std::shared_ptr<IncrementalParserBase>, std::string>;
 
 class TextParserStreamer : public ov::genai::TextStreamer {
 public:
-    TextParserStreamer(const Tokenizer& tokenizer);
+    TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers = {});
 
     virtual StreamingStatus write(ParsedMessage& message) = 0;
 
     ov::genai::CallbackTypeVariant write(std::string message);
+    ParsedMessage m_parsed_message;
 private:
     std::string m_text_buffer;
-    std::shared_ptr<IncrementalParserBase> m_reasoning_parser;
-    std::shared_ptr<IncrementalParserBase> m_tool_calling_parser;
+    std::vector<std::shared_ptr<IncrementalParserBase>> m_parsers;
 };
 
 class Llama32PythonicParser : public ParserBase {
@@ -75,7 +85,7 @@ class Llama32PythonicParser : public ParserBase {
     Llama32PythonicParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {}
 
     ParsedMessage parse(ParsedMessage& input) override;
-
+    static std::string name() { return "Llama32PythonicParser"; }
 private:
     bool m_keep_original_content = true;
 };
diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index 8a932bbba7..76a6eb6c15 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -13,72 +13,54 @@ using json = nlohmann::json;
 
 namespace ov::genai {
 
-std::string state_to_string(const ParsingState state) {
-    switch (state) {
-        case ParsingState::CONTENT:
-            return "CONTENT";
-        case ParsingState::REASONING:
-            return "REASONING";
-        case ParsingState::TOOL_CALLING:
-            return "TOOL_CALLING";
-        case ParsingState::UNDEFINED:
-            return "UNDEFINED";
-        default:
-            return "UNKNOWN";
-    }
-}
+static std::map<std::string, std::shared_ptr<IncrementalParserBase>> registered_incremental_parsers;
+static std::map<std::string, std::shared_ptr<ParserBase>> registered_base_parsers;
 
-class DeepSeekR1Parser : public IncrementalParserBase {
-private:
-    bool m_starts_with_thinking = true;
-    ParsingState m_parsing_state = ParsingState::REASONING;
-public:
-    DeepSeekR1Parser() = default;
-    std::map<std::string, std::string> accumulated_parsed;
-
-    ParsedMessage parse(
-        const std::string& previous_text,
-        const std::string& delta_text,
-        const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
-        const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt) {
-        ParsedMessage msg;
-
-        if (!m_starts_with_thinking) {
-            m_parsing_state = ParsingState::UNDEFINED;
-        } else {
-            m_parsing_state = ParsingState::REASONING;
-        }
+bool DeepSeekR1ReasoningParser::is_active() const {
+    return !m_deactivated;
+}
 
-        if (m_parsing_state == ParsingState::UNDEFINED && delta_text.find("<think>") != std::string::npos) {
-            m_parsing_state = ParsingState::REASONING;
-            auto think_idx = delta_text.find("<think>");
-            msg["reasoning_content"] = delta_text.substr(think_idx + std::string("<think>").size(), delta_text.size() - (think_idx + std::string("<think>").size()));
-        } else if (delta_text.find("</think>") != std::string::npos && m_parsing_state == ParsingState::REASONING) {
-            auto think_idx = delta_text.find("</think>");
-
-            msg["reasoning_content"] = delta_text.substr(0, think_idx);
-            msg["content"] = delta_text.substr(think_idx + std::string("</think>").size(), delta_text.size() - (think_idx + std::string("</think>").size()));
-
-            m_parsing_state = ParsingState::CONTENT;
-        } else if (m_parsing_state == ParsingState::REASONING) {
-            msg["reasoning_content"] = delta_text;
-        } else if (m_parsing_state == ParsingState::CONTENT) {
-            msg["content"] = delta_text;
-        } else {
-            throw std::runtime_error("Unexpected state in DeepSeekR1Parser");
-        }
-        msg["state"] = state_to_string(m_parsing_state);
-
-        // TODO: consider accumulating all fields and returning accumulated fields instead of parsing once more at the end.
-
-        // std::string accumulated_reasoning += msg["reasoning_content"];
-        accumulated_parsed["content"] += msg["content"];
-
-        // accumulated_parsed["reasoning_content"] = accumulated_reasoning;
-        // TODO: if thinking is closed, disable parsing and give content without cutting thinking.
-        return msg;
-    }
-};
+ParsedMessage DeepSeekR1ReasoningParser::parse(
+    ParsedMessage& msg,
+    const std::string& previous_text,
+    const std::string& delta_text,
+    const std::optional<std::vector<int64_t>>& previous_tokens,
+    const std::optional<std::vector<int64_t>>& delta_tokens
+) {
+    if (msg.find("reasoning_content") == msg.end()) {
+        msg["reasoning_content"] = "";
+    }
+    if (msg.find("content") == msg.end()) {
+        msg["content"] = "";
+    }
+
+    if (m_deactivated) {
+        msg["content"] += delta_text;
+        return msg;
+    }
+    if (m_starts_with_thinking) {
+        m_think_tag_opened = true;
+    }
+
+    bool think_tag_closed = delta_text.find(m_close_tag) != std::string::npos;
+
+    if (!m_think_tag_opened && delta_text.find(m_open_tag) != std::string::npos) {
+        // Thinking has started
+        auto think_idx = delta_text.find(m_open_tag);
+        msg["reasoning_content"] += delta_text.substr(think_idx + std::string(m_open_tag).size(), delta_text.size() - (think_idx + std::string(m_open_tag).size()));
+        m_think_tag_opened = true;
+    } else if (m_think_tag_opened && delta_text.find(m_close_tag) != std::string::npos) {
+        auto think_idx = delta_text.find(m_close_tag);
+        msg["reasoning_content"] += delta_text.substr(0, think_idx);
+        msg["content"] += delta_text.substr(think_idx + std::string(m_close_tag).size(), delta_text.size() - (think_idx + std::string(m_close_tag).size()));
+        m_think_tag_opened = false;
+        m_deactivated = true;
+    } else if (m_think_tag_opened) {
+        msg["reasoning_content"] += delta_text;
+    }
+
+    return msg;
+}
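+
+// Illustrative walk-through (not from the original change): with the default
+// m_starts_with_thinking == true, the delta sequence
+//   "I think", " a lot</think>", "Hi"
+// accumulates msg["reasoning_content"] == "I think a lot", routes whatever
+// follows "</think>" in that delta to msg["content"], and then deactivates,
+// so the final delta appends "Hi" straight to msg["content"].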
 
 
 ParsedMessage Llama32PythonicParser::parse(ParsedMessage& input) {
@@ -142,26 +124,50 @@ ParsedMessage BaseReasoningParser::parse(ParsedMessage& input) {
     return res;
 }
 
-
-TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer)
-    : ov::genai::TextStreamer(tokenizer, [this](std::string s) -> ov::genai::CallbackTypeVariant {
-        return this->write(s);
-    }) {
-        m_reasoning_parser = std::make_shared<DeepSeekR1Parser>();
-    }
+TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers)
+    : ov::genai::TextStreamer(tokenizer, [this](std::string s) -> ov::genai::CallbackTypeVariant {
+        return this->write(s);
+    }) {
+    for (auto& parser : parsers) {
+        if (std::holds_alternative<std::shared_ptr<IncrementalParserBase>>(parser)) {
+            m_parsers.push_back(std::get<std::shared_ptr<IncrementalParserBase>>(parser));
+        } else {
+            auto parser_name = std::get<std::string>(parser);
+            if (registered_incremental_parsers.find(parser_name) != registered_incremental_parsers.end()) {
+                m_parsers.push_back(registered_incremental_parsers[parser_name]);
+            }
+        }
+    }
+}
 
 StreamingStatus TextParserStreamer::write(ParsedMessage& message) {
+    if (message.find("content") != message.end()) {
+        std::cout << message.at("content") << std::endl;
+    }
     return StreamingStatus::RUNNING;
 }
 
 ov::genai::CallbackTypeVariant TextParserStreamer::write(std::string message) {
-    // for (auto& parser: m_parsers) {
-    //     if (parser.is_active()) {
-    //         msg = parser.parse(m_text_buffer, message, msg);
-    //     }
-    // }
-
-    // m_text_buffer += message;
-    // return write(msg);
-    return StreamingStatus::RUNNING;
+    for (auto& parser: m_parsers) {
+        if (parser->is_active()) {
+            m_parsed_message = parser->parse(m_parsed_message, m_text_buffer, message);
+        }
+    }
+
+    m_text_buffer = message;
+    return write(m_parsed_message);
 }
+
+// static initializer to register available built-in parsers
+static bool register_backends() {
+    registered_incremental_parsers[DeepSeekR1ReasoningParser::name()] = std::make_shared<DeepSeekR1ReasoningParser>();
+
+    registered_base_parsers[Llama32PythonicParser::name()] = std::make_shared<Llama32PythonicParser>();
+    return true;
+}
+
+// Ensure the backends are registered before main
+static bool are_backends_registered = register_backends();
 
 } // namespace ov::genai
diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py
index 218f782e29..19e4ebe97a 100644
--- 
a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -18,7 +18,13 @@ StreamerBase, get_version, StreamingStatus, - TextStreamer + TextStreamer, + TextParserStreamer +) + +from .py_openvino_genai import ( + ParserBase, + IncrementalParserBase ) __version__ = get_version() diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index b300311721..ada0fa5ca6 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import collections.abc import openvino._pyopenvino import typing -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'MeanStdPair', 'ParserBase', 'PerfMetrics', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 
'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] class Adapter: """ Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. @@ -1374,6 +1374,17 @@ class ImageGenerationPerfMetrics: @property def raw_metrics(self) -> RawImageGenerationPerfMetrics: ... +class IncrementalParserBase: + def __init__(self) -> None: + ... + def is_active(self) -> bool: + """ + Indicates whether the parser is active and should be used during parsing. + """ + def parse(self, msg: collections.abc.Mapping[str, str], previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> dict[str, str]: + """ + Parse is called every time new text delta is decoded. Returns a ParsedMessage with parsed content. + """ class InpaintingPipeline: """ This class is used for generation with inpainting models. @@ -1741,6 +1752,13 @@ class MeanStdPair: @property def std(self) -> float: ... +class ParserBase: + def __init__(self) -> None: + ... + def parse(self, text: collections.abc.Mapping[str, str]) -> dict[str, str]: + """ + Parse is called with the full text. Returns a ParsedMessage with parsed content. + """ class PerfMetrics: """ @@ -3133,6 +3151,15 @@ class TextEmbeddingPipeline: """ Waits computed embeddings for a query """ +class TextParserStreamer: + def __init__(self, tokenizer: Tokenizer, parsers: collections.abc.Sequence[...] = []) -> None: + """ + TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers. + """ + def write(self, message: collections.abc.Mapping[str, str]) -> StreamingStatus: + """ + Write is called with a ParsedMessage. Returns StreamingStatus. + """ class TextRerankPipeline: """ Text rerank pipeline @@ -3196,8 +3223,6 @@ class TextStreamer(StreamerBase): """ def __init__(self, tokenizer: Tokenizer, callback: collections.abc.Callable[[str], bool | openvino_genai.py_openvino_genai.StreamingStatus], detokenization_params: collections.abc.Mapping[str, typing.Any] = {}) -> None: ... - def end(self) -> None: - ... def write(self, token: typing.SupportsInt | collections.abc.Sequence[typing.SupportsInt]) -> StreamingStatus: ... 
 class TokenizedInputs:
diff --git a/src/python/py_openvino_genai.cpp b/src/python/py_openvino_genai.cpp
index 8cec4de360..ed4aa8d3af 100644
--- a/src/python/py_openvino_genai.cpp
+++ b/src/python/py_openvino_genai.cpp
@@ -33,6 +33,7 @@ void init_lora_adapter(py::module_& m);
 void init_perf_metrics(py::module_& m);
 void init_tokenizer(py::module_& m);
 void init_streamers(py::module_& m);
+void init_parsers(py::module_& m);
 
 void init_generation_config(py::module_& m);
 void init_continuous_batching_pipeline(py::module_& m);
@@ -117,6 +118,7 @@ PYBIND11_MODULE(py_openvino_genai, m) {
     init_generation_config(m);
     init_tokenizer(m);
     init_streamers(m);
+    init_parsers(m);
 
     init_llm_pipeline(m);
     init_continuous_batching_pipeline(m);
diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp
new file mode 100644
index 0000000000..1d3f066334
--- /dev/null
+++ b/src/python/py_parsers.cpp
@@ -0,0 +1,91 @@
+// Copyright (C) 2023-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include <pybind11/pybind11.h>
+#include <pybind11/functional.h>
+#include <pybind11/stl.h>
+#include <pybind11/stl_bind.h>
+#include <pybind11/typing.h>
+
+#include "openvino/genai/parsers.hpp"
+#include "py_utils.hpp"
+
+namespace py = pybind11;
+
+using ov::genai::ParsedMessage;
+using ov::genai::IncrementalParserBase;
+using ov::genai::ParserVariant;
+using ov::genai::ParserBase;
+using ov::genai::Tokenizer;
+using ov::genai::StreamingStatus;
+
+namespace pyutils = ov::genai::pybind::utils;
+
+namespace {
+
+
+class ConstructableIncrementalParserBase: public IncrementalParserBase {
+public:
+    ParsedMessage parse(
+        ParsedMessage& msg,
+        const std::string& previous_text,
+        const std::string& delta_text,
+        const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
+        const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
+    ) override {
+        PYBIND11_OVERRIDE_PURE(
+            ParsedMessage,          // Return type
+            IncrementalParserBase,  // Parent class
+            parse,                  // Name of function in C++ (must match Python name)
+            msg,
+            previous_text,
+            delta_text,
+            previous_tokens,
+            delta_tokens
+        );
+    }
+
+    bool is_active() const override {
+        PYBIND11_OVERRIDE_PURE(
+            bool,                   // Return type
+            IncrementalParserBase,  // Parent class
+            is_active,              // Name of function in C++ (must match Python name)
+        );
+    }
+};
+
+class ConstructableParserBase: public ParserBase {
+public:
+    ParsedMessage parse(ParsedMessage& text) override {
+        PYBIND11_OVERRIDE_PURE(
+            ParsedMessage,  // Return type
+            ParserBase,     // Parent class
+            parse,          // Name of function in C++ (must match Python name)
+            text            // Argument(s)
+        );
+    }
+};
+
+} // namespace
+
+void init_parsers(py::module_& m) {
+    py::class_<IncrementalParserBase, ConstructableIncrementalParserBase, std::shared_ptr<IncrementalParserBase>>(m, "IncrementalParserBase")
+        .def(py::init<>())
+        .def("parse",
+            &IncrementalParserBase::parse,
+            "Parse is called every time new text delta is decoded. Returns a ParsedMessage with parsed content.",
+            py::arg("msg"),
+            py::arg("previous_text"),
+            py::arg("delta_text"),
+            py::arg("previous_tokens") = std::nullopt,
+            py::arg("delta_tokens") = std::nullopt)
+        .def("is_active", &IncrementalParserBase::is_active, "Indicates whether the parser is active and should be used during parsing.");
+
+    py::class_<ParserBase, ConstructableParserBase, std::shared_ptr<ParserBase>>(m, "ParserBase")
+        .def(py::init<>())
+        .def("parse",
+            &ParserBase::parse,
+            "Parse is called with the full text. Returns a ParsedMessage with parsed content.",
+            py::arg("text"));
+
+}
\ No newline at end of file
diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp
index 704be71959..dbeeb1a20e 100644
--- a/src/python/py_streamers.cpp
+++ b/src/python/py_streamers.cpp
@@ -9,6 +9,7 @@
 
 #include "openvino/genai/streamer_base.hpp"
 #include "openvino/genai/text_streamer.hpp"
+#include "openvino/genai/parsers.hpp"
 #include "py_utils.hpp"
 
 namespace py = pybind11;
@@ -16,6 +17,9 @@ namespace py = pybind11;
 using ov::genai::CallbackTypeVariant;
 using ov::genai::StreamingStatus;
 using ov::genai::TextStreamer;
+using ov::genai::TextParserStreamer;
+using ov::genai::IncrementalParserBase;
+using ov::genai::ParsedMessage;
 using ov::genai::Tokenizer;
 
 namespace pyutils = ov::genai::pybind::utils;
@@ -66,6 +70,20 @@ class ConstructableStreamer: public StreamerBase {
     }
 };
 
+class ConstructableTextParserStreamer: public TextParserStreamer {
+public:
+    using TextParserStreamer::TextParserStreamer; // inherit base constructors
+
+    StreamingStatus write(ParsedMessage& message) override {
+        PYBIND11_OVERRIDE_PURE(
+            StreamingStatus,     // Return type
+            TextParserStreamer,  // Parent class
+            write,               // Name of function in C++ (must match Python name)
+            message              // Argument(s)
+        );
+    }
+};
+
 } // namespace
 
 void init_streamers(py::module_& m) {
@@ -109,6 +127,19 @@ void init_streamers(py::module_& m) {
             return self.write(tokens);
         }
     },
-    py::arg("token"))
-    .def("end", &TextStreamer::end);
+    py::arg("token"));
+
+    py::class_<TextParserStreamer, ConstructableTextParserStreamer, std::shared_ptr<TextParserStreamer>>(m, "TextParserStreamer")
+        .def(py::init([](const Tokenizer& tokenizer,
+                         std::vector<std::shared_ptr<IncrementalParserBase>> parsers) {
+            std::vector<ParserVariant> variants(parsers.begin(), parsers.end());
+            return std::make_shared<ConstructableTextParserStreamer>(tokenizer, variants);
+        }),
+        py::arg("tokenizer"),
+        py::arg("parsers") = std::vector<std::shared_ptr<IncrementalParserBase>>({}),
+        "TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers.")
+        .def("write",
+            py::overload_cast<ParsedMessage&>(&TextParserStreamer::write),
+            py::arg("message"),
+            "Write is called with a ParsedMessage. Returns StreamingStatus.");
 }
diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp
index 35b49fde88..0018dbd461 100644
--- a/tests/cpp/parser.cpp
+++ b/tests/cpp/parser.cpp
@@ -109,3 +109,32 @@ TEST(ParserTest, test_reasoning_parser_2) {
 
     ASSERT_EQ(res, expected);
 }
+
+class DeepSeekR1ReasoningParserTest : public ::testing::Test {
+protected:
+    ov::genai::DeepSeekR1ReasoningParser parser;
+    ParsedMessage msg;
+};
+
+TEST_F(DeepSeekR1ReasoningParserTest, ReasoningContentAccumulatesAcrossCalls) {
+    std::vector<std::string> input_stream = {
+        "<|begin▁of▁sentence|>", "First", ",", " I", " recognize", " that", " the", " question", " is", " asking",
+        " for", " the", " sum", " of", " ", "2", " and", " ", "1", ".\n\n", "I", " know", " that", " addition",
+        " involves", " combining", " two", " numbers", " to", " find", " their", " total", ".\n\n", "Starting",
+        " with", " ", "2", ",", " I", " add", " ", "1", " to", " it", ".\n\n", "2", " plus", " ", "1", " equals",
+        " ", "3", ".\n", "</think>", "\n\n", "**", "Solution", ":", "**\n\n", "To", " find", " the", " sum",
+        " of", " ", "2", " and", " ", "1", " follow", " these", " simple", " steps", ":\n\n", "1", ".", " **",
+        "Start", " with", " the", " number", " ", "2", ".", "**\n", "2", ".", " **", "Add", " ", "1", " to",
+        " it", ".", "**\n", "   \n", "  ", " \\", "[\n", "   "
+    };
+
+    std::string ref_res = "First, I recognize that the question is asking for the sum of 2 and 1.\n\nI know that addition involves combining two numbers to find their total.\n\nStarting with 2, I add 1 to it.\n\n2 plus 1 equals 3.\n";
+
+    for (size_t i = 1; i < input_stream.size(); i++) {
+        std::string previous_text = input_stream[i - 1];
+        std::string delta_text = input_stream[i];
+        msg = parser.parse(msg, previous_text, delta_text);
+    }
+    ASSERT_EQ(msg["reasoning_content"], ref_res);
+}
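+
+// A possible extension of this test (sketch only, not asserted above): once
+// "</think>" has been consumed, the non-reasoning tail should land in
+// "content", e.g.
+//   ASSERT_EQ(msg["content"].rfind("\n\n**Solution", 0), 0);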
diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py
new file mode 100644
index 0000000000..724384bcad
--- /dev/null
+++ b/tests/python_tests/test_parsers.py
@@ -0,0 +1,49 @@
+# Copyright (C) 2023-2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+import dataclasses
+import json
+from typing import Optional
+
+import numpy as np
+import openvino
+import pytest
+from openvino_genai import Tokenizer
+from transformers import AutoTokenizer
+from utils.hugging_face import convert_and_save_tokenizer, download_and_convert_model
+
+
+@pytest.fixture(scope="module")
+def hf_ov_genai_models(request, tmp_path_factory):
+    model_id, args = request.param
+    tok_load_properties = {"add_second_input": args.pop("add_second_input")} if "add_second_input" in args else {}
+
+    hf_args = args.copy()  # to overcome mutable default argument side effects
+    if "padding_side" in hf_args and hf_args["padding_side"] is None:
+        # HF does not accept None.
+        # Remove padding_side and let HF choose the default value.
+        hf_args.pop("padding_side")
+    elif "padding_side" in hf_args:
+        hf_args["truncation_side"] = hf_args["padding_side"]
+    model_dir = tmp_path_factory.getbasetemp() / model_id.replace("/", "_")
+    model_dir.mkdir(exist_ok=True, parents=True)
+
+    hf_tokenizer = AutoTokenizer.from_pretrained(model_id, **hf_args)
+    convert_args = {"number_of_inputs": hf_args.pop("number_of_inputs")} if "number_of_inputs" in hf_args else {}
+    convert_and_save_tokenizer(hf_tokenizer, model_dir, **convert_args)
+
+    genai_tokenizer = Tokenizer(model_dir, tok_load_properties)
+    return hf_tokenizer, genai_tokenizer
+
+
+@pytest.mark.precommit
+@pytest.mark.parametrize(
+    "hf_ov_genai_models",
+    ["katuni4ka/tiny-random-phi3"],
+    indirect=True
+)
+def test_non_string_chat_template(hf_ov_genai_models):
+    hf_tokenizer, genai_tokenizer = hf_ov_genai_models
+
+

From 3ab9757709ca368c15f84a6fca92c238b524a00e Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Wed, 24 Sep 2025 11:01:43 +0200
Subject: [PATCH 03/43] refactored

---
 src/cpp/include/openvino/genai/parsers.hpp    | 21 ++-------
 .../include/openvino/genai/text_streamer.hpp  | 14 ++++++
 src/cpp/src/parsers.cpp                       | 46 ++-----------------
 src/cpp/src/text_streamer.cpp                 | 38 ++++++++++++++-
 src/python/py_streamers.cpp                   | 15 +++---
 tests/python_tests/test_parsers.py            | 31 +++++++++++--
 6 files changed, 97 insertions(+), 68 deletions(-)

diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index 291df915a6..41336e4f95 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -2,8 +2,9 @@
 // SPDX-License-Identifier: Apache-2.0
 
 #pragma once
-#include "openvino/genai/text_streamer.hpp"
 #include <map>
+#include <memory>
+#include <optional>
 #include <string>
 #include <variant>
 #include <vector>
 
 namespace ov {
 namespace genai {
 
@@ -32,6 +33,7 @@ class IncrementalParserBase {
     ) = 0;
 
     virtual bool is_active() const = 0;
+    static std::map<std::string, std::shared_ptr<IncrementalParserBase>> registered_parsers;
 };
 
 class DeepSeekR1ReasoningParser : public IncrementalParserBase {
@@ -61,24 +63,11 @@ class ParserBase {
     ParserBase() = default;
 
     virtual ParsedMessage parse(ParsedMessage& text) = 0;
+    static std::map<std::string, std::shared_ptr<ParserBase>> registered_parsers;
 };
 
 using ParserVariant = std::variant<std::shared_ptr<IncrementalParserBase>, std::string>;
 
-
-class TextParserStreamer : public ov::genai::TextStreamer {
-public:
-    TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers = {});
-
-    virtual StreamingStatus write(ParsedMessage& message) = 0;
-
-    ov::genai::CallbackTypeVariant write(std::string message);
-    ParsedMessage m_parsed_message;
-private:
-    std::string m_text_buffer;
-    std::vector<std::shared_ptr<IncrementalParserBase>> m_parsers;
-};
-
 class Llama32PythonicParser : public ParserBase {
 // Does not modify original content, only extracts and adds tool calls
 public:
@@ -104,7 +93,7 @@ class BaseReasoningParser : public ParserBase{
     bool m_expect_open_tag = true;
     bool m_keep_original_content = true;
     std::string m_open_tag = "<think>";
-    std::string m_close_tag = "</think>";
+    std::string m_close_tag = "</think>";
 };
 
diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp
index f7c64e3586..658e4c2e38 100644
--- a/src/cpp/include/openvino/genai/text_streamer.hpp
+++ b/src/cpp/include/openvino/genai/text_streamer.hpp
@@ -5,6 +5,7 @@
 
 #include "openvino/genai/streamer_base.hpp"
 #include "openvino/genai/tokenizer.hpp"
+#include "openvino/genai/parsers.hpp"
 
 namespace ov {
 namespace genai {
@@ -46,5 +47,18 @@ class OPENVINO_GENAI_EXPORTS TextStreamer : public StreamerBase {
     void compute_decoded_length_for_position(size_t cache_position);
 };
 
+class TextParserStreamer : public TextStreamer {
+public:
+    TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers = {});
+
+    virtual StreamingStatus write(ParsedMessage& message);
+
+    CallbackTypeVariant write(std::string message);
+    ParsedMessage m_parsed_message;
+private:
+    std::string m_text_buffer;
+    std::vector<std::shared_ptr<IncrementalParserBase>> m_parsers;
+};
+
 } // namespace genai
 } // namespace ov
diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index 76a6eb6c15..df30670145 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -13,9 +13,6 @@ using json = nlohmann::json;
 
 namespace ov::genai {
 
-static std::map<std::string, std::shared_ptr<IncrementalParserBase>> registered_incremental_parsers;
-static std::map<std::string, std::shared_ptr<ParserBase>> registered_base_parsers;
-
 bool DeepSeekR1ReasoningParser::is_active() const {
     return !m_deactivated;
 }
@@ -123,47 +120,14 @@ ParsedMessage BaseReasoningParser::parse(ParsedMessage& input) {
     return res;
 }
 
-
-TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers)
-    : ov::genai::TextStreamer(tokenizer, [this](std::string s) -> ov::genai::CallbackTypeVariant {
-        return this->write(s);
-    }) {
-    for (auto& parser : parsers) {
-        if (std::holds_alternative<std::shared_ptr<IncrementalParserBase>>(parser)) {
-            m_parsers.push_back(std::get<std::shared_ptr<IncrementalParserBase>>(parser));
-        } else {
-            auto parser_name = std::get<std::string>(parser);
-            if (registered_incremental_parsers.find(parser_name) != registered_incremental_parsers.end()) {
-                m_parsers.push_back(registered_incremental_parsers[parser_name]);
-            }
-        }
-    }
-}
-
-StreamingStatus TextParserStreamer::write(ParsedMessage& message) {
-    if (message.find("content") != message.end()) {
-        std::cout << message.at("content") << std::endl;
-    }
-    return StreamingStatus::RUNNING;
-}
-
-ov::genai::CallbackTypeVariant TextParserStreamer::write(std::string message) {
-    for (auto& parser: m_parsers) {
-        if (parser->is_active()) {
-            m_parsed_message = parser->parse(m_parsed_message, m_text_buffer, message);
-        }
-    }
-
-    m_text_buffer = message;
-    return write(m_parsed_message);
-}
-
+std::map<std::string, std::shared_ptr<IncrementalParserBase>> IncrementalParserBase::registered_parsers;
+std::map<std::string, std::shared_ptr<ParserBase>> ParserBase::registered_parsers;
 
 // static initializer to register available built-in parsers
 static bool register_backends() {
-    registered_incremental_parsers[DeepSeekR1ReasoningParser::name()] = std::make_shared<DeepSeekR1ReasoningParser>();
-
-    registered_base_parsers[Llama32PythonicParser::name()] = std::make_shared<Llama32PythonicParser>();
+    IncrementalParserBase::registered_parsers[DeepSeekR1ReasoningParser::name()] = std::make_shared<DeepSeekR1ReasoningParser>();
+
+    ParserBase::registered_parsers[Llama32PythonicParser::name()] = std::make_shared<Llama32PythonicParser>();
     return true;
 }
 
diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp
index 9a0b4e125a..0654abf7dc 100644
--- a/src/cpp/src/text_streamer.cpp
+++ b/src/cpp/src/text_streamer.cpp
@@ -122,7 +122,43 @@ void TextStreamer::end() {
     return;
 }
 
-ov::genai::StreamerBase::~StreamerBase() = default;
+StreamerBase::~StreamerBase() = default;
+
+TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers)
+    : TextStreamer(tokenizer, [this](std::string s) -> CallbackTypeVariant {
+        return this->write(s);
+    }) {
+    for (auto& parser : parsers) {
+        if (std::holds_alternative<std::string>(parser)) {
+            auto parser_name = std::get<std::string>(parser);
+            if (IncrementalParserBase::registered_parsers.find(parser_name) != IncrementalParserBase::registered_parsers.end()) {
+                m_parsers.push_back(IncrementalParserBase::registered_parsers[parser_name]);
+            } else {
+                OPENVINO_THROW("Parser with name " + parser_name + " is not registered");
+            }
+        } else {
+            m_parsers.push_back(std::get<std::shared_ptr<IncrementalParserBase>>(parser));
+        }
+    }
+}
+
+StreamingStatus TextParserStreamer::write(ParsedMessage& message) {
+    if (message.find("content") != message.end()) {
+        std::cout << message.at("content") << std::endl;
+    }
+    return StreamingStatus::RUNNING;
+}
+
+CallbackTypeVariant TextParserStreamer::write(std::string message) {
+    for (auto& parser: m_parsers) {
+        // if (parser->is_active()) {
+        m_parsed_message = parser->parse(m_parsed_message, m_text_buffer, message);
+        // }
+    }
+
+    m_text_buffer = message;
+    return write(m_parsed_message);
+}
 
 } // namespace genai
 } // namespace ov
\ No newline at end of file
diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp
index dbeeb1a20e..a2f8d16420 100644
--- a/src/python/py_streamers.cpp
+++ b/src/python/py_streamers.cpp
@@ -75,7 +75,7 @@ class ConstructableTextParserStreamer: public TextParserStreamer {
     using TextParserStreamer::TextParserStreamer; // inherit base constructors
 
     StreamingStatus write(ParsedMessage& message) override {
-        PYBIND11_OVERRIDE_PURE(
+        PYBIND11_OVERRIDE(
             StreamingStatus,     // Return type
             TextParserStreamer,  // Parent class
             write,               // Name of function in C++ (must match Python name)
             message              // Argument(s)
         );
     }
@@ -131,15 +131,18 @@ void init_streamers(py::module_& m) {
 
     py::class_<TextParserStreamer, ConstructableTextParserStreamer, std::shared_ptr<TextParserStreamer>>(m, "TextParserStreamer")
         .def(py::init([](const Tokenizer& tokenizer,
-                         std::vector<std::shared_ptr<IncrementalParserBase>> parsers) {
-            std::vector<ParserVariant> variants(parsers.begin(), parsers.end());
-            return std::make_shared<ConstructableTextParserStreamer>(tokenizer, variants);
+                         std::vector<std::variant<std::shared_ptr<IncrementalParserBase>, std::string>> parsers) {
+            return std::make_shared<TextParserStreamer>(tokenizer, parsers);
         }),
         py::arg("tokenizer"),
-        py::arg("parsers") = std::vector<std::shared_ptr<IncrementalParserBase>>({}),
+        py::arg("parsers") = std::vector<std::variant<std::shared_ptr<IncrementalParserBase>, std::string>>({}),
        "TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers.")
         .def("write",
            py::overload_cast<ParsedMessage&>(&TextParserStreamer::write),
            py::arg("message"),
-            "Write is called with a ParsedMessage. Returns StreamingStatus.");
+            "Write is called with a ParsedMessage. Returns StreamingStatus.")
+        .def("write",
+            py::overload_cast<std::string>(&TextParserStreamer::write),
+            py::arg("message"),
+            "Write is called with a string message. Returns CallbackTypeVariant.");
 }
diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py
index 724384bcad..fd762381f0 100644
--- a/tests/python_tests/test_parsers.py
+++ b/tests/python_tests/test_parsers.py
@@ -7,9 +7,8 @@ import dataclasses
 
 import numpy as np
 import openvino
 import pytest
-from openvino_genai import Tokenizer
+from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer
 from transformers import AutoTokenizer
 from utils.hugging_face import convert_and_save_tokenizer, download_and_convert_model
 
@@ -39,11 +38,35 @@ def hf_ov_genai_models(request, tmp_path_factory):
 @pytest.mark.precommit
 @pytest.mark.parametrize(
     "hf_ov_genai_models",
-    ["katuni4ka/tiny-random-phi3"],
+    [("katuni4ka/tiny-random-phi3", {"padding_side": "right"})],
     indirect=True
 )
 def test_non_string_chat_template(hf_ov_genai_models):
     hf_tokenizer, genai_tokenizer = hf_ov_genai_models
+    class CustomStreamer(TextParserStreamer):
+        def write(self, message):
+            if "content" in message:
+                print(message["content"])
+            return True
+    streamer = CustomStreamer(genai_tokenizer, parsers=["DeepSeekR1ReasoningParser"])
+
+    msg = {}
+    stream_string = [
+        "<|begin▁of▁sentence|>", "First", ",", " I", " recognize", " that", " the", " question", " is", " asking",
+        " for", " the", " sum", " of", " ", "2", " and", " ", "1", ".\n\n", "I", " know", " that", " addition",
+        " involves", " combining", " two", " numbers", " to", " find", " their", " total", ".\n\n", "Starting",
+        " with", " ", "2", ",", " I", " add", " ", "1", " to", " it", ".\n\n", "2", " plus", " ", "1", " equals",
+        " ", "3", ".\n", "</think>", "\n\n", "**", "Solution", ":", "**\n\n", "To", " find", " the", " sum",
+        " of", " ", "2", " and", " ", "1", " follow", " these", " simple", " steps", ":\n\n", "1", ".", " **",
+        "Start", " with", " the", " number", " ", "2", ".", "**\n", "2", ".", " **", "Add", " ", "1", " to",
+        " it", ".", "**\n", "   \n", "  ", " \\", "[\n", "   "
+    ]
+
+    for subword in stream_string:
+        msg = streamer.write(subword)
+
+    # for (prev_subword, subword) in zip(stream_string[:-1], stream_string[1:]):
+    #     msg = streamer.write(msg, prev_subword, subword)
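+
+    # Sketch of follow-up assertions (assumptions, not part of this patch):
+    # if write() is made to return the accumulated ParsedMessage, the stream
+    # above should yield roughly
+    #   assert msg["reasoning_content"].startswith("First, I recognize")
+    #   assert msg["content"].lstrip().startswith("**Solution:**")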

From 5624fc2117844f9958fa5a3e0a1b7efde000c2ad Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Thu, 25 Sep 2025 15:04:55 +0200
Subject: [PATCH 04/43] add parsing at the end of generate()

---
 .../openvino/genai/generation_config.hpp      |  4 ++
 .../include/openvino/genai/llm_pipeline.hpp   |  1 +
 src/cpp/include/openvino/genai/parsers.hpp    |  7 +--
 .../include/openvino/genai/text_streamer.hpp  |  5 +-
 src/cpp/src/generation_config.cpp             |  1 +
 src/cpp/src/llm/pipeline.cpp                  | 59 +++++++++++++++++-
 src/cpp/src/parsers.cpp                       | 22 +++++--
 src/cpp/src/text_streamer.cpp                 |  8 +--
 src/python/py_generation_config.cpp           |  1 +
 src/python/py_streamers.cpp                   | 16 +++--
 tests/python_tests/test_parsers.py            | 62 ++++++++++++++++---
 11 files changed, 154 insertions(+), 32 deletions(-)

diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp
index 3020be34bc..1d832c3c8e 100644
--- a/src/cpp/include/openvino/genai/generation_config.hpp
+++ b/src/cpp/include/openvino/genai/generation_config.hpp
@@ -13,6 +13,7 @@
 #include "openvino/genai/tokenizer.hpp"
 #include "openvino/genai/scheduler_config.hpp"
 #include "openvino/genai/lora_adapter.hpp"
+#include "openvino/genai/parsers.hpp"
 
 namespace ov {
 namespace genai {
@@ -348,6 +349,9 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig {
     bool is_prompt_lookup() const;
     bool is_structured_output_generation() const;
 
+    // parsers
+    std::vector<std::variant<std::string, std::shared_ptr<ParserBase>>> parsers;
+
     OPENVINO_DEPRECATED("Please, use `is_assisting_generation()` instead of `is_speculative_decoding()`. This method will be removed in 2026.0.0 release")
     bool is_speculative_decoding() const;
 
diff --git a/src/cpp/include/openvino/genai/llm_pipeline.hpp b/src/cpp/include/openvino/genai/llm_pipeline.hpp
index eea94591c3..4f2c8405f1 100644
--- a/src/cpp/include/openvino/genai/llm_pipeline.hpp
+++ b/src/cpp/include/openvino/genai/llm_pipeline.hpp
@@ -68,6 +68,7 @@ class DecodedResults {
     std::vector scores;
     PerfMetrics perf_metrics;
     std::shared_ptr extended_perf_metrics;
+    std::vector<ParsedMessage> parsed;
 
     // @brief Convert DecodedResults to a string.
     operator std::string() const {
diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index 41336e4f95..7030acbf9a 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -15,11 +15,6 @@ namespace genai {
 
 using ParsedMessage = std::map<std::string, std::string>;
 
-class ParsedJSONMessage {
-public:
-    std::map<std::string, std::string> content;
-};
-
 class IncrementalParserBase {
 public:
     IncrementalParserBase() = default;
@@ -44,7 +39,7 @@ class DeepSeekR1ReasoningParser : public IncrementalParserBase {
     std::string m_open_tag = "<think>";
     std::string m_close_tag = "</think>";
 public:
-    DeepSeekR1ReasoningParser() = default;
+    DeepSeekR1ReasoningParser(bool starts_with_thinking = true) : m_starts_with_thinking(starts_with_thinking) {}
     std::map<std::string, std::string> accumulated_parsed;
 
     ParsedMessage parse(
diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp
index 658e4c2e38..872e79dbf9 100644
--- a/src/cpp/include/openvino/genai/text_streamer.hpp
+++ b/src/cpp/include/openvino/genai/text_streamer.hpp
@@ -54,8 +54,11 @@ class TextParserStreamer : public TextStreamer {
     virtual StreamingStatus write(ParsedMessage& message);
 
     CallbackTypeVariant write(std::string message);
-    ParsedMessage m_parsed_message;
+
+    ParsedMessage get_parsed_message() const { return m_parsed_message; }
+    std::vector<std::shared_ptr<IncrementalParserBase>> get_parsers() const { return m_parsers; }
 private:
+    ParsedMessage m_parsed_message;
     std::string m_text_buffer;
     std::vector<std::shared_ptr<IncrementalParserBase>> m_parsers;
 };
diff --git a/src/cpp/src/generation_config.cpp b/src/cpp/src/generation_config.cpp
index db7d621ed5..f370042b25 100644
--- a/src/cpp/src/generation_config.cpp
+++ b/src/cpp/src/generation_config.cpp
@@ -153,6 +153,7 @@ void GenerationConfig::update_generation_config(const ov::AnyMap& properties) {
 
     // Structured output
     read_anymap_param(properties, "structured_output_config", structured_output_config);
+    read_anymap_param(properties, "parsers", parsers);
 }
 
diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp
index 76d1fe24dc..38f00e4e45 100644
--- a/src/cpp/src/llm/pipeline.cpp
+++ b/src/cpp/src/llm/pipeline.cpp
@@ -205,7 +205,64 @@ DecodedResults LLMPipeline::generate(
     StringInputs inputs,
     OptionalGenerationConfig generation_config,
     StreamerVariant streamer) {
-    return m_pimpl->generate(inputs, generation_config, streamer);
+    auto res = m_pimpl->generate(inputs, generation_config, streamer);
+
+    std::vector<std::shared_ptr<IncrementalParserBase>> incremental_parsers;
+    // If the streamer is a TextParserStreamer, collect its incremental parsers
+    if (auto streamer_obj = std::get_if<std::shared_ptr<StreamerBase>>(&streamer)) {
+        if (auto parser_streamer = std::dynamic_pointer_cast<TextParserStreamer>(*streamer_obj)) {
+            incremental_parsers = parser_streamer->get_parsers();
+        }
+    }
+
+
+    if (incremental_parsers.empty() && (!generation_config.has_value() || (*generation_config).parsers.empty())) {
(*generation_config).parsers.empty())) {
+        return res;
+    }
+
+    std::vector<std::shared_ptr<ParserBase>> parsers;
+    if (generation_config.has_value() && !(*generation_config).parsers.empty()) {
+        for (auto& parser_variant : (*generation_config).parsers) {
+            if (std::holds_alternative<std::string>(parser_variant)) {
+                auto parser_name = std::get<std::string>(parser_variant);
+                if (ParserBase::registered_parsers.find(parser_name) == ParserBase::registered_parsers.end()) {
+                    OPENVINO_THROW("Parser with name ", parser_name, " is not registered");
+                }
+                parsers.push_back(ParserBase::registered_parsers[parser_name]);
+            } else if (std::holds_alternative<std::shared_ptr<ParserBase>>(parser_variant)) {
+                auto parser = std::get<std::shared_ptr<ParserBase>>(parser_variant);
+                parsers.push_back(parser);
+            }
+        }
+    }
+
+    res.parsed.resize(res.texts.size());
+
+    // BaseParsers have priority over IncrementalParsers.
+    if (!parsers.empty()) {
+        for (size_t i = 0; i < res.texts.size(); ++i) {
+            auto& message = res.texts[i];
+            ParsedMessage msg;
+            for (auto& parser : parsers) {
+                msg = parser->parse(msg);
+            }
+            res.parsed[i] = msg;
+        }
+        return res;
+    }
+
+    // At this point only IncrementalParsers are left.
+    for (size_t i = 0; i < res.texts.size(); ++i) {
+        auto& message = res.texts[i];
+        ParsedMessage msg;
+        for (auto& parser : incremental_parsers) {
+            // Previous text is an empty string because we populate the message with the full generated text.
+            msg = parser->parse(msg, "", message);
+        }
+        res.parsed[i] = msg;
+    }
+
+    return res;
 }
 
 DecodedResults LLMPipeline::generate(StringInputs text, const ov::AnyMap& config_map) {
diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index df30670145..ed4fcca3a5 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -35,18 +35,14 @@ ParsedMessage DeepSeekR1ReasoningParser::parse(
         msg["content"] += delta_text;
         return msg;
     }
-    if (m_starts_with_thinking) {
-        m_think_tag_opened = true;
-    }
-
     bool think_tag_closed = delta_text.find(m_close_tag) != std::string::npos;
-    if (!m_think_tag_opened && delta_text.find(m_open_tag) != std::string::npos) {
+    if (!m_think_tag_opened && delta_text.find(m_open_tag) != std::string::npos && !m_starts_with_thinking) {
         // Thinking has started
         auto think_idx = delta_text.find(m_open_tag);
         msg["reasoning_content"] += delta_text.substr(think_idx + std::string(m_open_tag).size(), delta_text.size() - (think_idx + std::string(m_open_tag).size()));
         m_think_tag_opened = true;
-    } else if (m_think_tag_opened && delta_text.find(m_close_tag) != std::string::npos) {
+    } else if ((m_think_tag_opened || m_starts_with_thinking) && delta_text.find(m_close_tag) != std::string::npos) {
         auto think_idx = delta_text.find(m_close_tag);
         msg["reasoning_content"] += delta_text.substr(0, think_idx);
         msg["content"] += delta_text.substr(think_idx + std::string(m_close_tag).size(), delta_text.size() - (think_idx + std::string(m_close_tag).size()));
@@ -126,6 +122,7 @@ std::map<std::string, std::shared_ptr<ParserBase>> ParserBase::registered_parser
 // static initializer to register available built-in parsers
 static bool register_backends() {
     IncrementalParserBase::registered_parsers[DeepSeekR1ReasoningParser::name()] = std::make_shared<DeepSeekR1ReasoningParser>();
+    IncrementalParserBase::registered_parsers["Phi-4-reasoning"] = std::make_shared<DeepSeekR1ReasoningParser>(/*starts_with_thinking*/ false);
 
     ParserBase::registered_parsers[Llama32PythonicParser::name()] = std::make_shared<Llama32PythonicParser>();
     return true;
@@ -134,4 +132,16 @@ static bool register_backends() {
 
 // Ensure the backends are registered before main
 static bool are_backends_registered = 
register_backends();
 
+static std::vector<std::string> get_parsers_names() {
+    if (!are_backends_registered) {
+        register_backends();
+    }
+
+    std::vector<std::string> names;
+    for (const auto& [name, _] : IncrementalParserBase::registered_parsers) {
+        names.push_back(name);
+    }
+    return names;
+}
+
 } // namespace ov::genai
diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp
index 0654abf7dc..a5238b4a35 100644
--- a/src/cpp/src/text_streamer.cpp
+++ b/src/cpp/src/text_streamer.cpp
@@ -131,11 +131,10 @@ TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers)
     for (auto& parser : parsers) {
         if (std::holds_alternative<std::string>(parser)) {
             auto parser_name = std::get<std::string>(parser);
-            if (IncrementalParserBase::registered_parsers.find(parser_name) != IncrementalParserBase::registered_parsers.end()) {
-                m_parsers.push_back(IncrementalParserBase::registered_parsers[parser_name]);
-            } else {
+            if (IncrementalParserBase::registered_parsers.find(parser_name) == IncrementalParserBase::registered_parsers.end()) {
                 OPENVINO_THROW("Parser with name " + parser_name + " is not registered");
             }
+            m_parsers.push_back(IncrementalParserBase::registered_parsers[parser_name]);
         } else {
             m_parsers.push_back(std::get<std::shared_ptr<IncrementalParserBase>>(parser));
         }
@@ -154,6 +153,7 @@ CallbackTypeVariant TextParserStreamer::write(std::string message) {
         // if (parser->is_active()) {
             m_parsed_message = parser->parse(m_parsed_message, m_text_buffer, message);
         // }
+        // m_parsed_message["content"] += message;
     }
 
     m_text_buffer = message;
@@ -161,4 +161,4 @@ CallbackTypeVariant TextParserStreamer::write(std::string message) {
 }
 
 } // namespace genai
-} // namespace ov
\ No newline at end of file
+} // namespace ov
diff --git a/src/python/py_generation_config.cpp b/src/python/py_generation_config.cpp
index 9459b8dfc3..c446966772 100644
--- a/src/python/py_generation_config.cpp
+++ b/src/python/py_generation_config.cpp
@@ -280,6 +280,7 @@ void init_generation_config(py::module_& m) {
         .def_readwrite("include_stop_str_in_output", &GenerationConfig::include_stop_str_in_output)
         .def_readwrite("stop_token_ids", &GenerationConfig::stop_token_ids)
         .def_readwrite("structured_output_config", &GenerationConfig::structured_output_config)
+        .def_readwrite("parsers", &GenerationConfig::parsers)
         .def_readwrite("adapters", &GenerationConfig::adapters)
         .def_readwrite("apply_chat_template", &GenerationConfig::apply_chat_template)
         .def("set_eos_token_id", &GenerationConfig::set_eos_token_id, py::arg("tokenizer_eos_token_id"))
diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp
index a2f8d16420..e643666b7e 100644
--- a/src/python/py_streamers.cpp
+++ b/src/python/py_streamers.cpp
@@ -129,7 +129,7 @@ void init_streamers(py::module_& m) {
         }
     }, py::arg("token"));
 
-    py::class_<TextParserStreamer, ConstructableTextParserStreamer, std::shared_ptr<TextParserStreamer>>(m, "TextParserStreamer")
+    py::class_<TextParserStreamer, ConstructableTextParserStreamer, std::shared_ptr<TextParserStreamer>, TextStreamer>(m, "TextParserStreamer")
         .def(py::init([](const Tokenizer& tokenizer,
                          std::vector<std::variant<std::shared_ptr<IncrementalParserBase>, std::string>> parsers) {
             return std::make_shared<ConstructableTextParserStreamer>(tokenizer, parsers);
         }),
         py::arg("tokenizer"),
         py::arg("parsers") = std::vector<std::variant<std::shared_ptr<IncrementalParserBase>, std::string>>({}),
         "TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers.")
-        .def("write",
-            py::overload_cast<ParsedMessage&>(&TextParserStreamer::write),
-            py::arg("message"),
-            "Write is called with a ParsedMessage. Returns StreamingStatus.")
+        // .def("write",
+        //     py::overload_cast<ParsedMessage&>(&TextParserStreamer::write),
+        //     py::arg("message"),
+        //     "Write is called with a ParsedMessage. Returns StreamingStatus.")
         .def("write",
             py::overload_cast<std::string>(&TextParserStreamer::write),
             py::arg("message"),
            "Write is called with a string message. 
Returns CallbackTypeVariant.") + + .def("get_parsed_message", &TextParserStreamer::get_parsed_message, "Get the current parsed message") + + .def("get_parsers", &TextParserStreamer::get_parsers, "Get the list of parsers"); } diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index fd762381f0..d8a323f3bc 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -41,7 +41,7 @@ def hf_ov_genai_models(request, tmp_path_factory): [("katuni4ka/tiny-random-phi3", {"padding_side": "right"})], indirect=True ) -def test_non_string_chat_template(hf_ov_genai_models): +def test_parsers_1(hf_ov_genai_models): hf_tokenizer, genai_tokenizer = hf_ov_genai_models class CustomStreamer(TextParserStreamer): def write(self, message): @@ -49,8 +49,7 @@ def write(self, message): print(message["content"]) return True - breakpoint() - streamer = CustomStreamer(genai_tokenizer, parsers=["DeepSeekR1ReasoningParser"]) + streamer = TextParserStreamer(genai_tokenizer, parsers=["DeepSeekR1ReasoningParser"]) msg = {} stream_string = [ @@ -64,9 +63,56 @@ def write(self, message): " it", ".", "**\n", " \n", " ", " \\", "[\n", " " ] - for subword in stream_string: - msg = streamer.write(subword) + full_str = ''.join(stream_string) + think_content = full_str.split("")[0] + content = full_str.split("")[1] + + parsers = streamer.get_parsers() + + extended = stream_string[:] + extended.append("") + + for parser in parsers: + for (prev_subword, subword) in zip(extended, stream_string): + msg = parser.parse(msg, prev_subword, subword) + + assert msg['reasoning_content'] == think_content + assert msg['content'] == content + +def test_parsers_2(hf_ov_genai_models): + hf_tokenizer, genai_tokenizer = hf_ov_genai_models + class CustomStreamer(TextParserStreamer): + def write(self, message): + if "content" in message: + print(message["content"]) + return True + + streamer = TextParserStreamer(genai_tokenizer, parsers=["DeepSeekR1ReasoningParser"]) + + msg = {} + stream_string = [ + "<|begin▁of▁sentence|>", "First", ",", " I", " recognize", " that", " the", " question", " is", " asking", + " for", " the", " sum", " of", " ", "2", " and", " ", "1", ".\n\n", "I", " know", " that", " addition", + " involves", " combining", " two", " numbers", " to", " find", " their", " total", ".\n\n", "Starting", + " with", " ", "2", ",", " I", " add", " ", "1", " to", " it", ".\n\n", "2", " plus", " ", "1", " equals", + " ", "3", ".\n", "", "\n\n", "**", "Solution", ":", "**\n\n", "To", " find", " the", " sum", + " of", " ", "2", " and", " ", "1", " follow", " these", " simple", " steps", ":\n\n", "1", ".", " **", + "Start", " with", " the", " number", " ", "2", ".", "**\n", "2", ".", " **", "Add", " ", "1", " to", + " it", ".", "**\n", " \n", " ", " \\", "[\n", " " + ] + + full_str = ''.join(stream_string) + think_content = full_str.split("")[0] + content = full_str.split("")[1] + + parsers = streamer.get_parsers() + + extended = stream_string[:] + extended.append("") + + for parser in parsers: + for (prev_subword, subword) in zip(extended, stream_string): + msg = parser.parse(msg, prev_subword, subword) - # for (prev_subword, subword) in zip(stream_string[:-1], stream_string[1:]): - # msg = streamer.write(msg, prev_subword, subword) - breakpoint() + assert msg['reasoning_content'] == think_content + assert msg['content'] == content From 3407d8fbf2865b062da1ae8d4f99b956584a8d29 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Tue, 30 Sep 2025 01:18:34 +0200 Subject: [PATCH 05/43] hide 
map with predefined initialized parsers; add TODOs
---
 .github/workflows/linux.yml                   |  2 +-
 .github/workflows/mac.yml                     |  2 +-
 .github/workflows/windows.yml                 |  2 +-
 samples/cpp/text_generation/CMakeLists.txt    |  1 +
 .../text_generation/parsed_output_sample.cpp  | 17 +----
 src/cpp/include/openvino/genai/parsers.hpp    | 42 +++++++++----
 .../include/openvino/genai/text_streamer.hpp  |  2 +-
 src/cpp/src/continuous_batching/pipeline.cpp  |  4 +-
 src/cpp/src/llm/pipeline.cpp                  | 30 +++------
 src/cpp/src/parsers.cpp                       | 63 +++++++++++++------
 src/cpp/src/parsers.hpp                       | 48 --------------
 src/cpp/src/text_streamer.cpp                 | 22 +++----
 src/python/py_parsers.cpp                     | 10 +--
 src/python/py_streamers.cpp                   | 11 ++--
 tests/cpp/parser.cpp                          | 12 ++--
 tests/python_tests/test_parsers.py            |  2 +
 tests/python_tests/test_text_streamer.py      | 14 +++++
 17 files changed, 137 insertions(+), 147 deletions(-)
 delete mode 100644 src/cpp/src/parsers.hpp

diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index 7ff8c29af3..9500add7c3 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -782,7 +782,7 @@ jobs:
         run: |
           source ${{ env.INSTALL_DIR }}/setupvars.sh
           chmod +x ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching
-          ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="-AddSecondInputTest.*"
+          ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="DeepSeekR1ReasoningParserTest.*:ParserTest.*:-AddSecondInputTest.*"

      - name: Test Continuous Batching Tools
        if: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching }}
diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml
index 5620804095..d1be431238 100644
--- a/.github/workflows/mac.yml
+++ b/.github/workflows/mac.yml
@@ -695,7 +695,7 @@ jobs:
         run: |
           source ${{ env.INSTALL_DIR }}/setupvars.sh
           chmod +x ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching
-          ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="-AddSecondInputTest.*"
+          ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="DeepSeekR1ReasoningParserTest.*:ParserTest.*:-AddSecondInputTest.*"

      - name: Test C++ Tools
        run: |
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index bb544bc0cf..fbd082bede 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -866,7 +866,7 @@
      - name: gtests unit tests
        run: |
          . 
"${{ env.INSTALL_DIR }}/setupvars.ps1" - & "${{ env.INSTALL_DIR }}/tests/tests_continuous_batching.exe" --gtest_filter="-AddSecondInputTest.*" + & "${{ env.INSTALL_DIR }}/tests/tests_continuous_batching.exe" --gtest_filter="-AddSecondInputTest.*" --gtest_filter="DeepSeekR1ReasoningParserTest.*" --gtest_filter="ParserTest.* - name: Test C++ Tools run: | diff --git a/samples/cpp/text_generation/CMakeLists.txt b/samples/cpp/text_generation/CMakeLists.txt index ebaf32c7f4..7493362e81 100644 --- a/samples/cpp/text_generation/CMakeLists.txt +++ b/samples/cpp/text_generation/CMakeLists.txt @@ -29,6 +29,7 @@ set (SAMPLE_LIST lora_greedy_causal_lm multinomial_causal_lm prompt_lookup_decoding_lm + parsed_output_sample speculative_decoding_lm) foreach(sample IN LISTS SAMPLE_LIST) diff --git a/samples/cpp/text_generation/parsed_output_sample.cpp b/samples/cpp/text_generation/parsed_output_sample.cpp index ada4af6751..1b481a2a4a 100644 --- a/samples/cpp/text_generation/parsed_output_sample.cpp +++ b/samples/cpp/text_generation/parsed_output_sample.cpp @@ -5,29 +5,14 @@ #include "openvino/genai/parsers.hpp" #include "openvino/genai/text_streamer.hpp" -using ov::genai::ParsingState; class CurrentStreamer : public ov::genai::TextParserStreamer { private: - ParsingState m_previous_state = ParsingState::UNDEFINED; public: CurrentStreamer(const ov::genai::Tokenizer& tokenizer) : ov::genai::TextParserStreamer(tokenizer) {} - ov::genai::StreamingStatus write(const ov::genai::ParsedMessage& message) { - - // if (m_previous_state == ParsingState::UNDEFINED && message["state"] == ParsingState::REASONING) { - // std::cout << "Reasoning: " << std::endl; - // std::cout << message["reasoning_content"].value(); - // } else if (m_previous_state == ParsingState::REASONING && message["state"] == ParsingState::CONTENT) { - // std::cout << std::endl << "Content: " << std::endl; - // std::cout << message["content"].value(); - // } else if (m_previous_state == ParsingState::REASONING && message["state"] == ParsingState::REASONING) { - // std::cout << message["reasoning_content"].value(); - // } else if (m_previous_state == ParsingState::CONTENT && message["state"] == ParsingState::CONTENT) { - // std::cout << message["content"].value(); - // } + ov::genai::StreamingStatus write(ov::genai::ParsedMessage& message) { std::cout << message.at("content"); - return ov::genai::StreamingStatus::RUNNING; } }; diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 7030acbf9a..020176d117 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -19,57 +20,74 @@ class IncrementalParserBase { public: IncrementalParserBase() = default; - virtual ParsedMessage parse( + // We return string which with filtered text to be added to content. 
+    virtual std::string parse(
         ParsedMessage& msg,
         const std::string& previous_text,
-        const std::string& delta_text,
+        std::string& delta_text,
         const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
         const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
     ) = 0;
 
     virtual bool is_active() const = 0;
 
-    static std::map<std::string, std::shared_ptr<IncrementalParserBase>> registered_parsers;
+    static std::shared_ptr<IncrementalParserBase> get_parser(std::string name);
 };
 
-class DeepSeekR1ReasoningParser : public IncrementalParserBase {
+class ReasoningParser : public IncrementalParserBase {
 private:
     bool m_starts_with_thinking = true;
+    bool m_keep_original_content = true;
     bool m_think_tag_opened = false;
     bool m_deactivated = false;
     std::string m_open_tag = "<think>";
     std::string m_close_tag = "</think>";
 public:
-    DeepSeekR1ReasoningParser(bool starts_with_thinking = true) : m_starts_with_thinking(starts_with_thinking) {}
+    ReasoningParser(bool starts_with_thinking = true,
+                    bool keep_original_content = true)
+        : m_starts_with_thinking(starts_with_thinking),
+          m_keep_original_content(keep_original_content) {}
     std::map<std::string, std::string> accumulated_parsed;
 
-    ParsedMessage parse(
+    std::string parse(
         ParsedMessage& msg,
         const std::string& previous_text,
-        const std::string& delta_text,
+        std::string& delta_text,
         const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
         const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
     ) override;
-    static std::string name() { return "DeepSeekR1ReasoningParser"; }
 
     bool is_active() const override;
 };
 
+class DeepSeekR1ReasoningParser : public ReasoningParser {
+public:
+    DeepSeekR1ReasoningParser(bool starts_with_thinking = true) : ReasoningParser(starts_with_thinking) {}
+    static std::string name() { return "DeepSeekR1ReasoningParser"; }
+};
+
+class Phi4ReasoningParser : public ReasoningParser {
+public:
+    Phi4ReasoningParser(bool starts_with_thinking = false) : ReasoningParser(starts_with_thinking) {}
+    static std::string name() { return "Phi4ReasoningParser"; }
+};
+
 class ParserBase {
 public:
     ParserBase() = default;
 
     virtual ParsedMessage parse(ParsedMessage& text) = 0;
-    static std::map<std::string, std::shared_ptr<ParserBase>> registered_parsers;
+    static std::shared_ptr<ParserBase> get_parser(std::string name);
 };
 
 using ParserVariant = std::variant<std::shared_ptr<IncrementalParserBase>, std::string>;
 
-class Llama32PythonicParser : public ParserBase {
+class Llama32PythonicToolParser : public ParserBase {
 // Does not modify original content, only extracts and adds tool calls
 public:
-    Llama32PythonicParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {}
+    // TODO: Check that vLLM has the same default. 
+ Llama32PythonicToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {} ParsedMessage parse(ParsedMessage& input) override; - static std::string name() { return "Llama32PythonicParser"; } + static std::string name() { return "Llama32PythonicToolParser"; } private: bool m_keep_original_content = true; }; diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp index 872e79dbf9..055adbfbe2 100644 --- a/src/cpp/include/openvino/genai/text_streamer.hpp +++ b/src/cpp/include/openvino/genai/text_streamer.hpp @@ -51,7 +51,7 @@ class TextParserStreamer : public TextStreamer { public: TextParserStreamer(const Tokenizer& tokenizer, std::vector parsers = {}); - virtual StreamingStatus write(ParsedMessage& message); + virtual StreamingStatus write(ParsedMessage& message) = 0; CallbackTypeVariant write(std::string message); diff --git a/src/cpp/src/continuous_batching/pipeline.cpp b/src/cpp/src/continuous_batching/pipeline.cpp index 404ee620e1..25256c8d70 100644 --- a/src/cpp/src/continuous_batching/pipeline.cpp +++ b/src/cpp/src/continuous_batching/pipeline.cpp @@ -58,7 +58,7 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::filesystem::p auto model = utils::read_model(models_path, properties); auto [properties_without_draft_model_without_gguf, enable_save_ov_model] = utils::extract_gguf_properties(properties_without_draft_model); - properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; + // properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; auto tokenizer = ov::genai::Tokenizer(models_path, tokenizer_properties); auto generation_config = utils::from_config_json_if_exists(models_path); @@ -98,7 +98,7 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( auto model = utils::read_model(models_path, properties_without_draft_model); auto [properties_without_draft_model_without_gguf, enable_save_ov_model] = utils::extract_gguf_properties(properties_without_draft_model); - properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; + // properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; auto generation_config = utils::from_config_json_if_exists(models_path); diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index 38f00e4e45..43f90980a6 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -207,16 +207,15 @@ DecodedResults LLMPipeline::generate( StreamerVariant streamer) { auto res = m_pimpl->generate(inputs, generation_config, streamer); - std::vector> incremental_parsers; // If streamer is of StreamerBase type, and it is TextParserStreamer, get parsed message if (auto streamer_obj = std::get_if>(&streamer)) { if (auto parser_streamer = std::dynamic_pointer_cast(*streamer_obj)) { - incremental_parsers = parser_streamer->get_parsers(); + res.parsed.resize(res.texts.size()); + res.parsed[0] = parser_streamer->get_parsed_message(); } } - - if (incremental_parsers.empty() && (!generation_config.has_value() || (*generation_config).parsers.empty())) { + if (!generation_config.has_value() || (*generation_config).parsers.empty()) { return res; } @@ -225,10 +224,11 @@ DecodedResults LLMPipeline::generate( for (auto& parser_variant : (*generation_config).parsers) { if (std::holds_alternative(parser_variant)) { auto parser_name = std::get(parser_variant); - if (ParserBase::registered_parsers.find(parser_name) == 
ParserBase::registered_parsers.end()) { + auto parser = ParserBase::get_parser(parser_name); + if (!parser) { OPENVINO_THROW("Parser with name ", parser_name, " is not registered"); } - parsers.push_back(ParserBase::registered_parsers[parser_name]); + parsers.push_back(parser); } else if (std::holds_alternative>(parser_variant)) { auto parser = std::get>(parser_variant); parsers.push_back(parser); @@ -238,28 +238,18 @@ DecodedResults LLMPipeline::generate( res.parsed.resize(res.texts.size()); - // BaseParsers have priority over IncrementalParsers + // Apply Base parsers sequentially even if IncrementalParser has run. if (!parsers.empty()) { for (size_t i = 0; i < res.texts.size(); ++i) { auto& message = res.texts[i]; - ParsedMessage msg; + ParsedMessage& msg = res.parsed[i]; for (auto& parser: parsers) { + // TODO: check if is_active() is needed here + // TODO: Check the state of incremental parser and reset if necessary msg = parser->parse(msg); } res.parsed[i] = msg; } - return res; - } - - // At this place we have only IncrementalParsers - for (size_t i = 0; i < res.texts.size(); ++i) { - auto& message = res.texts[i]; - ParsedMessage msg; - for (auto& parser: incremental_parsers) { - // Previous is and empty message because we populate message with the full generated text. - msg = parser->parse(msg, "", message); - } - res.parsed[i] = msg; } return res; diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index ed4fcca3a5..2a89716370 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -13,14 +13,14 @@ using json = nlohmann::json; namespace ov::genai { -bool DeepSeekR1ReasoningParser::is_active() const { +bool ReasoningParser::is_active() const { return !m_deactivated; } -ParsedMessage DeepSeekR1ReasoningParser::parse( +std::string ReasoningParser::parse( ParsedMessage& msg, const std::string& previous_text, - const std::string& delta_text, + std::string& delta_text, const std::optional>& previous_tokens, const std::optional>& delta_tokens ) { @@ -31,10 +31,6 @@ ParsedMessage DeepSeekR1ReasoningParser::parse( msg["content"] = ""; } - if (m_deactivated) { - msg["content"] += delta_text; - return msg; - } bool think_tag_closed = delta_text.find(m_close_tag) != std::string::npos; if (!m_think_tag_opened && delta_text.find(m_open_tag) != std::string::npos && !m_starts_with_thinking) { @@ -42,21 +38,30 @@ ParsedMessage DeepSeekR1ReasoningParser::parse( auto think_idx = delta_text.find(m_open_tag); msg["reasoning_content"] += delta_text.substr(think_idx + std::string(m_open_tag).size(), delta_text.size() - (think_idx + std::string(m_open_tag).size())); m_think_tag_opened = true; + if (!m_keep_original_content) { + delta_text = ""; + } } else if ((m_think_tag_opened || m_starts_with_thinking) && delta_text.find(m_close_tag) != std::string::npos) { auto think_idx = delta_text.find(m_close_tag); msg["reasoning_content"] += delta_text.substr(0, think_idx); msg["content"] += delta_text.substr(think_idx + std::string(m_close_tag).size(), delta_text.size() - (think_idx + std::string(m_close_tag).size())); m_think_tag_opened = false; m_deactivated = true; + if (!m_keep_original_content) { + delta_text = delta_text.substr(think_idx + std::string(m_close_tag).size(), delta_text.size() - (think_idx + std::string(m_close_tag).size())); + } } else if (m_think_tag_opened) { msg["reasoning_content"] += delta_text; - } + if (!m_keep_original_content) { + delta_text = ""; + } + } // TODO: add case when and are in the same delta_text - return msg; + return delta_text; } 
-ParsedMessage Llama32PythonicParser::parse(ParsedMessage& input) { +ParsedMessage Llama32PythonicToolParser::parse(ParsedMessage& input) { // Input example // string input = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>"; @@ -116,29 +121,51 @@ ParsedMessage BaseReasoningParser::parse(ParsedMessage& input) { return res; } -std::map> IncrementalParserBase::registered_parsers; -std::map> ParserBase::registered_parsers; +std::map()>> registered_incremental_parsers; +std::map()>> registered_base_parsers; // static initializer to register available buildin parsers static bool register_backends() { - IncrementalParserBase::registered_parsers[DeepSeekR1ReasoningParser::name()] = std::make_shared(); - IncrementalParserBase::registered_parsers[DeepSeekR1ReasoningParser::name()] = std::make_shared(); - IncrementalParserBase::registered_parsers["Phi-4-reasoning"] = std::make_shared(/*starts_with_thinking*/ false); + registered_incremental_parsers[DeepSeekR1ReasoningParser::name()] = []() { return std::make_shared(/*starts_with_thinking*/ true); }; + registered_incremental_parsers[Phi4ReasoningParser::name()] = []() { return std::make_shared(/*starts_with_thinking*/ false); }; + + registered_base_parsers[Llama32PythonicToolParser::name()] = []() { return std::make_shared(); }; - ParserBase::registered_parsers[Llama32PythonicParser::name()] = std::make_shared(); + // TODO: Add more parsers and register them here. return true; } // Ensure the backends are registered before main static bool are_backends_registered = register_backends(); -static std::vector get_parsers_names() { +std::shared_ptr IncrementalParserBase::get_parser(std::string name) { + if (!are_backends_registered) { + register_backends(); + } + + if (registered_incremental_parsers.find(name) != registered_incremental_parsers.end()) { + return registered_incremental_parsers[name](); + } + return nullptr; +} + +std::shared_ptr ParserBase::get_parser(std::string name) { if (!are_backends_registered) { register_backends(); } + if (registered_base_parsers.find(name) != registered_base_parsers.end()) { + return registered_base_parsers[name](); + } + return nullptr; +} + +static std::vector get_parsers_names() { std::vector names; - for (const auto& [name, _] : IncrementalParserBase::registered_parsers) { + for (const auto& [name, _] : registered_incremental_parsers) { + names.push_back(name); + } + for (const auto& [name, _] : registered_base_parsers) { names.push_back(name); } return names; diff --git a/src/cpp/src/parsers.hpp b/src/cpp/src/parsers.hpp deleted file mode 100644 index 21f474e089..0000000000 --- a/src/cpp/src/parsers.hpp +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (C) 2023-2025 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#pragma once -#include "openvino/genai/text_streamer.hpp" - -namespace ov { -namespace genai { - -struct DeltaToolCall; // Forward declaration, define as needed - -struct DeltaMessage { - std::optional role; - std::optional content; - std::optional reasoning_content; - // std::vector tool_calls; - - DeltaMessage() - : role(std::nullopt), - content(std::nullopt), - reasoning_content(std::nullopt) {} -}; - -class TextParserStreamer : public ov::genai::TextStreamer { -public: - TextParserStreamer(const Tokenizer& tokenizer); - - StreamingStatus write(const DeltaMessage& message); - - ov::genai::CallbackTypeVariant write(std::string message); -}; - -class ReasoningParserBase { -public: - ReasoningParserBase() = default; - - void parse(const std::string& text); -}; - -class 
ToolCallingParserBase {
-public:
-    ToolCallingParserBase() = default;
-
-    void parse(const std::string& text);
-};
-
-} // namespace genai
-} // namespace ov
diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp
index a5238b4a35..3b1519e42d 100644
--- a/src/cpp/src/text_streamer.cpp
+++ b/src/cpp/src/text_streamer.cpp
@@ -131,29 +131,25 @@ TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers)
     for (auto& parser : parsers) {
         if (std::holds_alternative<std::string>(parser)) {
             auto parser_name = std::get<std::string>(parser);
-            if (IncrementalParserBase::registered_parsers.find(parser_name) == IncrementalParserBase::registered_parsers.end()) {
+            auto parser_impl = IncrementalParserBase::get_parser(parser_name);
+            if (!parser_impl) {
                 OPENVINO_THROW("Parser with name " + parser_name + " is not registered");
             }
-            m_parsers.push_back(IncrementalParserBase::registered_parsers[parser_name]);
+            m_parsers.push_back(parser_impl);
         } else {
             m_parsers.push_back(std::get<std::shared_ptr<IncrementalParserBase>>(parser));
         }
     }
 }
 
-StreamingStatus TextParserStreamer::write(ParsedMessage& message) {
-    if (message.find("content") != message.end()) {
-        std::cout << message.at("content") << std::endl;
-    }
-    return StreamingStatus::RUNNING;
-}
-
 CallbackTypeVariant TextParserStreamer::write(std::string message) {
     for (auto& parser : m_parsers) {
-        // if (parser->is_active()) {
-            m_parsed_message = parser->parse(m_parsed_message, m_text_buffer, message);
-        // }
-        // m_parsed_message["content"] += message;
+        if (parser->is_active()) {
+            message = parser->parse(m_parsed_message, m_text_buffer, message);
+        }
+        // The message can be modified inside the parser, e.g. when the parser extracts tool calls
+        // from the message content and filters them out of the returned text.
+        m_parsed_message["content"] += message;
     }
 
     m_text_buffer = message;
diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp
index 1d3f066334..d2ff74b47d 100644
--- a/src/python/py_parsers.cpp
+++ b/src/python/py_parsers.cpp
@@ -26,15 +26,15 @@ namespace {
 class ConstructableIncrementalParserBase: public IncrementalParserBase {
 public:
-    ParsedMessage parse(
+    std::string parse(
         ParsedMessage& msg,
         const std::string& previous_text,
-        const std::string& delta_text,
+        std::string& delta_text,
         const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
         const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
     ) override {
         PYBIND11_OVERRIDE_PURE(
-            ParsedMessage,          // Return type
+            std::string,            // Return type
             IncrementalParserBase,  // Parent class
             parse,                  // Name of function in C++ (must match Python name)
             msg,
@@ -68,6 +68,7 @@ class ConstructableParserBase: public ParserBase {
 } // namespace
 
+// TODO: double check/add more relevant docstrings for parsers.
 void init_parsers(py::module_& m) {
     py::class_<IncrementalParserBase, ConstructableIncrementalParserBase, std::shared_ptr<IncrementalParserBase>>(m, "IncrementalParserBase")
         .def(py::init<>())
@@ -87,5 +88,4 @@ void init_parsers(py::module_& m) {
         &ParserBase::parse,
         "Parse is called with the full text. Returns a ParsedMessage with parsed content.",
         py::arg("text"));
-
-}
\ No newline at end of file
+}
diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp
index e643666b7e..84b531b9f5 100644
--- a/src/python/py_streamers.cpp
+++ b/src/python/py_streamers.cpp
@@ -75,7 +75,7 @@ class ConstructableTextParserStreamer: public TextParserStreamer {
     using TextParserStreamer::TextParserStreamer; // inherit base constructors
     StreamingStatus write(ParsedMessage& message) override {
-        PYBIND11_OVERRIDE(
+        PYBIND11_OVERRIDE_PURE(
             StreamingStatus,    // Return type
             TextParserStreamer, // Parent class
             write,              // Name of function in C++ (must match Python name)
@@ -129,7 +129,8 @@ void init_streamers(py::module_& m) {
         }
     }, py::arg("token"));
-
+
+    // TODO: double check/add more relevant docstrings for TextParserStreamer.
     py::class_<TextParserStreamer, ConstructableTextParserStreamer, std::shared_ptr<TextParserStreamer>, TextStreamer>(m, "TextParserStreamer")
         .def(py::init([](const Tokenizer& tokenizer,
                          std::vector<std::variant<std::shared_ptr<IncrementalParserBase>, std::string>> parsers) {
             return std::make_shared<ConstructableTextParserStreamer>(tokenizer, parsers);
         }),
         py::arg("tokenizer"),
         py::arg("parsers") = std::vector<std::variant<std::shared_ptr<IncrementalParserBase>, std::string>>({}),
         "TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers.")
-        // .def("write",
-        //     py::overload_cast<ParsedMessage&>(&TextParserStreamer::write),
-        //     py::arg("message"),
-        //     "Write is called with a ParsedMessage. Returns StreamingStatus.")
+        .def("write",
+            py::overload_cast<ParsedMessage&>(&TextParserStreamer::write),
+            py::arg("message"),
+            "Write is called with a ParsedMessage. 
Returns StreamingStatus.") .def("write", py::overload_cast(&TextParserStreamer::write), py::arg("message"), diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp index 0018dbd461..f4d7922b7c 100644 --- a/tests/cpp/parser.cpp +++ b/tests/cpp/parser.cpp @@ -44,7 +44,7 @@ TEST(ParserTest, test_llama32_parser_1) { }} } }); - std::shared_ptr parser = std::make_shared(); + std::shared_ptr parser = std::make_shared(); nlohmann::json res = run_parser_test(parser, prompt, expected); @@ -67,7 +67,7 @@ TEST(ParserTest, test_llama32_parser_2) { }} } }); - auto parser = std::make_shared(/*keep_original_content*/ false); + auto parser = std::make_shared(/*keep_original_content*/ false); nlohmann::json res = run_parser_test(parser, prompt, expected); @@ -112,7 +112,7 @@ TEST(ParserTest, test_reasoning_parser_2) { class DeepSeekR1ReasoningParserTest : public ::testing::Test { protected: - ov::genai::DeepSeekR1ReasoningParser parser; + ov::genai::ReasoningParser parser; ParsedMessage msg; }; @@ -131,10 +131,14 @@ TEST_F(DeepSeekR1ReasoningParserTest, ReasoningContentAccumulatesAcrossCalls) { std::string ref_res = "First, I recognize that the question is asking for the sum of 2 and 1.\n\nI know that addition involves combining two numbers to find their total.\n\nStarting with 2, I add 1 to it.\n\n2 plus 1 equals 3.\n"; ParsedMessage msg; + + for (int i = 1; i < input_stream.size(); i++) { std::string previous_text = input_stream[i - 1]; std::string delta_text = input_stream[i]; - msg = parser.parse(msg, previous_text, delta_text); + delta_text = parser.parse(msg, previous_text, delta_text); } ASSERT_EQ(msg["reasoning_content"], ref_res); } + +// TODO: add tests when streamer is called directly instead of manual subsequent calling of parsers. diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index d8a323f3bc..90f7ad93fd 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -116,3 +116,5 @@ def write(self, message): assert msg['reasoning_content'] == think_content assert msg['content'] == content + +# TODO: add tests when streamer is called directly instead of manual subsequent calling of parsers. 
diff --git a/tests/python_tests/test_text_streamer.py b/tests/python_tests/test_text_streamer.py index 75804256b1..9b834bf3b7 100644 --- a/tests/python_tests/test_text_streamer.py +++ b/tests/python_tests/test_text_streamer.py @@ -72,6 +72,20 @@ def test_text_prompts(tmp_path, prompt, model_id): streamer.write(token) streamer.end() + class CurrentStremaer(BaseStreamer): + def write(self, token_chunk): + pass + + class CurrentParsingStreamer(TextParserStreamer): + def write(self, word: str): + msg: ParsedMessage = get_current_message() + + + streamer = lambda x: print(x) + + streamer = TextStreamer(ov_tokenizer, lambda x: print(x)) + + assert ''.join(accumulated) == ov_tokenizer.decode(tokens) for chunk_size in [1,2,3,4,5]: From 219827a415fe2f4cd3d4ed91cf82f6150be1aa66 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Tue, 14 Oct 2025 17:23:40 +0200 Subject: [PATCH 06/43] use JsonContainer --- .../text_generation/parsed_output_sample.cpp | 2 +- samples/python/text_generation/chat_sample.py | 33 +++++ .../include/openvino/genai/llm_pipeline.hpp | 4 +- src/cpp/include/openvino/genai/parsers.hpp | 21 ++-- src/cpp/src/llm/pipeline.cpp | 5 +- src/cpp/src/parsers.cpp | 116 ++++++++++++------ src/cpp/src/text_streamer.cpp | 4 +- src/python/py_openvino_genai.cpp | 3 +- src/python/py_parsers.cpp | 34 +++-- src/python/py_streamers.cpp | 6 +- tests/cpp/parser.cpp | 24 +--- tests/python_tests/test_parsers.py | 92 +++++++++----- tests/python_tests/test_text_streamer.py | 2 +- 13 files changed, 229 insertions(+), 117 deletions(-) diff --git a/samples/cpp/text_generation/parsed_output_sample.cpp b/samples/cpp/text_generation/parsed_output_sample.cpp index 1b481a2a4a..6efa64ee5e 100644 --- a/samples/cpp/text_generation/parsed_output_sample.cpp +++ b/samples/cpp/text_generation/parsed_output_sample.cpp @@ -12,7 +12,7 @@ class CurrentStreamer : public ov::genai::TextParserStreamer { CurrentStreamer(const ov::genai::Tokenizer& tokenizer) : ov::genai::TextParserStreamer(tokenizer) {} ov::genai::StreamingStatus write(ov::genai::ParsedMessage& message) { - std::cout << message.at("content"); + std::cout << message["content"].get_string() << std::flush; return ov::genai::StreamingStatus::RUNNING; } }; diff --git a/samples/python/text_generation/chat_sample.py b/samples/python/text_generation/chat_sample.py index e4067c49f3..b852141d3c 100755 --- a/samples/python/text_generation/chat_sample.py +++ b/samples/python/text_generation/chat_sample.py @@ -36,3 +36,36 @@ def main(): if '__main__' == __name__: main() + + pipe = openvino_genai.LLMPipeline(args.model_dir, device) + + prompt = "What is the weather in New York today?" + res = pipe.generate(prompt, max_new_tokens=100, streamer=streamer) + print(res.texts[0]) + + res.parsed['tool_caling'] + + class LlamaToolCallParser(ParserBase): + def parse(self, parsed_data: ParsedData) -> ParsedData: + # parsed_data + # process parsed_data + # e.g. extract tool calls, or other fields from content + return new_parsed_output + + llama_parser = LlamaToolCallParser() + res = pipe.generate(prompt, parsers=[llama_parser | "LLama3.2Pythonic"], max_new_tokens=100) + +# At the beginning msg['original_content'] is filled with full text +msg = res.texts[i] +for parser in m_parsers: + msg = parser.parse(msg) + +# At the end msg is filled with all parsed fields +parsed_data = { + 'original_content': '<|system|>You are a helpful assistant... 
I will call the `get_weather` function with the location… \n\nfunctools[{"name": "get_weather", "arguments": {"location": "New York", "unit": "celsius"}}]<|end|>', + 'content': 'blah blah', + 'reasoning_content': '', + 'tool_calls': "[{\"name\":\"get_weather\",\"arguments\":{\"location\":\"New York, NY\",\"unit\":\"celsius\"}}]", +} + +res.parsed: ParsedData \ No newline at end of file diff --git a/src/cpp/include/openvino/genai/llm_pipeline.hpp b/src/cpp/include/openvino/genai/llm_pipeline.hpp index 4f2c8405f1..6385ced995 100644 --- a/src/cpp/include/openvino/genai/llm_pipeline.hpp +++ b/src/cpp/include/openvino/genai/llm_pipeline.hpp @@ -15,6 +15,7 @@ #include "openvino/genai/perf_metrics.hpp" #include "openvino/genai/scheduler_config.hpp" #include "openvino/genai/common_types.hpp" +#include "openvino/genai/json_container.hpp" namespace ov { namespace genai { @@ -68,7 +69,8 @@ class DecodedResults { std::vector scores; PerfMetrics perf_metrics; std::shared_ptr extended_perf_metrics; - std::vector parsed; + // std::vector parsed; + std::vector parsed; // @brief Convert DecodedResults to a string. operator std::string() const { diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 020176d117..479dd2d4de 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -9,12 +9,14 @@ #include #include #include +#include "openvino/genai/json_container.hpp" namespace ov { namespace genai { - -using ParsedMessage = std::map; +// TODO: will be converted to JSONLike object +// using ParsedMessage = std::map; +using ParsedMessage = JsonContainer; class IncrementalParserBase { public: @@ -33,20 +35,15 @@ class IncrementalParserBase { static std::shared_ptr get_parser(std::string name); }; +// Forward declaration +class ReasoningParserImpl; + class ReasoningParser : public IncrementalParserBase { private: - bool m_starts_with_thinking = true; - bool m_keep_original_content = true; - bool m_think_tag_opened = false; - bool m_deactivated = false; - std::string m_open_tag = ""; - std::string m_close_tag = ""; + std::shared_ptr m_impl; public: ReasoningParser(bool starts_with_thinking = true, - bool keep_original_content = true) - : m_starts_with_thinking(starts_with_thinking), - m_keep_original_content(keep_original_content) {} - std::map accumulated_parsed; + bool keep_original_content = true); std::string parse( ParsedMessage& msg, diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index 43f90980a6..41b678d388 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -241,14 +241,13 @@ DecodedResults LLMPipeline::generate( // Apply Base parsers sequentially even if IncrementalParser has run. 
if (!parsers.empty()) { for (size_t i = 0; i < res.texts.size(); ++i) { - auto& message = res.texts[i]; - ParsedMessage& msg = res.parsed[i]; + ParsedMessage msg; + msg["content"] = res.texts[i]; for (auto& parser: parsers) { // TODO: check if is_active() is needed here // TODO: Check the state of incremental parser and reset if necessary msg = parser->parse(msg); } - res.parsed[i] = msg; } } diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index 2a89716370..01560941cb 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -13,8 +13,72 @@ using json = nlohmann::json; namespace ov::genai { -bool ReasoningParser::is_active() const { - return !m_deactivated; +class ReasoningParserImpl { +private: + bool m_starts_with_thinking = true; + bool m_keep_original_content = true; + bool m_think_tag_opened = false; + std::string m_open_tag = ""; + std::string m_close_tag = ""; + std::map accumulated_parsed; +public: + bool m_deactivated = false; + ReasoningParserImpl() = default; + ReasoningParserImpl(bool starts_with_thinking = true, + bool keep_original_content = true) + : m_starts_with_thinking(starts_with_thinking), + m_keep_original_content(keep_original_content) {} + + std::string parse( + ParsedMessage& msg, + const std::string& previous_text, + std::string& delta_text, + const std::optional>& previous_tokens, + const std::optional>& delta_tokens + ) { + if (msg["reasoning_content"].is_null()) { + msg["reasoning_content"] = ""; + } + if (msg["content"].is_null()) { + msg["content"] = ""; + } + + bool think_tag_closed = delta_text.find(m_close_tag) != std::string::npos; + if (m_starts_with_thinking) { + m_think_tag_opened = true; + } + + if (!m_think_tag_opened && delta_text.find(m_open_tag) != std::string::npos && !m_starts_with_thinking) { + // Thinking has started + auto think_idx = delta_text.find(m_open_tag); + auto lvalue = msg["reasoning_content"].get_string(); + msg["reasoning_content"] = lvalue + delta_text.substr(think_idx + std::string(m_open_tag).size(), delta_text.size() - (think_idx + std::string(m_open_tag).size())); + m_think_tag_opened = true; + if (!m_keep_original_content) { + delta_text = ""; + } + } else if (m_think_tag_opened && delta_text.find(m_close_tag) != std::string::npos) { + auto think_idx = delta_text.find(m_close_tag); + msg["reasoning_content"] = msg["reasoning_content"].get_string() + delta_text.substr(0, think_idx); + msg["content"] = msg["content"].get_string() + delta_text.substr(think_idx + std::string(m_close_tag).size(), delta_text.size() - (think_idx + std::string(m_close_tag).size())); + m_think_tag_opened = false; + m_deactivated = true; + if (!m_keep_original_content) { + delta_text = delta_text.substr(think_idx + std::string(m_close_tag).size(), delta_text.size() - (think_idx + std::string(m_close_tag).size())); + } + } else if (m_think_tag_opened) { + msg["reasoning_content"] = msg["reasoning_content"].get_string() + delta_text; + if (!m_keep_original_content) { + delta_text = ""; + } + } // TODO: add case when and are in the same delta_text + + return delta_text; + } +}; + +ReasoningParser::ReasoningParser(bool starts_with_thinking, bool keep_original_content) { + m_impl = std::make_shared(starts_with_thinking, keep_original_content); } std::string ReasoningParser::parse( @@ -24,42 +88,12 @@ std::string ReasoningParser::parse( const std::optional>& previous_tokens, const std::optional>& delta_tokens ) { - if (msg.find("reasoning_content") == msg.end()) { - msg["reasoning_content"] = ""; - } - if (msg.find("content") == 
msg.end()) { - msg["content"] = ""; - } - - bool think_tag_closed = delta_text.find(m_close_tag) != std::string::npos; - - if (!m_think_tag_opened && delta_text.find(m_open_tag) != std::string::npos && !m_starts_with_thinking) { - // Thinking has started - auto think_idx = delta_text.find(m_open_tag); - msg["reasoning_content"] += delta_text.substr(think_idx + std::string(m_open_tag).size(), delta_text.size() - (think_idx + std::string(m_open_tag).size())); - m_think_tag_opened = true; - if (!m_keep_original_content) { - delta_text = ""; - } - } else if ((m_think_tag_opened || m_starts_with_thinking) && delta_text.find(m_close_tag) != std::string::npos) { - auto think_idx = delta_text.find(m_close_tag); - msg["reasoning_content"] += delta_text.substr(0, think_idx); - msg["content"] += delta_text.substr(think_idx + std::string(m_close_tag).size(), delta_text.size() - (think_idx + std::string(m_close_tag).size())); - m_think_tag_opened = false; - m_deactivated = true; - if (!m_keep_original_content) { - delta_text = delta_text.substr(think_idx + std::string(m_close_tag).size(), delta_text.size() - (think_idx + std::string(m_close_tag).size())); - } - } else if (m_think_tag_opened) { - msg["reasoning_content"] += delta_text; - if (!m_keep_original_content) { - delta_text = ""; - } - } // TODO: add case when and are in the same delta_text - - return delta_text; + return m_impl->parse(msg, previous_text, delta_text, previous_tokens, delta_tokens); } +bool ReasoningParser::is_active() const { + return !m_impl->m_deactivated; +} ParsedMessage Llama32PythonicToolParser::parse(ParsedMessage& input) { // Input example @@ -67,7 +101,7 @@ ParsedMessage Llama32PythonicToolParser::parse(ParsedMessage& input) { // Regex to capture the [...] part smatch m; - const std::string& text = input.at("content"); + const std::string& text = input["content"].get_string(); regex r(R"(\[.*?\])"); if (regex_search(text, m, r)) { // Strip outer [ ] @@ -92,7 +126,8 @@ ParsedMessage Llama32PythonicToolParser::parse(ParsedMessage& input) { if (!m_keep_original_content) { input["content"] = regex_replace(text, r, ""); } - input["tool_calls"] = j.dump(); + std::cout << j.dump() << std::endl; + input["tool_calls"] = ParsedMessage::from_json_string(j.dump()); return input; } return ParsedMessage{}; @@ -101,7 +136,8 @@ ParsedMessage Llama32PythonicToolParser::parse(ParsedMessage& input) { ParsedMessage BaseReasoningParser::parse(ParsedMessage& input) { ParsedMessage res; std::string reasoning_content; - const std::string& content = input.at("content"); + // auto content = input["content"]; + std::string content = input["content"].get_string(); res["content"] = content; size_t start = content.find(m_open_tag); @@ -171,4 +207,6 @@ static std::vector get_parsers_names() { return names; } + + } // namespace ov::genai diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp index 3b1519e42d..5ffc34689f 100644 --- a/src/cpp/src/text_streamer.cpp +++ b/src/cpp/src/text_streamer.cpp @@ -148,8 +148,8 @@ CallbackTypeVariant TextParserStreamer::write(std::string message) { message = parser->parse(m_parsed_message, m_text_buffer, message); } // Message can be modified inside parser, if parser for example extracted tool calling from message content - // but parser - m_parsed_message["content"] += message; + // but parser + m_parsed_message["content"] = m_parsed_message["content"].get_string() + message; } m_text_buffer = message; diff --git a/src/python/py_openvino_genai.cpp b/src/python/py_openvino_genai.cpp index 
ed4aa8d3af..aa2854480c 100644 --- a/src/python/py_openvino_genai.cpp +++ b/src/python/py_openvino_genai.cpp @@ -93,6 +93,7 @@ PYBIND11_MODULE(py_openvino_genai, m) { .def(py::init<>()) .def_property_readonly("texts", [](const DecodedResults &dr) -> py::typing::List { return pyutils::handle_utf8((std::vector)dr); }) .def_readonly("scores", &DecodedResults::scores) + .def_readonly("parsed", &DecodedResults::parsed) .def_readonly("perf_metrics", &DecodedResults::perf_metrics) .def_readonly("extended_perf_metrics", &DecodedResults::extended_perf_metrics) .def("__str__", [](const DecodedResults &dr) -> py::str { @@ -115,10 +116,10 @@ PYBIND11_MODULE(py_openvino_genai, m) { .def_readonly("extended_perf_metrics", &EncodedResults::extended_perf_metrics); init_lora_adapter(m); + init_parsers(m); init_generation_config(m); init_tokenizer(m); init_streamers(m); - init_parsers(m); init_llm_pipeline(m); init_continuous_batching_pipeline(m); diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index d2ff74b47d..2cf26e4b19 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -9,6 +9,7 @@ #include "openvino/genai/parsers.hpp" #include "py_utils.hpp" +#include "openvino/genai/json_container.hpp" namespace py = pybind11; @@ -18,6 +19,7 @@ using ov::genai::ParserVariant; using ov::genai::ParserBase; using ov::genai::Tokenizer; using ov::genai::StreamingStatus; +using ov::genai::JsonContainer; namespace pyutils = ov::genai::pybind::utils; @@ -72,15 +74,31 @@ class ConstructableParserBase: public ParserBase { void init_parsers(py::module_& m) { py::class_>(m, "IncrementalParserBase") .def(py::init<>()) - .def("parse", - &IncrementalParserBase::parse, - "Parse is called every time new text delta is decoded. Returns a ParsedMessage with parsed content.", - py::arg("msg"), - py::arg("previous_text"), - py::arg("delta_text"), - py::arg("previous_tokens") = std::nullopt, - py::arg("delta_tokens") = std::nullopt) + .def("parse", [](IncrementalParserBase& self, + py::dict& msg, + std::string& previous_text, + std::string& delta_text, + const std::optional>& previous_tokens = std::nullopt, + const std::optional>& delta_tokens = std::nullopt) { + // TODO: optimize conversion between py::dict and ParsedMessage + auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(msg); + auto msg_cpp = JsonContainer(msg_anymap); + + + auto res = self.parse(msg_cpp, previous_text, delta_text, previous_tokens, delta_tokens); + msg.clear(); + + auto json_obj = msg_cpp.to_json(); + for (auto it = json_obj.begin(); it != json_obj.end(); ++it) { + msg[py::cast(it.key())] = py::cast(it.value()); + } + + return res; + }, py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), + py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt, + "Parse is called every time new text delta is decoded. 
Returns a string with any additional text to append to the current output.") .def("is_active", &IncrementalParserBase::is_active, "Indicates whether the parser is active and should be used during parsing."); + py::class_>(m, "ParserBase") .def(py::init<>()) diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp index 84b531b9f5..2ea1f72880 100644 --- a/src/python/py_streamers.cpp +++ b/src/python/py_streamers.cpp @@ -75,7 +75,7 @@ class ConstructableTextParserStreamer: public TextParserStreamer { using TextParserStreamer::TextParserStreamer; // inherit base constructors StreamingStatus write(ParsedMessage& message) override { - PYBIND11_OVERRIDE( + PYBIND11_OVERRIDE_PURE( StreamingStatus, // Return type TextParserStreamer, // Parent class write, // Name of function in C++ (must match Python name) @@ -142,10 +142,10 @@ void init_streamers(py::module_& m) { py::overload_cast(&TextParserStreamer::write), py::arg("message"), "Write is called with a ParsedMessage. Returns StreamingStatus.") - .def("write", + .def("_write", py::overload_cast(&TextParserStreamer::write), py::arg("message"), - "Write is called with a string message. Returns CallbackTypeVariant.") + "Write is called with a string message. Returns CallbackTypeVariant. This is a private method.") .def("get_parsed_message", &TextParserStreamer::get_parsed_message, "Get the current parsed message") diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp index f4d7922b7c..dc15ac1482 100644 --- a/tests/cpp/parser.cpp +++ b/tests/cpp/parser.cpp @@ -9,22 +9,10 @@ using namespace ov::genai; -nlohmann::json convert_to_json(const ParsedMessage& msg) { - nlohmann::json j; - for (const auto& [key, value] : msg) { - if (key == "tool_calls") { - j[key] = nlohmann::json::parse(value); - continue; - } - j[key] = value; - } - return j; -} - -nlohmann::json run_parser_test(std::shared_ptr parser, const std::string& prompt, const nlohmann::json& expected) { +nlohmann::json run_parser_test(std::shared_ptr parser, const std::string& prompt) { ParsedMessage input; input["content"] = prompt; - return convert_to_json(parser->parse(input)); + return (parser->parse(input)).to_json(); } @@ -46,7 +34,7 @@ TEST(ParserTest, test_llama32_parser_1) { }); std::shared_ptr parser = std::make_shared(); - nlohmann::json res = run_parser_test(parser, prompt, expected); + nlohmann::json res = run_parser_test(parser, prompt); ASSERT_EQ(res, expected); } @@ -69,7 +57,7 @@ TEST(ParserTest, test_llama32_parser_2) { }); auto parser = std::make_shared(/*keep_original_content*/ false); - nlohmann::json res = run_parser_test(parser, prompt, expected); + nlohmann::json res = run_parser_test(parser, prompt); ASSERT_EQ(res, expected); } @@ -87,7 +75,7 @@ TEST(ParserTest, test_reasoning_parser_1) { /*keep_original_content*/ false ); - nlohmann::json res = run_parser_test(parser, prompt, expected); + nlohmann::json res = run_parser_test(parser, prompt); ASSERT_EQ(res, expected); } @@ -105,7 +93,7 @@ TEST(ParserTest, test_reasoning_parser_2) { /*keep_original_content*/ true ); - nlohmann::json res = run_parser_test(parser, prompt, expected); + nlohmann::json res = run_parser_test(parser, prompt); ASSERT_EQ(res, expected); } diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index 90f7ad93fd..849c360a63 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -7,9 +7,11 @@ import numpy as np import openvino import pytest -from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, 
TextParserStreamer +from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus from transformers import AutoTokenizer from utils.hugging_face import convert_and_save_tokenizer, download_and_convert_model +import re +import textwrap @pytest.fixture(scope="module") @@ -43,42 +45,76 @@ def hf_ov_genai_models(request, tmp_path_factory): ) def test_parsers_1(hf_ov_genai_models): hf_tokenizer, genai_tokenizer = hf_ov_genai_models + + answer = "\nOkay, the user is asking for the answer to 2 + 1. Let me make sure I understand the question correctly. They want a short answer, so I shouldn't overcomplicate things. Basic addition here. Two plus one equals three. Yeah, that's straightforward. I need to respond with the answer inside a box using the specified format. Let me double-check the arithmetic to avoid any mistakes. Yep, 2 + 1 is definitely 3. Alright, time to put it in the box.\n\n\nThe answer to 2 + 1 is \boxed{3}." + stream_string = re.split(r"(\s+)", answer) + class CustomStreamer(TextParserStreamer): def write(self, message): - if "content" in message: - print(message["content"]) - return True - - streamer = TextParserStreamer(genai_tokenizer, parsers=["DeepSeekR1ReasoningParser"]) + msg.update(message) + return StreamingStatus.RUNNING + streamer = CustomStreamer(genai_tokenizer, parsers=["Phi4ReasoningParser"]) msg = {} - stream_string = [ - "<|begin▁of▁sentence|>", "First", ",", " I", " recognize", " that", " the", " question", " is", " asking", - " for", " the", " sum", " of", " ", "2", " and", " ", "1", ".\n\n", "I", " know", " that", " addition", - " involves", " combining", " two", " numbers", " to", " find", " their", " total", ".\n\n", "Starting", - " with", " ", "2", ",", " I", " add", " ", "1", " to", " it", ".\n\n", "2", " plus", " ", "1", " equals", - " ", "3", ".\n", "", "\n\n", "**", "Solution", ":", "**\n\n", "To", " find", " the", " sum", - " of", " ", "2", " and", " ", "1", " follow", " these", " simple", " steps", ":\n\n", "1", ".", " **", - "Start", " with", " the", " number", " ", "2", ".", "**\n", "2", ".", " **", "Add", " ", "1", " to", - " it", ".", "**\n", " \n", " ", " \\", "[\n", " " - ] + for subword in stream_string: + streamer._write(subword) - full_str = ''.join(stream_string) - think_content = full_str.split("")[0] - content = full_str.split("")[1] + # breakpoint() + think_content = answer.split("")[0].replace("", "") + content = answer - parsers = streamer.get_parsers() - - extended = stream_string[:] - extended.append("") - - for parser in parsers: - for (prev_subword, subword) in zip(extended, stream_string): - msg = parser.parse(msg, prev_subword, subword) - assert msg['reasoning_content'] == think_content assert msg['content'] == content +@pytest.mark.precommit +@pytest.mark.parametrize( + "hf_ov_genai_models", + [("katuni4ka/tiny-random-phi3", {"padding_side": "right"})], + indirect=True +) +def test_final_parser_1(ov_genai_models): + prompt = textwrap.dedent(''' + <|begin_of_text|><|start_header_id|>system<|end_header_id|> + + Environment: ipython + Cutting Knowledge Date: December 2023 + Today Date: 15 Oct 2025 + + You have access to the following functions. To call functions, please respond with a python list of the calls. Respond in the format [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)] Do not use variables. 
+ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "City and state, e.g., 'San Francisco, CA'" + }, + "unit": { + "type": "string", + "enum": [ + "celsius", + "fahrenheit" + ] + } + }, + "required": [ + "location", + "unit" + ] + } + } + } + + You are a helpful assistant with tool calling capabilities. Only reply with a tool call if the function exists in the library provided by the user. If it doesn't exist, just reply directly in natural language. When you receive a tool call response, use the output to format an answer to the original user question.<|eot_id|><|start_header_id|>user<|end_header_id|> + + What's the weather in New York today? Please explain what you are doing and call the tool<|eot_id|><|start_header_id|>assistant<|end_header_id|> + ''') + def test_parsers_2(hf_ov_genai_models): hf_tokenizer, genai_tokenizer = hf_ov_genai_models class CustomStreamer(TextParserStreamer): diff --git a/tests/python_tests/test_text_streamer.py index 9b834bf3b7..bc3db5bc97 100644 --- a/tests/python_tests/test_text_streamer.py +++ b/tests/python_tests/test_text_streamer.py @@ -29,7 +29,7 @@ def chunks(arr: list, n: int): Set folder = Application.GetNamespace("Microsoft Office").PackagedInstance.GetFolder("Folder Name") 'Get all files in the folder folder.Files.Clear -""" +""" eng_prompts = [ 'What is the previous answer?', 'Why is the Sun yellow?', From 4c3d443c2c549130c3327e420e6d1fb68c1ab3e7 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Thu, 16 Oct 2025 14:53:17 +0200 Subject: [PATCH 07/43] fix processing with JsonContainer and make tests green again --- src/cpp/src/parsers.cpp | 98 ++++++++++++++++++++++++------ src/python/py_streamers.cpp | 39 +++++++++--- tests/python_tests/test_parsers.py | 90 +++++++++++++-------------- 3 files changed, 156 insertions(+), 71 deletions(-) diff --git a/src/cpp/src/parsers.cpp index 01560941cb..eb491aef32 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -20,6 +20,7 @@ class ReasoningParserImpl { bool m_think_tag_opened = false; std::string m_open_tag = "<think>"; std::string m_close_tag = "</think>"; + std::string m_text_cache = ""; std::map accumulated_parsed; public: bool m_deactivated = false; @@ -36,10 +37,10 @@ class ReasoningParserImpl { const std::optional<std::vector<int64_t>>& previous_tokens, const std::optional<std::vector<int64_t>>& delta_tokens ) { - if (msg["reasoning_content"].is_null()) { + if (!msg.contains("reasoning_content")) { msg["reasoning_content"] = ""; } - if (msg["content"].is_null()) { + if (!msg.contains("content")) { msg["content"] = ""; } @@ -47,31 +48,94 @@ class ReasoningParserImpl { if (m_starts_with_thinking) { m_think_tag_opened = true; } - - if (!m_think_tag_opened && delta_text.find(m_open_tag) != std::string::npos && !m_starts_with_thinking) { + + auto txt_chunk = m_text_cache + delta_text; + auto reason_str = msg["reasoning_content"].get_string(); + auto content_str = msg["content"].get_string(); + + if (!m_think_tag_opened && txt_chunk.find(m_open_tag) != std::string::npos && !m_starts_with_thinking) { + OPENVINO_ASSERT(m_open_tag.find(m_text_cache) == 0, "m_text_cache should be a prefix of m_open_tag"); + + // Thinking has started - auto think_idx = delta_text.find(m_open_tag); - auto lvalue = msg["reasoning_content"].get_string(); - msg["reasoning_content"] = lvalue + 
delta_text.substr(think_idx + std::string(m_open_tag).size(), delta_text.size() - (think_idx + std::string(m_open_tag).size())); - m_think_tag_opened = true; + auto open_idx = txt_chunk.find(m_open_tag); + reason_str += txt_chunk.substr(open_idx + std::string(m_open_tag).size(), txt_chunk.size() - (open_idx + std::string(m_open_tag).size())); if (!m_keep_original_content) { delta_text = ""; } - } else if (m_think_tag_opened && delta_text.find(m_close_tag) != std::string::npos) { - auto think_idx = delta_text.find(m_close_tag); - msg["reasoning_content"] = msg["reasoning_content"].get_string() + delta_text.substr(0, think_idx); - msg["content"] = msg["content"].get_string() + delta_text.substr(think_idx + std::string(m_close_tag).size(), delta_text.size() - (think_idx + std::string(m_close_tag).size())); - m_think_tag_opened = false; - m_deactivated = true; + + m_think_tag_opened = true; + msg["reasoning_content"] = reason_str; + m_text_cache = ""; + + if (txt_chunk.find(m_close_tag) != std::string::npos) { + // If <think> and </think> are in the same txt_chunk = m_text_cache + delta_text + auto close_idx = txt_chunk.find(m_close_tag); + reason_str = txt_chunk.substr(open_idx + std::string(m_open_tag).size(), close_idx - (open_idx + std::string(m_open_tag).size())); + content_str = txt_chunk.substr(close_idx + std::string(m_close_tag).size(), txt_chunk.size() - (close_idx + std::string(m_close_tag).size())); + if (!m_keep_original_content) { + delta_text = content_str; + } + m_think_tag_opened = false; + m_deactivated = true; + msg["reasoning_content"] = reason_str; + } + } else if (m_think_tag_opened && txt_chunk.find(m_close_tag) != std::string::npos) { + // Thinking tag was closed + auto close_idx = txt_chunk.find(m_close_tag); + + reason_str += txt_chunk.substr(0, close_idx); + // content_str += txt_chunk.substr(close_idx + std::string(m_close_tag).size(), txt_chunk.size() - (close_idx + std::string(m_close_tag).size())); if (!m_keep_original_content) { - delta_text = delta_text.substr(think_idx + std::string(m_close_tag).size(), delta_text.size() - (think_idx + std::string(m_close_tag).size())); + // Cut from the txt_chunk which is before </think> and leave only what is after </think>. + // Example if m_text_cache + delta_text = "...some text</th" + "ink>Answer is 3" = "...some text</think>Answer is 3" + // we want to keep in delta_txt only "Answer is 3". + // We can operate with txt_chunk since final characters closing the tag ("ink>") are always in delta_text. + delta_text = txt_chunk.substr(close_idx + std::string(m_close_tag).size(), txt_chunk.size() - (close_idx + std::string(m_close_tag).size())); } + + msg["reasoning_content"] = reason_str; + m_text_cache = ""; + m_think_tag_opened = false; + m_deactivated = true; } else if (m_think_tag_opened) { - msg["reasoning_content"] = msg["reasoning_content"].get_string() + delta_text; + // Thinking tag was already opened and not closed yet + // reason_str += m_text_cache + + // "sdf" + // "sd<", " 2" + + // m_text_cache = "<" + // delta_text = " 2" + + size_t num_chars_to_keep = 0; // number of characters from the end of txt_chunk which can be part of the closing tag + // We must be sure that no chunks with the closing tag are included to reason_str. 
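+                // Illustrative walk-through (assuming the default m_close_tag "</think>"): if txt_chunk is
+                // "reasoning text</th", the loop below finds the suffix "</th", so "reasoning text" goes to
+                // reason_str and "</th" stays in m_text_cache; the next delta "ink> Answer" then completes
+                // the close tag and is handled by the closing-tag branch above.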
+ for (size_t i = txt_chunk.size(); i >= 1; --i) { + // Get the substring of the i last characters of txt_chunk + auto suffix = txt_chunk.substr(txt_chunk.size() - i, i); + if (m_close_tag.find(suffix) != std::string::npos) { + num_chars_to_keep = i; + break; + } + } + + // If the suffix is a prefix of m_close_tag, we store it in the cache to detect if </think> is split between several delta_text pieces. + if (num_chars_to_keep > 0) { + m_text_cache = txt_chunk.substr(txt_chunk.size() - num_chars_to_keep, num_chars_to_keep); + reason_str += txt_chunk.substr(0, txt_chunk.size() - num_chars_to_keep); + } else { + reason_str += txt_chunk; + m_text_cache = ""; + } + if (!m_keep_original_content) { delta_text = ""; } - } // TODO: add case when <think> and </think> are in the same delta_text + msg["reasoning_content"] = reason_str; + } else { + // Think tag was not opened yet and not found in the current delta_text. + // Accumulate text in the cache to detect if <think> is split between several delta_text pieces. + m_text_cache += delta_text; + } return delta_text; } diff --git a/src/python/py_streamers.cpp index 2ea1f72880..61dc2d026c 100644 --- a/src/python/py_streamers.cpp +++ b/src/python/py_streamers.cpp @@ -11,6 +11,7 @@ #include "openvino/genai/text_streamer.hpp" #include "openvino/genai/parsers.hpp" #include "py_utils.hpp" +#include "openvino/genai/json_container.hpp" namespace py = pybind11; @@ -21,6 +22,7 @@ using ov::genai::StreamingStatus; using ov::genai::TextStreamer; using ov::genai::TextParserStreamer; using ov::genai::IncrementalParserBase; using ov::genai::ParsedMessage; using ov::genai::Tokenizer; +using ov::genai::JsonContainer; namespace pyutils = ov::genai::pybind::utils; @@ -75,11 +77,27 @@ class ConstructableTextParserStreamer: public TextParserStreamer { using TextParserStreamer::TextParserStreamer; // inherit base constructors StreamingStatus write(ParsedMessage& message) override { + py::dict message_py; + auto json_obj = message.to_json(); + for (auto it = json_obj.begin(); it != json_obj.end(); ++it) { + message_py[py::cast(it.key())] = py::cast(it.value().get()); + } + + // call python implementation which accepts py::dict instead of ParsedMessage + auto res = py::get_override(this, "write")(message_py); + + auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(message_py); + message = JsonContainer(msg_anymap); + + return res.cast<StreamingStatus>(); + } + + StreamingStatus write(py::dict& message) { PYBIND11_OVERRIDE_PURE( - StreamingStatus, // Return type - TextParserStreamer, // Parent class - write, // Name of function in C++ (must match Python name) - message // Argument(s) + StreamingStatus, + TextParserStreamer, + "write", + message ); } }; @@ -139,9 +157,16 @@ void init_streamers(py::module_& m) { py::arg("parsers") = std::vector, std::string>>({}), "TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers.") .def("write", + [](TextParserStreamer& self, py::dict& message) { + // Downcast to ConstructableTextParserStreamer if needed + auto* derived = dynamic_cast<ConstructableTextParserStreamer*>(&self); + if (!derived) { + throw std::runtime_error("write(py::dict&) only available for ConstructableTextParserStreamer"); + } + return derived->write(message); + }, + py::arg("message"), + "Write is called with a ParsedMessage. 
Returns StreamingStatus.") .def("_write", py::overload_cast(&TextParserStreamer::write), py::arg("message"), diff --git a/tests/python_tests/test_parsers.py index 849c360a63..719766fc06 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -40,13 +40,24 @@ def hf_ov_genai_models(request, tmp_path_factory): @pytest.mark.precommit @pytest.mark.parametrize( "hf_ov_genai_models", - [("katuni4ka/tiny-random-phi3", {"padding_side": "right"})], + [("katuni4ka/tiny-random-phi3", {"padding_side": "right"})], # this tokenizer is used as a stub only indirect=True ) -def test_parsers_1(hf_ov_genai_models): +@pytest.mark.parametrize("answer", [ + "<think>\nOkay, the user is asking for the answer to 2 + 1.</think>\n\nThe answer to 2 + 1 is \boxed{3}.", + + ( + "<think>\nOkay, the user is asking for the answer to 2 + 1. Let me make sure I understand " + "the question correctly. They want a short answer, so I shouldn't overcomplicate things. " + "Basic addition here. Two plus one equals three. Yeah, that's straightforward. I need to " + "respond with the answer inside a box using the specified format. Let me double-check the " + "arithmetic to avoid any mistakes. Yep, 2 + 1 is definitely 3. Alright, time to put it in " + "the box.\n</think>\n\nThe answer to 2 + 1 is \boxed{3}." + ), +]) +def test_phi4_reason_parser_1(hf_ov_genai_models, answer): hf_tokenizer, genai_tokenizer = hf_ov_genai_models - answer = "<think>\nOkay, the user is asking for the answer to 2 + 1. Let me make sure I understand the question correctly. They want a short answer, so I shouldn't overcomplicate things. Basic addition here. Two plus one equals three. Yeah, that's straightforward. I need to respond with the answer inside a box using the specified format. Let me double-check the arithmetic to avoid any mistakes. Yep, 2 + 1 is definitely 3. Alright, time to put it in the box.\n</think>\n\nThe answer to 2 + 1 is \boxed{3}." stream_string = re.split(r"(\s+)", answer) class CustomStreamer(TextParserStreamer): @@ -59,7 +70,6 @@ def write(self, message): for subword in stream_string: streamer._write(subword) - # breakpoint() think_content = answer.split("</think>")[0].replace("<think>", "") content = answer @@ -69,51 +79,37 @@ def write(self, message): assert msg['reasoning_content'] == think_content assert msg['content'] == content -@pytest.mark.precommit -@pytest.mark.parametrize( - "hf_ov_genai_models", - [("katuni4ka/tiny-random-phi3", {"padding_side": "right"})], - indirect=True -) -def test_final_parser_1(hf_ov_genai_models): - prompt = textwrap.dedent(''' - <|begin_of_text|><|start_header_id|>system<|end_header_id|> - - Environment: ipython - Cutting Knowledge Date: December 2023 - Today Date: 15 Oct 2025 - - You have access to the following functions. To call functions, please respond with a python list of the calls. Respond in the format [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)] Do not use variables. - - { - "type": "function", - "function": { - "name": "get_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "City and state, e.g., 'San Francisco, CA'" - }, - "unit": { - "type": "string", - "enum": [ - "celsius", - "fahrenheit" - ] - } - }, - "required": [ - "location", - "unit" - ] - } - } - } - - You are a helpful assistant with tool calling capabilities. 
Only reply with a tool call if the function exists in the library provided by the user. If it doesn't exist, just reply directly in natural language. When you receive a tool call response, use the output to format an answer to the original user question.<|eot_id|><|start_header_id|>user<|end_header_id|> - - What's the weather in New York today? Please explain what you are doing and call the tool<|eot_id|><|start_header_id|>assistant<|end_header_id|> - ''') +@pytest.mark.parametrize("split_answer", [ + ["<think>", "\nOkay, ", "the user is asking", " for the ", "answer ", "to 2 + 1.", "</think>", "\n\nThe answer ", "to", "2 ", "+ ", "1 ", "is ", "\boxed{3}."], + ["<th", "ink>", "\nOkay, ", "the user is asking", " for the ", "answer ", "to 2 + 1.", "</think>", "\n\nThe answer ", "to", "2 ", "+ ", "1 ", "is ", "\boxed{3}."], + ["<think>", "\nOkay, ", "the user is asking", " for the ", "answer ", "to 2 + 1.", "</th", "ink>", "\n\nThe answer ", "to", "2 ", "+ ", "1 ", "is ", "\boxed{3}."], + + # check that if thinking opening and closing tags are passed in a single subword, it is still parsed correctly + ["<think>\nOkay, the user is asking for the answer to 2 + 1.</think>\n\nThe answer to 2 + 1 is \boxed{3}."] +]) +def test_phi4_reason_parser_2(hf_ov_genai_models, split_answer): + # check that if thinking opening and closing tags are in the middle of the subword, it is still parsed correctly + hf_tokenizer, genai_tokenizer = hf_ov_genai_models + + class CustomStreamer(TextParserStreamer): + def write(self, message): + msg.update(message) + return StreamingStatus.RUNNING + streamer = CustomStreamer(genai_tokenizer, parsers=["Phi4ReasoningParser"]) + + msg = {} + for subword in split_answer: + streamer._write(subword) + + think_content = (''.join(split_answer)).split("</think>")[0].replace("<think>", "") + content = ''.join(split_answer) + + assert msg['reasoning_content'] == think_content + assert msg['content'] == content + def test_parsers_2(hf_ov_genai_models): hf_tokenizer, genai_tokenizer = hf_ov_genai_models From 5ee48bf33590f2d7c05618f61779d4e1d4afbf0e Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 17 Oct 2025 00:40:03 +0200 Subject: [PATCH 08/43] add Llama32JsonToolParser --- src/cpp/include/openvino/genai/parsers.hpp | 12 +++ src/cpp/src/continuous_batching/pipeline.cpp | 4 +- src/cpp/src/parsers.cpp | 98 ++++++++++++-------- src/python/openvino_genai/__init__.py | 6 +- src/python/py_parsers.cpp | 71 +++++++++++++- tests/python_tests/test_parsers.py | 25 ++++- 6 files changed, 170 insertions(+), 46 deletions(-) diff --git a/src/cpp/include/openvino/genai/parsers.hpp index 479dd2d4de..18e8ee7fef 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -89,6 +89,18 @@ class Llama32PythonicToolParser : public ParserBase { bool m_keep_original_content = true; }; +class Llama32JsonToolParser : public ParserBase { +// Does not modify original content, only extracts and adds tool calls +public: + // TODO: Check that vLLM has the same default. 
+ Llama32JsonToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {} + + ParsedMessage parse(ParsedMessage& input) override; + static std::string name() { return "Llama32JsonToolParser"; } +private: + bool m_keep_original_content = true; +}; + class BaseReasoningParser : public ParserBase{ public: BaseReasoningParser(bool expect_open_tag = true, bool keep_original_content = true, std::string open_tag = "<think>", std::string close_tag = "</think>") : diff --git a/src/cpp/src/continuous_batching/pipeline.cpp index 25256c8d70..404ee620e1 100644 --- a/src/cpp/src/continuous_batching/pipeline.cpp +++ b/src/cpp/src/continuous_batching/pipeline.cpp @@ -58,7 +58,7 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::filesystem::p auto model = utils::read_model(models_path, properties); auto [properties_without_draft_model_without_gguf, enable_save_ov_model] = utils::extract_gguf_properties(properties_without_draft_model); - // properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; + properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; auto tokenizer = ov::genai::Tokenizer(models_path, tokenizer_properties); auto generation_config = utils::from_config_json_if_exists(models_path); @@ -98,7 +98,7 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( auto model = utils::read_model(models_path, properties_without_draft_model); auto [properties_without_draft_model_without_gguf, enable_save_ov_model] = utils::extract_gguf_properties(properties_without_draft_model); - // properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; + properties_without_draft_model_without_gguf[ov::cache_model_path.name()] = models_path; auto generation_config = utils::from_config_json_if_exists(models_path); diff --git a/src/cpp/src/parsers.cpp index eb491aef32..915167cc2d 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -5,10 +5,8 @@ #include #include #include -#include #include -using namespace std; using json = nlohmann::json; namespace ov::genai { @@ -99,15 +97,19 @@ class ReasoningParserImpl { m_deactivated = true; } else if (m_think_tag_opened) { // Thinking tag was already opened and not closed yet - // reason_str += m_text_cache - // "sdf" - // "sd<", " 2" - - // m_text_cache = "<" - // delta_text = " 2" - - size_t num_chars_to_keep = 0; // number of characters from the end of txt_chunk which can be part of the closing tag + // If we have subsequently "sdf</th", "i", "nk> The" + // Then we put "sdf" to reason_str and "</th" to m_text_cache, + // then we put "i" to m_text_cache since m_text_cache + delta_text = "</thi", + // then (in the closing tag IF-block) we leave only " The" in delta_text. + + // If we have "ing. <", " 20 ", "40>" + // Then we put "ing. " to reason_str and "<" to m_text_cache since it's a substring of close tag "</think>", + // but since continuation " 20 " is not a substring of "</think>", we will end up in this IF-block again + // and put " 20 " to reason_str and clear m_text_cache. + + // number of characters from the end of txt_chunk + size_t num_chars_to_keep = 0; // We must be sure that no chunks with the closing tag are included to reason_str. 
for (size_t i = txt_chunk.size(); i >= 1; --i) { // Get the substring of the i last characters of txt_chunk @@ -164,39 +166,61 @@ ParsedMessage Llama32PythonicToolParser::parse(ParsedMessage& input) { // string input = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>"; // Regex to capture the [...] part - smatch m; + std::smatch m; const std::string& text = input["content"].get_string(); - regex r(R"(\[.*?\])"); - if (regex_search(text, m, r)) { - // Strip outer [ ] - string call = m.str().substr(1, m.str().size() - 2); - - // Split function name and arguments - size_t pos = call.find('('); - string name = call.substr(0, pos); - string args = call.substr(pos + 1, call.size() - pos - 2); // inside (...) - - // Parse arguments of the form key='value' - map kv; - regex arg_re(R"((\w+)\s*=\s*'([^']*)')"); - auto it = sregex_iterator(args.begin(), args.end(), arg_re); - for (; it != sregex_iterator(); ++it) { - kv[(*it)[1]] = (*it)[2]; - } - json j = json::array({{ - {"name", name}, - {"arguments", kv} - }}); - if (!m_keep_original_content) { - input["content"] = regex_replace(text, r, ""); - } - std::cout << j.dump() << std::endl; - input["tool_calls"] = ParsedMessage::from_json_string(j.dump()); + std::regex r(R"(\[.*?\])"); + if (!std::regex_search(text, m, r)) { return input; } + + // Strip outer [ ] + std::string call = m.str().substr(1, m.str().size() - 2); + + // Split function name and arguments + input["tool_calls"] = ParsedMessage::array(); + + size_t pos = call.find('('); + std::string name = call.substr(0, pos); + std::string args = call.substr(pos + 1, call.size() - pos - 2); // inside (...) + + // Parse arguments of the form key='value' + JsonContainer kv = JsonContainer::array(); + + std::regex arg_re(R"((\w+)\s*=\s*\"([^"]*)\")"); + auto it = std::sregex_iterator(args.begin(), args.end(), arg_re); + for (; it != std::sregex_iterator(); ++it) { + kv.push_back(ParsedMessage(ov::AnyMap{{"key", std::string((*it)[1])}, {"value", std::string((*it)[2])}})); + } + + input["tool_calls"] = ParsedMessage::array(); + input["tool_calls"].push_back(ParsedMessage({{"name", name}, {"arguments", kv}})); + + if (!m_keep_original_content) { + input["content"] = regex_replace(text, r, ""); + } + return ParsedMessage{}; } +ParsedMessage Llama32JsonToolParser::parse(ParsedMessage& message) { + // Find JSON in the message + std::string msg_content = message["content"].get_string(); + + size_t json_start = msg_content.find('{'); + size_t json_end = msg_content.rfind('}'); + if (json_start == std::string::npos || json_end == std::string::npos || json_end <= json_start) { + return message; + } + auto res = JsonContainer::array(); + res.push_back(JsonContainer::from_json_string(msg_content.substr(json_start, json_end - json_start + 1))); + message["tool_calls"] = res; + + if (!m_keep_original_content) { + message["content"] = msg_content.substr(0, json_start) + msg_content.substr(json_end + 1); + } + return message; +} + ParsedMessage BaseReasoningParser::parse(ParsedMessage& input) { ParsedMessage res; std::string reasoning_content; diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py index 19e4ebe97a..3656c0280a 100644 --- a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -24,7 +24,11 @@ from .py_openvino_genai import ( ParserBase, - IncrementalParserBase + IncrementalParserBase, + Phi4ReasoningParser, + DeepSeekR1ReasoningParser, + Llama32JsonToolParser, + Llama32PythonicToolParser, ) __version__ = get_version() 
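Note (illustrative sketch, not part of the diff): the exports above are exercised by test_final_parser_llama_32_json later in this patch; the dict-in/dict-out behavior follows the pybind glue in py_parsers.cpp below:

    from openvino_genai import Llama32JsonToolParser

    msg = {"content": 'Calling weather API: {"name": "get_weather", "parameters": {"location": "New York, NY", "unit": "celsius"}}'}
    parser = Llama32JsonToolParser()
    parser.parse(msg)  # extracts the embedded JSON and fills msg["tool_calls"] in place
    assert msg["tool_calls"][0]["name"] == "get_weather"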
diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index 2cf26e4b19..5f8df28446 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -17,6 +17,11 @@ using ov::genai::ParsedMessage; using ov::genai::IncrementalParserBase; using ov::genai::ParserVariant; using ov::genai::ParserBase; +using ov::genai::ReasoningParser; +using ov::genai::Phi4ReasoningParser; +using ov::genai::DeepSeekR1ReasoningParser; +using ov::genai::Llama32JsonToolParser; +using ov::genai::Llama32PythonicToolParser; using ov::genai::Tokenizer; using ov::genai::StreamingStatus; using ov::genai::JsonContainer; @@ -68,6 +73,25 @@ class ConstructableParserBase: public ParserBase { } }; +static py::object json_mod = py::module_::import("json"); + +// wrapper to enhance calling parser from Python +void call_parser(py::dict& msg, std::function func) { + auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(msg); + auto msg_cpp = JsonContainer(msg_anymap); + + func(msg_cpp); + + auto json_str = msg_cpp.to_json_string(); + py::dict result = json_mod.attr("loads")(json_str); + + // update msg with result + msg.clear(); + for (auto item : result) { + msg[item.first] = item.second; + } +} + } // namespace // TODO: double check/add more relevant docstrings for parsers. @@ -98,12 +122,51 @@ void init_parsers(py::module_& m) { py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt, "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output.") .def("is_active", &IncrementalParserBase::is_active, "Indicates whether the parser is active and should be used during parsing."); + + py::class_, IncrementalParserBase>(m, "Phi4ReasoningParser") + .def(py::init(), py::arg("starts_with_thinking") = false) + .def("parse", + &Phi4ReasoningParser::parse, + "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output.", + py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), + py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt) + .def_static("get_parser", &Phi4ReasoningParser::get_parser, py::arg("name"), "Factory method to get parser by name."); - - py::class_>(m, "ParserBase") + py::class_, IncrementalParserBase>(m, "DeepSeekR1ReasoningParser") .def(py::init<>()) .def("parse", - &ParserBase::parse, + &DeepSeekR1ReasoningParser::parse, "Parse is called with the full text. Returns a ParsedMessage with parsed content.", - py::arg("text")); + py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), + py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt) + .def_static("get_parser", &DeepSeekR1ReasoningParser::get_parser, py::arg("name"), "Factory method to get parser by name."); + + py::class_>(m, "ParserBase") + .def(py::init<>()) + .def("parse", + [](ParserBase& self, py::dict& msg) { + return call_parser(msg, [&self](JsonContainer& m) {return self.parse(m);}); + }, + py::arg("text"), + "Parse is called with the full text. Returns a ParsedMessage with parsed content."); + + py::class_, ParserBase>(m, "Llama32JsonToolParser") + .def(py::init<>()) + .def("parse", + [](Llama32JsonToolParser& self, py::dict& msg) { + return call_parser(msg, [&self](JsonContainer& m) { return self.parse(m); }); + }, + py::arg("text"), + "Parse is called with the full text. 
Returns a ParsedMessage with parsed content.") + .def_static("get_parser", &Llama32JsonToolParser::get_parser, py::arg("name"), "Factory method to get parser by name."); + + py::class_, ParserBase>(m, "Llama32PythonicToolParser") + .def(py::init<>()) + .def("parse", + [](Llama32PythonicToolParser& self, py::dict& msg) { + return call_parser(msg, [&self](JsonContainer& m) { return self.parse(m); }); + }, + py::arg("text"), + "Parse is called with the full text. Returns a ParsedMessage with parsed content.") + .def_static("get_parser", &Llama32PythonicToolParser::get_parser, py::arg("name"), "Factory method to get parser by name."); } diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index 719766fc06..f7ced84171 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -7,12 +7,12 @@ import numpy as np import openvino import pytest -from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus +from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus, Llama32JsonToolParser, Phi4ReasoningParser, DeepSeekR1ReasoningParser from transformers import AutoTokenizer from utils.hugging_face import convert_and_save_tokenizer, download_and_convert_model import re import textwrap - +import json @pytest.fixture(scope="module") def hf_ov_genai_models(request, tmp_path_factory): @@ -111,6 +111,27 @@ def write(self, message): assert msg['content'] == content + +@pytest.mark.precommit +@pytest.mark.parametrize( + "hf_ov_genai_models", + [("katuni4ka/tiny-random-phi3", {"padding_side": "right"})], + indirect=True +) +def test_final_parser_llama_32_json(hf_ov_genai_models): + hf_tokenizer, genai_tokenizer = hf_ov_genai_models + + json_str = '{"type": "function", "function": {"name": "get_weather", "parameters": {"location": "New York, NY", "unit": "celsius"}}}' + content_json = { + "content": f"Calling weather API: {json_str}" + } + + parser = Llama32JsonToolParser() + parser.parse(content_json) + + assert content_json['tool_calls'][0] == json.loads(json_str) + + def test_parsers_2(hf_ov_genai_models): hf_tokenizer, genai_tokenizer = hf_ov_genai_models class CustomStreamer(TextParserStreamer): From 248ccc6355504ab58775f1b3822b0f919c8652ee Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 17 Oct 2025 08:07:49 +0200 Subject: [PATCH 09/43] rename ParsedMessage -> JsonContainer --- .../text_generation/parsed_output_sample.cpp | 2 +- .../include/openvino/genai/llm_pipeline.hpp | 1 - src/cpp/include/openvino/genai/parsers.hpp | 16 +++++--------- .../include/openvino/genai/text_streamer.hpp | 6 ++--- src/cpp/src/llm/pipeline.cpp | 2 +- src/cpp/src/parsers.cpp | 22 +++++++++---------- .../openvino_genai/py_openvino_genai.pyi | 6 ++--- src/python/py_parsers.cpp | 18 +++++++-------- src/python/py_streamers.cpp | 8 +++---- tests/cpp/parser.cpp | 6 ++--- tests/python_tests/test_text_streamer.py | 2 +- 11 files changed, 42 insertions(+), 47 deletions(-) diff --git a/samples/cpp/text_generation/parsed_output_sample.cpp b/samples/cpp/text_generation/parsed_output_sample.cpp index 6efa64ee5e..dfc2ef964b 100644 --- a/samples/cpp/text_generation/parsed_output_sample.cpp +++ b/samples/cpp/text_generation/parsed_output_sample.cpp @@ -11,7 +11,7 @@ class CurrentStreamer : public ov::genai::TextParserStreamer { public: CurrentStreamer(const ov::genai::Tokenizer& tokenizer) : ov::genai::TextParserStreamer(tokenizer) {} - ov::genai::StreamingStatus 
write(ov::genai::ParsedMessage& message) { + ov::genai::StreamingStatus write(ov::genai::JsonContainer& message) { std::cout << message["content"].get_string() << std::flush; return ov::genai::StreamingStatus::RUNNING; } diff --git a/src/cpp/include/openvino/genai/llm_pipeline.hpp b/src/cpp/include/openvino/genai/llm_pipeline.hpp index 6385ced995..5061901108 100644 --- a/src/cpp/include/openvino/genai/llm_pipeline.hpp +++ b/src/cpp/include/openvino/genai/llm_pipeline.hpp @@ -69,7 +69,6 @@ class DecodedResults { std::vector scores; PerfMetrics perf_metrics; std::shared_ptr extended_perf_metrics; - // std::vector parsed; std::vector parsed; // @brief Convert DecodedResults to a string. diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 18e8ee7fef..979d63ef7d 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -14,17 +14,13 @@ namespace ov { namespace genai { -// TODO: will be converted to JSONLike object -// using ParsedMessage = std::map; -using ParsedMessage = JsonContainer; - class IncrementalParserBase { public: IncrementalParserBase() = default; // We return string which with filtered text to be added to content. virtual std::string parse( - ParsedMessage& msg, + JsonContainer& msg, const std::string& previous_text, std::string& delta_text, const std::optional>& previous_tokens = std::nullopt, @@ -46,7 +42,7 @@ class ReasoningParser : public IncrementalParserBase { bool keep_original_content = true); std::string parse( - ParsedMessage& msg, + JsonContainer& msg, const std::string& previous_text, std::string& delta_text, const std::optional>& previous_tokens = std::nullopt, @@ -71,7 +67,7 @@ class ParserBase { public: ParserBase() = default; - virtual ParsedMessage parse(ParsedMessage& text) = 0; + virtual JsonContainer parse(JsonContainer& text) = 0; static std::shared_ptr get_parser(std::string name); }; @@ -83,7 +79,7 @@ class Llama32PythonicToolParser : public ParserBase { // TODO: Check that vLLM has the same default. Llama32PythonicToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {} - ParsedMessage parse(ParsedMessage& input) override; + JsonContainer parse(JsonContainer& input) override; static std::string name() { return "Llama32PythonicToolParser"; } private: bool m_keep_original_content = true; @@ -95,7 +91,7 @@ class Llama32JsonToolParser : public ParserBase { // TODO: Check that vLLM has the same default. 
Llama32JsonToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {} - ParsedMessage parse(ParsedMessage& input) override; + JsonContainer parse(JsonContainer& input) override; static std::string name() { return "Llama32JsonToolParser"; } private: bool m_keep_original_content = true; @@ -109,7 +105,7 @@ class BaseReasoningParser : public ParserBase{ m_open_tag(open_tag), m_close_tag(close_tag) {} - ParsedMessage parse(ParsedMessage& input) override; + JsonContainer parse(JsonContainer& input) override; private: bool m_expect_open_tag = true; diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp index 055adbfbe2..4ac5758b73 100644 --- a/src/cpp/include/openvino/genai/text_streamer.hpp +++ b/src/cpp/include/openvino/genai/text_streamer.hpp @@ -51,14 +51,14 @@ class TextParserStreamer : public TextStreamer { public: TextParserStreamer(const Tokenizer& tokenizer, std::vector parsers = {}); - virtual StreamingStatus write(ParsedMessage& message) = 0; + virtual StreamingStatus write(JsonContainer& message) = 0; CallbackTypeVariant write(std::string message); - ParsedMessage get_parsed_message() const { return m_parsed_message; } + JsonContainer get_parsed_message() const { return m_parsed_message; } std::vector> get_parsers() const { return m_parsers; } private: - ParsedMessage m_parsed_message; + JsonContainer m_parsed_message; std::string m_text_buffer; std::vector> m_parsers; }; diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index 41b678d388..c2b10208f0 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -241,7 +241,7 @@ DecodedResults LLMPipeline::generate( // Apply Base parsers sequentially even if IncrementalParser has run. 
if (!parsers.empty()) { for (size_t i = 0; i < res.texts.size(); ++i) { - ParsedMessage msg; + JsonContainer msg; msg["content"] = res.texts[i]; for (auto& parser: parsers) { // TODO: check if is_active() is needed here diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index 915167cc2d..018a70950e 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -29,7 +29,7 @@ class ReasoningParserImpl { m_keep_original_content(keep_original_content) {} std::string parse( - ParsedMessage& msg, + JsonContainer& msg, const std::string& previous_text, std::string& delta_text, const std::optional>& previous_tokens, @@ -148,7 +148,7 @@ ReasoningParser::ReasoningParser(bool starts_with_thinking, bool keep_original_c } std::string ReasoningParser::parse( - ParsedMessage& msg, + JsonContainer& msg, const std::string& previous_text, std::string& delta_text, const std::optional>& previous_tokens, @@ -161,7 +161,7 @@ bool ReasoningParser::is_active() const { return !m_impl->m_deactivated; } -ParsedMessage Llama32PythonicToolParser::parse(ParsedMessage& input) { +JsonContainer Llama32PythonicToolParser::parse(JsonContainer& input) { // Input example // string input = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>"; @@ -177,7 +177,7 @@ ParsedMessage Llama32PythonicToolParser::parse(ParsedMessage& input) { std::string call = m.str().substr(1, m.str().size() - 2); // Split function name and arguments - input["tool_calls"] = ParsedMessage::array(); + input["tool_calls"] = JsonContainer::array(); size_t pos = call.find('('); std::string name = call.substr(0, pos); @@ -189,20 +189,20 @@ ParsedMessage Llama32PythonicToolParser::parse(ParsedMessage& input) { std::regex arg_re(R"((\w+)\s*=\s*\"([^"]*)\")"); auto it = std::sregex_iterator(args.begin(), args.end(), arg_re); for (; it != std::sregex_iterator(); ++it) { - kv.push_back(ParsedMessage(ov::AnyMap{{"key", std::string((*it)[1])}, {"value", std::string((*it)[2])}})); + kv.push_back(JsonContainer(ov::AnyMap{{"key", std::string((*it)[1])}, {"value", std::string((*it)[2])}})); } - input["tool_calls"] = ParsedMessage::array(); - input["tool_calls"].push_back(ParsedMessage({{"name", name}, {"arguments", kv}})); + input["tool_calls"] = JsonContainer::array(); + input["tool_calls"].push_back(JsonContainer({{"name", name}, {"arguments", kv}})); if (!m_keep_original_content) { input["content"] = regex_replace(text, r, ""); } - return ParsedMessage{}; + return JsonContainer{}; } -ParsedMessage Llama32JsonToolParser::parse(ParsedMessage& message) { +JsonContainer Llama32JsonToolParser::parse(JsonContainer& message) { // Find JSON in the message std::string msg_content = message["content"].get_string(); @@ -221,8 +221,8 @@ ParsedMessage Llama32JsonToolParser::parse(ParsedMessage& message) { return message; } -ParsedMessage BaseReasoningParser::parse(ParsedMessage& input) { - ParsedMessage res; +JsonContainer BaseReasoningParser::parse(JsonContainer& input) { + JsonContainer res; std::string reasoning_content; // auto content = input["content"]; std::string content = input["content"].get_string(); diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index ada0fa5ca6..82c0721e9a 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -1383,7 +1383,7 @@ class IncrementalParserBase: """ def parse(self, msg: collections.abc.Mapping[str, str], previous_text: str, delta_text: str, previous_tokens: 
collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> dict[str, str]: """ - Parse is called every time new text delta is decoded. Returns a ParsedMessage with parsed content. + Parse is called every time new text delta is decoded. Returns a JsonContainer with parsed content. """ class InpaintingPipeline: """ @@ -1757,7 +1757,7 @@ class ParserBase: ... def parse(self, text: collections.abc.Mapping[str, str]) -> dict[str, str]: """ - Parse is called with the full text. Returns a ParsedMessage with parsed content. + Parse is called with the full text. Returns a JsonContainer with parsed content. """ class PerfMetrics: """ @@ -3158,7 +3158,7 @@ class TextParserStreamer: """ def write(self, message: collections.abc.Mapping[str, str]) -> StreamingStatus: """ - Write is called with a ParsedMessage. Returns StreamingStatus. + Write is called with a JsonContainer. Returns StreamingStatus. """ class TextRerankPipeline: """ diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index 5f8df28446..d756f2a942 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -13,7 +13,7 @@ namespace py = pybind11; -using ov::genai::ParsedMessage; +using ov::genai::JsonContainer; using ov::genai::IncrementalParserBase; using ov::genai::ParserVariant; using ov::genai::ParserBase; @@ -34,7 +34,7 @@ namespace { class ConstructableIncrementalParserBase: public IncrementalParserBase { public: std::string parse( - ParsedMessage& msg, + JsonContainer& msg, const std::string& previous_text, std::string& delta_text, const std::optional>& previous_tokens = std::nullopt, @@ -63,9 +63,9 @@ class ConstructableIncrementalParserBase: public IncrementalParserBase { class ConstructableParserBase: public ParserBase { public: - ParsedMessage parse(ParsedMessage& text) override { + JsonContainer parse(JsonContainer& text) override { PYBIND11_OVERRIDE_PURE( - ParsedMessage, // Return type + JsonContainer, // Return type ParserBase, // Parent class parse, // Name of function in C++ (must match Python name) text // Argument(s) @@ -104,7 +104,7 @@ void init_parsers(py::module_& m) { std::string& delta_text, const std::optional>& previous_tokens = std::nullopt, const std::optional>& delta_tokens = std::nullopt) { - // TODO: optimize conversion between py::dict and ParsedMessage + // TODO: optimize conversion between py::dict and JsonContainer auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(msg); auto msg_cpp = JsonContainer(msg_anymap); @@ -136,7 +136,7 @@ void init_parsers(py::module_& m) { .def(py::init<>()) .def("parse", &DeepSeekR1ReasoningParser::parse, - "Parse is called with the full text. Returns a ParsedMessage with parsed content.", + "Parse is called with the full text. Returns a JsonContainer with parsed content.", py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt) .def_static("get_parser", &DeepSeekR1ReasoningParser::get_parser, py::arg("name"), "Factory method to get parser by name."); @@ -148,7 +148,7 @@ void init_parsers(py::module_& m) { return call_parser(msg, [&self](JsonContainer& m) {return self.parse(m);}); }, py::arg("text"), - "Parse is called with the full text. Returns a ParsedMessage with parsed content."); + "Parse is called with the full text. 
Returns a JsonContainer with parsed content."); py::class_, ParserBase>(m, "Llama32JsonToolParser") .def(py::init<>()) @@ -157,7 +157,7 @@ void init_parsers(py::module_& m) { return call_parser(msg, [&self](JsonContainer& m) { return self.parse(m); }); }, py::arg("text"), - "Parse is called with the full text. Returns a ParsedMessage with parsed content.") + "Parse is called with the full text. Returns a JsonContainer with parsed content.") .def_static("get_parser", &Llama32JsonToolParser::get_parser, py::arg("name"), "Factory method to get parser by name."); py::class_, ParserBase>(m, "Llama32PythonicToolParser") @@ -167,6 +167,6 @@ void init_parsers(py::module_& m) { return call_parser(msg, [&self](JsonContainer& m) { return self.parse(m); }); }, py::arg("text"), - "Parse is called with the full text. Returns a ParsedMessage with parsed content.") + "Parse is called with the full text. Returns a JsonContainer with parsed content.") .def_static("get_parser", &Llama32PythonicToolParser::get_parser, py::arg("name"), "Factory method to get parser by name."); } diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp index 61dc2d026c..4bfa01223d 100644 --- a/src/python/py_streamers.cpp +++ b/src/python/py_streamers.cpp @@ -20,7 +20,7 @@ using ov::genai::StreamingStatus; using ov::genai::TextStreamer; using ov::genai::TextParserStreamer; using ov::genai::IncrementalParserBase; -using ov::genai::ParsedMessage; +using ov::genai::JsonContainer; using ov::genai::Tokenizer; using ov::genai::JsonContainer; @@ -76,14 +76,14 @@ class ConstructableTextParserStreamer: public TextParserStreamer { public: using TextParserStreamer::TextParserStreamer; // inherit base constructors - StreamingStatus write(ParsedMessage& message) override { + StreamingStatus write(JsonContainer& message) override { py::dict message_py; auto json_obj = message.to_json(); for (auto it = json_obj.begin(); it != json_obj.end(); ++it) { message_py[py::cast(it.key())] = py::cast(it.value().get()); } - // call python implementation which accepts py::dict instead of ParsedMessage + // call python implementation which accepts py::dict instead of JsonContainer auto res = py::get_override(this, "write")(message_py); auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(message_py); @@ -166,7 +166,7 @@ void init_streamers(py::module_& m) { return derived->write(message); }, py::arg("message"), - "Write is called with a ParsedMessage. Returns StreamingStatus.") + "Write is called with a JsonContainer. 
Returns StreamingStatus.") .def("_write", py::overload_cast(&TextParserStreamer::write), py::arg("message"), diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp index dc15ac1482..2068e9308f 100644 --- a/tests/cpp/parser.cpp +++ b/tests/cpp/parser.cpp @@ -10,7 +10,7 @@ using namespace ov::genai; nlohmann::json run_parser_test(std::shared_ptr parser, const std::string& prompt) { - ParsedMessage input; + JsonContainer input; input["content"] = prompt; return (parser->parse(input)).to_json(); } @@ -101,7 +101,7 @@ TEST(ParserTest, test_reasoning_parser_2) { class DeepSeekR1ReasoningParserTest : public ::testing::Test { protected: ov::genai::ReasoningParser parser; - ParsedMessage msg; + JsonContainer msg; }; TEST_F(DeepSeekR1ReasoningParserTest, ReasoningContentAccumulatesAcrossCalls) { @@ -118,7 +118,7 @@ TEST_F(DeepSeekR1ReasoningParserTest, ReasoningContentAccumulatesAcrossCalls) { std::string ref_res = "First, I recognize that the question is asking for the sum of 2 and 1.\n\nI know that addition involves combining two numbers to find their total.\n\nStarting with 2, I add 1 to it.\n\n2 plus 1 equals 3.\n"; - ParsedMessage msg; + JsonContainer msg; for (int i = 1; i < input_stream.size(); i++) { diff --git a/tests/python_tests/test_text_streamer.py b/tests/python_tests/test_text_streamer.py index bc3db5bc97..9b646c6553 100644 --- a/tests/python_tests/test_text_streamer.py +++ b/tests/python_tests/test_text_streamer.py @@ -78,7 +78,7 @@ def write(self, token_chunk): class CurrentParsingStreamer(TextParserStreamer): def write(self, word: str): - msg: ParsedMessage = get_current_message() + msg: JsonContainer = get_current_message() streamer = lambda x: print(x) From 04064bfe2e602121f72eaf515d77d9514807ca42 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 17 Oct 2025 08:59:46 +0200 Subject: [PATCH 10/43] make tests green again, apply copilot comments --- .github/workflows/linux.yml | 2 +- .github/workflows/mac.yml | 2 +- .github/workflows/windows.yml | 2 +- src/cpp/include/openvino/genai/parsers.hpp | 2 -- src/cpp/src/llm/pipeline.cpp | 4 +-- src/cpp/src/parsers.cpp | 39 +++++++++++----------- src/cpp/src/text_streamer.cpp | 4 +-- src/python/py_parsers.cpp | 11 +----- src/python/py_streamers.cpp | 3 +- tests/cpp/parser.cpp | 9 ++--- tests/python_tests/test_parsers.py | 2 +- tests/python_tests/test_text_streamer.py | 4 +-- 12 files changed, 36 insertions(+), 48 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 9500add7c3..7ff8c29af3 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -782,7 +782,7 @@ jobs: run: | source ${{ env.INSTALL_DIR }}/setupvars.sh chmod +x ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching - ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="-AddSecondInputTest.*" --gtest_filter="DeepSeekR1ReasoningParserTest.*" --gtest_filter="ParserTest.*" + ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="-AddSecondInputTest.*" - name: Test Continuous Batching Tools if: ${{ fromJSON(needs.smart_ci.outputs.affected_components).continuous_batching }} diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index d1be431238..5620804095 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -695,7 +695,7 @@ jobs: run: | source ${{ env.INSTALL_DIR }}/setupvars.sh chmod +x ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching - ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="-AddSecondInputTest.*" 
--gtest_filter="DeepSeekR1ReasoningParserTest.*" --gtest_filter="ParserTest.* + ${{ env.INSTALL_DIR }}/tests/tests_continuous_batching --gtest_filter="-AddSecondInputTest.*" - name: Test C++ Tools run: | diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index fbd082bede..bb544bc0cf 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -866,7 +866,7 @@ jobs: - name: gtests unit tests run: | . "${{ env.INSTALL_DIR }}/setupvars.ps1" - & "${{ env.INSTALL_DIR }}/tests/tests_continuous_batching.exe" --gtest_filter="-AddSecondInputTest.*" --gtest_filter="DeepSeekR1ReasoningParserTest.*" --gtest_filter="ParserTest.* + & "${{ env.INSTALL_DIR }}/tests/tests_continuous_batching.exe" --gtest_filter="-AddSecondInputTest.*" - name: Test C++ Tools run: | diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 979d63ef7d..6d2dac8fd0 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -27,7 +27,6 @@ class IncrementalParserBase { const std::optional>& delta_tokens = std::nullopt ) = 0; - virtual bool is_active() const = 0; static std::shared_ptr get_parser(std::string name); }; @@ -48,7 +47,6 @@ class ReasoningParser : public IncrementalParserBase { const std::optional>& previous_tokens = std::nullopt, const std::optional>& delta_tokens = std::nullopt ) override; - bool is_active() const override; }; class DeepSeekR1ReasoningParser : public ReasoningParser { diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index c2b10208f0..899da4e39f 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -244,10 +244,10 @@ DecodedResults LLMPipeline::generate( JsonContainer msg; msg["content"] = res.texts[i]; for (auto& parser: parsers) { - // TODO: check if is_active() is needed here // TODO: Check the state of incremental parser and reset if necessary - msg = parser->parse(msg); + parser->parse(msg); } + res.parsed[i] = msg; } } diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index 018a70950e..3026f44599 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -14,6 +14,7 @@ namespace ov::genai { class ReasoningParserImpl { private: bool m_starts_with_thinking = true; + bool m_first_run = true; bool m_keep_original_content = true; bool m_think_tag_opened = false; std::string m_open_tag = ""; @@ -35,6 +36,14 @@ class ReasoningParserImpl { const std::optional>& previous_tokens, const std::optional>& delta_tokens ) { + if (m_deactivated) { + return delta_text; + } + if (m_starts_with_thinking && m_first_run) { + m_think_tag_opened = true; + } + m_first_run = false; + if (!msg.contains("reasoning_content")) { msg["reasoning_content"] = ""; } @@ -42,10 +51,6 @@ class ReasoningParserImpl { msg["content"] = ""; } - bool think_tag_closed = delta_text.find(m_close_tag) != std::string::npos; - if (m_starts_with_thinking) { - m_think_tag_opened = true; - } auto txt_chunk = m_text_cache + delta_text; auto reason_str = msg["reasoning_content"].get_string(); @@ -114,7 +119,8 @@ class ReasoningParserImpl { for (size_t i = txt_chunk.size(); i >= 1; --i) { // Get the substring of the i last characters of txt_chunk auto suffix = txt_chunk.substr(txt_chunk.size() - i, i); - if (m_close_tag.find(suffix) != std::string::npos) { + // If this suffix is a prefix of m_close_tag, we need to keep it in the cache. 
+ if (m_close_tag.find(suffix) == 0) { num_chars_to_keep = i; break; } @@ -157,10 +163,6 @@ std::string ReasoningParser::parse( return m_impl->parse(msg, previous_text, delta_text, previous_tokens, delta_tokens); } -bool ReasoningParser::is_active() const { - return !m_impl->m_deactivated; -} - JsonContainer Llama32PythonicToolParser::parse(JsonContainer& input) { // Input example // string input = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>"; @@ -182,14 +184,14 @@ JsonContainer Llama32PythonicToolParser::parse(JsonContainer& input) { size_t pos = call.find('('); std::string name = call.substr(0, pos); std::string args = call.substr(pos + 1, call.size() - pos - 2); // inside (...) - + + + JsonContainer kv; // Parse arguments of the form key='value' - JsonContainer kv = JsonContainer::array(); - std::regex arg_re(R"((\w+)\s*=\s*\"([^"]*)\")"); auto it = std::sregex_iterator(args.begin(), args.end(), arg_re); for (; it != std::sregex_iterator(); ++it) { - kv.push_back(JsonContainer(ov::AnyMap{{"key", std::string((*it)[1])}, {"value", std::string((*it)[2])}})); + kv[std::string((*it)[1])] = std::string((*it)[2]); } input["tool_calls"] = JsonContainer::array(); @@ -198,8 +200,7 @@ JsonContainer Llama32PythonicToolParser::parse(JsonContainer& input) { if (!m_keep_original_content) { input["content"] = regex_replace(text, r, ""); } - - return JsonContainer{}; + return input; } JsonContainer Llama32JsonToolParser::parse(JsonContainer& message) { @@ -224,9 +225,7 @@ JsonContainer Llama32JsonToolParser::parse(JsonContainer& message) { JsonContainer BaseReasoningParser::parse(JsonContainer& input) { JsonContainer res; std::string reasoning_content; - // auto content = input["content"]; std::string content = input["content"].get_string(); - res["content"] = content; size_t start = content.find(m_open_tag); size_t end = content.find(m_close_tag); @@ -235,14 +234,14 @@ JsonContainer BaseReasoningParser::parse(JsonContainer& input) { reasoning_content = content.substr(start + m_open_tag.size(), end - (start + m_open_tag.size())); if (!m_keep_original_content) { // Remove ... from content - res["content"] = content.substr(0, start) + content.substr(end + m_close_tag.size()); + input["content"] = content.substr(0, start) + content.substr(end + m_close_tag.size()); } } else { reasoning_content = ""; } - res["reasoning_content"] = reasoning_content; - return res; + input["reasoning_content"] = reasoning_content; + return input; } std::map()>> registered_incremental_parsers; diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp index 5ffc34689f..a34b575519 100644 --- a/src/cpp/src/text_streamer.cpp +++ b/src/cpp/src/text_streamer.cpp @@ -144,9 +144,7 @@ TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector

is_active()) { - message = parser->parse(m_parsed_message, m_text_buffer, message); - } + message = parser->parse(m_parsed_message, m_text_buffer, message); // Message can be modified inside parser, if parser for example extracted tool calling from message content // but parser m_parsed_message["content"] = m_parsed_message["content"].get_string() + message; diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index d756f2a942..a3c255d1b6 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -51,14 +51,6 @@ class ConstructableIncrementalParserBase: public IncrementalParserBase { delta_tokens ); } - - bool is_active() const override { - PYBIND11_OVERRIDE_PURE( - bool, // Return type - IncrementalParserBase, // Parent class - is_active, // Name of function in C++ (must match Python name) - ); - } }; class ConstructableParserBase: public ParserBase { @@ -120,8 +112,7 @@ void init_parsers(py::module_& m) { return res; }, py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt, - "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output.") - .def("is_active", &IncrementalParserBase::is_active, "Indicates whether the parser is active and should be used during parsing."); + "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output."); py::class_, IncrementalParserBase>(m, "Phi4ReasoningParser") .def(py::init(), py::arg("starts_with_thinking") = false) diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp index 4bfa01223d..912e33d4be 100644 --- a/src/python/py_streamers.cpp +++ b/src/python/py_streamers.cpp @@ -145,7 +145,8 @@ void init_streamers(py::module_& m) { return self.write(tokens); } }, - py::arg("token")); + py::arg("token")) + .def("end", &TextStreamer::end); // TODO: double check/add more relevant docstrings for TextParserStreamer. py::class_, TextStreamer>(m, "TextParserStreamer") diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp index 2068e9308f..1e56fef042 100644 --- a/tests/cpp/parser.cpp +++ b/tests/cpp/parser.cpp @@ -12,12 +12,13 @@ using namespace ov::genai; nlohmann::json run_parser_test(std::shared_ptr parser, const std::string& prompt) { JsonContainer input; input["content"] = prompt; - return (parser->parse(input)).to_json(); + parser->parse(input); + return input.to_json(); } TEST(ParserTest, test_llama32_parser_1) { - std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>)"; + std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location="New York, NY", unit="celsius")]<|eom_id|>)"; nlohmann::json expected; // By default content should keep original values. 
@@ -40,7 +41,7 @@ TEST(ParserTest, test_llama32_parser_1) { } TEST(ParserTest, test_llama32_parser_2) { - std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>)"; + std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location="New York, NY", unit="celsius")]<|eom_id|>)"; nlohmann::json expected; // In this test tool calling part will be cut from the content after parsing. @@ -100,7 +101,7 @@ TEST(ParserTest, test_reasoning_parser_2) { class DeepSeekR1ReasoningParserTest : public ::testing::Test { protected: - ov::genai::ReasoningParser parser; + ov::genai::DeepSeekR1ReasoningParser parser; JsonContainer msg; }; diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index f7ced84171..f273f13918 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -138,7 +138,7 @@ class CustomStreamer(TextParserStreamer): def write(self, message): if "content" in message: print(message["content"]) - return True + return StreamingStatus.RUNNING streamer = TextParserStreamer(genai_tokenizer, parsers=["DeepSeekR1ReasoningParser"]) diff --git a/tests/python_tests/test_text_streamer.py b/tests/python_tests/test_text_streamer.py index 9b646c6553..4790ab4b3d 100644 --- a/tests/python_tests/test_text_streamer.py +++ b/tests/python_tests/test_text_streamer.py @@ -29,7 +29,7 @@ def chunks(arr: list, n: int): Set folder = Application.GetNamespace("Microsoft Office").PackagedInstance.GetFolder("Folder Name") 'Get all files in the folder folder.Files.Clear -""" +""" eng_prompts = [ 'What is the previous answer?', 'Why is the Sun yellow?', @@ -72,7 +72,7 @@ def test_text_prompts(tmp_path, prompt, model_id): streamer.write(token) streamer.end() - class CurrentStremaer(BaseStreamer): + class CurrentStreamer(BaseStreamer): def write(self, token_chunk): pass From ae1930b92c3d793a313e9ebecd4785abca5479b7 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 17 Oct 2025 09:32:42 +0200 Subject: [PATCH 11/43] revert sample --- samples/cpp/text_generation/CMakeLists.txt | 1 - .../text_generation/parsed_output_sample.cpp | 52 ------------------- samples/python/text_generation/chat_sample.py | 33 ------------ 3 files changed, 86 deletions(-) delete mode 100644 samples/cpp/text_generation/parsed_output_sample.cpp diff --git a/samples/cpp/text_generation/CMakeLists.txt b/samples/cpp/text_generation/CMakeLists.txt index 7493362e81..ebaf32c7f4 100644 --- a/samples/cpp/text_generation/CMakeLists.txt +++ b/samples/cpp/text_generation/CMakeLists.txt @@ -29,7 +29,6 @@ set (SAMPLE_LIST lora_greedy_causal_lm multinomial_causal_lm prompt_lookup_decoding_lm - parsed_output_sample speculative_decoding_lm) foreach(sample IN LISTS SAMPLE_LIST) diff --git a/samples/cpp/text_generation/parsed_output_sample.cpp b/samples/cpp/text_generation/parsed_output_sample.cpp deleted file mode 100644 index dfc2ef964b..0000000000 --- a/samples/cpp/text_generation/parsed_output_sample.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (C) 2023-2025 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -#include "openvino/genai/llm_pipeline.hpp" -#include "openvino/genai/parsers.hpp" -#include "openvino/genai/text_streamer.hpp" - - -class CurrentStreamer : public ov::genai::TextParserStreamer { -private: -public: - CurrentStreamer(const ov::genai::Tokenizer& tokenizer) - : 
ov::genai::TextParserStreamer(tokenizer) {} - ov::genai::StreamingStatus write(ov::genai::JsonContainer& message) { - std::cout << message["content"].get_string() << std::flush; - return ov::genai::StreamingStatus::RUNNING; - } -}; - - -int main(int argc, char* argv[]) try { - if (argc < 2 || argc > 3) { - throw std::runtime_error(std::string{"Usage: "} + argv[0] + " "); - } - // std::string prompt = "<|begin▁of▁sentence|><|User|>Please think of a dificcult task to solve x**2 + y**2 = 1<|Assistant|>"; - std::string prompt = "<|begin▁of▁sentence|><|User|>Why is the Sky blue?<|Assistant|>"; - std::string models_path = argv[1]; - - // Default device is CPU; can be overridden by the second argument - std::string device = (argc == 3) ? argv[2] : "CPU"; // GPU, NPU can be used as well - ov::genai::LLMPipeline pipe(models_path, device); - - ov::genai::GenerationConfig config; - config.max_new_tokens = 1000; - - auto tok = pipe.get_tokenizer(); - std::shared_ptr streamer = std::make_shared(tok); - - pipe.generate(prompt, config, streamer); - - -} catch (const std::exception& error) { - try { - std::cerr << error.what() << '\n'; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; -} catch (...) { - try { - std::cerr << "Non-exception object thrown\n"; - } catch (const std::ios_base::failure&) {} - return EXIT_FAILURE; -} diff --git a/samples/python/text_generation/chat_sample.py b/samples/python/text_generation/chat_sample.py index b852141d3c..e4067c49f3 100755 --- a/samples/python/text_generation/chat_sample.py +++ b/samples/python/text_generation/chat_sample.py @@ -36,36 +36,3 @@ def main(): if '__main__' == __name__: main() - - pipe = openvino_genai.LLMPipeline(args.model_dir, device) - - prompt = "What is the weather in New York today?" - res = pipe.generate(prompt, max_new_tokens=100, streamer=streamer) - print(res.texts[0]) - - res.parsed['tool_caling'] - - class LlamaToolCallParser(ParserBase): - def parse(self, parsed_data: ParsedData) -> ParsedData: - # parsed_data - # process parsed_data - # e.g. extract tool calls, or other fields from content - return new_parsed_output - - llama_parser = LlamaToolCallParser() - res = pipe.generate(prompt, parsers=[llama_parser | "LLama3.2Pythonic"], max_new_tokens=100) - -# At the beginning msg['original_content'] is filled with full text -msg = res.texts[i] -for parser in m_parsers: - msg = parser.parse(msg) - -# At the end msg is filled with all parsed fields -parsed_data = { - 'original_content': '<|system|>You are a helpful assistant... 
I will call the `get_weather` function with the location… \n\nfunctools[{"name": "get_weather", "arguments": {"location": "New York", "unit": "celsius"}}]<|end|>', - 'content': 'blah blah', - 'reasoning_content': '', - 'tool_calls': "[{\"name\":\"get_weather\",\"arguments\":{\"location\":\"New York, NY\",\"unit\":\"celsius\"}}]", -} - -res.parsed: ParsedData \ No newline at end of file From 2772c985752abf3ce20dd0a663c218c09bcdff69 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 17 Oct 2025 09:37:12 +0200 Subject: [PATCH 12/43] update pybind11 stubs --- src/python/openvino_genai/__init__.pyi | 9 +- .../openvino_genai/py_openvino_genai.pyi | 89 ++++++++++++++++--- 2 files changed, 86 insertions(+), 12 deletions(-) diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index 175df870eb..94846441f5 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -14,6 +14,7 @@ from openvino_genai.py_openvino_genai import ChunkStreamerBase from openvino_genai.py_openvino_genai import ContinuousBatchingPipeline from openvino_genai.py_openvino_genai import CppStdGenerator from openvino_genai.py_openvino_genai import DecodedResults +from openvino_genai.py_openvino_genai import DeepSeekR1ReasoningParser from openvino_genai.py_openvino_genai import EncodedResults from openvino_genai.py_openvino_genai import FluxTransformer2DModel from openvino_genai.py_openvino_genai import GenerationConfig @@ -24,11 +25,16 @@ from openvino_genai.py_openvino_genai import Generator from openvino_genai.py_openvino_genai import Image2ImagePipeline from openvino_genai.py_openvino_genai import ImageGenerationConfig from openvino_genai.py_openvino_genai import ImageGenerationPerfMetrics +from openvino_genai.py_openvino_genai import IncrementalParserBase from openvino_genai.py_openvino_genai import InpaintingPipeline from openvino_genai.py_openvino_genai import KVCrushAnchorPointMode from openvino_genai.py_openvino_genai import KVCrushConfig from openvino_genai.py_openvino_genai import LLMPipeline +from openvino_genai.py_openvino_genai import Llama32JsonToolParser +from openvino_genai.py_openvino_genai import Llama32PythonicToolParser +from openvino_genai.py_openvino_genai import ParserBase from openvino_genai.py_openvino_genai import PerfMetrics +from openvino_genai.py_openvino_genai import Phi4ReasoningParser from openvino_genai.py_openvino_genai import RawImageGenerationPerfMetrics from openvino_genai.py_openvino_genai import RawPerfMetrics from openvino_genai.py_openvino_genai import SD3Transformer2DModel @@ -49,6 +55,7 @@ from openvino_genai.py_openvino_genai import Text2ImagePipeline from openvino_genai.py_openvino_genai import Text2SpeechDecodedResults from openvino_genai.py_openvino_genai import Text2SpeechPipeline from openvino_genai.py_openvino_genai import TextEmbeddingPipeline +from openvino_genai.py_openvino_genai import TextParserStreamer from openvino_genai.py_openvino_genai import TextRerankPipeline from openvino_genai.py_openvino_genai import TextStreamer from openvino_genai.py_openvino_genai import TokenizedInputs @@ -64,5 +71,5 @@ from openvino_genai.py_openvino_genai import draft_model from openvino_genai.py_openvino_genai import get_version import os as os from . 
import py_openvino_genai -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'PerfMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama32JsonToolParser', 'Llama32PythonicToolParser', 'ParserBase', 'PerfMetrics', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 82c0721e9a..3c4ade4515 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import collections.abc import openvino._pyopenvino import typing -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 
'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'MeanStdPair', 'ParserBase', 'PerfMetrics', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningParser', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama32JsonToolParser', 'Llama32PythonicToolParser', 'MeanStdPair', 'ParserBase', 'PerfMetrics', 'Phi4ReasoningParser', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] class Adapter: """ Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. @@ -493,6 +493,9 @@ class DecodedResults: def extended_perf_metrics(self) -> ExtendedPerfMetrics: ... @property + def parsed(self) -> list[...]: + ... + @property def perf_metrics(self) -> PerfMetrics: ... @property @@ -501,6 +504,18 @@ class DecodedResults: @property def texts(self) -> list[str]: ... 
+class DeepSeekR1ReasoningParser(IncrementalParserBase): + @staticmethod + def get_parser(name: str) -> IncrementalParserBase: + """ + Factory method to get parser by name. + """ + def __init__(self) -> None: + ... + def parse(self, msg: ..., previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: + """ + Parse is called with the full text. Returns a JsonContainer with parsed content. + """ class EncodedGenerationResult: """ @@ -899,6 +914,12 @@ class GenerationConfig: def num_return_sequences(self, arg0: typing.SupportsInt) -> None: ... @property + def parsers(self) -> list[str | openvino_genai.py_openvino_genai.ParserBase]: + ... + @parsers.setter + def parsers(self, arg0: collections.abc.Sequence[str | openvino_genai.py_openvino_genai.ParserBase]) -> None: + ... + @property def presence_penalty(self) -> float: ... @presence_penalty.setter @@ -1377,13 +1398,9 @@ class ImageGenerationPerfMetrics: class IncrementalParserBase: def __init__(self) -> None: ... - def is_active(self) -> bool: - """ - Indicates whether the parser is active and should be used during parsing. - """ - def parse(self, msg: collections.abc.Mapping[str, str], previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> dict[str, str]: + def parse(self, msg: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: """ - Parse is called every time new text delta is decoded. Returns a JsonContainer with parsed content. + Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output. """ class InpaintingPipeline: """ @@ -1741,6 +1758,30 @@ class LLMPipeline: ... def start_chat(self, system_message: str = '') -> None: ... +class Llama32JsonToolParser(ParserBase): + @staticmethod + def get_parser(name: str) -> ParserBase: + """ + Factory method to get parser by name. + """ + def __init__(self) -> None: + ... + def parse(self, text: dict) -> None: + """ + Parse is called with the full text. Returns a JsonContainer with parsed content. + """ +class Llama32PythonicToolParser(ParserBase): + @staticmethod + def get_parser(name: str) -> ParserBase: + """ + Factory method to get parser by name. + """ + def __init__(self) -> None: + ... + def parse(self, text: dict) -> None: + """ + Parse is called with the full text. Returns a JsonContainer with parsed content. + """ class MeanStdPair: def __init__(self) -> None: ... @@ -1755,7 +1796,7 @@ class MeanStdPair: class ParserBase: def __init__(self) -> None: ... - def parse(self, text: collections.abc.Mapping[str, str]) -> dict[str, str]: + def parse(self, text: dict) -> None: """ Parse is called with the full text. Returns a JsonContainer with parsed content. """ @@ -1861,6 +1902,18 @@ class PerfMetrics: @property def raw_metrics(self) -> RawPerfMetrics: ... +class Phi4ReasoningParser(IncrementalParserBase): + @staticmethod + def get_parser(name: str) -> IncrementalParserBase: + """ + Factory method to get parser by name. + """ + def __init__(self, starts_with_thinking: bool = False) -> None: + ... 
+ def parse(self, msg: ..., previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: + """ + Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output. + """ class PipelineMetrics: """ @@ -3151,12 +3204,24 @@ class TextEmbeddingPipeline: """ Waits computed embeddings for a query """ -class TextParserStreamer: - def __init__(self, tokenizer: Tokenizer, parsers: collections.abc.Sequence[...] = []) -> None: +class TextParserStreamer(TextStreamer): + def __init__(self, tokenizer: Tokenizer, parsers: collections.abc.Sequence[openvino_genai.py_openvino_genai.IncrementalParserBase | str] = []) -> None: """ TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers. """ - def write(self, message: collections.abc.Mapping[str, str]) -> StreamingStatus: + def _write(self, message: str) -> bool | openvino_genai.py_openvino_genai.StreamingStatus: + """ + Write is called with a string message. Returns CallbackTypeVariant. This is a private method. + """ + def get_parsed_message(self) -> ...: + """ + Get the current parsed message + """ + def get_parsers(self) -> list[IncrementalParserBase]: + """ + Get the list of parsers + """ + def write(self, message: dict) -> StreamingStatus: """ Write is called with a JsonContainer. Returns StreamingStatus. """ @@ -3223,6 +3288,8 @@ class TextStreamer(StreamerBase): """ def __init__(self, tokenizer: Tokenizer, callback: collections.abc.Callable[[str], bool | openvino_genai.py_openvino_genai.StreamingStatus], detokenization_params: collections.abc.Mapping[str, typing.Any] = {}) -> None: ... + def end(self) -> None: + ... def write(self, token: typing.SupportsInt | collections.abc.Sequence[typing.SupportsInt]) -> StreamingStatus: ... class TokenizedInputs: From 94b8370dc0906f1ae84aa36960bd98f179045716 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 17 Oct 2025 10:44:16 +0200 Subject: [PATCH 13/43] update stubs --- .../openvino_genai/py_openvino_genai.pyi | 18 ++--- src/python/py_openvino_genai.cpp | 13 +++- src/python/py_parsers.cpp | 77 +++++++++++++++++-- src/python/py_streamers.cpp | 15 +++- 4 files changed, 102 insertions(+), 21 deletions(-) diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 3c4ade4515..451cd720db 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -493,7 +493,7 @@ class DecodedResults: def extended_perf_metrics(self) -> ExtendedPerfMetrics: ... @property - def parsed(self) -> list[...]: + def parsed(self) -> dict: ... @property def perf_metrics(self) -> PerfMetrics: @@ -512,9 +512,9 @@ class DeepSeekR1ReasoningParser(IncrementalParserBase): """ def __init__(self) -> None: ... - def parse(self, msg: ..., previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: + def parse(self, msg: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: """ - Parse is called with the full text. Returns a JsonContainer with parsed content. + Parse is called with the full text. 
Returns a dict with parsed content. """ class EncodedGenerationResult: """ @@ -1768,7 +1768,7 @@ class Llama32JsonToolParser(ParserBase): ... def parse(self, text: dict) -> None: """ - Parse is called with the full text. Returns a JsonContainer with parsed content. + Parse is called with the full text. Returns a dict with parsed content. """ class Llama32PythonicToolParser(ParserBase): @staticmethod @@ -1780,7 +1780,7 @@ class Llama32PythonicToolParser(ParserBase): ... def parse(self, text: dict) -> None: """ - Parse is called with the full text. Returns a JsonContainer with parsed content. + Parse is called with the full text. Returns a dict with parsed content. """ class MeanStdPair: def __init__(self) -> None: @@ -1798,7 +1798,7 @@ class ParserBase: ... def parse(self, text: dict) -> None: """ - Parse is called with the full text. Returns a JsonContainer with parsed content. + Parse is called with the full text. Returns a dict with parsed content. """ class PerfMetrics: """ @@ -1910,7 +1910,7 @@ class Phi4ReasoningParser(IncrementalParserBase): """ def __init__(self, starts_with_thinking: bool = False) -> None: ... - def parse(self, msg: ..., previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: + def parse(self, msg: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: """ Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output. """ @@ -3213,7 +3213,7 @@ class TextParserStreamer(TextStreamer): """ Write is called with a string message. Returns CallbackTypeVariant. This is a private method. """ - def get_parsed_message(self) -> ...: + def get_parsed_message(self) -> dict: """ Get the current parsed message """ @@ -3223,7 +3223,7 @@ class TextParserStreamer(TextStreamer): """ def write(self, message: dict) -> StreamingStatus: """ - Write is called with a JsonContainer. Returns StreamingStatus. + Write is called with a dict. Returns StreamingStatus. 
""" class TextRerankPipeline: """ diff --git a/src/python/py_openvino_genai.cpp b/src/python/py_openvino_genai.cpp index aa2854480c..586b6b2d1f 100644 --- a/src/python/py_openvino_genai.cpp +++ b/src/python/py_openvino_genai.cpp @@ -93,7 +93,18 @@ PYBIND11_MODULE(py_openvino_genai, m) { .def(py::init<>()) .def_property_readonly("texts", [](const DecodedResults &dr) -> py::typing::List { return pyutils::handle_utf8((std::vector)dr); }) .def_readonly("scores", &DecodedResults::scores) - .def_readonly("parsed", &DecodedResults::parsed) + .def_property_readonly("parsed", [](const DecodedResults& dr) -> py::dict { + static py::object json_mod = py::module_::import("json"); + py::list result_dicts; + + for (const auto& parsed: dr.parsed) { + auto json_str = parsed.to_json_string(); + py::dict json_dict = json_mod.attr("loads")(json_str); + + result_dicts.append(json_dict); + } + return result_dicts; + }) .def_readonly("perf_metrics", &DecodedResults::perf_metrics) .def_readonly("extended_perf_metrics", &DecodedResults::extended_perf_metrics) .def("__str__", [](const DecodedResults &dr) -> py::str { diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index a3c255d1b6..cb19fb503c 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -13,18 +13,17 @@ namespace py = pybind11; -using ov::genai::JsonContainer; using ov::genai::IncrementalParserBase; using ov::genai::ParserVariant; using ov::genai::ParserBase; using ov::genai::ReasoningParser; using ov::genai::Phi4ReasoningParser; using ov::genai::DeepSeekR1ReasoningParser; +using ov::genai::JsonContainer; using ov::genai::Llama32JsonToolParser; using ov::genai::Llama32PythonicToolParser; using ov::genai::Tokenizer; using ov::genai::StreamingStatus; -using ov::genai::JsonContainer; namespace pyutils = ov::genai::pybind::utils; @@ -84,6 +83,32 @@ void call_parser(py::dict& msg, std::function fun } } +// wrapper to enhance calling incremental parser from Python +std::string call_incremental_parser( + IncrementalParserBase& parser, + py::dict& msg, + const std::string& previous_text, + std::string& delta_text, + const std::optional>& previous_tokens, + const std::optional>& delta_tokens, + std::function>&, + const std::optional>&)> func) { + auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(msg); + auto msg_cpp = JsonContainer(msg_anymap); + + auto res = func(msg_cpp, previous_text, delta_text, previous_tokens, delta_tokens); + + auto json_str = msg_cpp.to_json_string(); + py::dict result = json_mod.attr("loads")(json_str); + + // update msg with result + msg.clear(); + for (auto item : result) { + msg[item.first] = item.second; + } + return res; +} + } // namespace // TODO: double check/add more relevant docstrings for parsers. 
@@ -117,7 +142,25 @@ void init_parsers(py::module_& m) { py::class_, IncrementalParserBase>(m, "Phi4ReasoningParser") .def(py::init(), py::arg("starts_with_thinking") = false) .def("parse", - &Phi4ReasoningParser::parse, + [](Phi4ReasoningParser& self, + py::dict& msg, + const std::string& previous_text, + std::string& delta_text, + const std::optional>& previous_tokens = std::nullopt, + const std::optional>& delta_tokens = std::nullopt) { + return call_incremental_parser( + self, + msg, + previous_text, + delta_text, + previous_tokens, + delta_tokens, + [&self](JsonContainer& m, const std::string& prev_text, std::string& delta_t, + const std::optional>& prev_tokens, + const std::optional>& delta_toks) { + return self.parse(m, prev_text, delta_t, prev_tokens, delta_toks); + }); + }, "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output.", py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt) @@ -126,8 +169,26 @@ void init_parsers(py::module_& m) { py::class_, IncrementalParserBase>(m, "DeepSeekR1ReasoningParser") .def(py::init<>()) .def("parse", - &DeepSeekR1ReasoningParser::parse, - "Parse is called with the full text. Returns a JsonContainer with parsed content.", + [](DeepSeekR1ReasoningParser& self, + py::dict& msg, + const std::string& previous_text, + std::string& delta_text, + const std::optional>& previous_tokens = std::nullopt, + const std::optional>& delta_tokens = std::nullopt) { + return call_incremental_parser( + self, + msg, + previous_text, + delta_text, + previous_tokens, + delta_tokens, + [&self](JsonContainer& m, const std::string& prev_text, std::string& delta_t, + const std::optional>& prev_tokens, + const std::optional>& delta_toks) { + return self.parse(m, prev_text, delta_t, prev_tokens, delta_toks); + }); + }, + "Parse is called with the full text. Returns a dict with parsed content.", py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt) .def_static("get_parser", &DeepSeekR1ReasoningParser::get_parser, py::arg("name"), "Factory method to get parser by name."); @@ -139,7 +200,7 @@ void init_parsers(py::module_& m) { return call_parser(msg, [&self](JsonContainer& m) {return self.parse(m);}); }, py::arg("text"), - "Parse is called with the full text. Returns a JsonContainer with parsed content."); + "Parse is called with the full text. Returns a dict with parsed content."); py::class_, ParserBase>(m, "Llama32JsonToolParser") .def(py::init<>()) @@ -148,7 +209,7 @@ void init_parsers(py::module_& m) { return call_parser(msg, [&self](JsonContainer& m) { return self.parse(m); }); }, py::arg("text"), - "Parse is called with the full text. Returns a JsonContainer with parsed content.") + "Parse is called with the full text. Returns a dict with parsed content.") .def_static("get_parser", &Llama32JsonToolParser::get_parser, py::arg("name"), "Factory method to get parser by name."); py::class_, ParserBase>(m, "Llama32PythonicToolParser") @@ -158,6 +219,6 @@ void init_parsers(py::module_& m) { return call_parser(msg, [&self](JsonContainer& m) { return self.parse(m); }); }, py::arg("text"), - "Parse is called with the full text. Returns a JsonContainer with parsed content.") + "Parse is called with the full text. 
Returns a dict with parsed content.")
        .def_static("get_parser", &Llama32PythonicToolParser::get_parser, py::arg("name"), "Factory method to get parser by name.");
}
diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp
index 912e33d4be..75b0fdf9ad 100644
--- a/src/python/py_streamers.cpp
+++ b/src/python/py_streamers.cpp
@@ -22,7 +22,6 @@ using ov::genai::TextParserStreamer;
 using ov::genai::IncrementalParserBase;
 using ov::genai::JsonContainer;
 using ov::genai::Tokenizer;
-using ov::genai::JsonContainer;
 
 namespace pyutils = ov::genai::pybind::utils;
 
@@ -167,13 +166,23 @@ void init_streamers(py::module_& m) {
             return derived->write(message);
         },
         py::arg("message"),
-        "Write is called with a JsonContainer. Returns StreamingStatus.")
+        "Write is called with a dict. Returns StreamingStatus.")
         .def("_write", py::overload_cast<std::string>(&TextParserStreamer::write), py::arg("message"),
             "Write is called with a string message. Returns CallbackTypeVariant. This is a private method.")
-        .def("get_parsed_message", &TextParserStreamer::get_parsed_message, "Get the current parsed message")
+        .def("get_parsed_message",
+            [](TextParserStreamer& self) {
+                static py::object json_mod = py::module_::import("json");
+
+                auto res = self.get_parsed_message();
+                auto json_str = res.to_json_string();
+                py::dict json_dict = json_mod.attr("loads")(json_str);
+
+                return json_dict;
+
+            }, "Get the current parsed message")
 
         .def("get_parsers", &TextParserStreamer::get_parsers, "Get the list of parsers");
 }

From 7759b48095a3193675984907eff63f3401bbf89d Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Fri, 17 Oct 2025 11:44:41 +0200
Subject: [PATCH 14/43] replace starts_with_thinking -> expect_open_tag; some
 other corrections

---
 src/cpp/include/openvino/genai/parsers.hpp    | 36 +++-----
 .../include/openvino/genai/text_streamer.hpp  |  2 +-
 src/cpp/src/parsers.cpp                       | 83 ++++++++++++-------
 src/cpp/src/text_streamer.cpp                 |  2 +-
 .../openvino_genai/py_openvino_genai.pyi      |  2 +-
 src/python/py_parsers.cpp                     |  3 +-
 6 files changed, 69 insertions(+), 59 deletions(-)

diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index 6d2dac8fd0..320302aa0d 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -30,14 +30,12 @@ class IncrementalParserBase {
     static std::shared_ptr<IncrementalParserBase> get_parser(std::string name);
 };
 
-// Forward declaration
-class ReasoningParserImpl;
-
 class ReasoningParser : public IncrementalParserBase {
 private:
+    class ReasoningParserImpl;
     std::shared_ptr<ReasoningParserImpl> m_impl;
 public:
-    ReasoningParser(bool starts_with_thinking = true,
+    ReasoningParser(bool expect_open_tag = true,
                     bool keep_original_content = true);
 
     std::string parse(
@@ -51,67 +49,55 @@ class ReasoningParser : public IncrementalParserBase {
 
 class DeepSeekR1ReasoningParser : public ReasoningParser {
 public:
-    DeepSeekR1ReasoningParser(bool starts_with_thinking = true) : ReasoningParser(starts_with_thinking) {};
+    explicit DeepSeekR1ReasoningParser(bool expect_open_tag = true) : ReasoningParser(expect_open_tag) {};
     static std::string name() { return "DeepSeekR1ReasoningParser"; }
 };
 
 class Phi4ReasoningParser : public ReasoningParser {
 public:
-    Phi4ReasoningParser(bool starts_with_thinking = false) : ReasoningParser(starts_with_thinking) {};
+    explicit Phi4ReasoningParser(bool expect_open_tag = false) : ReasoningParser(expect_open_tag) {};
     static std::string name() { return "Phi4ReasoningParser"; }
 };
 
 class ParserBase {
 public:
     ParserBase() = default;
     virtual JsonContainer parse(JsonContainer& text) = 0;
     static std::shared_ptr<ParserBase> get_parser(std::string name);
 };
 
 using ParserVariant = std::variant<std::shared_ptr<ParserBase>, std::string>;
 
 class Llama32PythonicToolParser : public ParserBase {
 // Does not modify original content, only extracts and adds tool calls
 public:
     // TODO: Check that vLLM has the same default.
-    Llama32PythonicToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {}
+    explicit Llama32PythonicToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {}
 
     JsonContainer parse(JsonContainer& input) override;
     static std::string name() { return "Llama32PythonicToolParser"; }
 private:
-    bool m_keep_original_content = true;
+    bool m_keep_original_content;
 };
 
 class Llama32JsonToolParser : public ParserBase {
 // Does not modify original content, only extracts and adds tool calls
 public:
     // TODO: Check that vLLM has the same default.
-    Llama32JsonToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {}
+    explicit Llama32JsonToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {}
 
     JsonContainer parse(JsonContainer& input) override;
     static std::string name() { return "Llama32JsonToolParser"; }
 private:
-    bool m_keep_original_content = true;
+    bool m_keep_original_content;
 };
 
 class BaseReasoningParser : public ParserBase{
 public:
-    BaseReasoningParser(bool expect_open_tag = true, bool keep_original_content = true, std::string open_tag = "<think>", std::string close_tag = "</think>") :
-        m_expect_open_tag(expect_open_tag),
-        m_keep_original_content(keep_original_content),
-        m_open_tag(open_tag),
-        m_close_tag(close_tag) {}
-
+    BaseReasoningParser(bool expect_open_tag = true, bool keep_original_content = true, std::string open_tag = "<think>", std::string close_tag = "</think>");
     JsonContainer parse(JsonContainer& input) override;
 private:
-    bool m_expect_open_tag = true;
-    bool m_keep_original_content = true;
-    std::string m_open_tag = "<think>";
-    std::string m_close_tag = "</think>";
+    class BaseReasoningParserImpl;
+    std::shared_ptr<BaseReasoningParserImpl> m_impl;
 };
 
-
 } // namespace genai
 } // namespace ov
diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp
index 4ac5758b73..43f383ca83 100644
--- a/src/cpp/include/openvino/genai/text_streamer.hpp
+++ b/src/cpp/include/openvino/genai/text_streamer.hpp
@@ -49,7 +49,7 @@ class OPENVINO_GENAI_EXPORTS TextStreamer : public StreamerBase {
 
 class TextParserStreamer : public TextStreamer {
 public:
-    TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers = {});
+    TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::variant<std::shared_ptr<IncrementalParserBase>, std::string>> parsers = {});
 
     virtual StreamingStatus write(JsonContainer& message) = 0;
 
diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index 3026f44599..94abed8d5f 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -11,11 +11,11 @@ using json = nlohmann::json;
 
 namespace ov::genai {
 
-class ReasoningParserImpl {
+class ReasoningParser::ReasoningParserImpl {
 private:
-    bool m_starts_with_thinking = true;
+    bool m_expect_open_tag = true;
     bool m_first_run = true;
-    bool m_keep_original_content = true;
+    bool m_keep_original_content;
     bool m_think_tag_opened = false;
     std::string m_open_tag = "<think>";
    std::string m_close_tag = "</think>";
@@ -24,9 +24,9 @@ class ReasoningParserImpl {
 public:
     bool m_deactivated = false;
     ReasoningParserImpl() = default;
-    ReasoningParserImpl(bool starts_with_thinking = true,
+    ReasoningParserImpl(bool expect_open_tag = true,
                         bool keep_original_content = true)
-        : m_starts_with_thinking(starts_with_thinking),
+        : m_expect_open_tag(expect_open_tag),
           m_keep_original_content(keep_original_content) {}
 
     std::string parse(
@@ -39,7 +39,7 @@ class ReasoningParserImpl {
         if (m_deactivated) {
             return delta_text;
         }
-        if (m_starts_with_thinking && m_first_run) {
+        if (m_expect_open_tag && m_first_run) {
             m_think_tag_opened = true;
         }
         m_first_run = false;
@@ -56,7 +56,7 @@ class ReasoningParserImpl {
         auto reason_str = msg["reasoning_content"].get_string();
         auto content_str = msg["content"].get_string();
 
-        if (!m_think_tag_opened && txt_chunk.find(m_open_tag) != std::string::npos && !m_starts_with_thinking) {
+        if (!m_think_tag_opened && txt_chunk.find(m_open_tag) != std::string::npos && !m_expect_open_tag) {
             OPENVINO_ASSERT(m_open_tag.find(m_text_cache) != std::string::npos,
                             "m_text_cache should be a prefix of m_open_tag");
             // Thinking has started
@@ -149,8 +149,8 @@ class ReasoningParserImpl {
     }
 };
 
-ReasoningParser::ReasoningParser(bool starts_with_thinking, bool keep_original_content) {
-    m_impl = std::make_shared<ReasoningParserImpl>(starts_with_thinking, keep_original_content);
+ReasoningParser::ReasoningParser(bool expect_open_tag, bool keep_original_content) {
+    m_impl = std::make_shared<ReasoningParserImpl>(expect_open_tag, keep_original_content);
 }
 
 std::string ReasoningParser::parse(
@@ -222,26 +222,51 @@ JsonContainer Llama32JsonToolParser::parse(JsonContainer& message) {
     return message;
 }
 
-JsonContainer BaseReasoningParser::parse(JsonContainer& input) {
-    JsonContainer res;
-    std::string reasoning_content;
-    std::string content = input["content"].get_string();
-
-    size_t start = content.find(m_open_tag);
-    size_t end = content.find(m_close_tag);
-
-    if (start != std::string::npos && end != std::string::npos && end > start) {
-        reasoning_content = content.substr(start + m_open_tag.size(), end - (start + m_open_tag.size()));
-        if (!m_keep_original_content) {
-            // Remove <think>...</think> from content
-            input["content"] = content.substr(0, start) + content.substr(end + m_close_tag.size());
-        }
-    } else {
-        reasoning_content = "";
-    }
-
-    input["reasoning_content"] = reasoning_content;
-    return input;
-}
+class BaseReasoningParser::BaseReasoningParserImpl {
+public:
+    BaseReasoningParserImpl(bool expect_open_tag,
+                            bool keep_original_content,
+                            std::string open_tag,
+                            std::string close_tag):
+        m_expect_open_tag(expect_open_tag),
+        m_keep_original_content(keep_original_content),
+        m_open_tag(open_tag),
+        m_close_tag(close_tag) {};
+
+    JsonContainer parse(JsonContainer& input) {
+        JsonContainer res;
+        std::string reasoning_content;
+        std::string content = input["content"].get_string();
+
+        size_t start = content.find(m_open_tag);
+        size_t end = content.find(m_close_tag);
+
+        if (start != std::string::npos && end != std::string::npos && end > start) {
+            reasoning_content = content.substr(start + m_open_tag.size(), end - (start + m_open_tag.size()));
+            if (!m_keep_original_content) {
+                // Remove <think>...</think> from content
+                input["content"] = content.substr(0, start) + content.substr(end + m_close_tag.size());
+            }
+        } else {
+            reasoning_content = "";
+        }
+
+        input["reasoning_content"] = reasoning_content;
+        return input;
+    }
+private:
+    bool m_expect_open_tag;
+    bool m_keep_original_content;
+    std::string m_open_tag;
+    std::string m_close_tag;
+};
+
+BaseReasoningParser::BaseReasoningParser(bool expect_open_tag, bool keep_original_content, std::string open_tag, std::string close_tag) {
+    m_impl = std::make_shared<BaseReasoningParserImpl>(expect_open_tag, keep_original_content, open_tag, close_tag);
+}
+
+JsonContainer BaseReasoningParser::parse(JsonContainer& input) {
+    return m_impl->parse(input);
+}
 
 std::map<std::string, std::function<std::shared_ptr<IncrementalParserBase>()>> registered_incremental_parsers;
@@ -249,9 +274,9 @@ std::map<std::string, std::function<std::shared_ptr<ParserBase>()>> registered_b
 
 // static initializer to register available buildin parsers
 static bool register_backends() {
-    registered_incremental_parsers[DeepSeekR1ReasoningParser::name()] = []() { return std::make_shared<DeepSeekR1ReasoningParser>(/*starts_with_thinking*/ true); };
-    registered_incremental_parsers[Phi4ReasoningParser::name()] = []() { return std::make_shared<Phi4ReasoningParser>(/*starts_with_thinking*/ false); };
-    
+    registered_incremental_parsers[DeepSeekR1ReasoningParser::name()] = []() { return std::make_shared<DeepSeekR1ReasoningParser>(/*expect_open_tag*/ true); };
+    registered_incremental_parsers[Phi4ReasoningParser::name()] = []() { return std::make_shared<Phi4ReasoningParser>(/*expect_open_tag*/ false); };
+
     registered_base_parsers[Llama32PythonicToolParser::name()] = []() { return std::make_shared<Llama32PythonicToolParser>(); };
 
     // TODO: Add more parsers and register them here.
diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp
index a34b575519..437352358c 100644
--- a/src/cpp/src/text_streamer.cpp
+++ b/src/cpp/src/text_streamer.cpp
@@ -124,7 +124,7 @@ void TextStreamer::end() {
 
 StreamerBase::~StreamerBase() = default;
 
-TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<ParserVariant> parsers)
+TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::variant<std::shared_ptr<IncrementalParserBase>, std::string>> parsers)
     : TextStreamer(tokenizer, [this](std::string s) -> CallbackTypeVariant {
         return this->write(s);
     }) {
diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi
index 451cd720db..256834c92f 100644
--- a/src/python/openvino_genai/py_openvino_genai.pyi
+++ b/src/python/openvino_genai/py_openvino_genai.pyi
@@ -1908,7 +1908,7 @@ class Phi4ReasoningParser(IncrementalParserBase):
         """
         Factory method to get parser by name.
         """
-    def __init__(self, starts_with_thinking: bool = False) -> None:
+    def __init__(self, expect_open_tag: bool = False) -> None:
         ...
     def parse(self, msg: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str:
         """
diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp
index cb19fb503c..37b78dc39a 100644
--- a/src/python/py_parsers.cpp
+++ b/src/python/py_parsers.cpp
@@ -14,7 +14,6 @@ namespace py = pybind11;
 
 using ov::genai::IncrementalParserBase;
-using ov::genai::ParserVariant;
 using ov::genai::ParserBase;
 using ov::genai::ReasoningParser;
@@ -140,7 +139,7 @@ void init_parsers(py::module_& m) {
         "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output.");
 
     py::class_<Phi4ReasoningParser, std::shared_ptr<Phi4ReasoningParser>, IncrementalParserBase>(m, "Phi4ReasoningParser")
-        .def(py::init<bool>(), py::arg("starts_with_thinking") = false)
+        .def(py::init<bool>(), py::arg("expect_open_tag") = false)
         .def("parse",
             [](Phi4ReasoningParser& self,

From 5470b63481efe4cd5ad2cd68ab79eab3e5b92436 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Fri, 17 Oct 2025 12:22:41 +0200
Subject: [PATCH 15/43] remove std::variant

---
 .../openvino/genai/generation_config.hpp      |  2 +-
 src/cpp/include/openvino/genai/parsers.hpp    | 13 -----
 .../include/openvino/genai/text_streamer.hpp  |  3 +-
 src/cpp/src/llm/pipeline.cpp                  | 37 ++++---------
 src/cpp/src/parsers.cpp                       | 52 -------------------
 src/cpp/src/text_streamer.cpp                 | 17 +-----
 .../openvino_genai/py_openvino_genai.pyi      | 30 ++---------
 src/python/py_parsers.cpp                     | 12 ++---
 src/python/py_streamers.cpp                   |  8 ++-
 tests/python_tests/test_parsers.py            | 11 ++--
 10 files changed, 31 insertions(+), 154 deletions(-)

diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp
index 1d832c3c8e..ad328b3d1f 100644
--- a/src/cpp/include/openvino/genai/generation_config.hpp
+++ b/src/cpp/include/openvino/genai/generation_config.hpp
@@ -350,7 +350,7 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig {
     bool is_structured_output_generation() const;
 
     // parsers
-    std::vector<std::variant<std::string, std::shared_ptr<ParserBase>>> parsers;
+    std::vector<std::shared_ptr<ParserBase>> parsers;
 
     OPENVINO_DEPRECATED("Please, use `is_assisting_generation()` instead of `is_speculative_decoding()`. This method will be removed in 2026.0.0 release")
     bool is_speculative_decoding() const;
diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index 320302aa0d..3a00e631b2 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -4,10 +4,6 @@
 #pragma once
 #include 
 #include 
-#include 
-#include 
-#include 
-#include 
 #include 
 #include "openvino/genai/json_container.hpp"
 
@@ -26,8 +22,6 @@ class IncrementalParserBase {
         const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
         const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
     ) = 0;
-
-    static std::shared_ptr<IncrementalParserBase> get_parser(std::string name);
 };
 
 class ReasoningParser : public IncrementalParserBase {
@@ -50,30 +44,25 @@ class ReasoningParser : public IncrementalParserBase {
 
 class DeepSeekR1ReasoningParser : public ReasoningParser {
 public:
     explicit DeepSeekR1ReasoningParser(bool expect_open_tag = true) : ReasoningParser(expect_open_tag) {};
-    static std::string name() { return "DeepSeekR1ReasoningParser"; }
 };
 
 class Phi4ReasoningParser : public ReasoningParser {
 public:
     explicit Phi4ReasoningParser(bool expect_open_tag = false) : ReasoningParser(expect_open_tag) {};
-    static std::string name() { return "Phi4ReasoningParser"; }
 };
 
 class ParserBase {
 public:
     ParserBase() = default;
     virtual JsonContainer parse(JsonContainer& text) = 0;
-    static std::shared_ptr<ParserBase> get_parser(std::string name);
 };
 
-using ParserVariant = std::variant<std::shared_ptr<ParserBase>, std::string>;
-
 class Llama32PythonicToolParser : public ParserBase {
 // Does not modify original content, only extracts and adds tool calls
 public:
-    // TODO: Check that vLLM has the same default.
     explicit Llama32PythonicToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {}
 
     JsonContainer parse(JsonContainer& input) override;
-    static std::string name() { return "Llama32PythonicToolParser"; }
 private:
     bool m_keep_original_content;
 };
 
 class Llama32JsonToolParser : public ParserBase {
 // Does not modify original content, only extracts and adds tool calls
 public:
-    // TODO: Check that vLLM has the same default.
     explicit Llama32JsonToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {}
 
     JsonContainer parse(JsonContainer& input) override;
-    static std::string name() { return "Llama32JsonToolParser"; }
 private:
     bool m_keep_original_content;
 };
diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp
index 43f383ca83..5b6fd6d16e 100644
--- a/src/cpp/include/openvino/genai/text_streamer.hpp
+++ b/src/cpp/include/openvino/genai/text_streamer.hpp
@@ -49,14 +49,13 @@ class OPENVINO_GENAI_EXPORTS TextStreamer : public StreamerBase {
 
 class TextParserStreamer : public TextStreamer {
 public:
-    TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::variant<std::shared_ptr<IncrementalParserBase>, std::string>> parsers = {});
+    TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParserBase>> parsers = {});
 
     virtual StreamingStatus write(JsonContainer& message) = 0;
 
     CallbackTypeVariant write(std::string message);
 
     JsonContainer get_parsed_message() const { return m_parsed_message; }
-    std::vector<std::shared_ptr<IncrementalParserBase>> get_parsers() const { return m_parsers; }
 private:
     JsonContainer m_parsed_message;
     std::string m_text_buffer;
diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp
index 899da4e39f..6bd601f49c 100644
--- a/src/cpp/src/llm/pipeline.cpp
+++ b/src/cpp/src/llm/pipeline.cpp
@@ -219,36 +219,21 @@ DecodedResults LLMPipeline::generate(
         return res;
     }
 
-    std::vector<std::shared_ptr<ParserBase>> parsers;
-    if (generation_config.has_value() && !(*generation_config).parsers.empty()) {
-        for (auto& parser_variant : (*generation_config).parsers) {
-            if (std::holds_alternative<std::string>(parser_variant)) {
-                auto parser_name = std::get<std::string>(parser_variant);
-                auto parser = ParserBase::get_parser(parser_name);
-                if (!parser) {
-                    OPENVINO_THROW("Parser with name ", parser_name, " is not registered");
-                }
-                parsers.push_back(parser);
-            } else if (std::holds_alternative<std::shared_ptr<ParserBase>>(parser_variant)) {
-                auto parser = std::get<std::shared_ptr<ParserBase>>(parser_variant);
-                parsers.push_back(parser);
-            }
-        }
+    if (!generation_config.has_value() || (*generation_config).parsers.empty()) {
+        return res;
     }
-
-    res.parsed.resize(res.texts.size());
+    std::vector<std::shared_ptr<ParserBase>> parsers = (*generation_config).parsers;
 
+    res.parsed.resize(res.texts.size());
     // Apply Base parsers sequentially even if IncrementalParser has run.
- if (!parsers.empty()) { - for (size_t i = 0; i < res.texts.size(); ++i) { - JsonContainer msg; - msg["content"] = res.texts[i]; - for (auto& parser: parsers) { - // TODO: Check the state of incremental parser and reset if necessary - parser->parse(msg); - } - res.parsed[i] = msg; + for (size_t i = 0; i < res.texts.size(); ++i) { + JsonContainer msg; + msg["content"] = res.texts[i]; + for (auto& parser: parsers) { + // TODO: Check the state of incremental parser and reset if necessary + parser->parse(msg); } + res.parsed[i] = msg; } return res; diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index 94abed8d5f..dcfa6d2378 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -269,56 +269,4 @@ JsonContainer BaseReasoningParser::parse(JsonContainer& input) { return m_impl->parse(input); } -std::map()>> registered_incremental_parsers; -std::map()>> registered_base_parsers; - -// static initializer to register available buildin parsers -static bool register_backends() { - registered_incremental_parsers[DeepSeekR1ReasoningParser::name()] = []() { return std::make_shared(/*expect_open_tag*/ true); }; - registered_incremental_parsers[Phi4ReasoningParser::name()] = []() { return std::make_shared(/*expect_open_tag*/ false); }; - - registered_base_parsers[Llama32PythonicToolParser::name()] = []() { return std::make_shared(); }; - - // TODO: Add more parsers and register them here. - return true; -} - -// Ensure the backends are registered before main -static bool are_backends_registered = register_backends(); - -std::shared_ptr IncrementalParserBase::get_parser(std::string name) { - if (!are_backends_registered) { - register_backends(); - } - - if (registered_incremental_parsers.find(name) != registered_incremental_parsers.end()) { - return registered_incremental_parsers[name](); - } - return nullptr; -} - -std::shared_ptr ParserBase::get_parser(std::string name) { - if (!are_backends_registered) { - register_backends(); - } - - if (registered_base_parsers.find(name) != registered_base_parsers.end()) { - return registered_base_parsers[name](); - } - return nullptr; -} - -static std::vector get_parsers_names() { - std::vector names; - for (const auto& [name, _] : registered_incremental_parsers) { - names.push_back(name); - } - for (const auto& [name, _] : registered_base_parsers) { - names.push_back(name); - } - return names; -} - - - } // namespace ov::genai diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp index 437352358c..7738c455e0 100644 --- a/src/cpp/src/text_streamer.cpp +++ b/src/cpp/src/text_streamer.cpp @@ -124,23 +124,10 @@ void TextStreamer::end() { StreamerBase::~StreamerBase() = default; -TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector, std::string>> parsers) +TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector> parsers) : TextStreamer(tokenizer, [this](std::string s) -> CallbackTypeVariant { return this->write(s); - }) { - for (auto& parser : parsers) { - if (std::holds_alternative(parser)) { - auto parser_name = std::get(parser); - auto parser = IncrementalParserBase::get_parser(parser_name); - if (!parser) { - OPENVINO_THROW("Parser with name " + parser_name + " is not registered"); - } - m_parsers.push_back(parser); - } else { - m_parsers.push_back(std::get>(parser)); - } - } - } + }), m_parsers{parsers} {} CallbackTypeVariant TextParserStreamer::write(std::string message) { for (auto& parser: m_parsers) { diff --git a/src/python/openvino_genai/py_openvino_genai.pyi 
b/src/python/openvino_genai/py_openvino_genai.pyi index 256834c92f..8241769460 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -505,11 +505,6 @@ class DecodedResults: def texts(self) -> list[str]: ... class DeepSeekR1ReasoningParser(IncrementalParserBase): - @staticmethod - def get_parser(name: str) -> IncrementalParserBase: - """ - Factory method to get parser by name. - """ def __init__(self) -> None: ... def parse(self, msg: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: @@ -914,10 +909,10 @@ class GenerationConfig: def num_return_sequences(self, arg0: typing.SupportsInt) -> None: ... @property - def parsers(self) -> list[str | openvino_genai.py_openvino_genai.ParserBase]: + def parsers(self) -> list[ParserBase]: ... @parsers.setter - def parsers(self, arg0: collections.abc.Sequence[str | openvino_genai.py_openvino_genai.ParserBase]) -> None: + def parsers(self, arg0: collections.abc.Sequence[ParserBase]) -> None: ... @property def presence_penalty(self) -> float: @@ -1759,11 +1754,6 @@ class LLMPipeline: def start_chat(self, system_message: str = '') -> None: ... class Llama32JsonToolParser(ParserBase): - @staticmethod - def get_parser(name: str) -> ParserBase: - """ - Factory method to get parser by name. - """ def __init__(self) -> None: ... def parse(self, text: dict) -> None: @@ -1771,11 +1761,6 @@ class Llama32JsonToolParser(ParserBase): Parse is called with the full text. Returns a dict with parsed content. """ class Llama32PythonicToolParser(ParserBase): - @staticmethod - def get_parser(name: str) -> ParserBase: - """ - Factory method to get parser by name. - """ def __init__(self) -> None: ... def parse(self, text: dict) -> None: @@ -1903,11 +1888,6 @@ class PerfMetrics: def raw_metrics(self) -> RawPerfMetrics: ... class Phi4ReasoningParser(IncrementalParserBase): - @staticmethod - def get_parser(name: str) -> IncrementalParserBase: - """ - Factory method to get parser by name. - """ def __init__(self, expect_open_tag: bool = False) -> None: ... def parse(self, msg: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: @@ -3205,7 +3185,7 @@ class TextEmbeddingPipeline: Waits computed embeddings for a query """ class TextParserStreamer(TextStreamer): - def __init__(self, tokenizer: Tokenizer, parsers: collections.abc.Sequence[openvino_genai.py_openvino_genai.IncrementalParserBase | str] = []) -> None: + def __init__(self, tokenizer: Tokenizer, parsers: collections.abc.Sequence[IncrementalParserBase] = []) -> None: """ TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers. """ @@ -3217,10 +3197,6 @@ class TextParserStreamer(TextStreamer): """ Get the current parsed message """ - def get_parsers(self) -> list[IncrementalParserBase]: - """ - Get the list of parsers - """ def write(self, message: dict) -> StreamingStatus: """ Write is called with a dict. Returns StreamingStatus. diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index 37b78dc39a..c4842ba132 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -162,8 +162,7 @@ void init_parsers(py::module_& m) { }, "Parse is called every time new text delta is decoded. 
Returns a string with any additional text to append to the current output.", py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), - py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt) - .def_static("get_parser", &Phi4ReasoningParser::get_parser, py::arg("name"), "Factory method to get parser by name."); + py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt); py::class_, IncrementalParserBase>(m, "DeepSeekR1ReasoningParser") .def(py::init<>()) @@ -189,8 +188,7 @@ void init_parsers(py::module_& m) { }, "Parse is called with the full text. Returns a dict with parsed content.", py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), - py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt) - .def_static("get_parser", &DeepSeekR1ReasoningParser::get_parser, py::arg("name"), "Factory method to get parser by name."); + py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt); py::class_>(m, "ParserBase") .def(py::init<>()) @@ -208,8 +206,7 @@ void init_parsers(py::module_& m) { return call_parser(msg, [&self](JsonContainer& m) { return self.parse(m); }); }, py::arg("text"), - "Parse is called with the full text. Returns a dict with parsed content.") - .def_static("get_parser", &Llama32JsonToolParser::get_parser, py::arg("name"), "Factory method to get parser by name."); + "Parse is called with the full text. Returns a dict with parsed content."); py::class_, ParserBase>(m, "Llama32PythonicToolParser") .def(py::init<>()) @@ -218,6 +215,5 @@ void init_parsers(py::module_& m) { return call_parser(msg, [&self](JsonContainer& m) { return self.parse(m); }); }, py::arg("text"), - "Parse is called with the full text. Returns a dict with parsed content.") - .def_static("get_parser", &Llama32PythonicToolParser::get_parser, py::arg("name"), "Factory method to get parser by name."); + "Parse is called with the full text. Returns a dict with parsed content."); } diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp index 75b0fdf9ad..47ac549f70 100644 --- a/src/python/py_streamers.cpp +++ b/src/python/py_streamers.cpp @@ -150,11 +150,11 @@ void init_streamers(py::module_& m) { // TODO: double check/add more relevant docstrings for TextParserStreamer. 
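Illustrative aside (not part of the patch): with the string-name registry removed, parsers are now supplied as instances. A minimal C++ sketch of the resulting usage, built only from the declarations in this series; the subclass name is hypothetical:

#include <iostream>
#include "openvino/genai/parsers.hpp"
#include "openvino/genai/text_streamer.hpp"

// Hypothetical subclass: TextParserStreamer::write(JsonContainer&) is pure
// virtual, so a caller overrides it to consume the incrementally parsed message.
class ConsoleParserStreamer : public ov::genai::TextParserStreamer {
public:
    using ov::genai::TextParserStreamer::TextParserStreamer;
    ov::genai::StreamingStatus write(ov::genai::JsonContainer& message) override {
        // "content" (and "reasoning_content") are filled in by the parsers.
        std::cout << message["content"].get_string();
        return ov::genai::StreamingStatus::RUNNING;
    }
};

// Usage: pass parser instances directly instead of registered names, e.g.
// auto streamer = std::make_shared<ConsoleParserStreamer>(
//     tokenizer,
//     std::vector<std::shared_ptr<ov::genai::IncrementalParserBase>>{
//         std::make_shared<ov::genai::DeepSeekR1ReasoningParser>()});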
py::class_, TextStreamer>(m, "TextParserStreamer") .def(py::init([](const Tokenizer& tokenizer, - std::vector, std::string>> parsers) { + std::vector> parsers) { return std::make_shared(tokenizer, parsers); }), py::arg("tokenizer"), - py::arg("parsers") = std::vector, std::string>>({}), + py::arg("parsers") = std::vector>(), "TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers.") .def("write", [](TextParserStreamer& self, py::dict& message) { @@ -182,7 +182,5 @@ void init_streamers(py::module_& m) { return json_dict; - }, "Get the current parsed message") - - .def("get_parsers", &TextParserStreamer::get_parsers, "Get the list of parsers"); + }, "Get the current parsed message"); } diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index f273f13918..8a7117ad2c 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -64,7 +64,7 @@ class CustomStreamer(TextParserStreamer): def write(self, message): msg.update(message) return StreamingStatus.RUNNING - streamer = CustomStreamer(genai_tokenizer, parsers=["Phi4ReasoningParser"]) + streamer = CustomStreamer(genai_tokenizer, parsers=[Phi4ReasoningParser()]) msg = {} for subword in stream_string: @@ -98,7 +98,7 @@ class CustomStreamer(TextParserStreamer): def write(self, message): msg.update(message) return StreamingStatus.RUNNING - streamer = CustomStreamer(genai_tokenizer, parsers=["Phi4ReasoningParser"]) + streamer = CustomStreamer(genai_tokenizer, parsers=[Phi4ReasoningParser()]) msg = {} for subword in split_answer: @@ -139,9 +139,10 @@ def write(self, message): if "content" in message: print(message["content"]) return StreamingStatus.RUNNING - - streamer = TextParserStreamer(genai_tokenizer, parsers=["DeepSeekR1ReasoningParser"]) - + + streamer = TextParserStreamer(genai_tokenizer, parsers=[DeepSeekR1ReasoningParser()]) + breakpoint() + msg = {} stream_string = [ "<|begin▁of▁sentence|>", "First", ",", " I", " recognize", " that", " the", " question", " is", " asking", From 62bf17a320b7059e3f16abcc151e32ded17acbd2 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 17 Oct 2025 12:25:44 +0200 Subject: [PATCH 16/43] hide leftovers to m_pimpl --- src/cpp/include/openvino/genai/parsers.hpp | 12 +- src/cpp/src/parsers.cpp | 127 +++++++++++++-------- 2 files changed, 84 insertions(+), 55 deletions(-) diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 283b91f775..3a00e631b2 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -60,21 +60,21 @@ class ParserBase { class Llama32PythonicToolParser : public ParserBase { // Does not modify original content, only extracts and adds tool calls public: - explicit Llama32PythonicToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {} - + explicit Llama32PythonicToolParser(bool keep_original_content = true); JsonContainer parse(JsonContainer& input) override; private: - bool m_keep_original_content; + class Llama32PythonicToolParserImpl; + std::shared_ptr m_impl; }; class Llama32JsonToolParser : public ParserBase { // Does not modify original content, only extracts and adds tool calls public: - explicit Llama32JsonToolParser(bool keep_original_content = true) : m_keep_original_content(keep_original_content) {} - + explicit Llama32JsonToolParser(bool keep_original_content = true); JsonContainer parse(JsonContainer& input) override; 
private: - bool m_keep_original_content; + class Llama32JsonToolParserImpl; + std::shared_ptr m_impl; }; class BaseReasoningParser : public ParserBase{ diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index dcfa6d2378..9bfbe446cc 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -163,63 +163,92 @@ std::string ReasoningParser::parse( return m_impl->parse(msg, previous_text, delta_text, previous_tokens, delta_tokens); } -JsonContainer Llama32PythonicToolParser::parse(JsonContainer& input) { - // Input example - // string input = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>"; - - // Regex to capture the [...] part - std::smatch m; - const std::string& text = input["content"].get_string(); - std::regex r(R"(\[.*?\])"); - if (!std::regex_search(text, m, r)) { +class Llama32PythonicToolParser::Llama32PythonicToolParserImpl { +public: + Llama32PythonicToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {} + bool m_keep_original_content; + + JsonContainer parse(JsonContainer& input) { + // Input example + // string input = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>"; + + // Regex to capture the [...] part + std::smatch m; + const std::string& text = input["content"].get_string(); + std::regex r(R"(\[.*?\])"); + if (!std::regex_search(text, m, r)) { + return input; + } + + // Strip outer [ ] + std::string call = m.str().substr(1, m.str().size() - 2); + + // Split function name and arguments + input["tool_calls"] = JsonContainer::array(); + + size_t pos = call.find('('); + std::string name = call.substr(0, pos); + std::string args = call.substr(pos + 1, call.size() - pos - 2); // inside (...) + + + JsonContainer kv; + // Parse arguments of the form key='value' + std::regex arg_re(R"((\w+)\s*=\s*\"([^"]*)\")"); + auto it = std::sregex_iterator(args.begin(), args.end(), arg_re); + for (; it != std::sregex_iterator(); ++it) { + kv[std::string((*it)[1])] = std::string((*it)[2]); + } + + input["tool_calls"] = JsonContainer::array(); + input["tool_calls"].push_back(JsonContainer({{"name", name}, {"arguments", kv}})); + + if (!m_keep_original_content) { + input["content"] = regex_replace(text, r, ""); + } return input; } +}; - // Strip outer [ ] - std::string call = m.str().substr(1, m.str().size() - 2); - - // Split function name and arguments - input["tool_calls"] = JsonContainer::array(); - - size_t pos = call.find('('); - std::string name = call.substr(0, pos); - std::string args = call.substr(pos + 1, call.size() - pos - 2); // inside (...) 
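For orientation, a sketch of what the pythonic tool parser above produces (it mirrors the C++ tests later in this series; only the public API from parsers.hpp is assumed):

#include "openvino/genai/parsers.hpp"

int main() {
    // keep_original_content defaults to true, so "content" is left untouched.
    ov::genai::Llama32PythonicToolParser parser;
    ov::genai::JsonContainer msg;
    msg["content"] = std::string(R"([get_weather(location="New York, NY", unit="celsius")])");
    parser.parse(msg);
    // The regex above captures the bracketed call and the double-quoted
    // key="value" arguments, yielding:
    //   msg["tool_calls"][0]["name"]                  == "get_weather"
    //   msg["tool_calls"][0]["arguments"]["location"] == "New York, NY"
    return 0;
}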
- - - JsonContainer kv; - // Parse arguments of the form key='value' - std::regex arg_re(R"((\w+)\s*=\s*\"([^"]*)\")"); - auto it = std::sregex_iterator(args.begin(), args.end(), arg_re); - for (; it != std::sregex_iterator(); ++it) { - kv[std::string((*it)[1])] = std::string((*it)[2]); - } - - input["tool_calls"] = JsonContainer::array(); - input["tool_calls"].push_back(JsonContainer({{"name", name}, {"arguments", kv}})); - - if (!m_keep_original_content) { - input["content"] = regex_replace(text, r, ""); - } - return input; +Llama32PythonicToolParser::Llama32PythonicToolParser(bool keep_original_content) { + m_impl = std::make_shared(keep_original_content); +} + +JsonContainer Llama32PythonicToolParser::parse(JsonContainer& input) { + return m_impl->parse(input); } -JsonContainer Llama32JsonToolParser::parse(JsonContainer& message) { - // Find JSON in the message - std::string msg_content = message["content"].get_string(); +class Llama32JsonToolParser::Llama32JsonToolParserImpl { +private: + bool m_keep_original_content; +public: + Llama32JsonToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {} + + JsonContainer parse(JsonContainer& message) { + // Find JSON in the message + std::string msg_content = message["content"].get_string(); - size_t json_start = msg_content.find('{'); - size_t json_end = msg_content.rfind('}'); - if (json_start == std::string::npos || json_end == std::string::npos || json_end <= json_start) { + size_t json_start = msg_content.find('{'); + size_t json_end = msg_content.rfind('}'); + if (json_start == std::string::npos || json_end == std::string::npos || json_end <= json_start) { + return message; + } + auto res = JsonContainer::array(); + res.push_back(JsonContainer::from_json_string(msg_content.substr(json_start, json_end - json_start + 1))); + message["tool_calls"] = res; + + if (!m_keep_original_content) { + message["content"] = msg_content.substr(0, json_start) + msg_content.substr(json_end + 1); + } return message; } - auto res = JsonContainer::array(); - res.push_back(JsonContainer::from_json_string(msg_content.substr(json_start, json_end - json_start + 1))); - message["tool_calls"] = res; - - if (!m_keep_original_content) { - message["content"] = msg_content.substr(0, json_start) + msg_content.substr(json_end + 1); - } - return message; +}; + +Llama32JsonToolParser::Llama32JsonToolParser(bool keep_original_content) { + m_impl = std::make_shared(keep_original_content); +} + +JsonContainer Llama32JsonToolParser::parse(JsonContainer& input) { + return m_impl->parse(input); } class BaseReasoningParser::BaseReasoningParserImpl { From 4f757062049a95805218ff2767a79e5c52f418ce Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 17 Oct 2025 12:38:27 +0200 Subject: [PATCH 17/43] move defaults from class members to ctor default arguments --- src/cpp/include/openvino/genai/parsers.hpp | 4 +++- src/cpp/src/parsers.cpp | 20 ++++++++++++-------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 3a00e631b2..007e52e6cf 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -30,7 +30,9 @@ class ReasoningParser : public IncrementalParserBase { std::shared_ptr m_impl; public: ReasoningParser(bool expect_open_tag = true, - bool keep_original_content = true); + bool keep_original_content = true, + std::string open_tag="", + std::string close_tag=""); std::string parse( 
JsonContainer& msg, diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index 9bfbe446cc..da4b433946 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -13,21 +13,25 @@ namespace ov::genai { class ReasoningParser::ReasoningParserImpl { private: - bool m_expect_open_tag = true; + bool m_expect_open_tag; bool m_first_run = true; bool m_keep_original_content; bool m_think_tag_opened = false; - std::string m_open_tag = ""; - std::string m_close_tag = ""; + std::string m_open_tag; + std::string m_close_tag; std::string m_text_cache = ""; std::map accumulated_parsed; public: bool m_deactivated = false; ReasoningParserImpl() = default; - ReasoningParserImpl(bool expect_open_tag = true, - bool keep_original_content = true) + ReasoningParserImpl(bool expect_open_tag, + bool keep_original_content, + std::string open_tag, + std::string close_tag) : m_expect_open_tag(expect_open_tag), - m_keep_original_content(keep_original_content) {} + m_keep_original_content(keep_original_content), + m_open_tag(std::move(open_tag)), + m_close_tag(std::move(close_tag)) {} std::string parse( JsonContainer& msg, @@ -149,8 +153,8 @@ class ReasoningParser::ReasoningParserImpl { } }; -ReasoningParser::ReasoningParser(bool expect_open_tag, bool keep_original_content) { - m_impl = std::make_shared(expect_open_tag, keep_original_content); +ReasoningParser::ReasoningParser(bool expect_open_tag, bool keep_original_content, std::string open_tag, std::string close_tag) { + m_impl = std::make_shared(expect_open_tag, keep_original_content, std::move(open_tag), std::move(close_tag)); } std::string ReasoningParser::parse( From edc2c3e7a4ecd0e350bdb507520f3d46f2b19375 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 17 Oct 2025 12:43:57 +0200 Subject: [PATCH 18/43] return void instead of JsonContainer and keep modifying argument by reference --- src/cpp/include/openvino/genai/parsers.hpp | 8 +++---- src/cpp/src/parsers.cpp | 26 +++++++++------------- src/python/py_parsers.cpp | 12 +++++----- 3 files changed, 21 insertions(+), 25 deletions(-) diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 007e52e6cf..4837f93289 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -56,14 +56,14 @@ class Phi4ReasoningParser : public ReasoningParser { class ParserBase { public: ParserBase() = default; - virtual JsonContainer parse(JsonContainer& text) = 0; + virtual void parse(JsonContainer& text) = 0; }; class Llama32PythonicToolParser : public ParserBase { // Does not modify original content, only extracts and adds tool calls public: explicit Llama32PythonicToolParser(bool keep_original_content = true); - JsonContainer parse(JsonContainer& input) override; + void parse(JsonContainer& input) override; private: class Llama32PythonicToolParserImpl; std::shared_ptr m_impl; @@ -73,7 +73,7 @@ class Llama32JsonToolParser : public ParserBase { // Does not modify original content, only extracts and adds tool calls public: explicit Llama32JsonToolParser(bool keep_original_content = true); - JsonContainer parse(JsonContainer& input) override; + void parse(JsonContainer& input) override; private: class Llama32JsonToolParserImpl; std::shared_ptr m_impl; @@ -82,7 +82,7 @@ class Llama32JsonToolParser : public ParserBase { class BaseReasoningParser : public ParserBase{ public: BaseReasoningParser(bool expect_open_tag = true, bool keep_original_content = true, std::string open_tag = "", std::string close_tag = ""); - 
JsonContainer parse(JsonContainer& input) override; + void parse(JsonContainer& input) override; private: class BaseReasoningParserImpl; std::shared_ptr m_impl; diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index da4b433946..66976339c5 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -172,7 +172,7 @@ class Llama32PythonicToolParser::Llama32PythonicToolParserImpl { Llama32PythonicToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {} bool m_keep_original_content; - JsonContainer parse(JsonContainer& input) { + void parse(JsonContainer& input) { // Input example // string input = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>"; @@ -181,7 +181,7 @@ class Llama32PythonicToolParser::Llama32PythonicToolParserImpl { const std::string& text = input["content"].get_string(); std::regex r(R"(\[.*?\])"); if (!std::regex_search(text, m, r)) { - return input; + return; } // Strip outer [ ] @@ -209,7 +209,6 @@ class Llama32PythonicToolParser::Llama32PythonicToolParserImpl { if (!m_keep_original_content) { input["content"] = regex_replace(text, r, ""); } - return input; } }; @@ -217,8 +216,8 @@ Llama32PythonicToolParser::Llama32PythonicToolParser(bool keep_original_content) m_impl = std::make_shared(keep_original_content); } -JsonContainer Llama32PythonicToolParser::parse(JsonContainer& input) { - return m_impl->parse(input); +void Llama32PythonicToolParser::parse(JsonContainer& input) { + m_impl->parse(input); } class Llama32JsonToolParser::Llama32JsonToolParserImpl { @@ -227,14 +226,14 @@ class Llama32JsonToolParser::Llama32JsonToolParserImpl { public: Llama32JsonToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {} - JsonContainer parse(JsonContainer& message) { + void parse(JsonContainer& message) { // Find JSON in the message std::string msg_content = message["content"].get_string(); size_t json_start = msg_content.find('{'); size_t json_end = msg_content.rfind('}'); if (json_start == std::string::npos || json_end == std::string::npos || json_end <= json_start) { - return message; + return; } auto res = JsonContainer::array(); res.push_back(JsonContainer::from_json_string(msg_content.substr(json_start, json_end - json_start + 1))); @@ -243,7 +242,6 @@ class Llama32JsonToolParser::Llama32JsonToolParserImpl { if (!m_keep_original_content) { message["content"] = msg_content.substr(0, json_start) + msg_content.substr(json_end + 1); } - return message; } }; @@ -251,8 +249,8 @@ Llama32JsonToolParser::Llama32JsonToolParser(bool keep_original_content) { m_impl = std::make_shared(keep_original_content); } -JsonContainer Llama32JsonToolParser::parse(JsonContainer& input) { - return m_impl->parse(input); +void Llama32JsonToolParser::parse(JsonContainer& input) { + m_impl->parse(input); } class BaseReasoningParser::BaseReasoningParserImpl { @@ -266,8 +264,7 @@ class BaseReasoningParser::BaseReasoningParserImpl { m_open_tag(open_tag), m_close_tag(close_tag) {}; - JsonContainer parse(JsonContainer& input) { - JsonContainer res; + void parse(JsonContainer& input) { std::string reasoning_content; std::string content = input["content"].get_string(); @@ -285,7 +282,6 @@ class BaseReasoningParser::BaseReasoningParserImpl { } input["reasoning_content"] = reasoning_content; - return input; } private: bool m_expect_open_tag; @@ -298,8 +294,8 @@ BaseReasoningParser::BaseReasoningParser(bool expect_open_tag, bool keep_origina m_impl = std::make_shared(expect_open_tag, 
keep_original_content, open_tag, close_tag); } -JsonContainer BaseReasoningParser::parse(JsonContainer& input) { - return m_impl->parse(input); +void BaseReasoningParser::parse(JsonContainer& input) { + m_impl->parse(input); } } // namespace ov::genai diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index c4842ba132..39d83beb2d 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -53,9 +53,9 @@ class ConstructableIncrementalParserBase: public IncrementalParserBase { class ConstructableParserBase: public ParserBase { public: - JsonContainer parse(JsonContainer& text) override { + void parse(JsonContainer& text) override { PYBIND11_OVERRIDE_PURE( - JsonContainer, // Return type + void, // Return type ParserBase, // Parent class parse, // Name of function in C++ (must match Python name) text // Argument(s) @@ -66,7 +66,7 @@ class ConstructableParserBase: public ParserBase { static py::object json_mod = py::module_::import("json"); // wrapper to enhance calling parser from Python -void call_parser(py::dict& msg, std::function func) { +void call_parser(py::dict& msg, std::function func) { auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(msg); auto msg_cpp = JsonContainer(msg_anymap); @@ -194,7 +194,7 @@ void init_parsers(py::module_& m) { .def(py::init<>()) .def("parse", [](ParserBase& self, py::dict& msg) { - return call_parser(msg, [&self](JsonContainer& m) {return self.parse(m);}); + return call_parser(msg, [&self](JsonContainer& m) {self.parse(m);}); }, py::arg("text"), "Parse is called with the full text. Returns a dict with parsed content."); @@ -203,7 +203,7 @@ void init_parsers(py::module_& m) { .def(py::init<>()) .def("parse", [](Llama32JsonToolParser& self, py::dict& msg) { - return call_parser(msg, [&self](JsonContainer& m) { return self.parse(m); }); + return call_parser(msg, [&self](JsonContainer& m) { self.parse(m); }); }, py::arg("text"), "Parse is called with the full text. Returns a dict with parsed content."); @@ -212,7 +212,7 @@ void init_parsers(py::module_& m) { .def(py::init<>()) .def("parse", [](Llama32PythonicToolParser& self, py::dict& msg) { - return call_parser(msg, [&self](JsonContainer& m) { return self.parse(m); }); + return call_parser(msg, [&self](JsonContainer& m) { self.parse(m); }); }, py::arg("text"), "Parse is called with the full text. 
Returns a dict with parsed content.");

From e4ac07966f8b01afc06eea1783328af83c639d1e Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Fri, 17 Oct 2025 12:49:35 +0200
Subject: [PATCH 19/43] pass open/close tag strings by reference instead of value

---
 src/cpp/include/openvino/genai/parsers.hpp | 10 +++++++---
 src/cpp/src/parsers.cpp                    | 18 +++++++++---------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index 4837f93289..45427baccc 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -31,8 +31,8 @@ class ReasoningParser : public IncrementalParserBase {
 public:
     ReasoningParser(bool expect_open_tag = true,
                     bool keep_original_content = true,
-                    std::string open_tag="<think>",
-                    std::string close_tag="</think>");
+                    const std::string& open_tag = "<think>",
+                    const std::string& close_tag = "</think>");
 
     std::string parse(
         JsonContainer& msg,
@@ -81,7 +81,11 @@ class Llama32JsonToolParser : public ParserBase {
 
 class BaseReasoningParser : public ParserBase{
 public:
-    BaseReasoningParser(bool expect_open_tag = true, bool keep_original_content = true, std::string open_tag = "<think>", std::string close_tag = "</think>");
+    BaseReasoningParser(
+        bool expect_open_tag = true,
+        bool keep_original_content = true,
+        const std::string& open_tag = "<think>",
+        const std::string& close_tag = "</think>");
     void parse(JsonContainer& input) override;
 private:
     class BaseReasoningParserImpl;
diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index 66976339c5..6006a31181 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -26,12 +26,12 @@ class ReasoningParser::ReasoningParserImpl {
     ReasoningParserImpl() = default;
 
     ReasoningParserImpl(bool expect_open_tag,
                         bool keep_original_content,
-                        std::string open_tag,
-                        std::string close_tag)
+                        const std::string& open_tag,
+                        const std::string& close_tag)
         : m_expect_open_tag(expect_open_tag),
           m_keep_original_content(keep_original_content),
-          m_open_tag(std::move(open_tag)),
-          m_close_tag(std::move(close_tag)) {}
+          m_open_tag(open_tag),
+          m_close_tag(close_tag) {}
 
     std::string parse(
         JsonContainer& msg,
@@ -153,8 +153,8 @@ class ReasoningParser::ReasoningParserImpl {
     }
 };
 
-ReasoningParser::ReasoningParser(bool expect_open_tag, bool keep_original_content, std::string open_tag, std::string close_tag) {
-    m_impl = std::make_shared<ReasoningParserImpl>(expect_open_tag, keep_original_content, std::move(open_tag), std::move(close_tag));
+ReasoningParser::ReasoningParser(bool expect_open_tag, bool keep_original_content, const std::string& open_tag, const std::string& close_tag) {
+    m_impl = std::make_shared<ReasoningParserImpl>(expect_open_tag, keep_original_content, open_tag, close_tag);
 }
 
 std::string ReasoningParser::parse(
@@ -257,8 +257,8 @@ class BaseReasoningParser::BaseReasoningParserImpl {
 public:
     BaseReasoningParserImpl(bool expect_open_tag,
                             bool keep_original_content,
-                            std::string open_tag,
-                            std::string close_tag):
+                            const std::string& open_tag,
+                            const std::string& close_tag):
         m_expect_open_tag(expect_open_tag),
         m_keep_original_content(keep_original_content),
         m_open_tag(open_tag),
         m_close_tag(close_tag) {};
@@ -290,7 +290,7 @@ class BaseReasoningParser::BaseReasoningParserImpl {
     std::string m_close_tag;
 };
 
-BaseReasoningParser::BaseReasoningParser(bool expect_open_tag, bool keep_original_content, std::string open_tag, std::string close_tag) {
-    m_impl = std::make_shared<BaseReasoningParserImpl>(expect_open_tag, keep_original_content, open_tag, close_tag);
+BaseReasoningParser::BaseReasoningParser(bool expect_open_tag, bool keep_original_content, const std::string& open_tag, const std::string& close_tag) {
+    m_impl = std::make_shared<BaseReasoningParserImpl>(expect_open_tag, keep_original_content, open_tag, close_tag);
 }
From ca732bafe7ddf6491d18a49c0291b492fceb4f34 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Fri, 17 Oct 2025 13:10:12 +0200
Subject: [PATCH 20/43] remove breakpoint()

---
 tests/python_tests/test_parsers.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py
index 8a7117ad2c..56328760e1 100644
--- a/tests/python_tests/test_parsers.py
+++ b/tests/python_tests/test_parsers.py
@@ -141,7 +141,6 @@ def write(self, message):
         return StreamingStatus.RUNNING
 
     streamer = TextParserStreamer(genai_tokenizer, parsers=[DeepSeekR1ReasoningParser()])
-    breakpoint()
 
     msg = {}
     stream_string = [

From 32356cb9b7eda55d984f957485352bd75055e3c0 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Mon, 20 Oct 2025 11:59:15 +0200
Subject: [PATCH 21/43] use new JsonContainer to py::dict conversion approach

---
 src/cpp/include/openvino/genai/parsers.hpp |   4 +
 src/cpp/src/parsers.cpp                    |  10 +-
 src/python/openvino_genai/__init__.pyi     |   2 +-
 .../openvino_genai/py_openvino_genai.pyi   |   4 +-
 src/python/py_openvino_genai.cpp           |   9 +-
 src/python/py_parsers.cpp                  |  92 ++++++++--------
 src/python/py_streamers.cpp                |  17 +--
 tests/cpp/CMakeLists.txt                   |   2 +-
 tests/cpp/parser.cpp                       | 101 ++++++++----------
 tests/python_tests/test_parsers.py         |   3 +-
 10 files changed, 108 insertions(+), 136 deletions(-)

diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index 45427baccc..0f71c5641b 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -22,6 +22,8 @@ class IncrementalParserBase {
         const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
         const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
     ) = 0;
+
+    virtual ~IncrementalParserBase() = default;
 };
 
 class ReasoningParser : public IncrementalParserBase {
@@ -33,6 +35,7 @@ class ReasoningParser : public IncrementalParserBase {
         bool keep_original_content = true,
         const std::string& open_tag = "<think>",
         const std::string& close_tag = "</think>");
+    virtual ~ReasoningParser() = default;
 
     std::string parse(
         JsonContainer& msg,
@@ -56,6 +59,7 @@ class Phi4ReasoningParser : public ReasoningParser {
 class ParserBase {
 public:
     ParserBase() = default;
+    virtual ~ParserBase() = default;
     virtual void parse(JsonContainer& text) = 0;
 };
 
diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index 6006a31181..f73dd82d04 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -5,9 +5,6 @@
 #include
 #include
 #include
-#include <nlohmann/json.hpp>
-
-using json = nlohmann::json;
 
 namespace ov::genai {
 
@@ -24,6 +21,7 @@ class ReasoningParser::ReasoningParserImpl {
 public:
     bool m_deactivated = false;
     ReasoningParserImpl() = default;
+
     ReasoningParserImpl(bool expect_open_tag,
                         bool keep_original_content,
                         const std::string& open_tag,
@@ -91,7 +89,6 @@ class ReasoningParser::ReasoningParserImpl {
             auto close_idx = txt_chunk.find(m_close_tag);
             reason_str += txt_chunk.substr(0, close_idx);
-            // content_str += txt_chunk.substr(close_idx + std::string(m_close_tag).size(), txt_chunk.size() - (close_idx + std::string(m_close_tag).size()));
 
             if (!m_keep_original_content) {
                 // Cut from the txt_chunk which is before </think> and leave only what is after </think>.
// Example if m_text_cache + delta_text = "...some textAnswer is 3" = "...some textAnswer is 3" @@ -187,14 +184,10 @@ class Llama32PythonicToolParser::Llama32PythonicToolParserImpl { // Strip outer [ ] std::string call = m.str().substr(1, m.str().size() - 2); - // Split function name and arguments - input["tool_calls"] = JsonContainer::array(); - size_t pos = call.find('('); std::string name = call.substr(0, pos); std::string args = call.substr(pos + 1, call.size() - pos - 2); // inside (...) - JsonContainer kv; // Parse arguments of the form key='value' std::regex arg_re(R"((\w+)\s*=\s*\"([^"]*)\")"); @@ -203,6 +196,7 @@ class Llama32PythonicToolParser::Llama32PythonicToolParserImpl { kv[std::string((*it)[1])] = std::string((*it)[2]); } + // Split function name and arguments input["tool_calls"] = JsonContainer::array(); input["tool_calls"].push_back(JsonContainer({{"name", name}, {"arguments", kv}})); diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index dfbe188abc..f8fd25d6a8 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -72,5 +72,5 @@ from openvino_genai.py_openvino_genai import draft_model from openvino_genai.py_openvino_genai import get_version import os as os from . import py_openvino_genai -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'PerfMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama32JsonToolParser', 'Llama32PythonicToolParser', 'ParserBase', 'PerfMetrics', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 
'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 7687174233..8741d691b6 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import collections.abc import openvino._pyopenvino import typing -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningParser', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama32JsonToolParser', 'Llama32PythonicToolParser', 'MeanStdPair', 'ParserBase', 'PerfMetrics', 'Phi4ReasoningParser', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 
'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] class Adapter: """ Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. @@ -566,7 +566,7 @@ class DecodedResults: def extended_perf_metrics(self) -> ExtendedPerfMetrics: ... @property - def parsed(self) -> dict: + def parsed(self) -> list: ... @property def perf_metrics(self) -> PerfMetrics: diff --git a/src/python/py_openvino_genai.cpp b/src/python/py_openvino_genai.cpp index cfa8d94cd0..ed010a7581 100644 --- a/src/python/py_openvino_genai.cpp +++ b/src/python/py_openvino_genai.cpp @@ -94,15 +94,10 @@ PYBIND11_MODULE(py_openvino_genai, m) { .def(py::init<>()) .def_property_readonly("texts", [](const DecodedResults &dr) -> py::typing::List { return pyutils::handle_utf8((std::vector)dr); }) .def_readonly("scores", &DecodedResults::scores) - .def_property_readonly("parsed", [](const DecodedResults& dr) -> py::dict { - static py::object json_mod = py::module_::import("json"); + .def_property_readonly("parsed", [](const DecodedResults& dr) -> py::list { py::list result_dicts; - for (const auto& parsed: dr.parsed) { - auto json_str = parsed.to_json_string(); - py::dict json_dict = json_mod.attr("loads")(json_str); - - result_dicts.append(json_dict); + result_dicts.append(pyutils::json_container_to_py_object(parsed)); } return result_dicts; }) diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index 39d83beb2d..a5a4a5855c 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -67,11 +67,11 @@ static py::object json_mod = py::module_::import("json"); // wrapper to enhance calling parser from Python void call_parser(py::dict& msg, std::function func) { - auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(msg); - auto msg_cpp = JsonContainer(msg_anymap); - + auto msg_cpp = pyutils::py_object_to_json_container(msg); func(msg_cpp); + // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here, + // since it create a new object instead of updating existing dict. auto json_str = msg_cpp.to_json_string(); py::dict result = json_mod.attr("loads")(json_str); @@ -92,14 +92,14 @@ std::string call_incremental_parser( const std::optional>& delta_tokens, std::function>&, const std::optional>&)> func) { - auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(msg); - auto msg_cpp = JsonContainer(msg_anymap); + auto msg_cpp = pyutils::py_object_to_json_container(msg); auto res = func(msg_cpp, previous_text, delta_text, previous_tokens, delta_tokens); - auto json_str = msg_cpp.to_json_string(); - py::dict result = json_mod.attr("loads")(json_str); + // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here, + // since it create a new object instead of updating existing dict. 
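Side note (illustrative, not in the patch): call_parser and call_incremental_parser share the same serialize/reparse/update step; it could be factored into one helper along these lines (the helper name is hypothetical, types as used above):

// Serialize the JsonContainer, reparse it via Python's json module, and copy
// the keys into the caller's dict in place so existing Python references see
// the update (plain reassignment would create a new object).
static void sync_py_dict(py::dict& msg, ov::genai::JsonContainer& msg_cpp) {
    py::object json_mod = py::module_::import("json");
    py::dict result = json_mod.attr("loads")(msg_cpp.to_json_string());
    msg.clear();
    for (auto item : result) {
        msg[item.first] = item.second;
    }
}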
+ py::dict result = json_mod.attr("loads")(json_str); // update msg with result msg.clear(); for (auto item : result) { @@ -120,20 +120,19 @@ void init_parsers(py::module_& m) { std::string& delta_text, const std::optional>& previous_tokens = std::nullopt, const std::optional>& delta_tokens = std::nullopt) { - // TODO: optimize conversion between py::dict and JsonContainer - auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(msg); - auto msg_cpp = JsonContainer(msg_anymap); - - - auto res = self.parse(msg_cpp, previous_text, delta_text, previous_tokens, delta_tokens); - msg.clear(); - - auto json_obj = msg_cpp.to_json(); - for (auto it = json_obj.begin(); it != json_obj.end(); ++it) { - msg[py::cast(it.key())] = py::cast(it.value()); - } - - return res; + return call_incremental_parser( + self, + msg, + previous_text, + delta_text, + previous_tokens, + delta_tokens, + [&self](JsonContainer& m, const std::string& prev_text, std::string& delta_t, + const std::optional>& prev_tokens, + const std::optional>& delta_toks) { + return self.parse(m, prev_text, delta_t, prev_tokens, delta_toks); + } + ); }, py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt, "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output."); @@ -147,18 +146,18 @@ void init_parsers(py::module_& m) { std::string& delta_text, const std::optional>& previous_tokens = std::nullopt, const std::optional>& delta_tokens = std::nullopt) { - return call_incremental_parser( - self, - msg, - previous_text, - delta_text, - previous_tokens, - delta_tokens, - [&self](JsonContainer& m, const std::string& prev_text, std::string& delta_t, - const std::optional>& prev_tokens, - const std::optional>& delta_toks) { - return self.parse(m, prev_text, delta_t, prev_tokens, delta_toks); - }); + return call_incremental_parser( + self, + msg, + previous_text, + delta_text, + previous_tokens, + delta_tokens, + [&self](JsonContainer& m, const std::string& prev_text, std::string& delta_t, + const std::optional>& prev_tokens, + const std::optional>& delta_toks) { + return self.parse(m, prev_text, delta_t, prev_tokens, delta_toks); + }); }, "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output.", py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), @@ -173,19 +172,20 @@ void init_parsers(py::module_& m) { std::string& delta_text, const std::optional>& previous_tokens = std::nullopt, const std::optional>& delta_tokens = std::nullopt) { - return call_incremental_parser( - self, - msg, - previous_text, - delta_text, - previous_tokens, - delta_tokens, - [&self](JsonContainer& m, const std::string& prev_text, std::string& delta_t, - const std::optional>& prev_tokens, - const std::optional>& delta_toks) { - return self.parse(m, prev_text, delta_t, prev_tokens, delta_toks); - }); - }, + return call_incremental_parser( + self, + msg, + previous_text, + delta_text, + previous_tokens, + delta_tokens, + [&self](JsonContainer& m, const std::string& prev_text, std::string& delta_t, + const std::optional>& prev_tokens, + const std::optional>& delta_toks) { + return self.parse(m, prev_text, delta_t, prev_tokens, delta_toks); + } + ); + }, "Parse is called with the full text. 
Returns a dict with parsed content.", py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt); diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp index 47ac549f70..29bc7296ac 100644 --- a/src/python/py_streamers.cpp +++ b/src/python/py_streamers.cpp @@ -77,16 +77,11 @@ class ConstructableTextParserStreamer: public TextParserStreamer { StreamingStatus write(JsonContainer& message) override { py::dict message_py; - auto json_obj = message.to_json(); - for (auto it = json_obj.begin(); it != json_obj.end(); ++it) { - message_py[py::cast(it.key())] = py::cast(it.value().get()); - } + message_py = pyutils::json_container_to_py_object(message); // call python implementation which accepts py::dict instead of JsonContainer auto res = py::get_override(this, "write")(message_py); - - auto msg_anymap = ov::genai::pybind::utils::py_object_to_any_map(message_py); - message = JsonContainer(msg_anymap); + message = pyutils::py_object_to_json_container(message_py); return res.cast(); } @@ -174,13 +169,7 @@ void init_streamers(py::module_& m) { .def("get_parsed_message", [](TextParserStreamer& self) { - static py::object json_mod = py::module_::import("json"); - - auto res = self.get_parsed_message(); - auto json_str = res.to_json_string(); - py::dict json_dict = json_mod.attr("loads")(json_str); - - return json_dict; + return pyutils::json_container_to_py_object(self.get_parsed_message()); }, "Get the current parsed message"); } diff --git a/tests/cpp/CMakeLists.txt b/tests/cpp/CMakeLists.txt index f708e00e55..bdf959eb5d 100644 --- a/tests/cpp/CMakeLists.txt +++ b/tests/cpp/CMakeLists.txt @@ -26,7 +26,7 @@ set(TEST_TARGET_NAME "tests_continuous_batching") add_executable(${TEST_TARGET_NAME} ${tests_src} $) -target_link_libraries(${TEST_TARGET_NAME} PRIVATE $ gtest_main gmock_main nlohmann_json::nlohmann_json) +target_link_libraries(${TEST_TARGET_NAME} PRIVATE $ gtest_main gmock_main) target_include_directories(${TEST_TARGET_NAME} PRIVATE "${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src" $) diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp index 1e56fef042..0cd2d602ba 100644 --- a/tests/cpp/parser.cpp +++ b/tests/cpp/parser.cpp @@ -9,94 +9,86 @@ using namespace ov::genai; -nlohmann::json run_parser_test(std::shared_ptr parser, const std::string& prompt) { - JsonContainer input; - input["content"] = prompt; - parser->parse(input); - return input.to_json(); -} - - TEST(ParserTest, test_llama32_parser_1) { std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location="New York, NY", unit="celsius")]<|eom_id|>)"; - nlohmann::json expected; - // By default content should keep original values. 
- expected["content"] = prompt; - expected["tool_calls"] = nlohmann::json::array({ - { - {"name", "get_weather"}, - {"arguments", { - {"location", "New York, NY"}, - {"unit", "celsius"} - }} - } - }); + JsonContainer expected; + expected["content"] = prompt; + expected["tool_calls"] = JsonContainer::array(); + expected["tool_calls"].push_back(JsonContainer({ + {"name", "get_weather"}, + {"arguments", JsonContainer{ + {"location", "New York, NY"}, + {"unit", "celsius"} + }} + })); + + std::shared_ptr parser = std::make_shared(); + JsonContainer input; + input["content"] = prompt; + parser->parse(input); - nlohmann::json res = run_parser_test(parser, prompt); - - ASSERT_EQ(res, expected); + ASSERT_TRUE(expected == input); } TEST(ParserTest, test_llama32_parser_2) { std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location="New York, NY", unit="celsius")]<|eom_id|>)"; - nlohmann::json expected; - // In this test tool calling part will be cut from the content after parsing. + JsonContainer expected; expected["content"] = std::string(R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n<|eom_id|>)"); - - expected["tool_calls"] = nlohmann::json::array({ - { - {"name", "get_weather"}, - {"arguments", { - {"location", "New York, NY"}, - {"unit", "celsius"} - }} - } - }); - auto parser = std::make_shared(/*keep_original_content*/ false); - - nlohmann::json res = run_parser_test(parser, prompt); - - ASSERT_EQ(res, expected); + expected["tool_calls"] = JsonContainer::array(); + expected["tool_calls"].push_back(JsonContainer(ov::AnyMap({ + {"name", "get_weather"}, + {"arguments", ov::AnyMap{ + {"location", "New York, NY"}, + {"unit", "celsius"} + }} + }))); + + std::shared_ptr parser = std::make_shared(/*keep_original_content*/ false); + JsonContainer input; + input["content"] = prompt; + parser->parse(input); + + ASSERT_EQ(input, expected); } TEST(ParserTest, test_reasoning_parser_1) { std::string prompt = R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|>\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n\n\n**Solution:**\n\nTo find the sum of 2 and 1, )"; - nlohmann::json expected; - // In this test reasoning part will be cut from the content after parsing. 
+ JsonContainer expected; expected["content"] = std::string(R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|>\n\n**Solution:**\n\nTo find the sum of 2 and 1, )"); - expected["reasoning_content"] = std::string(R"(\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n)"); - auto parser = std::make_shared( + + std::shared_ptr parser = std::make_shared( /*expect_open_tag*/ true, /*keep_original_content*/ false ); + JsonContainer input; + input["content"] = prompt; + parser->parse(input); - nlohmann::json res = run_parser_test(parser, prompt); - - ASSERT_EQ(res, expected); + ASSERT_EQ(input, expected); } TEST(ParserTest, test_reasoning_parser_2) { std::string prompt = R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|>\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n\n\n**Solution:**\n\nTo find the sum of 2 and 1, )"; - nlohmann::json expected; - // In this test content should keep original values. + JsonContainer expected; expected["content"] = prompt; - expected["reasoning_content"] = std::string(R"(\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n)"); - auto parser = std::make_shared( + + std::shared_ptr parser = std::make_shared( /*expect_open_tag*/ true, /*keep_original_content*/ true ); + JsonContainer input; + input["content"] = prompt; + parser->parse(input); - nlohmann::json res = run_parser_test(parser, prompt); - - ASSERT_EQ(res, expected); + ASSERT_EQ(input, expected); } class DeepSeekR1ReasoningParserTest : public ::testing::Test { @@ -121,7 +113,6 @@ TEST_F(DeepSeekR1ReasoningParserTest, ReasoningContentAccumulatesAcrossCalls) { JsonContainer msg; - for (int i = 1; i < input_stream.size(); i++) { std::string previous_text = input_stream[i - 1]; std::string delta_text = input_stream[i]; diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index 56328760e1..45f034374c 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -125,10 +125,9 @@ def test_final_parser_llama_32_json(hf_ov_genai_models): content_json = { "content": f"Calling weather API: {json_str}" } - + parser = Llama32JsonToolParser() parser.parse(content_json) - assert content_json['tool_calls'][0] == json.loads(json_str) From aefbd7b437acfbe5dee9bbb5e8027017c24941fa Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Mon, 20 Oct 2025 16:23:23 +0200 Subject: [PATCH 22/43] fix segfault; some other fixes --- src/python/py_parsers.cpp | 4 +++- src/python/py_streamers.cpp | 7 +++++-- tests/python_tests/test_parsers.py | 32 ++++++++++-------------------- 3 files changed, 19 insertions(+), 24 deletions(-) diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index a5a4a5855c..0904d913fc 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -63,7 +63,6 @@ class ConstructableParserBase: public ParserBase { } }; -static py::object json_mod = py::module_::import("json"); // wrapper to enhance 
calling parser from Python
 void call_parser(py::dict& msg, std::function<void(JsonContainer&)> func) {
@@ -71,6 +70,8 @@ void call_parser(py::dict& msg, std::function<void(JsonContainer&)> func) {
     func(msg_cpp);
 
     // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here,
+    py::object json_mod = py::module_::import("json");
+
     // since it creates a new object instead of updating the existing dict.
     auto json_str = msg_cpp.to_json_string();
     py::dict result = json_mod.attr("loads")(json_str);
@@ -99,6 +100,7 @@ std::string call_incremental_parser(
 
     // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here,
     // since it creates a new object instead of updating the existing dict.
+    py::object json_mod = py::module_::import("json");
     py::dict result = json_mod.attr("loads")(json_str);
     // update msg with result
     msg.clear();
diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp
index 29bc7296ac..d34fc58a22 100644
--- a/src/python/py_streamers.cpp
+++ b/src/python/py_streamers.cpp
@@ -76,10 +76,13 @@ class ConstructableTextParserStreamer: public TextParserStreamer {
     using TextParserStreamer::TextParserStreamer; // inherit base constructors
 
     StreamingStatus write(JsonContainer& message) override {
+        // Since C++ calls this function with a JsonContainer while the Python override expects a py::dict,
+        // this function is a wrapper that invokes the Python implementation of 'write' with a py::dict.
         py::dict message_py;
         message_py = pyutils::json_container_to_py_object(message);
 
-        // call python implementation which accepts py::dict instead of JsonContainer
+        // Call the Python implementation, which accepts py::dict instead of JsonContainer,
+        // and convert the resulting message back to JsonContainer.
         auto res = py::get_override(this, "write")(message_py);
         message = pyutils::py_object_to_json_container(message_py);
 
@@ -168,7 +171,7 @@ void init_streamers(py::module_& m) {
            "Write is called with a string message. Returns CallbackTypeVariant. This is a private method.")
 
         .def("get_parsed_message",
-            [](TextParserStreamer& self) {
+            [](TextParserStreamer& self) -> py::dict {
                 return pyutils::json_container_to_py_object(self.get_parsed_message());
             }, "Get the current parsed message");
 }
diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py
index 45f034374c..5912cfd75c 100644
--- a/tests/python_tests/test_parsers.py
+++ b/tests/python_tests/test_parsers.py
@@ -9,38 +9,28 @@ import pytest
 from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus, Llama32JsonToolParser, Phi4ReasoningParser, DeepSeekR1ReasoningParser
 from transformers import AutoTokenizer
-from utils.hugging_face import convert_and_save_tokenizer, download_and_convert_model
+from utils.hugging_face import convert_and_save_tokenizer
 import re
-import textwrap
 import json
 
 
 @pytest.fixture(scope="module")
 def hf_ov_genai_models(request, tmp_path_factory):
-    model_id, args = request.param
-    tok_load_properties = {"add_second_input": args.pop("add_second_input")} if "add_second_input" in args else {}
-
-    hf_args = args.copy() # to overcome mutable default argument side effects
-    if "padding_side" in hf_args and hf_args["padding_side"] is None:
-        # HF does not accept None.
- # Need to remove padding_side and let HF to choose default value, - hf_args.pop("padding_side") - else: - hf_args["truncation_side"] = hf_args["padding_side"] + model_id = request.param + model_dir = tmp_path_factory.getbasetemp() / model_id.replace("/", "_") model_dir.mkdir(exist_ok=True, parents=True) - hf_tokenizer = AutoTokenizer.from_pretrained(model_id, **hf_args) - convert_args = {"number_of_inputs": hf_args.pop("number_of_inputs")} if "number_of_inputs" in hf_args else {} - convert_and_save_tokenizer(hf_tokenizer, model_dir, **convert_args) + hf_tokenizer = AutoTokenizer.from_pretrained(model_id) + convert_and_save_tokenizer(hf_tokenizer, model_dir) - genai_tokenizer = Tokenizer(model_dir, tok_load_properties) + genai_tokenizer = Tokenizer(model_dir) return hf_tokenizer, genai_tokenizer @pytest.mark.precommit @pytest.mark.parametrize( "hf_ov_genai_models", - [("katuni4ka/tiny-random-phi3", {"padding_side": "right"})], # this tokenizer is used as a stub only + ["katuni4ka/tiny-random-phi3"], # this tokenizer is used as a stub only indirect=True ) @pytest.mark.parametrize("answer", [ @@ -76,10 +66,11 @@ def write(self, message): assert msg['reasoning_content'] == think_content assert msg['content'] == content + @pytest.mark.precommit @pytest.mark.parametrize( "hf_ov_genai_models", - [("katuni4ka/tiny-random-phi3", {"padding_side": "right"})], # this tokenizer is used as a stub only + ["katuni4ka/tiny-random-phi3"], indirect=True ) @pytest.mark.parametrize("split_answer", [ @@ -111,11 +102,10 @@ def write(self, message): assert msg['content'] == content - @pytest.mark.precommit @pytest.mark.parametrize( "hf_ov_genai_models", - [("katuni4ka/tiny-random-phi3", {"padding_side": "right"})], + ["katuni4ka/tiny-random-phi3"], indirect=True ) def test_final_parser_llama_32_json(hf_ov_genai_models): @@ -125,7 +115,7 @@ def test_final_parser_llama_32_json(hf_ov_genai_models): content_json = { "content": f"Calling weather API: {json_str}" } - + parser = Llama32JsonToolParser() parser.parse(content_json) assert content_json['tool_calls'][0] == json.loads(json_str) From 9c0422c7e9c63771ed405f46914a47ac043f48ca Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Mon, 20 Oct 2025 18:14:05 +0200 Subject: [PATCH 23/43] add export symbols --- src/cpp/include/openvino/genai/parsers.hpp | 14 +++++++------- src/cpp/include/openvino/genai/text_streamer.hpp | 2 +- tests/python_tests/test_parsers.py | 2 +- tests/python_tests/test_text_streamer.py | 14 -------------- 4 files changed, 9 insertions(+), 23 deletions(-) diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 0f71c5641b..dabb46a9e6 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -10,7 +10,7 @@ namespace ov { namespace genai { -class IncrementalParserBase { +class OPENVINO_GENAI_EXPORTS IncrementalParserBase { public: IncrementalParserBase() = default; @@ -26,7 +26,7 @@ class IncrementalParserBase { virtual ~IncrementalParserBase() = default; }; -class ReasoningParser : public IncrementalParserBase { +class OPENVINO_GENAI_EXPORTS ReasoningParser : public IncrementalParserBase { private: class ReasoningParserImpl; std::shared_ptr m_impl; @@ -46,12 +46,12 @@ class ReasoningParser : public IncrementalParserBase { ) override; }; -class DeepSeekR1ReasoningParser : public ReasoningParser { +class OPENVINO_GENAI_EXPORTS DeepSeekR1ReasoningParser : public ReasoningParser { public: explicit DeepSeekR1ReasoningParser(bool expect_open_tag = true) : 
ReasoningParser(expect_open_tag) {}; }; -class Phi4ReasoningParser : public ReasoningParser { +class OPENVINO_GENAI_EXPORTS Phi4ReasoningParser : public ReasoningParser { public: explicit Phi4ReasoningParser(bool expect_open_tag = false) : ReasoningParser(expect_open_tag) {}; }; @@ -63,7 +63,7 @@ class ParserBase { virtual void parse(JsonContainer& text) = 0; }; -class Llama32PythonicToolParser : public ParserBase { +class OPENVINO_GENAI_EXPORTS Llama32PythonicToolParser : public ParserBase { // Does not modify original content, only extracts and adds tool calls public: explicit Llama32PythonicToolParser(bool keep_original_content = true); @@ -73,7 +73,7 @@ class Llama32PythonicToolParser : public ParserBase { std::shared_ptr m_impl; }; -class Llama32JsonToolParser : public ParserBase { +class OPENVINO_GENAI_EXPORTS Llama32JsonToolParser : public ParserBase { // Does not modify original content, only extracts and adds tool calls public: explicit Llama32JsonToolParser(bool keep_original_content = true); @@ -83,7 +83,7 @@ class Llama32JsonToolParser : public ParserBase { std::shared_ptr m_impl; }; -class BaseReasoningParser : public ParserBase{ +class OPENVINO_GENAI_EXPORTS BaseReasoningParser : public ParserBase{ public: BaseReasoningParser( bool expect_open_tag = true, diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp index 5b6fd6d16e..816427c985 100644 --- a/src/cpp/include/openvino/genai/text_streamer.hpp +++ b/src/cpp/include/openvino/genai/text_streamer.hpp @@ -47,7 +47,7 @@ class OPENVINO_GENAI_EXPORTS TextStreamer : public StreamerBase { void compute_decoded_length_for_position(size_t cache_position); }; -class TextParserStreamer : public TextStreamer { +class OPENVINO_GENAI_EXPORTS TextParserStreamer : public TextStreamer { public: TextParserStreamer(const Tokenizer& tokenizer, std::vector> parsers = {}); diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index 5912cfd75c..fd0407b31f 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -11,7 +11,7 @@ from transformers import AutoTokenizer from utils.hugging_face import convert_and_save_tokenizer import re -import json + @pytest.fixture(scope="module") def hf_ov_genai_models(request, tmp_path_factory): diff --git a/tests/python_tests/test_text_streamer.py b/tests/python_tests/test_text_streamer.py index 4790ab4b3d..a3ea55d225 100644 --- a/tests/python_tests/test_text_streamer.py +++ b/tests/python_tests/test_text_streamer.py @@ -71,20 +71,6 @@ def test_text_prompts(tmp_path, prompt, model_id): for token in tokens: streamer.write(token) streamer.end() - - class CurrentStreamer(BaseStreamer): - def write(self, token_chunk): - pass - - class CurrentParsingStreamer(TextParserStreamer): - def write(self, word: str): - msg: JsonContainer = get_current_message() - - - streamer = lambda x: print(x) - - streamer = TextStreamer(ov_tokenizer, lambda x: print(x)) - assert ''.join(accumulated) == ov_tokenizer.decode(tokens) From ac9dd8c3995097f68dc9b86bc5b52c1ef852ed6f Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Tue, 21 Oct 2025 18:41:10 +0200 Subject: [PATCH 24/43] add more tests use unique_ptr --- src/cpp/include/openvino/genai/parsers.hpp | 15 +-- src/cpp/src/llm/pipeline.cpp | 18 ++-- src/cpp/src/parsers.cpp | 21 ++-- src/cpp/src/text_streamer.cpp | 1 - src/python/py_parsers.cpp | 77 ++++++++++++-- src/python/py_streamers.cpp | 1 + tests/cpp/parser.cpp | 118 ++++++++++++++++++++- 
 tests/python_tests/test_parsers.py         | 118 ++++++++++++++++++++-
 8 files changed, 329 insertions(+), 40 deletions(-)

diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index dabb46a9e6..4af6fb2cca 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -29,13 +29,13 @@ class OPENVINO_GENAI_EXPORTS ReasoningParser : public IncrementalParserBase {
 private:
     class ReasoningParserImpl;
-    std::shared_ptr<ReasoningParserImpl> m_impl;
+    std::unique_ptr<ReasoningParserImpl> m_impl;
 public:
     ReasoningParser(bool expect_open_tag = true,
                     bool keep_original_content = true,
                     const std::string& open_tag = "<think>",
                     const std::string& close_tag = "</think>");
-    virtual ~ReasoningParser() = default;
+    virtual ~ReasoningParser();
 
     std::string parse(
         JsonContainer& msg,
@@ -59,7 +59,7 @@ class ParserBase {
 public:
     ParserBase() = default;
-    virtual ~ParserBase() = default;
+    virtual ~ParserBase();
 
     virtual void parse(JsonContainer& text) = 0;
 };
@@ -67,20 +67,22 @@ class OPENVINO_GENAI_EXPORTS Llama32PythonicToolParser : public ParserBase {
 // Does not modify original content, only extracts and adds tool calls
 public:
     explicit Llama32PythonicToolParser(bool keep_original_content = true);
+    ~Llama32PythonicToolParser();
     void parse(JsonContainer& input) override;
 private:
     class Llama32PythonicToolParserImpl;
-    std::shared_ptr<Llama32PythonicToolParserImpl> m_impl;
+    std::unique_ptr<Llama32PythonicToolParserImpl> m_impl;
 };
 
 class OPENVINO_GENAI_EXPORTS Llama32JsonToolParser : public ParserBase {
 // Does not modify original content, only extracts and adds tool calls
 public:
     explicit Llama32JsonToolParser(bool keep_original_content = true);
+    ~Llama32JsonToolParser();
     void parse(JsonContainer& input) override;
 private:
     class Llama32JsonToolParserImpl;
-    std::shared_ptr<Llama32JsonToolParserImpl> m_impl;
+    std::unique_ptr<Llama32JsonToolParserImpl> m_impl;
 };
 
 class OPENVINO_GENAI_EXPORTS BaseReasoningParser : public ParserBase {
@@ -91,9 +93,10 @@ class OPENVINO_GENAI_EXPORTS BaseReasoningParser : public ParserBase {
         const std::string& open_tag = "<think>",
         const std::string& close_tag = "</think>");
     void parse(JsonContainer& input) override;
+    ~BaseReasoningParser();
 private:
     class BaseReasoningParserImpl;
-    std::shared_ptr<BaseReasoningParserImpl> m_impl;
+    std::unique_ptr<BaseReasoningParserImpl> m_impl;
 };
 
 } // namespace genai
diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp
index c6d4772453..9f988dbb59 100644
--- a/src/cpp/src/llm/pipeline.cpp
+++ b/src/cpp/src/llm/pipeline.cpp
@@ -253,27 +253,27 @@ DecodedResults LLMPipeline::generate(
     auto res = m_pimpl->generate(inputs, generation_config, streamer);
 
     // If streamer is of StreamerBase type, and it is TextParserStreamer, get parsed message
+    // Streaming is available only for batch size 1, therefore only parsed[0] is filled
     if (auto streamer_obj = std::get_if<std::shared_ptr<StreamerBase>>(&streamer)) {
         if (auto parser_streamer = std::dynamic_pointer_cast<TextParserStreamer>(*streamer_obj)) {
-            res.parsed.resize(res.texts.size());
+            res.parsed.resize(1);
             res.parsed[0] = parser_streamer->get_parsed_message();
         }
     }
-
-    if (!generation_config.has_value() || (*generation_config).parsers.empty()) {
-        return res;
-    }
 
-    if (!generation_config.has_value() || (*generation_config).parsers.empty()) {
+    if (!generation_config.has_value() || generation_config->parsers.empty()) {
         return res;
     }
-    
+
     std::vector<std::shared_ptr<ParserBase>> parsers = (*generation_config).parsers;
     res.parsed.resize(res.texts.size());
     // Apply Base parsers sequentially even if IncrementalParser has run.
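+    // Illustrative usage sketch of this parsing path (added for clarity; cfg/pipe are hypothetical names,
+    // not part of this patch):
+    //   ov::genai::GenerationConfig cfg;
+    //   cfg.max_new_tokens = 100;
+    //   cfg.parsers = { std::make_shared<ov::genai::BaseReasoningParser>() };
+    //   auto out = pipe.generate("prompt", cfg);
+    //   // out.parsed[i] then holds fields such as "reasoning_content" extracted from out.texts[i].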
for (size_t i = 0; i < res.texts.size(); ++i) { - JsonContainer msg; - msg["content"] = res.texts[i]; + auto& msg = res.parsed[i]; + if (!msg.contains("content")) { + // Initialize msg with content + msg["content"] = res.texts[i]; + } for (auto& parser: parsers) { // TODO: Check the state of incremental parser and reset if necessary parser->parse(msg); diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index f73dd82d04..0a8670dbc4 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -17,7 +17,6 @@ class ReasoningParser::ReasoningParserImpl { std::string m_open_tag; std::string m_close_tag; std::string m_text_cache = ""; - std::map accumulated_parsed; public: bool m_deactivated = false; ReasoningParserImpl() = default; @@ -59,8 +58,6 @@ class ReasoningParser::ReasoningParserImpl { auto content_str = msg["content"].get_string(); if (!m_think_tag_opened && txt_chunk.find(m_open_tag) != std::string::npos && !m_expect_open_tag) { - OPENVINO_ASSERT(m_open_tag.find(m_text_cache) != std::string::npos, "m_text_cache should be a prefix of m_open_tag"); - // Thinking has started auto open_idx = txt_chunk.find(m_open_tag); reason_str += txt_chunk.substr(open_idx + std::string(m_open_tag).size(), txt_chunk.size() - (open_idx + std::string(m_open_tag).size())); @@ -151,9 +148,11 @@ class ReasoningParser::ReasoningParserImpl { }; ReasoningParser::ReasoningParser(bool expect_open_tag, bool keep_original_content, const std::string& open_tag, const std::string& close_tag) { - m_impl = std::make_shared(expect_open_tag, keep_original_content, open_tag, close_tag); + m_impl = std::make_unique(expect_open_tag, keep_original_content, open_tag, close_tag); } +ReasoningParser::~ReasoningParser() = default; + std::string ReasoningParser::parse( JsonContainer& msg, const std::string& previous_text, @@ -207,13 +206,15 @@ class Llama32PythonicToolParser::Llama32PythonicToolParserImpl { }; Llama32PythonicToolParser::Llama32PythonicToolParser(bool keep_original_content) { - m_impl = std::make_shared(keep_original_content); + m_impl = std::make_unique(keep_original_content); } void Llama32PythonicToolParser::parse(JsonContainer& input) { m_impl->parse(input); } +Llama32PythonicToolParser::~Llama32PythonicToolParser() = default; + class Llama32JsonToolParser::Llama32JsonToolParserImpl { private: bool m_keep_original_content; @@ -240,13 +241,15 @@ class Llama32JsonToolParser::Llama32JsonToolParserImpl { }; Llama32JsonToolParser::Llama32JsonToolParser(bool keep_original_content) { - m_impl = std::make_shared(keep_original_content); + m_impl = std::make_unique(keep_original_content); } void Llama32JsonToolParser::parse(JsonContainer& input) { m_impl->parse(input); } +Llama32JsonToolParser::~Llama32JsonToolParser() = default; + class BaseReasoningParser::BaseReasoningParserImpl { public: BaseReasoningParserImpl(bool expect_open_tag, @@ -285,11 +288,15 @@ class BaseReasoningParser::BaseReasoningParserImpl { }; BaseReasoningParser::BaseReasoningParser(bool expect_open_tag, bool keep_original_content, const std::string& open_tag, const std::string& close_tag) { - m_impl = std::make_shared(expect_open_tag, keep_original_content, open_tag, close_tag); + m_impl = std::make_unique(expect_open_tag, keep_original_content, open_tag, close_tag); } void BaseReasoningParser::parse(JsonContainer& input) { m_impl->parse(input); } +BaseReasoningParser::~BaseReasoningParser() = default; + +ParserBase::~ParserBase() = default; + } // namespace ov::genai diff --git a/src/cpp/src/text_streamer.cpp 
b/src/cpp/src/text_streamer.cpp index 7738c455e0..9b83e0c60c 100644 --- a/src/cpp/src/text_streamer.cpp +++ b/src/cpp/src/text_streamer.cpp @@ -133,7 +133,6 @@ CallbackTypeVariant TextParserStreamer::write(std::string message) { for (auto& parser: m_parsers) { message = parser->parse(m_parsed_message, m_text_buffer, message); // Message can be modified inside parser, if parser for example extracted tool calling from message content - // but parser m_parsed_message["content"] = m_parsed_message["content"].get_string() + message; } diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index 0904d913fc..d853f507ef 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -31,6 +31,7 @@ namespace { class ConstructableIncrementalParserBase: public IncrementalParserBase { public: + using IncrementalParserBase::IncrementalParserBase; std::string parse( JsonContainer& msg, const std::string& previous_text, @@ -38,10 +39,49 @@ class ConstructableIncrementalParserBase: public IncrementalParserBase { const std::optional>& previous_tokens = std::nullopt, const std::optional>& delta_tokens = std::nullopt ) override { + // Convert JsonContainer to py::dict + py::dict py_msg = pyutils::json_container_to_py_object(msg); + + py::function parse_method = py::get_override(static_cast(this), "parse"); + if (!parse_method) { + throw std::runtime_error("parse method not implemented in Python subclass"); + } + + auto res = parse_method( + py_msg, + previous_text, + delta_text, + previous_tokens, + delta_tokens + ); + + // iterate throught py_msg and update msg + msg.clear(); + auto msg_anymap = pyutils::py_object_to_any_map(py_msg); + for (const auto& [key, value] : msg_anymap) { + if (value.is()) { + msg[key] = value.as(); + } else if (value.is()) { + msg[key] = JsonContainer(value.as()); + } else { + OPENVINO_THROW("Unsupported type in JsonContainer update from Python dict"); + } + } + return res.cast(); + } + + // This method should be overridden in Python + std::string parse( + py::dict& msg, + const std::string& previous_text, + std::string& delta_text, + const std::optional>& previous_tokens = std::nullopt, + const std::optional>& delta_tokens = std::nullopt + ) { PYBIND11_OVERRIDE_PURE( - std::string, // Return type - IncrementalParserBase, // Parent class - parse, // Name of function in C++ (must match Python name) + std::string, + IncrementalParserBase, + "parse", msg, previous_text, delta_text, @@ -53,13 +93,30 @@ class ConstructableIncrementalParserBase: public IncrementalParserBase { class ConstructableParserBase: public ParserBase { public: - void parse(JsonContainer& text) override { - PYBIND11_OVERRIDE_PURE( - void, // Return type - ParserBase, // Parent class - parse, // Name of function in C++ (must match Python name) - text // Argument(s) - ); + void parse(JsonContainer& msg) override { + py::gil_scoped_acquire acquire; + + py::function parse_method = py::get_override(static_cast(this), "parse"); + if (!parse_method) { + throw std::runtime_error("parse method not implemented in Python subclass"); + } + + // Convert JsonContainer to py::dict + py::dict py_msg = pyutils::json_container_to_py_object(msg); + parse_method(py_msg); + + // iterate throught py_msg and update msg + msg.clear(); + auto msg_anymap = pyutils::py_object_to_any_map(py_msg); + for (const auto& [key, value] : msg_anymap) { + if (value.is()) { + msg[key] = value.as(); + } else if (value.is()) { + msg[key] = JsonContainer(value.as()); + } else { + OPENVINO_THROW("Unsupported type in JsonContainer 
update from Python dict");
+            }
+        }
     }
 };
 
diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp
index d34fc58a22..d6660fcd17 100644
--- a/src/python/py_streamers.cpp
+++ b/src/python/py_streamers.cpp
@@ -153,6 +153,7 @@ void init_streamers(py::module_& m) {
         }),
         py::arg("tokenizer"),
         py::arg("parsers") = std::vector<std::shared_ptr<IncrementalParserBase>>(),
+        py::keep_alive<1, 3>(),
         "TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers.")
 
     .def("write", [](TextParserStreamer& self, py::dict& message) {
diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp
index 0cd2d602ba..a74a12f499 100644
--- a/tests/cpp/parser.cpp
+++ b/tests/cpp/parser.cpp
@@ -4,7 +4,7 @@
 #include <gtest/gtest.h>
 #include "openvino/genai/generation_config.hpp"
 #include "openvino/genai/parsers.hpp"
-#include "nlohmann/json.hpp"
+#include "openvino/genai/text_streamer.hpp"
 #include "openvino/genai/llm_pipeline.hpp"
 
 using namespace ov::genai;
@@ -91,6 +91,8 @@ TEST(ParserTest, test_reasoning_parser_2) {
     ASSERT_EQ(input, expected);
 }
 
+
+
 class DeepSeekR1ReasoningParserTest : public ::testing::Test {
 protected:
     ov::genai::DeepSeekR1ReasoningParser parser;
@@ -121,4 +123,116 @@ TEST_F(DeepSeekR1ReasoningParserTest, ReasoningContentAccumulatesAcrossCalls) {
     ASSERT_EQ(msg["reasoning_content"], ref_res);
 }
 
-// TODO: add tests when streamer is called directly instead of manual subsequent calling of parsers.
+TEST(ParserTest, test_custom_parser) {
+    // Define a small custom parser derived from ParserBase
+    class CustomParser : public ov::genai::ParserBase {
+    public:
+        void parse(ov::genai::JsonContainer& msg) override {
+            // extract "content"
+            if (!msg.contains("content"))
+                return;
+
+            auto content_opt = msg["content"].as_string();
+            if (!content_opt.has_value())
+                return;
+
+            const std::string& content = content_opt.value();
+
+            // find text between <think> and </think>
+            std::size_t start = content.find("<think>");
+            std::size_t end = content.find("</think>");
+            if (start != std::string::npos && end != std::string::npos && end > start) {
+                std::string think_text = content.substr(start + 7, end - (start + 7));
+                // trim leading/trailing whitespace
+                auto l = think_text.find_first_not_of(" \n\r\t");
+                auto r = think_text.find_last_not_of(" \n\r\t");
+                if (l != std::string::npos && r != std::string::npos)
+                    think_text = think_text.substr(l, r - l + 1);
+                msg["reasoning_content"] = think_text;
+            }
+        }
+    };
+
+    CustomParser parser;
+
+    ov::genai::JsonContainer msg;
+    msg["content"] = "<think>This is reasoning.</think> And this is the answer";
+
+    parser.parse(msg);
+
+    ASSERT_TRUE(msg.contains("reasoning_content"));
+    ASSERT_EQ(msg["reasoning_content"].get_string(), "This is reasoning.");
+}
+
+TEST(ParserTest, CustomParser_AccumulatesBetweenStartStop) {
+    using namespace ov::genai;
+
+    // Custom incremental parser: mirrors the Python logic
+    class CustomParser : public IncrementalParserBase {
+    public:
+        bool main_part_started = false;
+
+        std::string parse(JsonContainer& msg,
+                          const std::string& previous_text,
+                          std::string& delta_text,
+                          const std::optional<std::vector<int64_t>>& /*previous_tokens*/ = std::nullopt,
+                          const std::optional<std::vector<int64_t>>& /*delta_tokens*/ = std::nullopt) override {
+            // Ensure fields exist (Python test used dict defaults)
+            if (!msg.contains("content")) {
+                msg.to_empty_object();
+                msg["content"] = std::string{};
+            }
+            if (!msg.contains("reasoning_content")) {
+                msg["reasoning_content"] = std::string{};
+            }
+
+            if (!main_part_started && delta_text == "<think>") {
+                main_part_started = true;
+            } else if (main_part_started && delta_text == "</think>") {
+                main_part_started = false;
+            } else {
+                if (main_part_started) {
+                    // Append delta into reasoning_content
+                    auto cur = msg["reasoning_content"].as_string().value_or("");
+                    cur += delta_text;
+                    msg["reasoning_content"] = cur;
+                }
+            }
+            // Return delta_text (same as Python)
+            return delta_text;
+        }
+
+        // Virtual dtor for safety
+        ~CustomParser() override = default;
+    };
+
+    class CustomStreamer : public ov::genai::TextParserStreamer {
+    public:
+        using TextParserStreamer::write;
+        // Forwarding constructor to base class
+        CustomStreamer(ov::genai::Tokenizer& tok, const std::vector<std::shared_ptr<IncrementalParserBase>>& parsers)
+            : ov::genai::TextParserStreamer(tok, parsers) {}
+
+        JsonContainer final_msg;
+        StreamingStatus write(JsonContainer& message) override {
+            final_msg = message;
+            return StreamingStatus::RUNNING;
+        }
+    };
+
+    Tokenizer tok;
+    std::shared_ptr<CustomParser> parser = std::make_shared<CustomParser>();
+    CustomStreamer streamer(tok, {parser});
+
+
+    // Same stream as in the Python example
+    std::vector<std::string> stream_string = {"<think>", " ", "world", " ", "</think>", "!"};
+
+    for (size_t i = 0; i < stream_string.size(); ++i) {
+        streamer.write(stream_string[i]);
+    }
+
+    JsonContainer msg = streamer.get_parsed_message();
+    ASSERT_TRUE(msg.contains("reasoning_content"));
+    ASSERT_EQ(msg["reasoning_content"].get_string(), " world ");
+}
diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py
index fd0407b31f..6535a1130c 100644
--- a/tests/python_tests/test_parsers.py
+++ b/tests/python_tests/test_parsers.py
@@ -3,13 +3,14 @@
 import dataclasses
 import json
 from typing import Optional
-
+from utils.hugging_face import convert_and_save_tokenizer, download_and_convert_model
+from utils.ov_genai_pipelines import create_ov_pipeline
+from utils.network import retry_request
 import numpy as np
 import openvino
 import pytest
-from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus, Llama32JsonToolParser, Phi4ReasoningParser, DeepSeekR1ReasoningParser
+from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus, Llama32JsonToolParser, Phi4ReasoningParser, DeepSeekR1ReasoningParser, GenerationConfig
 from transformers import AutoTokenizer
-from utils.hugging_face import convert_and_save_tokenizer
 import re
 
 
@@ -121,6 +122,112 @@ def test_final_parser_llama_32_json(hf_ov_genai_models):
     assert content_json['tool_calls'][0] == json.loads(json_str)
 
 
+@pytest.mark.precommit
+@pytest.mark.parametrize(
+    "hf_ov_genai_models",
["katuni4ka/tiny-random-phi3"], + indirect=True +) +def test_custom_streamer_parser(hf_ov_genai_models): + hf_tokenizer, genai_tokenizer = hf_ov_genai_models + + class CustomParser(IncrementalParserBase): + main_part_started: bool = False + + def parse(self, msg: dict, previous_text: str, delta_text: str, prev_tokens = None, delta_tokens = None) -> str: + if 'content' not in msg: + msg['content'] = '' + if 'main_text' not in msg: + msg['main_text'] = '' + + if not self.main_part_started and delta_text == '': + self.main_part_started = True + elif self.main_part_started and delta_text == '': + self.main_part_started = False + else: + if self.main_part_started: + msg['main_text'] += delta_text + + return delta_text + + msg = {} + class CustomStreamer(TextParserStreamer): + def write(self, message): + msg.update(message) + return StreamingStatus.RUNNING + + streamer = CustomStreamer(genai_tokenizer, parsers=[CustomParser()]) + + stream_string = ["Hello", "", " ", "world", " ", "", "!"] + + for subword in stream_string: + streamer._write(subword) + + assert msg['main_text'] == ''.join(" world ") + +# @pytest.mark.precommit +# @pytest.mark.parametrize( +# "hf_ov_genai_models", +# ["microsoft/Phi-4-mini-reasoning"], +# # ["katuni4ka/tiny-random-phi3"], +# indirect=True +# ) +# def test_custom_parser_(hf_ov_genai_models): + + +# msg = { +# "content": "This is reasoning. And this is the answer" +# } +# parser.parse(msg) + +# assert msg['reasoning_content'] == "This is reasoning." + +@pytest.mark.parametrize("model_id", ["microsoft/Phi-4-mini-reasoning"]) +@pytest.mark.nightly +def test_custom_parser(tmp_path, model_id): + _, _, models_path = download_and_convert_model(model_id, padding_side="left") + pipe = create_ov_pipeline(models_path) + tok = pipe.get_tokenizer() + + class CustomParser(ParserBase): + def parse(self, msg: dict): + content = None + if 'content' in msg: + content = msg['content'] + if not content: + return + + # find text between and + think_start = content.find("") + think_end = content.find("") + if think_start != -1 and think_end != -1 and think_end > think_start: + think_text = content[think_start + len(""):think_end].strip() + msg['reasoning_content'] = think_text + + class CustomStreamer(TextParserStreamer): + def write(self, message): + # make whatever you want with message, but it will be accumulated and parsed by parser afterwards + # accumulated message can be found by get_parsed_message() + return StreamingStatus.RUNNING + + parser = CustomParser() + config = GenerationConfig() + config.max_new_tokens = 600 + config.parsers = [parser] + + res = pipe.generate(["Please say \"hello\""], generation_config=config) + + # extract manually reasoning content from the parsed result + content = res.texts[0] + think_start = content.find("") + think_end = content.find("") + if think_start != -1 and think_end != -1 and think_end > think_start: + think_text = content[think_start + len(""):think_end].strip() + + assert 'reasoning_content' in res.parsed[0] + assert res.parsed[0]['reasoning_content'] != "" + assert res.parsed[0]['reasoning_content'] == think_text + def test_parsers_2(hf_ov_genai_models): hf_tokenizer, genai_tokenizer = hf_ov_genai_models class CustomStreamer(TextParserStreamer): @@ -150,7 +257,7 @@ def write(self, message): parsers = streamer.get_parsers() extended = stream_string[:] - extended.append("") + extended.insert(0, "") for parser in parsers: for (prev_subword, subword) in zip(extended, stream_string): @@ -159,4 +266,5 @@ def write(self, message): assert 
msg['reasoning_content'] == think_content assert msg['content'] == content -# TODO: add tests when streamer is called directly instead of manual subsequent calling of parsers. +# TODO: add when streamer accepts integer tokens + From e4ff386971427d8666c64292acceccb6e9e0571b Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Wed, 22 Oct 2025 11:12:27 +0200 Subject: [PATCH 25/43] cleanup pybindings; rename Llama32 -> Llama3 --- src/cpp/include/openvino/genai/parsers.hpp | 22 +- src/cpp/src/parsers.cpp | 24 +-- src/python/openvino_genai/__init__.py | 4 +- src/python/openvino_genai/__init__.pyi | 6 +- .../openvino_genai/py_openvino_genai.pyi | 22 +- src/python/py_parsers.cpp | 192 ++++-------------- tests/cpp/parser.cpp | 8 +- tests/python_tests/test_parsers.py | 148 +++++++------- 8 files changed, 144 insertions(+), 282 deletions(-) diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 4af6fb2cca..93b4b099a1 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -63,26 +63,26 @@ class ParserBase { virtual void parse(JsonContainer& text) = 0; }; -class OPENVINO_GENAI_EXPORTS Llama32PythonicToolParser : public ParserBase { +class OPENVINO_GENAI_EXPORTS Llama3PythonicToolParser : public ParserBase { // Does not modify original content, only extracts and adds tool calls public: - explicit Llama32PythonicToolParser(bool keep_original_content = true); - ~Llama32PythonicToolParser(); + explicit Llama3PythonicToolParser(bool keep_original_content = true); + ~Llama3PythonicToolParser(); void parse(JsonContainer& input) override; private: - class Llama32PythonicToolParserImpl; - std::unique_ptr m_impl; + class Llama3PythonicToolParserImpl; + std::unique_ptr m_impl; }; -class OPENVINO_GENAI_EXPORTS Llama32JsonToolParser : public ParserBase { +class OPENVINO_GENAI_EXPORTS Llama3JsonToolParser : public ParserBase { // Does not modify original content, only extracts and adds tool calls public: - explicit Llama32JsonToolParser(bool keep_original_content = true); - ~Llama32JsonToolParser(); + explicit Llama3JsonToolParser(bool keep_original_content = true); + ~Llama3JsonToolParser(); void parse(JsonContainer& input) override; private: - class Llama32JsonToolParserImpl; - std::unique_ptr m_impl; + class Llama3JsonToolParserImpl; + std::unique_ptr m_impl; }; class OPENVINO_GENAI_EXPORTS BaseReasoningParser : public ParserBase{ @@ -99,5 +99,7 @@ class OPENVINO_GENAI_EXPORTS BaseReasoningParser : public ParserBase{ std::unique_ptr m_impl; }; +// TODO: DeepSeekR1ReasoningParser -> DeepSeekR1IncrementalParser + } // namespace genai } // namespace ov diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index 0a8670dbc4..df3ffd6a31 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -163,9 +163,9 @@ std::string ReasoningParser::parse( return m_impl->parse(msg, previous_text, delta_text, previous_tokens, delta_tokens); } -class Llama32PythonicToolParser::Llama32PythonicToolParserImpl { +class Llama3PythonicToolParser::Llama3PythonicToolParserImpl { public: - Llama32PythonicToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {} + Llama3PythonicToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {} bool m_keep_original_content; void parse(JsonContainer& input) { @@ -205,21 +205,21 @@ class Llama32PythonicToolParser::Llama32PythonicToolParserImpl { } }; -Llama32PythonicToolParser::Llama32PythonicToolParser(bool 
keep_original_content) { - m_impl = std::make_unique(keep_original_content); +Llama3PythonicToolParser::Llama3PythonicToolParser(bool keep_original_content) { + m_impl = std::make_unique(keep_original_content); } -void Llama32PythonicToolParser::parse(JsonContainer& input) { +void Llama3PythonicToolParser::parse(JsonContainer& input) { m_impl->parse(input); } -Llama32PythonicToolParser::~Llama32PythonicToolParser() = default; +Llama3PythonicToolParser::~Llama3PythonicToolParser() = default; -class Llama32JsonToolParser::Llama32JsonToolParserImpl { +class Llama3JsonToolParser::Llama3JsonToolParserImpl { private: bool m_keep_original_content; public: - Llama32JsonToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {} + Llama3JsonToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {} void parse(JsonContainer& message) { // Find JSON in the message @@ -240,15 +240,15 @@ class Llama32JsonToolParser::Llama32JsonToolParserImpl { } }; -Llama32JsonToolParser::Llama32JsonToolParser(bool keep_original_content) { - m_impl = std::make_unique(keep_original_content); +Llama3JsonToolParser::Llama3JsonToolParser(bool keep_original_content) { + m_impl = std::make_unique(keep_original_content); } -void Llama32JsonToolParser::parse(JsonContainer& input) { +void Llama3JsonToolParser::parse(JsonContainer& input) { m_impl->parse(input); } -Llama32JsonToolParser::~Llama32JsonToolParser() = default; +Llama3JsonToolParser::~Llama3JsonToolParser() = default; class BaseReasoningParser::BaseReasoningParserImpl { public: diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py index ba99ce7e83..34e0b153f4 100644 --- a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -27,8 +27,8 @@ IncrementalParserBase, Phi4ReasoningParser, DeepSeekR1ReasoningParser, - Llama32JsonToolParser, - Llama32PythonicToolParser, + Llama3JsonToolParser, + Llama3PythonicToolParser, ) __version__ = get_version() diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index f8fd25d6a8..00f67b6d2d 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -31,8 +31,8 @@ from openvino_genai.py_openvino_genai import InpaintingPipeline from openvino_genai.py_openvino_genai import KVCrushAnchorPointMode from openvino_genai.py_openvino_genai import KVCrushConfig from openvino_genai.py_openvino_genai import LLMPipeline -from openvino_genai.py_openvino_genai import Llama32JsonToolParser -from openvino_genai.py_openvino_genai import Llama32PythonicToolParser +from openvino_genai.py_openvino_genai import Llama3JsonToolParser +from openvino_genai.py_openvino_genai import Llama3PythonicToolParser from openvino_genai.py_openvino_genai import ParserBase from openvino_genai.py_openvino_genai import PerfMetrics from openvino_genai.py_openvino_genai import Phi4ReasoningParser @@ -72,5 +72,5 @@ from openvino_genai.py_openvino_genai import draft_model from openvino_genai.py_openvino_genai import get_version import os as os from . 
import py_openvino_genai -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama32JsonToolParser', 'Llama32PythonicToolParser', 'ParserBase', 'PerfMetrics', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'ParserBase', 'PerfMetrics', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 8741d691b6..4a27153742 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import collections.abc import openvino._pyopenvino import typing -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 
'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningParser', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama32JsonToolParser', 'Llama32PythonicToolParser', 'MeanStdPair', 'ParserBase', 'PerfMetrics', 'Phi4ReasoningParser', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningParser', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'MeanStdPair', 'ParserBase', 'PerfMetrics', 'Phi4ReasoningParser', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] class Adapter: """ Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. 
@@ -580,10 +580,6 @@ class DecodedResults: class DeepSeekR1ReasoningParser(IncrementalParserBase): def __init__(self) -> None: ... - def parse(self, msg: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: - """ - Parse is called with the full text. Returns a dict with parsed content. - """ class EncodedGenerationResult: """ @@ -1826,20 +1822,12 @@ class LLMPipeline: ... def start_chat(self, system_message: str = '') -> None: ... -class Llama32JsonToolParser(ParserBase): +class Llama3JsonToolParser(ParserBase): def __init__(self) -> None: ... - def parse(self, text: dict) -> None: - """ - Parse is called with the full text. Returns a dict with parsed content. - """ -class Llama32PythonicToolParser(ParserBase): +class Llama3PythonicToolParser(ParserBase): def __init__(self) -> None: ... - def parse(self, text: dict) -> None: - """ - Parse is called with the full text. Returns a dict with parsed content. - """ class MeanStdPair: def __init__(self) -> None: ... @@ -1963,10 +1951,6 @@ class PerfMetrics: class Phi4ReasoningParser(IncrementalParserBase): def __init__(self, expect_open_tag: bool = False) -> None: ... - def parse(self, msg: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: - """ - Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output. - """ class PipelineMetrics: """ diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index d853f507ef..a7ffba9b69 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -19,8 +19,8 @@ using ov::genai::ReasoningParser; using ov::genai::Phi4ReasoningParser; using ov::genai::DeepSeekR1ReasoningParser; using ov::genai::JsonContainer; -using ov::genai::Llama32JsonToolParser; -using ov::genai::Llama32PythonicToolParser; +using ov::genai::Llama3JsonToolParser; +using ov::genai::Llama3PythonicToolParser; using ov::genai::Tokenizer; using ov::genai::StreamingStatus; @@ -28,7 +28,8 @@ namespace pyutils = ov::genai::pybind::utils; namespace { - +// ConstructableIncrementalParserBase and ConstructableParserBase are used when python overload is called from C++ +// and we need to convert JsonContainer to py::dict and then update back JsonContainer from the py::dict which was modified in Python. 
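+//
+// A sketch of the Python-side subclass these trampolines enable (illustrative only; MyParser is a
+// hypothetical name, but the signature mirrors what tests/python_tests/test_parsers.py exercises):
+//
+//   class MyParser(IncrementalParserBase):
+//       def parse(self, msg: dict, previous_text: str, delta_text: str, prev_tokens=None, delta_tokens=None) -> str:
+//           msg["reasoning_content"] = msg.get("reasoning_content", "") + delta_text  # dict edits are copied back into the C++ JsonContainer
+//           return delta_text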
class ConstructableIncrementalParserBase: public IncrementalParserBase { public: using IncrementalParserBase::IncrementalParserBase; @@ -56,7 +57,6 @@ class ConstructableIncrementalParserBase: public IncrementalParserBase { ); // iterate throught py_msg and update msg - msg.clear(); auto msg_anymap = pyutils::py_object_to_any_map(py_msg); for (const auto& [key, value] : msg_anymap) { if (value.is()) { @@ -69,26 +69,6 @@ class ConstructableIncrementalParserBase: public IncrementalParserBase { } return res.cast(); } - - // This method should be overridden in Python - std::string parse( - py::dict& msg, - const std::string& previous_text, - std::string& delta_text, - const std::optional>& previous_tokens = std::nullopt, - const std::optional>& delta_tokens = std::nullopt - ) { - PYBIND11_OVERRIDE_PURE( - std::string, - IncrementalParserBase, - "parse", - msg, - previous_text, - delta_text, - previous_tokens, - delta_tokens - ); - } }; class ConstructableParserBase: public ParserBase { @@ -106,7 +86,6 @@ class ConstructableParserBase: public ParserBase { parse_method(py_msg); // iterate throught py_msg and update msg - msg.clear(); auto msg_anymap = pyutils::py_object_to_any_map(py_msg); for (const auto& [key, value] : msg_anymap) { if (value.is()) { @@ -120,53 +99,6 @@ class ConstructableParserBase: public ParserBase { } }; - -// wrapper to enhance calling parser from Python -void call_parser(py::dict& msg, std::function func) { - auto msg_cpp = pyutils::py_object_to_json_container(msg); - func(msg_cpp); - - // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here, - py::object json_mod = py::module_::import("json"); - - // since it create a new object instead of updating existing dict. - auto json_str = msg_cpp.to_json_string(); - py::dict result = json_mod.attr("loads")(json_str); - - // update msg with result - msg.clear(); - for (auto item : result) { - msg[item.first] = item.second; - } -} - -// wrapper to enhance calling incremental parser from Python -std::string call_incremental_parser( - IncrementalParserBase& parser, - py::dict& msg, - const std::string& previous_text, - std::string& delta_text, - const std::optional>& previous_tokens, - const std::optional>& delta_tokens, - std::function>&, - const std::optional>&)> func) { - auto msg_cpp = pyutils::py_object_to_json_container(msg); - - auto res = func(msg_cpp, previous_text, delta_text, previous_tokens, delta_tokens); - auto json_str = msg_cpp.to_json_string(); - - // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here, - // since it create a new object instead of updating existing dict. - py::object json_mod = py::module_::import("json"); - py::dict result = json_mod.attr("loads")(json_str); - // update msg with result - msg.clear(); - for (auto item : result) { - msg[item.first] = item.second; - } - return res; -} - } // namespace // TODO: double check/add more relevant docstrings for parsers. 
@@ -179,100 +111,54 @@ void init_parsers(py::module_& m) { std::string& delta_text, const std::optional>& previous_tokens = std::nullopt, const std::optional>& delta_tokens = std::nullopt) { - return call_incremental_parser( - self, - msg, - previous_text, - delta_text, - previous_tokens, - delta_tokens, - [&self](JsonContainer& m, const std::string& prev_text, std::string& delta_t, - const std::optional>& prev_tokens, - const std::optional>& delta_toks) { - return self.parse(m, prev_text, delta_t, prev_tokens, delta_toks); - } - ); + auto msg_cpp = pyutils::py_object_to_json_container(msg); + auto res = self.parse(msg_cpp, previous_text, delta_text, previous_tokens, delta_tokens); + auto json_str = msg_cpp.to_json_string(); + + // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here, + // since it create a new object instead of updating existing dict. + py::object json_mod = py::module_::import("json"); + py::dict result = json_mod.attr("loads")(json_str); + // update msg with result + for (auto item : result) { + msg[item.first] = item.second; + } + return res; }, py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt, "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output."); py::class_, IncrementalParserBase>(m, "Phi4ReasoningParser") - .def(py::init(), py::arg("expect_open_tag") = false) - .def("parse", - [](Phi4ReasoningParser& self, - py::dict& msg, - const std::string& previous_text, - std::string& delta_text, - const std::optional>& previous_tokens = std::nullopt, - const std::optional>& delta_tokens = std::nullopt) { - return call_incremental_parser( - self, - msg, - previous_text, - delta_text, - previous_tokens, - delta_tokens, - [&self](JsonContainer& m, const std::string& prev_text, std::string& delta_t, - const std::optional>& prev_tokens, - const std::optional>& delta_toks) { - return self.parse(m, prev_text, delta_t, prev_tokens, delta_toks); - }); - }, - "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output.", - py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), - py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt); + .def(py::init(), py::arg("expect_open_tag") = false); py::class_, IncrementalParserBase>(m, "DeepSeekR1ReasoningParser") - .def(py::init<>()) - .def("parse", - [](DeepSeekR1ReasoningParser& self, - py::dict& msg, - const std::string& previous_text, - std::string& delta_text, - const std::optional>& previous_tokens = std::nullopt, - const std::optional>& delta_tokens = std::nullopt) { - return call_incremental_parser( - self, - msg, - previous_text, - delta_text, - previous_tokens, - delta_tokens, - [&self](JsonContainer& m, const std::string& prev_text, std::string& delta_t, - const std::optional>& prev_tokens, - const std::optional>& delta_toks) { - return self.parse(m, prev_text, delta_t, prev_tokens, delta_toks); - } - ); - }, - "Parse is called with the full text. 
Returns a dict with parsed content.", - py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), - py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt); + .def(py::init<>()); py::class_>(m, "ParserBase") - .def(py::init<>()) - .def("parse", - [](ParserBase& self, py::dict& msg) { - return call_parser(msg, [&self](JsonContainer& m) {self.parse(m);}); - }, - py::arg("text"), - "Parse is called with the full text. Returns a dict with parsed content."); - - py::class_, ParserBase>(m, "Llama32JsonToolParser") .def(py::init<>()) .def("parse", - [](Llama32JsonToolParser& self, py::dict& msg) { - return call_parser(msg, [&self](JsonContainer& m) { self.parse(m); }); + [](ParserBase& self, py::dict& msg) { + auto msg_cpp = pyutils::py_object_to_json_container(msg); + self.parse(msg_cpp); + + // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here, + py::object json_mod = py::module_::import("json"); + + // since it create a new object instead of updating existing dict. + auto json_str = msg_cpp.to_json_string(); + py::dict result = json_mod.attr("loads")(json_str); + + // update msg with result + for (auto item : result) { + msg[item.first] = item.second; + } }, py::arg("text"), "Parse is called with the full text. Returns a dict with parsed content."); - py::class_, ParserBase>(m, "Llama32PythonicToolParser") - .def(py::init<>()) - .def("parse", - [](Llama32PythonicToolParser& self, py::dict& msg) { - return call_parser(msg, [&self](JsonContainer& m) { self.parse(m); }); - }, - py::arg("text"), - "Parse is called with the full text. Returns a dict with parsed content."); + py::class_, ParserBase>(m, "Llama3JsonToolParser") + .def(py::init<>()); + + py::class_, ParserBase>(m, "Llama3PythonicToolParser") + .def(py::init<>()); } diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp index a74a12f499..e31bd3c236 100644 --- a/tests/cpp/parser.cpp +++ b/tests/cpp/parser.cpp @@ -9,7 +9,7 @@ using namespace ov::genai; -TEST(ParserTest, test_llama32_parser_1) { +TEST(ParserTest, test_llama3_parser_1) { std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location="New York, NY", unit="celsius")]<|eom_id|>)"; // By default content should keep original values. 
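+// Illustrative note (not part of the patch): with the default keep_original_content=true the parser
+// leaves input["content"] untouched and only adds an input["tool_calls"] entry, e.g.
+//   Llama3PythonicToolParser parser;                                       // keeps "content" as-is
+//   Llama3PythonicToolParser stripping(/*keep_original_content*/ false);   // removes the call from "content"
+// test_llama3_parser_2 below covers the stripping variant.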
@@ -25,7 +25,7 @@ TEST(ParserTest, test_llama32_parser_1) {
     }));
 
-    std::shared_ptr<ParserBase> parser = std::make_shared<Llama32PythonicToolParser>();
+    std::shared_ptr<ParserBase> parser = std::make_shared<Llama3PythonicToolParser>();
     JsonContainer input;
     input["content"] = prompt;
     parser->parse(input);
@@ -33,7 +33,7 @@ TEST(ParserTest, test_llama32_parser_1) {
     ASSERT_TRUE(expected == input);
 }
 
-TEST(ParserTest, test_llama32_parser_2) {
+TEST(ParserTest, test_llama3_parser_2) {
     std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location="New York, NY", unit="celsius")]<|eom_id|>)";
 
     JsonContainer expected;
@@ -47,7 +47,7 @@ TEST(ParserTest, test_llama32_parser_2) {
         }}
     })));
 
-    std::shared_ptr<ParserBase> parser = std::make_shared<Llama32PythonicToolParser>(/*keep_original_content*/ false);
+    std::shared_ptr<ParserBase> parser = std::make_shared<Llama3PythonicToolParser>(/*keep_original_content*/ false);
     JsonContainer input;
     input["content"] = prompt;
     parser->parse(input);
diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py
index 6535a1130c..08574d9c89 100644
--- a/tests/python_tests/test_parsers.py
+++ b/tests/python_tests/test_parsers.py
@@ -1,15 +1,10 @@
 # Copyright (C) 2023-2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
-import dataclasses
 import json
-from typing import Optional
-
+from utils.hugging_face import convert_and_save_tokenizer, download_and_convert_model
 from utils.ov_genai_pipelines import create_ov_pipeline
-from utils.network import retry_request
-import numpy as np
-import openvino
 import pytest
-from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus, Llama32JsonToolParser, Phi4ReasoningParser, DeepSeekR1ReasoningParser, GenerationConfig
+from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus, Llama3JsonToolParser, Phi4ReasoningParser, DeepSeekR1ReasoningParser, GenerationConfig
 from transformers import AutoTokenizer
 import re
 
 
@@ -46,7 +41,7 @@ def hf_ov_genai_models(request, tmp_path_factory):
         "the box.\n</think>\n\nThe answer to 2 + 1 is \boxed{3}."
     ),
 ])
-def test_phi4_reason_parser_1(hf_ov_genai_models, answer):
+def test_incremental_phi4_reason_parser_1(hf_ov_genai_models, answer):
     hf_tokenizer, genai_tokenizer = hf_ov_genai_models
 
     stream_string = re.split(r"(\s+)", answer)
@@ -82,7 +77,7 @@ def write(self, message):
     # check that if thinking opening and closing tags are passed in a single subword, it is still parsed correctly
     ["<think>\nOkay, the user is asking for the answer to 2 + 1.\n</think>\nThe answer to 2 + 1 is \boxed{3}."]
 ])
-def test_phi4_reason_parser_2(hf_ov_genai_models, split_answer):
+def test_incremental_phi4_reason_parser_2(hf_ov_genai_models, split_answer):
     # check that if thinking opening and closing tags are in the middle of the subword, it is still parsed correctly
     hf_tokenizer, genai_tokenizer = hf_ov_genai_models
 
@@ -104,22 +99,54 @@ def write(self, message):
 
 
 @pytest.mark.precommit
-@pytest.mark.parametrize(
-    "hf_ov_genai_models",
-    ["katuni4ka/tiny-random-phi3"],
-    indirect=True
-)
-def test_final_parser_llama_32_json(hf_ov_genai_models):
-    hf_tokenizer, genai_tokenizer = hf_ov_genai_models
+@pytest.mark.parametrize("answer", [
+    "<think>\nOkay, the user is asking for the answer to 2 + 1.\n</think>\nThe answer to 2 + 1 is \boxed{3}.",
+])
+def test_incremental_phi4_reason_parser_nostreamer(answer):
+    # In this test we call the parser directly, without a streamer
+    parser = Phi4ReasoningParser()
+
+    stream_string = re.split(r"(\s+)", answer)
+    msg = {}
+    for subword in stream_string:
+        parser.parse(msg, '', subword)
+        # When the parser is called from a streamer, content is accumulated inside the streamer.
+        # Here we call the parser manually, therefore we need to accumulate content manually.
+        msg['content'] += subword
 
-    json_str = '{"type": "function", "function": {"name": "get_weather", "parameters": {"location": "New York, NY", "unit": "celsius"}}}'
-    content_json = {
-        "content": f"Calling weather API: {json_str}"
-    }
+    think_content = answer.split("</think>")[0].replace("<think>", "")
+    content = answer
 
-    parser = Llama32JsonToolParser()
-    parser.parse(content_json)
-    assert content_json['tool_calls'][0] == json.loads(json_str)
+    assert msg['reasoning_content'] == think_content
+    assert msg['content'] == content
+
+
+def test_incremental_deepseek_parser():
+    msg = {}
+    stream_string = [
+        "<|begin▁of▁sentence|>", "First", ",", " I", " recognize", " that", " the", " question", " is", " asking",
+        " for", " the", " sum", " of", " ", "2", " and", " ", "1", ".\n\n", "I", " know", " that", " addition",
+        " involves", " combining", " two", " numbers", " to", " find", " their", " total", ".\n\n", "Starting",
+        " with", " ", "2", ",", " I", " add", " ", "1", " to", " it", ".\n\n", "2", " plus", " ", "1", " equals",
+        " ", "3", ".\n", "</think>", "\n\n", "**", "Solution", ":", "**\n\n", "To", " find", " the", " sum",
+        " of", " ", "2", " and", " ", "1", " follow", " these", " simple", " steps", ":\n\n", "1", ".", " **",
+        "Start", " with", " the", " number", " ", "2", ".", "**\n", "2", ".", " **", "Add", " ", "1", " to",
+        " it", ".", "**\n", " \n", " ", " \\", "[\n", " "
+    ]
+
+    full_str = ''.join(stream_string)
+    think_content = full_str.split("</think>")[0]
+    content = full_str.split("</think>")[1]
+
+    extended = stream_string[:]
+    extended.insert(0, "")
+
+    parser = DeepSeekR1ReasoningParser()
+    for (prev_subword, subword) in zip(extended, stream_string):
+        parser.parse(msg, prev_subword, subword)
+
+    assert msg['reasoning_content'] == think_content
+    assert msg['content'] == content
 
 
 @pytest.mark.precommit
@@ -128,7 +155,7 @@ def
test_final_parser_llama_32_json(hf_ov_genai_models): ["katuni4ka/tiny-random-phi3"], indirect=True ) -def test_custom_streamer_parser(hf_ov_genai_models): +def test_custom_incremental_parser(hf_ov_genai_models): hf_tokenizer, genai_tokenizer = hf_ov_genai_models class CustomParser(IncrementalParserBase): @@ -155,7 +182,6 @@ class CustomStreamer(TextParserStreamer): def write(self, message): msg.update(message) return StreamingStatus.RUNNING - streamer = CustomStreamer(genai_tokenizer, parsers=[CustomParser()]) stream_string = ["Hello", "", " ", "world", " ", "", "!"] @@ -165,22 +191,28 @@ def write(self, message): assert msg['main_text'] == ''.join(" world ") -# @pytest.mark.precommit -# @pytest.mark.parametrize( -# "hf_ov_genai_models", -# ["microsoft/Phi-4-mini-reasoning"], -# # ["katuni4ka/tiny-random-phi3"], -# indirect=True -# ) -# def test_custom_parser_(hf_ov_genai_models): +@pytest.mark.precommit +@pytest.mark.parametrize( + "hf_ov_genai_models", + ["katuni4ka/tiny-random-phi3"], + indirect=True +) +def test_final_parser_llama_32_json(hf_ov_genai_models): + hf_tokenizer, genai_tokenizer = hf_ov_genai_models -# msg = { -# "content": "This is reasoning. And this is the answer" -# } -# parser.parse(msg) + json_str = '{"type": "function", "function": {"name": "get_weather", "parameters": {"location": "New York, NY", "unit": "celsius"}}}' + content_json = { + "content": f"Calling weather API: {json_str}" + } + + parser = Llama3JsonToolParser() + parser.parse(content_json) + assert content_json['tool_calls'][0] == json.loads(json_str) + + +# TODO: add test when several parsers are called. -# assert msg['reasoning_content'] == "This is reasoning." @pytest.mark.parametrize("model_id", ["microsoft/Phi-4-mini-reasoning"]) @pytest.mark.nightly @@ -203,12 +235,6 @@ def parse(self, msg: dict): if think_start != -1 and think_end != -1 and think_end > think_start: think_text = content[think_start + len(""):think_end].strip() msg['reasoning_content'] = think_text - - class CustomStreamer(TextParserStreamer): - def write(self, message): - # make whatever you want with message, but it will be accumulated and parsed by parser afterwards - # accumulated message can be found by get_parsed_message() - return StreamingStatus.RUNNING parser = CustomParser() config = GenerationConfig() @@ -228,43 +254,7 @@ def write(self, message): assert res.parsed[0]['reasoning_content'] != "" assert res.parsed[0]['reasoning_content'] == think_text -def test_parsers_2(hf_ov_genai_models): - hf_tokenizer, genai_tokenizer = hf_ov_genai_models - class CustomStreamer(TextParserStreamer): - def write(self, message): - if "content" in message: - print(message["content"]) - return StreamingStatus.RUNNING - streamer = TextParserStreamer(genai_tokenizer, parsers=[DeepSeekR1ReasoningParser()]) - msg = {} - stream_string = [ - "<|begin▁of▁sentence|>", "First", ",", " I", " recognize", " that", " the", " question", " is", " asking", - " for", " the", " sum", " of", " ", "2", " and", " ", "1", ".\n\n", "I", " know", " that", " addition", - " involves", " combining", " two", " numbers", " to", " find", " their", " total", ".\n\n", "Starting", - " with", " ", "2", ",", " I", " add", " ", "1", " to", " it", ".\n\n", "2", " plus", " ", "1", " equals", - " ", "3", ".\n", "", "\n\n", "**", "Solution", ":", "**\n\n", "To", " find", " the", " sum", - " of", " ", "2", " and", " ", "1", " follow", " these", " simple", " steps", ":\n\n", "1", ".", " **", - "Start", " with", " the", " number", " ", "2", ".", "**\n", "2", ".", " **", "Add", " ", 
"1", " to", - " it", ".", "**\n", " \n", " ", " \\", "[\n", " " - ] - - full_str = ''.join(stream_string) - think_content = full_str.split("")[0] - content = full_str.split("")[1] - - parsers = streamer.get_parsers() - - extended = stream_string[:] - extended.insert(0, "") - - for parser in parsers: - for (prev_subword, subword) in zip(extended, stream_string): - msg = parser.parse(msg, prev_subword, subword) - - assert msg['reasoning_content'] == think_content - assert msg['content'] == content # TODO: add when streamer accepts integer tokens - From 196a54c1ac5a2813bbbbed1abf68d0ff40e87082 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Wed, 22 Oct 2025 12:52:40 +0200 Subject: [PATCH 26/43] corrected expect_open_tag behaviour, added some tests --- src/cpp/include/openvino/genai/parsers.hpp | 4 +- .../include/openvino/genai/text_streamer.hpp | 1 + src/cpp/src/parsers.cpp | 13 ++-- src/python/openvino_genai/__init__.py | 1 + src/python/py_generation_config.cpp | 2 +- src/python/py_parsers.cpp | 11 ++- src/python/py_streamers.cpp | 13 ++-- tests/python_tests/test_parsers.py | 71 ++++++++++++++++--- 8 files changed, 91 insertions(+), 25 deletions(-) diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 93b4b099a1..7b85c3541c 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -48,12 +48,12 @@ class OPENVINO_GENAI_EXPORTS ReasoningParser : public IncrementalParserBase { class OPENVINO_GENAI_EXPORTS DeepSeekR1ReasoningParser : public ReasoningParser { public: - explicit DeepSeekR1ReasoningParser(bool expect_open_tag = true) : ReasoningParser(expect_open_tag) {}; + explicit DeepSeekR1ReasoningParser(bool expect_open_tag = false) : ReasoningParser(expect_open_tag) {}; }; class OPENVINO_GENAI_EXPORTS Phi4ReasoningParser : public ReasoningParser { public: - explicit Phi4ReasoningParser(bool expect_open_tag = false) : ReasoningParser(expect_open_tag) {}; + explicit Phi4ReasoningParser(bool expect_open_tag = true) : ReasoningParser(expect_open_tag) {}; }; class ParserBase { diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp index 816427c985..53324def89 100644 --- a/src/cpp/include/openvino/genai/text_streamer.hpp +++ b/src/cpp/include/openvino/genai/text_streamer.hpp @@ -49,6 +49,7 @@ class OPENVINO_GENAI_EXPORTS TextStreamer : public StreamerBase { class OPENVINO_GENAI_EXPORTS TextParserStreamer : public TextStreamer { public: + using TextStreamer::write; TextParserStreamer(const Tokenizer& tokenizer, std::vector> parsers = {}); virtual StreamingStatus write(JsonContainer& message) = 0; diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index df3ffd6a31..8de2bd5e1f 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -40,7 +40,7 @@ class ReasoningParser::ReasoningParserImpl { if (m_deactivated) { return delta_text; } - if (m_expect_open_tag && m_first_run) { + if (!m_expect_open_tag && m_first_run) { m_think_tag_opened = true; } m_first_run = false; @@ -57,10 +57,11 @@ class ReasoningParser::ReasoningParserImpl { auto reason_str = msg["reasoning_content"].get_string(); auto content_str = msg["content"].get_string(); - if (!m_think_tag_opened && txt_chunk.find(m_open_tag) != std::string::npos && !m_expect_open_tag) { + if (!m_think_tag_opened && txt_chunk.find(m_open_tag) != std::string::npos && m_expect_open_tag) { // Thinking has started auto open_idx = txt_chunk.find(m_open_tag); - reason_str += 
+
+            reason_str += txt_chunk.substr(open_idx + m_open_tag.size(), txt_chunk.size() - (open_idx + m_open_tag.size()));
             if (!m_keep_original_content) {
                 delta_text = "";
             }
@@ -72,8 +73,8 @@ class ReasoningParser::ReasoningParserImpl {
             if (txt_chunk.find(m_close_tag) != std::string::npos) {
                 // If <think> and </think> are in the same txt_chunk + delta_text
                 auto close_idx = txt_chunk.find(m_close_tag);
-                reason_str = txt_chunk.substr(open_idx + std::string(m_open_tag).size(), close_idx - (open_idx + std::string(m_open_tag).size()));
-                content_str = txt_chunk.substr(close_idx + std::string(m_close_tag).size(), txt_chunk.size() - (close_idx + std::string(m_close_tag).size()));
+                reason_str = txt_chunk.substr(open_idx + m_open_tag.size(), close_idx - (open_idx + m_open_tag.size()));
+                content_str = txt_chunk.substr(close_idx + m_close_tag.size(), txt_chunk.size() - (close_idx + m_close_tag.size()));
                 if (!m_keep_original_content) {
                     delta_text = content_str;
                 }
@@ -91,7 +92,7 @@
             // Example if m_text_cache + delta_text = "...some text</th" + "ink>Answer is 3" = "...some text</think>Answer is 3"
             // we want to keep in delta_txt only "Answer is 3".
             // We can operate with txt_chunk since final characters closing the tag ("ink>") are always in delta_text.
-            delta_text = txt_chunk.substr(close_idx + std::string(m_close_tag).size(), txt_chunk.size() - (close_idx + std::string(m_close_tag).size()));
+            delta_text = txt_chunk.substr(close_idx + m_close_tag.size(), txt_chunk.size() - (close_idx + m_close_tag.size()));
 
             msg["reasoning_content"] = reason_str;
diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py
index 34e0b153f4..e4edc351ca 100644
--- a/src/python/openvino_genai/__init__.py
+++ b/src/python/openvino_genai/__init__.py
@@ -29,6 +29,7 @@
     DeepSeekR1ReasoningParser,
     Llama3JsonToolParser,
     Llama3PythonicToolParser,
+    ReasoningParser
 )
 
 __version__ = get_version()
diff --git a/src/python/py_generation_config.cpp b/src/python/py_generation_config.cpp
index 4c9250bd87..86ad684aa7 100644
--- a/src/python/py_generation_config.cpp
+++ b/src/python/py_generation_config.cpp
@@ -445,7 +445,7 @@ void init_generation_config(py::module_& m) {
         .def_readwrite("include_stop_str_in_output", &GenerationConfig::include_stop_str_in_output)
         .def_readwrite("stop_token_ids", &GenerationConfig::stop_token_ids)
         .def_readwrite("structured_output_config", &GenerationConfig::structured_output_config)
-        .def_readwrite("parsers", &GenerationConfig::parsers)
+        .def_readwrite("parsers", &GenerationConfig::parsers)  // TODO: add keep_alive
         .def_readwrite("adapters", &GenerationConfig::adapters)
         .def_readwrite("apply_chat_template", &GenerationConfig::apply_chat_template)
         .def("set_eos_token_id", &GenerationConfig::set_eos_token_id, py::arg("tokenizer_eos_token_id"))
diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp
index a7ffba9b69..931c84ce18 100644
--- a/src/python/py_parsers.cpp
+++ b/src/python/py_parsers.cpp
@@ -129,10 +129,10 @@ void init_parsers(py::module_& m) {
         "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output.");
 
-    py::class_<Phi4ReasoningParser, std::shared_ptr<Phi4ReasoningParser>, IncrementalParserBase>(m, "Phi4ReasoningParser")
-        .def(py::init<bool>(), py::arg("expect_open_tag") = false);
+        .def(py::init<bool>(), py::arg("expect_open_tag") = true);
 
     py::class_<DeepSeekR1ReasoningParser, std::shared_ptr<DeepSeekR1ReasoningParser>, IncrementalParserBase>(m, "DeepSeekR1ReasoningParser")
-        .def(py::init<>());
+        .def(py::init<bool>(), py::arg("expect_open_tag") = false);
 
     py::class_<ParserBase, ConstructableParserBase, std::shared_ptr<ParserBase>>(m, "ParserBase")
         .def(py::init<>())
@@ -161,4 +161,11 @@ void init_parsers(py::module_& m) {
 
     py::class_<Llama3PythonicToolParser, std::shared_ptr<Llama3PythonicToolParser>, ParserBase>(m, "Llama3PythonicToolParser")
         .def(py::init<>());
+
+    py::class_<ReasoningParser, std::shared_ptr<ReasoningParser>, IncrementalParserBase>(m, "ReasoningParser")
+        .def(py::init<bool, bool, std::string, std::string>(),
+            py::arg("expect_open_tag") = true,
+            py::arg("keep_original_content") = true,
+            py::arg("open_tag") = "<think>",
+            py::arg("close_tag") = "</think>");
 }
diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp
index d6660fcd17..7ac5ee437e 100644
--- a/src/python/py_streamers.cpp
+++ b/src/python/py_streamers.cpp
@@ -166,11 +166,14 @@ void init_streamers(py::module_& m) {
         },
         py::arg("message"),
         "Write is called with a dict. Returns StreamingStatus.")
-        .def("_write",
-            py::overload_cast<std::string>(&TextParserStreamer::write),
-            py::arg("message"),
-            "Write is called with a string message. Returns CallbackTypeVariant. This is a private method.")
-
+        .def("_write", [](TextParserStreamer& self, std::variant<std::vector<int64_t>, std::string> chunk) -> StreamingStatus {
+            if (auto _token = std::get_if<std::vector<int64_t>>(&chunk)) {
+                return self.write(*_token);
+            } else if (auto _str = std::get_if<std::string>(&chunk)) {
+                auto res = self.write(*_str);
+                return std::get<StreamingStatus>(res);
+            }
+        })
         .def("get_parsed_message",
             [](TextParserStreamer& self) -> py::dict{
                 return pyutils::json_container_to_py_object(self.get_parsed_message());
diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py
index 08574d9c89..6b9022255d 100644
--- a/tests/python_tests/test_parsers.py
+++ b/tests/python_tests/test_parsers.py
@@ -4,7 +4,7 @@
 from utils.hugging_face import convert_and_save_tokenizer, download_and_convert_model
 from utils.ov_genai_pipelines import create_ov_pipeline
 import pytest
-from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus, Llama3JsonToolParser, Phi4ReasoningParser, DeepSeekR1ReasoningParser, GenerationConfig
+from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus, Llama3JsonToolParser, Phi4ReasoningParser, DeepSeekR1ReasoningParser, GenerationConfig, ReasoningParser
 from transformers import AutoTokenizer
 import re
 
@@ -63,6 +63,35 @@ def write(self, message):
     assert msg['content'] == content
 
 
+@pytest.mark.precommit
+@pytest.mark.parametrize(
+    "hf_ov_genai_models",
+    ["katuni4ka/tiny-random-phi3"],  # this tokenizer is used as a stub only
+    indirect=True
+)
+def test_incremental_phi4_reason_integer_token_ids(hf_ov_genai_models):
+    hf_tokenizer, genai_tokenizer = hf_ov_genai_models
+
+    class CustomStreamer(TextParserStreamer):
+        def write(self, message):
+            msg.update(message)
+            return StreamingStatus.RUNNING
+    streamer = CustomStreamer(genai_tokenizer, parsers=[Phi4ReasoningParser()])
+
+    msg = {}
+    answer = "<think>\nOkay, the user is asking for the answer to 2 + 1.</think>\n\nThe answer to 2 + 1 is \boxed{3}."
+    encoded_tokens = genai_tokenizer.encode(answer).input_ids.data.tolist()
+    for token in encoded_tokens:
+        streamer._write(token)
+    streamer.end()
+
+    think_content = answer.split("</think>")[0].replace("<think>", "")
+    content = answer
+
+    assert msg['reasoning_content'] == think_content
+    assert msg['content'] == content
+
+
 @pytest.mark.precommit
 @pytest.mark.parametrize(
     "hf_ov_genai_models",
@@ -121,6 +150,38 @@ def test_incremental_phi4_reason_parser_nostreamer(answer):
     assert msg['content'] == content
 
 
+@pytest.mark.precommit
+@pytest.mark.parametrize("keep_original_content", [True, False])
+@pytest.mark.parametrize(
+    "hf_ov_genai_models",
+    ["katuni4ka/tiny-random-phi3"],  # this tokenizer is used as a stub only
+    indirect=True
+)
+@pytest.mark.parametrize("answer", [
+    "<think>\nOkay, the user is asking for the answer to 2 + 1.</think>\n\nThe answer to 2 + 1 is \boxed{3}.",
+])
+def test_reasoning_parser_cut_content(hf_ov_genai_models, answer, keep_original_content):
+    hf_tokenizer, genai_tokenizer = hf_ov_genai_models
+
+    stream_string = re.split(r"(\s+)", answer)
+
+    class CustomStreamer(TextParserStreamer):
+        def write(self, message):
+            msg.update(message)
+            return StreamingStatus.RUNNING
+    streamer = CustomStreamer(genai_tokenizer, parsers=[ReasoningParser(expect_open_tag=True, keep_original_content=keep_original_content)])
+
+    msg = {}
+    for subword in stream_string:
+        streamer._write(subword)
+
+    think_content = answer.split("</think>")[0].replace("<think>", "")
+    content = answer
+
+    assert msg['reasoning_content'] == think_content
+    assert msg['content'] == (content if keep_original_content else "\n\nThe answer to 2 + 1 is \boxed{3}.")
+
+
 def test_incremental_deepseek_parser():
     msg = {}
     stream_string = [
@@ -211,9 +272,6 @@ def test_final_parser_llama_32_json(hf_ov_genai_models):
     assert content_json['tool_calls'][0] == json.loads(json_str)
 
 
-# TODO: add test when several parsers are called.
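(The TODO removed just above, exercising several parsers on one message, can be sketched as follows. This is an illustrative sketch only, using the post-rename names bound at this point in the series; `AddMarker` is a hypothetical stand-in for any second final parser, not part of the API.)

    # Illustrative only: several final parsers mutate the same message dict in turn,
    # mirroring how LLMPipeline::generate walks generation_config.parsers.
    import json
    from openvino_genai import Parser, Llama3JsonToolParser

    class AddMarker(Parser):  # hypothetical helper, not part of the API
        def parse(self, msg: dict):
            msg["postprocessed"] = True

    json_str = '{"type": "function", "function": {"name": "get_weather", "parameters": {"location": "New York, NY", "unit": "celsius"}}}'
    msg = {"content": f"Calling weather API: {json_str}"}
    for parser in [Llama3JsonToolParser(), AddMarker()]:
        parser.parse(msg)  # each parser updates the same dict in place
    assert msg["tool_calls"][0] == json.loads(json_str)
    assert msg["postprocessed"] is True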
-
-
 @pytest.mark.parametrize("model_id", ["microsoft/Phi-4-mini-reasoning"])
 @pytest.mark.nightly
 def test_custom_parser(tmp_path, model_id):
@@ -253,8 +311,3 @@ def parse(self, msg: dict):
     assert 'reasoning_content' in res.parsed[0]
     assert res.parsed[0]['reasoning_content'] != ""
     assert res.parsed[0]['reasoning_content'] == think_text
-
-
-
-
-# TODO: add when streamer accepts integer tokens

From 801d8fb332406114bc1d8f2d537aaf91f385e187 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Wed, 22 Oct 2025 13:19:26 +0200
Subject: [PATCH 27/43] renamed parsers: ParserBase -> Parser;
 IncrementalParserBase -> IncrementalParser

---
 .../openvino/genai/generation_config.hpp      |  3 +-
 src/cpp/include/openvino/genai/parsers.hpp    | 34 ++++++------
 .../include/openvino/genai/text_streamer.hpp  |  4 +-
 src/cpp/src/llm/pipeline.cpp                  |  2 +-
 src/cpp/src/parsers.cpp                       | 10 ++--
 src/cpp/src/text_streamer.cpp                 |  2 +-
 src/python/openvino_genai/__init__.py         | 10 ++--
 src/python/openvino_genai/__init__.pyi        | 11 ++--
 .../openvino_genai/py_openvino_genai.pyi      | 33 ++++++------
 src/python/py_parsers.cpp                     | 52 +++++++++----------
 src/python/py_streamers.cpp                   |  6 +--
 tests/cpp/parser.cpp                          | 12 ++---
 tests/python_tests/test_parsers.py            | 18 +++----
 13 files changed, 98 insertions(+), 99 deletions(-)

diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp
index 31c50b28e5..e592cb36ff 100644
--- a/src/cpp/include/openvino/genai/generation_config.hpp
+++ b/src/cpp/include/openvino/genai/generation_config.hpp
@@ -689,8 +689,7 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig {
     bool is_prompt_lookup() const;
     bool is_structured_output_generation() const;
 
-    // parsers
-    std::vector<std::shared_ptr<ParserBase>> parsers;
+    std::vector<std::shared_ptr<Parser>> parsers;
 
     OPENVINO_DEPRECATED("Please, use `is_assisting_generation()` instead of `is_speculative_decoding()`. This method will be removed in 2026.0.0 release")
     bool is_speculative_decoding() const;
diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index 7b85c3541c..7225669f5a 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -10,9 +10,9 @@
 namespace ov {
 namespace genai {
 
-class OPENVINO_GENAI_EXPORTS IncrementalParserBase {
+class OPENVINO_GENAI_EXPORTS IncrementalParser {
 public:
-    IncrementalParserBase() = default;
+    IncrementalParser() = default;
 
     // Returns a string with the filtered text to be added to the content.
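     // For example, a reasoning parser may move a "<think>...</think>" section into
     // message["reasoning_content"] and return only the remaining visible text
     // (illustrative note; see the reasoning parsers below for the concrete behaviour).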
     virtual std::string parse(
         JsonContainer& msg,
         const std::string& previous_text,
         std::string& delta_text,
         const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
@@ -23,19 +23,19 @@ class OPENVINO_GENAI_EXPORTS IncrementalParserBase {
         const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
     ) = 0;
 
-    virtual ~IncrementalParserBase() = default;
+    virtual ~IncrementalParser() = default;
 };
 
-class OPENVINO_GENAI_EXPORTS ReasoningParser : public IncrementalParserBase {
+class OPENVINO_GENAI_EXPORTS ReasoningIncrementalParser : public IncrementalParser {
 private:
     class ReasoningParserImpl;
     std::unique_ptr<ReasoningParserImpl> m_impl;
 public:
-    ReasoningParser(bool expect_open_tag = true,
+    ReasoningIncrementalParser(bool expect_open_tag = true,
                     bool keep_original_content = true,
                     const std::string& open_tag = "<think>",
                     const std::string& close_tag = "</think>");
-    virtual ~ReasoningParser();
+    virtual ~ReasoningIncrementalParser();
 
     std::string parse(
@@ -46,24 +46,24 @@ class OPENVINO_GENAI_EXPORTS ReasoningParser : public IncrementalParserBase {
     ) override;
 };
 
-class OPENVINO_GENAI_EXPORTS DeepSeekR1ReasoningParser : public ReasoningParser {
+class OPENVINO_GENAI_EXPORTS DeepSeekR1ReasoningIncrementalParser : public ReasoningIncrementalParser {
 public:
-    explicit DeepSeekR1ReasoningParser(bool expect_open_tag = false) : ReasoningParser(expect_open_tag) {};
+    explicit DeepSeekR1ReasoningIncrementalParser(bool expect_open_tag = false) : ReasoningIncrementalParser(expect_open_tag) {};
 };
 
-class OPENVINO_GENAI_EXPORTS Phi4ReasoningParser : public ReasoningParser {
+class OPENVINO_GENAI_EXPORTS Phi4ReasoningIncrementalParser : public ReasoningIncrementalParser {
 public:
-    explicit Phi4ReasoningParser(bool expect_open_tag = true) : ReasoningParser(expect_open_tag) {};
+    explicit Phi4ReasoningIncrementalParser(bool expect_open_tag = true) : ReasoningIncrementalParser(expect_open_tag) {};
 };
 
-class ParserBase {
+class Parser {
 public:
-    ParserBase() = default;
-    virtual ~ParserBase();
+    Parser() = default;
+    virtual ~Parser();
     virtual void parse(JsonContainer& text) = 0;
 };
 
-class OPENVINO_GENAI_EXPORTS Llama3PythonicToolParser : public ParserBase {
+class OPENVINO_GENAI_EXPORTS Llama3PythonicToolParser : public Parser {
 // Does not modify original content, only extracts and adds tool calls
 public:
     explicit Llama3PythonicToolParser(bool keep_original_content = true);
     ~Llama3PythonicToolParser();
@@ -74,7 +74,7 @@ class OPENVINO_GENAI_EXPORTS Llama3PythonicToolParser : public ParserBase {
     std::unique_ptr<Llama3PythonicToolParserImpl> m_impl;
 };
 
-class OPENVINO_GENAI_EXPORTS Llama3JsonToolParser : public ParserBase {
+class OPENVINO_GENAI_EXPORTS Llama3JsonToolParser : public Parser {
 // Does not modify original content, only extracts and adds tool calls
 public:
     explicit Llama3JsonToolParser(bool keep_original_content = true);
@@ -85,7 +85,7 @@
     std::unique_ptr<Llama3JsonToolParserImpl> m_impl;
 };
 
-class OPENVINO_GENAI_EXPORTS BaseReasoningParser : public ParserBase{
+class OPENVINO_GENAI_EXPORTS BaseReasoningParser : public Parser {
 public:
     BaseReasoningParser(
         bool expect_open_tag = true,
@@ -99,7 +99,5 @@
     std::unique_ptr<BaseReasoningParserImpl> m_impl;
 };
 
-// TODO: DeepSeekR1ReasoningParser -> DeepSeekR1IncrementalParser
-
 } // namespace genai
 } // namespace ov
diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp
index 53324def89..fabea0f524 100644
--- a/src/cpp/include/openvino/genai/text_streamer.hpp
+++ b/src/cpp/include/openvino/genai/text_streamer.hpp
@@ -50,7 +50,7 @@ class
OPENVINO_GENAI_EXPORTS TextParserStreamer : public TextStreamer {
 public:
     using TextStreamer::write;
-    TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParserBase>> parsers = {});
+    TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParser>> parsers = {});
 
     virtual StreamingStatus write(JsonContainer& message) = 0;
 
@@ -60,7 +60,7 @@ class OPENVINO_GENAI_EXPORTS TextParserStreamer : public TextStreamer {
 private:
     JsonContainer m_parsed_message;
     std::string m_text_buffer;
-    std::vector<std::shared_ptr<IncrementalParserBase>> m_parsers;
+    std::vector<std::shared_ptr<IncrementalParser>> m_parsers;
 };
 
 } // namespace genai
diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp
index 9f988dbb59..513d047bf2 100644
--- a/src/cpp/src/llm/pipeline.cpp
+++ b/src/cpp/src/llm/pipeline.cpp
@@ -265,7 +265,7 @@ DecodedResults LLMPipeline::generate(
         return res;
     }
 
-    std::vector<std::shared_ptr<ParserBase>> parsers = (*generation_config).parsers;
+    std::vector<std::shared_ptr<Parser>> parsers = (*generation_config).parsers;
     res.parsed.resize(res.texts.size());
     // Apply Base parsers sequentially even if IncrementalParser has run.
     for (size_t i = 0; i < res.texts.size(); ++i) {
diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index 8de2bd5e1f..c362d9f73d 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -8,7 +8,7 @@
 
 namespace ov::genai {
 
-class ReasoningParser::ReasoningParserImpl {
+class ReasoningIncrementalParser::ReasoningParserImpl {
 private:
     bool m_expect_open_tag;
     bool m_first_run = true;
@@ -148,13 +148,13 @@ class ReasoningParser::ReasoningParserImpl {
     }
 };
 
-ReasoningParser::ReasoningParser(bool expect_open_tag, bool keep_original_content, const std::string& open_tag, const std::string& close_tag) {
-    m_impl = std::make_unique<ReasoningParserImpl>(expect_open_tag, keep_original_content, open_tag, close_tag);
+ReasoningIncrementalParser::ReasoningIncrementalParser(bool expect_open_tag, bool keep_original_content, const std::string& open_tag, const std::string& close_tag) {
+    m_impl = std::make_unique<ReasoningParserImpl>(expect_open_tag, keep_original_content, open_tag, close_tag);
 }
 
-ReasoningParser::~ReasoningParser() = default;
+ReasoningIncrementalParser::~ReasoningIncrementalParser() = default;
 
-std::string ReasoningParser::parse(
+std::string ReasoningIncrementalParser::parse(
     JsonContainer& msg,
     const std::string& previous_text,
     std::string& delta_text,
     const std::optional<std::vector<int64_t>>& previous_tokens,
     const std::optional<std::vector<int64_t>>& delta_tokens
 ) {
     return m_impl->parse(msg, previous_text, delta_text, previous_tokens, delta_tokens);
 }
diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp
index 9b83e0c60c..ee48cff1f9 100644
--- a/src/cpp/src/text_streamer.cpp
+++ b/src/cpp/src/text_streamer.cpp
@@ -124,7 +124,7 @@ void TextStreamer::end() {
 
 StreamerBase::~StreamerBase() = default;
 
-TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParserBase>> parsers)
+TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParser>> parsers)
     : TextStreamer(tokenizer, [this](std::string s) -> CallbackTypeVariant { return this->write(s); }), m_parsers{parsers} {}
 
diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py
index e4edc351ca..a0c06baec6 100644
--- a/src/python/openvino_genai/__init__.py
+++ b/src/python/openvino_genai/__init__.py
@@ -23,13 +23,13 @@
 )
 
 from .py_openvino_genai import (
-    ParserBase,
-    IncrementalParserBase,
-    Phi4ReasoningParser,
-    DeepSeekR1ReasoningParser,
+    Parser,
+    IncrementalParser,
+    Phi4ReasoningIncrementalParser,
+    DeepSeekR1ReasoningIncrementalParser,
     Llama3JsonToolParser,
     Llama3PythonicToolParser,
-    ReasoningParser
+    ReasoningIncrementalParser
 )
 
 __version__ =
get_version() diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index 00f67b6d2d..04bd694c57 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -15,7 +15,7 @@ from openvino_genai.py_openvino_genai import ChunkStreamerBase from openvino_genai.py_openvino_genai import ContinuousBatchingPipeline from openvino_genai.py_openvino_genai import CppStdGenerator from openvino_genai.py_openvino_genai import DecodedResults -from openvino_genai.py_openvino_genai import DeepSeekR1ReasoningParser +from openvino_genai.py_openvino_genai import DeepSeekR1ReasoningIncrementalParser from openvino_genai.py_openvino_genai import EncodedResults from openvino_genai.py_openvino_genai import FluxTransformer2DModel from openvino_genai.py_openvino_genai import GenerationConfig @@ -26,18 +26,19 @@ from openvino_genai.py_openvino_genai import Generator from openvino_genai.py_openvino_genai import Image2ImagePipeline from openvino_genai.py_openvino_genai import ImageGenerationConfig from openvino_genai.py_openvino_genai import ImageGenerationPerfMetrics -from openvino_genai.py_openvino_genai import IncrementalParserBase +from openvino_genai.py_openvino_genai import IncrementalParser from openvino_genai.py_openvino_genai import InpaintingPipeline from openvino_genai.py_openvino_genai import KVCrushAnchorPointMode from openvino_genai.py_openvino_genai import KVCrushConfig from openvino_genai.py_openvino_genai import LLMPipeline from openvino_genai.py_openvino_genai import Llama3JsonToolParser from openvino_genai.py_openvino_genai import Llama3PythonicToolParser -from openvino_genai.py_openvino_genai import ParserBase +from openvino_genai.py_openvino_genai import Parser from openvino_genai.py_openvino_genai import PerfMetrics -from openvino_genai.py_openvino_genai import Phi4ReasoningParser +from openvino_genai.py_openvino_genai import Phi4ReasoningIncrementalParser from openvino_genai.py_openvino_genai import RawImageGenerationPerfMetrics from openvino_genai.py_openvino_genai import RawPerfMetrics +from openvino_genai.py_openvino_genai import ReasoningIncrementalParser from openvino_genai.py_openvino_genai import SD3Transformer2DModel from openvino_genai.py_openvino_genai import Scheduler from openvino_genai.py_openvino_genai import SchedulerConfig @@ -72,5 +73,5 @@ from openvino_genai.py_openvino_genai import draft_model from openvino_genai.py_openvino_genai import get_version import os as os from . 
import py_openvino_genai -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'ParserBase', 'PerfMetrics', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 4a27153742..549da2bf3a 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import collections.abc import openvino._pyopenvino import typing -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 
'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningParser', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParserBase', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'MeanStdPair', 'ParserBase', 'PerfMetrics', 'Phi4ReasoningParser', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'MeanStdPair', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] class Adapter: """ Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. 
@@ -577,8 +577,8 @@ class DecodedResults:
     @property
     def texts(self) -> list[str]:
         ...
-class DeepSeekR1ReasoningParser(IncrementalParserBase):
-    def __init__(self) -> None:
+class DeepSeekR1ReasoningIncrementalParser(IncrementalParser):
+    def __init__(self, expect_open_tag: bool = False) -> None:
         ...
 class EncodedGenerationResult:
     """
@@ -978,10 +978,10 @@ class GenerationConfig:
     def num_return_sequences(self, arg0: typing.SupportsInt) -> None:
         ...
     @property
-    def parsers(self) -> list[ParserBase]:
+    def parsers(self) -> list[Parser]:
         ...
     @parsers.setter
-    def parsers(self, arg0: collections.abc.Sequence[ParserBase]) -> None:
+    def parsers(self, arg0: collections.abc.Sequence[Parser]) -> None:
         ...
     @property
     def presence_penalty(self) -> float:
@@ -1459,7 +1459,7 @@ class ImageGenerationPerfMetrics:
     @property
    def raw_metrics(self) -> RawImageGenerationPerfMetrics:
         ...
-class IncrementalParserBase:
+class IncrementalParser:
     def __init__(self) -> None:
         ...
     def parse(self, msg: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str:
@@ -1822,10 +1822,10 @@ class LLMPipeline:
         ...
     def start_chat(self, system_message: str = '') -> None:
         ...
-class Llama3JsonToolParser(ParserBase):
+class Llama3JsonToolParser(Parser):
     def __init__(self) -> None:
         ...
-class Llama3PythonicToolParser(ParserBase):
+class Llama3PythonicToolParser(Parser):
     def __init__(self) -> None:
         ...
 class MeanStdPair:
@@ -1839,7 +1839,7 @@ class MeanStdPair:
     @property
     def std(self) -> float:
         ...
-class ParserBase:
+class Parser:
     def __init__(self) -> None:
         ...
     def parse(self, text: dict) -> None:
@@ -1948,8 +1948,8 @@ class PerfMetrics:
     @property
     def raw_metrics(self) -> RawPerfMetrics:
         ...
-class Phi4ReasoningParser(IncrementalParserBase):
-    def __init__(self, expect_open_tag: bool = False) -> None:
+class Phi4ReasoningIncrementalParser(IncrementalParser):
+    def __init__(self, expect_open_tag: bool = True) -> None:
         ...
 class PipelineMetrics:
     """
@@ -2082,6 +2082,9 @@ class RawPerfMetrics:
     @property
     def tokenization_durations(self) -> list[float]:
         ...
+class ReasoningIncrementalParser(IncrementalParser):
+    def __init__(self, expect_open_tag: bool = True, keep_original_content: bool = True, open_tag: str = '<think>', close_tag: str = '</think>') -> None:
+        ...
 class SD3Transformer2DModel:
     """
     SD3Transformer2DModel class.
@@ -3384,14 +3387,12 @@ class TextEmbeddingPipeline:
         Waits computed embeddings for a query
         """
 class TextParserStreamer(TextStreamer):
-    def __init__(self, tokenizer: Tokenizer, parsers: collections.abc.Sequence[IncrementalParserBase] = []) -> None:
+    def __init__(self, tokenizer: Tokenizer, parsers: collections.abc.Sequence[IncrementalParser] = []) -> None:
         """
         TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers.
         """
-    def _write(self, message: str) -> bool | openvino_genai.py_openvino_genai.StreamingStatus:
-        """
-        Write is called with a string message. Returns CallbackTypeVariant. This is a private method.
-        """
+    def _write(self, arg0: collections.abc.Sequence[typing.SupportsInt] | str) -> StreamingStatus:
+        ...
     def get_parsed_message(self) -> dict:
         """
         Get the current parsed message
diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp
index 931c84ce18..e6b3437f87 100644
--- a/src/python/py_parsers.cpp
+++ b/src/python/py_parsers.cpp
@@ -13,11 +13,11 @@
 
 namespace py = pybind11;
 
-using ov::genai::IncrementalParserBase;
-using ov::genai::ParserBase;
-using ov::genai::ReasoningParser;
-using ov::genai::Phi4ReasoningParser;
-using ov::genai::DeepSeekR1ReasoningParser;
+using ov::genai::IncrementalParser;
+using ov::genai::Parser;
+using ov::genai::ReasoningIncrementalParser;
+using ov::genai::Phi4ReasoningIncrementalParser;
+using ov::genai::DeepSeekR1ReasoningIncrementalParser;
 using ov::genai::JsonContainer;
 using ov::genai::Llama3JsonToolParser;
 using ov::genai::Llama3PythonicToolParser;
@@ -28,11 +28,11 @@ namespace pyutils = ov::genai::pybind::utils;
 
 namespace {
 
-// ConstructableIncrementalParserBase and ConstructableParserBase are used when python overload is called from C++
+// ConstructableIncrementalParser and ConstructableParser are used when python overload is called from C++
 // and we need to convert JsonContainer to py::dict and then update back JsonContainer from the py::dict which was modified in Python.
-class ConstructableIncrementalParserBase: public IncrementalParserBase {
+class ConstructableIncrementalParser: public IncrementalParser {
 public:
-    using IncrementalParserBase::IncrementalParserBase;
+    using IncrementalParser::IncrementalParser;
     std::string parse(
         JsonContainer& msg,
         const std::string& previous_text,
@@ -43,7 +43,7 @@ class ConstructableIncrementalParserBase: public IncrementalParserBase {
         // Convert JsonContainer to py::dict
         py::dict py_msg = pyutils::json_container_to_py_object(msg);
 
-        py::function parse_method = py::get_override(static_cast<const IncrementalParserBase*>(this), "parse");
+        py::function parse_method = py::get_override(static_cast<const IncrementalParser*>(this), "parse");
         if (!parse_method) {
             throw std::runtime_error("parse method not implemented in Python subclass");
         }
@@ -71,12 +71,12 @@ class ConstructableIncrementalParserBase: public IncrementalParserBase {
     }
 };
 
-class ConstructableParserBase: public ParserBase {
+class ConstructableParser: public Parser {
 public:
     void parse(JsonContainer& msg) override {
         py::gil_scoped_acquire acquire;
 
-        py::function parse_method = py::get_override(static_cast<const ParserBase*>(this), "parse");
+        py::function parse_method = py::get_override(static_cast<const Parser*>(this), "parse");
         if (!parse_method) {
             throw std::runtime_error("parse method not implemented in Python subclass");
         }
@@ -103,9 +103,9 @@ class ConstructableParserBase: public ParserBase {
 // TODO: double check/add more relevant docstrings for parsers.
 void init_parsers(py::module_& m) {
-    py::class_<IncrementalParserBase, ConstructableIncrementalParserBase, std::shared_ptr<IncrementalParserBase>>(m, "IncrementalParserBase")
+    py::class_<IncrementalParser, ConstructableIncrementalParser, std::shared_ptr<IncrementalParser>>(m, "IncrementalParser")
         .def(py::init<>())
-        .def("parse", [](IncrementalParserBase& self,
+        .def("parse", [](IncrementalParser& self,
                         py::dict& msg,
                         std::string& previous_text,
                         std::string& delta_text,
@@ -116,7 +116,7 @@ void init_parsers(py::module_& m) {
         py::arg("previous_tokens") = std::nullopt,
         py::arg("delta_tokens") = std::nullopt,
         "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output.");
 
-    py::class_<Phi4ReasoningParser, std::shared_ptr<Phi4ReasoningParser>, IncrementalParserBase>(m, "Phi4ReasoningParser")
+    py::class_<ReasoningIncrementalParser, std::shared_ptr<ReasoningIncrementalParser>, IncrementalParser>(m, "ReasoningIncrementalParser")
+        .def(py::init<bool, bool, std::string, std::string>(),
+            py::arg("expect_open_tag") = true,
+            py::arg("keep_original_content") = true,
+            py::arg("open_tag") = "<think>",
+            py::arg("close_tag") = "</think>");
+
+    py::class_<Phi4ReasoningIncrementalParser, std::shared_ptr<Phi4ReasoningIncrementalParser>, IncrementalParser>(m, "Phi4ReasoningIncrementalParser")
         .def(py::init<bool>(), py::arg("expect_open_tag") = true);
 
-    py::class_<DeepSeekR1ReasoningParser, std::shared_ptr<DeepSeekR1ReasoningParser>, IncrementalParserBase>(m, "DeepSeekR1ReasoningParser")
+    py::class_<DeepSeekR1ReasoningIncrementalParser, std::shared_ptr<DeepSeekR1ReasoningIncrementalParser>, IncrementalParser>(m, "DeepSeekR1ReasoningIncrementalParser")
         .def(py::init<bool>(), py::arg("expect_open_tag") = false);
 
-    py::class_<ParserBase, ConstructableParserBase, std::shared_ptr<ParserBase>>(m, "ParserBase")
+    py::class_<Parser, ConstructableParser, std::shared_ptr<Parser>>(m, "Parser")
         .def(py::init<>())
         .def("parse",
-            [](ParserBase& self, py::dict& msg) {
+            [](Parser& self, py::dict& msg) {
                 auto msg_cpp = pyutils::py_object_to_json_container(msg);
 
                 self.parse(msg_cpp);
@@ -156,16 +163,9 @@ void init_parsers(py::module_& m) {
         py::arg("text"),
         "Parse is called with the full text. Returns a dict with parsed content.");
 
-    py::class_<Llama3JsonToolParser, std::shared_ptr<Llama3JsonToolParser>, ParserBase>(m, "Llama3JsonToolParser")
+    py::class_<Llama3JsonToolParser, std::shared_ptr<Llama3JsonToolParser>, Parser>(m, "Llama3JsonToolParser")
         .def(py::init<>());
 
-    py::class_<Llama3PythonicToolParser, std::shared_ptr<Llama3PythonicToolParser>, ParserBase>(m, "Llama3PythonicToolParser")
+    py::class_<Llama3PythonicToolParser, std::shared_ptr<Llama3PythonicToolParser>, Parser>(m, "Llama3PythonicToolParser")
         .def(py::init<>());
-
-    py::class_<ReasoningParser, std::shared_ptr<ReasoningParser>, IncrementalParserBase>(m, "ReasoningParser")
-        .def(py::init<bool, bool, std::string, std::string>(),
-            py::arg("expect_open_tag") = true,
-            py::arg("keep_original_content") = true,
-            py::arg("open_tag") = "<think>",
-            py::arg("close_tag") = "</think>");
 }
diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp
index 7ac5ee437e..94fb52355d 100644
--- a/src/python/py_streamers.cpp
+++ b/src/python/py_streamers.cpp
@@ -19,7 +19,7 @@ using ov::genai::CallbackTypeVariant;
 using ov::genai::StreamingStatus;
 using ov::genai::TextStreamer;
 using ov::genai::TextParserStreamer;
-using ov::genai::IncrementalParserBase;
+using ov::genai::IncrementalParser;
 using ov::genai::JsonContainer;
 using ov::genai::Tokenizer;
 
@@ -148,11 +148,11 @@ void init_streamers(py::module_& m) {
 
     // TODO: double check/add more relevant docstrings for TextParserStreamer.
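    // Illustrative Python usage this binding is meant to support (a sketch, not a docstring yet):
    //   class MyStreamer(TextParserStreamer):
    //       def write(self, message: dict) -> StreamingStatus: ...
    //   streamer = MyStreamer(tokenizer, parsers=[Phi4ReasoningIncrementalParser()])
    //   pipe.generate(prompt, config, streamer)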
py::class_, TextStreamer>(m, "TextParserStreamer") .def(py::init([](const Tokenizer& tokenizer, - std::vector> parsers) { + std::vector> parsers) { return std::make_shared(tokenizer, parsers); }), py::arg("tokenizer"), - py::arg("parsers") = std::vector>(), + py::arg("parsers") = std::vector>(), py::keep_alive<1, 3>(), "TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers.") .def("write", diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp index e31bd3c236..7660d1625b 100644 --- a/tests/cpp/parser.cpp +++ b/tests/cpp/parser.cpp @@ -95,7 +95,7 @@ TEST(ParserTest, test_reasoning_parser_2) { class DeepSeekR1ReasoningParserTest : public ::testing::Test { protected: - ov::genai::DeepSeekR1ReasoningParser parser; + ov::genai::DeepSeekR1ReasoningIncrementalParser parser; JsonContainer msg; }; @@ -124,8 +124,8 @@ TEST_F(DeepSeekR1ReasoningParserTest, ReasoningContentAccumulatesAcrossCalls) { } TEST(ParserTest, test_custom_parser) { - // Define a small custom parser derived from ParserBase - class CustomParser : public ov::genai::ParserBase { + // Define a small custom parser derived from Parser + class CustomParser : public ov::genai::Parser { public: void parse(ov::genai::JsonContainer& msg) override { // extract "content" @@ -168,7 +168,7 @@ TEST(ParserTest, CustomParser_AccumulatesBetweenStartStop) { using namespace ov::genai; // Custom incremental parser: mirrors the Python logic - class CustomParser : public IncrementalParserBase { + class CustomParser : public IncrementalParser { public: bool main_part_started = false; @@ -210,7 +210,7 @@ TEST(ParserTest, CustomParser_AccumulatesBetweenStartStop) { public: using TextParserStreamer::write; // Forwarding constructor to base class - CustomStreamer(ov::genai::Tokenizer& tok, const std::vector>& parsers) + CustomStreamer(ov::genai::Tokenizer& tok, const std::vector>& parsers) : ov::genai::TextParserStreamer(tok, parsers) {} JsonContainer final_msg; @@ -221,7 +221,7 @@ TEST(ParserTest, CustomParser_AccumulatesBetweenStartStop) { }; Tokenizer tok; - std::shared_ptr parser = std::make_shared(); + std::shared_ptr parser = std::make_shared(); CustomStreamer streamer(tok, {parser}); diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index 6b9022255d..2368571f62 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -4,7 +4,7 @@ from utils.hugging_face import convert_and_save_tokenizer, download_and_convert_model from utils.ov_genai_pipelines import create_ov_pipeline import pytest -from openvino_genai import Tokenizer, IncrementalParserBase, ParserBase, TextParserStreamer, StreamingStatus, Llama3JsonToolParser, Phi4ReasoningParser, DeepSeekR1ReasoningParser, GenerationConfig, ReasoningParser +from openvino_genai import Tokenizer, IncrementalParser, Parser, TextParserStreamer, StreamingStatus, Llama3JsonToolParser, Phi4ReasoningIncrementalParser, DeepSeekR1ReasoningIncrementalParser, GenerationConfig, ReasoningIncrementalParser from transformers import AutoTokenizer import re @@ -50,7 +50,7 @@ class CustomStreamer(TextParserStreamer): def write(self, message): msg.update(message) return StreamingStatus.RUNNING - streamer = CustomStreamer(genai_tokenizer, parsers=[Phi4ReasoningParser()]) + streamer = CustomStreamer(genai_tokenizer, parsers=[Phi4ReasoningIncrementalParser()]) msg = {} for subword in stream_string: @@ -76,7 +76,7 @@ class CustomStreamer(TextParserStreamer): def write(self, message): msg.update(message) 
             return StreamingStatus.RUNNING
-    streamer = CustomStreamer(genai_tokenizer, parsers=[Phi4ReasoningParser()])
+    streamer = CustomStreamer(genai_tokenizer, parsers=[Phi4ReasoningIncrementalParser()])
 
     msg = {}
     answer = "<think>\nOkay, the user is asking for the answer to 2 + 1.</think>\n\nThe answer to 2 + 1 is \boxed{3}."
@@ -114,7 +114,7 @@ class CustomStreamer(TextParserStreamer):
         def write(self, message):
             msg.update(message)
             return StreamingStatus.RUNNING
-    streamer = CustomStreamer(genai_tokenizer, parsers=[Phi4ReasoningParser()])
+    streamer = CustomStreamer(genai_tokenizer, parsers=[Phi4ReasoningIncrementalParser()])
 
     msg = {}
     for subword in split_answer:
@@ -133,7 +133,7 @@ def write(self, message):
 ])
 def test_incremental_phi4_reason_parser_nostreamer(answer):
     # In this test we are calling parser directly without streamer
-    parser = Phi4ReasoningParser()
+    parser = Phi4ReasoningIncrementalParser()
     stream_string = re.split(r"(\s+)", answer)
 
     msg = {}
@@ -169,7 +169,7 @@ class CustomStreamer(TextParserStreamer):
         def write(self, message):
             msg.update(message)
             return StreamingStatus.RUNNING
-    streamer = CustomStreamer(genai_tokenizer, parsers=[ReasoningParser(expect_open_tag=True, keep_original_content=keep_original_content)])
+    streamer = CustomStreamer(genai_tokenizer, parsers=[ReasoningIncrementalParser(expect_open_tag=True, keep_original_content=keep_original_content)])
 
     msg = {}
     for subword in stream_string:
@@ -202,7 +202,7 @@ def test_incremental_deepseek_parser():
     extended = stream_string[:]
     extended.insert(0, "")
 
-    parser = DeepSeekR1ReasoningParser()
+    parser = DeepSeekR1ReasoningIncrementalParser()
     for (prev_subword, subword) in zip(extended, stream_string):
         msg = parser.parse(msg, prev_subword, subword)
 
@@ -219,7 +219,7 @@ def test_incremental_deepseek_parser():
 def test_custom_incremental_parser(hf_ov_genai_models):
     hf_tokenizer, genai_tokenizer = hf_ov_genai_models
 
-    class CustomParser(IncrementalParserBase):
+    class CustomParser(IncrementalParser):
         main_part_started: bool = False
 
         def parse(self, msg: dict, previous_text: str, delta_text: str, prev_tokens = None, delta_tokens = None) -> str:
@@ -279,7 +279,7 @@ def test_custom_parser(tmp_path, model_id):
     pipe = create_ov_pipeline(models_path)
     tok = pipe.get_tokenizer()
 
-    class CustomParser(ParserBase):
+    class CustomParser(Parser):
         def parse(self, msg: dict):
             content = None
             if 'content' in msg:

From 01d422452d5ccd4ce8b89668c574cf9e76d470f4 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Wed, 22 Oct 2025 15:09:53 +0200
Subject: [PATCH 28/43] renaming leftovers

---
 src/cpp/include/openvino/genai/parsers.hpp    | 38 +++++------
 src/cpp/src/parsers.cpp                       | 68 +++++++++----------
 src/python/openvino_genai/__init__.py         |  7 +-
 src/python/openvino_genai/__init__.pyi        |  3 +-
 .../openvino_genai/py_openvino_genai.pyi      |  5 +-
 src/python/py_parsers.cpp                     | 24 ++++---
 tests/cpp/parser.cpp                          |  4 +-
 7 files changed, 81 insertions(+), 68 deletions(-)

diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index 7225669f5a..33657072f2 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -16,7 +16,7 @@ class OPENVINO_GENAI_EXPORTS IncrementalParser {
 
     // Returns a string with the filtered text to be added to the content.
     virtual std::string parse(
-        JsonContainer& msg,
+        JsonContainer& message,
         const std::string& previous_text,
         std::string& delta_text,
         const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
@@ -38,7 +38,7 @@ class OPENVINO_GENAI_EXPORTS ReasoningIncrementalParser : public IncrementalPars
     virtual ~ReasoningIncrementalParser();
 
     std::string parse(
-        JsonContainer& msg,
+        JsonContainer& message,
         const std::string& previous_text,
         std::string& delta_text,
         const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
@@ -56,19 +56,33 @@ class OPENVINO_GENAI_EXPORTS Phi4ReasoningIncrementalParser : public ReasoningIn
     explicit Phi4ReasoningIncrementalParser(bool expect_open_tag = true) : ReasoningIncrementalParser(expect_open_tag) {};
 };
 
-class Parser {
+class OPENVINO_GENAI_EXPORTS Parser {
 public:
     Parser() = default;
     virtual ~Parser();
     virtual void parse(JsonContainer& text) = 0;
 };
 
+class OPENVINO_GENAI_EXPORTS ReasoningParser : public Parser {
+public:
+    ReasoningParser(
+        bool expect_open_tag = true,
+        bool keep_original_content = true,
+        const std::string& open_tag = "<think>",
+        const std::string& close_tag = "</think>");
+    void parse(JsonContainer& message) override;
+    ~ReasoningParser();
+private:
+    class ReasoningParserImpl;
+    std::unique_ptr<ReasoningParserImpl> m_impl;
+};
+
 class OPENVINO_GENAI_EXPORTS Llama3PythonicToolParser : public Parser {
 // Does not modify original content, only extracts and adds tool calls
 public:
     explicit Llama3PythonicToolParser(bool keep_original_content = true);
     ~Llama3PythonicToolParser();
-    void parse(JsonContainer& input) override;
+    void parse(JsonContainer& message) override;
 private:
     class Llama3PythonicToolParserImpl;
     std::unique_ptr<Llama3PythonicToolParserImpl> m_impl;
@@ -79,25 +93,11 @@ class OPENVINO_GENAI_EXPORTS Llama3JsonToolParser : public Parser {
 public:
     explicit Llama3JsonToolParser(bool keep_original_content = true);
     ~Llama3JsonToolParser();
-    void parse(JsonContainer& input) override;
+    void parse(JsonContainer& message) override;
 private:
     class Llama3JsonToolParserImpl;
     std::unique_ptr<Llama3JsonToolParserImpl> m_impl;
 };
 
-class OPENVINO_GENAI_EXPORTS BaseReasoningParser : public Parser {
-public:
-    BaseReasoningParser(
-        bool expect_open_tag = true,
-        bool keep_original_content = true,
-        const std::string& open_tag = "<think>",
-        const std::string& close_tag = "</think>");
-    void parse(JsonContainer& input) override;
-    ~BaseReasoningParser();
-private:
-    class BaseReasoningParserImpl;
-    std::unique_ptr<BaseReasoningParserImpl> m_impl;
-};
-
 } // namespace genai
 } // namespace ov
diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index c362d9f73d..a43a328a2f 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -31,7 +31,7 @@
     std::string parse(
-        JsonContainer& msg,
+        JsonContainer& message,
         const std::string& previous_text,
         std::string& delta_text,
         const std::optional<std::vector<int64_t>>& previous_tokens,
@@ -45,17 +45,17 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
         }
         m_first_run = false;
 
-        if (!msg.contains("reasoning_content")) {
-            msg["reasoning_content"] = "";
+        if (!message.contains("reasoning_content")) {
+            message["reasoning_content"] = "";
         }
-        if (!msg.contains("content")) {
-            msg["content"] = "";
+        if (!message.contains("content")) {
+            message["content"] = "";
         }
 
         auto txt_chunk = m_text_cache + delta_text;
-        auto reason_str = msg["reasoning_content"].get_string();
-        auto content_str = msg["content"].get_string();
+        auto reason_str = message["reasoning_content"].get_string();
+        auto content_str = message["content"].get_string();
 
         if (!m_think_tag_opened && txt_chunk.find(m_open_tag) != std::string::npos && m_expect_open_tag) {
             // Thinking has started
@@ -67,7 +67,7 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
             }
             m_think_tag_opened = true;
 
-            msg["reasoning_content"] = reason_str;
+            message["reasoning_content"] = reason_str;
             m_text_cache = "";
 
             if (txt_chunk.find(m_close_tag) != std::string::npos) {
@@ -80,7 +80,7 @@
                 }
                 m_think_tag_opened = false;
                 m_deactivated = true;
-                msg["reasoning_content"] = reason_str;
+                message["reasoning_content"] = reason_str;
             }
         } else if (m_think_tag_opened && txt_chunk.find(m_close_tag) != std::string::npos) {
             // Thinking tag was closed
@@ -95,7 +95,7 @@
                 delta_text = txt_chunk.substr(close_idx + m_close_tag.size(), txt_chunk.size() - (close_idx + m_close_tag.size()));
             }
 
-            msg["reasoning_content"] = reason_str;
+            message["reasoning_content"] = reason_str;
             m_text_cache = "";
             m_think_tag_opened = false;
             m_deactivated = true;
@@ -137,7 +137,7 @@
             if (!m_keep_original_content) {
                 delta_text = "";
             }
-            msg["reasoning_content"] = reason_str;
+            message["reasoning_content"] = reason_str;
         } else {
             // Think tag was not opened yet and not found in the current delta_text.
             // Accumulate text in the cache to detect if <think> is split between several delta_text pieces.
@@ -155,13 +155,13 @@
 std::string ReasoningIncrementalParser::parse(
-    JsonContainer& msg,
+    JsonContainer& message,
     const std::string& previous_text,
     std::string& delta_text,
     const std::optional<std::vector<int64_t>>& previous_tokens,
     const std::optional<std::vector<int64_t>>& delta_tokens
 ) {
-    return m_impl->parse(msg, previous_text, delta_text, previous_tokens, delta_tokens);
+    return m_impl->parse(message, previous_text, delta_text, previous_tokens, delta_tokens);
 }
 
 class Llama3PythonicToolParser::Llama3PythonicToolParserImpl {
@@ -169,13 +169,13 @@ class Llama3PythonicToolParser::Llama3PythonicToolParserImpl {
     Llama3PythonicToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {}
     bool m_keep_original_content;
 
-    void parse(JsonContainer& input) {
+    void parse(JsonContainer& message) {
         // Input example
-        // string input = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>";
+        // string message = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>";
 
         // Regex to capture the [...] part
         std::smatch m;
-        const std::string& text = input["content"].get_string();
+        const std::string& text = message["content"].get_string();
         std::regex r(R"(\[.*?\])");
         if (!std::regex_search(text, m, r)) {
             return;
         }
@@ -197,11 +197,11 @@ class Llama3PythonicToolParser::Llama3PythonicToolParserImpl {
         }
 
         // Split function name and arguments
-        input["tool_calls"] = JsonContainer::array();
-        input["tool_calls"].push_back(JsonContainer({{"name", name}, {"arguments", kv}}));
+        message["tool_calls"] = JsonContainer::array();
+        message["tool_calls"].push_back(JsonContainer({{"name", name}, {"arguments", kv}}));
 
         if (!m_keep_original_content) {
-            input["content"] = regex_replace(text, r, "");
+            message["content"] = regex_replace(text, r, "");
         }
     }
 };
@@ -210,8 +210,8 @@
 Llama3PythonicToolParser::Llama3PythonicToolParser(bool keep_original_content) {
     m_impl = std::make_unique<Llama3PythonicToolParserImpl>(keep_original_content);
 }
 
-void Llama3PythonicToolParser::parse(JsonContainer& input) {
-    m_impl->parse(input);
+void Llama3PythonicToolParser::parse(JsonContainer& message) {
+    m_impl->parse(message);
 }
 
 Llama3PythonicToolParser::~Llama3PythonicToolParser() = default;
@@ -245,15 +245,15 @@
 Llama3JsonToolParser::Llama3JsonToolParser(bool keep_original_content) {
     m_impl = std::make_unique<Llama3JsonToolParserImpl>(keep_original_content);
 }
 
-void Llama3JsonToolParser::parse(JsonContainer& input) {
-    m_impl->parse(input);
+void Llama3JsonToolParser::parse(JsonContainer& message) {
+    m_impl->parse(message);
 }
 
 Llama3JsonToolParser::~Llama3JsonToolParser() = default;
 
-class BaseReasoningParser::BaseReasoningParserImpl {
+class ReasoningParser::ReasoningParserImpl {
 public:
-    BaseReasoningParserImpl(bool expect_open_tag,
+    ReasoningParserImpl(bool expect_open_tag,
         bool keep_original_content,
         const std::string& open_tag,
         const std::string& close_tag):
         m_expect_open_tag(expect_open_tag),
         m_keep_original_content(keep_original_content),
         m_open_tag(open_tag),
         m_close_tag(close_tag) {};
 
-    void parse(JsonContainer& input) {
+    void parse(JsonContainer& message) {
         std::string reasoning_content;
-        std::string content = input["content"].get_string();
+        std::string content = message["content"].get_string();
 
         size_t start = content.find(m_open_tag);
         size_t end = content.find(m_close_tag);
@@ -273,13 +273,13 @@
             reasoning_content = content.substr(start + m_open_tag.size(), end - (start + m_open_tag.size()));
             if (!m_keep_original_content) {
                 // Remove <think>...</think> from content
-                input["content"] = content.substr(0, start) + content.substr(end + m_close_tag.size());
+                message["content"] = content.substr(0, start) + content.substr(end + m_close_tag.size());
             }
         } else {
             reasoning_content = "";
         }
 
-        input["reasoning_content"] = reasoning_content;
+        message["reasoning_content"] = reasoning_content;
     }
 private:
     bool m_expect_open_tag;
     bool m_keep_original_content;
     std::string m_open_tag;
     std::string m_close_tag;
 };
 
-BaseReasoningParser::BaseReasoningParser(bool expect_open_tag, bool keep_original_content, const std::string& open_tag, const std::string& close_tag) {
-    m_impl = std::make_unique<BaseReasoningParserImpl>(expect_open_tag, keep_original_content, open_tag, close_tag);
+ReasoningParser::ReasoningParser(bool expect_open_tag, bool keep_original_content, const std::string& open_tag, const std::string& close_tag) {
+    m_impl = std::make_unique<ReasoningParserImpl>(expect_open_tag, keep_original_content, open_tag, close_tag);
 }
 
-void BaseReasoningParser::parse(JsonContainer& input) {
-    m_impl->parse(input);
+void ReasoningParser::parse(JsonContainer& message) {
+    m_impl->parse(message);
 }
 
-BaseReasoningParser::~BaseReasoningParser() = default;
+ReasoningParser::~ReasoningParser() = default;
 
 Parser::~Parser() = default;
 
 } // namespace ov::genai
diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py
index a0c06baec6..5673b941cb 100644
--- a/src/python/openvino_genai/__init__.py
+++ b/src/python/openvino_genai/__init__.py
@@ -24,12 +24,13 @@
 
 from .py_openvino_genai import (
     Parser,
+    ReasoningParser,
+    Llama3JsonToolParser,
+    Llama3PythonicToolParser,
     IncrementalParser,
+    ReasoningIncrementalParser,
     Phi4ReasoningIncrementalParser,
     DeepSeekR1ReasoningIncrementalParser,
-    Llama3JsonToolParser,
-    Llama3PythonicToolParser,
-    ReasoningIncrementalParser
 )
 
 __version__ = get_version()
diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi
index 04bd694c57..f92c55f4fd 100644
--- a/src/python/openvino_genai/__init__.pyi
+++ b/src/python/openvino_genai/__init__.pyi
@@ -39,6 +39,7 @@ from openvino_genai.py_openvino_genai import Phi4ReasoningIncrementalParser
 from openvino_genai.py_openvino_genai import RawImageGenerationPerfMetrics
 from openvino_genai.py_openvino_genai import RawPerfMetrics
 from openvino_genai.py_openvino_genai import ReasoningIncrementalParser
+from openvino_genai.py_openvino_genai import ReasoningParser
 from openvino_genai.py_openvino_genai import SD3Transformer2DModel
 from openvino_genai.py_openvino_genai import Scheduler
 from openvino_genai.py_openvino_genai import SchedulerConfig
@@ -73,5 +74,5 @@ from openvino_genai.py_openvino_genai import draft_model
 from openvino_genai.py_openvino_genai import get_version
 import os as os
 from .
import py_openvino_genai -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 549da2bf3a..25bd1a6b15 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import collections.abc import openvino._pyopenvino import typing -__all__: list[str] = ['Adapter', 'AdapterConfig', 
'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'MeanStdPair', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'MeanStdPair', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] class Adapter: 
""" Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. @@ -2085,6 +2085,9 @@ class RawPerfMetrics: class ReasoningIncrementalParser(IncrementalParser): def __init__(self, expect_open_tag: bool = True, keep_original_content: bool = True, open_tag: str = '', close_tag: str = '') -> None: ... +class ReasoningParser(Parser): + def __init__(self, expect_open_tag: bool = True, keep_original_content: bool = True, open_tag: str = '', close_tag: str = '') -> None: + ... class SD3Transformer2DModel: """ SD3Transformer2DModel class. diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index e6b3437f87..d55e3d29ee 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -15,6 +15,7 @@ namespace py = pybind11; using ov::genai::IncrementalParser; using ov::genai::Parser; +using ov::genai::ReasoningParser; using ov::genai::ReasoningIncrementalParser; using ov::genai::Phi4ReasoningIncrementalParser; using ov::genai::DeepSeekR1ReasoningIncrementalParser; @@ -106,12 +107,12 @@ void init_parsers(py::module_& m) { py::class_>(m, "IncrementalParser") .def(py::init<>()) .def("parse", [](IncrementalParser& self, - py::dict& msg, + py::dict& message, std::string& previous_text, std::string& delta_text, const std::optional>& previous_tokens = std::nullopt, const std::optional>& delta_tokens = std::nullopt) { - auto msg_cpp = pyutils::py_object_to_json_container(msg); + auto msg_cpp = pyutils::py_object_to_json_container(message); auto res = self.parse(msg_cpp, previous_text, delta_text, previous_tokens, delta_tokens); auto json_str = msg_cpp.to_json_string(); @@ -121,10 +122,10 @@ void init_parsers(py::module_& m) { py::dict result = json_mod.attr("loads")(json_str); // update msg with result for (auto item : result) { - msg[item.first] = item.second; + message[item.first] = item.second; } return res; - }, py::arg("msg"), py::arg("previous_text"), py::arg("delta_text"), + }, py::arg("message"), py::arg("previous_text"), py::arg("delta_text"), py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt, "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output."); @@ -144,8 +145,8 @@ void init_parsers(py::module_& m) { py::class_>(m, "Parser") .def(py::init<>()) .def("parse", - [](Parser& self, py::dict& msg) { - auto msg_cpp = pyutils::py_object_to_json_container(msg); + [](Parser& self, py::dict& message) { + auto msg_cpp = pyutils::py_object_to_json_container(message); self.parse(msg_cpp); // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here, @@ -157,12 +158,19 @@ void init_parsers(py::module_& m) { // update msg with result for (auto item : result) { - msg[item.first] = item.second; + message[item.first] = item.second; } }, - py::arg("text"), + py::arg("message"), "Parse is called with the full text. 
Returns a dict with parsed content."); + py::class_, Parser>(m, "ReasoningParser") + .def(py::init(), + py::arg("expect_open_tag") = true, + py::arg("keep_original_content") = true, + py::arg("open_tag") = "", + py::arg("close_tag") = ""); + py::class_, Parser>(m, "Llama3JsonToolParser") .def(py::init<>()); diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp index 7660d1625b..7aa4caf6b8 100644 --- a/tests/cpp/parser.cpp +++ b/tests/cpp/parser.cpp @@ -62,7 +62,7 @@ TEST(ParserTest, test_reasoning_parser_1) { expected["content"] = std::string(R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|>\n\n**Solution:**\n\nTo find the sum of 2 and 1, )"); expected["reasoning_content"] = std::string(R"(\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n)"); - std::shared_ptr parser = std::make_shared( + std::shared_ptr parser = std::make_shared( /*expect_open_tag*/ true, /*keep_original_content*/ false ); @@ -80,7 +80,7 @@ TEST(ParserTest, test_reasoning_parser_2) { expected["content"] = prompt; expected["reasoning_content"] = std::string(R"(\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n)"); - std::shared_ptr parser = std::make_shared( + std::shared_ptr parser = std::make_shared( /*expect_open_tag*/ true, /*keep_original_content*/ true ); From fec89453572730332188893f8b8a34b834501fd4 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Wed, 22 Oct 2025 16:55:57 +0200 Subject: [PATCH 29/43] some corrections --- src/cpp/src/llm/pipeline.cpp | 72 ++++++++++--------- src/cpp/src/parsers.cpp | 4 +- src/cpp/src/text_streamer.cpp | 2 +- .../openvino_genai/py_openvino_genai.pyi | 23 +++--- src/python/py_parsers.cpp | 43 +++-------- src/python/py_streamers.cpp | 52 ++++++-------- tests/python_tests/test_text_streamer.py | 2 +- 7 files changed, 88 insertions(+), 110 deletions(-) diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index 513d047bf2..f6f206a6ab 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -15,6 +15,41 @@ #include "speculative_decoding/speculative_decoding_stateful.hpp" #include "utils.hpp" +namespace { + +void run_parsers(ov::genai::DecodedResults& res, const ov::genai::OptionalGenerationConfig& generation_config, const ov::genai::StreamerVariant& streamer) { + // If streamer is of StreamerBase type, and it is TextParserStreamer, get parsed message + // Streaming is available only for batch size 1 therefore only parsed[0] + if (auto streamer_obj = std::get_if>(&streamer)) { + if (auto parser_streamer = std::dynamic_pointer_cast(*streamer_obj)) { + res.parsed.resize(1); + res.parsed[0] = parser_streamer->get_parsed_message(); + } + } + + if (!generation_config.has_value() || generation_config->parsers.empty()) { + return; + } + + std::vector> parsers = generation_config->parsers; + res.parsed.resize(res.texts.size()); + // Apply Base parsers sequentially even if IncrementalParser has run. 
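+    // e.g. parsers = {std::make_shared<ReasoningParser>(), std::make_shared<Llama3JsonToolParser>()}
+    // would first fill "reasoning_content" and then attach "tool_calls" to the same message.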
+ for (size_t i = 0; i < res.texts.size(); ++i) { + auto& msg = res.parsed[i]; + if (!msg.contains("content")) { + // Initialize msg with content + msg["content"] = res.texts[i]; + } + for (auto& parser: parsers) { + // TODO: Check the state of incremental parser and reset if necessary + parser->parse(msg); + } + res.parsed[i] = msg; + } +} + +} + namespace ov { namespace genai { @@ -251,36 +286,7 @@ DecodedResults LLMPipeline::generate( OptionalGenerationConfig generation_config, StreamerVariant streamer) { auto res = m_pimpl->generate(inputs, generation_config, streamer); - - // If streamer is of StreamerBase type, and it is TextParserStreamer, get parsed message - // Streaming is available only for batch size 1 therefore only parsed[0] - if (auto streamer_obj = std::get_if>(&streamer)) { - if (auto parser_streamer = std::dynamic_pointer_cast(*streamer_obj)) { - res.parsed.resize(1); - res.parsed[0] = parser_streamer->get_parsed_message(); - } - } - - if (!generation_config.has_value() || generation_config->parsers.empty()) { - return res; - } - - std::vector> parsers = (*generation_config).parsers; - res.parsed.resize(res.texts.size()); - // Apply Base parsers sequentially even if IncrementalParser has run. - for (size_t i = 0; i < res.texts.size(); ++i) { - auto& msg = res.parsed[i]; - if (!msg.contains("content")) { - // Initialize msg with content - msg["content"] = res.texts[i]; - } - for (auto& parser: parsers) { - // TODO: Check the state of incremental parser and reset if necessary - parser->parse(msg); - } - res.parsed[i] = msg; - } - + run_parsers(res, generation_config, streamer); return res; } @@ -288,8 +294,10 @@ DecodedResults LLMPipeline::generate(StringInputs text, const ov::AnyMap& config auto config_arg = utils::get_config_from_map(config_map); GenerationConfig config = (config_arg.has_value()) ? *config_arg : get_generation_config(); config.update_generation_config(config_map); - - return m_pimpl->generate(text, config, utils::get_streamer_from_map(config_map)); + auto streamer = utils::get_streamer_from_map(config_map); + auto res = m_pimpl->generate(text, config, streamer); + run_parsers(res, config_arg, streamer); + return res; } EncodedResults LLMPipeline::generate( diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index a43a328a2f..2ed107260e 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -17,8 +17,8 @@ class ReasoningIncrementalParser::ReasoningParserImpl { std::string m_open_tag; std::string m_close_tag; std::string m_text_cache = ""; -public: bool m_deactivated = false; +public: ReasoningParserImpl() = default; ReasoningParserImpl(bool expect_open_tag, @@ -189,7 +189,7 @@ class Llama3PythonicToolParser::Llama3PythonicToolParserImpl { std::string args = call.substr(pos + 1, call.size() - pos - 2); // inside (...) 
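        // Worked example: for text = R"([get_weather(location="New York, NY", unit="celsius")])"
        // the bracket regex above matches the whole call, `name` holds "get_weather" and
        // `args` holds R"(location="New York, NY", unit="celsius")"; the key="value"
        // regex below then splits `args` into the arguments map.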
JsonContainer kv; - // Parse arguments of the form key='value' + // Parse arguments of the form key="value" std::regex arg_re(R"((\w+)\s*=\s*\"([^"]*)\")"); auto it = std::sregex_iterator(args.begin(), args.end(), arg_re); for (; it != std::sregex_iterator(); ++it) { diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp index ee48cff1f9..9df53aab71 100644 --- a/src/cpp/src/text_streamer.cpp +++ b/src/cpp/src/text_streamer.cpp @@ -136,7 +136,7 @@ CallbackTypeVariant TextParserStreamer::write(std::string message) { m_parsed_message["content"] = m_parsed_message["content"].get_string() + message; } - m_text_buffer = message; + m_text_buffer += message; return write(m_parsed_message); } diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 25bd1a6b15..273f2d778b 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -1462,7 +1462,7 @@ class ImageGenerationPerfMetrics: class IncrementalParser: def __init__(self) -> None: ... - def parse(self, msg: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: + def parse(self, message: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str: """ Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output. """ @@ -1842,7 +1842,7 @@ class MeanStdPair: class Parser: def __init__(self) -> None: ... - def parse(self, text: dict) -> None: + def parse(self, message: dict) -> None: """ Parse is called with the full text. Returns a dict with parsed content. """ @@ -3390,19 +3390,22 @@ class TextEmbeddingPipeline: Waits computed embeddings for a query """ class TextParserStreamer(TextStreamer): + """ + + Base class for text streamers that work with parsed messages. To use it, inherit from this class and implement the write method, which takes a dict as its input parameter. + + tokenizer: Tokenizer object to decode tokens into text. + parsers: vector of IncrementalParser to process the text stream incrementally. + """ def __init__(self, tokenizer: Tokenizer, parsers: collections.abc.Sequence[IncrementalParser] = []) -> None: - """ - TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers. - """ - def _write(self, arg0: collections.abc.Sequence[typing.SupportsInt] | str) -> StreamingStatus: ... - def get_parsed_message(self) -> dict: + def _write(self, chunk: collections.abc.Sequence[typing.SupportsInt] | str) -> StreamingStatus: """ - Get the current parsed message + This is a private method used to call write with integer tokens or text chunks. It is used for test purposes only. """ - def write(self, message: dict) -> StreamingStatus: + def get_parsed_message(self) -> dict: """ - Write is called with a dict. Returns StreamingStatus. + Returns the accumulated message. 
""" class TextRerankPipeline: """ diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index d55e3d29ee..30faa18388 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -46,28 +46,12 @@ class ConstructableIncrementalParser: public IncrementalParser { py::function parse_method = py::get_override(static_cast(this), "parse"); if (!parse_method) { - throw std::runtime_error("parse method not implemented in Python subclass"); + OPENVINO_THROW("parse method not implemented in Python subclass"); } + + auto res = parse_method(py_msg, previous_text, delta_text, previous_tokens, delta_tokens); + msg = pyutils::py_object_to_json_container(py_msg); - auto res = parse_method( - py_msg, - previous_text, - delta_text, - previous_tokens, - delta_tokens - ); - - // iterate throught py_msg and update msg - auto msg_anymap = pyutils::py_object_to_any_map(py_msg); - for (const auto& [key, value] : msg_anymap) { - if (value.is()) { - msg[key] = value.as(); - } else if (value.is()) { - msg[key] = JsonContainer(value.as()); - } else { - OPENVINO_THROW("Unsupported type in JsonContainer update from Python dict"); - } - } return res.cast(); } }; @@ -79,24 +63,13 @@ class ConstructableParser: public Parser { py::function parse_method = py::get_override(static_cast(this), "parse"); if (!parse_method) { - throw std::runtime_error("parse method not implemented in Python subclass"); + OPENVINO_THROW("parse method not implemented in Python subclass"); } // Convert JsonContainer to py::dict - py::dict py_msg = pyutils::json_container_to_py_object(msg); - parse_method(py_msg); - - // iterate throught py_msg and update msg - auto msg_anymap = pyutils::py_object_to_any_map(py_msg); - for (const auto& [key, value] : msg_anymap) { - if (value.is()) { - msg[key] = value.as(); - } else if (value.is()) { - msg[key] = JsonContainer(value.as()); - } else { - OPENVINO_THROW("Unsupported type in JsonContainer update from Python dict"); - } - } + py::dict py_msg = pyutils::json_container_to_py_object(msg); + parse_method(py_msg); + msg = pyutils::py_object_to_json_container(py_msg); } }; diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp index 94fb52355d..ba596ead92 100644 --- a/src/python/py_streamers.cpp +++ b/src/python/py_streamers.cpp @@ -39,6 +39,13 @@ callback: User-defined callback function to process the decoded text, callback s detokenization_params: AnyMap with detokenization parameters, e.g. ov::genai::skip_special_tokens(...) )"; +auto text_parser_streamer_docstring = R"( +Base class for text streamers which works with parsed messages. In order to use inherit from this class and implement write method which takes a dict as input parameter. + +tokenizer: Tokenizer object to decode tokens into text. +parsers: vector of IncrementalParser to process the text stream incrementally. +)"; + class ConstructableStreamer: public StreamerBase { OPENVINO_SUPPRESS_DEPRECATED_START bool put(int64_t token) override { @@ -88,15 +95,6 @@ class ConstructableTextParserStreamer: public TextParserStreamer { return res.cast(); } - - StreamingStatus write(py::dict& message) { - PYBIND11_OVERRIDE_PURE( - StreamingStatus, - TextParserStreamer, - "write", - message - ); - } }; } // namespace @@ -145,38 +143,34 @@ void init_streamers(py::module_& m) { py::arg("token")) .def("end", &TextStreamer::end); - // TODO: double check/add more relevant docstrings for TextParserStreamer. 
- py::class_<TextParserStreamer, ConstructableTextParserStreamer, std::shared_ptr<TextParserStreamer>, TextStreamer>(m, "TextParserStreamer") + py::class_<TextParserStreamer, ConstructableTextParserStreamer, std::shared_ptr<TextParserStreamer>, TextStreamer>(m, "TextParserStreamer", text_parser_streamer_docstring) .def(py::init([](const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParser>> parsers) { return std::make_shared<ConstructableTextParserStreamer>(tokenizer, parsers); }), py::arg("tokenizer"), py::arg("parsers") = std::vector<std::shared_ptr<IncrementalParser>>(), - py::keep_alive<1, 3>(), - "TextParserStreamer is used to decode tokens into text, parse the text and call user-defined incremental parsers.") - .def("write", - [](TextParserStreamer& self, py::dict& message) { - // Downcast to ConstructableTextParserStreamer if needed - auto* derived = dynamic_cast<ConstructableTextParserStreamer*>(&self); - if (!derived) { - throw std::runtime_error("write(py::dict&) only available for ConstructableTextParserStreamer"); - } - return derived->write(message); - }, - py::arg("message"), - "Write is called with a dict. Returns StreamingStatus.") - .def("_write", [](TextParserStreamer& self, std::variant<std::vector<int64_t>, std::string> chunk) -> StreamingStatus { + py::keep_alive<1, 3>()) + + // If we inherit and implement 'write' in Python and try to call write with text chunks or integer tokens + // then the Python implementation will be called, since Python does not have overloads. + // But for tests we need to check that when we call write with strings/integer tokens they are accumulated and stored correctly in py::dict. + // Therefore we provide a private method '_write' which is used to call 'write' with correct parameters from the C++ side. + .def("_write", + [](TextParserStreamer& self, std::variant<std::vector<int64_t>, std::string> chunk) -> StreamingStatus { if (auto _token = std::get_if<std::vector<int64_t>>(&chunk)) { return self.write(*_token); } else if (auto _str = std::get_if<std::string>(&chunk)) { auto res = self.write(*_str); return std::get<StreamingStatus>(res); } - }) - .def("get_parsed_message", + return StreamingStatus::RUNNING; + }, + py::arg("chunk"), "This is a private method used to call write with integer tokens or text chunks. 
It is used for test purposes only.") + .def("get_parsed_message", [](TextParserStreamer& self) -> py::dict{ return pyutils::json_container_to_py_object(self.get_parsed_message()); }, "Returns the accumulated message."); } diff --git a/tests/python_tests/test_text_streamer.py b/tests/python_tests/test_text_streamer.py index a3ea55d225..75804256b1 100644 --- a/tests/python_tests/test_text_streamer.py +++ b/tests/python_tests/test_text_streamer.py @@ -71,7 +71,7 @@ def test_text_prompts(tmp_path, prompt, model_id): for token in tokens: streamer.write(token) streamer.end() - + assert ''.join(accumulated) == ov_tokenizer.decode(tokens) for chunk_size in [1,2,3,4,5]: From 9fa7d01d1fcb1cf2097f014c90471d13d486c68d Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Wed, 22 Oct 2025 17:25:17 +0200 Subject: [PATCH 30/43] hide TextParsedStreamerImplementation --- .../include/openvino/genai/text_streamer.hpp | 13 ++++---- src/cpp/src/text_streamer.cpp | 33 ++++++++++++++----- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp index fabea0f524..53e06b7a93 100644 --- a/src/cpp/include/openvino/genai/text_streamer.hpp +++ b/src/cpp/include/openvino/genai/text_streamer.hpp @@ -49,18 +49,19 @@ class OPENVINO_GENAI_EXPORTS TextParserStreamer : public TextStreamer { public: + class TextParserStreamerImpl; using TextStreamer::write; + TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParser>> parsers = {}); - + ~TextParserStreamer(); + virtual StreamingStatus write(JsonContainer& message) = 0; CallbackTypeVariant write(std::string message); - - JsonContainer get_parsed_message() const { return m_parsed_message; } + + JsonContainer get_parsed_message() const; private: - JsonContainer m_parsed_message; - std::string m_text_buffer; - std::vector<std::shared_ptr<IncrementalParser>> m_parsers; + std::unique_ptr<TextParserStreamerImpl> m_pimpl; }; } // namespace genai diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp index 9df53aab71..9ad9bd9543 100644 --- a/src/cpp/src/text_streamer.cpp +++ b/src/cpp/src/text_streamer.cpp @@ -124,21 +124,38 @@ void TextStreamer::end() { StreamerBase::~StreamerBase() = default; -TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParser>> parsers) - : TextStreamer(tokenizer, [this](std::string s) -> CallbackTypeVariant { - return this->write(s); - }), m_parsers{parsers} {} +class TextParserStreamer::TextParserStreamerImpl { +public: +std::vector<std::shared_ptr<IncrementalParser>> m_parsers; +JsonContainer m_parsed_message; +TextParserStreamerImpl(std::vector<std::shared_ptr<IncrementalParser>> parsers) : m_parsers{parsers} {} +void parse(std::string message) { for (auto& parser: m_parsers) { message = parser->parse(m_parsed_message, message); // The message can be modified inside the parser, e.g. if the parser extracted a tool call from the message content m_parsed_message["content"] = m_parsed_message["content"].get_string() + message; } } }; +TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParser>> parsers) : TextStreamer(tokenizer, [this](std::string s) -> CallbackTypeVariant { return this->write(s); }), m_pimpl{std::make_unique<TextParserStreamerImpl>(parsers)} {} CallbackTypeVariant TextParserStreamer::write(std::string message) { 
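    // Run the freshly decoded chunk through all incremental parsers, then hand the
    // accumulated JsonContainer to the user-defined write(JsonContainer&) overload.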
m_pimpl->parse(message); return write(m_pimpl->m_parsed_message); } JsonContainer TextParserStreamer::get_parsed_message() const { return m_pimpl->m_parsed_message; } +TextParserStreamer::~TextParserStreamer() = default; + } // namespace genai } // namespace ov From a2307a2a87d0734c1e3d97f2b87b8500c828e342 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Wed, 22 Oct 2025 18:16:07 +0200 Subject: [PATCH 31/43] remove redundant previous_text, previous_tokens, add docstrings --- src/cpp/include/openvino/genai/parsers.hpp | 215 ++++++++++++++++----- src/cpp/src/parsers.cpp | 29 +-- src/python/py_parsers.cpp | 15 +- tests/cpp/parser.cpp | 27 +-- tests/python_tests/test_parsers.py | 13 +- 5 files changed, 179 insertions(+), 120 deletions(-) diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index 33657072f2..f952268d97 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -10,93 +10,202 @@ namespace ov { namespace genai { +/** + * @brief Abstract base class for parsers that process complete text content at the end of generation. + */ +class OPENVINO_GENAI_EXPORTS Parser { +public: + Parser() = default; + virtual ~Parser(); + + /** + * @brief Parse complete text content at the end of a generate call. + * + * This method processes the entire text content and extracts or modifies + * information as needed. The results are stored in the provided JsonContainer. + * + * @param message JsonContainer containing the text to parse and to store results + */ + virtual void parse(JsonContainer& message) = 0; +}; + +class OPENVINO_GENAI_EXPORTS ReasoningParser : public Parser { +public: + /** + * @brief ReasoningParser extracts reasoning content between open and close tags from text. + * The 'content' field should be filled in order to extract reasoning content. + * The reasoning content is stored in the 'reasoning_content' field of the JsonContainer. + * + * @param expect_open_tag If true then open_tag is expected to be generated, if false then it's already part of the model input string + * @param keep_original_content Whether to preserve the original 'content' including reasoning sections + * @param open_tag The opening tag (default: "<think>") + * @param close_tag The closing tag (default: "</think>") + */ + ReasoningParser( + bool expect_open_tag = true, + bool keep_original_content = true, + const std::string& open_tag = "<think>", + const std::string& close_tag = "</think>"); + + /** + * @brief Parse complete text content at the end of a generate call. + * + * This method processes the entire text content and extracts or modifies + * information as needed. The results are stored in the provided JsonContainer. + * + * @param message JsonContainer containing the text to parse and to store results + */ + void parse(JsonContainer& message) override; + ~ReasoningParser(); +private: + class ReasoningParserImpl; + std::unique_ptr<ReasoningParserImpl> m_impl; +}; + +/** + * @brief Parser for Llama 3 Pythonic tool calls format. + * + * Llama3PythonicToolParser extracts tool calls from text content formatted + * in Llama 3's Pythonic style, e.g. [get_weather(location='New York, NY', unit='celsius')]. + * It does not modify the original content, + * only extracts and adds tool call information to the message. + */ +class OPENVINO_GENAI_EXPORTS Llama3PythonicToolParser : public Parser { +public: + explicit Llama3PythonicToolParser(); + ~Llama3PythonicToolParser(); + + /** + * @brief Parse Llama 3 Pythonic tool calls from text. 
+ * + * Extracts tool call information from text formatted in Llama 3's Pythonic style + * and adds the 'tool_calls' to the JsonContainer without modifying the original content. + * + * @param message JsonContainer containing the text to parse and to store tool call results + */ + void parse(JsonContainer& message) override; +private: + class Llama3PythonicToolParserImpl; + std::unique_ptr m_impl; +}; + +/** + * @brief Parser for Llama 3 JSON tool calls format. + * + * Llama3JsonToolParser extracts tool calls from text content formatted + * in Llama 3's JSON style, e.g. {"type": "function", "function": {"name": "get_weather", "parameters": {"location": "New York, NY", ...}}}. + * It does not modify the original content, only extracts and adds tool call information to the message. + */ +class OPENVINO_GENAI_EXPORTS Llama3JsonToolParser : public Parser { +public: + explicit Llama3JsonToolParser(); + ~Llama3JsonToolParser(); + + /** + * @brief Parse Llama 3 JSON tool calls from text. + * + * Extracts tool call information from text formatted in Llama 3's JSON style + * and adds the tool calls to the JsonContainer without modifying the original content. + * + * @param message JsonContainer containing the text to parse and to store tool call results + */ + void parse(JsonContainer& message) override; +private: + class Llama3JsonToolParserImpl; + std::unique_ptr m_impl; +}; + +/** + * @brief Abstract base class for incremental parsers that process text during streaming. + */ class OPENVINO_GENAI_EXPORTS IncrementalParser { public: IncrementalParser() = default; - // We return string which with filtered text to be added to content. + /** + * @brief Parse incremental text content and return filtered text. + * + * This method processes incoming text deltas and returns filtered text that should + * be added to the content. + * + * @param message JsonContainer to store parsed results and metadata + * @param delta_text New text chunk to be processed in this step + * @param delta_tokens Optional vector of new token IDs to be processed in case if more fast token-based processing is needed. + * @return std::string Filtered text that should be added to the content + */ virtual std::string parse( JsonContainer& message, - const std::string& previous_text, - std::string& delta_text, - const std::optional>& previous_tokens = std::nullopt, + std::string& delta_text, // TODO: double check const std::optional>& delta_tokens = std::nullopt ) = 0; virtual ~IncrementalParser() = default; }; +/** + * @brief Incremental parser for reasoning content with configurable tags. + * + * Extracts text with open and close tags. Original JsonContainer must have 'content' field. + * The reasoning content is stored in the 'reasoning_content' field of the JsonContainer. + */ class OPENVINO_GENAI_EXPORTS ReasoningIncrementalParser : public IncrementalParser { private: class ReasoningParserImpl; std::unique_ptr m_impl; public: + /** + * @brief Constructor for ReasoningIncrementalParser. 
+ * + * @param expect_open_tag If true then open_tag is expected to be generated, if false then it's already part of the model input string + * @param keep_original_content If true then original 'content' is preserved, otherwise reasoning text is removed from 'content' + * @param open_tag The opening tag (default: "<think>") + * @param close_tag The closing tag (default: "</think>") + */ ReasoningIncrementalParser(bool expect_open_tag = true, - bool keep_original_content = true, - const std::string& open_tag = "<think>", + bool keep_original_content = true, + const std::string& open_tag = "<think>", const std::string& close_tag = "</think>"); virtual ~ReasoningIncrementalParser(); + /** + * @brief Parse reasoning content incrementally. + * + * Processes text streams containing reasoning sections marked by configurable tags. + * Can filter out reasoning content or preserve it based on parser configuration. + * + * @param message JsonContainer to store parsed results and reasoning metadata + * @param delta_text New text chunk to be processed in this step + * @param delta_tokens Optional vector of new token IDs to be processed + * @return std::string Filtered text with reasoning content processed according to configuration + */ std::string parse( JsonContainer& message, - const std::string& previous_text, std::string& delta_text, - const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt, const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt ) override; }; +/** + * @brief Specialized incremental parser for DeepSeek R1 model reasoning format. + * + * DeepSeekR1ReasoningIncrementalParser is a specialized version of ReasoningIncrementalParser + * configured specifically for the DeepSeek R1 model's reasoning format, which doesn't expect an opening tag. + */ class OPENVINO_GENAI_EXPORTS DeepSeekR1ReasoningIncrementalParser : public ReasoningIncrementalParser { public: - explicit DeepSeekR1ReasoningIncrementalParser(bool expect_open_tag = false) : ReasoningIncrementalParser(expect_open_tag) {}; + explicit DeepSeekR1ReasoningIncrementalParser() : ReasoningIncrementalParser(/*expect_open_tag=*/false) {}; }; +/** + * @brief Specialized incremental parser for Phi-4 model reasoning format. + * + * Phi4ReasoningIncrementalParser is a specialized version of ReasoningIncrementalParser + * configured specifically for the Phi-4 model's reasoning format, which typically + * expects an opening tag by default. 
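+ *
+ * A minimal usage sketch (illustrative only; chunk boundaries are arbitrary):
+ * @code
+ * ov::genai::Phi4ReasoningIncrementalParser parser;
+ * ov::genai::JsonContainer msg;
+ * for (std::string chunk : {"<think>plan the steps", "</think>", "final answer"}) {
+ *     std::string filtered = parser.parse(msg, chunk);
+ *     // `filtered` carries only the non-reasoning part of the chunk, while
+ *     // msg["reasoning_content"] accumulates the text between <think> and </think>.
+ * }
+ * @endcode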
+ */ class OPENVINO_GENAI_EXPORTS Phi4ReasoningIncrementalParser : public ReasoningIncrementalParser { public: - explicit Phi4ReasoningIncrementalParser(bool expect_open_tag = true) : ReasoningIncrementalParser(expect_open_tag) {}; -}; - -class OPENVINO_GENAI_EXPORTS Parser { -public: - Parser() = default; - virtual ~Parser(); - virtual void parse(JsonContainer& text) = 0; -}; - -class OPENVINO_GENAI_EXPORTS ReasoningParser : public Parser { -public: - ReasoningParser( - bool expect_open_tag = true, - bool keep_original_content = true, - const std::string& open_tag = "", - const std::string& close_tag = ""); - void parse(JsonContainer& message) override; - ~ReasoningParser(); -private: - class ReasoningParserImpl; - std::unique_ptr m_impl; -}; - -class OPENVINO_GENAI_EXPORTS Llama3PythonicToolParser : public Parser { -// Does not modify original content, only extracts and adds tool calls -public: - explicit Llama3PythonicToolParser(bool keep_original_content = true); - ~Llama3PythonicToolParser(); - void parse(JsonContainer& message) override; -private: - class Llama3PythonicToolParserImpl; - std::unique_ptr m_impl; -}; - -class OPENVINO_GENAI_EXPORTS Llama3JsonToolParser : public Parser { -// Does not modify original content, only extracts and adds tool calls -public: - explicit Llama3JsonToolParser(bool keep_original_content = true); - ~Llama3JsonToolParser(); - void parse(JsonContainer& message) override; -private: - class Llama3JsonToolParserImpl; - std::unique_ptr m_impl; + explicit Phi4ReasoningIncrementalParser() : ReasoningIncrementalParser(/*expect_open_tag=*/true) {}; }; } // namespace genai diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index 2ed107260e..cff484105f 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -32,9 +32,7 @@ class ReasoningIncrementalParser::ReasoningParserImpl { std::string parse( JsonContainer& message, - const std::string& previous_text, std::string& delta_text, - const std::optional>& previous_tokens, const std::optional>& delta_tokens ) { if (m_deactivated) { @@ -156,19 +154,14 @@ ReasoningIncrementalParser::~ReasoningIncrementalParser() = default; std::string ReasoningIncrementalParser::parse( JsonContainer& message, - const std::string& previous_text, std::string& delta_text, - const std::optional>& previous_tokens, const std::optional>& delta_tokens ) { - return m_impl->parse(message, previous_text, delta_text, previous_tokens, delta_tokens); + return m_impl->parse(message, delta_text, delta_tokens); } class Llama3PythonicToolParser::Llama3PythonicToolParserImpl { public: - Llama3PythonicToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {} - bool m_keep_original_content; - void parse(JsonContainer& message) { // Input example // string message = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>"; @@ -199,15 +192,11 @@ class Llama3PythonicToolParser::Llama3PythonicToolParserImpl { // Split function name and arguments message["tool_calls"] = JsonContainer::array(); message["tool_calls"].push_back(JsonContainer({{"name", name}, {"arguments", kv}})); - - if (!m_keep_original_content) { - message["content"] = regex_replace(text, r, ""); - } } }; -Llama3PythonicToolParser::Llama3PythonicToolParser(bool keep_original_content) { - m_impl = std::make_unique(keep_original_content); +Llama3PythonicToolParser::Llama3PythonicToolParser() { + m_impl = std::make_unique(); } void Llama3PythonicToolParser::parse(JsonContainer& message) { @@ -217,11 +206,7 @@ void 
Llama3PythonicToolParser::parse(JsonContainer& message) { Llama3PythonicToolParser::~Llama3PythonicToolParser() = default; class Llama3JsonToolParser::Llama3JsonToolParserImpl { -private: - bool m_keep_original_content; public: - Llama3JsonToolParserImpl(bool keep_original_content) : m_keep_original_content(keep_original_content) {} - void parse(JsonContainer& message) { // Find JSON in the message std::string msg_content = message["content"].get_string(); @@ -234,15 +219,11 @@ class Llama3JsonToolParser::Llama3JsonToolParserImpl { auto res = JsonContainer::array(); res.push_back(JsonContainer::from_json_string(msg_content.substr(json_start, json_end - json_start + 1))); message["tool_calls"] = res; - - if (!m_keep_original_content) { - message["content"] = msg_content.substr(0, json_start) + msg_content.substr(json_end + 1); - } } }; -Llama3JsonToolParser::Llama3JsonToolParser(bool keep_original_content) { - m_impl = std::make_unique(keep_original_content); +Llama3JsonToolParser::Llama3JsonToolParser() { + m_impl = std::make_unique(); } void Llama3JsonToolParser::parse(JsonContainer& message) { diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index 30faa18388..7818044e71 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -36,9 +36,7 @@ class ConstructableIncrementalParser: public IncrementalParser { using IncrementalParser::IncrementalParser; std::string parse( JsonContainer& msg, - const std::string& previous_text, std::string& delta_text, - const std::optional>& previous_tokens = std::nullopt, const std::optional>& delta_tokens = std::nullopt ) override { // Convert JsonContainer to py::dict @@ -49,7 +47,7 @@ class ConstructableIncrementalParser: public IncrementalParser { OPENVINO_THROW("parse method not implemented in Python subclass"); } - auto res = parse_method(py_msg, previous_text, delta_text, previous_tokens, delta_tokens); + auto res = parse_method(py_msg, delta_text, delta_tokens); msg = pyutils::py_object_to_json_container(py_msg); return res.cast(); @@ -81,12 +79,10 @@ void init_parsers(py::module_& m) { .def(py::init<>()) .def("parse", [](IncrementalParser& self, py::dict& message, - std::string& previous_text, std::string& delta_text, - const std::optional>& previous_tokens = std::nullopt, const std::optional>& delta_tokens = std::nullopt) { auto msg_cpp = pyutils::py_object_to_json_container(message); - auto res = self.parse(msg_cpp, previous_text, delta_text, previous_tokens, delta_tokens); + auto res = self.parse(msg_cpp, delta_text, delta_tokens); auto json_str = msg_cpp.to_json_string(); // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here, @@ -98,8 +94,7 @@ void init_parsers(py::module_& m) { message[item.first] = item.second; } return res; - }, py::arg("message"), py::arg("previous_text"), py::arg("delta_text"), - py::arg("previous_tokens") = std::nullopt, py::arg("delta_tokens") = std::nullopt, + }, py::arg("message"), py::arg("delta_text"), py::arg("delta_tokens") = std::nullopt, "Parse is called every time new text delta is decoded. 
Returns a string with any additional text to append to the current output."); py::class_, IncrementalParser>(m, "ReasoningIncrementalParser") @@ -110,10 +105,10 @@ void init_parsers(py::module_& m) { py::arg("close_tag") = ""); py::class_, IncrementalParser>(m, "Phi4ReasoningIncrementalParser") - .def(py::init(), py::arg("expect_open_tag") = true); + .def(py::init<>()); py::class_, IncrementalParser>(m, "DeepSeekR1ReasoningIncrementalParser") - .def(py::init(), py::arg("expect_open_tag") = false); + .def(py::init<>()); py::class_>(m, "Parser") .def(py::init<>()) diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp index 7aa4caf6b8..ca2b39453b 100644 --- a/tests/cpp/parser.cpp +++ b/tests/cpp/parser.cpp @@ -33,28 +33,6 @@ TEST(ParserTest, test_llama3_parser_1) { ASSERT_TRUE(expected == input); } -TEST(ParserTest, test_llama3_parser_2) { - std::string prompt = R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[get_weather(location="New York, NY", unit="celsius")]<|eom_id|>)"; - - JsonContainer expected; - expected["content"] = std::string(R"(What's the weather in New York today?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n<|eom_id|>)"); - expected["tool_calls"] = JsonContainer::array(); - expected["tool_calls"].push_back(JsonContainer(ov::AnyMap({ - {"name", "get_weather"}, - {"arguments", ov::AnyMap{ - {"location", "New York, NY"}, - {"unit", "celsius"} - }} - }))); - - std::shared_ptr parser = std::make_shared(/*keep_original_content*/ false); - JsonContainer input; - input["content"] = prompt; - parser->parse(input); - - ASSERT_EQ(input, expected); -} - TEST(ParserTest, test_reasoning_parser_1) { std::string prompt = R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|>\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n\n\n**Solution:**\n\nTo find the sum of 2 and 1, )"; @@ -116,9 +94,8 @@ TEST_F(DeepSeekR1ReasoningParserTest, ReasoningContentAccumulatesAcrossCalls) { JsonContainer msg; for (int i = 1; i < input_stream.size(); i++) { - std::string previous_text = input_stream[i - 1]; std::string delta_text = input_stream[i]; - delta_text = parser.parse(msg, previous_text, delta_text); + delta_text = parser.parse(msg, delta_text); } ASSERT_EQ(msg["reasoning_content"], ref_res); } @@ -173,9 +150,7 @@ TEST(ParserTest, CustomParser_AccumulatesBetweenStartStop) { bool main_part_started = false; std::string parse(JsonContainer& msg, - const std::string& previous_text, std::string& delta_text, - const std::optional>& /*previous_tokens*/ = std::nullopt, const std::optional>& /*delta_tokens*/ = std::nullopt) override { // Ensure fields exist (Python test used dict defaults) if (!msg.contains("content")) { diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py index 2368571f62..e5f33ac955 100644 --- a/tests/python_tests/test_parsers.py +++ b/tests/python_tests/test_parsers.py @@ -138,7 +138,7 @@ def test_incremental_phi4_reason_parser_nostreamer(answer): stream_string = re.split(r"(\s+)", answer) msg = {} for subword in stream_string: - parser.parse(msg, '', subword) + parser.parse(msg, subword) # When parser is called from streamer, it is expected that content is accumulated inside streamer. # Here we are calling parser manually therefore we need to accumulate content manually. 
msg['content'] += subword @@ -199,12 +199,9 @@ def test_incremental_deepseek_parser(): think_content = full_str.split("</think>")[0] content = full_str.split("</think>")[1] - extended = stream_string[:] - extended.insert(0, "") - parser = DeepSeekR1ReasoningIncrementalParser() - for (prev_subword, subword) in zip(extended, stream_string): - msg = parser.parse(msg, prev_subword, subword) + parser = DeepSeekR1ReasoningIncrementalParser() + for subword in stream_string: + msg = parser.parse(msg, subword) assert msg['reasoning_content'] == think_content assert msg['content'] == content @@ -222,7 +219,7 @@ def test_custom_incremental_parser(hf_ov_genai_models): class CustomParser(IncrementalParser): main_part_started: bool = False - def parse(self, msg: dict, previous_text: str, delta_text: str, prev_tokens = None, delta_tokens = None) -> str: + def parse(self, msg: dict, delta_text: str, delta_tokens = None) -> str: if 'content' not in msg: msg['content'] = '' if 'main_text' not in msg: @@ -311,3 +308,5 @@ def parse(self, msg: dict): assert 'reasoning_content' in res.parsed[0] assert res.parsed[0]['reasoning_content'] != "" assert res.parsed[0]['reasoning_content'] == think_text + +# TODO: add test for resetting incremental parser at generation start From cada0556078defcd8931a22fb30ddeb7965fe78b Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Wed, 22 Oct 2025 20:04:25 +0200 Subject: [PATCH 32/43] add decorator to call/reset parser before/after generate --- src/cpp/src/llm/pipeline.cpp | 44 +++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index f6f206a6ab..0b2a445db4 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -17,22 +17,37 @@ namespace { -void run_parsers(ov::genai::DecodedResults& res, const ov::genai::OptionalGenerationConfig& generation_config, const ov::genai::StreamerVariant& streamer) { +// This is a decorator function that wraps a generation callable to apply parsers and reset them before generation if needed. +ov::genai::DecodedResults run_generate_with_parsers(const ov::genai::OptionalGenerationConfig& generation_config, + const ov::genai::StreamerVariant& streamer, + std::function<ov::genai::DecodedResults()> generate_callable) { + + std::shared_ptr<ov::genai::TextParserStreamer> parser_streamer; // If streamer is of StreamerBase type, and it is TextParserStreamer, get parsed message // Streaming is available only for batch size 1 therefore only parsed[0] if (auto streamer_obj = std::get_if<std::shared_ptr<ov::genai::StreamerBase>>(&streamer)) { - if (auto parser_streamer = std::dynamic_pointer_cast<ov::genai::TextParserStreamer>(*streamer_obj)) { - res.parsed.resize(1); - res.parsed[0] = parser_streamer->get_parsed_message(); - } + parser_streamer = std::dynamic_pointer_cast<ov::genai::TextParserStreamer>(*streamer_obj); + } + + // TODO: take this from the generation config once 'need_to_reset_parser' is available there + bool need_to_reset_parser = true; + if (parser_streamer && need_to_reset_parser) { + parser_streamer->reset(); } + auto res = generate_callable(); + + res.parsed.resize(1); + res.parsed[0] = parser_streamer->get_parsed_message(); + + // If no parsers are defined, return if (!generation_config.has_value() || generation_config->parsers.empty()) { - return; + return res; } std::vector<std::shared_ptr<ov::genai::Parser>> parsers = generation_config->parsers; res.parsed.resize(res.texts.size()); + // Apply Base parsers sequentially even if IncrementalParser has run. 
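+    // Note: if a TextParserStreamer already populated res.parsed[0] during streaming,
+    // the base parsers below run on top of that message; otherwise each message
+    // starts from the raw generated text via the "content" initialization below.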
for (size_t i = 0; i < res.texts.size(); ++i) { auto& msg = res.parsed[i]; @@ -40,12 +55,13 @@ void run_parsers(ov::genai::DecodedResults& res, const ov::genai::OptionalGenera // Initialize msg with content msg["content"] = res.texts[i]; } + for (auto& parser: parsers) { - // TODO: Check the state of incremental parser and reset if necessary parser->parse(msg); } res.parsed[i] = msg; } + return res; } } @@ -285,9 +301,10 @@ DecodedResults LLMPipeline::generate( StringInputs inputs, OptionalGenerationConfig generation_config, StreamerVariant streamer) { - auto res = m_pimpl->generate(inputs, generation_config, streamer); - run_parsers(res, generation_config, streamer); - return res; + + return run_generate_with_parsers(generation_config, streamer, [&]() -> DecodedResults { + return m_pimpl->generate(inputs, generation_config, streamer); + }); } DecodedResults LLMPipeline::generate(StringInputs text, const ov::AnyMap& config_map) { @@ -295,9 +312,10 @@ DecodedResults LLMPipeline::generate(StringInputs text, const ov::AnyMap& config GenerationConfig config = (config_arg.has_value()) ? *config_arg : get_generation_config(); config.update_generation_config(config_map); auto streamer = utils::get_streamer_from_map(config_map); - auto res = m_pimpl->generate(text, config, streamer); - run_parsers(res, config_arg, streamer); - return res; + + return run_generate_with_parsers(config_arg, streamer, [&]() -> DecodedResults { + return m_pimpl->generate(text, config, streamer); + }); } EncodedResults LLMPipeline::generate( From b18ee75b125ed767fce3064848b7fda621313b0f Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Wed, 22 Oct 2025 20:04:36 +0200 Subject: [PATCH 33/43] add reset() method --- src/cpp/include/openvino/genai/parsers.hpp | 18 ++- .../include/openvino/genai/text_streamer.hpp | 2 + src/cpp/src/llm/pipeline.cpp | 8 +- src/cpp/src/parsers.cpp | 17 ++- src/cpp/src/text_streamer.cpp | 7 ++ src/python/py_parsers.cpp | 110 ++++++++++-------- tests/cpp/parser.cpp | 4 + 7 files changed, 106 insertions(+), 60 deletions(-) diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index f952268d97..c4163f8201 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -135,10 +135,15 @@ class OPENVINO_GENAI_EXPORTS IncrementalParser { */ virtual std::string parse( JsonContainer& message, - std::string& delta_text, // TODO: double check + std::string& delta_text, const std::optional>& delta_tokens = std::nullopt ) = 0; + /** + * @brief Reset the internal state of the parser. + */ + virtual void reset() = 0; + virtual ~IncrementalParser() = default; }; @@ -149,9 +154,6 @@ class OPENVINO_GENAI_EXPORTS IncrementalParser { * The reasoning content is stored in the 'reasoning_content' field of the JsonContainer. */ class OPENVINO_GENAI_EXPORTS ReasoningIncrementalParser : public IncrementalParser { -private: - class ReasoningParserImpl; - std::unique_ptr m_impl; public: /** * @brief Constructor for ReasoningIncrementalParser. @@ -183,6 +185,14 @@ class OPENVINO_GENAI_EXPORTS ReasoningIncrementalParser : public IncrementalPars std::string& delta_text, const std::optional>& delta_tokens = std::nullopt ) override; + + /** + * @brief Reset the internal state of the parser. 
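+     * Call this before starting a new generation so that cached text and tag state
+     * from the previous stream do not leak into the next one.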
+ */ + void reset() override; +private: + class ReasoningParserImpl; + std::unique_ptr m_impl; }; /** diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp index 53e06b7a93..06f400a88e 100644 --- a/src/cpp/include/openvino/genai/text_streamer.hpp +++ b/src/cpp/include/openvino/genai/text_streamer.hpp @@ -60,6 +60,8 @@ class OPENVINO_GENAI_EXPORTS TextParserStreamer : public TextStreamer { CallbackTypeVariant write(std::string message); JsonContainer get_parsed_message() const; + + void reset(); private: std::unique_ptr m_pimpl; }; diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index 0b2a445db4..7cee7d9ac3 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -36,9 +36,11 @@ ov::genai::DecodedResults run_generate_with_parsers(const ov::genai::OptionalGen } auto res = generate_callable(); - - res.parsed.resize(1); - res.parsed[0] = parser_streamer->get_parsed_message(); + + if (parser_streamer) { + res.parsed.resize(1); + res.parsed[0] = parser_streamer->get_parsed_message(); + } // If no parsers are defined, return if (!generation_config.has_value() || generation_config->parsers.empty()) { diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index cff484105f..b5b08b2f50 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -10,12 +10,14 @@ namespace ov::genai { class ReasoningIncrementalParser::ReasoningParserImpl { private: + // Values initialized from constructor don't need default member initializer. bool m_expect_open_tag; - bool m_first_run = true; bool m_keep_original_content; - bool m_think_tag_opened = false; std::string m_open_tag; std::string m_close_tag; + // Values with default member initializers are reset on each reset() call. 
+ bool m_first_run = true; + bool m_think_tag_opened = false; std::string m_text_cache = ""; bool m_deactivated = false; public: @@ -144,6 +146,13 @@ class ReasoningIncrementalParser::ReasoningParserImpl { return delta_text; } + + void reset() { + m_first_run = true; + m_think_tag_opened = false; + m_text_cache = ""; + m_deactivated = false; + } }; ReasoningIncrementalParser::ReasoningIncrementalParser(bool expect_open_tag, bool keep_original_content, const std::string& open_tag, const std::string& close_tag) { @@ -160,6 +169,10 @@ std::string ReasoningIncrementalParser::parse( return m_impl->parse(message, delta_text, delta_tokens); } +void ReasoningIncrementalParser::reset() { + m_impl->reset(); +} + class Llama3PythonicToolParser::Llama3PythonicToolParserImpl { public: void parse(JsonContainer& message) { diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp index 9ad9bd9543..fa48f1a33e 100644 --- a/src/cpp/src/text_streamer.cpp +++ b/src/cpp/src/text_streamer.cpp @@ -155,6 +155,13 @@ JsonContainer TextParserStreamer::get_parsed_message() const { return m_pimpl->m_parsed_message; } +void TextParserStreamer::reset() { + m_pimpl->m_parsed_message = JsonContainer(); + for (auto& parser : m_pimpl->m_parsers) { + parser->reset(); + } +} + TextParserStreamer::~TextParserStreamer() = default; } // namespace genai diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index 7818044e71..b92454855c 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -29,6 +29,23 @@ namespace pyutils = ov::genai::pybind::utils; namespace { +class ConstructableParser: public Parser { +public: + void parse(JsonContainer& msg) override { + py::gil_scoped_acquire acquire; + + py::function parse_method = py::get_override(static_cast(this), "parse"); + if (!parse_method) { + OPENVINO_THROW("parse method not implemented in Python subclass"); + } + + // Convert JsonContainer to py::dict + py::dict py_msg = pyutils::json_container_to_py_object(msg); + parse_method(py_msg); + msg = pyutils::py_object_to_json_container(py_msg); + } +}; + // ConstructableIncremental and ConstructableBase are used when python overload is called from C++ // and we need to convert JsonContainer to py::dict and then update back JsonContainer from the py::dict which was modified in Python. class ConstructableIncrementalParser: public IncrementalParser { @@ -52,64 +69,19 @@ class ConstructableIncrementalParser: public IncrementalParser { return res.cast(); } -}; -class ConstructableParser: public Parser { -public: - void parse(JsonContainer& msg) override { - py::gil_scoped_acquire acquire; - - py::function parse_method = py::get_override(static_cast(this), "parse"); - if (!parse_method) { - OPENVINO_THROW("parse method not implemented in Python subclass"); - } - - // Convert JsonContainer to py::dict - py::dict py_msg = pyutils::json_container_to_py_object(msg); - parse_method(py_msg); - msg = pyutils::py_object_to_json_container(py_msg); + void reset() override { + PYBIND11_OVERLOAD_PURE( + void, + IncrementalParser, + reset, + ); } }; } // namespace -// TODO: double check/add more relevant docstrings for parsers. 
void init_parsers(py::module_& m) {
-    py::class_<IncrementalParser, std::shared_ptr<IncrementalParser>>(m, "IncrementalParser")
-        .def(py::init<>())
-        .def("parse", [](IncrementalParser& self,
-                         py::dict& message,
-                         std::string& delta_text,
-                         const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt) {
-            auto msg_cpp = pyutils::py_object_to_json_container(message);
-            auto res = self.parse(msg_cpp, delta_text, delta_tokens);
-            auto json_str = msg_cpp.to_json_string();
-
-            // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here,
-            // since it creates a new object instead of updating the existing dict.
-            py::object json_mod = py::module_::import("json");
-            py::dict result = json_mod.attr("loads")(json_str);
-            // update msg with result
-            for (auto item : result) {
-                message[item.first] = item.second;
-            }
-            return res;
-        }, py::arg("message"), py::arg("delta_text"), py::arg("delta_tokens") = std::nullopt,
-        "Parse is called every time a new text delta is decoded. Returns a string with any additional text to append to the current output.");
-
-    py::class_<ReasoningIncrementalParser, std::shared_ptr<ReasoningIncrementalParser>, IncrementalParser>(m, "ReasoningIncrementalParser")
-        .def(py::init<bool, bool, const std::string&, const std::string&>(),
-             py::arg("expect_open_tag") = true,
-             py::arg("keep_original_content") = true,
-             py::arg("open_tag") = "<think>",
-             py::arg("close_tag") = "</think>");
-
-    py::class_<Phi4ReasoningIncrementalParser, std::shared_ptr<Phi4ReasoningIncrementalParser>, IncrementalParser>(m, "Phi4ReasoningIncrementalParser")
-        .def(py::init<>());
-
-    py::class_<DeepSeekR1ReasoningIncrementalParser, std::shared_ptr<DeepSeekR1ReasoningIncrementalParser>, IncrementalParser>(m, "DeepSeekR1ReasoningIncrementalParser")
-        .def(py::init<>());
-
     py::class_<Parser, std::shared_ptr<Parser>>(m, "Parser")
         .def(py::init<>())
         .def("parse",
@@ -144,4 +116,40 @@ void init_parsers(py::module_& m) {
 
     py::class_<Llama3PythonicToolParser, std::shared_ptr<Llama3PythonicToolParser>, Parser>(m, "Llama3PythonicToolParser")
         .def(py::init<>());
+
+    py::class_<IncrementalParser, std::shared_ptr<IncrementalParser>>(m, "IncrementalParser")
+        .def(py::init<>())
+        .def("parse", [](IncrementalParser& self,
+                         py::dict& message,
+                         std::string& delta_text,
+                         const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt) {
+            auto msg_cpp = pyutils::py_object_to_json_container(message);
+            auto res = self.parse(msg_cpp, delta_text, delta_tokens);
+            auto json_str = msg_cpp.to_json_string();
+
+            // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here,
+            // since it creates a new object instead of updating the existing dict.
+            py::object json_mod = py::module_::import("json");
+            py::dict result = json_mod.attr("loads")(json_str);
+            // update msg with result
+            for (auto item : result) {
+                message[item.first] = item.second;
+            }
+            return res;
+        }, py::arg("message"), py::arg("delta_text"), py::arg("delta_tokens") = std::nullopt,
+        "Parse is called every time a new text delta is decoded. Returns a string with any additional text to append to the current output.")
+        .def("reset", &IncrementalParser::reset, "Reset the internal state of the parser.");
+
+    py::class_<ReasoningIncrementalParser, std::shared_ptr<ReasoningIncrementalParser>, IncrementalParser>(m, "ReasoningIncrementalParser")
+        .def(py::init<bool, bool, const std::string&, const std::string&>(),
+             py::arg("expect_open_tag") = true,
+             py::arg("keep_original_content") = true,
+             py::arg("open_tag") = "<think>",
+             py::arg("close_tag") = "</think>");
+
+    py::class_<Phi4ReasoningIncrementalParser, std::shared_ptr<Phi4ReasoningIncrementalParser>, IncrementalParser>(m, "Phi4ReasoningIncrementalParser")
+        .def(py::init<>());
+
+    py::class_<DeepSeekR1ReasoningIncrementalParser, std::shared_ptr<DeepSeekR1ReasoningIncrementalParser>, IncrementalParser>(m, "DeepSeekR1ReasoningIncrementalParser")
+        .def(py::init<>());
 }
diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp
index ca2b39453b..732f7cbe37 100644
--- a/tests/cpp/parser.cpp
+++ b/tests/cpp/parser.cpp
@@ -177,6 +177,10 @@ TEST(ParserTest, CustomParser_AccumulatesBetweenStartStop) {
             return delta_text;
         }
 
+        void reset() override {
+            main_part_started = false;
+        }
+
         // Virtual dtor for safety
         ~CustomParser() override = default;
     };

From 79d299e78d277d00f4b5888959bde50ef9dac5c6 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Thu, 23 Oct 2025 00:28:02 +0200
Subject: [PATCH 34/43] put delta_tokens to parser as well

---
 .../include/openvino/genai/text_streamer.hpp |  4 +-
 src/cpp/src/text_streamer.cpp                | 62 +++++++++++++++----
 .../openvino_genai/py_openvino_genai.pyi     | 10 ++-
 tests/cpp/parser.cpp                         | 10 +--
 tests/python_tests/test_parsers.py           | 57 ++++++++++++++++-
 5 files changed, 118 insertions(+), 25 deletions(-)

diff --git a/src/cpp/include/openvino/genai/text_streamer.hpp b/src/cpp/include/openvino/genai/text_streamer.hpp
index 53e06b7a93..403c63ef7d 100644
--- a/src/cpp/include/openvino/genai/text_streamer.hpp
+++ b/src/cpp/include/openvino/genai/text_streamer.hpp
@@ -29,7 +29,7 @@ class OPENVINO_GENAI_EXPORTS TextStreamer : public StreamerBase {
     TextStreamer(const Tokenizer& tokenizer, std::function<CallbackTypeVariant(std::string)> callback, const ov::AnyMap& detokenization_params = {});
 
-private:
+protected:
     Tokenizer m_tokenizer;
     std::vector<int64_t> m_tokens_cache;
     std::vector<int64_t> m_decoded_lengths;
@@ -58,7 +58,7 @@ class OPENVINO_GENAI_EXPORTS TextParserStreamer : public TextStreamer {
     virtual StreamingStatus write(JsonContainer& message) = 0;
 
     CallbackTypeVariant write(std::string message);
-    
+
     JsonContainer get_parsed_message() const;
 
     void reset();
diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp
index fa48f1a33e..fca30b7aea 100644
--- a/src/cpp/src/text_streamer.cpp
+++ b/src/cpp/src/text_streamer.cpp
@@ -9,6 +9,9 @@ bool is_incomplete(std::string& text) {
     constexpr char replacement[] = "\xef\xbf\xbd";
     return text.size() >= 3 && text.compare(text.size() - 3, 3, replacement) == 0;
 }
+
+constexpr size_t delay_n_tokens = 3;
+
 }  // namespace
 
 namespace ov {
@@ -31,10 +34,13 @@ StreamingStatus TextStreamer::write(int64_t token) {
     if (!text.empty() && '\n' == text.back() && text.size() > m_printed_len) {
         // Flush the cache after the new line symbol
         res << std::string_view{text.data() + m_printed_len, text.size() - m_printed_len};
+        // Get the list of tokens decoded for this chunk or the rest of the text.
+
+        auto res_status = run_callback_if_needed(res.str());
         m_tokens_cache.clear();
         m_decoded_lengths.clear();
         m_printed_len = 0;
-        return run_callback_if_needed(res.str());
+        return res_status;
     }
 
     if (is_incomplete(text)) {
@@ -42,7 +48,7 @@ StreamingStatus TextStreamer::write(int64_t token) {
         // Don't print incomplete text
         return run_callback_if_needed(res.str());
     }
-    constexpr size_t delay_n_tokens = 3;
+
     // In some cases adding the next token can shorten the text,
     // e.g. when an apostrophe-removing regex has worked after adding new tokens.
    // Printing several last tokens is delayed.
@@ -58,10 +64,14 @@ StreamingStatus TextStreamer::write(int64_t token) {
         // It is possible to have a shorter text after adding a new token.
         // Print to output only if text length is increased.
         res << std::string_view{text.data() + m_printed_len, print_until - m_printed_len} << std::flush;
+    }
+
+    auto status = run_callback_if_needed(res.str());
+
+    if (print_until > -1 && print_until > m_printed_len) {
         m_printed_len = print_until;
     }
-
-    return run_callback_if_needed(res.str());
+    return status;
 }
 
 void TextStreamer::compute_decoded_length_for_position(size_t cache_position) {
@@ -124,6 +134,7 @@ void TextStreamer::end() {
 
 StreamerBase::~StreamerBase() = default;
 
+// Used to hide the internal state of TextParserStreamer
 class TextParserStreamer::TextParserStreamerImpl {
 public:
@@ -131,14 +142,6 @@
 std::vector<std::shared_ptr<IncrementalParser>> m_parsers;
 JsonContainer m_parsed_message;
 
 TextParserStreamerImpl(std::vector<std::shared_ptr<IncrementalParser>> parsers) : m_parsers{parsers} {}
-
-void parse(std::string message) {
-    for (auto& parser: m_parsers) {
-        message = parser->parse(m_parsed_message, message);
-        // Message can be modified inside the parser, e.g. if the parser extracted tool calls from the message content
-        m_parsed_message["content"] = m_parsed_message["content"].get_string() + message;
-    }
-}
 };
 
 TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParser>> parsers)
@@ -147,7 +150,40 @@ TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParser>> parsers)
     : m_pimpl{std::make_unique<TextParserStreamerImpl>(parsers)} {}
 
 CallbackTypeVariant TextParserStreamer::write(std::string message) {
-    m_pimpl->parse(message);
+    // When 'write' is called with a string, it means a new chunk of tokens has been decoded into text
+
+    auto flushed_tokens = std::vector<int64_t>();
+    if (message.back() == '\n') {
+        // Flush all tokens  // TODO: m_decoded_lengths[m_decoded_lengths.size() - 1] = -1;
+        flushed_tokens.assign(m_tokens_cache.begin(), m_tokens_cache.end());
+    } else if (m_decoded_lengths.size() >= delay_n_tokens) {
+        // prompt = "I was waiting for the bus.\n"
+        // tokens = [2, 2, 3, 45, 67, 89, 4, 2]
+        // decoded_lengths = [1, 5, 13, 17, 21, 25, 26, 27]
+        // let printed_len = 13 (after "I was waiting")
+        // then delta_text = "for the bus.\n"
+        // delta_tokens = [45, 67, 89, 4, 2]
+        // delta_tokens = m_tokens_cache[4..end]
+
+        // Find where the last printed tokens are located based on m_printed_len and print_until
+        auto print_until = m_decoded_lengths[m_decoded_lengths.size() - delay_n_tokens];
+        auto first = std::upper_bound(m_decoded_lengths.begin(), m_decoded_lengths.end(), static_cast<int64_t>(m_printed_len))
+                     - m_decoded_lengths.begin();
+        auto last = std::upper_bound(m_decoded_lengths.begin(), m_decoded_lengths.end(), static_cast<int64_t>(print_until))
+                    - m_decoded_lengths.begin();
+
+        // Before calling the base TextStreamer::write, save the tokens that were flushed with this chunk.
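+        // 'first' and 'last' form a half-open range [first, last) into m_tokens_cache: they select
+        // exactly the tokens whose decoded text falls between m_printed_len and print_until,
+        // i.e. the tokens that produced this delta_text.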
+        if (last >= first) {
+            flushed_tokens.assign(m_tokens_cache.begin() + first, m_tokens_cache.begin() + last);
+        }
+    }
+
+    // Iterate over all parsers and apply them to the message
+    for (auto& parser: m_pimpl->m_parsers) {
+        message = parser->parse(m_pimpl->m_parsed_message, message, flushed_tokens);
+        // Message can be modified inside the parser, e.g. if the parser extracted tool calls from the message content
+        m_pimpl->m_parsed_message["content"] = m_pimpl->m_parsed_message["content"].get_string() + message;
+    }
 
     return write(m_pimpl->m_parsed_message);
 }
diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi
index 273f2d778b..2cfa83a583 100644
--- a/src/python/openvino_genai/py_openvino_genai.pyi
+++ b/src/python/openvino_genai/py_openvino_genai.pyi
@@ -578,7 +578,7 @@ class DecodedResults:
     def texts(self) -> list[str]:
         ...
 class DeepSeekR1ReasoningIncrementalParser(IncrementalParser):
-    def __init__(self, expect_open_tag: bool = False) -> None:
+    def __init__(self) -> None:
         ...
 class EncodedGenerationResult:
     """
@@ -1462,10 +1462,14 @@ class ImageGenerationPerfMetrics:
 class IncrementalParser:
     def __init__(self) -> None:
         ...
-    def parse(self, message: dict, previous_text: str, delta_text: str, previous_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str:
+    def parse(self, message: dict, delta_text: str, delta_tokens: collections.abc.Sequence[typing.SupportsInt] | None = None) -> str:
         """
         Parse is called every time a new text delta is decoded. Returns a string with any additional text to append to the current output.
         """
+    def reset(self) -> None:
+        """
+        Reset the internal state of the parser.
+        """
 class InpaintingPipeline:
     """
     This class is used for generation with inpainting models.
@@ -1949,7 +1953,7 @@ class PerfMetrics:
     def raw_metrics(self) -> RawPerfMetrics:
         ...
 class Phi4ReasoningIncrementalParser(IncrementalParser):
-    def __init__(self, expect_open_tag: bool = True) -> None:
+    def __init__(self) -> None:
         ...
 class PipelineMetrics:
     """
diff --git a/tests/cpp/parser.cpp b/tests/cpp/parser.cpp
index 732f7cbe37..e4db4da3f2 100644
--- a/tests/cpp/parser.cpp
+++ b/tests/cpp/parser.cpp
@@ -37,8 +37,8 @@ TEST(ParserTest, test_reasoning_parser_1) {
     std::string prompt = R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|><think>\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n</think>\n\n**Solution:**\n\nTo find the sum of 2 and 1, )";
 
     JsonContainer expected;
-    expected["content"] = std::string(R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|>\n\n**Solution:**\n\nTo find the sum of 2 and 1, )");
-    expected["reasoning_content"] = std::string(R"(\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n)");
+    expected["content"] = R"("<|begin▁of▁sentence|><|begin▁of▁sentence|><|User|>What is 2 + 1?<|Assistant|>\n\n**Solution:**\n\nTo find the sum of 2 and 1, )";
+    expected["reasoning_content"] = R"(\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n)";
 
     std::shared_ptr<ReasoningIncrementalParser> parser = std::make_shared<ReasoningIncrementalParser>(
         /*expect_open_tag*/ true,
@@ -56,7 +56,7 @@ TEST(ParserTest, test_reasoning_parser_2) {
 
     JsonContainer expected;
     expected["content"] = prompt;
-    expected["reasoning_content"] = std::string(R"(\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n)");
+    expected["reasoning_content"] = R"(\nI need to determine the sum of 2 and 1.\n\nFirst, I'll identify the two numbers involved in the addition: 2 and 1.\n\nNext, I'll perform the addition by combining these two numbers.\n\nFinally, I'll state the result of the addition, which is 3.\n)";
 
     std::shared_ptr<ReasoningIncrementalParser> parser = std::make_shared<ReasoningIncrementalParser>(
         /*expect_open_tag*/ true,
@@ -155,10 +155,10 @@ TEST(ParserTest, CustomParser_AccumulatesBetweenStartStop) {
             // Ensure fields exist (Python test used dict defaults)
             if (!msg.contains("content")) {
                 msg.to_empty_object();
-                msg["content"] = std::string{};
+                msg["content"] = "";
             }
             if (!msg.contains("reasoning_content")) {
-                msg["reasoning_content"] = std::string{};
+                msg["reasoning_content"] = "";
             }
 
             if (!main_part_started && delta_text == "") {
diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py
index e5f33ac955..53b79a027e 100644
--- a/tests/python_tests/test_parsers.py
+++ b/tests/python_tests/test_parsers.py
@@ -80,9 +80,9 @@ def write(self, message):
     msg = {}
     answer = "<think>\nOkay, the user is asking for the answer to 2 + 1.</think>\n\nThe answer to 2 + 1 is \boxed{3}."
-    encoded_tokens = genai_tokenizer.encode(answer).input_ids.data.tolist()
+    encoded_tokens = genai_tokenizer.encode(answer).input_ids.data.tolist()[0]
     for token in encoded_tokens:
-        streamer._write(token)
+        streamer._write([token])
     streamer.end()
 
     think_content = answer.split("</think>")[0].replace("<think>", "")
@@ -92,6 +92,59 @@ def write(self, message):
     assert msg['content'] == content
 
 
+@pytest.mark.precommit
+@pytest.mark.parametrize(
+    "hf_ov_genai_models",
+    ["katuni4ka/tiny-random-phi3"],  # this tokenizer is used as a stub only
+    indirect=True
+)
+def test_incremental_integer_token_ids(hf_ov_genai_models):
+    hf_tokenizer, genai_tokenizer = hf_ov_genai_models
+
+    class CustomIncrementalParser(IncrementalParser):
+        started_reasoning: bool = False
+
+        def parse(self, msg: dict, delta_text: str, delta_tokens = None) -> str:
+            if 'content' not in msg:
+                msg['content'] = ''
+            if 'reasoning_content' not in msg:
+                msg['reasoning_content'] = ''
+
+            if 1 in delta_tokens and not self.started_reasoning:
+                self.started_reasoning = True
+                msg['reasoning_content'] += delta_text
+                delta_text = ''
+            elif 1 in delta_tokens and self.started_reasoning:
+                self.started_reasoning = False
+                delta_text = ''
+            elif self.started_reasoning:
+                msg['reasoning_content'] += delta_text
+                delta_text = ''
+
+            # Here we are only collecting ordinary text, therefore leave delta_text unchanged;
+            # msg['content'] += delta_text will happen under the hood.
+            return delta_text
+
+    class CustomStreamer(TextParserStreamer):
+        def write(self, message):
+            msg.update(message)
+            return StreamingStatus.RUNNING
+    streamer = CustomStreamer(genai_tokenizer, parsers=[CustomIncrementalParser()])
+
+    msg = {}
+    # All closing tags </s>, <|/inst|>, <|endoftext|>, etc. in tiny-random-phi3 add strange \x0c\x0c characters
+    # so we avoid them in this test.
+    answer = "<s>\nOkay, the user is asking for the answer to 2 + 1<s>.The answer to 2 + 1 is 3."
+    encoded_tokens = genai_tokenizer.encode(answer, add_special_tokens=False).input_ids.data.tolist()[0]
+
+    for token in encoded_tokens:
+        streamer._write([token])
+    streamer.end()
+
+    assert msg['reasoning_content'] == "\nOkay, the user is asking for the answer to 2 + 1"
+    assert msg['content'] == " The answer to 2 + 1 is 3."
+
+
 @pytest.mark.precommit
 @pytest.mark.parametrize(
     "hf_ov_genai_models",

From 775cf13d17116fb847490df785b4f54bc712477b Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Thu, 23 Oct 2025 01:14:42 +0200
Subject: [PATCH 35/43] store compiled pattern as a member

---
 src/cpp/src/parsers.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index b5b08b2f50..ad8a9a1cc1 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -175,6 +175,7 @@ void ReasoningIncrementalParser::reset() {
 
 class Llama3PythonicToolParser::Llama3PythonicToolParserImpl {
 public:
+    std::regex m_pattern = std::regex(R"(\[(.*?)\])");
     void parse(JsonContainer& message) {
         // Input example
         // string message = "[get_weather(location='New York, NY', unit='celsius')]<|eom_id|>";
        // Regex to capture the [...]
part std::smatch m; const std::string& text = message["content"].get_string(); - std::regex r(R"(\[.*?\])"); - if (!std::regex_search(text, m, r)) { + + if (!std::regex_search(text, m, m_pattern)) { return; } From d4bf6bee4c431ce3fe72c31ac516325e87eb19a6 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Thu, 23 Oct 2025 08:48:55 +0200 Subject: [PATCH 36/43] call parsers for ChatHistory as well; update caches --- .github/workflows/linux.yml | 2 +- .github/workflows/mac.yml | 2 +- .github/workflows/windows.yml | 2 +- src/cpp/src/llm/pipeline.cpp | 9 +++++++-- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 09b39d4b42..44a439493c 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -25,7 +25,7 @@ env: SCCACHE_CACHE_SIZE: 30G SCCACHE_AZURE_KEY_PREFIX: genai/ubuntu/22_04/x64 HF_HOME: /mount/caches/huggingface/lin - OV_CACHE: /mount/caches/huggingface/.ov_cache/lin + OV_CACHE: /mount/caches/huggingface/.ov_cache/lin/775cf1/ OPENVINO_LOG_LEVEL: 4 GENAI_ARCHIVE_NAME: genai.tar.gz GENAI_SAMPLES_NAME: genai_samples.tar.gz diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 249519057d..fc4b9261aa 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -22,7 +22,7 @@ env: BASE_PRODUCT_TYPE: public_macos_arm64 CCACHE_MAXSIZE: 500Mi HF_HOME: ~/.cache/hf - OV_CACHE: ~/.cache/ov_cache/194c936 + OV_CACHE: ~/.cache/ov_cache/775cf1/ CLEANUP_CACHE: 1 OPENVINO_LOG_LEVEL: 4 diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 0d646578bc..5502fec5c0 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -23,7 +23,7 @@ env: CMAKE_C_COMPILER_LAUNCHER: ccache CCACHE_MAXSIZE: 500Mi HF_HOME: C:/mount/caches/huggingface/win - OV_CACHE: C:/mount/caches/huggingface/.ov_cache/win/ + OV_CACHE: C:/mount/caches/huggingface/.ov_cache/win/775cf1/ OPENVINO_LOG_LEVEL: 2 # Windows fails with out of memory because of too verbose logging ARTIFACTS_SHARE: '/mount/build-artifacts' BASE_PRODUCT_TYPE: public_windows_vs2022 diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp index 2e136e0897..fe1222f779 100644 --- a/src/cpp/src/llm/pipeline.cpp +++ b/src/cpp/src/llm/pipeline.cpp @@ -324,15 +324,20 @@ DecodedResults LLMPipeline::generate( const ChatHistory& history, OptionalGenerationConfig generation_config, StreamerVariant streamer) { - return m_pimpl->generate(history, generation_config, streamer); + return run_generate_with_parsers(generation_config, streamer, [&]() -> DecodedResults { + return m_pimpl->generate(history, generation_config, streamer); + }); } DecodedResults LLMPipeline::generate(const ChatHistory& history, const ov::AnyMap& config_map) { auto config_arg = utils::get_config_from_map(config_map); GenerationConfig config = config_arg.value_or(get_generation_config()); config.update_generation_config(config_map); + auto streamer = utils::get_streamer_from_map(config_map); - return m_pimpl->generate(history, config, utils::get_streamer_from_map(config_map)); + return run_generate_with_parsers(config, streamer, [&]() -> DecodedResults { + return m_pimpl->generate(history, config, streamer); + }); } EncodedResults LLMPipeline::generate( From 8ddc2c42f46cc2ee5596b5f1a7df96bed10dd6ad Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Thu, 23 Oct 2025 09:30:28 +0200 Subject: [PATCH 37/43] use json_container_to_py_object when `parse()` is called from Python as well --- src/python/py_parsers.cpp | 26 
++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp index b92454855c..0352a36554 100644 --- a/src/python/py_parsers.cpp +++ b/src/python/py_parsers.cpp @@ -88,18 +88,8 @@ void init_parsers(py::module_& m) { [](Parser& self, py::dict& message) { auto msg_cpp = pyutils::py_object_to_json_container(message); self.parse(msg_cpp); - - // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here, - py::object json_mod = py::module_::import("json"); - - // since it create a new object instead of updating existing dict. - auto json_str = msg_cpp.to_json_string(); - py::dict result = json_mod.attr("loads")(json_str); - - // update msg with result - for (auto item : result) { - message[item.first] = item.second; - } + py::dict result = pyutils::json_container_to_py_object(msg_cpp); + message.attr("update")(result); }, py::arg("message"), "Parse is called with the full text. Returns a dict with parsed content."); @@ -125,16 +115,8 @@ void init_parsers(py::module_& m) { const std::optional>& delta_tokens = std::nullopt) { auto msg_cpp = pyutils::py_object_to_json_container(message); auto res = self.parse(msg_cpp, delta_text, delta_tokens); - auto json_str = msg_cpp.to_json_string(); - - // TODO: msg = pyutils::json_container_to_py_object(msg_cpp) does not work properly here, - // since it create a new object instead of updating existing dict. - py::object json_mod = py::module_::import("json"); - py::dict result = json_mod.attr("loads")(json_str); - // update msg with result - for (auto item : result) { - message[item.first] = item.second; - } + auto result = pyutils::json_container_to_py_object(msg_cpp); + message.attr("update")(result); return res; }, py::arg("message"), py::arg("delta_text"), py::arg("delta_tokens") = std::nullopt, "Parse is called every time new text delta is decoded. Returns a string with any additional text to append to the current output.") From d82c66c6352ff1048cd1ba15351b9b192b3151b5 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 24 Oct 2025 13:21:50 +0200 Subject: [PATCH 38/43] add reset() tests, corrected pybinding, added Phi4, DeepSeek predefined static parsers --- src/cpp/include/openvino/genai/parsers.hpp | 39 ++++++++--- src/python/openvino_genai/__init__.py | 4 +- src/python/openvino_genai/__init__.pyi | 4 +- .../openvino_genai/py_openvino_genai.pyi | 12 +++- src/python/py_parsers.cpp | 10 ++- src/python/py_streamers.cpp | 9 ++- src/python/py_utils.cpp | 11 +++ tests/python_tests/test_parsers.py | 68 +++++++++++++++---- 8 files changed, 127 insertions(+), 30 deletions(-) diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp index c4163f8201..d72f3b8d62 100644 --- a/src/cpp/include/openvino/genai/parsers.hpp +++ b/src/cpp/include/openvino/genai/parsers.hpp @@ -62,6 +62,26 @@ class OPENVINO_GENAI_EXPORTS ReasoningParser : public Parser { std::unique_ptr m_impl; }; +/** + * @brief Parser for DeepSeek R1 model reasoning format. + * + * DeepSeekR1ReasoningParser is configured for the DeepSeek R1 model's reasoning format, which doesn't expect an opening tag. + */ +class OPENVINO_GENAI_EXPORTS DeepSeekR1ReasoningParser : public ReasoningParser { +public: + DeepSeekR1ReasoningParser() : ReasoningParser(/*expect_open_tag=*/false) {}; +}; + +/** + * @brief Parser for Phi-4 model reasoning format. 
+ * + * Phi4ReasoningParser is configured specifically for the Phi-4 model's reasoning format, which expects an opening tag by default. + */ +class OPENVINO_GENAI_EXPORTS Phi4ReasoningParser : public ReasoningParser { +public: + Phi4ReasoningParser() : ReasoningParser(/*expect_open_tag=*/true) {}; +}; + /** * @brief Parser for Llama 3 Pythonic tool calls format. * @@ -72,7 +92,7 @@ class OPENVINO_GENAI_EXPORTS ReasoningParser : public Parser { */ class OPENVINO_GENAI_EXPORTS Llama3PythonicToolParser : public Parser { public: - explicit Llama3PythonicToolParser(); + Llama3PythonicToolParser(); ~Llama3PythonicToolParser(); /** @@ -98,7 +118,7 @@ class OPENVINO_GENAI_EXPORTS Llama3PythonicToolParser : public Parser { */ class OPENVINO_GENAI_EXPORTS Llama3JsonToolParser : public Parser { public: - explicit Llama3JsonToolParser(); + Llama3JsonToolParser(); ~Llama3JsonToolParser(); /** @@ -196,26 +216,23 @@ class OPENVINO_GENAI_EXPORTS ReasoningIncrementalParser : public IncrementalPars }; /** - * @brief Specialized incremental parser for DeepSeek R1 model reasoning format. + * @brief Incremental parser for DeepSeek R1 model reasoning format. * - * DeepSeekR1ReasoningIncrementalParser is a specialized version of ReasoningIncrementalParser - * configured specifically for the DeepSeek R1 model's reasoning format, which doesn't expect an opening tag. + * DeepSeekR1ReasoningIncrementalParser is configured for the DeepSeek R1 model's reasoning format, which doesn't expect an opening tag. */ class OPENVINO_GENAI_EXPORTS DeepSeekR1ReasoningIncrementalParser : public ReasoningIncrementalParser { public: - explicit DeepSeekR1ReasoningIncrementalParser() : ReasoningIncrementalParser(/*expect_open_tag=*/false) {}; + DeepSeekR1ReasoningIncrementalParser() : ReasoningIncrementalParser(/*expect_open_tag=*/false) {}; }; /** - * @brief Specialized incremental parser for Phi-4 model reasoning format. + * @brief Incremental parser for Phi-4 model reasoning format. * - * Phi4ReasoningIncrementalParser is a specialized version of ReasoningIncrementalParser - * configured specifically for the Phi-4 model's reasoning format, which typically - * expects an opening tag by default. + * Phi4ReasoningIncrementalParser is configured specifically for the Phi-4 model's reasoning format, which expects an opening tag by default. 
*/ class OPENVINO_GENAI_EXPORTS Phi4ReasoningIncrementalParser : public ReasoningIncrementalParser { public: - explicit Phi4ReasoningIncrementalParser() : ReasoningIncrementalParser(/*expect_open_tag=*/true) {}; + Phi4ReasoningIncrementalParser() : ReasoningIncrementalParser(/*expect_open_tag=*/true) {}; }; } // namespace genai diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py index 5673b941cb..b10aadd062 100644 --- a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -25,12 +25,14 @@ from .py_openvino_genai import ( Parser, ReasoningParser, + DeepSeekR1ReasoningParser, + Phi4ReasoningParser, Llama3JsonToolParser, Llama3PythonicToolParser, IncrementalParser, ReasoningIncrementalParser, - Phi4ReasoningIncrementalParser, DeepSeekR1ReasoningIncrementalParser, + Phi4ReasoningIncrementalParser, ) __version__ = get_version() diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index f92c55f4fd..c1d1f1dc30 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -16,6 +16,7 @@ from openvino_genai.py_openvino_genai import ContinuousBatchingPipeline from openvino_genai.py_openvino_genai import CppStdGenerator from openvino_genai.py_openvino_genai import DecodedResults from openvino_genai.py_openvino_genai import DeepSeekR1ReasoningIncrementalParser +from openvino_genai.py_openvino_genai import DeepSeekR1ReasoningParser from openvino_genai.py_openvino_genai import EncodedResults from openvino_genai.py_openvino_genai import FluxTransformer2DModel from openvino_genai.py_openvino_genai import GenerationConfig @@ -36,6 +37,7 @@ from openvino_genai.py_openvino_genai import Llama3PythonicToolParser from openvino_genai.py_openvino_genai import Parser from openvino_genai.py_openvino_genai import PerfMetrics from openvino_genai.py_openvino_genai import Phi4ReasoningIncrementalParser +from openvino_genai.py_openvino_genai import Phi4ReasoningParser from openvino_genai.py_openvino_genai import RawImageGenerationPerfMetrics from openvino_genai.py_openvino_genai import RawPerfMetrics from openvino_genai.py_openvino_genai import ReasoningIncrementalParser @@ -74,5 +76,5 @@ from openvino_genai.py_openvino_genai import draft_model from openvino_genai.py_openvino_genai import get_version import os as os from . 
import py_openvino_genai -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'DeepSeekR1ReasoningParser', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'Phi4ReasoningParser', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index c1c8fd612f..0444146334 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import collections.abc import openvino._pyopenvino import 
typing -__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'MeanStdPair', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] +__all__: list[str] = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChatHistory', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'DeepSeekR1ReasoningIncrementalParser', 'DeepSeekR1ReasoningParser', 'EncodedGenerationResult', 'EncodedResults', 'ExtendedPerfMetrics', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'IncrementalParser', 'InpaintingPipeline', 'KVCrushAnchorPointMode', 'KVCrushConfig', 'LLMPipeline', 'Llama3JsonToolParser', 'Llama3PythonicToolParser', 'MeanStdPair', 'Parser', 'PerfMetrics', 'Phi4ReasoningIncrementalParser', 'Phi4ReasoningParser', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'ReasoningIncrementalParser', 'ReasoningParser', 'SD3Transformer2DModel', 'SDPerModelsPerfMetrics', 'SDPerfMetrics', 'Scheduler', 'SchedulerConfig', 'SparseAttentionConfig', 'SparseAttentionMode', 'SpeechGenerationConfig', 'SpeechGenerationPerfMetrics', 'StopCriteria', 'StreamerBase', 'StreamingStatus', 'StructuralTagItem', 'StructuralTagsConfig', 'StructuredOutputConfig', 'SummaryStats', 'T5EncoderModel', 'Text2ImagePipeline', 'Text2SpeechDecodedResults', 'Text2SpeechPipeline', 'TextEmbeddingPipeline', 'TextParserStreamer', 'TextRerankPipeline', 'TextStreamer', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 
'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version']
 class Adapter:
     """
     Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier.
@@ -585,6 +585,9 @@ class DecodedResults:
 class DeepSeekR1ReasoningIncrementalParser(IncrementalParser):
     def __init__(self) -> None:
         ...
+class DeepSeekR1ReasoningParser(ReasoningParser):
+    def __init__(self) -> None:
+        ...
 class EncodedGenerationResult:
     """
@@ -1962,6 +1965,9 @@ class PerfMetrics:
 class Phi4ReasoningIncrementalParser(IncrementalParser):
     def __init__(self) -> None:
         ...
+class Phi4ReasoningParser(ReasoningParser):
+    def __init__(self) -> None:
+        ...
 class PipelineMetrics:
     """
@@ -3422,6 +3428,10 @@ class TextParserStreamer(TextStreamer):
         """
         Returns the accumulated message.
         """
+    def reset(self) -> None:
+        """
+        Resets the internal state of the parser streamer.
+        """
 class TextRerankPipeline:
     """
     Text rerank pipeline
diff --git a/src/python/py_parsers.cpp b/src/python/py_parsers.cpp
index 0352a36554..9eae3c5e93 100644
--- a/src/python/py_parsers.cpp
+++ b/src/python/py_parsers.cpp
@@ -17,8 +17,10 @@
 using ov::genai::IncrementalParser;
 using ov::genai::Parser;
 using ov::genai::ReasoningParser;
 using ov::genai::ReasoningIncrementalParser;
-using ov::genai::Phi4ReasoningIncrementalParser;
+using ov::genai::DeepSeekR1ReasoningParser;
 using ov::genai::DeepSeekR1ReasoningIncrementalParser;
+using ov::genai::Phi4ReasoningIncrementalParser;
+using ov::genai::Phi4ReasoningParser;
 using ov::genai::JsonContainer;
 using ov::genai::Llama3JsonToolParser;
 using ov::genai::Llama3PythonicToolParser;
@@ -101,6 +103,12 @@ void init_parsers(py::module_& m) {
              py::arg("open_tag") = "<think>",
              py::arg("close_tag") = "</think>");
 
+    py::class_<DeepSeekR1ReasoningParser, std::shared_ptr<DeepSeekR1ReasoningParser>, ReasoningParser>(m, "DeepSeekR1ReasoningParser")
+        .def(py::init<>());
+
+    py::class_<Phi4ReasoningParser, std::shared_ptr<Phi4ReasoningParser>, ReasoningParser>(m, "Phi4ReasoningParser")
+        .def(py::init<>());
+
     py::class_<Llama3JsonToolParser, std::shared_ptr<Llama3JsonToolParser>, Parser>(m, "Llama3JsonToolParser")
         .def(py::init<>());
 
diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp
index ba596ead92..f2df853e8b 100644
--- a/src/python/py_streamers.cpp
+++ b/src/python/py_streamers.cpp
@@ -85,8 +85,9 @@ class ConstructableTextParserStreamer: public TextParserStreamer {
     StreamingStatus write(JsonContainer& message) override {
         // Since C++ calls this function with a JsonContainer while the Python override expects a py::dict,
         // this function is a wrapper to call the Python implementation of 'write' with a py::dict
-        py::dict message_py;
-        message_py = pyutils::json_container_to_py_object(message);
+        py::gil_scoped_acquire acquire;
+
+        py::dict message_py = pyutils::json_container_to_py_object(message);
 
         // Call the Python implementation which accepts py::dict instead of JsonContainer
         // and convert the resulting message back to JsonContainer
@@ -172,5 +173,7 @@ void init_streamers(py::module_& m) {
         [](TextParserStreamer& self) -> py::dict{
             return pyutils::json_container_to_py_object(self.get_parsed_message());
-        }, "Returns the accumulated message.");
+        }, "Returns the accumulated message.")
+
+        .def("reset", &TextParserStreamer::reset, "Resets the internal state of the parser streamer.");
 }
diff --git a/src/python/py_utils.cpp b/src/python/py_utils.cpp
index bee695ac0b..ffb8f7849f 100644
--- a/src/python/py_utils.cpp
+++ b/src/python/py_utils.cpp
@@ -149,6 +149,17 @@ ov::Any py_object_to_any(const py::object& py_obj, std::string property_name) {
             }
         }
         return structural_tags;
+    } else if (property_name == "parsers") {
+        auto property_list = py_obj.cast<py::list>();
+        std::vector<std::shared_ptr<Parser>> parsers;
+        for (const auto& item : property_list) {
+            if (py::isinstance<Parser>(item)) {
+                parsers.push_back(item.cast<std::shared_ptr<Parser>>());
+            } else {
+                OPENVINO_THROW("Incorrect value in \"", property_name, "\". Expected Parser.");
+            }
+        }
+        return parsers;
     } else {
         auto _list = py_obj.cast<py::list>();
         enum class PY_TYPE : int { UNKNOWN = 0, STR, INT, FLOAT, BOOL, PARTIAL_SHAPE, TENSOR, DICT};
diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py
index 53b79a027e..8bc23c1f7d 100644
--- a/tests/python_tests/test_parsers.py
+++ b/tests/python_tests/test_parsers.py
@@ -4,7 +4,7 @@
 from utils.hugging_face import convert_and_save_tokenizer, download_and_convert_model
 from utils.ov_genai_pipelines import create_ov_pipeline
 import pytest
-from openvino_genai import Tokenizer, IncrementalParser, Parser, TextParserStreamer, StreamingStatus, Llama3JsonToolParser, Phi4ReasoningIncrementalParser, DeepSeekR1ReasoningIncrementalParser, GenerationConfig, ReasoningIncrementalParser
+from openvino_genai import Tokenizer, IncrementalParser, Parser, TextParserStreamer, StreamingStatus, Llama3JsonToolParser, Phi4ReasoningParser, Phi4ReasoningIncrementalParser, DeepSeekR1ReasoningIncrementalParser, GenerationConfig, ReasoningIncrementalParser
 from transformers import AutoTokenizer
 import re
 
@@ -205,6 +205,7 @@ def test_incremental_phi4_reason_parser_nostreamer(answer):
 
 @pytest.mark.precommit
 @pytest.mark.parametrize("keep_original_content", [True, False])
+@pytest.mark.parametrize("do_reset", [True, False])
 @pytest.mark.parametrize(
     "hf_ov_genai_models",
     ["katuni4ka/tiny-random-phi3"],  # this tokenizer is used as a stub only
     indirect=True
 )
 @pytest.mark.parametrize("answer", [
     "<think>\nOkay, the user is asking for the answer to 2 + 1.</think>\n\nThe answer to 2 + 1 is \boxed{3}.",
 ])
-def test_reasoning_parser_cut_content(hf_ov_genai_models, answer, keep_original_content):
+def test_reasoning_parser_cut_content(hf_ov_genai_models, answer, keep_original_content, do_reset):
     hf_tokenizer, genai_tokenizer = hf_ov_genai_models
 
     stream_string = re.split(r"(\s+)", answer)
         def write(self, message):
             msg.update(message)
             return StreamingStatus.RUNNING
     streamer = CustomStreamer(genai_tokenizer, parsers=[ReasoningIncrementalParser(expect_open_tag=True, keep_original_content=keep_original_content)])
-
-    msg = {}
-    for subword in stream_string:
-        streamer._write(subword)
 
-    think_content = answer.split("</think>")[0].replace("<think>", "")
-    content = answer
-
-    assert msg['reasoning_content'] == think_content
-    assert msg['content'] == (content if keep_original_content else "\n\nThe answer to 2 + 1 is \boxed{3}.")
+    num_runs = 2
+    for i in range(num_runs):
+        if do_reset:
+            streamer.reset()
+
+        msg = {}
+        for subword in stream_string:
+            streamer._write(subword)
+
+        think_content = answer.split("</think>")[0].replace("<think>", "")
+        content = answer
+
+        if do_reset:
+            # If the parser has been reset, check that the content is parsed correctly
+            assert msg['reasoning_content'] == think_content
+            assert msg['content'] == (content if keep_original_content else "\n\nThe answer to 2 + 1 is \boxed{3}.")
+        else:
+            # If reset() has not been called, the content will continue to accumulate thinking parts from the next runs
+            assert msg['content'].find("<think>") != -1
 
 
 def test_incremental_deepseek_parser():
@@ -362,4 +373,37 @@ def parse(self, msg: dict):
     assert res.parsed[0]['reasoning_content'] != ""
     assert res.parsed[0]['reasoning_content'] == think_text
 
-# TODO; add test for reseting incremental parser at generation start
+
+@pytest.mark.parametrize("model_id", ["microsoft/Phi-4-mini-reasoning"])
+@pytest.mark.nightly
+def test_reset_incremental_parser(tmp_path, model_id):
+    _, _, models_path = download_and_convert_model(model_id, padding_side="left")
+    pipe = create_ov_pipeline(models_path)
+    tok = pipe.get_tokenizer()
+
+    class CustomStreamer(TextParserStreamer):
+        def write(self, message):
+            return StreamingStatus.RUNNING
+    streamer = CustomStreamer(tok, parsers=[Phi4ReasoningIncrementalParser()])
+
+    prompt = "Please say \"hello\""
+    res = pipe.generate([prompt], max_new_tokens=600, parsers=[Phi4ReasoningParser()])
+
+    # Manually extract the reasoning content from the parsed result
+    content = res.texts[0]
+    think_start = content.find("<think>")
+    think_end = content.find("</think>")
+    if think_start != -1 and think_end != -1 and think_end > think_start:
+        think_text = content[think_start + len("<think>"):think_end]
+
+    assert 'reasoning_content' in res.parsed[0]
+    assert res.parsed[0]['reasoning_content'] != ""
+    assert res.parsed[0]['reasoning_content'] == think_text
+
+    res_streamer_1 = pipe.generate([prompt], max_new_tokens=600, streamer=streamer)
+    res_streamer_2 = pipe.generate([prompt], max_new_tokens=600, streamer=streamer)
+    # Check that results from streamer generation are the same as from non-streamer generation.
+    assert res_streamer_1.parsed == res.parsed
+
+    # This also asserts that resetting the streamer between generations works correctly.
+    assert res_streamer_2.parsed == res.parsed

From 99ed0a98fe18c887bb38710b750d69072be02c1c Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Fri, 24 Oct 2025 13:39:15 +0200
Subject: [PATCH 39/43] wip

---
 src/cpp/src/parsers.cpp | 169 +++++++++++++++++++++-------------------
 1 file changed, 89 insertions(+), 80 deletions(-)

diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index ad8a9a1cc1..15a5e965d6 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -18,7 +18,7 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
     // Values with default member initializers are reset on each reset() call.
     bool m_first_run = true;
     bool m_think_tag_opened = false;
-    std::string m_text_cache = "";
+    std::string m_text_cache;
     bool m_deactivated = false;
 public:
     ReasoningParserImpl() = default;
@@ -30,7 +30,9 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
         : m_expect_open_tag(expect_open_tag),
           m_keep_original_content(keep_original_content),
           m_open_tag(open_tag),
-          m_close_tag(close_tag) {}
+          m_close_tag(close_tag) {
+        m_text_cache.reserve(close_tag.size());
+    }
 
     std::string parse(
         JsonContainer& message,
@@ -40,11 +42,15 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
         if (m_deactivated) {
             return delta_text;
         }
-        if (!m_expect_open_tag && m_first_run) {
-            m_think_tag_opened = true;
+
+        if (m_first_run) {
+            m_first_run = false;
+            if (!m_expect_open_tag) {
+                m_think_tag_opened = true;
+            }
         }
-        m_first_run = false;
 
+        // Initialize message fields if needed
         if (!message.contains("reasoning_content")) {
            message["reasoning_content"] = "";
        }
        if (!message.contains("content")) {
            message["content"] = "";
        }
-
-        auto txt_chunk = m_text_cache + delta_text;
+        // Combine cached text with new delta
+        m_text_cache += delta_text;
+        const std::string& txt_chunk = m_text_cache;
+
         auto reason_str = message["reasoning_content"].get_string();
-        auto content_str = message["content"].get_string();
 
-        if (!m_think_tag_opened && txt_chunk.find(m_open_tag) != std::string::npos && m_expect_open_tag) {
-            // Thinking has started
-            auto open_idx = txt_chunk.find(m_open_tag);
-
-            reason_str += txt_chunk.substr(open_idx + m_open_tag.size(), txt_chunk.size() - (open_idx + m_open_tag.size()));
-            if (!m_keep_original_content) {
-                delta_text = "";
+        if (!m_think_tag_opened && m_expect_open_tag) {
+            // Look for opening tag
+            size_t open_idx = txt_chunk.find(m_open_tag);
+            if (open_idx != std::string::npos) {
+                // Thinking has started
+                m_think_tag_opened = true;
+                size_t content_start = open_idx + m_open_tag.size();
+
+                // Check if closing tag is also present
+                size_t close_idx = txt_chunk.find(m_close_tag, content_start);
+                if (close_idx != std::string::npos) {
+                    // Both tags in same chunk
+                    reason_str = txt_chunk.substr(content_start, close_idx - content_start);
+                    message["reasoning_content"] = reason_str;
+
+                    if (!m_keep_original_content) {
+                        delta_text = txt_chunk.substr(close_idx + m_close_tag.size());
+                    }
+
+                    m_think_tag_opened = false;
+                    m_deactivated = true;
+                    m_text_cache.clear();
+                } else {
+                    // Only opening tag found
+                    reason_str += txt_chunk.substr(content_start);
+                    message["reasoning_content"] = reason_str;
+
+                    if (!m_keep_original_content) {
+                        delta_text.clear();
+                    }
+                    m_text_cache.clear();
+                }
+                return delta_text;
             }
-
-            m_think_tag_opened = true;
-            message["reasoning_content"] = reason_str;
-            m_text_cache = "";
-
-            if (txt_chunk.find(m_close_tag) != std::string::npos) {
-                // If <think> and </think> are in the same txt_chunk + delta_text
-                auto close_idx = txt_chunk.find(m_close_tag);
-                reason_str = txt_chunk.substr(open_idx + m_open_tag.size(), close_idx - (open_idx + m_open_tag.size()));
-                content_str = txt_chunk.substr(close_idx + m_close_tag.size(), txt_chunk.size() - (close_idx + m_close_tag.size()));
+            // Opening tag not found, keep accumulating
+            return delta_text;
+        }
+
+        if (m_think_tag_opened) {
+            // Look for closing tag
+            size_t close_idx = txt_chunk.find(m_close_tag);
+            if (close_idx != std::string::npos) {
+                // Thinking tag was closed
+                reason_str += txt_chunk.substr(0, close_idx);
+                message["reasoning_content"] = reason_str;
+
                if (!m_keep_original_content) {
-                    delta_text = content_str;
+                    delta_text = txt_chunk.substr(close_idx + m_close_tag.size());
                 }
+
+                m_text_cache.clear();
                 m_think_tag_opened = false;
                 m_deactivated = true;
-                message["reasoning_content"] = reason_str;
-            }
-        } else if (m_think_tag_opened && txt_chunk.find(m_close_tag) != std::string::npos) {
-            // Thinking tag was closed
-            auto close_idx = txt_chunk.find(m_close_tag);
-
-            reason_str += txt_chunk.substr(0, close_idx);
-            if (!m_keep_original_content) {
-                // Cut from the txt_chunk which is before </think> and leave only what is after </think>.
-                // Example if m_text_cache + delta_text = "...some text</th" + "ink>Answer is 3" = "...some text</think>Answer is 3"
-                // we want to keep in delta_text only "Answer is 3".
-                // We can operate with txt_chunk since final characters closing the tag ("ink>") are always in delta_text.
-                delta_text = txt_chunk.substr(close_idx + m_close_tag.size(), txt_chunk.size() - (close_idx + m_close_tag.size()));
+                return delta_text;
             }
-
-            message["reasoning_content"] = reason_str;
-            m_text_cache = "";
-            m_think_tag_opened = false;
-            m_deactivated = true;
-        } else if (m_think_tag_opened) {
-            // Thinking tag was already opened and not closed yet
-            // If we have subsequently "sdf</th", "i", "nk> The"
-            // Then we put "sdf" to reason_str and "</th" to m_text_cache since it's a prefix of the close tag "</think>"
-            // then we put "i" to m_text_cache since m_text_cache + delta_text = "</th" + "i" is still a prefix of "</think>"
-            // then (in the closing tag IF-block) we leave only " The" in delta_text.
-
-            // If we have "ing. <", " 20 ", "40>"
-            // Then we put "ing. " to reason_str and "<" to m_text_cache since it's a substring of close tag "</think>"
-            // but since continuation " 20 " is not a substring of "</think>", we will end up in this IF-block again
-            // and put " 20 " to reason_str and clear m_text_cache.
-
-            // number of characters from the end of txt_chunk which can be part of the closing tag
-            size_t num_chars_to_keep = 0;
-            // We must be sure that no chunks with the closing tag are included to reason_str.
-            for (size_t i = txt_chunk.size(); i >= 1; --i) {
-                // Get the substring of the i last characters of txt_chunk
-                auto suffix = txt_chunk.substr(txt_chunk.size() - i, i);
-                // If this suffix is a prefix of m_close_tag, we need to keep it in the cache.
-                if (m_close_tag.find(suffix) == 0) {
-                    num_chars_to_keep = i;
-                    break;
-                }
-            }
-
-            // If the suffix is a prefix of m_close_tag, we store it in the cache to detect if </think> is split between several delta_text pieces.
- m_text_cache += delta_text; } + // else: accumulating text before opening tag return delta_text; } + // Find the longest suffix of txt that is a prefix of close_tag + size_t find_prefix_match_length(const std::string& txt, const std::string& close_tag) const { + size_t max_check = std::min(txt.size(), close_tag.size() - 1); + for (size_t len = max_check; len >= 1; --len) { + if (txt.compare(txt.size() - len, len, close_tag, 0, len) == 0) { + return len; + } + } + return 0; + } + +public: void reset() { m_first_run = true; m_think_tag_opened = false; - m_text_cache = ""; + m_text_cache.clear(); m_deactivated = false; } }; From fbd57e39bef4f2a6b5fc01fa7cb271042a99d4af Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 24 Oct 2025 13:46:32 +0200 Subject: [PATCH 40/43] optimize parse() --- src/cpp/src/parsers.cpp | 252 +++++++++++++++++++++++----------------- 1 file changed, 144 insertions(+), 108 deletions(-) diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp index 15a5e965d6..cd3ca11adc 100644 --- a/src/cpp/src/parsers.cpp +++ b/src/cpp/src/parsers.cpp @@ -18,8 +18,120 @@ class ReasoningIncrementalParser::ReasoningParserImpl { // Values with default member initializers are reset on each reset() call. bool m_first_run = true; bool m_think_tag_opened = false; - std::string m_text_cache; + std::string m_text_cache = ""; bool m_deactivated = false; + + /** + * @brief Ensure required fields exist in the message container. + */ + void ensure_message_fields(JsonContainer& message) { + if (!message.contains("reasoning_content")) { + message["reasoning_content"] = ""; + } + if (!message.contains("content")) { + message["content"] = ""; + } + } + + /** + * @brief Find the longest suffix of text that is a prefix of the close tag. + * + * This is used to detect if the close tag is split across multiple chunks. + * For example, if text ends with "", + * this returns 4 (the length of "= 1; --i) { + // Compare the last i characters of text with the first i characters of m_close_tag + if (text.compare(text.size() - i, i, m_close_tag, 0, i) == 0) { + return i; + } + } + return 0; + } + + /** + * @brief Handle the case where both open and close tags are found in the same chunk. + */ + void handle_complete_reasoning(JsonContainer& message, std::string_view txt_chunk, + size_t open_idx, size_t close_idx, std::string& delta_text) { + // Extract reasoning content between tags + message["reasoning_content"] = std::string(txt_chunk.substr(open_idx + m_open_tag.size(), + close_idx - (open_idx + m_open_tag.size()))); + + if (!m_keep_original_content) { + delta_text = std::string(txt_chunk.substr(close_idx + m_close_tag.size())); + } + + m_think_tag_opened = false; + m_deactivated = true; + m_text_cache.clear(); + } + + /** + * @brief Handle the case where only the open tag is found. + */ + void handle_open_tag(JsonContainer& message, std::string& reason_str, + std::string_view txt_chunk, size_t open_idx, std::string& delta_text) { + // Start accumulating reasoning content + reason_str.append(txt_chunk.substr(open_idx + m_open_tag.size())); + message["reasoning_content"] = std::move(reason_str); + + if (!m_keep_original_content) { + delta_text.clear(); + } + + m_think_tag_opened = true; + m_text_cache.clear(); + } + + /** + * @brief Handle the case where the close tag is found. 
+     */
+    void handle_close_tag(JsonContainer& message, std::string& reason_str,
+                          std::string_view txt_chunk, size_t close_idx, std::string& delta_text) {
+        // Append text before close tag to reasoning content
+        reason_str.append(txt_chunk.substr(0, close_idx));
+        message["reasoning_content"] = std::move(reason_str);
+
+        if (!m_keep_original_content) {
+            delta_text = std::string(txt_chunk.substr(close_idx + m_close_tag.size()));
+        }
+
+        m_text_cache.clear();
+        m_think_tag_opened = false;
+        m_deactivated = true;
+    }
+
+    /**
+     * @brief Handle accumulating text while inside reasoning tags.
+     */
+    void handle_inside_reasoning(JsonContainer& message, std::string& reason_str,
+                                 std::string_view txt_chunk, std::string& delta_text) {
+        // Find if the end of txt_chunk might be the start of a close tag
+        const size_t num_chars_to_keep = find_close_tag_prefix_length(txt_chunk);
+
+        if (num_chars_to_keep > 0) {
+            // Keep potential partial close tag in cache
+            m_text_cache = std::string(txt_chunk.substr(txt_chunk.size() - num_chars_to_keep));
+            reason_str.append(txt_chunk.substr(0, txt_chunk.size() - num_chars_to_keep));
+        } else {
+            // No partial close tag, accumulate all text
+            reason_str.append(txt_chunk);
+            m_text_cache.clear();
+        }
+
+        if (!m_keep_original_content) {
+            delta_text.clear();
+        }
+        message["reasoning_content"] = std::move(reason_str);
+    }
+
 public:
     ReasoningParserImpl() = default;
@@ -30,7 +142,9 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
         : m_expect_open_tag(expect_open_tag),
           m_keep_original_content(keep_original_content),
           m_open_tag(open_tag),
-          m_close_tag(close_tag) {
-        m_text_cache.reserve(close_tag.size());
-    }
+          m_close_tag(close_tag) {}
 
     std::string parse(
         JsonContainer& message,
@@ -42,11 +152,15 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
         if (m_deactivated) {
             return delta_text;
         }
-
-        if (m_first_run) {
-            m_first_run = false;
-            if (!m_expect_open_tag) {
-                m_think_tag_opened = true;
-            }
+        if (!m_expect_open_tag && m_first_run) {
+            m_think_tag_opened = true;
         }
+        m_first_run = false;
 
-        // Initialize message fields if needed
-        if (!message.contains("reasoning_content")) {
-            message["reasoning_content"] = "";
-        }
-        if (!message.contains("content")) {
-            message["content"] = "";
-        }
+        ensure_message_fields(message);
 
-        // Combine cached text with new delta
-        m_text_cache += delta_text;
-        const std::string& txt_chunk = m_text_cache;
+        const std::string txt_chunk = m_text_cache + delta_text;
+        std::string reason_str;
+        if (message.contains("reasoning_content")) {
+            reason_str = std::move(message["reasoning_content"].get_string());
+        }
+
+        // Cache find() results to avoid redundant searches
+        const auto open_idx = txt_chunk.find(m_open_tag);
+        const auto close_idx = txt_chunk.find(m_close_tag);
+
+        if (!m_think_tag_opened && open_idx != std::string::npos && m_expect_open_tag) {
+            // Check if close tag is also present after the open tag
+            const auto close_idx_after_open = (close_idx != std::string::npos && close_idx > open_idx)
+                ? close_idx : std::string::npos;
-        if (m_think_tag_opened) {
-            // Look for closing tag
-            size_t close_idx = txt_chunk.find(m_close_tag);
-            if (close_idx != std::string::npos) {
-                // Thinking tag was closed
-                reason_str += txt_chunk.substr(0, close_idx);
-                message["reasoning_content"] = reason_str;
-
-                if (!m_keep_original_content) {
-                    delta_text = txt_chunk.substr(close_idx + m_close_tag.size());
-                }
-
-                m_text_cache.clear();
-                m_think_tag_opened = false;
-                m_deactivated = true;
-                return delta_text;
-            }
-
-            // Closing tag not found - check if end might be partial match
-            size_t num_chars_to_keep = find_prefix_match_length(txt_chunk, m_close_tag);
+        const std::string txt_chunk = m_text_cache + delta_text;
+        std::string reason_str;
+        if (message.contains("reasoning_content")) {
+            reason_str = std::move(message["reasoning_content"].get_string());
+        }
+
+        // Cache find() results to avoid redundant searches
+        const auto open_idx = txt_chunk.find(m_open_tag);
+        const auto close_idx = txt_chunk.find(m_close_tag);
+
+        if (!m_think_tag_opened && open_idx != std::string::npos && m_expect_open_tag) {
+            // Check if close tag is also present after the open tag
+            const auto close_idx_after_open = (close_idx != std::string::npos && close_idx > open_idx)
+                ? close_idx : std::string::npos;
 
-            if (num_chars_to_keep > 0) {
-                // Keep potential partial match in cache
-                reason_str += txt_chunk.substr(0, txt_chunk.size() - num_chars_to_keep);
-                message["reasoning_content"] = reason_str;
-                m_text_cache = txt_chunk.substr(txt_chunk.size() - num_chars_to_keep);
+            if (close_idx_after_open != std::string::npos) {
+                handle_complete_reasoning(message, txt_chunk, open_idx, close_idx_after_open, delta_text);
             } else {
-                // No partial match - add all to reasoning
-                reason_str += txt_chunk;
-                message["reasoning_content"] = reason_str;
-                m_text_cache.clear();
-            }
-
-            if (!m_keep_original_content) {
-                delta_text.clear();
+                handle_open_tag(message, reason_str, txt_chunk, open_idx, delta_text);
             }
+        } else if (m_think_tag_opened && close_idx != std::string::npos) {
+            handle_close_tag(message, reason_str, txt_chunk, close_idx, delta_text);
+        } else if (m_think_tag_opened) {
+            handle_inside_reasoning(message, reason_str, txt_chunk, delta_text);
+        } else {
+            // Think tag was not opened yet and not found in the current delta_text.
+            // Accumulate text in the cache to detect if the open tag is split between several delta_text pieces.
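+            // Worked example (hypothetical chunking, assuming m_open_tag == "<think>"): the model
+            // may stream "<th" first and "ink>..." next. Caching "<th" here lets the next call see
+            // the complete "<think>" in m_text_cache + delta_text and open the reasoning state.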
+            m_text_cache += delta_text;
         }
-        // else: accumulating text before opening tag
 
         return delta_text;
     }
 
-    // Find the longest suffix of txt that is a prefix of close_tag
-    size_t find_prefix_match_length(const std::string& txt, const std::string& close_tag) const {
-        size_t max_check = std::min(txt.size(), close_tag.size() - 1);
-        for (size_t len = max_check; len >= 1; --len) {
-            if (txt.compare(txt.size() - len, len, close_tag, 0, len) == 0) {
-                return len;
-            }
-        }
-        return 0;
-    }
-
-public:
     void reset() {
         m_first_run = true;
         m_think_tag_opened = false;
-        m_text_cache.clear();
+        m_text_cache = "";
        m_deactivated = false;
    }
};

From 27b7bae9b1daa93b3283e29cb744289ce90dbb90 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Fri, 24 Oct 2025 16:07:39 +0200
Subject: [PATCH 41/43] Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 src/cpp/include/openvino/genai/parsers.hpp | 21 +++++++++++++++++++++
 src/cpp/src/llm/pipeline.cpp               |  1 +
 src/cpp/src/text_streamer.cpp              |  7 +++----
 src/python/py_generation_config.cpp        |  2 +-
 src/python/py_streamers.cpp                |  2 +-
 5 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/src/cpp/include/openvino/genai/parsers.hpp b/src/cpp/include/openvino/genai/parsers.hpp
index d72f3b8d62..156d158aca 100644
--- a/src/cpp/include/openvino/genai/parsers.hpp
+++ b/src/cpp/include/openvino/genai/parsers.hpp
@@ -137,6 +137,27 @@ class OPENVINO_GENAI_EXPORTS Llama3JsonToolParser : public Parser {
 
 /**
  * @brief Abstract base class for incremental parsers that process text during streaming.
+ *
+ * Derived classes must implement both the `parse()` and `reset()` methods, as these are pure virtual.
+ *
+ * Use `IncrementalParser` when you need to process text as it is generated (e.g., in streaming scenarios),
+ * handling partial content and maintaining internal state between increments. Use `Parser` when you only
+ * need to process the complete text after generation has finished.
+ *
+ * Example:
+ * @code
+ * class MyIncrementalParser : public ov::genai::IncrementalParser {
+ * public:
+ *     std::string parse(JsonContainer& message, std::string& delta_text,
+ *                       const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt) override {
+ *         // Implement incremental parsing logic here
+ *         return delta_text; // Example: simply return the input
+ *     }
+ *     void reset() override {
+ *         // Reset internal state here
+ *     }
+ * };
+ * @endcode
  */
 class OPENVINO_GENAI_EXPORTS IncrementalParser {
 public:
diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp
index fe1222f779..de05b5b950 100644
--- a/src/cpp/src/llm/pipeline.cpp
+++ b/src/cpp/src/llm/pipeline.cpp
@@ -30,6 +30,7 @@ ov::genai::DecodedResults run_generate_with_parsers(const ov::genai::OptionalGen
 }
 
    // determine from generation config when 'need_to_reset_parser' will be available
+    // TODO: Determine 'need_to_reset_parser' from generation_config when available.
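+    // Hypothetical sketch of that wiring ('reset_parsers' is an assumed GenerationConfig
+    // field that does not exist yet):
+    //     bool need_to_reset_parser = generation_config.has_value() ? generation_config->reset_parsers : true;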
bool need_to_reset_parser = true; if (parser_streamer && need_to_reset_parser) { parser_streamer->reset(); diff --git a/src/cpp/src/text_streamer.cpp b/src/cpp/src/text_streamer.cpp index fca30b7aea..ee9db529cf 100644 --- a/src/cpp/src/text_streamer.cpp +++ b/src/cpp/src/text_streamer.cpp @@ -34,7 +34,6 @@ StreamingStatus TextStreamer::write(int64_t token) { if (!text.empty() && '\n' == text.back() && text.size() > m_printed_len) { // Flush the cache after the new line symbol res << std::string_view{text.data() + m_printed_len, text.size() - m_printed_len}; - // Get the list of tokens decoded for this chunk or rest of text. auto res_status = run_callback_if_needed(res.str()); m_tokens_cache.clear(); @@ -62,7 +61,7 @@ StreamingStatus TextStreamer::write(int64_t token) { if (print_until > -1 && print_until > m_printed_len) { // It is possible to have a shorter text after adding new token. - // Print to output only if text length is increaesed. + // Print to output only if text length is increased. res << std::string_view{text.data() + m_printed_len, print_until - m_printed_len} << std::flush; } @@ -150,11 +149,11 @@ TextParserStreamer::TextParserStreamer(const Tokenizer& tokenizer, std::vector(parsers)} {} CallbackTypeVariant TextParserStreamer::write(std::string message) { - // When 'write' is called with string, it means new chunck of tokens is decoded into text + // When 'write' is called with string, it means new chunk of tokens is decoded into text auto flushed_tokens = std::vector(); if (message.back() == '\n') { - // Flush all tokens // TODO: m_decoded_lengths[m_decoded_lengths.size() - 1] = -1; + // Flush all tokens flushed_tokens.assign(m_tokens_cache.begin(), m_tokens_cache.end()); } else if (m_decoded_lengths.size() >= delay_n_tokens) { // prompt = "I was waiting for the bus.\n" diff --git a/src/python/py_generation_config.cpp b/src/python/py_generation_config.cpp index 86ad684aa7..18ddbab248 100644 --- a/src/python/py_generation_config.cpp +++ b/src/python/py_generation_config.cpp @@ -445,7 +445,7 @@ void init_generation_config(py::module_& m) { .def_readwrite("include_stop_str_in_output", &GenerationConfig::include_stop_str_in_output) .def_readwrite("stop_token_ids", &GenerationConfig::stop_token_ids) .def_readwrite("structured_output_config", &GenerationConfig::structured_output_config) - .def_readwrite("parsers", &GenerationConfig::parsers) // TODO: add keep_alive + .def_readwrite("parsers", &GenerationConfig::parsers, py::keep_alive<1, 2>()) .def_readwrite("adapters", &GenerationConfig::adapters) .def_readwrite("apply_chat_template", &GenerationConfig::apply_chat_template) .def("set_eos_token_id", &GenerationConfig::set_eos_token_id, py::arg("tokenizer_eos_token_id")) diff --git a/src/python/py_streamers.cpp b/src/python/py_streamers.cpp index f2df853e8b..349ec2a8c0 100644 --- a/src/python/py_streamers.cpp +++ b/src/python/py_streamers.cpp @@ -83,7 +83,7 @@ class ConstructableTextParserStreamer: public TextParserStreamer { using TextParserStreamer::TextParserStreamer; // inherit base constructors StreamingStatus write(JsonContainer& message) override { - // Since c++ calls function with JsonContainer while python override expects py::dict, + // Since C++ calls function with JsonContainer while python override expects py::dict, // this function is a wrapper to call Python implementation of 'write' with py::dict py::gil_scoped_acquire acquire; From 6e847c0d6fcb93bb25373147bd0e5a67d2bc6710 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 24 Oct 2025 17:01:15 +0200 Subject: 
[PATCH 42/43] Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 src/cpp/src/llm/pipeline.cpp | 2 +-
 src/cpp/src/parsers.cpp      | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/cpp/src/llm/pipeline.cpp b/src/cpp/src/llm/pipeline.cpp
index de05b5b950..1f1f1e339d 100644
--- a/src/cpp/src/llm/pipeline.cpp
+++ b/src/cpp/src/llm/pipeline.cpp
@@ -17,7 +17,7 @@
 
 namespace {
 
-// This is an decorator function that wraps a generation callable to apply parsers and reset them before generation if needed.
+// This is a decorator function that wraps a generation callable to apply parsers and reset them before generation if needed.
 ov::genai::DecodedResults run_generate_with_parsers(const ov::genai::OptionalGenerationConfig& generation_config,
                                                     const ov::genai::StreamerVariant& streamer,
                                                     std::function<ov::genai::DecodedResults()> generate_callable) {
diff --git a/src/cpp/src/parsers.cpp b/src/cpp/src/parsers.cpp
index cd3ca11adc..d359fad2fc 100644
--- a/src/cpp/src/parsers.cpp
+++ b/src/cpp/src/parsers.cpp
@@ -45,14 +45,14 @@ class ReasoningIncrementalParser::ReasoningParserImpl {
      */
     size_t find_close_tag_prefix_length(std::string_view text) const {
         const size_t max_check = std::min(text.size(), m_close_tag.size());
-
-        for (size_t i = max_check; i >= 1; --i) {
+        size_t longest_match = 0;
+        for (size_t i = 1; i <= max_check; ++i) {
             // Compare the last i characters of text with the first i characters of m_close_tag
             if (text.compare(text.size() - i, i, m_close_tag, 0, i) == 0) {
-                return i;
+                longest_match = i;
             }
         }
-        return 0;
+        return longest_match;
     }
 
     /**

From cbce8e6bddb661014d354bea8e3a51786cd92206 Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Fri, 24 Oct 2025 17:20:31 +0200
Subject: [PATCH 43/43] improve test_reasoning_parser_cut_content

---
 tests/python_tests/test_parsers.py | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/tests/python_tests/test_parsers.py b/tests/python_tests/test_parsers.py
index 8bc23c1f7d..3155e17fe1 100644
--- a/tests/python_tests/test_parsers.py
+++ b/tests/python_tests/test_parsers.py
@@ -205,7 +205,7 @@ def test_incremental_phi4_reason_parser_nostreamer(answer):
 
 @pytest.mark.precommit
 @pytest.mark.parametrize("keep_original_content", [True, False])
-@pytest.mark.parametrize("do_reset", [True, False])
+@pytest.mark.parametrize("do_reset", [False])
 @pytest.mark.parametrize(
     "hf_ov_genai_models",
     ["katuni4ka/tiny-random-phi3"],  # this tokenizer is used as a stub only
         def write(self, message):
             msg.update(message)
             return StreamingStatus.RUNNING
     streamer = CustomStreamer(genai_tokenizer, parsers=[ReasoningIncrementalParser(expect_open_tag=True, keep_original_content=keep_original_content)])
-
+    num_runs = 2
+    msg = {}
     for i in range(num_runs):
         if do_reset:
             streamer.reset()
-        msg = {}
         for subword in stream_string:
             streamer._write(subword)
 
     think_content = answer.split("</think>")[0].replace("<think>", "")
-    content = answer
-
-    if do_reset:
-        # If has been reset, check that content is parsed correctly
-        assert msg['reasoning_content'] == think_content
-        assert msg['content'] == (content if keep_original_content else "\n\nThe answer to 2 + 1 is \boxed{3}.")
-    else:
-        # If has not been reset(), then content will contine to accumulate thinking parts from the next runs
-        msg['content'].find("<think>")
+
+    if do_reset:
+        # If the parser has been reset, check that content is parsed correctly
+        assert msg['reasoning_content'] == think_content
+        assert msg['content'] == (answer if keep_original_content
+                                  else "\n\nThe answer to 2 + 1 is \boxed{3}.")
+    else:
+        # If reset() has not been called, msg['content'] will continue to accumulate thinking parts from the next runs
+        assert msg['content'].find("<think>") >= 0
 
 
 def test_incremental_deepseek_parser():
@@ -311,7 +310,7 @@ def write(self, message):
 
     for subword in stream_string:
         streamer._write(subword)
 
-    assert msg['main_text'] == ''.join(" world ")
+    assert msg['main_text'] == " world "
 
 
 @pytest.mark.precommit