Skip to content
Open
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
4b357ff
tests are green
pavel-esir Sep 19, 2025
525a4d8
python api added
pavel-esir Sep 23, 2025
3ab9757
refactored
pavel-esir Sep 24, 2025
5624fc2
add parsing at the end of generate()
pavel-esir Sep 25, 2025
3407d8f
hide map with predefined initialized parsers; add TODOs
pavel-esir Sep 29, 2025
219827a
use JsonContainer
pavel-esir Oct 14, 2025
4c3d443
fix processing with JsonContainer and make tests green again
pavel-esir Oct 16, 2025
5ee48bf
add Llama32JsonToolParser
pavel-esir Oct 16, 2025
248ccc6
rename ParsedMessage -> JsonContainer
pavel-esir Oct 17, 2025
04064bf
make tests green again, apply copilot comments
pavel-esir Oct 17, 2025
ae1930b
revert sample
pavel-esir Oct 17, 2025
2772c98
update pybind11 stubs
pavel-esir Oct 17, 2025
94b8370
update stubs
pavel-esir Oct 17, 2025
7759b48
replace starts_with_thinking -> starts_with_thinking; some other corr…
pavel-esir Oct 17, 2025
5470b63
remove std::variant<std::string, std::shared_ptr<Parsers>>
pavel-esir Oct 17, 2025
62bf17a
hide leftovers to m_pimpl
pavel-esir Oct 17, 2025
4f75706
move defaults from class members to ctor default arguments
pavel-esir Oct 17, 2025
edc2c3e
return void instead of JsonContainer and keep modifying argument by r…
pavel-esir Oct 17, 2025
e4ac079
pass open/close tag strings by reference instead of value
pavel-esir Oct 17, 2025
ca732ba
remove breakpoint()
pavel-esir Oct 17, 2025
d6a6ef7
Merge remote-tracking branch 'upstream/master' into add_parsing
pavel-esir Oct 20, 2025
32356cb
use new JsonContainer to py::dict conversion approach
pavel-esir Oct 20, 2025
aefbd7b
fix segfault; some other fixes
pavel-esir Oct 20, 2025
9c0422c
add export symbols
pavel-esir Oct 20, 2025
ac9dd8c
add more tests use unique_ptr
pavel-esir Oct 21, 2025
e4ff386
cleanup pybindings; rename Llama32 -> Llama3
pavel-esir Oct 22, 2025
196a54c
corrected expect_open_tag behaviour, added some tests
pavel-esir Oct 22, 2025
801d8fb
renamed parsers; removed ParserBase -> Parser; IncrementalParserBase …
pavel-esir Oct 22, 2025
01d4224
renaming leftovers
pavel-esir Oct 22, 2025
fec8945
some corrections
pavel-esir Oct 22, 2025
9fa7d01
hide TextParsedStreamerImplementation
pavel-esir Oct 22, 2025
a2307a2
remove redundant previous_text, previous_tokens, add docstrings
pavel-esir Oct 22, 2025
cada055
add decorator to call/reset parser before/after generate
pavel-esir Oct 22, 2025
b18ee75
add reset() method
pavel-esir Oct 22, 2025
79d299e
put delta_tokens to parser as well
pavel-esir Oct 22, 2025
14a3a7f
Merge remote-tracking branch 'upstream/master' into add_parsing
pavel-esir Oct 22, 2025
775cf13
store compiled pattern as a member
pavel-esir Oct 22, 2025
d4bf6be
call parsers for ChatHistory as well; update caches
pavel-esir Oct 23, 2025
8ddc2c4
use json_container_to_py_object when `parse()` is called from Python …
pavel-esir Oct 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/cpp/include/openvino/genai/generation_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "openvino/genai/tokenizer.hpp"
#include "openvino/genai/scheduler_config.hpp"
#include "openvino/genai/lora_adapter.hpp"
#include "openvino/genai/parsers.hpp"

namespace ov {
namespace genai {
Expand Down Expand Up @@ -348,6 +349,9 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig {
bool is_prompt_lookup() const;
bool is_structured_output_generation() const;

// parsers
std::vector<std::shared_ptr<ParserBase>> parsers;

OPENVINO_DEPRECATED("Please, use `is_assisting_generation()` instead of `is_speculative_decoding()`. This method will be removed in 2026.0.0 release")
bool is_speculative_decoding() const;

Expand Down
2 changes: 2 additions & 0 deletions src/cpp/include/openvino/genai/llm_pipeline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "openvino/genai/perf_metrics.hpp"
#include "openvino/genai/scheduler_config.hpp"
#include "openvino/genai/common_types.hpp"
#include "openvino/genai/json_container.hpp"

namespace ov {
namespace genai {
Expand Down Expand Up @@ -68,6 +69,7 @@ class DecodedResults {
std::vector<float> scores;
PerfMetrics perf_metrics;
std::shared_ptr<ExtendedPerfMetrics> extended_perf_metrics;
std::vector<JsonContainer> parsed;

// @brief Convert DecodedResults to a string.
operator std::string() const {
Expand Down
96 changes: 96 additions & 0 deletions src/cpp/include/openvino/genai/parsers.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#pragma once
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <vector>
#include "openvino/genai/json_container.hpp"

namespace ov {
namespace genai {

// Base class for parsers that consume generated text incrementally, chunk by
// chunk, as it is produced by a streamer.
class IncrementalParserBase {
public:
    IncrementalParserBase() = default;

    // Polymorphic base: a virtual destructor makes deletion through an
    // IncrementalParserBase* well-defined for all derived parsers.
    virtual ~IncrementalParserBase() = default;

    // Parses `delta_text` (the newly generated chunk) in the context of
    // `previous_text`, updating `msg` in place (e.g. accumulating reasoning
    // or content fields).
    //
    // Returns the filtered portion of the delta text that should be appended
    // to the visible content (tag markup may be stripped out).
    //
    // `previous_tokens`/`delta_tokens` optionally carry the token ids backing
    // the corresponding text, for parsers that need token-level information.
    virtual std::string parse(
        JsonContainer& msg,
        const std::string& previous_text,
        std::string& delta_text,
        const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
        const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
    ) = 0;
};

// Incremental parser that splits generated text into a reasoning section,
// delimited by open/close tags (default "<think>"/"</think>"), and regular
// content. Implementation details are hidden behind a pimpl.
class ReasoningParser : public IncrementalParserBase {
private:
class ReasoningParserImpl;
std::shared_ptr<ReasoningParserImpl> m_impl;
public:
// expect_open_tag: when true the parser waits for `open_tag` before treating
//   text as reasoning; when false, text is treated as reasoning from the start
//   until `close_tag` is seen.
// keep_original_content: when true the original (unstripped) text is preserved
//   in the message content — TODO confirm exact semantics against the impl.
ReasoningParser(bool expect_open_tag = true,
bool keep_original_content = true,
const std::string& open_tag = "<think>",
const std::string& close_tag = "</think>");

// See IncrementalParserBase::parse. Updates `msg` in place and returns the
// filtered delta text to append to visible content.
std::string parse(
JsonContainer& msg,
const std::string& previous_text,
std::string& delta_text,
const std::optional<std::vector<int64_t>>& previous_tokens = std::nullopt,
const std::optional<std::vector<int64_t>>& delta_tokens = std::nullopt
) override;
};

// Reasoning parser preconfigured for DeepSeek-R1-style output; presumably the
// model emits its own opening tag, hence expect_open_tag defaults to true —
// TODO confirm against the model's chat template.
class DeepSeekR1ReasoningParser : public ReasoningParser {
public:
    // Note: no stray semicolon after the body — avoids -Wextra-semi warnings.
    explicit DeepSeekR1ReasoningParser(bool expect_open_tag = true) : ReasoningParser(expect_open_tag) {}
};

// Reasoning parser preconfigured for Phi-4-style output; expect_open_tag
// defaults to false, i.e. text is presumably treated as reasoning from the
// start until the close tag — TODO confirm against the model's chat template.
class Phi4ReasoningParser : public ReasoningParser {
public:
    // Note: no stray semicolon after the body — avoids -Wextra-semi warnings.
    explicit Phi4ReasoningParser(bool expect_open_tag = false) : ReasoningParser(expect_open_tag) {}
};

// Base class for non-incremental parsers that post-process a fully generated
// message. Instances are stored as std::shared_ptr<ParserBase> in
// GenerationConfig::parsers and applied after generate() completes.
class ParserBase {
public:
    ParserBase() = default;

    // Polymorphic base: a virtual destructor makes deletion through a
    // ParserBase* well-defined for all derived parsers.
    virtual ~ParserBase() = default;

    // Parses the message in place: reads and/or rewrites fields of `text`
    // (e.g. extracting tool calls or reasoning from the content).
    virtual void parse(JsonContainer& text) = 0;
};

// Post-generation parser that extracts tool calls from Llama-3.2-style output;
// judging by the name the calls are in "pythonic" form — TODO confirm the
// exact format against the implementation.
class Llama32PythonicToolParser : public ParserBase {
// Does not modify original content, only extracts and adds tool calls
public:
// keep_original_content: when true the message content is left untouched and
// tool calls are only added alongside it.
explicit Llama32PythonicToolParser(bool keep_original_content = true);
// Extracts tool calls from input["content"] and adds them to `input` —
// presumably under a tool-calls key; verify against the impl.
void parse(JsonContainer& input) override;
private:
class Llama32PythonicToolParserImpl;
std::shared_ptr<Llama32PythonicToolParserImpl> m_impl;
};

// Post-generation parser that extracts JSON-formatted tool calls from
// Llama-3.2-style output. Counterpart of Llama32PythonicToolParser for the
// JSON tool-call format.
class Llama32JsonToolParser : public ParserBase {
// Does not modify original content, only extracts and adds tool calls
public:
// keep_original_content: when true the message content is left untouched and
// tool calls are only added alongside it.
explicit Llama32JsonToolParser(bool keep_original_content = true);
// Extracts tool calls from input["content"] and adds them to `input` —
// presumably under a tool-calls key; verify against the impl.
void parse(JsonContainer& input) override;
private:
class Llama32JsonToolParserImpl;
std::shared_ptr<Llama32JsonToolParserImpl> m_impl;
};

// Non-incremental counterpart of ReasoningParser: splits an already complete
// message into reasoning and content using open/close tags.
// NOTE(review): the ctor parameters mirror ReasoningParser's — consider
// sharing the tag-splitting logic between the two implementations.
class BaseReasoningParser : public ParserBase{
public:
// expect_open_tag: when true reasoning starts only after `open_tag`; when
//   false, text is treated as reasoning from the start until `close_tag`.
// keep_original_content: when true the original text is preserved in the
//   message content — TODO confirm exact semantics against the impl.
BaseReasoningParser(
bool expect_open_tag = true,
bool keep_original_content = true,
const std::string& open_tag = "<think>",
const std::string& close_tag = "</think>");
// Splits input["content"] into reasoning/content fields in place.
void parse(JsonContainer& input) override;
private:
class BaseReasoningParserImpl;
std::shared_ptr<BaseReasoningParserImpl> m_impl;
};

} // namespace genai
} // namespace ov
16 changes: 16 additions & 0 deletions src/cpp/include/openvino/genai/text_streamer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include "openvino/genai/streamer_base.hpp"
#include "openvino/genai/tokenizer.hpp"
#include "openvino/genai/parsers.hpp"

namespace ov {
namespace genai {
Expand Down Expand Up @@ -46,5 +47,20 @@ class OPENVINO_GENAI_EXPORTS TextStreamer : public StreamerBase {
void compute_decoded_length_for_position(size_t cache_position);
};

class TextParserStreamer : public TextStreamer {
public:
TextParserStreamer(const Tokenizer& tokenizer, std::vector<std::shared_ptr<IncrementalParserBase>> parsers = {});

virtual StreamingStatus write(JsonContainer& message) = 0;

CallbackTypeVariant write(std::string message);

JsonContainer get_parsed_message() const { return m_parsed_message; }
private:
JsonContainer m_parsed_message;
std::string m_text_buffer;
std::vector<std::shared_ptr<IncrementalParserBase>> m_parsers;
};

} // namespace genai
} // namespace ov
1 change: 1 addition & 0 deletions src/cpp/src/generation_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ void GenerationConfig::update_generation_config(const ov::AnyMap& properties) {

// Structured output
read_anymap_param(properties, "structured_output_config", structured_output_config);
read_anymap_param(properties, "parsers", parsers);
}


Expand Down
33 changes: 32 additions & 1 deletion src/cpp/src/llm/pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,38 @@ DecodedResults LLMPipeline::generate(
StringInputs inputs,
OptionalGenerationConfig generation_config,
StreamerVariant streamer) {
return m_pimpl->generate(inputs, generation_config, streamer);
auto res = m_pimpl->generate(inputs, generation_config, streamer);

// If streamer is of StreamerBase type, and it is TextParserStreamer, get parsed message
if (auto streamer_obj = std::get_if<std::shared_ptr<StreamerBase>>(&streamer)) {
if (auto parser_streamer = std::dynamic_pointer_cast<TextParserStreamer>(*streamer_obj)) {
res.parsed.resize(res.texts.size());
res.parsed[0] = parser_streamer->get_parsed_message();
}
}

if (!generation_config.has_value() || (*generation_config).parsers.empty()) {
return res;
}

if (!generation_config.has_value() || (*generation_config).parsers.empty()) {
return res;
}

std::vector<std::shared_ptr<ParserBase>> parsers = (*generation_config).parsers;
res.parsed.resize(res.texts.size());
// Apply Base parsers sequentially even if IncrementalParser has run.
for (size_t i = 0; i < res.texts.size(); ++i) {
JsonContainer msg;
msg["content"] = res.texts[i];
for (auto& parser: parsers) {
// TODO: Check the state of incremental parser and reset if necessary
parser->parse(msg);
}
res.parsed[i] = msg;
}

return res;
}

DecodedResults LLMPipeline::generate(StringInputs text, const ov::AnyMap& config_map) {
Expand Down
Loading
Loading