Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion spelling-whitelist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ release_files/thirdparty-licenses/icu.LICENSE.txt:160: TaBE ==> table, tab
release_files/thirdparty-licenses/libgt2.LICENSE.txt:1040: aheared ==> adhered
release_files/thirdparty-licenses/libgt2.LICENSE.txt:1065: rouines ==> routines
release_files/thirdparty-licenses/libgt2.LICENSE.txt:1083: publically ==> publicly
src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp:559: paramete ==> parameter
src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp
1 change: 1 addition & 0 deletions src/llm/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ ovms_cc_library( # TODO split further so we don't have to recompile everything w
],
deps = [
"@com_github_tencent_rapidjson//:rapidjson",
"//src/port:rapidjson_document",
"//src:libovmslogging",
"//src:libovmsstring_utils",
":partial_json_builder",
Expand Down
6 changes: 3 additions & 3 deletions src/llm/io_processing/base_output_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,15 @@ class BaseOutputParser {
// Otherwise we return a JSON object containing the delta that conforms to OpenAI API.
virtual std::optional<rapidjson::Document> parseChunk(const std::string& chunkResponse, ov::genai::GenerationFinishReason finishReason) = 0;

// Get the tag that marks the beginning of the segment that should be processed by the parser.
// Get the tags that mark the beginning of the segment that should be processed by the parser.
// This method is used in streaming mode to determine if the parser should start processing the content.
// If no start tag is returned, it means that the parser will never start processing the content.
virtual const std::string& getParsingStartTag() const = 0;
virtual const std::vector<std::string>& getParsingStartTags() const = 0;

// Get a vector of additional tags that mark the beginning of the segment that should be processed by the parser.
// These tags are considered only if they are the first output produced by the model.
// In streaming mode it means that they are considered only in UNKNOWN phase.
virtual const std::unordered_set<std::string>& getSpecialParsingStartTags() const = 0;
virtual const std::vector<std::string>& getSpecialParsingStartTags() const = 0;

// Get the tag that marks the end of the segment that should be processed by the parser.
// This method is used in streaming mode to determine if the parser should stop processing the content.
Expand Down
11 changes: 3 additions & 8 deletions src/llm/io_processing/gptoss/reasoning_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,7 @@
#include <string>
#include <vector>

#pragma warning(push)
#pragma warning(disable : 6313)
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#pragma warning(pop)
#include "src/port/rapidjson_document.hpp"

#include "../../../logging.hpp"
#include "../../../stringutils.hpp"
Expand Down Expand Up @@ -56,10 +51,10 @@ std::optional<rapidjson::Document> GptOssReasoningParser::parseChunk(const std::

StreamState lastState = state;

if (startsWith(chunk, getParsingStartTag())) {
if (startsWith(chunk, getParsingStartTags()[0])) {
// Reasoning content
state = StreamState::READING_REASONING;
chunk = chunk.substr(getParsingStartTag().size());
chunk = chunk.substr(getParsingStartTags()[0].size());
} else if (startsWith(chunk, "<|start|>assistant<|channel|>final<|message|>")) {
// Final content
state = StreamState::READING_CONTENT;
Expand Down
19 changes: 8 additions & 11 deletions src/llm/io_processing/gptoss/reasoning_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,9 @@

#include <openvino/genai/tokenizer.hpp>
#include <string>
#include <unordered_set>
#include <vector>

#pragma warning(push)
#pragma warning(disable : 6313)
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#pragma warning(pop)
#include "src/port/rapidjson_document.hpp"

#include "../base_output_parser.hpp"

Expand Down Expand Up @@ -58,12 +52,15 @@ class GptOssReasoningParser : public BaseOutputParser {
// Streaming
std::optional<rapidjson::Document> parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override;

const std::string& getParsingStartTag() const override {
return parsingStartTag;
const std::vector<std::string>& getParsingStartTags() const override {
// If you add another element you have to update the implementation as well,
// as it mostly assumes just one element
static const std::vector<std::string> parsingStartTags{parsingStartTag};
return parsingStartTags;
}

const std::unordered_set<std::string>& getSpecialParsingStartTags() const override {
static const std::unordered_set<std::string> specialParsingStartTags = {
const std::vector<std::string>& getSpecialParsingStartTags() const override {
static const std::vector<std::string> specialParsingStartTags = {
"<|channel|>final<|message|>",
"<|channel|>commentary<|message|>", // Preamble to reasoning, users usually see that
"<|start|>assistant<|channel|>final<|message|>", // Final content users see
Expand Down
9 changes: 2 additions & 7 deletions src/llm/io_processing/gptoss/tool_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,7 @@
#include <vector>
#include <regex>

#pragma warning(push)
#pragma warning(disable : 6313)
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#pragma warning(pop)
#include "src/port/rapidjson_document.hpp"

#include "../../../logging.hpp"
#include "../../../stringutils.hpp"
Expand Down Expand Up @@ -91,7 +86,7 @@ std::optional<rapidjson::Document> GptOssToolParser::parseChunk(const std::strin
std::string chunk = newChunk;
std::optional<rapidjson::Document> result;

if (chunk.find(getParsingStartTag()) != std::string::npos) {
if (chunk.find(getParsingStartTags()[0]) != std::string::npos) {
toolCallIndex++; // starting with -1, first call will be 0
return std::nullopt;
}
Expand Down
17 changes: 6 additions & 11 deletions src/llm/io_processing/gptoss/tool_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,9 @@
#include <openvino/genai/tokenizer.hpp>
#include <optional>
#include <string>
#include <unordered_set>
#include <vector>

#pragma warning(push)
#pragma warning(disable : 6313)
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#pragma warning(pop)
#include "src/port/rapidjson_document.hpp"

#include "../base_output_parser.hpp"

Expand Down Expand Up @@ -63,12 +57,13 @@ class GptOssToolParser : public BaseOutputParser {
// Streaming
std::optional<rapidjson::Document> parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override;

const std::string& getParsingStartTag() const override {
return parsingStartTag;
const std::vector<std::string>& getParsingStartTags() const override {
static const std::vector<std::string> parsingStartTags{parsingStartTag};
return parsingStartTags;
}

const std::unordered_set<std::string>& getSpecialParsingStartTags() const override {
static const std::unordered_set<std::string> specialParsingStartTags = {};
const std::vector<std::string>& getSpecialParsingStartTags() const override {
static const std::vector<std::string> specialParsingStartTags = {};
return specialParsingStartTags;
}

Expand Down
7 changes: 1 addition & 6 deletions src/llm/io_processing/hermes3/tool_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,7 @@
#include <string>
#include <vector>

#pragma warning(push)
#pragma warning(disable : 6313)
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#pragma warning(pop)
#include "src/port/rapidjson_document.hpp"

#include "../../../logging.hpp"
#include "tool_parser.hpp"
Expand Down
17 changes: 6 additions & 11 deletions src/llm/io_processing/hermes3/tool_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,9 @@
#include <openvino/genai/tokenizer.hpp>
#include <optional>
#include <string>
#include <unordered_set>
#include <vector>

#pragma warning(push)
#pragma warning(disable : 6313)
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#pragma warning(pop)
#include "src/port/rapidjson_document.hpp"

#include "src/llm/io_processing/base_output_parser.hpp"
#include "src/llm/io_processing/partial_json_builder.hpp"
Expand Down Expand Up @@ -82,11 +76,12 @@ class Hermes3ToolParser : public BaseOutputParser {

void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) override;
std::optional<rapidjson::Document> parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override;
const std::string& getParsingStartTag() const override {
return parsingStartTag;
const std::vector<std::string>& getParsingStartTags() const override {
static const std::vector<std::string> parsingStartTags = {parsingStartTag};
return parsingStartTags;
}
const std::unordered_set<std::string>& getSpecialParsingStartTags() const override {
static const std::unordered_set<std::string> beginningOnlyTags = {};
const std::vector<std::string>& getSpecialParsingStartTags() const override {
static const std::vector<std::string> beginningOnlyTags = {};
return beginningOnlyTags;
}
// Tool calls are expected to be the last part of the content, so we do not specify an end tag.
Expand Down
7 changes: 1 addition & 6 deletions src/llm/io_processing/llama3/tool_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,7 @@
#include <vector>
#include <utility>

#pragma warning(push)
#pragma warning(disable : 6313)
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#pragma warning(pop)
#include "src/port/rapidjson_document.hpp"

#include "../../../logging.hpp"
#include "tool_parser.hpp"
Expand Down
17 changes: 6 additions & 11 deletions src/llm/io_processing/llama3/tool_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,9 @@
#include <openvino/genai/tokenizer.hpp>
#include <optional>
#include <string>
#include <unordered_set>
#include <vector>

#pragma warning(push)
#pragma warning(disable : 6313)
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#pragma warning(pop)
#include "src/port/rapidjson_document.hpp"

#include "src/llm/io_processing/base_output_parser.hpp"
#include "src/llm/io_processing/partial_json_builder.hpp"
Expand Down Expand Up @@ -62,11 +56,12 @@ class Llama3ToolParser : public BaseOutputParser {

void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) override;
std::optional<rapidjson::Document> parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override;
const std::string& getParsingStartTag() const override {
return parsingStartTag;
const std::vector<std::string>& getParsingStartTags() const override {
static const std::vector<std::string> parsingStartTags = {parsingStartTag};
return parsingStartTags;
}
const std::unordered_set<std::string>& getSpecialParsingStartTags() const override {
static const std::unordered_set<std::string> specialParsingStartTags = {"{"};
const std::vector<std::string>& getSpecialParsingStartTags() const override {
static const std::vector<std::string> specialParsingStartTags = {"{"};
return specialParsingStartTags;
}
// Tool calls are expected to be the last part of the content, so we do not specify an end tag.
Expand Down
7 changes: 1 addition & 6 deletions src/llm/io_processing/mistral/tool_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,7 @@
#include <vector>
#include <regex>

#pragma warning(push)
#pragma warning(disable : 6313)
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#pragma warning(pop)
#include "src/port/rapidjson_document.hpp"

#include "../../../logging.hpp"
#include "tool_parser.hpp"
Expand Down
18 changes: 6 additions & 12 deletions src/llm/io_processing/mistral/tool_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,9 @@
#include <openvino/genai/tokenizer.hpp>
#include <optional>
#include <string>
#include <unordered_set>
#include <vector>

#pragma warning(push)
#pragma warning(disable : 6313)
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#pragma warning(pop)
#include "src/port/rapidjson_document.hpp"

#include "src/llm/io_processing/base_output_parser.hpp"

Expand All @@ -41,12 +35,12 @@ class MistralToolParser : public BaseOutputParser {

void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) override;
std::optional<rapidjson::Document> parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override;
const std::string& getParsingStartTag() const override {
static const std::string toolCallStartTag = "[TOOL_CALLS]";
return toolCallStartTag;
const std::vector<std::string>& getParsingStartTags() const override {
static const std::vector<std::string> toolCallStartTags{"[TOOL_CALLS]"};
return toolCallStartTags;
}
const std::unordered_set<std::string>& getSpecialParsingStartTags() const override {
static const std::unordered_set<std::string> specialParsingStartTags = {};
const std::vector<std::string>& getSpecialParsingStartTags() const override {
static const std::vector<std::string> specialParsingStartTags{};
return specialParsingStartTags;
}
// Tool calls are expected to be the last part of the content, so we do not specify an end tag.
Expand Down
20 changes: 9 additions & 11 deletions src/llm/io_processing/output_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ OutputParser::TagLookupStatus OutputParser::StreamOutputCache::lookupTag(const s
if (tag.empty()) {
return TagLookupStatus::NOT_FOUND;
}

if (tag.size() > buffer.size()) {
/*
If the tag is longer than the buffer, we check if the buffer and tag overlap (either partially or fully for exact match)
Expand Down Expand Up @@ -79,7 +78,7 @@ OutputParser::TagLookupStatus OutputParser::StreamOutputCache::lookupTag(const s
}
}

OutputParser::TagLookupStatus OutputParser::StreamOutputCache::lookupTags(const std::unordered_set<std::string>& tags) const {
OutputParser::TagLookupStatus OutputParser::StreamOutputCache::lookupTags(const std::vector<std::string>& tags) const {
// We look for multiple tags and return the status in the following priority: FOUND_COMPLETE > FOUND_INCOMPLETE > NOT_FOUND
TagLookupStatus finalTagLookupStatus = TagLookupStatus::NOT_FOUND;
for (const auto& tag : tags) {
Expand Down Expand Up @@ -157,7 +156,6 @@ std::optional<rapidjson::Document> OutputParser::parseReasoningChunk(ov::genai::

OutputParser::OutputParser(ov::genai::Tokenizer& tokenizer, const std::string toolParserName, const std::string reasoningParserName, const ToolsSchemas_t& toolNameSchemaMap) :
tokenizer(tokenizer) {
SPDLOG_TRACE("OutputParser created with toolNameSchemaMap of size: {}", toolNameSchemaMap.size());
if (toolParserName == "llama3") {
toolParser = std::make_unique<Llama3ToolParser>(tokenizer);
} else if (toolParserName == "hermes3") {
Expand Down Expand Up @@ -208,7 +206,7 @@ void OutputParser::enableImmediateToolParsing() {

std::string OutputParser::getToolParserStartTag() const {
if (toolParser) {
return toolParser->getParsingStartTag();
return toolParser->getParsingStartTags()[0];
} else {
throw std::runtime_error("Tool parser is not available, cannot get start tag");
}
Expand Down Expand Up @@ -243,13 +241,13 @@ std::optional<rapidjson::Document> OutputParser::parseChunk(const std::string& c
so only use those methods or return nullopt.
*/

bool reasoningParserExistsAndSupportsStreaming = reasoningParser && !reasoningParser->getParsingStartTag().empty() && !reasoningParser->getParsingEndTag().empty();
bool toolParserExistsAndSupportsStreaming = toolParser && !toolParser->getParsingStartTag().empty();
bool reasoningParserExistsAndSupportsStreaming = reasoningParser && !reasoningParser->getParsingStartTags().empty() && !reasoningParser->getParsingEndTag().empty();
bool toolParserExistsAndSupportsStreaming = toolParser && !toolParser->getParsingStartTags().empty();
bool applyToolParser = toolParserExistsAndSupportsStreaming && toolsAvailable;

if (applyToolParser && toolParser->isImmediateParsingEnabled() && processingPhase == UNKNOWN) {
// If immediate (zero-trigger) parsing is enabled, we assume the start tag has been injected into the prompt.
streamOutputCache.add(toolParser->getParsingStartTag());
streamOutputCache.add(getToolParserStartTag());
}

streamOutputCache.add(chunkResponse);
Expand All @@ -259,7 +257,7 @@ std::optional<rapidjson::Document> OutputParser::parseChunk(const std::string& c
TagLookupStatus anyStartTagStatus = TagLookupStatus::NOT_FOUND;
if (reasoningParserExistsAndSupportsStreaming) {
// Check if reasoning start tag has been received
TagLookupStatus reasoningStartTagStatus = streamOutputCache.lookupTag(reasoningParser->getParsingStartTag());
TagLookupStatus reasoningStartTagStatus = streamOutputCache.lookupTags(reasoningParser->getParsingStartTags());
if (reasoningStartTagStatus == TagLookupStatus::NOT_FOUND) {
// If reasoning start tag is not found, check if any of the special start tags are found
reasoningStartTagStatus = streamOutputCache.lookupTags(reasoningParser->getSpecialParsingStartTags());
Expand All @@ -277,7 +275,7 @@ std::optional<rapidjson::Document> OutputParser::parseChunk(const std::string& c
return parseToolCallChunk(finishReason);
} else {
// Check if tool call start tag has been received
TagLookupStatus toolCallStartTagStatus = streamOutputCache.lookupTag(toolParser->getParsingStartTag());
TagLookupStatus toolCallStartTagStatus = streamOutputCache.lookupTags(toolParser->getParsingStartTags());
if (toolCallStartTagStatus == TagLookupStatus::NOT_FOUND) {
// If tool call start tag is not found, check if any of the special start tags are found
toolCallStartTagStatus = streamOutputCache.lookupTags(toolParser->getSpecialParsingStartTags());
Expand Down Expand Up @@ -311,7 +309,7 @@ std::optional<rapidjson::Document> OutputParser::parseChunk(const std::string& c
// If we are in the CONTENT phase, we check if tool parser start tag is found and if so, switch to TOOL_CALLS phase.
// TOOL_CALLS is the only phase that can be processed after CONTENT.
if (applyToolParser) {
TagLookupStatus toolStartTagStatus = streamOutputCache.lookupTag(toolParser->getParsingStartTag());
TagLookupStatus toolStartTagStatus = streamOutputCache.lookupTags(toolParser->getParsingStartTags());
if (toolStartTagStatus == TagLookupStatus::FOUND_COMPLETE) {
return parseToolCallChunk(finishReason);
} else if (toolStartTagStatus == TagLookupStatus::FOUND_INCOMPLETE && finishReason == ov::genai::GenerationFinishReason::NONE) {
Expand All @@ -335,7 +333,7 @@ std::optional<rapidjson::Document> OutputParser::parseChunk(const std::string& c
} else if (processingPhase == TOOL_CALLS_WAITING_FOR_TOOL) {
// In this phase we are waiting for next tool call or finish of generation.
// If we get next tool call start tag, we switch to TOOL_CALLS phase, otherwise if generation finishes we switch to CONTENT phase to flush any remaining content.
TagLookupStatus toolStartTagStatus = streamOutputCache.lookupTag(toolParser->getParsingStartTag());
TagLookupStatus toolStartTagStatus = streamOutputCache.lookupTags(toolParser->getParsingStartTags());
if (toolStartTagStatus == TagLookupStatus::FOUND_INCOMPLETE && finishReason == ov::genai::GenerationFinishReason::NONE) {
return std::nullopt; // Wait for more chunks to determine if start tag is complete
}
Expand Down
Loading