Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion spelling-whitelist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ release_files/thirdparty-licenses/icu.LICENSE.txt:160: TaBE ==> table, tab
release_files/thirdparty-licenses/libgt2.LICENSE.txt:1040: aheared ==> adhered
release_files/thirdparty-licenses/libgt2.LICENSE.txt:1065: rouines ==> routines
release_files/thirdparty-licenses/libgt2.LICENSE.txt:1083: publically ==> publicly
src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp:559: paramete ==> parameter
src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp
1 change: 1 addition & 0 deletions src/llm/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ ovms_cc_library( # TODO split further so we don't have to recompile everything w
],
deps = [
"@com_github_tencent_rapidjson//:rapidjson",
"//src/port:rapidjson_document",
"//src:libovmslogging",
"//src:libovmsstring_utils",
":partial_json_builder",
Expand Down
6 changes: 3 additions & 3 deletions src/llm/io_processing/base_output_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,15 @@ class BaseOutputParser {
// Otherwise we return a JSON object containing the delta that conforms to OpenAI API.
virtual std::optional<rapidjson::Document> parseChunk(const std::string& chunkResponse, ov::genai::GenerationFinishReason finishReason) = 0;

// Get the tag that marks the beginning of the segment that should be processed by the parser.
// Get the tags that mark the beginning of the segment that should be processed by the parser.
// This method is used in streaming mode to determine if the parser should start processing the content.
// If an empty vector is returned, it means that the parser will never start processing the content.
virtual const std::string& getParsingStartTag() const = 0;
virtual const std::vector<std::string>& getParsingStartTags() const = 0;

// Get a vector of additional tags that mark the beginning of the segment that should be processed by the parser.
// These tags are considered only if they are the first output produced by the model.
// In streaming mode it means that they are considered only in UNKNOWN phase.
virtual const std::unordered_set<std::string>& getSpecialParsingStartTags() const = 0;
virtual const std::vector<std::string>& getSpecialParsingStartTags() const = 0;

// Get the tag that marks the end of the segment that should be processed by the parser.
// This method is used in streaming mode to determine if the parser should stop processing the content.
Expand Down
11 changes: 3 additions & 8 deletions src/llm/io_processing/gptoss/reasoning_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,7 @@
#include <string>
#include <vector>

#pragma warning(push)
#pragma warning(disable : 6313)
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#pragma warning(pop)
#include "src/port/rapidjson_document.hpp"

#include "../../../logging.hpp"
#include "../../../stringutils.hpp"
Expand Down Expand Up @@ -56,10 +51,10 @@ std::optional<rapidjson::Document> GptOssReasoningParser::parseChunk(const std::

StreamState lastState = state;

if (startsWith(chunk, getParsingStartTag())) {
if (startsWith(chunk, getParsingStartTags()[0])) {
// Reasoning content
state = StreamState::READING_REASONING;
chunk = chunk.substr(getParsingStartTag().size());
chunk = chunk.substr(getParsingStartTags()[0].size());
} else if (startsWith(chunk, "<|start|>assistant<|channel|>final<|message|>")) {
// Final content
state = StreamState::READING_CONTENT;
Expand Down
19 changes: 8 additions & 11 deletions src/llm/io_processing/gptoss/reasoning_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,9 @@

#include <openvino/genai/tokenizer.hpp>
#include <string>
#include <unordered_set>
#include <vector>

#pragma warning(push)
#pragma warning(disable : 6313)
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#pragma warning(pop)
#include "src/port/rapidjson_document.hpp"

#include "../base_output_parser.hpp"

Expand Down Expand Up @@ -58,12 +52,15 @@ class GptOssReasoningParser : public BaseOutputParser {
// Streaming
std::optional<rapidjson::Document> parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override;

const std::string& getParsingStartTag() const override {
return parsingStartTag;
const std::vector<std::string>& getParsingStartTags() const override {
// If you add another element, you have to update the implementation as well,
// as it mostly assumes just one element.
static const std::vector<std::string> parsingStartTags{parsingStartTag};
return parsingStartTags;
}

const std::unordered_set<std::string>& getSpecialParsingStartTags() const override {
static const std::unordered_set<std::string> specialParsingStartTags = {
const std::vector<std::string>& getSpecialParsingStartTags() const override {
static const std::vector<std::string> specialParsingStartTags = {
"<|channel|>final<|message|>",
"<|channel|>commentary<|message|>", // Preable to reasoning, users usually sees that
"<|start|>assistant<|channel|>final<|message|>", // Final content users sees
Expand Down
9 changes: 2 additions & 7 deletions src/llm/io_processing/gptoss/tool_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,7 @@
#include <vector>
#include <regex>

#pragma warning(push)
#pragma warning(disable : 6313)
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#pragma warning(pop)
#include "src/port/rapidjson_document.hpp"

#include "../../../logging.hpp"
#include "../../../stringutils.hpp"
Expand Down Expand Up @@ -91,7 +86,7 @@ std::optional<rapidjson::Document> GptOssToolParser::parseChunk(const std::strin
std::string chunk = newChunk;
std::optional<rapidjson::Document> result;

if (chunk.find(getParsingStartTag()) != std::string::npos) {
if (chunk.find(getParsingStartTags()[0]) != std::string::npos) {
toolCallIndex++; // starting with -1, first call will be 0
return std::nullopt;
}
Expand Down
17 changes: 6 additions & 11 deletions src/llm/io_processing/gptoss/tool_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,9 @@
#include <openvino/genai/tokenizer.hpp>
#include <optional>
#include <string>
#include <unordered_set>
#include <vector>

#pragma warning(push)
#pragma warning(disable : 6313)
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#pragma warning(pop)
#include "src/port/rapidjson_document.hpp"

#include "../base_output_parser.hpp"

Expand Down Expand Up @@ -63,12 +57,13 @@ class GptOssToolParser : public BaseOutputParser {
// Streaming
std::optional<rapidjson::Document> parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override;

const std::string& getParsingStartTag() const override {
return parsingStartTag;
const std::vector<std::string>& getParsingStartTags() const override {
static const std::vector<std::string> parsingStartTags{parsingStartTag};
return parsingStartTags;
}

const std::unordered_set<std::string>& getSpecialParsingStartTags() const override {
static const std::unordered_set<std::string> specialParsingStartTags = {};
const std::vector<std::string>& getSpecialParsingStartTags() const override {
static const std::vector<std::string> specialParsingStartTags = {};
return specialParsingStartTags;
}

Expand Down
7 changes: 1 addition & 6 deletions src/llm/io_processing/hermes3/tool_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,7 @@
#include <string>
#include <vector>

#pragma warning(push)
#pragma warning(disable : 6313)
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#pragma warning(pop)
#include "src/port/rapidjson_document.hpp"

#include "../../../logging.hpp"
#include "tool_parser.hpp"
Expand Down
17 changes: 6 additions & 11 deletions src/llm/io_processing/hermes3/tool_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,9 @@
#include <openvino/genai/tokenizer.hpp>
#include <optional>
#include <string>
#include <unordered_set>
#include <vector>

#pragma warning(push)
#pragma warning(disable : 6313)
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#pragma warning(pop)
#include "src/port/rapidjson_document.hpp"

#include "src/llm/io_processing/base_output_parser.hpp"
#include "src/llm/io_processing/partial_json_builder.hpp"
Expand Down Expand Up @@ -82,11 +76,12 @@ class Hermes3ToolParser : public BaseOutputParser {

void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) override;
std::optional<rapidjson::Document> parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override;
const std::string& getParsingStartTag() const override {
return parsingStartTag;
const std::vector<std::string>& getParsingStartTags() const override {
static const std::vector<std::string> parsingStartTags = {parsingStartTag};
return parsingStartTags;
}
const std::unordered_set<std::string>& getSpecialParsingStartTags() const override {
static const std::unordered_set<std::string> beginningOnlyTags = {};
const std::vector<std::string>& getSpecialParsingStartTags() const override {
static const std::vector<std::string> beginningOnlyTags = {};
return beginningOnlyTags;
}
// Tool calls are expected to be the last part of the content, so we do not specify an end tag.
Expand Down
7 changes: 1 addition & 6 deletions src/llm/io_processing/llama3/tool_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,7 @@
#include <vector>
#include <utility>

#pragma warning(push)
#pragma warning(disable : 6313)
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#pragma warning(pop)
#include "src/port/rapidjson_document.hpp"

#include "../../../logging.hpp"
#include "tool_parser.hpp"
Expand Down
17 changes: 6 additions & 11 deletions src/llm/io_processing/llama3/tool_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,9 @@
#include <openvino/genai/tokenizer.hpp>
#include <optional>
#include <string>
#include <unordered_set>
#include <vector>

#pragma warning(push)
#pragma warning(disable : 6313)
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#pragma warning(pop)
#include "src/port/rapidjson_document.hpp"

#include "src/llm/io_processing/base_output_parser.hpp"
#include "src/llm/io_processing/partial_json_builder.hpp"
Expand Down Expand Up @@ -62,11 +56,12 @@ class Llama3ToolParser : public BaseOutputParser {

void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) override;
std::optional<rapidjson::Document> parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override;
const std::string& getParsingStartTag() const override {
return parsingStartTag;
const std::vector<std::string>& getParsingStartTags() const override {
static const std::vector<std::string> parsingStartTags = {parsingStartTag};
return parsingStartTags;
}
const std::unordered_set<std::string>& getSpecialParsingStartTags() const override {
static const std::unordered_set<std::string> specialParsingStartTags = {"{"};
const std::vector<std::string>& getSpecialParsingStartTags() const override {
static const std::vector<std::string> specialParsingStartTags = {"{"};
return specialParsingStartTags;
}
// Tool calls are expected to be the last part of the content, so we do not specify an end tag.
Expand Down
7 changes: 1 addition & 6 deletions src/llm/io_processing/mistral/tool_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,7 @@
#include <vector>
#include <regex>

#pragma warning(push)
#pragma warning(disable : 6313)
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#pragma warning(pop)
#include "src/port/rapidjson_document.hpp"

#include "../../../logging.hpp"
#include "tool_parser.hpp"
Expand Down
18 changes: 6 additions & 12 deletions src/llm/io_processing/mistral/tool_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,9 @@
#include <openvino/genai/tokenizer.hpp>
#include <optional>
#include <string>
#include <unordered_set>
#include <vector>

#pragma warning(push)
#pragma warning(disable : 6313)
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#pragma warning(pop)
#include "src/port/rapidjson_document.hpp"

#include "src/llm/io_processing/base_output_parser.hpp"

Expand All @@ -41,12 +35,12 @@ class MistralToolParser : public BaseOutputParser {

void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) override;
std::optional<rapidjson::Document> parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override;
const std::string& getParsingStartTag() const override {
static const std::string toolCallStartTag = "[TOOL_CALLS]";
return toolCallStartTag;
const std::vector<std::string>& getParsingStartTags() const override {
static const std::vector<std::string> toolCallStartTags{"[TOOL_CALLS]"};
return toolCallStartTags;
}
const std::unordered_set<std::string>& getSpecialParsingStartTags() const override {
static const std::unordered_set<std::string> specialParsingStartTags = {};
const std::vector<std::string>& getSpecialParsingStartTags() const override {
static const std::vector<std::string> specialParsingStartTags{};
return specialParsingStartTags;
}
// Tool calls are expected to be the last part of the content, so we do not specify an end tag.
Expand Down
21 changes: 10 additions & 11 deletions src/llm/io_processing/output_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ OutputParser::TagLookupStatus OutputParser::StreamOutputCache::lookupTag(const s
if (tag.empty()) {
return TagLookupStatus::NOT_FOUND;
}

SPDLOG_TRACE("XYZ lookupTag: looking for tag: '{}' in buffer: '{}'", tag, buffer);
if (tag.size() > buffer.size()) {
/*
If the tag is longer than the buffer, we check if the buffer and tag overlap (either partially or fully for exact match)
Expand Down Expand Up @@ -79,7 +79,7 @@ OutputParser::TagLookupStatus OutputParser::StreamOutputCache::lookupTag(const s
}
}

OutputParser::TagLookupStatus OutputParser::StreamOutputCache::lookupTags(const std::unordered_set<std::string>& tags) const {
OutputParser::TagLookupStatus OutputParser::StreamOutputCache::lookupTags(const std::vector<std::string>& tags) const {
// We look for multiple tags and return the status in the following priority: FOUND_COMPLETE > FOUND_INCOMPLETE > NOT_FOUND
TagLookupStatus finalTagLookupStatus = TagLookupStatus::NOT_FOUND;
for (const auto& tag : tags) {
Expand Down Expand Up @@ -157,7 +157,6 @@ std::optional<rapidjson::Document> OutputParser::parseReasoningChunk(ov::genai::

OutputParser::OutputParser(ov::genai::Tokenizer& tokenizer, const std::string toolParserName, const std::string reasoningParserName, const ToolsSchemas_t& toolNameSchemaMap) :
tokenizer(tokenizer) {
SPDLOG_TRACE("OutputParser created with toolNameSchemaMap of size: {}", toolNameSchemaMap.size());
if (toolParserName == "llama3") {
toolParser = std::make_unique<Llama3ToolParser>(tokenizer);
} else if (toolParserName == "hermes3") {
Expand Down Expand Up @@ -208,7 +207,7 @@ void OutputParser::enableImmediateToolParsing() {

std::string OutputParser::getToolParserStartTag() const {
if (toolParser) {
return toolParser->getParsingStartTag();
return toolParser->getParsingStartTags()[0];
} else {
throw std::runtime_error("Tool parser is not available, cannot get start tag");
}
Expand Down Expand Up @@ -243,13 +242,13 @@ std::optional<rapidjson::Document> OutputParser::parseChunk(const std::string& c
so only use those methods or return nullopt.
*/

bool reasoningParserExistsAndSupportsStreaming = reasoningParser && !reasoningParser->getParsingStartTag().empty() && !reasoningParser->getParsingEndTag().empty();
bool toolParserExistsAndSupportsStreaming = toolParser && !toolParser->getParsingStartTag().empty();
bool reasoningParserExistsAndSupportsStreaming = reasoningParser && !reasoningParser->getParsingStartTags().empty() && !reasoningParser->getParsingEndTag().empty();
bool toolParserExistsAndSupportsStreaming = toolParser && !toolParser->getParsingStartTags().empty();
bool applyToolParser = toolParserExistsAndSupportsStreaming && toolsAvailable;

if (applyToolParser && toolParser->isImmediateParsingEnabled() && processingPhase == UNKNOWN) {
// If zero trigger parsing is enabled, we assume the start tag has been injected to the prompt.
streamOutputCache.add(toolParser->getParsingStartTag());
streamOutputCache.add(getToolParserStartTag());
}

streamOutputCache.add(chunkResponse);
Expand All @@ -259,7 +258,7 @@ std::optional<rapidjson::Document> OutputParser::parseChunk(const std::string& c
TagLookupStatus anyStartTagStatus = TagLookupStatus::NOT_FOUND;
if (reasoningParserExistsAndSupportsStreaming) {
// Check if reasoning start tag has been received
TagLookupStatus reasoningStartTagStatus = streamOutputCache.lookupTag(reasoningParser->getParsingStartTag());
TagLookupStatus reasoningStartTagStatus = streamOutputCache.lookupTags(reasoningParser->getParsingStartTags());
if (reasoningStartTagStatus == TagLookupStatus::NOT_FOUND) {
// If reasoning start tag is not found, check if any of the special start tags are found
reasoningStartTagStatus = streamOutputCache.lookupTags(reasoningParser->getSpecialParsingStartTags());
Expand All @@ -277,7 +276,7 @@ std::optional<rapidjson::Document> OutputParser::parseChunk(const std::string& c
return parseToolCallChunk(finishReason);
} else {
// Check if tool call start tag has been received
TagLookupStatus toolCallStartTagStatus = streamOutputCache.lookupTag(toolParser->getParsingStartTag());
TagLookupStatus toolCallStartTagStatus = streamOutputCache.lookupTags(toolParser->getParsingStartTags());
if (toolCallStartTagStatus == TagLookupStatus::NOT_FOUND) {
// If tool call start tag is not found, check if any of the special start tags are found
toolCallStartTagStatus = streamOutputCache.lookupTags(toolParser->getSpecialParsingStartTags());
Expand Down Expand Up @@ -311,7 +310,7 @@ std::optional<rapidjson::Document> OutputParser::parseChunk(const std::string& c
// If we are in the CONTENT phase, we check if tool parser start tag is found and if so, switch to TOOL_CALLS phase.
// TOOL_CALLS is the only phase that can be processed after CONTENT.
if (applyToolParser) {
TagLookupStatus toolStartTagStatus = streamOutputCache.lookupTag(toolParser->getParsingStartTag());
TagLookupStatus toolStartTagStatus = streamOutputCache.lookupTags(toolParser->getParsingStartTags());
if (toolStartTagStatus == TagLookupStatus::FOUND_COMPLETE) {
return parseToolCallChunk(finishReason);
} else if (toolStartTagStatus == TagLookupStatus::FOUND_INCOMPLETE && finishReason == ov::genai::GenerationFinishReason::NONE) {
Expand All @@ -335,7 +334,7 @@ std::optional<rapidjson::Document> OutputParser::parseChunk(const std::string& c
} else if (processingPhase == TOOL_CALLS_WAITING_FOR_TOOL) {
// In this phase we are waiting for next tool call or finish of generation.
// If we get next tool call start tag, we switch to TOOL_CALLS phase, otherwise if generation finishes we switch to CONTENT phase to flush any remaining content.
TagLookupStatus toolStartTagStatus = streamOutputCache.lookupTag(toolParser->getParsingStartTag());
TagLookupStatus toolStartTagStatus = streamOutputCache.lookupTags(toolParser->getParsingStartTags());
if (toolStartTagStatus == TagLookupStatus::FOUND_INCOMPLETE && finishReason == ov::genai::GenerationFinishReason::NONE) {
return std::nullopt; // Wait for more chunks to determine if start tag is complete
}
Expand Down
Loading