openvinotoolkit · atobiszei · Oct 17, 2025 · Oct 8, 2025 · Oct 13, 2025 · Oct 14, 2025
diff --git a/spelling-whitelist.txt b/spelling-whitelist.txt
@@ -25,4 +25,4 @@ release_files/thirdparty-licenses/icu.LICENSE.txt:160: TaBE ==> table, tab
 release_files/thirdparty-licenses/libgt2.LICENSE.txt:1040: aheared ==> adhered
 release_files/thirdparty-licenses/libgt2.LICENSE.txt:1065: rouines ==> routines
 release_files/thirdparty-licenses/libgt2.LICENSE.txt:1083: publically ==> publicly
-src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp:559: paramete ==> parameter
+src/test/llm/output_parsers/qwen3coder_output_parser_test.cpp
diff --git a/src/llm/BUILD b/src/llm/BUILD
@@ -172,6 +172,7 @@ ovms_cc_library( # TODO split further so we don't have to recompile everything w
     ],
     deps = [
         "@com_github_tencent_rapidjson//:rapidjson",
+        "//src/port:rapidjson_document",
         "//src:libovmslogging",
         "//src:libovmsstring_utils",
         ":partial_json_builder",

diff --git a/src/llm/io_processing/base_output_parser.hpp b/src/llm/io_processing/base_output_parser.hpp
@@ -97,15 +97,15 @@ class BaseOutputParser {
     // Otherwise we return a JSON object containing the delta that conforms to OpenAI API.
     virtual std::optional<rapidjson::Document> parseChunk(const std::string& chunkResponse, ov::genai::GenerationFinishReason finishReason) = 0;
 
-    // Get the tag that marks the beginning of the segment that should be processed by the parser.
+    // Get the tags that mark the beginning of the segment that should be processed by the parser.
     // This method is used in streaming mode to determine if the parser should start processing the content.
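-    // If empty string is returned, it means that the parser will never start processing the content.
+    // If an empty vector is returned, it means that the parser will never start processing the content.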
-    virtual const std::string& getParsingStartTag() const = 0;
+    virtual const std::vector<std::string>& getParsingStartTags() const = 0;
 
     // Get a vector of additional tags that mark beginning of the segment that should be processed by the parser.
     // These tags are considered only if they are the first output produced by the model.
     // In streaming mode it means that they are considered only in UNKNOWN phase.
-    virtual const std::unordered_set<std::string>& getSpecialParsingStartTags() const = 0;
+    virtual const std::vector<std::string>& getSpecialParsingStartTags() const = 0;
 
     // Get the tag that marks the end of the segment that should be processed by the parser.
     // This method is used in streaming mode to determine if the parser should stop processing the content.

diff --git a/src/llm/io_processing/gptoss/reasoning_parser.cpp b/src/llm/io_processing/gptoss/reasoning_parser.cpp
@@ -18,12 +18,7 @@
 #include <string>
 #include <vector>
 
-#pragma warning(push)
-#pragma warning(disable : 6313)
-#include <rapidjson/document.h>
-#include <rapidjson/stringbuffer.h>
-#include <rapidjson/writer.h>
-#pragma warning(pop)
+#include "src/port/rapidjson_document.hpp"
 
 #include "../../../logging.hpp"
 #include "../../../stringutils.hpp"
@@ -56,10 +51,10 @@ std::optional<rapidjson::Document> GptOssReasoningParser::parseChunk(const std::
 
     StreamState lastState = state;
 
-    if (startsWith(chunk, getParsingStartTag())) {
+    if (startsWith(chunk, getParsingStartTags()[0])) {
         // Final content
         state = StreamState::READING_REASONING;
-        chunk = chunk.substr(getParsingStartTag().size());
+        chunk = chunk.substr(getParsingStartTags()[0].size());
     } else if (startsWith(chunk, "<|start|>assistant<|channel|>final<|message|>")) {
         // Final content
         state = StreamState::READING_CONTENT;

diff --git a/src/llm/io_processing/gptoss/reasoning_parser.hpp b/src/llm/io_processing/gptoss/reasoning_parser.hpp
@@ -17,15 +17,9 @@
 
 #include <openvino/genai/tokenizer.hpp>
 #include <string>
-#include <unordered_set>
 #include <vector>
 
-#pragma warning(push)
-#pragma warning(disable : 6313)
-#include <rapidjson/document.h>
-#include <rapidjson/stringbuffer.h>
-#include <rapidjson/writer.h>
-#pragma warning(pop)
+#include "src/port/rapidjson_document.hpp"
 
 #include "../base_output_parser.hpp"
 
@@ -58,12 +52,15 @@ class GptOssReasoningParser : public BaseOutputParser {
     // Streaming
     std::optional<rapidjson::Document> parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override;
 
-    const std::string& getParsingStartTag() const override {
-        return parsingStartTag;
+    const std::vector<std::string>& getParsingStartTags() const override {
+        // If you add another element, you have to update the implementation as well,
+        // as it mostly assumes just one element.
+        static const std::vector<std::string> parsingStartTags{parsingStartTag};
+        return parsingStartTags;
     }
 
-    const std::unordered_set<std::string>& getSpecialParsingStartTags() const override {
-        static const std::unordered_set<std::string> specialParsingStartTags = {
+    const std::vector<std::string>& getSpecialParsingStartTags() const override {
+        static const std::vector<std::string> specialParsingStartTags = {
             "<|channel|>final<|message|>",
             "<|channel|>commentary<|message|>",               // Preable to reasoning, users usually sees that
             "<|start|>assistant<|channel|>final<|message|>",  // Final content users sees

diff --git a/src/llm/io_processing/gptoss/tool_parser.cpp b/src/llm/io_processing/gptoss/tool_parser.cpp
@@ -19,12 +19,7 @@
 #include <vector>
 #include <regex>
 
-#pragma warning(push)
-#pragma warning(disable : 6313)
-#include <rapidjson/document.h>
-#include <rapidjson/stringbuffer.h>
-#include <rapidjson/writer.h>
-#pragma warning(pop)
+#include "src/port/rapidjson_document.hpp"
 
 #include "../../../logging.hpp"
 #include "../../../stringutils.hpp"
@@ -91,7 +86,7 @@ std::optional<rapidjson::Document> GptOssToolParser::parseChunk(const std::strin
     std::string chunk = newChunk;
     std::optional<rapidjson::Document> result;
 
-    if (chunk.find(getParsingStartTag()) != std::string::npos) {
+    if (chunk.find(getParsingStartTags()[0]) != std::string::npos) {
         toolCallIndex++;  // starting with -1, first call will be 0
         return std::nullopt;
     }

diff --git a/src/llm/io_processing/gptoss/tool_parser.hpp b/src/llm/io_processing/gptoss/tool_parser.hpp
@@ -18,15 +18,9 @@
 #include <openvino/genai/tokenizer.hpp>
 #include <optional>
 #include <string>
-#include <unordered_set>
 #include <vector>
 
-#pragma warning(push)
-#pragma warning(disable : 6313)
-#include <rapidjson/document.h>
-#include <rapidjson/stringbuffer.h>
-#include <rapidjson/writer.h>
-#pragma warning(pop)
+#include "src/port/rapidjson_document.hpp"
 
 #include "../base_output_parser.hpp"
 
@@ -63,12 +57,13 @@ class GptOssToolParser : public BaseOutputParser {
     // Streaming
     std::optional<rapidjson::Document> parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override;
 
-    const std::string& getParsingStartTag() const override {
-        return parsingStartTag;
+    const std::vector<std::string>& getParsingStartTags() const override {
+        static const std::vector<std::string> parsingStartTags{parsingStartTag};
+        return parsingStartTags;
     }
 
-    const std::unordered_set<std::string>& getSpecialParsingStartTags() const override {
-        static const std::unordered_set<std::string> specialParsingStartTags = {};
+    const std::vector<std::string>& getSpecialParsingStartTags() const override {
+        static const std::vector<std::string> specialParsingStartTags = {};
         return specialParsingStartTags;
     }
 

diff --git a/src/llm/io_processing/hermes3/tool_parser.cpp b/src/llm/io_processing/hermes3/tool_parser.cpp
@@ -18,12 +18,7 @@
 #include <string>
 #include <vector>
 
-#pragma warning(push)
-#pragma warning(disable : 6313)
-#include <rapidjson/document.h>
-#include <rapidjson/stringbuffer.h>
-#include <rapidjson/writer.h>
-#pragma warning(pop)
+#include "src/port/rapidjson_document.hpp"
 
 #include "../../../logging.hpp"
 #include "tool_parser.hpp"

diff --git a/src/llm/io_processing/hermes3/tool_parser.hpp b/src/llm/io_processing/hermes3/tool_parser.hpp
@@ -18,15 +18,9 @@
 #include <openvino/genai/tokenizer.hpp>
 #include <optional>
 #include <string>
-#include <unordered_set>
 #include <vector>
 
-#pragma warning(push)
-#pragma warning(disable : 6313)
-#include <rapidjson/document.h>
-#include <rapidjson/stringbuffer.h>
-#include <rapidjson/writer.h>
-#pragma warning(pop)
+#include "src/port/rapidjson_document.hpp"
 
 #include "src/llm/io_processing/base_output_parser.hpp"
 #include "src/llm/io_processing/partial_json_builder.hpp"
@@ -82,11 +76,12 @@ class Hermes3ToolParser : public BaseOutputParser {
 
     void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) override;
     std::optional<rapidjson::Document> parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override;
-    const std::string& getParsingStartTag() const override {
-        return parsingStartTag;
+    const std::vector<std::string>& getParsingStartTags() const override {
+        static const std::vector<std::string> parsingStartTags = {parsingStartTag};
+        return parsingStartTags;
     }
-    const std::unordered_set<std::string>& getSpecialParsingStartTags() const override {
-        static const std::unordered_set<std::string> beginningOnlyTags = {};
+    const std::vector<std::string>& getSpecialParsingStartTags() const override {
+        static const std::vector<std::string> beginningOnlyTags = {};
         return beginningOnlyTags;
     }
     // Tools calls are expected to be the last part of the content, so we do not specify an end tag.

diff --git a/src/llm/io_processing/llama3/tool_parser.cpp b/src/llm/io_processing/llama3/tool_parser.cpp
@@ -19,12 +19,7 @@
 #include <vector>
 #include <utility>
 
-#pragma warning(push)
-#pragma warning(disable : 6313)
-#include <rapidjson/document.h>
-#include <rapidjson/stringbuffer.h>
-#include <rapidjson/writer.h>
-#pragma warning(pop)
+#include "src/port/rapidjson_document.hpp"
 
 #include "../../../logging.hpp"
 #include "tool_parser.hpp"

diff --git a/src/llm/io_processing/llama3/tool_parser.hpp b/src/llm/io_processing/llama3/tool_parser.hpp
@@ -18,15 +18,9 @@
 #include <openvino/genai/tokenizer.hpp>
 #include <optional>
 #include <string>
-#include <unordered_set>
 #include <vector>
 
-#pragma warning(push)
-#pragma warning(disable : 6313)
-#include <rapidjson/document.h>
-#include <rapidjson/stringbuffer.h>
-#include <rapidjson/writer.h>
-#pragma warning(pop)
+#include "src/port/rapidjson_document.hpp"
 
 #include "src/llm/io_processing/base_output_parser.hpp"
 #include "src/llm/io_processing/partial_json_builder.hpp"
@@ -62,11 +56,12 @@ class Llama3ToolParser : public BaseOutputParser {
 
     void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) override;
     std::optional<rapidjson::Document> parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override;
-    const std::string& getParsingStartTag() const override {
-        return parsingStartTag;
+    const std::vector<std::string>& getParsingStartTags() const override {
+        static const std::vector<std::string> parsingStartTags = {parsingStartTag};
+        return parsingStartTags;
     }
-    const std::unordered_set<std::string>& getSpecialParsingStartTags() const override {
-        static const std::unordered_set<std::string> specialParsingStartTags = {"{"};
+    const std::vector<std::string>& getSpecialParsingStartTags() const override {
+        static const std::vector<std::string> specialParsingStartTags = {"{"};
         return specialParsingStartTags;
     }
     // Tools calls are expected to be the last part of the content, so we do not specify an end tag.

diff --git a/src/llm/io_processing/mistral/tool_parser.cpp b/src/llm/io_processing/mistral/tool_parser.cpp
@@ -19,12 +19,7 @@
 #include <vector>
 #include <regex>
 
-#pragma warning(push)
-#pragma warning(disable : 6313)
-#include <rapidjson/document.h>
-#include <rapidjson/stringbuffer.h>
-#include <rapidjson/writer.h>
-#pragma warning(pop)
+#include "src/port/rapidjson_document.hpp"
 
 #include "../../../logging.hpp"
 #include "tool_parser.hpp"

diff --git a/src/llm/io_processing/mistral/tool_parser.hpp b/src/llm/io_processing/mistral/tool_parser.hpp
@@ -18,15 +18,9 @@
 #include <openvino/genai/tokenizer.hpp>
 #include <optional>
 #include <string>
-#include <unordered_set>
 #include <vector>
 
-#pragma warning(push)
-#pragma warning(disable : 6313)
-#include <rapidjson/document.h>
-#include <rapidjson/stringbuffer.h>
-#include <rapidjson/writer.h>
-#pragma warning(pop)
+#include "src/port/rapidjson_document.hpp"
 
 #include "src/llm/io_processing/base_output_parser.hpp"
 
@@ -41,12 +35,12 @@ class MistralToolParser : public BaseOutputParser {
 
     void parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& generatedTokens) override;
     std::optional<rapidjson::Document> parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override;
-    const std::string& getParsingStartTag() const override {
-        static const std::string toolCallStartTag = "[TOOL_CALLS]";
-        return toolCallStartTag;
+    const std::vector<std::string>& getParsingStartTags() const override {
+        static const std::vector<std::string> toolCallStartTags{"[TOOL_CALLS]"};
+        return toolCallStartTags;
     }
-    const std::unordered_set<std::string>& getSpecialParsingStartTags() const override {
-        static const std::unordered_set<std::string> specialParsingStartTags = {};
+    const std::vector<std::string>& getSpecialParsingStartTags() const override {
+        static const std::vector<std::string> specialParsingStartTags{};
         return specialParsingStartTags;
     }
     // Tools calls are expected to be the last part of the content, so we do not specify an end tag.

diff --git a/src/llm/io_processing/output_parser.cpp b/src/llm/io_processing/output_parser.cpp
@@ -34,7 +34,6 @@ OutputParser::TagLookupStatus OutputParser::StreamOutputCache::lookupTag(const s
     if (tag.empty()) {
         return TagLookupStatus::NOT_FOUND;
     }
-
     if (tag.size() > buffer.size()) {
         /* 
         If the tag is longer than the buffer, we check if the buffer and tag overlap (either partially or fully for exact match)
@@ -79,7 +78,7 @@ OutputParser::TagLookupStatus OutputParser::StreamOutputCache::lookupTag(const s
     }
 }
 
-OutputParser::TagLookupStatus OutputParser::StreamOutputCache::lookupTags(const std::unordered_set<std::string>& tags) const {
+OutputParser::TagLookupStatus OutputParser::StreamOutputCache::lookupTags(const std::vector<std::string>& tags) const {
     // We look for multiple tags and return the status in the following priority: FOUND COMPLETE > FOUND_INCOMPLETE > NOT_FOUND
     TagLookupStatus finalTagLookupStatus = TagLookupStatus::NOT_FOUND;
     for (const auto& tag : tags) {
@@ -157,7 +156,6 @@ std::optional<rapidjson::Document> OutputParser::parseReasoningChunk(ov::genai::
 
 OutputParser::OutputParser(ov::genai::Tokenizer& tokenizer, const std::string toolParserName, const std::string reasoningParserName, const ToolsSchemas_t& toolNameSchemaMap) :
     tokenizer(tokenizer) {
-    SPDLOG_TRACE("OutputParser created with toolNameSchemaMap of size: {}", toolNameSchemaMap.size());
     if (toolParserName == "llama3") {
         toolParser = std::make_unique<Llama3ToolParser>(tokenizer);
     } else if (toolParserName == "hermes3") {
@@ -208,7 +206,7 @@ void OutputParser::enableImmediateToolParsing() {
 
 std::string OutputParser::getToolParserStartTag() const {
     if (toolParser) {
-        return toolParser->getParsingStartTag();
+        return toolParser->getParsingStartTags()[0];
     } else {
         throw std::runtime_error("Tool parser is not available, cannot get start tag");
     }
@@ -243,13 +241,13 @@ std::optional<rapidjson::Document> OutputParser::parseChunk(const std::string& c
     so only use those methods or return nullopt.
     */
 
-    bool reasoningParserExistsAndSupportsStreaming = reasoningParser && !reasoningParser->getParsingStartTag().empty() && !reasoningParser->getParsingEndTag().empty();
-    bool toolParserExistsAndSupportsStreaming = toolParser && !toolParser->getParsingStartTag().empty();
+    bool reasoningParserExistsAndSupportsStreaming = reasoningParser && !reasoningParser->getParsingStartTags().empty() && !reasoningParser->getParsingEndTag().empty();
+    bool toolParserExistsAndSupportsStreaming = toolParser && !toolParser->getParsingStartTags().empty();
     bool applyToolParser = toolParserExistsAndSupportsStreaming && toolsAvailable;
 
     if (applyToolParser && toolParser->isImmediateParsingEnabled() && processingPhase == UNKNOWN) {
         // If zero trigger parsing is enabled, we assume the start tag has been injected to the prompt.
-        streamOutputCache.add(toolParser->getParsingStartTag());
+        streamOutputCache.add(getToolParserStartTag());
     }
 
     streamOutputCache.add(chunkResponse);
@@ -259,7 +257,7 @@ std::optional<rapidjson::Document> OutputParser::parseChunk(const std::string& c
         TagLookupStatus anyStartTagStatus = TagLookupStatus::NOT_FOUND;
         if (reasoningParserExistsAndSupportsStreaming) {
             // Check if reasoning start tag has been received
-            TagLookupStatus reasoningStartTagStatus = streamOutputCache.lookupTag(reasoningParser->getParsingStartTag());
+            TagLookupStatus reasoningStartTagStatus = streamOutputCache.lookupTags(reasoningParser->getParsingStartTags());
             if (reasoningStartTagStatus == TagLookupStatus::NOT_FOUND) {
                 // If reasoning start tag is not found, check if any of the special start tags are found
                 reasoningStartTagStatus = streamOutputCache.lookupTags(reasoningParser->getSpecialParsingStartTags());
@@ -277,7 +275,7 @@ std::optional<rapidjson::Document> OutputParser::parseChunk(const std::string& c
                 return parseToolCallChunk(finishReason);
             } else {
                 // Check if tool call start tag has been received
-                TagLookupStatus toolCallStartTagStatus = streamOutputCache.lookupTag(toolParser->getParsingStartTag());
+                TagLookupStatus toolCallStartTagStatus = streamOutputCache.lookupTags(toolParser->getParsingStartTags());
                 if (toolCallStartTagStatus == TagLookupStatus::NOT_FOUND) {
                     // If tool call start tag is not found, check if any of the special start tags are found
                     toolCallStartTagStatus = streamOutputCache.lookupTags(toolParser->getSpecialParsingStartTags());
@@ -311,7 +309,7 @@ std::optional<rapidjson::Document> OutputParser::parseChunk(const std::string& c
         // If we are in the CONTENT phase, we check if tool parser start tag is found and if so, switch to TOOL_CALLS phase.
         // TOOL_CALLS is the only phase that can be processed after CONTENT.
         if (applyToolParser) {
-            TagLookupStatus toolStartTagStatus = streamOutputCache.lookupTag(toolParser->getParsingStartTag());
+            TagLookupStatus toolStartTagStatus = streamOutputCache.lookupTags(toolParser->getParsingStartTags());
             if (toolStartTagStatus == TagLookupStatus::FOUND_COMPLETE) {
                 return parseToolCallChunk(finishReason);
             } else if (toolStartTagStatus == TagLookupStatus::FOUND_INCOMPLETE && finishReason == ov::genai::GenerationFinishReason::NONE) {
@@ -335,7 +333,7 @@ std::optional<rapidjson::Document> OutputParser::parseChunk(const std::string& c
     } else if (processingPhase == TOOL_CALLS_WAITING_FOR_TOOL) {
         // In this phase we are waiting for next tool call or finish of generation.
         // If we get next tool call start tag, we switch to TOOL_CALLS phase, otherwise if generation finishes we switch to CONTENT phase to flush any remaining content.
-        TagLookupStatus toolStartTagStatus = streamOutputCache.lookupTag(toolParser->getParsingStartTag());
+        TagLookupStatus toolStartTagStatus = streamOutputCache.lookupTags(toolParser->getParsingStartTags());
         if (toolStartTagStatus == TagLookupStatus::FOUND_INCOMPLETE && finishReason == ov::genai::GenerationFinishReason::NONE) {
             return std::nullopt;  // Wait for more chunks to determine if start tag is complete
         }