diff --git a/src/llm/servable_initializer.cpp b/src/llm/servable_initializer.cpp index d2712af3ec..71ac74423a 100644 --- a/src/llm/servable_initializer.cpp +++ b/src/llm/servable_initializer.cpp @@ -191,8 +191,6 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr hermes3Tokenizer; class Hermes3OutputParserTest : public ::testing::Test { protected: std::unique_ptr outputParserWithRegularToolParsing; std::unique_ptr outputParserWithImmediateToolParsing; + static void SetUpTestSuite() { + try { + hermes3Tokenizer = std::make_unique(tokenizerPath); + } catch (const std::exception& e) { + FAIL() << "Failed to initialize hermes3 tokenizer: " << e.what(); + } catch (...) { + FAIL() << "Failed to initialize hermes3 tokenizer due to unknown error."; + } + } + + static void TearDownTestSuite() { + hermes3Tokenizer.reset(); + } + void SetUp() override { // For Hermes3 model there is only tool parser available - outputParserWithRegularToolParsing = std::make_unique(hermes3Tokenizer, "hermes3", ""); - outputParserWithImmediateToolParsing = std::make_unique(hermes3Tokenizer, "hermes3", ""); + outputParserWithRegularToolParsing = std::make_unique(*hermes3Tokenizer, "hermes3", ""); + outputParserWithImmediateToolParsing = std::make_unique(*hermes3Tokenizer, "hermes3", ""); outputParserWithImmediateToolParsing->enableImmediateToolParsing(); } }; @@ -59,7 +73,7 @@ TEST_F(Hermes3OutputParserTest, ParseToolCallOutputWithSingleToolCall) { // Remove opening tag for immediate parsing input = input.substr(std::string("").length()); } - auto generatedTensor = hermes3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = hermes3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, ""); @@ -88,7 +102,7 @@ TEST_F(Hermes3OutputParserTest, ParseToolCallOutputWithNoToolsInTheRequest) { // Remove opening tag for immediate parsing testInput = testInput.substr(std::string("").length()); } - auto generatedTensor = hermes3Tokenizer.encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = hermes3Tokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, false) : outputParserWithRegularToolParsing->parse(generatedTokens, false); EXPECT_EQ(parsedOutput.content, testInput); @@ -115,7 +129,7 @@ TEST_F(Hermes3OutputParserTest, ParseToolCallOutputWithThreeToolCalls) { if (immediateParsing) { input = input.substr(std::string("").length()); } - auto generatedTensor = hermes3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = hermes3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, ""); @@ -162,7 +176,7 @@ TEST_F(Hermes3OutputParserTest, ParseToolCallOutputWithTwoValidToolCallsAndOneIn if (immediateParsing) { input = input.substr(std::string("").length()); } - auto generatedTensor = hermes3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = hermes3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, ""); @@ -188,7 +202,7 @@ TEST_F(Hermes3OutputParserTest, ParseToolCallOutputWithTwoValidToolCallsAndOneIn TEST_F(Hermes3OutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) { std::string input = "This is a regular model response without tool calls."; - auto generatedTensor = hermes3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = hermes3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, "This is a regular model response without tool calls."); @@ -204,7 +218,7 @@ TEST_F(Hermes3OutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) { TEST_F(Hermes3OutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) { std::string input = "This is a content part and next will be a tool call.\n\n{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}"; - auto generatedTensor = hermes3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = hermes3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); // generatedTokens should now contain content followed by bot token ID and then tool call ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); diff --git a/src/test/llm/output_parsers/llama3_output_parser_test.cpp b/src/test/llm/output_parsers/llama3_output_parser_test.cpp index 72cec5ee6b..56d2c73713 100644 --- a/src/test/llm/output_parsers/llama3_output_parser_test.cpp +++ b/src/test/llm/output_parsers/llama3_output_parser_test.cpp @@ -31,7 +31,7 @@ const std::string tokenizerPath = getWindowsRepoRootPath() + "\\src\\test\\llm_t const std::string tokenizerPath = "/ovms/src/test/llm_testing/meta-llama/Llama-3.1-8B-Instruct"; #endif -static ov::genai::Tokenizer llama3Tokenizer(tokenizerPath); +static std::unique_ptr llama3Tokenizer; // Id of the <|python_tag|> which is a special token used to indicate the start of a tool calls constexpr int64_t botTokenId = 128010; @@ -41,16 +41,30 @@ class Llama3OutputParserTest : public ::testing::Test { std::unique_ptr outputParserWithRegularToolParsing; std::unique_ptr outputParserWithImmediateToolParsing; + static void SetUpTestSuite() { + try { + llama3Tokenizer = std::make_unique(tokenizerPath); + } catch (const std::exception& e) { + FAIL() << "Failed to initialize llama3 tokenizer: " << e.what(); + } catch (...) { + FAIL() << "Failed to initialize llama3 tokenizer due to unknown error."; + } + } + + static void TearDownTestSuite() { + llama3Tokenizer.reset(); + } + void SetUp() override { - outputParserWithRegularToolParsing = std::make_unique(llama3Tokenizer, "llama3", ""); - outputParserWithImmediateToolParsing = std::make_unique(llama3Tokenizer, "llama3", ""); + outputParserWithRegularToolParsing = std::make_unique(*llama3Tokenizer, "llama3", ""); + outputParserWithImmediateToolParsing = std::make_unique(*llama3Tokenizer, "llama3", ""); outputParserWithImmediateToolParsing->enableImmediateToolParsing(); } }; TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithSingleToolCall) { std::string input = "{\"name\": \"example_tool\", \"parameters\": {\"arg1\": \"value1\", \"arg2\": 42}}"; - auto generatedTensor = llama3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = llama3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); generatedTokens.insert(generatedTokens.begin(), botTokenId); for (bool immediateParsing : {false, true}) { @@ -66,7 +80,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithSingleToolCall) { TEST_F(Llama3OutputParserTest, ParseToolCallOutputNoToolsInTheRequest) { std::string input = "{\"name\": \"example_tool\", \"parameters\": {\"arg1\": \"value1\", \"arg2\": 42}}"; - auto generatedTensor = llama3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = llama3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); for (bool immediateParsing : {false, true}) { ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, false) : outputParserWithRegularToolParsing->parse(generatedTokens, false); @@ -79,7 +93,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputNoToolsInTheRequest) { // Tool parser assumes entire output are tool calls since it starts with "{", but it's not the case TEST_F(Llama3OutputParserTest, ParseRegularJsonOutputToolsInTheRequest) { std::string input = "{\"name\": \"Jane Doe\", \"location\": \"unknown\"}"; - auto generatedTensor = llama3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = llama3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); for (bool immediateParsing : {false, true}) { ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); @@ -92,7 +106,7 @@ TEST_F(Llama3OutputParserTest, ParseRegularJsonOutputToolsInTheRequest) { // Tool parser is available, but there are no tools in the request, so all output should be treated as content TEST_F(Llama3OutputParserTest, ParseRegularJsonOutputNoToolsInTheRequest) { std::string input = "{\"name\": \"Jane Doe\", \"location\": \"unknown\"}"; - auto generatedTensor = llama3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = llama3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); for (bool immediateParsing : {false, true}) { ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, false) : outputParserWithRegularToolParsing->parse(generatedTokens, false); @@ -105,7 +119,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithThreeToolCalls) { std::string input = "{\"name\": \"example_tool\", \"parameters\": {\"arg1\": \"value1\", \"arg2\": 42}};" "{\"name\": \"another_tool\", \"parameters\": {\"param1\": \"data\", \"param2\": true}};" "{\"name\": \"third_tool\", \"parameters\": {\"key\": \"value\"}}"; - auto generatedTensor = llama3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = llama3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); for (bool immediateParsing : {false, true}) { ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); @@ -132,7 +146,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithThreeToolCalls) { TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) { std::string input = "This is a regular model response without tool calls."; - auto generatedTensor = llama3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = llama3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); for (bool immediateParsing : {false, true}) { ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); @@ -145,9 +159,9 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) { TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) { std::string content = "This is a content part and next will be a tool call."; std::string toolCall = "{\"name\": \"example_tool\", \"parameters\": {\"arg1\": \"value1\", \"arg2\": 42}}"; - auto generatedContentTensor = llama3Tokenizer.encode(content, ov::genai::add_special_tokens(false)).input_ids; + auto generatedContentTensor = llama3Tokenizer->encode(content, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedContentTokens(generatedContentTensor.data(), generatedContentTensor.data() + generatedContentTensor.get_size()); - auto generatedToolCallTensor = llama3Tokenizer.encode(toolCall, ov::genai::add_special_tokens(false)).input_ids; + auto generatedToolCallTensor = llama3Tokenizer->encode(toolCall, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedToolCallTokens(generatedToolCallTensor.data(), generatedToolCallTensor.data() + generatedToolCallTensor.get_size()); std::vector generatedTokens; generatedTokens.insert(generatedTokens.end(), generatedContentTokens.begin(), generatedContentTokens.end()); diff --git a/src/test/llm/output_parsers/mistral_output_parser_test.cpp b/src/test/llm/output_parsers/mistral_output_parser_test.cpp index 02379e5df7..fd22e34da2 100644 --- a/src/test/llm/output_parsers/mistral_output_parser_test.cpp +++ b/src/test/llm/output_parsers/mistral_output_parser_test.cpp @@ -31,16 +31,30 @@ const std::string tokenizerPath = getWindowsRepoRootPath() + "\\src\\test\\llm_t const std::string tokenizerPath = "/ovms/src/test/llm_testing/mistralai/Mistral-7B-Instruct-v0.3/"; #endif -static ov::genai::Tokenizer mistralTokenizer(tokenizerPath); +static std::unique_ptr mistralTokenizer; class MistralOutputParserTest : public ::testing::Test { protected: std::unique_ptr outputParserWithRegularToolParsing; std::unique_ptr outputParserWithImmediateToolParsing; + static void SetUpTestSuite() { + try { + mistralTokenizer = std::make_unique(tokenizerPath); + } catch (const std::exception& e) { + FAIL() << "Failed to initialize mistral tokenizer: " << e.what(); + } catch (...) { + FAIL() << "Failed to initialize mistral tokenizer due to unknown error."; + } + } + + static void TearDownTestSuite() { + mistralTokenizer.reset(); + } + void SetUp() override { - outputParserWithRegularToolParsing = std::make_unique(mistralTokenizer, "mistral", ""); - outputParserWithImmediateToolParsing = std::make_unique(mistralTokenizer, "mistral", ""); + outputParserWithRegularToolParsing = std::make_unique(*mistralTokenizer, "mistral", ""); + outputParserWithImmediateToolParsing = std::make_unique(*mistralTokenizer, "mistral", ""); outputParserWithImmediateToolParsing->enableImmediateToolParsing(); } }; @@ -52,7 +66,7 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithSingleToolCall) { if (immediateParsing) { testInput = testInput.substr(std::string("[TOOL_CALLS]").length()); } - auto generatedTensor = mistralTokenizer.encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = mistralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, ""); @@ -73,7 +87,7 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithThreeToolCalls) { if (immediateParsing) { testInput = testInput.substr(std::string("[TOOL_CALLS]").length()); } - auto generatedTensor = mistralTokenizer.encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = mistralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, ""); @@ -106,7 +120,7 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithOneValidToolCallAndTwoInv if (immediateParsing) { testInput = testInput.substr(std::string("[TOOL_CALLS]").length()); } - auto generatedTensor = mistralTokenizer.encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = mistralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, ""); @@ -122,7 +136,7 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithOneValidToolCallAndTwoInv TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) { std::string input = "This is a regular model response without tool calls."; for (bool immediateParsing : {false, true}) { - auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = mistralTokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, "This is a regular model response without tool calls."); @@ -134,7 +148,7 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) { TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) { std::string input = "This is a content part and next will be a tool call.\n\n[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]"; for (bool immediateParsing : {false, true}) { - auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = mistralTokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n [{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]"); @@ -145,7 +159,7 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentOnBothSidesAndSingleToolCall) { std::string input = "This is a content part and next will be a tool call.\n\n[TOOL_CALLS][{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}] This is a content part after tool call."; for (bool immediateParsing : {false, true}) { - auto generatedTensor = mistralTokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = mistralTokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n [{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}] This is a content part after tool call."); @@ -160,7 +174,7 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithMultipleToolCallsReturnsC if (immediateParsing) { testInput = testInput.substr(std::string("[TOOL_CALLS]").length()); } - auto generatedTensor = mistralTokenizer.encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = mistralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); // Same expected content as tokenizer does not add special tokens @@ -177,7 +191,7 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithArrayArguments) { if (immediateParsing) { testInput = testInput.substr(std::string("[TOOL_CALLS]").length()); } - auto generatedTensor = mistralTokenizer.encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = mistralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, ""); diff --git a/src/test/llm/output_parsers/phi4_output_parser_test.cpp b/src/test/llm/output_parsers/phi4_output_parser_test.cpp index 8d57519bd4..a1ae488cc2 100644 --- a/src/test/llm/output_parsers/phi4_output_parser_test.cpp +++ b/src/test/llm/output_parsers/phi4_output_parser_test.cpp @@ -31,16 +31,30 @@ const std::string tokenizerPath = getWindowsRepoRootPath() + "\\src\\test\\llm_t const std::string tokenizerPath = "/ovms/src/test/llm_testing/microsoft/Phi-4-mini-instruct"; #endif -static ov::genai::Tokenizer phi4Tokenizer(tokenizerPath); +static std::unique_ptr phi4Tokenizer; class Phi4OutputParserTest : public ::testing::Test { protected: std::unique_ptr outputParserWithRegularToolParsing; std::unique_ptr outputParserWithImmediateToolParsing; + static void SetUpTestSuite() { + try { + phi4Tokenizer = std::make_unique(tokenizerPath); + } catch (const std::exception& e) { + FAIL() << "Failed to initialize phi tokenizer: " << e.what(); + } catch (...) { + FAIL() << "Failed to initialize phi tokenizer due to unknown error."; + } + } + + static void TearDownTestSuite() { + phi4Tokenizer.reset(); + } + void SetUp() override { - outputParserWithRegularToolParsing = std::make_unique(phi4Tokenizer, "phi4", ""); - outputParserWithImmediateToolParsing = std::make_unique(phi4Tokenizer, "phi4", ""); + outputParserWithRegularToolParsing = std::make_unique(*phi4Tokenizer, "phi4", ""); + outputParserWithImmediateToolParsing = std::make_unique(*phi4Tokenizer, "phi4", ""); outputParserWithImmediateToolParsing->enableImmediateToolParsing(); } }; @@ -52,7 +66,7 @@ TEST_F(Phi4OutputParserTest, ParseToolCallOutputWithSingleToolCall) { if (immediateParsing) { testInput = testInput.substr(std::string("functools").length()); } - auto generatedTensor = phi4Tokenizer.encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = phi4Tokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, ""); @@ -73,7 +87,7 @@ TEST_F(Phi4OutputParserTest, ParseToolCallOutputWithThreeToolCalls) { if (immediateParsing) { testInput = testInput.substr(std::string("functools").length()); } - auto generatedTensor = phi4Tokenizer.encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = phi4Tokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, ""); @@ -106,7 +120,7 @@ TEST_F(Phi4OutputParserTest, ParseToolCallOutputWithOneValidToolCallAndTwoInvali if (immediateParsing) { testInput = testInput.substr(std::string("functools").length()); } - auto generatedTensor = phi4Tokenizer.encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = phi4Tokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, ""); @@ -122,7 +136,7 @@ TEST_F(Phi4OutputParserTest, ParseToolCallOutputWithOneValidToolCallAndTwoInvali TEST_F(Phi4OutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) { std::string input = "This is a regular model response without tool calls."; for (bool immediateParsing : {false, true}) { - auto generatedTensor = phi4Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = phi4Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, immediateParsing ? "" : "This is a regular model response without tool calls."); @@ -134,7 +148,7 @@ TEST_F(Phi4OutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) { TEST_F(Phi4OutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) { std::string input = "This is a content part and next will be a tool call.\n\nfunctools[{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]"; for (bool immediateParsing : {false, true}) { - auto generatedTensor = phi4Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = phi4Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, immediateParsing ? "" : "This is a content part and next will be a tool call.\n\n"); @@ -154,7 +168,7 @@ TEST_F(Phi4OutputParserTest, ParseToolCallOutputWithMultipleFunctoolsReturnsNoth if (immediateParsing) { testInput.erase(std::string("functools").length()); } - auto generatedTensor = phi4Tokenizer.encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = phi4Tokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, ""); @@ -170,7 +184,7 @@ TEST_F(Phi4OutputParserTest, ParseToolCallOutputWithArrayArguments) { if (immediateParsing) { testInput = testInput.substr(std::string("functools").length()); } - auto generatedTensor = phi4Tokenizer.encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = phi4Tokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, ""); diff --git a/src/test/llm/output_parsers/qwen3_output_parser_test.cpp b/src/test/llm/output_parsers/qwen3_output_parser_test.cpp index 122c3622ad..598efa16b3 100644 --- a/src/test/llm/output_parsers/qwen3_output_parser_test.cpp +++ b/src/test/llm/output_parsers/qwen3_output_parser_test.cpp @@ -31,21 +31,35 @@ const std::string tokenizerPath = getWindowsRepoRootPath() + "\\src\\test\\llm_t const std::string tokenizerPath = "/ovms/src/test/llm_testing/Qwen/Qwen3-8B"; #endif -static ov::genai::Tokenizer qwen3Tokenizer(tokenizerPath); +static std::unique_ptr qwen3Tokenizer; class Qwen3OutputParserTest : public ::testing::Test { protected: std::unique_ptr outputParser; + static void SetUpTestSuite() { + try { + qwen3Tokenizer = std::make_unique(tokenizerPath); + } catch (const std::exception& e) { + FAIL() << "Failed to initialize qwen3 tokenizer: " << e.what(); + } catch (...) { + FAIL() << "Failed to initialize qwen3 tokenizer due to unknown error."; + } + } + + static void TearDownTestSuite() { + qwen3Tokenizer.reset(); + } + void SetUp() override { // For Qwen3 model we use hermes3 tool parser (due to the same format of generated tool calls) and qwen3 reasoning parser - outputParser = std::make_unique(qwen3Tokenizer, "hermes3", "qwen3"); + outputParser = std::make_unique(*qwen3Tokenizer, "hermes3", "qwen3"); } }; TEST_F(Qwen3OutputParserTest, ParseToolCallOutputWithSingleToolCallNoThinking) { std::string input = "{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}"; - auto generatedTensor = qwen3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = qwen3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, ""); @@ -61,7 +75,7 @@ TEST_F(Qwen3OutputParserTest, ParseToolCallOutputWithSingleToolCallNoThinking) { TEST_F(Qwen3OutputParserTest, ParseToolCallOutputWithSingleToolCallAndThinking) { std::string input = "Thinking about the tool call" "{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}"; - auto generatedTensor = qwen3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = qwen3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, ""); @@ -77,7 +91,7 @@ TEST_F(Qwen3OutputParserTest, ParseToolCallOutputWithThreeToolCallsNoThinking) { std::string input = "{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}" "{\"name\": \"another_tool\", \"arguments\": {\"param1\": \"data\", \"param2\": true}}" "{\"name\": \"third_tool\", \"arguments\": {\"key\": \"value\"}}"; - auto generatedTensor = qwen3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = qwen3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, ""); @@ -111,7 +125,7 @@ TEST_F(Qwen3OutputParserTest, ParseToolCallOutputWithThreeToolCallsAndThinking) "{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}" "{\"name\": \"another_tool\", \"arguments\": {\"param1\": \"data\", \"param2\": true}}" "{\"name\": \"third_tool\", \"arguments\": {\"key\": \"value\"}}"; - auto generatedTensor = qwen3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = qwen3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, ""); @@ -142,7 +156,7 @@ TEST_F(Qwen3OutputParserTest, ParseToolCallOutputWithThreeToolCallsAndThinking) TEST_F(Qwen3OutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) { std::string input = "This is a regular model response without tool calls."; - auto generatedTensor = qwen3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = qwen3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true); EXPECT_EQ(parsedOutput.content, "This is a regular model response without tool calls."); @@ -152,7 +166,7 @@ TEST_F(Qwen3OutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) { TEST_F(Qwen3OutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) { std::string input = "This is a content part and next will be a tool call.\n\n{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}"; - auto generatedTensor = qwen3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids; + auto generatedTensor = qwen3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); // generatedTokens should now contain content followed by bot token ID and then tool call ParsedOutput parsedOutput = outputParser->parse(generatedTokens, true);