Commit 5c3752d

Remove strftime_now from chat template functions, fix unit tests w/ python (#3607)
1 parent 6ad8cac commit 5c3752d

File tree: 6 files changed, +119 −55 lines changed

src/llm/servable_initializer.cpp

Lines changed: 0 additions & 6 deletions
@@ -191,8 +191,6 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr<GenAiServ
 global json
 import json
 from pathlib import Path
-global datetime
-import datetime

 global contextmanager
 from contextlib import contextmanager
@@ -205,9 +203,6 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr<GenAiServ

 def raise_exception(message):
     raise jinja2.exceptions.TemplateError(message)
-# Appears in some of mistral chat templates
-def strftime_now(format):
-    return datetime.datetime.now().strftime(format)
 # Following the logic from:
 # https://github.com/huggingface/transformers/blob/7188e2e28c6d663284634732564143b820a03f8b/src/transformers/utils/chat_template_utils.py#L398
 class AssistantTracker(Extension):
@@ -272,7 +267,6 @@ void GenAiServableInitializer::loadPyTemplateProcessor(std::shared_ptr<GenAiServ
 jinja_env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True, extensions=[AssistantTracker, jinja2.ext.loopcontrols], loader=template_loader)
 jinja_env.policies["json.dumps_kwargs"]["ensure_ascii"] = False
 jinja_env.globals["raise_exception"] = raise_exception
-jinja_env.globals["strftime_now"] = strftime_now
 if jinja_file.is_file():
     template = jinja_env.get_template("chat_template.jinja")
 elif jinja_file_legacy.is_file():

src/test/llm/output_parsers/hermes3_output_parser_test.cpp

Lines changed: 23 additions & 9 deletions
@@ -31,17 +31,31 @@ const std::string tokenizerPath = getWindowsRepoRootPath() + "\\src\\test\\llm_t
 const std::string tokenizerPath = "/ovms/src/test/llm_testing/NousResearch/Hermes-3-Llama-3.1-8B";
 #endif

-static ov::genai::Tokenizer hermes3Tokenizer(tokenizerPath);
+static std::unique_ptr<ov::genai::Tokenizer> hermes3Tokenizer;

 class Hermes3OutputParserTest : public ::testing::Test {
 protected:
     std::unique_ptr<OutputParser> outputParserWithRegularToolParsing;
     std::unique_ptr<OutputParser> outputParserWithImmediateToolParsing;

+    static void SetUpTestSuite() {
+        try {
+            hermes3Tokenizer = std::make_unique<ov::genai::Tokenizer>(tokenizerPath);
+        } catch (const std::exception& e) {
+            FAIL() << "Failed to initialize hermes3 tokenizer: " << e.what();
+        } catch (...) {
+            FAIL() << "Failed to initialize hermes3 tokenizer due to unknown error.";
+        }
+    }
+
+    static void TearDownTestSuite() {
+        hermes3Tokenizer.reset();
+    }
+
     void SetUp() override {
         // For Hermes3 model there is only tool parser available
-        outputParserWithRegularToolParsing = std::make_unique<OutputParser>(hermes3Tokenizer, "hermes3", "");
-        outputParserWithImmediateToolParsing = std::make_unique<OutputParser>(hermes3Tokenizer, "hermes3", "");
+        outputParserWithRegularToolParsing = std::make_unique<OutputParser>(*hermes3Tokenizer, "hermes3", "");
+        outputParserWithImmediateToolParsing = std::make_unique<OutputParser>(*hermes3Tokenizer, "hermes3", "");
         outputParserWithImmediateToolParsing->enableImmediateToolParsing();
     }
 };
@@ -59,7 +73,7 @@ TEST_F(Hermes3OutputParserTest, ParseToolCallOutputWithSingleToolCall) {
         // Remove opening tag for immediate parsing
         input = input.substr(std::string("<tool_call>").length());
     }
-    auto generatedTensor = hermes3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedTensor = hermes3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true);
     EXPECT_EQ(parsedOutput.content, "");
@@ -88,7 +102,7 @@ TEST_F(Hermes3OutputParserTest, ParseToolCallOutputWithNoToolsInTheRequest) {
         // Remove opening tag for immediate parsing
         testInput = testInput.substr(std::string("<tool_call>").length());
     }
-    auto generatedTensor = hermes3Tokenizer.encode(testInput, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedTensor = hermes3Tokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, false) : outputParserWithRegularToolParsing->parse(generatedTokens, false);
     EXPECT_EQ(parsedOutput.content, testInput);
@@ -115,7 +129,7 @@ TEST_F(Hermes3OutputParserTest, ParseToolCallOutputWithThreeToolCalls) {
     if (immediateParsing) {
         input = input.substr(std::string("<tool_call>").length());
     }
-    auto generatedTensor = hermes3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedTensor = hermes3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true);
     EXPECT_EQ(parsedOutput.content, "");
@@ -162,7 +176,7 @@ TEST_F(Hermes3OutputParserTest, ParseToolCallOutputWithTwoValidToolCallsAndOneIn
     if (immediateParsing) {
         input = input.substr(std::string("<tool_call>").length());
     }
-    auto generatedTensor = hermes3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedTensor = hermes3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true);
     EXPECT_EQ(parsedOutput.content, "");
@@ -188,7 +202,7 @@ TEST_F(Hermes3OutputParserTest, ParseToolCallOutputWithTwoValidToolCallsAndOneIn

 TEST_F(Hermes3OutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) {
     std::string input = "This is a regular model response without tool calls.";
-    auto generatedTensor = hermes3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedTensor = hermes3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true);
     EXPECT_EQ(parsedOutput.content, "This is a regular model response without tool calls.");
@@ -204,7 +218,7 @@ TEST_F(Hermes3OutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) {

 TEST_F(Hermes3OutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) {
     std::string input = "This is a content part and next will be a tool call.\n\n<tool_call>{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}</tool_call>";
-    auto generatedTensor = hermes3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedTensor = hermes3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     // generatedTokens should now contain content followed by bot token ID and then tool call
     ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true);
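Both test files in this commit apply the same fix: the tokenizer used to be a namespace-scope static, so an exception thrown while loading the tokenizer files would fire during static initialization, before main() and before GoogleTest could report anything, aborting the whole test binary. Constructing it in the suite-level SetUpTestSuite() hook (and releasing it in TearDownTestSuite()) turns a missing or broken tokenizer into an ordinary, reported test failure. Below is a minimal standalone sketch of that pattern; ExpensiveResource, SharedResourceTest, and the path are hypothetical stand-ins for ov::genai::Tokenizer and the real fixtures, not code from this commit.

#include <gtest/gtest.h>
#include <memory>
#include <stdexcept>
#include <string>

// Hypothetical stand-in for ov::genai::Tokenizer: a resource whose
// constructor may throw (e.g. when the files under `path` are missing).
struct ExpensiveResource {
    explicit ExpensiveResource(const std::string& path) {
        if (path.empty())
            throw std::runtime_error("resource files not found");
    }
    size_t encode(const std::string& text) const { return text.size(); }
};

static std::unique_ptr<ExpensiveResource> sharedResource;

class SharedResourceTest : public ::testing::Test {
protected:
    // Runs once per suite, after main() has started, so any exception can be
    // reported as a test failure instead of terminating during static init.
    static void SetUpTestSuite() {
        try {
            sharedResource = std::make_unique<ExpensiveResource>("/some/path");
        } catch (const std::exception& e) {
            FAIL() << "Failed to initialize resource: " << e.what();
        } catch (...) {
            FAIL() << "Failed to initialize resource due to unknown error.";
        }
    }

    // Runs once after the last test of the suite; releases the shared resource.
    static void TearDownTestSuite() {
        sharedResource.reset();
    }
};

TEST_F(SharedResourceTest, UsesSharedResource) {
    ASSERT_NE(sharedResource, nullptr);
    EXPECT_EQ(sharedResource->encode("abcd"), 4u);
}

SetUpTestSuite() runs once before the first test in the suite and TearDownTestSuite() once after the last, so the expensive resource is still shared across all tests, just as the single static tokenizer was; the llama3 test below applies the identical change.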

src/test/llm/output_parsers/llama3_output_parser_test.cpp

Lines changed: 25 additions & 11 deletions
@@ -31,7 +31,7 @@ const std::string tokenizerPath = getWindowsRepoRootPath() + "\\src\\test\\llm_t
 const std::string tokenizerPath = "/ovms/src/test/llm_testing/meta-llama/Llama-3.1-8B-Instruct";
 #endif

-static ov::genai::Tokenizer llama3Tokenizer(tokenizerPath);
+static std::unique_ptr<ov::genai::Tokenizer> llama3Tokenizer;

 // Id of the <|python_tag|> which is a special token used to indicate the start of a tool calls
 constexpr int64_t botTokenId = 128010;
@@ -41,16 +41,30 @@ class Llama3OutputParserTest : public ::testing::Test {
     std::unique_ptr<OutputParser> outputParserWithRegularToolParsing;
     std::unique_ptr<OutputParser> outputParserWithImmediateToolParsing;

+    static void SetUpTestSuite() {
+        try {
+            llama3Tokenizer = std::make_unique<ov::genai::Tokenizer>(tokenizerPath);
+        } catch (const std::exception& e) {
+            FAIL() << "Failed to initialize llama3 tokenizer: " << e.what();
+        } catch (...) {
+            FAIL() << "Failed to initialize llama3 tokenizer due to unknown error.";
+        }
+    }
+
+    static void TearDownTestSuite() {
+        llama3Tokenizer.reset();
+    }
+
     void SetUp() override {
-        outputParserWithRegularToolParsing = std::make_unique<OutputParser>(llama3Tokenizer, "llama3", "");
-        outputParserWithImmediateToolParsing = std::make_unique<OutputParser>(llama3Tokenizer, "llama3", "");
+        outputParserWithRegularToolParsing = std::make_unique<OutputParser>(*llama3Tokenizer, "llama3", "");
+        outputParserWithImmediateToolParsing = std::make_unique<OutputParser>(*llama3Tokenizer, "llama3", "");
         outputParserWithImmediateToolParsing->enableImmediateToolParsing();
     }
 };

 TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithSingleToolCall) {
     std::string input = "{\"name\": \"example_tool\", \"parameters\": {\"arg1\": \"value1\", \"arg2\": 42}}";
-    auto generatedTensor = llama3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedTensor = llama3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     generatedTokens.insert(generatedTokens.begin(), botTokenId);
     for (bool immediateParsing : {false, true}) {
@@ -66,7 +80,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithSingleToolCall) {

 TEST_F(Llama3OutputParserTest, ParseToolCallOutputNoToolsInTheRequest) {
     std::string input = "{\"name\": \"example_tool\", \"parameters\": {\"arg1\": \"value1\", \"arg2\": 42}}";
-    auto generatedTensor = llama3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedTensor = llama3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     for (bool immediateParsing : {false, true}) {
         ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, false) : outputParserWithRegularToolParsing->parse(generatedTokens, false);
@@ -79,7 +93,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputNoToolsInTheRequest) {
 // Tool parser assumes entire output are tool calls since it starts with "{", but it's not the case
 TEST_F(Llama3OutputParserTest, ParseRegularJsonOutputToolsInTheRequest) {
     std::string input = "{\"name\": \"Jane Doe\", \"location\": \"unknown\"}";
-    auto generatedTensor = llama3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedTensor = llama3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     for (bool immediateParsing : {false, true}) {
         ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true);
@@ -92,7 +106,7 @@ TEST_F(Llama3OutputParserTest, ParseRegularJsonOutputToolsInTheRequest) {
 // Tool parser is available, but there are no tools in the request, so all output should be treated as content
 TEST_F(Llama3OutputParserTest, ParseRegularJsonOutputNoToolsInTheRequest) {
     std::string input = "{\"name\": \"Jane Doe\", \"location\": \"unknown\"}";
-    auto generatedTensor = llama3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedTensor = llama3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     for (bool immediateParsing : {false, true}) {
         ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, false) : outputParserWithRegularToolParsing->parse(generatedTokens, false);
@@ -105,7 +119,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithThreeToolCalls) {
     std::string input = "{\"name\": \"example_tool\", \"parameters\": {\"arg1\": \"value1\", \"arg2\": 42}};"
                         "{\"name\": \"another_tool\", \"parameters\": {\"param1\": \"data\", \"param2\": true}};"
                         "{\"name\": \"third_tool\", \"parameters\": {\"key\": \"value\"}}";
-    auto generatedTensor = llama3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedTensor = llama3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     for (bool immediateParsing : {false, true}) {
         ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true);
@@ -132,7 +146,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithThreeToolCalls) {

 TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) {
     std::string input = "This is a regular model response without tool calls.";
-    auto generatedTensor = llama3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedTensor = llama3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     for (bool immediateParsing : {false, true}) {
         ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true);
@@ -145,9 +159,9 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) {
 TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) {
     std::string content = "This is a content part and next will be a tool call.";
     std::string toolCall = "{\"name\": \"example_tool\", \"parameters\": {\"arg1\": \"value1\", \"arg2\": 42}}";
-    auto generatedContentTensor = llama3Tokenizer.encode(content, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedContentTensor = llama3Tokenizer->encode(content, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedContentTokens(generatedContentTensor.data<int64_t>(), generatedContentTensor.data<int64_t>() + generatedContentTensor.get_size());
-    auto generatedToolCallTensor = llama3Tokenizer.encode(toolCall, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedToolCallTensor = llama3Tokenizer->encode(toolCall, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedToolCallTokens(generatedToolCallTensor.data<int64_t>(), generatedToolCallTensor.data<int64_t>() + generatedToolCallTensor.get_size());
     std::vector<int64_t> generatedTokens;
     generatedTokens.insert(generatedTokens.end(), generatedContentTokens.begin(), generatedContentTokens.end());
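The final hunk is truncated, but together with the botTokenId comment at the top of the file it shows what these tests exercise: a token stream with plain content first and a tool call after, separated by the <|python_tag|> token (id 128010) that the llama3 parser treats as the start of tool calls. The sketch below illustrates that assembly only; the helper name is hypothetical and the exact continuation of the hidden test code is not reproduced here.

#include <cstdint>
#include <vector>

// <|python_tag|> id as quoted in the test file; real ids come from the tokenizer.
constexpr int64_t kBotTokenId = 128010;

// Builds a generation containing plain content followed by a tool call,
// mirroring how the test concatenates the two separately encoded fragments.
std::vector<int64_t> buildMixedOutput(const std::vector<int64_t>& contentTokens,
                                      const std::vector<int64_t>& toolCallTokens) {
    std::vector<int64_t> tokens;
    tokens.insert(tokens.end(), contentTokens.begin(), contentTokens.end());
    tokens.push_back(kBotTokenId);  // marks where the tool-call section begins
    tokens.insert(tokens.end(), toolCallTokens.begin(), toolCallTokens.end());
    return tokens;
}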
