@@ -31,7 +31,7 @@ const std::string tokenizerPath = getWindowsRepoRootPath() + "\\src\\test\\llm_t
 const std::string tokenizerPath = "/ovms/src/test/llm_testing/meta-llama/Llama-3.1-8B-Instruct";
 #endif
 
-static ov::genai::Tokenizer llama3Tokenizer(tokenizerPath);
+static std::unique_ptr<ov::genai::Tokenizer> llama3Tokenizer;
 
 // Id of the <|python_tag|> which is a special token used to indicate the start of a tool calls
 constexpr int64_t botTokenId = 128010;
@@ -41,16 +41,30 @@ class Llama3OutputParserTest : public ::testing::Test {
     std::unique_ptr<OutputParser> outputParserWithRegularToolParsing;
     std::unique_ptr<OutputParser> outputParserWithImmediateToolParsing;
 
+    static void SetUpTestSuite() {
+        try {
+            llama3Tokenizer = std::make_unique<ov::genai::Tokenizer>(tokenizerPath);
+        } catch (const std::exception& e) {
+            FAIL() << "Failed to initialize llama3 tokenizer: " << e.what();
+        } catch (...) {
+            FAIL() << "Failed to initialize llama3 tokenizer due to unknown error.";
+        }
+    }
+
+    static void TearDownTestSuite() {
+        llama3Tokenizer.reset();
+    }
+
     void SetUp() override {
-        outputParserWithRegularToolParsing = std::make_unique<OutputParser>(llama3Tokenizer, "llama3", "");
-        outputParserWithImmediateToolParsing = std::make_unique<OutputParser>(llama3Tokenizer, "llama3", "");
+        outputParserWithRegularToolParsing = std::make_unique<OutputParser>(*llama3Tokenizer, "llama3", "");
+        outputParserWithImmediateToolParsing = std::make_unique<OutputParser>(*llama3Tokenizer, "llama3", "");
         outputParserWithImmediateToolParsing->enableImmediateToolParsing();
     }
 };
 
 TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithSingleToolCall) {
     std::string input = "{\"name\": \"example_tool\", \"parameters\": {\"arg1\": \"value1\", \"arg2\": 42}}";
-    auto generatedTensor = llama3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedTensor = llama3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     generatedTokens.insert(generatedTokens.begin(), botTokenId);
     for (bool immediateParsing : {false, true}) {
@@ -66,7 +80,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithSingleToolCall) {
 
 TEST_F(Llama3OutputParserTest, ParseToolCallOutputNoToolsInTheRequest) {
     std::string input = "{\"name\": \"example_tool\", \"parameters\": {\"arg1\": \"value1\", \"arg2\": 42}}";
-    auto generatedTensor = llama3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedTensor = llama3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     for (bool immediateParsing : {false, true}) {
         ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, false) : outputParserWithRegularToolParsing->parse(generatedTokens, false);
@@ -79,7 +93,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputNoToolsInTheRequest) {
 // Tool parser assumes entire output are tool calls since it starts with "{", but it's not the case
 TEST_F(Llama3OutputParserTest, ParseRegularJsonOutputToolsInTheRequest) {
     std::string input = "{\"name\": \"Jane Doe\", \"location\": \"unknown\"}";
-    auto generatedTensor = llama3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedTensor = llama3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     for (bool immediateParsing : {false, true}) {
         ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true);
@@ -92,7 +106,7 @@ TEST_F(Llama3OutputParserTest, ParseRegularJsonOutputToolsInTheRequest) {
 // Tool parser is available, but there are no tools in the request, so all output should be treated as content
 TEST_F(Llama3OutputParserTest, ParseRegularJsonOutputNoToolsInTheRequest) {
     std::string input = "{\"name\": \"Jane Doe\", \"location\": \"unknown\"}";
-    auto generatedTensor = llama3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedTensor = llama3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     for (bool immediateParsing : {false, true}) {
         ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, false) : outputParserWithRegularToolParsing->parse(generatedTokens, false);
@@ -105,7 +119,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithThreeToolCalls) {
     std::string input = "{\"name\": \"example_tool\", \"parameters\": {\"arg1\": \"value1\", \"arg2\": 42}};"
                         "{\"name\": \"another_tool\", \"parameters\": {\"param1\": \"data\", \"param2\": true}};"
                         "{\"name\": \"third_tool\", \"parameters\": {\"key\": \"value\"}}";
-    auto generatedTensor = llama3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedTensor = llama3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     for (bool immediateParsing : {false, true}) {
         ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true);
@@ -132,7 +146,7 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithThreeToolCalls) {
 
 TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) {
     std::string input = "This is a regular model response without tool calls.";
-    auto generatedTensor = llama3Tokenizer.encode(input, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedTensor = llama3Tokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     for (bool immediateParsing : {false, true}) {
         ParsedOutput parsedOutput = immediateParsing ? outputParserWithImmediateToolParsing->parse(generatedTokens, true) : outputParserWithRegularToolParsing->parse(generatedTokens, true);
@@ -145,9 +159,9 @@ TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) {
 TEST_F(Llama3OutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) {
     std::string content = "This is a content part and next will be a tool call.";
     std::string toolCall = "{\"name\": \"example_tool\", \"parameters\": {\"arg1\": \"value1\", \"arg2\": 42}}";
-    auto generatedContentTensor = llama3Tokenizer.encode(content, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedContentTensor = llama3Tokenizer->encode(content, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedContentTokens(generatedContentTensor.data<int64_t>(), generatedContentTensor.data<int64_t>() + generatedContentTensor.get_size());
-    auto generatedToolCallTensor = llama3Tokenizer.encode(toolCall, ov::genai::add_special_tokens(false)).input_ids;
+    auto generatedToolCallTensor = llama3Tokenizer->encode(toolCall, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedToolCallTokens(generatedToolCallTensor.data<int64_t>(), generatedToolCallTensor.data<int64_t>() + generatedToolCallTensor.get_size());
     std::vector<int64_t> generatedTokens;
     generatedTokens.insert(generatedTokens.end(), generatedContentTokens.begin(), generatedContentTokens.end());
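
For reference, the change above follows the standard GoogleTest pattern for sharing an expensive resource (here, the Llama 3.1 tokenizer loaded from disk) across all tests in a suite: construct it once in SetUpTestSuite(), fail the suite cleanly if construction throws, and release it in TearDownTestSuite(). A minimal, self-contained sketch of that pattern, using a hypothetical ExpensiveResource in place of ov::genai::Tokenizer, could look like this:

#include <gtest/gtest.h>
#include <memory>
#include <stdexcept>
#include <string>

// Hypothetical stand-in for a dependency that is costly to construct
// and may throw (e.g. a tokenizer loaded from a model directory).
class ExpensiveResource {
public:
    explicit ExpensiveResource(const std::string& path) {
        if (path.empty())
            throw std::runtime_error("resource path is empty");
    }
    size_t tokenCount(const std::string& text) const { return text.size(); }
};

// Not built during static initialization; created lazily once per test suite.
static std::unique_ptr<ExpensiveResource> sharedResource;

class SharedResourceTest : public ::testing::Test {
protected:
    static void SetUpTestSuite() {
        try {
            sharedResource = std::make_unique<ExpensiveResource>("/some/model/dir");
        } catch (const std::exception& e) {
            FAIL() << "Failed to initialize shared resource: " << e.what();
        }
    }

    static void TearDownTestSuite() {
        sharedResource.reset();  // release deterministically, before static teardown
    }
};

TEST_F(SharedResourceTest, SharedResourceIsUsable) {
    ASSERT_NE(sharedResource, nullptr);
    EXPECT_GT(sharedResource->tokenCount("hello"), 0u);
}

Compared with constructing the tokenizer as a plain static global, this avoids doing heavy, potentially throwing work before main() runs, where a failure could not be reported as a test failure.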