diff --git a/src/pull_module/optimum_export.cpp b/src/pull_module/optimum_export.cpp
index 32528bf31c..094cca2c88 100644
--- a/src/pull_module/optimum_export.cpp
+++ b/src/pull_module/optimum_export.cpp
@@ -114,23 +114,96 @@ std::string OptimumDownloader::getExportCmd() {
     return cmd;
 }
 
-OptimumDownloader::OptimumDownloader(const ExportSettings& inExportSettings, const GraphExportType& inTask, const std::string& inSourceModel, const std::string& inDownloadPath, bool inOverwrite, const std::string& cliExportCmd, const std::string& cliCheckCmd) :
+// Returns the convert_tokenizer command line for the configured task.
+// Image generation needs no tokenizer, so it maps to an empty command; an
+// unknown task is logged as an error and also maps to an empty command.
+std::string OptimumDownloader::getConvertCmd() {
+    switch (this->task) {
+    case TEXT_GENERATION_GRAPH:
+        return getConvertCmdWithDetokenizer();
+    case EMBEDDINGS_GRAPH:
+    case RERANK_GRAPH:
+        return getConvertCmdOnlyTokenizer();
+    case IMAGE_GENERATION_GRAPH:
+        return "";
+    case UNKNOWN_GRAPH:
+        SPDLOG_ERROR("Optimum cli task options not initialised.");
+        return "";
+    }
+
+    // Any task value not listed above yields an empty command as well.
+    return "";
+}
+
+// Command layout: {export command}{source model} --with-detokenizer -o {download path}.
+std::string OptimumDownloader::getConvertCmdWithDetokenizer() {
+    std::ostringstream commandLine;
+    // clang-format off
+    commandLine << this->CONVERT_TOKENIZER_EXPORT_COMMAND
+                << this->sourceModel
+                << " --with-detokenizer -o "
+                << this->downloadPath;
+    // clang-format on
+
+    return commandLine.str();
+}
+
+// Command layout: {export command}{source model} -o {download path}.
+// Same as the detokenizer variant minus the --with-detokenizer switch.
+std::string OptimumDownloader::getConvertCmdOnlyTokenizer() {
+    std::ostringstream commandLine;
+    // clang-format off
+    commandLine << this->CONVERT_TOKENIZER_EXPORT_COMMAND
+                << this->sourceModel
+                << " -o "
+                << this->downloadPath;
+    // clang-format on
+
+    return commandLine.str();
+}
+
+// Reports whether openvino_detokenizer.xml, joined onto downloadPath, exists.
+bool OptimumDownloader::checkIfDetokenizerFileIsExported() {
+    const auto detokenizerXml = FileSystem::joinPath({this->downloadPath, "openvino_detokenizer.xml"});
+    return std::filesystem::exists(detokenizerXml);
+}
+
+// Reports whether openvino_tokenizer.xml, joined onto downloadPath, exists.
+bool OptimumDownloader::checkIfTokenizerFileIsExported() {
+    const auto tokenizerXml = FileSystem::joinPath({this->downloadPath, "openvino_tokenizer.xml"});
+    return std::filesystem::exists(tokenizerXml);
+}
+
+OptimumDownloader::OptimumDownloader(const ExportSettings& inExportSettings, const GraphExportType& inTask,
+    const std::string& inSourceModel, const std::string& inDownloadPath, bool inOverwrite, const std::string& cliExportCmd,
+    const std::string& cliCheckCmd, const std::string& convertExportCmd, const std::string& convertCheckCmd) :
     IModelDownloader(inSourceModel, inDownloadPath, inOverwrite),
     exportSettings(inExportSettings),
     task(inTask),
     OPTIMUM_CLI_CHECK_COMMAND(cliCheckCmd),
-    OPTIMUM_CLI_EXPORT_COMMAND(cliExportCmd) {}
+    OPTIMUM_CLI_EXPORT_COMMAND(cliExportCmd),
+    CONVERT_TOKENIZER_CHECK_COMMAND(convertCheckCmd),
+    CONVERT_TOKENIZER_EXPORT_COMMAND(convertExportCmd) {}
 
 Status OptimumDownloader::checkRequiredToolsArePresent() {
     int retCode = -1;
     std::string output = exec_cmd(this->OPTIMUM_CLI_CHECK_COMMAND, retCode);
     if (retCode != 0) {
         SPDLOG_DEBUG("Command output {}", output);
-        SPDLOG_ERROR("Trying to pull {} from HuggingFace but missing optimum-intel. Use the ovms package with optimum-intel.", this->sourceModel);
+        SPDLOG_ERROR("Trying to pull {} from HuggingFace but missing optimum-intel. Use the ovms package with optimum-intel installed.", this->sourceModel);
         return StatusCode::HF_FAILED_TO_INIT_OPTIMUM_CLI;
     }
 
     SPDLOG_DEBUG("Optimum-cli executable is present");
+    // Start from a failing code again (as downloadModel() does) so a stale 0 from the optimum-cli probe cannot pass for success.
+    retCode = -1;
+    output = exec_cmd(this->CONVERT_TOKENIZER_CHECK_COMMAND, retCode);
+    if (retCode != 0) {
+        SPDLOG_DEBUG("Command output {}", output);
+        SPDLOG_ERROR("Trying to pull {} from HuggingFace but missing convert_tokenizer. This is likely because you are using OVMS without Python support which is required for pulling with conversion.", this->sourceModel);
+        return StatusCode::HF_FAILED_TO_INIT_OPTIMUM_CLI;
+    }
+    SPDLOG_DEBUG("Convert_tokenizer executable is present");
     return StatusCode::OK;
 }
 
@@ -171,6 +244,29 @@ Status OptimumDownloader::downloadModel() {
         return StatusCode::HF_RUN_OPTIMUM_CLI_EXPORT_FAILED;
     }
 
+    // Tokenizer files may be absent after the export step; run convert_tokenizer only when a required file is missing.
+    // Text generation additionally needs the detokenizer that getConvertCmdWithDetokenizer() requests.
+    bool tokenizerFilesMissing = !this->checkIfTokenizerFileIsExported();
+    if (!tokenizerFilesMissing && this->task == TEXT_GENERATION_GRAPH) {
+        tokenizerFilesMissing = !this->checkIfDetokenizerFileIsExported();
+    }
+    if (tokenizerFilesMissing) {
+        cmd = getConvertCmd();
+        retCode = -1;
+        // Tokenizer, detokenizer not required for image generation
+        if (!cmd.empty()) {
+            SPDLOG_DEBUG("Tokenizer or detokenizer not found in the exported model. Exporting tokenizer and detokenizer from HF model.");
+            output = exec_cmd(cmd, retCode);
+            if (retCode != 0) {
+                SPDLOG_DEBUG("Command output {}", output);
+                SPDLOG_ERROR("convert_tokenizer command failed.");
+                return StatusCode::HF_RUN_CONVERT_TOKENIZER_EXPORT_FAILED;
+            }
+        }
+    } else {
+        SPDLOG_DEBUG("Tokenizer is found in the exported model directory. Convert_tokenizer command not required.");
+    }
+
     return StatusCode::OK;
 }
 
diff --git a/src/pull_module/optimum_export.hpp b/src/pull_module/optimum_export.hpp
index 8d2f4fe944..ae8cd5edbb 100644
--- a/src/pull_module/optimum_export.hpp
+++ b/src/pull_module/optimum_export.hpp
@@ -24,7 +24,19 @@ class Status;
 
 class OptimumDownloader : public IModelDownloader {
 public:
-    OptimumDownloader(const ExportSettings& exportSettings, const GraphExportType& task, const std::string& inSourceModel, const std::string& inDownloadPath, bool inOverwrite, const std::string& cliExportCmd = "optimum-cli export openvino ", const std::string& cliCheckCmd = "optimum-cli -h");
+    // cliExportCmd / convertExportCmd are command prefixes: arguments are appended to them verbatim,
+    // so the defaults end with a space. cliCheckCmd / convertCheckCmd are run as-is and must exit
+    // with 0 when the tool is available; the convert_tokenizer probe defaults to the platform's
+    // executable lookup command ("where" on Windows, "which" elsewhere).
+    OptimumDownloader(const ExportSettings& exportSettings, const GraphExportType& task, const std::string& inSourceModel,
+        const std::string& inDownloadPath, bool inOverwrite, const std::string& cliExportCmd = "optimum-cli export openvino ",
+        const std::string& cliCheckCmd = "optimum-cli -h",
+        const std::string& convertExportCmd = "convert_tokenizer ",
+#ifdef _WIN32
+        const std::string& convertCheckCmd = "where convert_tokenizer");
+#else
+        const std::string& convertCheckCmd = "which convert_tokenizer");
+#endif
     Status downloadModel() override;
 
 protected:
@@ -32,9 +44,18 @@ class OptimumDownloader : public IModelDownloader {
     const GraphExportType task;
     std::string OPTIMUM_CLI_CHECK_COMMAND;
     std::string OPTIMUM_CLI_EXPORT_COMMAND;
+    std::string CONVERT_TOKENIZER_CHECK_COMMAND;
+    std::string CONVERT_TOKENIZER_EXPORT_COMMAND;
 
     Status checkRequiredToolsArePresent();
+    // Existence checks for openvino_detokenizer.xml / openvino_tokenizer.xml in downloadPath.
+    bool checkIfDetokenizerFileIsExported();
+    bool checkIfTokenizerFileIsExported();
     std::string getExportCmd();
+    // Builders for the convert_tokenizer command line; getConvertCmd() dispatches on task.
+    std::string getConvertCmd();
+    std::string getConvertCmdWithDetokenizer();
+    std::string getConvertCmdOnlyTokenizer();
     std::string getExportCmdText();
     std::string getExportCmdEmbeddings();
     std::string getExportCmdRerank();
diff --git a/src/status.cpp b/src/status.cpp
index 6dcfae8e34..38640f52fc 100644
--- a/src/status.cpp
+++ b/src/status.cpp
@@ -342,6 +342,7 @@ const std::unordered_map Status::statusMessageMap = {
     {StatusCode::HF_FAILED_TO_INIT_LIBGIT2, "Failed to initialize libgit2 library"},
     {StatusCode::HF_FAILED_TO_INIT_OPTIMUM_CLI, "Failed to run optimum-cli executable"},
     {StatusCode::HF_RUN_OPTIMUM_CLI_EXPORT_FAILED, "Failed to run optimum-cli export command"},
+    {StatusCode::HF_RUN_CONVERT_TOKENIZER_EXPORT_FAILED, "Failed to run convert-tokenizer export command"},
     {StatusCode::HF_GIT_CLONE_FAILED, "Failed in libgit2 execution of clone method"},
 
     {StatusCode::PARTIAL_END, "Request has finished and no further communication is needed"},
diff --git a/src/status.hpp b/src/status.hpp
index 39c31c6bfe..d604e792d2 100644
--- a/src/status.hpp
+++ b/src/status.hpp
@@ -354,6 +354,7 @@ enum class StatusCode {
     HF_FAILED_TO_INIT_LIBGIT2,
     HF_FAILED_TO_INIT_OPTIMUM_CLI,
     HF_RUN_OPTIMUM_CLI_EXPORT_FAILED,
+    HF_RUN_CONVERT_TOKENIZER_EXPORT_FAILED,
     HF_GIT_CLONE_FAILED,
 
     PARTIAL_END,
diff --git a/src/test/pull_hf_model_test.cpp b/src/test/pull_hf_model_test.cpp
index 8d35a7b623..5993d4cffa 100644
--- a/src/test/pull_hf_model_test.cpp
+++ b/src/test/pull_hf_model_test.cpp
@@ -286,10 +286,15 @@ class TestOptimumDownloader : public ovms::OptimumDownloader {
     TestOptimumDownloader(const ovms::HFSettingsImpl& inHfSettings) :
         ovms::OptimumDownloader(inHfSettings.exportSettings, inHfSettings.task, inHfSettings.sourceModel, ovms::HfDownloader::getGraphDirectory(inHfSettings.downloadPath, inHfSettings.sourceModel), inHfSettings.overwriteModels) {}
     std::string getExportCmd() { return ovms::OptimumDownloader::getExportCmd(); }
+    std::string getConvertCmd() { return ovms::OptimumDownloader::getConvertCmd(); }
     std::string getGraphDirectory() { return ovms::OptimumDownloader::getGraphDirectory(); }
     void setExportCliCheckCommand(const std::string& input) { this->OPTIMUM_CLI_CHECK_COMMAND = input; }
+    void setConvertCliCheckCommand(const std::string& input) { this->CONVERT_TOKENIZER_CHECK_COMMAND = input; }
     void setExportCliExportCommand(const std::string& input) { this->OPTIMUM_CLI_EXPORT_COMMAND = input; }
+    void setConvertCliExportCommand(const std::string& input) { this->CONVERT_TOKENIZER_EXPORT_COMMAND = input; }
     ovms::Status checkRequiredToolsArePresent() { return ovms::OptimumDownloader::checkRequiredToolsArePresent(); }
+    bool checkIfDetokenizerFileIsExported() { return ovms::OptimumDownloader::checkIfDetokenizerFileIsExported(); }
+    bool checkIfTokenizerFileIsExported() { return ovms::OptimumDownloader::checkIfTokenizerFileIsExported(); }
 };
 
 class TestHfDownloader : public ovms::HfDownloader {
@@ -347,48 +352,101 @@ class TestOptimumDownloaderSetup : public ::testing::Test {
     }
 };
 
+class TestOptimumDownloaderSetupWithFile : public TestOptimumDownloaderSetup {
+public:
+    ovms::HFSettingsImpl inHfSettings;
+    std::string cliMockPath;
+    std::filesystem::path file_path;
+    std::filesystem::path dir_path;
+    void TearDown() override {
+        std::filesystem::remove(file_path);
+        std::filesystem::remove_all(dir_path);
+    }
+};
+
 TEST_F(TestOptimumDownloaderSetup, Methods) {
     std::unique_ptr optimumDownloader = std::make_unique(inHfSettings);
     std::string expectedPath = inHfSettings.downloadPath + "/" + inHfSettings.sourceModel;
     std::string expectedCmd = "optimum-cli export openvino --model model/name --trust-remote-code --weight-format fp64 --param --param value \\path\\to\\Download\\model\\name";
+    std::string expectedCmd2 = "convert_tokenizer model/name --with-detokenizer -o \\path\\to\\Download\\model\\name";
 #ifdef _WIN32
     std::replace(expectedPath.begin(), expectedPath.end(), '/', '\\');
 #endif
 #ifdef __linux__
     std::replace(expectedCmd.begin(), expectedCmd.end(), '\\', '/');
+    std::replace(expectedCmd2.begin(), expectedCmd2.end(), '\\', '/');
 #endif
     ASSERT_EQ(optimumDownloader->getGraphDirectory(), expectedPath);
     ASSERT_EQ(optimumDownloader->getExportCmd(), expectedCmd);
+    ASSERT_EQ(optimumDownloader->getConvertCmd(), expectedCmd2);
 }
 
 TEST_F(TestOptimumDownloaderSetup, RerankExportCmd) {
     inHfSettings.task = ovms::RERANK_GRAPH;
     std::unique_ptr optimumDownloader = std::make_unique(inHfSettings);
     std::string expectedCmd = "optimum-cli export openvino --disable-convert-tokenizer --model model/name --trust-remote-code --weight-format fp64 --task text-classification \\path\\to\\Download\\model\\name";
+    std::string expectedCmd2 = "convert_tokenizer model/name -o \\path\\to\\Download\\model\\name";
 #ifdef __linux__
     std::replace(expectedCmd.begin(), expectedCmd.end(), '\\', '/');
+    std::replace(expectedCmd2.begin(), expectedCmd2.end(), '\\', '/');
 #endif
     ASSERT_EQ(optimumDownloader->getExportCmd(), expectedCmd);
+    ASSERT_EQ(optimumDownloader->getConvertCmd(), expectedCmd2);
 }
 
 TEST_F(TestOptimumDownloaderSetup, ImageGenExportCmd) {
     inHfSettings.task = ovms::IMAGE_GENERATION_GRAPH;
     std::unique_ptr optimumDownloader = std::make_unique(inHfSettings);
     std::string expectedCmd = "optimum-cli export openvino --model model/name --weight-format fp64 \\path\\to\\Download\\model\\name";
+    std::string expectedCmd2 = "";
 #ifdef __linux__
     std::replace(expectedCmd.begin(), expectedCmd.end(), '\\', '/');
 #endif
     ASSERT_EQ(optimumDownloader->getExportCmd(), expectedCmd);
+    ASSERT_EQ(optimumDownloader->getConvertCmd(), expectedCmd2);
 }
 
 TEST_F(TestOptimumDownloaderSetup, EmbeddingsExportCmd) {
     inHfSettings.task = ovms::EMBEDDINGS_GRAPH;
     std::unique_ptr optimumDownloader = std::make_unique(inHfSettings);
     std::string expectedCmd = "optimum-cli export openvino --disable-convert-tokenizer --task feature-extraction --library sentence_transformers --model model/name --trust-remote-code --weight-format fp64 \\path\\to\\Download\\model\\name";
+    std::string expectedCmd2 = "convert_tokenizer model/name -o \\path\\to\\Download\\model\\name";
 #ifdef __linux__
     std::replace(expectedCmd.begin(), expectedCmd.end(), '\\', '/');
+    std::replace(expectedCmd2.begin(), expectedCmd2.end(), '\\', '/');
 #endif
     ASSERT_EQ(optimumDownloader->getExportCmd(), expectedCmd);
+    ASSERT_EQ(optimumDownloader->getConvertCmd(), expectedCmd2);
+}
+
+TEST_F(TestOptimumDownloaderSetup, DetokenizerCheckNegative) {
+    std::unique_ptr optimumDownloader = std::make_unique(inHfSettings);
+    ASSERT_EQ(optimumDownloader->checkIfDetokenizerFileIsExported(), false);
+    ASSERT_EQ(optimumDownloader->checkIfTokenizerFileIsExported(), false);
+}
+
+TEST_F(TestOptimumDownloaderSetupWithFile, DetokenizerCheckPositive) {
+    file_path = getGenericFullPathForBazelOut("/ovms/bazel-bin/src/model/name/openvino_detokenizer.xml");
+    inHfSettings.sourceModel = "model/name";
+    inHfSettings.downloadPath = getGenericFullPathForBazelOut("/ovms/bazel-bin/src/");
+    dir_path = getGenericFullPathForBazelOut("/ovms/bazel-bin/src/model/");
+    std::filesystem::create_directories(getGenericFullPathForBazelOut("/ovms/bazel-bin/src/model/name"));
+    std::ofstream ofs(file_path);  // Creates an empty file
+    ofs.close();
+    std::unique_ptr optimumDownloader = std::make_unique(inHfSettings);
+    ASSERT_EQ(optimumDownloader->checkIfDetokenizerFileIsExported(), true);
+}
+
+TEST_F(TestOptimumDownloaderSetupWithFile, TokenizerCheckPositive) {
+    file_path = getGenericFullPathForBazelOut("/ovms/bazel-bin/src/model/name/openvino_tokenizer.xml");
+    inHfSettings.sourceModel = "model/name";
+    inHfSettings.downloadPath = getGenericFullPathForBazelOut("/ovms/bazel-bin/src/");
+    dir_path = getGenericFullPathForBazelOut("/ovms/bazel-bin/src/model/");
+    std::filesystem::create_directories(getGenericFullPathForBazelOut("/ovms/bazel-bin/src/model/name"));
+    std::ofstream ofs(file_path);  // Creates an empty file
+    ofs.close();
+    std::unique_ptr optimumDownloader = std::make_unique(inHfSettings);
+    ASSERT_EQ(optimumDownloader->checkIfTokenizerFileIsExported(), true);
 }
 
 TEST_F(TestOptimumDownloaderSetup, UnknownExportCmd) {
@@ -405,17 +463,32 @@ TEST_F(TestOptimumDownloaderSetup, NegativeWrongPath) {
 
 TEST_F(TestOptimumDownloaderSetup, NegativeExportCommandFailed) {
     std::unique_ptr optimumDownloader = std::make_unique(inHfSettings);
-    optimumDownloader->setExportCliCheckCommand("ls");
-#ifdef _WIN32
-    optimumDownloader->setExportCliCheckCommand("dir");
-#endif
+    optimumDownloader->setExportCliCheckCommand("echo ");
+    optimumDownloader->setConvertCliCheckCommand("echo ");
     optimumDownloader->setExportCliExportCommand("NonExistingCommand22");
     ASSERT_EQ(optimumDownloader->downloadModel(), ovms::StatusCode::HF_RUN_OPTIMUM_CLI_EXPORT_FAILED);
 }
 
+TEST_F(TestOptimumDownloaderSetup, NegativeConvertCommandFailed) {
+    std::unique_ptr optimumDownloader = std::make_unique(inHfSettings);
+    optimumDownloader->setExportCliCheckCommand("echo ");
+    optimumDownloader->setConvertCliCheckCommand("echo ");
+    optimumDownloader->setExportCliExportCommand("echo ");
+    optimumDownloader->setConvertCliExportCommand("nonExistingCommand222");
+    ASSERT_EQ(optimumDownloader->downloadModel(), ovms::StatusCode::HF_RUN_CONVERT_TOKENIZER_EXPORT_FAILED);
+}
+
 TEST_F(TestOptimumDownloaderSetup, NegativeCheckOptimumExistsCommandFailed) {
     std::unique_ptr optimumDownloader = std::make_unique(inHfSettings);
     optimumDownloader->setExportCliCheckCommand("NonExistingCommand33");
+    optimumDownloader->setConvertCliCheckCommand("echo ");
+    ASSERT_EQ(optimumDownloader->checkRequiredToolsArePresent(), ovms::StatusCode::HF_FAILED_TO_INIT_OPTIMUM_CLI);
+}
+
+TEST_F(TestOptimumDownloaderSetup, NegativeCheckConverterExistsCommandFailed) {
+    std::unique_ptr optimumDownloader = std::make_unique(inHfSettings);
+    optimumDownloader->setExportCliCheckCommand("echo ");
+    optimumDownloader->setConvertCliCheckCommand("NonExistingCommand33");
     ASSERT_EQ(optimumDownloader->checkRequiredToolsArePresent(), ovms::StatusCode::HF_FAILED_TO_INIT_OPTIMUM_CLI);
 }
 
@@ -423,6 +496,7 @@ TEST_F(TestOptimumDownloaderSetup, PositiveOptimumExistsCommandPassed) {
     std::unique_ptr optimumDownloader = std::make_unique(inHfSettings);
     cliMockPath += " -h";
     optimumDownloader->setExportCliCheckCommand(cliMockPath);
+    optimumDownloader->setConvertCliCheckCommand("echo ");
     ASSERT_EQ(optimumDownloader->checkRequiredToolsArePresent(), ovms::StatusCode::OK);
 }
 
@@ -430,8 +504,10 @@ TEST_F(TestOptimumDownloaderSetup, PositiveOptimumExportCommandPassed) {
     std::unique_ptr optimumDownloader = std::make_unique(inHfSettings);
     std::string cliCheckCommand = cliMockPath += " -h";
     optimumDownloader->setExportCliCheckCommand(cliCheckCommand);
+    optimumDownloader->setConvertCliCheckCommand("echo ");
     cliMockPath += " export";
     optimumDownloader->setExportCliExportCommand(cliMockPath);
+    optimumDownloader->setConvertCliExportCommand("echo ");
     ASSERT_EQ(optimumDownloader->downloadModel(), ovms::StatusCode::OK);
 }
 
diff --git a/windows_test.bat b/windows_test.bat
index 2e5449d755..a426e93c02 100644
--- a/windows_test.bat
+++ b/windows_test.bat
@@ -34,9 +34,15 @@ IF "%~2"=="--with_python" (
     set "bazelBuildArgs=--config=win_mp_on_py_off --action_env OpenVINO_DIR=%openvino_dir%"
 )
 
+IF "%~3"=="" (
+    set "gtestFilter=*"
+) ELSE (
+    set "gtestFilter=%3"
+)
+
 set "buildTestCommand=bazel %bazelStartupCmd% build %bazelBuildArgs% --jobs=%NUMBER_OF_PROCESSORS% --verbose_failures //src:ovms_test"
 set "changeConfigsCmd=python windows_change_test_configs.py"
-set "runTest=%cd%\bazel-bin\src\ovms_test.exe --gtest_filter=* 2>&1 > win_full_test.log"
+set "runTest=%cd%\bazel-bin\src\ovms_test.exe --gtest_filter=!gtestFilter! 2>&1 | tee win_full_test.log"
 
 :: Setting PATH environment variable based on default windows node settings: Added ovms_windows specific python settings and c:/opt and removed unused Nvidia and OCL specific tools.
 :: When changing the values here you can print the node default PATH value and base your changes on it.
@@ -75,10 +81,12 @@ if !errorlevel! neq 0 exit /b !errorlevel!
 if !errorlevel! neq 0 exit /b !errorlevel!
 
 :: Start bazel build test
+:: Keep this command out of a pipe (e.g. "| tee"): after a pipeline errorlevel holds the exit code
+:: of the last command in the pipe, so a failed bazel build would be recorded below as success.
 %buildTestCommand% > win_build_test.log 2>&1
 set "bazelExitCode=!errorlevel!"
 :: Output the log to the console
 type win_build_test.log
 
 :: Check the exit code and exit if it's not 0
 if !bazelExitCode! neq 0 exit /b !bazelExitCode!