Skip to content
Merged
96 changes: 93 additions & 3 deletions src/pull_module/optimum_export.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,23 +114,96 @@ std::string OptimumDownloader::getExportCmd() {
return cmd;
}

OptimumDownloader::OptimumDownloader(const ExportSettings& inExportSettings, const GraphExportType& inTask, const std::string& inSourceModel, const std::string& inDownloadPath, bool inOverwrite, const std::string& cliExportCmd, const std::string& cliCheckCmd) :
// Selects the convert_tokenizer command line for the configured task.
// Text generation exports tokenizer and detokenizer; embeddings and rerank
// export the tokenizer only; image generation yields an empty command.
// An uninitialised task is reported as an error and also yields an empty
// command.
std::string OptimumDownloader::getConvertCmd() {
    switch (this->task) {
    case TEXT_GENERATION_GRAPH:
        return getConvertCmdWithDetokenizer();
    case EMBEDDINGS_GRAPH:
    case RERANK_GRAPH:
        // Both tasks share the tokenizer-only conversion.
        return getConvertCmdOnlyTokenizer();
    case IMAGE_GENERATION_GRAPH:
        return "";
    case UNKNOWN_GRAPH: {
        SPDLOG_ERROR("Optimum cli task options not initialised.");
        return "";
    }
    }

    // Reached only for a value outside the enumerators handled above.
    return "";
}

// Builds "<convert command><source model> --with-detokenizer -o <download path>".
// Used by getConvertCmd() for the text generation task.
// NOTE(review): a reviewer pointed out that some pipelines have no
// detokenizer - confirm --with-detokenizer was tested against such models.
std::string OptimumDownloader::getConvertCmdWithDetokenizer() {
    std::ostringstream oss;
    // clang-format off
    oss << this->CONVERT_TOKENIZER_EXPORT_COMMAND;
    oss << this->sourceModel;
    oss << " --with-detokenizer -o ";
    oss << this->downloadPath;
    // clang-format on

    return oss.str();
}

// Builds "<convert command><source model> -o <download path>", i.e. the
// convert_tokenizer invocation without the detokenizer flag.
std::string OptimumDownloader::getConvertCmdOnlyTokenizer() {
    std::ostringstream command;
    command << this->CONVERT_TOKENIZER_EXPORT_COMMAND
            << this->sourceModel
            << " -o "
            << this->downloadPath;
    return command.str();
}

// Returns true when openvino_detokenizer.xml exists in the download directory.
bool OptimumDownloader::checkIfDetokenizerFileIsExported() {
    const auto detokenizerXml = FileSystem::joinPath({this->downloadPath, "openvino_detokenizer.xml"});
    return std::filesystem::exists(detokenizerXml);
}

// Returns true when openvino_tokenizer.xml exists in the download directory.
bool OptimumDownloader::checkIfTokenizerFileIsExported() {
    const auto tokenizerXml = FileSystem::joinPath({this->downloadPath, "openvino_tokenizer.xml"});
    return std::filesystem::exists(tokenizerXml);
}

// Stores the export settings, the task, and the four external command strings:
// the optimum-cli check/export commands and the convert_tokenizer check/export
// commands. Source model, download path and overwrite flag are forwarded to
// IModelDownloader.
// The initializer list follows the member declaration order, and has a single
// terminating "{}" (a leftover pre-change line used to close it early).
OptimumDownloader::OptimumDownloader(const ExportSettings& inExportSettings, const GraphExportType& inTask,
    const std::string& inSourceModel, const std::string& inDownloadPath, bool inOverwrite, const std::string& cliExportCmd,
    const std::string& cliCheckCmd, const std::string& convertExportCmd, const std::string& convertCheckCmd) :
    IModelDownloader(inSourceModel, inDownloadPath, inOverwrite),
    exportSettings(inExportSettings),
    task(inTask),
    OPTIMUM_CLI_CHECK_COMMAND(cliCheckCmd),
    OPTIMUM_CLI_EXPORT_COMMAND(cliExportCmd),
    CONVERT_TOKENIZER_CHECK_COMMAND(convertCheckCmd),
    CONVERT_TOKENIZER_EXPORT_COMMAND(convertExportCmd) {}

// Probes for the two external tools needed to pull and convert a model.
// Runs the optimum-cli check command first, then the convert_tokenizer check
// command. Returns StatusCode::HF_FAILED_TO_INIT_OPTIMUM_CLI if either probe
// exits with a non-zero code, StatusCode::OK otherwise.
Status OptimumDownloader::checkRequiredToolsArePresent() {
    int retCode = -1;
    std::string output = exec_cmd(this->OPTIMUM_CLI_CHECK_COMMAND, retCode);
    if (retCode != 0) {
        SPDLOG_DEBUG("Command output {}", output);
        // Logged once; the older wording of this message was a duplicate.
        SPDLOG_ERROR("Trying to pull {} from HuggingFace but missing optimum-intel. Use the ovms package with optimum-intel installed.", this->sourceModel);
        return StatusCode::HF_FAILED_TO_INIT_OPTIMUM_CLI;
    }

    SPDLOG_DEBUG("Optimum-cli executable is present");

    // Start from a failing value again so the 0 left by the first probe cannot
    // be mistaken for success if exec_cmd does not overwrite retCode
    // (downloadModel resets it the same way before its second command).
    retCode = -1;
    output = exec_cmd(this->CONVERT_TOKENIZER_CHECK_COMMAND, retCode);
    if (retCode != 0) {
        SPDLOG_DEBUG("Command output {}", output);
        SPDLOG_ERROR("Trying to pull {} from HuggingFace but missing convert_tokenizer. This is likely because you are using OVMS without Python support which is required for pulling with conversion.", this->sourceModel);
        return StatusCode::HF_FAILED_TO_INIT_OPTIMUM_CLI;
    }

    SPDLOG_DEBUG("Convert_tokenizer executable is present");
    return StatusCode::OK;
}

Expand Down Expand Up @@ -171,6 +244,23 @@ Status OptimumDownloader::downloadModel() {
return StatusCode::HF_RUN_OPTIMUM_CLI_EXPORT_FAILED;
}

if (!this->checkIfTokenizerFileIsExported()) {
cmd = getConvertCmd();
retCode = -1;
// Tokenizer, detokenizer not required for image generation
if (cmd != "") {
SPDLOG_DEBUG("Tokenizer not found in the exported model. Exporting tokenizer and detokenizer from HF model.");
output = exec_cmd(cmd, retCode);
if (retCode != 0) {
SPDLOG_DEBUG("Command output {}", output);
SPDLOG_ERROR("convert_tokenizer command failed.");
return StatusCode::HF_RUN_CONVERT_TOKENIZER_EXPORT_FAILED;
}
}
} else {
SPDLOG_DEBUG("Tokenizer is found in the exported model directory. Convert_tokenizer command not required.");
}

return StatusCode::OK;
}

Expand Down
17 changes: 16 additions & 1 deletion src/pull_module/optimum_export.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,32 @@ class Status;

class OptimumDownloader : public IModelDownloader {
public:
OptimumDownloader(const ExportSettings& exportSettings, const GraphExportType& task, const std::string& inSourceModel, const std::string& inDownloadPath, bool inOverwrite, const std::string& cliExportCmd = "optimum-cli export openvino ", const std::string& cliCheckCmd = "optimum-cli -h");
OptimumDownloader(const ExportSettings& exportSettings, const GraphExportType& task, const std::string& inSourceModel,
const std::string& inDownloadPath, bool inOverwrite, const std::string& cliExportCmd = "optimum-cli export openvino ",
const std::string& cliCheckCmd = "optimum-cli -h",
const std::string& convertExportCmd = "convert_tokenizer ",
#ifdef _WIN32
const std::string& convertCheckCmd = "where convert_tokenizer");
#else
const std::string& convertCheckCmd = "which convert_tokenizer");
#endif
Status downloadModel() override;

protected:
ExportSettings exportSettings;
const GraphExportType task;
std::string OPTIMUM_CLI_CHECK_COMMAND;
std::string OPTIMUM_CLI_EXPORT_COMMAND;
std::string CONVERT_TOKENIZER_CHECK_COMMAND;
std::string CONVERT_TOKENIZER_EXPORT_COMMAND;

Status checkRequiredToolsArePresent();
bool checkIfDetokenizerFileIsExported();
bool checkIfTokenizerFileIsExported();
std::string getExportCmd();
std::string getConvertCmd();
std::string getConvertCmdWithDetokenizer();
std::string getConvertCmdOnlyTokenizer();
std::string getExportCmdText();
std::string getExportCmdEmbeddings();
std::string getExportCmdRerank();
Expand Down
1 change: 1 addition & 0 deletions src/status.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ const std::unordered_map<StatusCode, std::string> Status::statusMessageMap = {
{StatusCode::HF_FAILED_TO_INIT_LIBGIT2, "Failed to initialize libgit2 library"},
{StatusCode::HF_FAILED_TO_INIT_OPTIMUM_CLI, "Failed to run optimum-cli executable"},
{StatusCode::HF_RUN_OPTIMUM_CLI_EXPORT_FAILED, "Failed to run optimum-cli export command"},
{StatusCode::HF_RUN_CONVERT_TOKENIZER_EXPORT_FAILED, "Failed to run convert-tokenizer export command"},
{StatusCode::HF_GIT_CLONE_FAILED, "Failed in libgit2 execution of clone method"},

{StatusCode::PARTIAL_END, "Request has finished and no further communication is needed"},
Expand Down
1 change: 1 addition & 0 deletions src/status.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,7 @@ enum class StatusCode {
HF_FAILED_TO_INIT_LIBGIT2,
HF_FAILED_TO_INIT_OPTIMUM_CLI,
HF_RUN_OPTIMUM_CLI_EXPORT_FAILED,
HF_RUN_CONVERT_TOKENIZER_EXPORT_FAILED,
HF_GIT_CLONE_FAILED,

PARTIAL_END,
Expand Down
84 changes: 80 additions & 4 deletions src/test/pull_hf_model_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,10 +286,15 @@ class TestOptimumDownloader : public ovms::OptimumDownloader {
// Builds the downloader from HF settings; the four command-string arguments of
// ovms::OptimumDownloader are not passed, so their declared defaults apply.
TestOptimumDownloader(const ovms::HFSettingsImpl& inHfSettings) :
ovms::OptimumDownloader(inHfSettings.exportSettings, inHfSettings.task, inHfSettings.sourceModel, ovms::HfDownloader::getGraphDirectory(inHfSettings.downloadPath, inHfSettings.sourceModel), inHfSettings.overwriteModels) {}
// Forwarders that make base-class command builders callable from tests.
std::string getExportCmd() { return ovms::OptimumDownloader::getExportCmd(); }
std::string getConvertCmd() { return ovms::OptimumDownloader::getConvertCmd(); }
std::string getGraphDirectory() { return ovms::OptimumDownloader::getGraphDirectory(); }
// Setters that let a test replace the external check/export command strings.
void setExportCliCheckCommand(const std::string& input) { this->OPTIMUM_CLI_CHECK_COMMAND = input; }
void setConvertCliCheckCommand(const std::string& input) { this->CONVERT_TOKENIZER_CHECK_COMMAND = input; }
void setExportCliExportCommand(const std::string& input) { this->OPTIMUM_CLI_EXPORT_COMMAND = input; }
void setConvertCliExportCommand(const std::string& input) { this->CONVERT_TOKENIZER_EXPORT_COMMAND = input; }
// Forwarders for the tool-presence probe and the exported-file checks.
ovms::Status checkRequiredToolsArePresent() { return ovms::OptimumDownloader::checkRequiredToolsArePresent(); }
bool checkIfDetokenizerFileIsExported() { return ovms::OptimumDownloader::checkIfDetokenizerFileIsExported(); }
bool checkIfTokenizerFileIsExported() { return ovms::OptimumDownloader::checkIfTokenizerFileIsExported(); }
};

class TestHfDownloader : public ovms::HfDownloader {
Expand Down Expand Up @@ -347,48 +352,101 @@ class TestOptimumDownloaderSetup : public ::testing::Test {
}
};

// Fixture for tests that create a marker file on disk. Each test stores the
// file and directory it created in file_path / dir_path, and TearDown removes
// them afterwards.
class TestOptimumDownloaderSetupWithFile : public TestOptimumDownloaderSetup {
public:
// NOTE(review): tests on TestOptimumDownloaderSetup use members named
// inHfSettings and cliMockPath, so these two declarations presumably shadow
// the base fixture's members - confirm this is intended.
ovms::HFSettingsImpl inHfSettings;
std::string cliMockPath;
// Marker file created by the test; removed in TearDown.
std::filesystem::path file_path;
// Directory tree created by the test; removed recursively in TearDown.
std::filesystem::path dir_path;
// NOTE(review): the base fixture's TearDown (if it has one) is not called here - confirm that is fine.
void TearDown() override {
std::filesystem::remove(file_path);
std::filesystem::remove_all(dir_path);
}
};

// Checks the graph directory and the export / convert command lines produced
// for the settings prepared by the fixture.
TEST_F(TestOptimumDownloaderSetup, Methods) {
    auto downloader = std::make_unique<TestOptimumDownloader>(inHfSettings);

    std::string wantDir = inHfSettings.downloadPath + "/" + inHfSettings.sourceModel;
    std::string wantExport = "optimum-cli export openvino --model model/name --trust-remote-code --weight-format fp64 --param --param value \\path\\to\\Download\\model\\name";
    std::string wantConvert = "convert_tokenizer model/name --with-detokenizer -o \\path\\to\\Download\\model\\name";

    // Expected strings are normalised to the platform's path separator.
#ifdef _WIN32
    std::replace(wantDir.begin(), wantDir.end(), '/', '\\');
#endif
#ifdef __linux__
    std::replace(wantExport.begin(), wantExport.end(), '\\', '/');
    std::replace(wantConvert.begin(), wantConvert.end(), '\\', '/');
#endif

    ASSERT_EQ(downloader->getGraphDirectory(), wantDir);
    ASSERT_EQ(downloader->getExportCmd(), wantExport);
    ASSERT_EQ(downloader->getConvertCmd(), wantConvert);
}

// Rerank task: export disables tokenizer conversion, and the separate convert
// command is expected to export the tokenizer only.
TEST_F(TestOptimumDownloaderSetup, RerankExportCmd) {
    inHfSettings.task = ovms::RERANK_GRAPH;
    auto downloader = std::make_unique<TestOptimumDownloader>(inHfSettings);

    std::string wantExport = "optimum-cli export openvino --disable-convert-tokenizer --model model/name --trust-remote-code --weight-format fp64 --task text-classification \\path\\to\\Download\\model\\name";
    std::string wantConvert = "convert_tokenizer model/name -o \\path\\to\\Download\\model\\name";
#ifdef __linux__
    std::replace(wantExport.begin(), wantExport.end(), '\\', '/');
    std::replace(wantConvert.begin(), wantConvert.end(), '\\', '/');
#endif

    ASSERT_EQ(downloader->getExportCmd(), wantExport);
    ASSERT_EQ(downloader->getConvertCmd(), wantConvert);
}

// Image generation task: the export command is checked, and the convert
// command is expected to be empty.
TEST_F(TestOptimumDownloaderSetup, ImageGenExportCmd) {
    inHfSettings.task = ovms::IMAGE_GENERATION_GRAPH;
    auto downloader = std::make_unique<TestOptimumDownloader>(inHfSettings);

    std::string wantExport = "optimum-cli export openvino --model model/name --weight-format fp64 \\path\\to\\Download\\model\\name";
    const std::string wantConvert;  // empty: no convert command expected
#ifdef __linux__
    std::replace(wantExport.begin(), wantExport.end(), '\\', '/');
#endif

    ASSERT_EQ(downloader->getExportCmd(), wantExport);
    ASSERT_EQ(downloader->getConvertCmd(), wantConvert);
}

// Embeddings task: export disables tokenizer conversion, and the separate
// convert command is expected to export the tokenizer only.
TEST_F(TestOptimumDownloaderSetup, EmbeddingsExportCmd) {
    inHfSettings.task = ovms::EMBEDDINGS_GRAPH;
    auto downloader = std::make_unique<TestOptimumDownloader>(inHfSettings);

    std::string wantExport = "optimum-cli export openvino --disable-convert-tokenizer --task feature-extraction --library sentence_transformers --model model/name --trust-remote-code --weight-format fp64 \\path\\to\\Download\\model\\name";
    std::string wantConvert = "convert_tokenizer model/name -o \\path\\to\\Download\\model\\name";
#ifdef __linux__
    std::replace(wantExport.begin(), wantExport.end(), '\\', '/');
    std::replace(wantConvert.begin(), wantConvert.end(), '\\', '/');
#endif

    ASSERT_EQ(downloader->getExportCmd(), wantExport);
    ASSERT_EQ(downloader->getConvertCmd(), wantConvert);
}

// With nothing exported to the download directory, neither the detokenizer
// nor the tokenizer file should be reported as present.
TEST_F(TestOptimumDownloaderSetup, DetokenizerCheckNegative) {
    auto optimumDownloader = std::make_unique<TestOptimumDownloader>(inHfSettings);
    // Boolean results are checked with the dedicated boolean assertion.
    ASSERT_FALSE(optimumDownloader->checkIfDetokenizerFileIsExported());
    ASSERT_FALSE(optimumDownloader->checkIfTokenizerFileIsExported());
}

// Creates an empty openvino_detokenizer.xml under <downloadPath>/model/name
// and expects the downloader to report the detokenizer as exported.
TEST_F(TestOptimumDownloaderSetupWithFile, DetokenizerCheckPositive) {
    // Cleanup targets are recorded before touching the disk so the fixture's
    // TearDown removes them even if an assertion below fails.
    file_path = getGenericFullPathForBazelOut("/ovms/bazel-bin/src/model/name/openvino_detokenizer.xml");
    dir_path = getGenericFullPathForBazelOut("/ovms/bazel-bin/src/model/");
    inHfSettings.sourceModel = "model/name";
    inHfSettings.downloadPath = getGenericFullPathForBazelOut("/ovms/bazel-bin/src/");

    std::filesystem::create_directories(getGenericFullPathForBazelOut("/ovms/bazel-bin/src/model/name"));
    {
        std::ofstream ofs(file_path);  // Creates an empty file
        // Fail here, with a clear reason, if the marker file could not be
        // created, instead of failing later on the existence check.
        ASSERT_TRUE(ofs.is_open()) << "failed to create " << file_path.string();
    }

    auto optimumDownloader = std::make_unique<TestOptimumDownloader>(inHfSettings);
    ASSERT_TRUE(optimumDownloader->checkIfDetokenizerFileIsExported());
}

// Creates an empty openvino_tokenizer.xml under <downloadPath>/model/name
// and expects the downloader to report the tokenizer as exported.
TEST_F(TestOptimumDownloaderSetupWithFile, TokenizerCheckPositive) {
    // Cleanup targets are recorded before touching the disk so the fixture's
    // TearDown removes them even if an assertion below fails.
    file_path = getGenericFullPathForBazelOut("/ovms/bazel-bin/src/model/name/openvino_tokenizer.xml");
    dir_path = getGenericFullPathForBazelOut("/ovms/bazel-bin/src/model/");
    inHfSettings.sourceModel = "model/name";
    inHfSettings.downloadPath = getGenericFullPathForBazelOut("/ovms/bazel-bin/src/");

    std::filesystem::create_directories(getGenericFullPathForBazelOut("/ovms/bazel-bin/src/model/name"));
    {
        std::ofstream ofs(file_path);  // Creates an empty file
        // Fail here, with a clear reason, if the marker file could not be
        // created, instead of failing later on the existence check.
        ASSERT_TRUE(ofs.is_open()) << "failed to create " << file_path.string();
    }

    auto optimumDownloader = std::make_unique<TestOptimumDownloader>(inHfSettings);
    ASSERT_TRUE(optimumDownloader->checkIfTokenizerFileIsExported());
}

TEST_F(TestOptimumDownloaderSetup, UnknownExportCmd) {
Expand All @@ -405,33 +463,51 @@ TEST_F(TestOptimumDownloaderSetup, NegativeWrongPath) {

// The export command is replaced with a non-existing executable, so
// downloadModel() must report HF_RUN_OPTIMUM_CLI_EXPORT_FAILED.
TEST_F(TestOptimumDownloaderSetup, NegativeExportCommandFailed) {
    std::unique_ptr<TestOptimumDownloader> optimumDownloader = std::make_unique<TestOptimumDownloader>(inHfSettings);
    // "echo " stands in for both tool checks (expected to exit with 0 on the
    // supported platforms); the earlier "ls"/"dir" values were overwritten by
    // it before use and have been dropped.
    optimumDownloader->setExportCliCheckCommand("echo ");
    optimumDownloader->setConvertCliCheckCommand("echo ");
    optimumDownloader->setExportCliExportCommand("NonExistingCommand22");
    ASSERT_EQ(optimumDownloader->downloadModel(), ovms::StatusCode::HF_RUN_OPTIMUM_CLI_EXPORT_FAILED);
}

// Checks and export are stubbed with "echo "; only the convert_tokenizer
// export command is a non-existing executable, so downloadModel() is expected
// to return HF_RUN_CONVERT_TOKENIZER_EXPORT_FAILED.
TEST_F(TestOptimumDownloaderSetup, NegativeConvertCommandFailed) {
    auto downloader = std::make_unique<TestOptimumDownloader>(inHfSettings);

    downloader->setExportCliCheckCommand("echo ");
    downloader->setConvertCliCheckCommand("echo ");
    downloader->setExportCliExportCommand("echo ");
    downloader->setConvertCliExportCommand("nonExistingCommand222");

    const auto status = downloader->downloadModel();
    ASSERT_EQ(status, ovms::StatusCode::HF_RUN_CONVERT_TOKENIZER_EXPORT_FAILED);
}

// The optimum-cli check command is a non-existing executable, so the tool
// probe is expected to return HF_FAILED_TO_INIT_OPTIMUM_CLI.
TEST_F(TestOptimumDownloaderSetup, NegativeCheckOptimumExistsCommandFailed) {
    auto downloader = std::make_unique<TestOptimumDownloader>(inHfSettings);

    downloader->setExportCliCheckCommand("NonExistingCommand33");
    downloader->setConvertCliCheckCommand("echo ");

    const auto status = downloader->checkRequiredToolsArePresent();
    ASSERT_EQ(status, ovms::StatusCode::HF_FAILED_TO_INIT_OPTIMUM_CLI);
}

// The convert_tokenizer check command is a non-existing executable while the
// optimum-cli check is stubbed with "echo "; the probe is expected to return
// HF_FAILED_TO_INIT_OPTIMUM_CLI.
TEST_F(TestOptimumDownloaderSetup, NegativeCheckConverterExistsCommandFailed) {
    auto downloader = std::make_unique<TestOptimumDownloader>(inHfSettings);

    downloader->setExportCliCheckCommand("echo ");
    downloader->setConvertCliCheckCommand("NonExistingCommand33");

    const auto status = downloader->checkRequiredToolsArePresent();
    ASSERT_EQ(status, ovms::StatusCode::HF_FAILED_TO_INIT_OPTIMUM_CLI);
}

// Uses the fixture's mock CLI path with " -h" appended as the optimum-cli
// check and "echo " as the convert_tokenizer check; the probe is expected to
// return OK.
TEST_F(TestOptimumDownloaderSetup, PositiveOptimumExistsCommandPassed) {
    auto downloader = std::make_unique<TestOptimumDownloader>(inHfSettings);

    cliMockPath += " -h";
    downloader->setExportCliCheckCommand(cliMockPath);
    downloader->setConvertCliCheckCommand("echo ");

    const auto status = downloader->checkRequiredToolsArePresent();
    ASSERT_EQ(status, ovms::StatusCode::OK);
}

// Runs downloadModel() with the mock CLI as both the optimum-cli check and
// export command, and "echo " for both convert_tokenizer commands; the result
// is expected to be OK.
TEST_F(TestOptimumDownloaderSetup, PositiveOptimumExportCommandPassed) {
    auto downloader = std::make_unique<TestOptimumDownloader>(inHfSettings);

    // NOTE(review): " -h" is appended to cliMockPath itself, so the export
    // command set below ends up as "<mock> -h export" - confirm this is intended.
    cliMockPath += " -h";
    const std::string checkCommand = cliMockPath;
    downloader->setExportCliCheckCommand(checkCommand);
    downloader->setConvertCliCheckCommand("echo ");

    cliMockPath += " export";
    downloader->setExportCliExportCommand(cliMockPath);
    downloader->setConvertCliExportCommand("echo ");

    ASSERT_EQ(downloader->downloadModel(), ovms::StatusCode::OK);
}

Expand Down
12 changes: 8 additions & 4 deletions windows_test.bat
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,15 @@ IF "%~2"=="--with_python" (
set "bazelBuildArgs=--config=win_mp_on_py_off --action_env OpenVINO_DIR=%openvino_dir%"
)

IF "%~3"=="" (
set "gtestFilter=*"
) ELSE (
set "gtestFilter=%3"
)

set "buildTestCommand=bazel %bazelStartupCmd% build %bazelBuildArgs% --jobs=%NUMBER_OF_PROCESSORS% --verbose_failures //src:ovms_test"
set "changeConfigsCmd=python windows_change_test_configs.py"
set "runTest=%cd%\bazel-bin\src\ovms_test.exe --gtest_filter=* 2>&1 > win_full_test.log"
set "runTest=%cd%\bazel-bin\src\ovms_test.exe --gtest_filter=!gtestFilter! 2>&1 | tee win_full_test.log"

:: Setting PATH environment variable based on default windows node settings: Added ovms_windows specific python settings and c:/opt and removed unused Nvidia and OCL specific tools.
:: When changing the values here you can print the node default PATH value and base your changes on it.
Expand Down Expand Up @@ -75,10 +81,8 @@ if !errorlevel! neq 0 exit /b !errorlevel!
if !errorlevel! neq 0 exit /b !errorlevel!

:: Start bazel build test
%buildTestCommand% > win_build_test.log 2>&1
%buildTestCommand% 2>&1 | tee win_build_test.log
:: NOTE(review): when the build output is piped into tee, the errorlevel captured below may be the exit code of tee rather than of bazel - confirm a failed build still aborts the script.
set "bazelExitCode=!errorlevel!"
:: Output the log to the console
type win_build_test.log
:: Check the exit code and exit if it's not 0
if !bazelExitCode! neq 0 exit /b !bazelExitCode!

Expand Down