Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion packages/qvac-lib-infer-nmtcpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,19 @@ if(USE_BERGAMOT)
find_package(bergamot-translator CONFIG REQUIRED)
endif()

add_bare_module(qvac-lib-infer-nmtcpp EXPORTS)
bare_target(bare_target_value)
bare_module_target("." unused_target NAME module_name VERSION unused_version)
set(BACKENDS_SUBDIR_VALUE "${bare_target_value}/${module_name}")
message("Building qvac-lib-infer-nmtcpp with BACKENDS_SUBDIR='${BACKENDS_SUBDIR_VALUE}'")

set(BACKEND_DL_LIBS "")
if((ANDROID OR UNIX) AND NOT APPLE)
foreach(_backend ${GGML_AVAILABLE_BACKENDS})
list(APPEND BACKEND_DL_LIBS INSTALL TARGET ggml::${_backend})
endforeach()
endif()

add_bare_module(qvac-lib-infer-nmtcpp EXPORTS ${BACKEND_DL_LIBS})

if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
target_link_options(${qvac-lib-infer-nmtcpp}_module PRIVATE -Wl,--exclude-libs,ALL)
Expand All @@ -154,6 +166,7 @@ target_sources(
${PROJECT_SOURCE_DIR}/addon/src/model-interface/nmt_graph_encoder.cpp
${PROJECT_SOURCE_DIR}/addon/src/model-interface/nmt_beam_search.cpp
${PROJECT_SOURCE_DIR}/addon/src/model-interface/nmt_utils.cpp
${PROJECT_SOURCE_DIR}/addon/src/model-interface/NmtLazyInitializeBackend.cpp
)

# Add bergamot source files if enabled
Expand Down Expand Up @@ -185,6 +198,8 @@ target_compile_definitions(
JS_LOGGER
)

target_compile_definitions(${qvac-lib-infer-nmtcpp} PRIVATE BACKENDS_SUBDIR="${BACKENDS_SUBDIR_VALUE}")

# Add bergamot compile definition if enabled
if(USE_BERGAMOT)
target_compile_definitions(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#include "NmtLazyInitializeBackend.hpp"

#include <filesystem>
#include <string>

#include <ggml-backend.h>

#include "qvac-lib-inference-addon-cpp/Logger.hpp"

using namespace qvac_lib_inference_addon_cpp::logger;

// Locked by initialize(), incrementRefCount() and decrementRefCount() before
// they touch any of the shared state below.
std::mutex NmtLazyInitializeBackend::g_initMutex;
// Set to true at the end of a successful initialize(); set back to false by
// decrementRefCount() when the reference count reaches zero.
bool NmtLazyInitializeBackend::g_initialized = false;
// Non-empty backendsDir passed to the initialize() call that performed the
// load; left empty when that call used the default search path. Cleared
// together with g_initialized.
std::string NmtLazyInitializeBackend::g_recordedBackendsDir;
// Number of incrementRefCount() calls not yet matched by decrementRefCount().
int NmtLazyInitializeBackend::g_refCount = 0;

/**
 * Load the GGML backends unless the shared state is already marked as
 * initialized.
 *
 * @param backendsDir - directory to load backends from. When empty, the
 *                      default ggml_backend_load_all() lookup is used. When
 *                      BACKENDS_SUBDIR is defined it is appended to this
 *                      directory and the result is lexically normalized.
 * @param openclCacheDir - only consulted on Android: when non-empty,
 *                      "<openclCacheDir>/opencl-cache" is exported through the
 *                      GGML_OPENCL_CACHE_DIR environment variable before the
 *                      backends are loaded.
 * @return true when this call performed the load, false when the state was
 *         already initialized (a warning is logged if a different, non-empty
 *         directory was recorded earlier).
 */
bool NmtLazyInitializeBackend::initialize(
    const std::string& backendsDir, const std::string& openclCacheDir) {
  std::lock_guard<std::mutex> guard(g_initMutex);

  const bool hasExplicitDir = !backendsDir.empty();

  if (g_initialized) {
    // Nothing is reloaded here; only report a mismatch between the directory
    // used for the earlier load and the one requested now.
    const bool conflictingDir = hasExplicitDir &&
                                !g_recordedBackendsDir.empty() &&
                                backendsDir != g_recordedBackendsDir;
    if (conflictingDir) {
      QLOG(
          Priority::WARNING,
          "Backend already initialized with different backendsDir. "
          "Previously initialized at: " +
              g_recordedBackendsDir + ", requested: " + backendsDir);
    }
    return false;
  }

  if (hasExplicitDir) {
    g_recordedBackendsDir = backendsDir;
  }

#ifdef __ANDROID__
  if (!openclCacheDir.empty()) {
    // Must be exported before the backends are loaded below.
    const std::string kernelCacheDir =
        (std::filesystem::path(openclCacheDir) / "opencl-cache").string();
    setenv("GGML_OPENCL_CACHE_DIR", kernelCacheDir.c_str(), /*overwrite=*/1);
  }
#endif

  if (!hasExplicitDir) {
    QLOG(Priority::DEBUG, "Loading backends using default path");
    ggml_backend_load_all();
  } else {
    std::filesystem::path searchDir(backendsDir);
#ifdef BACKENDS_SUBDIR
    searchDir = (searchDir / std::filesystem::path(BACKENDS_SUBDIR))
                    .lexically_normal();
#endif
    const std::string searchDirStr = searchDir.string();
    QLOG(Priority::INFO, "Loading backends from directory: " + searchDirStr);
    ggml_backend_load_all_from_path(searchDirStr.c_str());
  }

  g_initialized = true;
  return true;
}

/**
 * Register one more user of the backend state. The counter is updated while
 * holding the shared mutex.
 */
void NmtLazyInitializeBackend::incrementRefCount() {
  const std::lock_guard<std::mutex> guard(g_initMutex);
  ++g_refCount;
}

/**
 * Release one user of the backend state.
 *
 * A call with a non-positive count is ignored. When the count reaches zero
 * while the state is marked initialized, the initialized flag and the recorded
 * backends directory are cleared so that a later initialize() call loads the
 * backends again.
 */
void NmtLazyInitializeBackend::decrementRefCount() {
  const std::lock_guard<std::mutex> guard(g_initMutex);

  if (g_refCount <= 0) {
    return;
  }

  --g_refCount;
  if (g_refCount != 0 || !g_initialized) {
    return;
  }

  QLOG(
      Priority::DEBUG,
      "Resetting backend state (reference count reached zero)");
  g_initialized = false;
  g_recordedBackendsDir.clear();
}

/**
 * Acquire a reference on the shared backend state and make sure the backends
 * are loaded.
 *
 * The reference is taken BEFORE initialize() is called. initialize() and
 * incrementRefCount() lock the shared mutex separately, so with the opposite
 * order another thread could release the last existing handle in between,
 * resetting the state to "not initialized" while this handle is about to
 * become a live user of it. Holding the reference first keeps the count above
 * zero for the whole duration of initialize().
 *
 * @param backendsDir - optional path to the backends directory.
 * @param openclCacheDir - optional writable directory for the OpenCL kernel
 *                         cache.
 */
NmtBackendsHandle::NmtBackendsHandle(
    const std::string& backendsDir, const std::string& openclCacheDir)
    : ownsHandle_(true) {
  NmtLazyInitializeBackend::incrementRefCount();
  try {
    NmtLazyInitializeBackend::initialize(backendsDir, openclCacheDir);
  } catch (...) {
    // The destructor does not run for an object whose constructor throws, so
    // give the reference back here to keep the count balanced.
    NmtLazyInitializeBackend::decrementRefCount();
    throw;
  }
}

/**
 * Give the reference back, but only if this handle still owns one
 * (default-constructed and moved-from handles do not).
 */
NmtBackendsHandle::~NmtBackendsHandle() {
  if (!ownsHandle_) {
    return;
  }
  NmtLazyInitializeBackend::decrementRefCount();
}

/**
 * Move constructor: takes over the ownership flag of `other` without touching
 * the reference count.
 */
NmtBackendsHandle::NmtBackendsHandle(NmtBackendsHandle&& other) noexcept
    : ownsHandle_(false) {
  const bool transferred = other.ownsHandle_;
  // The moved-from handle must not release the reference in its destructor.
  other.ownsHandle_ = false;
  ownsHandle_ = transferred;
}

/**
 * Move assignment: releases the reference currently owned by this handle (if
 * any) and then takes over the ownership flag of `other`. Self-assignment is a
 * no-op.
 */
NmtBackendsHandle&
NmtBackendsHandle::operator=(NmtBackendsHandle&& other) noexcept {
  if (this == &other) {
    return *this;
  }

  if (ownsHandle_) {
    NmtLazyInitializeBackend::decrementRefCount();
  }

  const bool incoming = other.ownsHandle_;
  // Leave the source non-owning so its destructor does not decrement again.
  other.ownsHandle_ = false;
  ownsHandle_ = incoming;
  return *this;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#pragma once

#include <mutex>
#include <string>

/**
 * Process-wide bookkeeping for loading the GGML backends used by NMT.
 *
 * Every member is static. While the state is marked initialized, further
 * initialize() calls do not load anything, so several TranslationModel
 * objects can share one load. A reference count tracks the live users; when
 * it drops to zero the state is reset and the next initialize() call loads
 * the backends again. All functions lock an internal mutex before touching
 * the shared state.
 */
class NmtLazyInitializeBackend {
public:
  /**
   * Load the backends if the state is not already marked initialized.
   * @param backendsDir - path to the backends directory (optional).
   *                      If empty, the default backend loading is used.
   * @param openclCacheDir - writable directory for the OpenCL kernel cache
   *                      (optional).
   * @return true if this call performed the initialization, false if the
   *         state was already initialized.
   */
  static bool initialize(
      const std::string& backendsDir = std::string(),
      const std::string& openclCacheDir = std::string());

  /**
   * Add one user to the reference count.
   */
  static void incrementRefCount();

  /**
   * Remove one user from the reference count; resets the shared state when
   * the count reaches zero.
   */
  static void decrementRefCount();

private:
  static std::mutex g_initMutex;
  static bool g_initialized;
  static std::string g_recordedBackendsDir;
  static int g_refCount;
};

/**
 * Scoped owner of one reference on the NMT backend state.
 *
 * The two-argument constructor initializes the backends (if needed) and takes
 * a reference; the destructor gives it back. Once the last owning handle is
 * gone the backend state is reset. Handles can be moved but not copied, and a
 * default-constructed or moved-from handle owns nothing.
 */
class NmtBackendsHandle {
public:
  /**
   * Create an empty handle that owns no reference.
   */
  NmtBackendsHandle() {}

  /**
   * Create an owning handle and increment the reference count.
   * @param backendsDir - optional path to the backends directory.
   * @param openclCacheDir - writable directory for the OpenCL kernel cache
   *                         (optional).
   */
  explicit NmtBackendsHandle(
      const std::string& backendsDir, const std::string& openclCacheDir = "");

  /**
   * Decrements the reference count if this handle owns a reference, which may
   * reset the backend state.
   */
  ~NmtBackendsHandle();

  // Copying is disabled: each reference has exactly one owner.
  NmtBackendsHandle(const NmtBackendsHandle&) = delete;
  NmtBackendsHandle& operator=(const NmtBackendsHandle&) = delete;

  // Moving transfers the reference and leaves the source empty.
  NmtBackendsHandle(NmtBackendsHandle&&) noexcept;
  NmtBackendsHandle& operator=(NmtBackendsHandle&&) noexcept;

private:
  // True while this handle is responsible for one decrementRefCount() call.
  bool ownsHandle_ = false;
};
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ PivotTranslationModel::PivotTranslationModel(
firstModel_(std::make_unique<TranslationModel>(firstModelPath)),
secondModel_(std::make_unique<TranslationModel>(secondModelPath)),
stopTranslation_(false) {

firstModel_->setConfig(std::move(firstModelConfig));
secondModel_->setConfig(std::move(secondModelConfig));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,12 @@ BackendType TranslationModel::detectBackendType(const std::string& modelPath) {
std::string filename = entry.path().filename().string();
// Check for bergamot model signatures
if (filename.find(".intgemm") != std::string::npos ||
(filename.find("vocab.") != std::string::npos && filename.find(".spm") != std::string::npos)) {
(filename.find("vocab.") != std::string::npos &&
filename.find(".spm") != std::string::npos)) {
QLOG(
qvac_lib_inference_addon_cpp::logger::Priority::INFO,
"[TRANSLATION MODEL] Detected Bergamot backend based on model files");
"[TRANSLATION MODEL] Detected Bergamot backend based on model "
"files");
return BackendType::BERGAMOT;
}
}
Expand All @@ -62,14 +64,16 @@ BackendType TranslationModel::detectBackendType(const std::string& modelPath) {
if (pathStr.find(".intgemm") != std::string::npos) {
QLOG(
qvac_lib_inference_addon_cpp::logger::Priority::INFO,
"[TRANSLATION MODEL] Detected Bergamot backend based on model filename");
"[TRANSLATION MODEL] Detected Bergamot backend based on model "
"filename");
return BackendType::BERGAMOT;
}
}
} catch (const std::exception& e) {
QLOG(
qvac_lib_inference_addon_cpp::logger::Priority::WARNING,
"[TRANSLATION MODEL] Error during backend detection: " + std::string(e.what()));
"[TRANSLATION MODEL] Error during backend detection: " +
std::string(e.what()));
}
#endif

Expand All @@ -88,6 +92,33 @@ void TranslationModel::unload() {
}

void TranslationModel::load() {
// Read backend loading config and initialize backends before any model
// loading. Keys are preserved in config_ so reload() can re-initialize with
// the same backends directory.
std::string backendsDir;
if (auto it = config_.find("backendsdir"); it != config_.end()) {
if (const auto* value = std::get_if<std::string>(&it->second)) {
backendsDir = *value;
} else {
QLOG(
qvac_lib_inference_addon_cpp::logger::Priority::WARNING,
"[TRANSLATION MODEL] 'backendsdir' config value is not a string; "
"ignoring");
}
}
std::string openclCacheDir;
if (auto it = config_.find("openclcachedir"); it != config_.end()) {
if (const auto* value = std::get_if<std::string>(&it->second)) {
openclCacheDir = *value;
} else {
QLOG(
qvac_lib_inference_addon_cpp::logger::Priority::WARNING,
"[TRANSLATION MODEL] 'openclcachedir' config value is not a string; "
"ignoring");
}
}
backendsHandle_.emplace(backendsDir, openclCacheDir);

QLOG(
qvac_lib_inference_addon_cpp::logger::Priority::INFO,
"[TRANSLATION MODEL] modelPath_: " + modelPath_);
Expand Down Expand Up @@ -335,10 +366,7 @@ std::any TranslationModel::process(const std::any& input) {
}
}

void TranslationModel::cancel() const
{
reset();
}
void TranslationModel::cancel() const { reset(); }
std::string TranslationModel::processString(const std::string& text) {
#ifdef HAVE_BERGAMOT
if (backendType_ == BackendType::BERGAMOT) {
Expand Down Expand Up @@ -546,7 +574,8 @@ TranslationModel::getConfig() const {
}

void TranslationModel::setConfig(
std::unordered_map<std::string, std::variant<double, int64_t, std::string>> config) {
std::unordered_map<std::string, std::variant<double, int64_t, std::string>>
config) {
config_ = std::move(config);
updateConfig();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

#include <memory>
#include <mutex>
#include <optional>
#include <string>
#include <unordered_map>
#include <variant>

#include "NmtLazyInitializeBackend.hpp"
#include "nmt.hpp"
#ifdef HAVE_BERGAMOT
#include "bergamot.hpp"
Expand All @@ -22,7 +24,9 @@ enum class BackendType {
#endif
};

class TranslationModel : public qvac_lib_inference_addon_cpp::model::IModel, public qvac_lib_inference_addon_cpp::model::IModelCancel {
class TranslationModel
: public qvac_lib_inference_addon_cpp::model::IModel,
public qvac_lib_inference_addon_cpp::model::IModelCancel {
public:
TranslationModel() {};

Expand All @@ -38,14 +42,14 @@ class TranslationModel : public qvac_lib_inference_addon_cpp::model::IModel, pub

void unload();

void reload();
void reload();

void reset() const;

void setUseGpu(bool useGpu);

std::unordered_map<std::string, std::variant<double, int64_t, std::string>>
getConfig() const;
getConfig() const;

bool isLoaded() const;

Expand Down Expand Up @@ -87,10 +91,12 @@ class TranslationModel : public qvac_lib_inference_addon_cpp::model::IModel, pub

BackendType backendType_ = BackendType::GGML;

mutable std::unique_ptr<nmt_context, decltype(&nmt_free)> nmtCtx_{nullptr, nmt_free};
mutable std::unique_ptr<nmt_context, decltype(&nmt_free)> nmtCtx_{
nullptr, nmt_free};

#ifdef HAVE_BERGAMOT
std::unique_ptr<bergamot_context, decltype(&bergamot_free)> bergamotCtx_{nullptr, bergamot_free};
std::unique_ptr<bergamot_context, decltype(&bergamot_free)> bergamotCtx_{
nullptr, bergamot_free};
#endif

mutable bool isFirstSentence_ = true;
Expand All @@ -99,6 +105,8 @@ class TranslationModel : public qvac_lib_inference_addon_cpp::model::IModel, pub

std::unordered_map<std::string, std::variant<double, int64_t, std::string>>
config_;

std::optional<NmtBackendsHandle> backendsHandle_;
};

} // namespace qvac_lib_inference_addon_nmt
Loading
Loading