Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,31 @@ if(USE_GUIDANCE)
# bcrypt is needed for the rust std lib
target_link_libraries(onnxruntime-genai PRIVATE bcrypt)
endif()
if(MSVC)
  # Background: the Rust-built llguidance static library is always compiled against the
  # release MSVC CRT, because Rust has no notion of a debug CRT. Its .lib carries embedded
  # /DEFAULTLIB directives for the release CRT plus /NODEFAULTLIB directives that suppress
  # the debug CRT (msvcrtd, ucrtd, vcruntimed).
  #
  # Symptom: in Debug configurations the C++ code (e.g. onnxruntime-extensions) is built with
  # /MDd and references debug-only CRT functions such as _CrtDbgReport, which lives in
  # ucrtd.lib. Since the Rust .lib's embedded /NODEFAULTLIB suppresses ucrtd.lib, that symbol
  # ends up unresolved.
  #
  # Remedy, step 1: name the debug CRT import libraries explicitly for Debug builds.
  # /NODEFAULTLIB only suppresses /DEFAULTLIB auto-linking, so explicitly listed libraries
  # are still linked.
  foreach(_genai_debug_crt IN ITEMS msvcrtd ucrtd vcruntimed)
    target_link_libraries(onnxruntime-genai PRIVATE
      "$<$<CONFIG:Debug>:${_genai_debug_crt}.lib>"
    )
  endforeach()

  # Remedy, step 2: drop the conflicting release CRT libraries in Debug builds so the linker
  # does not emit LNK4098 warnings.
  foreach(_genai_release_crt IN ITEMS msvcrt ucrt vcruntime libcmt libucrt libvcruntime)
    target_link_options(onnxruntime-genai PRIVATE
      "$<$<CONFIG:Debug>:/NODEFAULTLIB:${_genai_release_crt}.lib>"
    )
  endforeach()
endif()
endif()

if(CMAKE_GENERATOR_TOOLSET MATCHES "Visual Studio")
Expand Down
4 changes: 3 additions & 1 deletion src/constrained_logits_processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,10 @@ std::unique_ptr<ConstrainedLogitsProcessor> CreateGuidanceLogitsProcessor(const
if (!state.params_->guidance_type.empty() && !state.params_->guidance_data.empty()) {
#if USE_GUIDANCE
return std::make_unique<GuidanceLogitsProcessor>(state);
#else
if (g_log.enabled)
Log("warning", "No supported ConstrainedLogitsProcessor found. To use guidance, build with use_guidance=true");
#endif
Log("warning", "No supported ConstrainedLogitsProcessor found. To use guidance, build with use_guidance=true");
}
return nullptr;
}
Expand Down
11 changes: 7 additions & 4 deletions src/generators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,10 @@ void Generator::ComputeLogits(DeviceSpan<int32_t> next_tokens) {
if (computed_logits_)
throw std::runtime_error("ComputeLogits called again without calling AppendTokens or GenerateNextToken first");

if (last_action_ == Action::generated && guidance_logits_processor_) {
// search_->GetSequenceLength() != next_tokens.size() implies that this is not the first time ComputeLogits
// is being called (i.e. we're not computing logits for the initial input tokens), so we need to commit
// tokens to the guidance logits processor before running the model.
if (guidance_logits_processor_ && search_->GetSequenceLength() != next_tokens.size()) {
auto next_tokens_span = next_tokens.CopyDeviceToCpu();
guidance_logits_processor_->CommitTokens(next_tokens_span);
}
Expand All @@ -472,7 +475,7 @@ void Generator::ComputeLogits(DeviceSpan<int32_t> next_tokens) {
}
SetLogits(logits);

if (last_action_ == Action::generated && guidance_logits_processor_) {
if (guidance_logits_processor_ && search_->GetSequenceLength() != next_tokens.size()) {
auto ff_tokens = guidance_logits_processor_->GetFFTokens(0);
if (!ff_tokens.empty()) {
// process fast-forward tokens
Expand Down Expand Up @@ -514,7 +517,7 @@ bool Generator::IsDone() {
if (is_done) {
state_->Finalize(search_->GetSequenceLength());
if (guidance_logits_processor_) {
guidance_logits_processor_->Reset();
guidance_logits_processor_->ResetWithoutCompute();
last_action_ = Action::standard;
}
}
Expand Down Expand Up @@ -619,7 +622,7 @@ void Generator::RewindToLength(size_t new_length) {
search_->RewindTo(new_length);
state_->RewindTo(new_length);
if (guidance_logits_processor_) {
guidance_logits_processor_->Reset();
guidance_logits_processor_->ResetWithoutCompute();
}
computed_logits_ = false;
last_action_ = Action::rewound;
Expand Down
2 changes: 1 addition & 1 deletion src/models/model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class DirGuard {
}

~DirGuard() {
if (CHDIR(original_dir_.c_str()) != 0) {
if (CHDIR(original_dir_.c_str()) != 0 && g_log.enabled) {
Log("warning", "Failed to change back to original directory: " + original_dir_.string());
}
}
Expand Down
9 changes: 5 additions & 4 deletions src/models/qwen_vl_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,10 +196,11 @@ void Qwen2_5_VL_PipelineState::InjectVisionEmbeddings(const std::string& embeddi

// Warn if there's a mismatch between image tokens and vision features
if (image_embed_consumed_ != static_cast<size_t>(num_vision_tokens)) {
Log("warning", "Vision embedding mismatch: consumed " + std::to_string(image_embed_consumed_) +
" of " + std::to_string(num_vision_tokens) + " available vision tokens. " +
"This may indicate a mismatch between the number of image placeholders in the prompt " +
"and the number of images provided.");
if (g_log.enabled)
Log("warning", "Vision embedding mismatch: consumed " + std::to_string(image_embed_consumed_) +
" of " + std::to_string(num_vision_tokens) + " available vision tokens. " +
"This may indicate a mismatch between the number of image placeholders in the prompt " +
"and the number of images provided.");
}
}

Expand Down
Loading