diff --git a/CMakeLists.txt b/CMakeLists.txt index 502cc06ebc..03b50ac7ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -235,6 +235,31 @@ if(USE_GUIDANCE) # bcrypt is needed for the rust std lib target_link_libraries(onnxruntime-genai PRIVATE bcrypt) endif() + if(MSVC) + # The Rust llguidance static library is always compiled against the release MSVC CRT + # (Rust has no debug CRT concept). The .lib embeds /DEFAULTLIB directives for the release + # CRT and /NODEFAULTLIB directives that suppress the debug CRT (msvcrtd, ucrtd, vcruntimed). + # In Debug builds, C++ code (e.g. onnxruntime-extensions) is compiled with /MDd and references + # debug-only CRT functions like _CrtDbgReport (in ucrtd.lib). Because the Rust .lib suppresses + # ucrtd.lib via its embedded /NODEFAULTLIB, _CrtDbgReport becomes unresolved. + # + # Fix: explicitly add the debug CRT import libraries in Debug builds. Explicitly specified + # libraries are not affected by /NODEFAULTLIB directives (those only suppress /DEFAULTLIB + # auto-linking). Also suppress the conflicting release CRT to avoid LNK4098 warnings. 
+ target_link_libraries(onnxruntime-genai PRIVATE + $<$<CONFIG:Debug>:msvcrtd.lib> + $<$<CONFIG:Debug>:ucrtd.lib> + $<$<CONFIG:Debug>:vcruntimed.lib> + ) + target_link_options(onnxruntime-genai PRIVATE + $<$<CONFIG:Debug>:/NODEFAULTLIB:msvcrt.lib> + $<$<CONFIG:Debug>:/NODEFAULTLIB:ucrt.lib> + $<$<CONFIG:Debug>:/NODEFAULTLIB:vcruntime.lib> + $<$<CONFIG:Debug>:/NODEFAULTLIB:libcmt.lib> + $<$<CONFIG:Debug>:/NODEFAULTLIB:libucrt.lib> + $<$<CONFIG:Debug>:/NODEFAULTLIB:libvcruntime.lib> + ) + endif() endif() if(CMAKE_GENERATOR_TOOLSET MATCHES "Visual Studio") diff --git a/src/constrained_logits_processor.cpp b/src/constrained_logits_processor.cpp index 42a7623fdc..0a3ac13c83 100644 --- a/src/constrained_logits_processor.cpp +++ b/src/constrained_logits_processor.cpp @@ -260,8 +260,10 @@ std::unique_ptr CreateGuidanceLogitsProcessor(const if (!state.params_->guidance_type.empty() && !state.params_->guidance_data.empty()) { #if USE_GUIDANCE return std::make_unique(state); +#else + if (g_log.enabled) + Log("warning", "No supported ConstrainedLogitsProcessor found. To use guidance, build with use_guidance=true"); #endif - Log("warning", "No supported ConstrainedLogitsProcessor found. To use guidance, build with use_guidance=true"); } return nullptr; } diff --git a/src/generators.cpp b/src/generators.cpp index 0473c51a07..67b7f34130 100644 --- a/src/generators.cpp +++ b/src/generators.cpp @@ -459,7 +459,10 @@ void Generator::ComputeLogits(DeviceSpan next_tokens) { if (computed_logits_) throw std::runtime_error("ComputeLogits called again without calling AppendTokens or GenerateNextToken first"); - if (last_action_ == Action::generated && guidance_logits_processor_) { + // search_->GetSequenceLength() != next_tokens.size() implies that this is not the first time ComputeLogits + // is being called (i.e. we're not computing logits for the initial input tokens), so we need to commit + // tokens to the guidance logits processor before running the model. 
+ if (guidance_logits_processor_ && search_->GetSequenceLength() != next_tokens.size()) { auto next_tokens_span = next_tokens.CopyDeviceToCpu(); guidance_logits_processor_->CommitTokens(next_tokens_span); } @@ -472,7 +475,7 @@ void Generator::ComputeLogits(DeviceSpan next_tokens) { } SetLogits(logits); - if (last_action_ == Action::generated && guidance_logits_processor_) { + if (guidance_logits_processor_ && search_->GetSequenceLength() != next_tokens.size()) { auto ff_tokens = guidance_logits_processor_->GetFFTokens(0); if (!ff_tokens.empty()) { // process fast-forward tokens @@ -514,7 +517,7 @@ bool Generator::IsDone() { if (is_done) { state_->Finalize(search_->GetSequenceLength()); if (guidance_logits_processor_) { - guidance_logits_processor_->Reset(); + guidance_logits_processor_->ResetWithoutCompute(); last_action_ = Action::standard; } } @@ -619,7 +622,7 @@ void Generator::RewindToLength(size_t new_length) { search_->RewindTo(new_length); state_->RewindTo(new_length); if (guidance_logits_processor_) { - guidance_logits_processor_->Reset(); + guidance_logits_processor_->ResetWithoutCompute(); } computed_logits_ = false; last_action_ = Action::rewound; diff --git a/src/models/model.cpp b/src/models/model.cpp index a79bdb55bf..ab5903410d 100644 --- a/src/models/model.cpp +++ b/src/models/model.cpp @@ -66,7 +66,7 @@ class DirGuard { } ~DirGuard() { - if (CHDIR(original_dir_.c_str()) != 0) { + if (CHDIR(original_dir_.c_str()) != 0 && g_log.enabled) { Log("warning", "Failed to change back to original directory: " + original_dir_.string()); } } diff --git a/src/models/qwen_vl_model.cpp b/src/models/qwen_vl_model.cpp index f510aabcf3..39aa9369a5 100644 --- a/src/models/qwen_vl_model.cpp +++ b/src/models/qwen_vl_model.cpp @@ -196,10 +196,11 @@ void Qwen2_5_VL_PipelineState::InjectVisionEmbeddings(const std::string& embeddi // Warn if there's a mismatch between image tokens and vision features if (image_embed_consumed_ != static_cast(num_vision_tokens)) { - 
Log("warning", "Vision embedding mismatch: consumed " + std::to_string(image_embed_consumed_) + - " of " + std::to_string(num_vision_tokens) + " available vision tokens. " + - "This may indicate a mismatch between the number of image placeholders in the prompt " + - "and the number of images provided."); + if (g_log.enabled) + Log("warning", "Vision embedding mismatch: consumed " + std::to_string(image_embed_consumed_) + + " of " + std::to_string(num_vision_tokens) + " available vision tokens. " + + "This may indicate a mismatch between the number of image placeholders in the prompt " + + "and the number of images provided."); } }