Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/Features/UnifiedWater/WaterCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

#include <BS_thread_pool.hpp>

#include "Utils/WinApi.h"

bool WaterCache::SetCurrentWorldSpace(const RE::TESWorldSpace* worldSpace)
{
if (!worldSpace)
Expand Down Expand Up @@ -214,7 +216,8 @@ bool WaterCache::GenerateCaches()
}
}

const unsigned hw = std::max(1u, std::thread::hardware_concurrency());
// Use P-core logical count on Intel hybrid CPUs; falls back to hardware_concurrency() on non-hybrid.
const unsigned hw = Util::GetPerformanceCoreCount();
const unsigned threads = std::max(1u, hw > 4 ? hw - 4 : (hw * 3) / 4);
async.pool = std::make_unique<BS::thread_pool<>>(threads);

Expand Down
108 changes: 103 additions & 5 deletions src/Menu/AdvancedSettingsRenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,15 +126,16 @@ void AdvancedSettingsRenderer::RenderLoggingSection()
ImGui::SliderInt("Compiler Threads", &shaderCache->compilationThreadCount, 1, static_cast<int32_t>(std::thread::hardware_concurrency()));
if (auto _tt = Util::HoverTooltipWrapper()) {
ImGui::Text(
"Number of threads to use to compile shaders. "
"The more threads the faster compilation will finish but may make the system unresponsive. ");
"Number of threads used to compile shaders at startup. "
"Defaults to all logical cores minus one for OS headroom (E-cores included). "
"Higher values finish compilation faster but may make the system less responsive.");
}
ImGui::SliderInt("Background Compiler Threads", &shaderCache->backgroundCompilationThreadCount, 1, static_cast<int32_t>(std::thread::hardware_concurrency()));
if (auto _tt = Util::HoverTooltipWrapper()) {
ImGui::Text(
"Number of threads to use to compile shaders while playing game. "
"This is activated if the startup compilation is skipped. "
"The more threads the faster compilation will finish but may make the system unresponsive. ");
"Number of threads used to compile shaders during gameplay. "
"Defaults to half of performance cores to avoid impacting the render thread. "
"Higher values finish compilation faster but may cause stuttering.");
}

ImGui::Columns(2, nullptr, false);
Expand Down Expand Up @@ -552,6 +553,103 @@ void AdvancedSettingsRenderer::RenderDeveloperSection()
// Statistics section (moved from Advanced/Logging)
if (ImGui::TreeNodeEx("Statistics", ImGuiTreeNodeFlags_DefaultOpen)) {
ImGui::Text(std::format("Shader Compiler : {}", shaderCache->GetShaderStatsString()).c_str());

// Derived parallelism metrics are computed lazily on demand and only shown
// once compilation has completed to avoid per-frame analysis while compiling.
if (!shaderCache->IsCompiling()) {
auto parallelism = shaderCache->GetParallelismStats();
if (parallelism.has_value()) {
const auto& p = parallelism.value();
ImGui::Spacing();
ImGui::TextDisabled("Parallelism (derived from %zu compiled tasks)", p.sampleCount);
if (auto _tt = Util::HoverTooltipWrapper()) {
ImGui::Text("Computed lazily from the last completed build.");
ImGui::Text("Only evaluated when this Statistics section is open.");
}
ImGui::Text("Work (W, sum of task wall times): %s", Util::FormatDuration(p.workMs).c_str());
if (auto _tt = Util::HoverTooltipWrapper()) {
ImGui::Text("Total compile work: sum of all per-shader wall-clock compile times.");
ImGui::Text("This is not CPU time; it is accumulated task elapsed time.");
ImGui::Text("Equivalent serial time on one worker if overhead stayed the same.");
}
ImGui::Text("Span (S, longest): %s", Util::FormatDuration(p.spanMs).c_str());
if (auto _tt = Util::HoverTooltipWrapper()) {
ImGui::Text("Critical-path lower bound, approximated by the single slowest shader.");
ImGui::Text("Even infinite cores cannot finish faster than this.");
}
ImGui::Text("Makespan (T_p): %s", Util::FormatDuration(p.makespanMs).c_str());
if (auto _tt = Util::HoverTooltipWrapper()) {
ImGui::Text("Observed wall-clock duration for the full shader build.");
}
ImGui::Text("Queue wait (avg/max): %s / %s",
Util::FormatDuration(p.avgQueueWaitMs).c_str(),
Util::FormatDuration(p.maxQueueWaitMs).c_str());
if (auto _tt = Util::HoverTooltipWrapper()) {
ImGui::Text("Time spent waiting in the ready queue before a worker started compilation.");
ImGui::Text("Useful for identifying scheduler-induced delay separate from compile cost.");
}
ImGui::Text("Average parallelism (W/S): %.2fx", p.avgParallelism);
if (auto _tt = Util::HoverTooltipWrapper()) {
ImGui::Text("Average useful concurrency in this workload.");
ImGui::Text("Roughly the worker count where adding more cores gives diminishing returns.");
}
ImGui::Text("Infinite-core efficiency (S/T_p): %.1f%%", 100.0 * p.infiniteCoreEfficiency);
if (auto _tt = Util::HoverTooltipWrapper()) {
ImGui::Text("How close runtime is to the infinite-core lower bound.");
ImGui::Text("100%% means T_p == S.");
}
ImGui::Text("Infinite-core gap: %.1f%%", p.infiniteCoreGapPercent);
if (auto _tt = Util::HoverTooltipWrapper()) {
ImGui::Text("Distance from ideal infinite-core time.");
ImGui::Text("Defined as 100 * (1 - S / T_p). Lower is better.");
}

ImGui::Spacing();
ImGui::TextDisabled("Infinite-core efficiency");
float efficiency = static_cast<float>(std::clamp(p.infiniteCoreEfficiency, 0.0, 1.0));
ImGui::ProgressBar(efficiency, ImVec2(-1.0f, 0.0f), std::format("{:.1f}% efficient / {:.1f}% gap", 100.0 * p.infiniteCoreEfficiency, p.infiniteCoreGapPercent).c_str());

ImGui::Spacing();
ImGui::TextDisabled("Relative durations (normalized)");
double maxMs = std::max({ p.workMs, p.spanMs, p.makespanMs, 1.0 });
// Draws `label`, then on the same line a progress bar filled to value / maxMs
// (clamped to [0, 1]). The bar overlay shows the formatted duration together
// with the same ratio expressed as a percentage.
auto drawRelativeBar = [maxMs](const char* label, double value) {
    const double normalized = value / maxMs;
    const float fill = static_cast<float>(std::clamp(normalized, 0.0, 1.0));

    ImGui::TextUnformatted(label);
    ImGui::SameLine();

    // Keep the overlay text alive in a named local for the duration of the call.
    const auto overlay = std::format("{} ({:.1f}%)", Util::FormatDuration(value), 100.0 * fill);
    ImGui::ProgressBar(fill, ImVec2(-1.0f, 0.0f), overlay.c_str());
};
drawRelativeBar("Span (S)", p.spanMs);
drawRelativeBar("Makespan (T_p)", p.makespanMs);
drawRelativeBar("Work (W)", p.workMs);
}
}

// Top-3 slowest shaders from the last build
auto topSlow = shaderCache->GetTopSlowTasks(3);
if (!topSlow.empty()) {
ImGui::Spacing();
ImGui::TextDisabled("Top %zu Slowest Shaders (last build)", topSlow.size());
for (size_t i = 0; i < topSlow.size(); ++i) {
const auto& rec = topSlow[i];
ImGui::Text("#%zu %s (weight %d)", i + 1,
Util::FormatDuration(rec.elapsedMs).c_str(), rec.priority);
ImGui::SameLine();
ImGui::TextDisabled("%s", rec.key.c_str());
if (ImGui::IsItemHovered()) {
if (auto _tt = Util::HoverTooltipWrapper()) {
ImGui::Text("%s", rec.key.c_str());
}
}
// Allow copying the full key with a right-click
if (ImGui::BeginPopupContextItem(std::format("##slowcopy{}", i).c_str())) {
if (ImGui::MenuItem("Copy key")) {
ImGui::SetClipboardText(rec.key.c_str());
}
ImGui::EndPopup();
}
}
}

ImGui::TreePop();
}

Expand Down
21 changes: 20 additions & 1 deletion src/Menu/OverlayRenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,23 @@ void OverlayRenderer::RenderShaderCompilationStatus(const std::function<const ch
}
ImGui::TextUnformatted(progressTitle.c_str());
ImGui::ProgressBar(percent, ImVec2(0.0f, 0.0f), progressOverlay.c_str());
if (state->IsDeveloperMode()) {
int32_t threadLimit = shaderCache->backgroundCompilation ? shaderCache->backgroundCompilationThreadCount : shaderCache->compilationThreadCount;
int compilationRunning = (int)shaderCache->compilationPool.get_tasks_running();
int heavyInFlight = shaderCache->GetHeavyTasksInFlight();
int heavyLimit = static_cast<int>(Util::GetPerformanceCoreCount());
uint64_t slow = shaderCache->GetSlowTasks();
uint64_t verySlow = shaderCache->GetVerySlowTasks();
ImGui::Text("Threads: %d / %d limit | Heavy: %d / %d P-cores | %d workers",
compilationRunning,
threadLimit,
heavyInFlight,
heavyLimit,
(int)shaderCache->compilationPool.get_thread_count());
if (slow > 0) {
ImGui::Text("Slow shaders: %llu (very slow: %llu)", slow, verySlow);
}
}
if (!shaderCache->backgroundCompilation && shaderCache->menuLoaded) {
auto skipShadersText = fmt::format(
"Press {} to proceed without completing shader compilation. ",
Expand All @@ -180,7 +197,9 @@ void OverlayRenderer::RenderShaderCompilationStatus(const std::function<const ch
ImGui::TextColored(themeSettings.StatusPalette.Warning, renderDocInformation.c_str());

ImGui::End();
} else if (failed) {
}

if (failed) {
if (!hide) {
ImGui::SetNextWindowPos(ImVec2(pos, pos));
if (!ImGui::Begin("ShaderCompilationInfo", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoSavedSettings)) {
Expand Down
72 changes: 72 additions & 0 deletions src/Menu/SettingsTabRenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "IconLoader.h"
#include "Menu.h"
#include "ShaderCache.h"
#include "State.h"
#include "ThemeManager.h"
#include "Util.h"

Expand Down Expand Up @@ -230,6 +231,77 @@ void SettingsTabRenderer::RenderShadersTab()
if (shaderCache->GetTotalTasks() > 0) {
ImGui::Text("Last shader cache build duration: %s",
shaderCache->GetShaderStatsString(true, true).c_str());

// Stacked bar showing compilation breakdown
{
uint64_t total = shaderCache->GetTotalTasks();
uint64_t completed = shaderCache->GetCompletedTasks();
uint64_t failed = shaderCache->GetFailedTasks();
uint64_t cacheHits = shaderCache->GetCachedHitTasks();
uint64_t slow = shaderCache->GetSlowTasks();
uint64_t verySlow = shaderCache->GetVerySlowTasks();
// Compiled = tasks that actually went through compilation.
// Cache hits are separate (returned early without queueing).
uint64_t compiled = completed;
uint64_t fast = compiled > slow ? compiled - slow : 0;
uint64_t medium = slow > verySlow ? slow - verySlow : 0; // 2-8s

// One category of the stacked compilation-breakdown bar.
// Entries whose count is zero are skipped both when drawing and in the tooltip.
struct Segment
{
// Number of tasks in this category; its share of `total` sets the segment width.
uint64_t count;
// Packed colour used to fill the segment and to tint its tooltip line.
ImU32 color;
// Category name printed in the hover tooltip next to the count and percentage.
const char* label;
};
Segment segments[] = {
{ cacheHits, IM_COL32(120, 120, 120, 255), "Deduplicated" },
{ fast, IM_COL32(80, 180, 80, 255), "Fast (<2s)" },
{ medium, IM_COL32(220, 180, 50, 255), "Slow (2-8s)" },
{ verySlow, IM_COL32(220, 60, 60, 255), "Very slow (>=8s)" },
{ failed, IM_COL32(160, 30, 30, 255), "Failed" },
};

float barHeight = 14.0f * Util::GetUIScale();
float barWidth = ImGui::GetContentRegionAvail().x;
ImVec2 cursor = ImGui::GetCursorScreenPos();
ImDrawList* drawList = ImGui::GetWindowDrawList();

// Background
drawList->AddRectFilled(cursor, ImVec2(cursor.x + barWidth, cursor.y + barHeight), IM_COL32(40, 40, 40, 255));

// Draw segments
float x = cursor.x;
for (auto& seg : segments) {
if (seg.count == 0 || total == 0)
continue;
float segWidth = (static_cast<float>(seg.count) / static_cast<float>(total)) * barWidth;
if (segWidth < 1.0f)
segWidth = 1.0f;
drawList->AddRectFilled(ImVec2(x, cursor.y), ImVec2(x + segWidth, cursor.y + barHeight), seg.color);
x += segWidth;
}

// Reserve space and handle tooltip
ImGui::Dummy(ImVec2(barWidth, barHeight));
if (ImGui::IsItemHovered()) {
ImGui::BeginTooltip();
for (auto& seg : segments) {
if (seg.count == 0)
continue;
float pct = total > 0 ? 100.0f * static_cast<float>(seg.count) / static_cast<float>(total) : 0.0f;
ImGui::TextColored(ImGui::ColorConvertU32ToFloat4(seg.color), "%s: %llu (%.1f%%)", seg.label, seg.count, pct);
}
ImGui::EndTooltip();
}
}

auto state = globals::state;
if (state->IsDeveloperMode()) {
ImGui::Text("Threads: %d compile, %d background, %d pool | P-cores: %d",
(int)shaderCache->compilationThreadCount,
(int)shaderCache->backgroundCompilationThreadCount,
(int)shaderCache->compilationPool.get_thread_count(),
(int)Util::GetPerformanceCoreCount());
}
}

ImGui::EndTabItem();
Expand Down
Loading
Loading