diff --git a/src/Features/UnifiedWater/WaterCache.cpp b/src/Features/UnifiedWater/WaterCache.cpp
index a2addeaf97..2da8f1cbc6 100644
--- a/src/Features/UnifiedWater/WaterCache.cpp
+++ b/src/Features/UnifiedWater/WaterCache.cpp
@@ -2,8 +2,6 @@
 
 #include <BS_thread_pool.hpp>
 
-#include "Utils/WinApi.h"
-
 bool WaterCache::SetCurrentWorldSpace(const RE::TESWorldSpace* worldSpace)
 {
 	if (!worldSpace)
@@ -216,8 +214,7 @@ bool WaterCache::GenerateCaches()
 		}
 	}
 
-	// Use P-core logical count on Intel hybrid CPUs; falls back to hardware_concurrency() on non-hybrid.
-	const unsigned hw = Util::GetPerformanceCoreCount();
+	const unsigned hw = std::max(1u, std::thread::hardware_concurrency());
 	const unsigned threads = std::max(1u, hw > 4 ? hw - 4 : (hw * 3) / 4);
 	async.pool = std::make_unique<BS::thread_pool<>>(threads);
 
diff --git a/src/Menu/AdvancedSettingsRenderer.cpp b/src/Menu/AdvancedSettingsRenderer.cpp
index 85310a0ec4..f985f6f680 100644
--- a/src/Menu/AdvancedSettingsRenderer.cpp
+++ b/src/Menu/AdvancedSettingsRenderer.cpp
@@ -126,16 +126,15 @@ void AdvancedSettingsRenderer::RenderLoggingSection()
 	ImGui::SliderInt("Compiler Threads", &shaderCache->compilationThreadCount, 1, static_cast<int32_t>(std::thread::hardware_concurrency()));
 	if (auto _tt = Util::HoverTooltipWrapper()) {
 		ImGui::Text(
-			"Number of threads used to compile shaders at startup. "
-			"Defaults to all logical cores minus one for OS headroom (E-cores included). "
-			"Higher values finish compilation faster but may make the system less responsive.");
+			"Number of threads to use to compile shaders. "
+			"The more threads the faster compilation will finish but may make the system unresponsive. ");
 	}
 	ImGui::SliderInt("Background Compiler Threads", &shaderCache->backgroundCompilationThreadCount, 1, static_cast<int32_t>(std::thread::hardware_concurrency()));
 	if (auto _tt = Util::HoverTooltipWrapper()) {
 		ImGui::Text(
-			"Number of threads used to compile shaders during gameplay. "
-			"Defaults to half of performance cores to avoid impacting the render thread. "
-			"Higher values finish compilation faster but may cause stuttering.");
+			"Number of threads to use to compile shaders while playing game. "
+			"This is activated if the startup compilation is skipped. "
+			"The more threads the faster compilation will finish but may make the system unresponsive. ");
 	}
 
 	ImGui::Columns(2, nullptr, false);
@@ -553,103 +552,6 @@ void AdvancedSettingsRenderer::RenderDeveloperSection()
 	// Statistics section (moved from Advanced/Logging)
 	if (ImGui::TreeNodeEx("Statistics", ImGuiTreeNodeFlags_DefaultOpen)) {
 		ImGui::Text(std::format("Shader Compiler : {}", shaderCache->GetShaderStatsString()).c_str());
-
-		// Derived parallelism metrics are computed lazily on demand and only shown
-		// once compilation has completed to avoid per-frame analysis while compiling.
-		if (!shaderCache->IsCompiling()) {
-			auto parallelism = shaderCache->GetParallelismStats();
-			if (parallelism.has_value()) {
-				const auto& p = parallelism.value();
-				ImGui::Spacing();
-				ImGui::TextDisabled("Parallelism (derived from %zu compiled tasks)", p.sampleCount);
-				if (auto _tt = Util::HoverTooltipWrapper()) {
-					ImGui::Text("Computed lazily from the last completed build.");
-					ImGui::Text("Only evaluated when this Statistics section is open.");
-				}
-				ImGui::Text("Work (W, sum of task wall times): %s", Util::FormatDuration(p.workMs).c_str());
-				if (auto _tt = Util::HoverTooltipWrapper()) {
-					ImGui::Text("Total compile work: sum of all per-shader wall-clock compile times.");
-					ImGui::Text("This is not CPU time; it is accumulated task elapsed time.");
-					ImGui::Text("Equivalent serial time on one worker if overhead stayed the same.");
-				}
-				ImGui::Text("Span (S, longest): %s", Util::FormatDuration(p.spanMs).c_str());
-				if (auto _tt = Util::HoverTooltipWrapper()) {
-					ImGui::Text("Critical-path lower bound, approximated by the single slowest shader.");
-					ImGui::Text("Even infinite cores cannot finish faster than this.");
-				}
-				ImGui::Text("Makespan (T_p): %s", Util::FormatDuration(p.makespanMs).c_str());
-				if (auto _tt = Util::HoverTooltipWrapper()) {
-					ImGui::Text("Observed wall-clock duration for the full shader build.");
-				}
-				ImGui::Text("Queue wait (avg/max): %s / %s",
-					Util::FormatDuration(p.avgQueueWaitMs).c_str(),
-					Util::FormatDuration(p.maxQueueWaitMs).c_str());
-				if (auto _tt = Util::HoverTooltipWrapper()) {
-					ImGui::Text("Time spent waiting in the ready queue before a worker started compilation.");
-					ImGui::Text("Useful for identifying scheduler-induced delay separate from compile cost.");
-				}
-				ImGui::Text("Average parallelism (W/S): %.2fx", p.avgParallelism);
-				if (auto _tt = Util::HoverTooltipWrapper()) {
-					ImGui::Text("Average useful concurrency in this workload.");
-					ImGui::Text("Roughly the worker count where adding more cores gives diminishing returns.");
-				}
-				ImGui::Text("Infinite-core efficiency (S/T_p): %.1f%%", 100.0 * p.infiniteCoreEfficiency);
-				if (auto _tt = Util::HoverTooltipWrapper()) {
-					ImGui::Text("How close runtime is to the infinite-core lower bound.");
-					ImGui::Text("100%% means T_p == S.");
-				}
-				ImGui::Text("Infinite-core gap: %.1f%%", p.infiniteCoreGapPercent);
-				if (auto _tt = Util::HoverTooltipWrapper()) {
-					ImGui::Text("Distance from ideal infinite-core time.");
-					ImGui::Text("Defined as 100 * (1 - S / T_p). Lower is better.");
-				}
-
-				ImGui::Spacing();
-				ImGui::TextDisabled("Infinite-core efficiency");
-				float efficiency = static_cast<float>(std::clamp(p.infiniteCoreEfficiency, 0.0, 1.0));
-				ImGui::ProgressBar(efficiency, ImVec2(-1.0f, 0.0f), std::format("{:.1f}% efficient / {:.1f}% gap", 100.0 * p.infiniteCoreEfficiency, p.infiniteCoreGapPercent).c_str());
-
-				ImGui::Spacing();
-				ImGui::TextDisabled("Relative durations (normalized)");
-				double maxMs = std::max({ p.workMs, p.spanMs, p.makespanMs, 1.0 });
-				auto drawRelativeBar = [maxMs](const char* label, double value) {
-					float ratio = static_cast<float>(std::clamp(value / maxMs, 0.0, 1.0));
-					ImGui::TextUnformatted(label);
-					ImGui::SameLine();
-					ImGui::ProgressBar(ratio, ImVec2(-1.0f, 0.0f), std::format("{} ({:.1f}%)", Util::FormatDuration(value), 100.0 * ratio).c_str());
-				};
-				drawRelativeBar("Span (S)", p.spanMs);
-				drawRelativeBar("Makespan (T_p)", p.makespanMs);
-				drawRelativeBar("Work (W)", p.workMs);
-			}
-		}
-
-		// Top-3 slowest shaders from the last build
-		auto topSlow = shaderCache->GetTopSlowTasks(3);
-		if (!topSlow.empty()) {
-			ImGui::Spacing();
-			ImGui::TextDisabled("Top %zu Slowest Shaders (last build)", topSlow.size());
-			for (size_t i = 0; i < topSlow.size(); ++i) {
-				const auto& rec = topSlow[i];
-				ImGui::Text("#%zu  %s  (weight %d)", i + 1,
-					Util::FormatDuration(rec.elapsedMs).c_str(), rec.priority);
-				ImGui::SameLine();
-				ImGui::TextDisabled("%s", rec.key.c_str());
-				if (ImGui::IsItemHovered()) {
-					if (auto _tt = Util::HoverTooltipWrapper()) {
-						ImGui::Text("%s", rec.key.c_str());
-					}
-				}
-				// Allow copying the full key with a right-click
-				if (ImGui::BeginPopupContextItem(std::format("##slowcopy{}", i).c_str())) {
-					if (ImGui::MenuItem("Copy key")) {
-						ImGui::SetClipboardText(rec.key.c_str());
-					}
-					ImGui::EndPopup();
-				}
-			}
-		}
-
 		ImGui::TreePop();
 	}
 
diff --git a/src/Menu/OverlayRenderer.cpp b/src/Menu/OverlayRenderer.cpp
index e7ccaeb5b5..c80bf97bed 100644
--- a/src/Menu/OverlayRenderer.cpp
+++ b/src/Menu/OverlayRenderer.cpp
@@ -168,23 +168,6 @@ void OverlayRenderer::RenderShaderCompilationStatus(const std::function<const ch
 		}
 		ImGui::TextUnformatted(progressTitle.c_str());
 		ImGui::ProgressBar(percent, ImVec2(0.0f, 0.0f), progressOverlay.c_str());
-		if (state->IsDeveloperMode()) {
-			int32_t threadLimit = shaderCache->backgroundCompilation ? shaderCache->backgroundCompilationThreadCount : shaderCache->compilationThreadCount;
-			int compilationRunning = (int)shaderCache->compilationPool.get_tasks_running();
-			int heavyInFlight = shaderCache->GetHeavyTasksInFlight();
-			int heavyLimit = static_cast<int>(Util::GetPerformanceCoreCount());
-			uint64_t slow = shaderCache->GetSlowTasks();
-			uint64_t verySlow = shaderCache->GetVerySlowTasks();
-			ImGui::Text("Threads: %d / %d limit | Heavy: %d / %d P-cores | %d workers",
-				compilationRunning,
-				threadLimit,
-				heavyInFlight,
-				heavyLimit,
-				(int)shaderCache->compilationPool.get_thread_count());
-			if (slow > 0) {
-				ImGui::Text("Slow shaders: %llu (very slow: %llu)", slow, verySlow);
-			}
-		}
 		if (!shaderCache->backgroundCompilation && shaderCache->menuLoaded) {
 			auto skipShadersText = fmt::format(
 				"Press {} to proceed without completing shader compilation. ",
@@ -197,9 +180,7 @@ void OverlayRenderer::RenderShaderCompilationStatus(const std::function<const ch
 			ImGui::TextColored(themeSettings.StatusPalette.Warning, renderDocInformation.c_str());
 
 		ImGui::End();
-	}
-
-	if (failed) {
+	} else if (failed) {
 		if (!hide) {
 			ImGui::SetNextWindowPos(ImVec2(pos, pos));
 			if (!ImGui::Begin("ShaderCompilationInfo", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoSavedSettings)) {
diff --git a/src/Menu/SettingsTabRenderer.cpp b/src/Menu/SettingsTabRenderer.cpp
index 90daa7ded4..20d1cca3a4 100644
--- a/src/Menu/SettingsTabRenderer.cpp
+++ b/src/Menu/SettingsTabRenderer.cpp
@@ -11,7 +11,6 @@
 #include "IconLoader.h"
 #include "Menu.h"
 #include "ShaderCache.h"
-#include "State.h"
 #include "ThemeManager.h"
 #include "Util.h"
 
@@ -245,77 +244,6 @@ void SettingsTabRenderer::RenderShadersTab()
 		if (shaderCache->GetTotalTasks() > 0) {
 			ImGui::Text("Last shader cache build duration: %s",
 				shaderCache->GetShaderStatsString(true, true).c_str());
-
-			// Stacked bar showing compilation breakdown
-			{
-				uint64_t total = shaderCache->GetTotalTasks();
-				uint64_t completed = shaderCache->GetCompletedTasks();
-				uint64_t failed = shaderCache->GetFailedTasks();
-				uint64_t cacheHits = shaderCache->GetCachedHitTasks();
-				uint64_t slow = shaderCache->GetSlowTasks();
-				uint64_t verySlow = shaderCache->GetVerySlowTasks();
-				// Compiled = tasks that actually went through compilation.
-				// Cache hits are separate (returned early without queueing).
-				uint64_t compiled = completed;
-				uint64_t fast = compiled > slow ? compiled - slow : 0;
-				uint64_t medium = slow > verySlow ? slow - verySlow : 0;  // 2-8s
-
-				struct Segment
-				{
-					uint64_t count;
-					ImU32 color;
-					const char* label;
-				};
-				Segment segments[] = {
-					{ cacheHits, IM_COL32(120, 120, 120, 255), "Deduplicated" },
-					{ fast, IM_COL32(80, 180, 80, 255), "Fast (<2s)" },
-					{ medium, IM_COL32(220, 180, 50, 255), "Slow (2-8s)" },
-					{ verySlow, IM_COL32(220, 60, 60, 255), "Very slow (>=8s)" },
-					{ failed, IM_COL32(160, 30, 30, 255), "Failed" },
-				};
-
-				float barHeight = 14.0f * Util::GetUIScale();
-				float barWidth = ImGui::GetContentRegionAvail().x;
-				ImVec2 cursor = ImGui::GetCursorScreenPos();
-				ImDrawList* drawList = ImGui::GetWindowDrawList();
-
-				// Background
-				drawList->AddRectFilled(cursor, ImVec2(cursor.x + barWidth, cursor.y + barHeight), IM_COL32(40, 40, 40, 255));
-
-				// Draw segments
-				float x = cursor.x;
-				for (auto& seg : segments) {
-					if (seg.count == 0 || total == 0)
-						continue;
-					float segWidth = (static_cast<float>(seg.count) / static_cast<float>(total)) * barWidth;
-					if (segWidth < 1.0f)
-						segWidth = 1.0f;
-					drawList->AddRectFilled(ImVec2(x, cursor.y), ImVec2(x + segWidth, cursor.y + barHeight), seg.color);
-					x += segWidth;
-				}
-
-				// Reserve space and handle tooltip
-				ImGui::Dummy(ImVec2(barWidth, barHeight));
-				if (ImGui::IsItemHovered()) {
-					ImGui::BeginTooltip();
-					for (auto& seg : segments) {
-						if (seg.count == 0)
-							continue;
-						float pct = total > 0 ? 100.0f * static_cast<float>(seg.count) / static_cast<float>(total) : 0.0f;
-						ImGui::TextColored(ImGui::ColorConvertU32ToFloat4(seg.color), "%s: %llu (%.1f%%)", seg.label, seg.count, pct);
-					}
-					ImGui::EndTooltip();
-				}
-			}
-
-			auto state = globals::state;
-			if (state->IsDeveloperMode()) {
-				ImGui::Text("Threads: %d compile, %d background, %d pool | P-cores: %d",
-					(int)shaderCache->compilationThreadCount,
-					(int)shaderCache->backgroundCompilationThreadCount,
-					(int)shaderCache->compilationPool.get_thread_count(),
-					(int)Util::GetPerformanceCoreCount());
-			}
 		}
 
 		ImGui::EndTabItem();
diff --git a/src/ShaderCache.cpp b/src/ShaderCache.cpp
index 9bdbb45e9b..1b3f3edd3d 100644
--- a/src/ShaderCache.cpp
+++ b/src/ShaderCache.cpp
@@ -1341,24 +1341,20 @@ namespace SIE
 				return nullptr;
 			}
 
+			// check hashmap
 			auto& cache = ShaderCache::Instance();
-			auto key = SShaderCache::GetShaderString(shaderClass, shader, descriptor, true);
-
-			// Atomically check the shaderMap and either:
-			//  - return the blob if already Completed (cache hit),
-			//  - wait if another thread is compiling (Pending),
-			//  - claim the slot with Pending if nobody started yet.
-			auto [claimResult, cachedBlob] = cache.ClaimCompilation(key);
-			if (claimResult == ShaderCache::ClaimResult::CacheHit) {
+			ID3DBlob* shaderBlob = cache.GetCompletedShader(shaderClass, shader, descriptor);
+
+			if (shaderBlob) {
+				// already compiled before
+				logger::debug("Shader already compiled; using cache: {}", SShaderCache::GetShaderString(shaderClass, shader, descriptor));
 				cache.IncCacheHitTasks();
-				return cachedBlob;
+				return shaderBlob;
 			}
-
 			const auto type = shader.shaderType.get();
 
 			// check diskcache
 			auto diskPath = GetDiskPath(shader.fxpFilename, descriptor, shaderClass);
-			ID3DBlob* shaderBlob = nullptr;
 
 			if (useDiskCache && std::filesystem::exists(diskPath)) {
 				// check build time of cache
@@ -1409,7 +1405,6 @@ namespace SIE
 			auto pathString = Util::WStringToString(path);
 			if (!std::filesystem::exists(path)) {
 				logger::error("Failed to compile {} shader {}::{:X}: {} does not exist", magic_enum::enum_name(shaderClass), magic_enum::enum_name(type), descriptor, pathString);
-				cache.AddCompletedShader(shaderClass, shader, descriptor, nullptr);
 				return nullptr;
 			}
 			logger::debug("Compiling {} {}:{}:{:X} to {}", pathString, magic_enum::enum_name(type), magic_enum::enum_name(shaderClass), descriptor, MergeDefinesString(defines));
@@ -1931,16 +1926,11 @@ namespace SIE
 	{
 		Clear();
 		StopFileWatcher();
-		// Signal management thread to stop dispatching; pool workers observe the same
-		// stop token and will not pick up new tasks after current compilations finish.
-		HANDLE managementHandle = managementJthread.native_handle();
-		managementJthread.request_stop();
-		// Purge unstarted tasks so we only wait for compilations already in flight.
-		compilationPool.purge();
 		if (!compilationPool.wait_for(std::chrono::milliseconds(1000))) {
-			logger::info("Tasks still running despite request to stop; killing management thread {}!", GetThreadId(managementHandle));
-			WaitForSingleObject(managementHandle, 1000);
-			TerminateThread(managementHandle, 0);
+			logger::info("Tasks still running despite request to stop; killing thread {}!", GetThreadId(managementThread));
+			WaitForSingleObject(managementThread, 1000);
+			TerminateThread(managementThread, 0);
+			CloseHandle(managementThread);
 		}
 	}
 
@@ -2112,7 +2102,6 @@ namespace SIE
 			std::unique_lock lockM{ mapMutex };
 			shaderMap.insert_or_assign(key, ShaderCacheResult{ a_blob, status, system_clock::now() });
 		}
-		mapCV.notify_all();  // wake threads waiting on a Pending→Completed/Failed transition
 		const std::wstring path = SIE::SShaderCache::GetShaderPath(
 			shader.shaderType == RE::BSShader::Type::ImageSpace ?
 				static_cast<const RE::BSImagespaceShader&>(shader).originalShaderName :
@@ -2149,53 +2138,6 @@ namespace SIE
 		return a_blob != nullptr;
 	}
 
-	std::pair<ShaderCache::ClaimResult, ID3DBlob*> ShaderCache::ClaimCompilation(const std::string& key)
-	{
-		std::unique_lock lockM{ mapMutex };
-
-		for (;;) {
-			auto it = shaderMap.find(key);
-			if (it != shaderMap.end()) {
-				auto& entry = it->second;
-				if (entry.status == ShaderCompilationTask::Status::Completed) {
-					if (entry.blob) {
-						logger::debug("Shader already compiled; using cache: {}", key);
-						return { ClaimResult::CacheHit, entry.blob };
-					}
-					break;  // Completed with nullptr blob — re-compile
-				}
-				if (entry.status == ShaderCompilationTask::Status::Failed) {
-					break;  // Previous attempt failed — re-compile
-				}
-				// Status is Pending — another thread is compiling this shader.
-				logger::debug("Shader compilation in progress, waiting: {}", key);
-				mapCV.wait(lockM);
-				continue;  // re-check after wakeup
-			}
-			break;  // not in map at all
-		}
-
-		// Claim the slot as Pending before releasing the lock
-		shaderMap.insert_or_assign(key, ShaderCacheResult{ nullptr, ShaderCompilationTask::Status::Pending, system_clock::now() });
-		return { ClaimResult::Claimed, nullptr };
-	}
-
-	void ShaderCache::ResolvePendingFailure(const std::string& key)
-	{
-		bool changed = false;
-		{
-			std::unique_lock lockM{ mapMutex };
-			auto it = shaderMap.find(key);
-			if (it != shaderMap.end() && it->second.status == ShaderCompilationTask::Status::Pending) {
-				it->second = ShaderCacheResult{ nullptr, ShaderCompilationTask::Status::Failed, system_clock::now() };
-				changed = true;
-			}
-		}
-		if (changed) {
-			mapCV.notify_all();
-		}
-	}
-
 	ID3DBlob* ShaderCache::GetCompletedShader(const std::string& a_key)
 	{
 		std::string type = SIE::SShaderCache::GetTypeFromShaderString(a_key);
@@ -2269,8 +2211,7 @@ namespace SIE
 		if (IsCompiling()) {
 			logger::info("Stopping {} remaining shader compilation tasks", compilationSet.totalTasks - compilationSet.completedTasks - compilationSet.failedTasks);
 		}
-		ssource.request_stop();            // signals any legacy stop_token users
-		managementJthread.request_stop();  // stops management thread + in-flight compilations
+		ssource.request_stop();
 		compilationSet.Clear();
 	}
 
@@ -2370,13 +2311,8 @@ namespace SIE
 	ShaderCache::ShaderCache()
 	{
 		dependencyTracker = std::make_unique<ShaderFileDependencyTracker>();
-		logger::debug("ShaderCache initialized: {} startup threads, {} background threads, {} pool threads",
-			(int)compilationThreadCount, (int)backgroundCompilationThreadCount, (int)compilationPool.get_thread_count());
-		// Management thread runs on a dedicated jthread, not in the compilation pool,
-		// so it doesn't consume a pool slot that could be used for shader compilation.
-		managementJthread = std::jthread([this](std::stop_token stoken) {
-			ManageCompilationSet(stoken);
-		});
+		logger::debug("ShaderCache initialized with {} compiler threads", (int)compilationThreadCount);
+		compilationPool.detach_task([this, token = ssource.get_token()] { ManageCompilationSet(token); });
 	}
 
 	bool ShaderCache::UseFileWatcher() const
@@ -2410,12 +2346,7 @@ namespace SIE
 				pathStr += std::format("{}; ", path);
 			}
 			logger::debug("ShaderCache watching for changes in {}", pathStr);
-			// Capture listener by value so the thread does not race with StopFileWatcher()
-			// nulling this->listener before the thread has had a chance to start.
-			auto* capturedListener = listener;
-			capturedListener->fileWatcherThread = std::jthread([capturedListener]() {
-				capturedListener->processQueue();
-			});
+			compilationPool.detach_task([queueListener = listener] { queueListener->processQueue(); });  // capture the pointer now: StopFileWatcher() may null this->listener before the pool starts this task
 		} else {
 			logger::debug("ShaderCache already enabled");
 		}
@@ -2424,16 +2355,11 @@ namespace SIE
 	void ShaderCache::StopFileWatcher()
 	{
 		logger::info("Stopping FileWatcher");
-		// Set flag first so processQueue()'s loop condition becomes false before we join.
-		useFileWatcher = false;
 		if (fileWatcher) {
 			fileWatcher->removeWatch(watchID);
 			fileWatcher = nullptr;
 		}
 		if (listener) {
-			// ~jthread() calls request_stop() + join(); processQueue() exits when
-			// UseFileWatcher() returns false (set above).
-			delete listener;
 			listener = nullptr;
 		}
 	}
@@ -2620,87 +2546,6 @@ namespace SIE
 		return hideError;
 	}
 
-	int ShaderCache::GetHeavyTasksInFlight()
-	{
-		return static_cast<int>(compilationSet.heavyTasksInFlight.load(std::memory_order_relaxed));
-	}
-
-	uint64_t ShaderCache::GetSlowTasks()
-	{
-		return compilationSet.slowTasks.load(std::memory_order_relaxed);
-	}
-
-	uint64_t ShaderCache::GetVerySlowTasks()
-	{
-		return compilationSet.verySlowTasks.load(std::memory_order_relaxed);
-	}
-
-	std::vector<CompilationSet::SlowTaskRecord> CompilationSet::GetTopSlowTasks(size_t n) const
-	{
-		std::lock_guard lock(slowTasksMutex);
-		// Partial sort to get the N highest without fully sorting the whole vector.
-		std::vector<SlowTaskRecord> result = slowTaskRecords;
-		if (result.size() > n) {
-			std::partial_sort(result.begin(), result.begin() + n, result.end(),
-				[](const SlowTaskRecord& a, const SlowTaskRecord& b) { return a.elapsedMs > b.elapsedMs; });
-			result.resize(n);
-		} else {
-			std::sort(result.begin(), result.end(),
-				[](const SlowTaskRecord& a, const SlowTaskRecord& b) { return a.elapsedMs > b.elapsedMs; });
-		}
-		return result;
-	}
-
-	std::vector<CompilationSet::SlowTaskRecord> ShaderCache::GetTopSlowTasks(size_t n)
-	{
-		return compilationSet.GetTopSlowTasks(n);
-	}
-
-	std::optional<CompilationSet::ParallelismStats> CompilationSet::GetParallelismStats() const
-	{
-		std::vector<SlowTaskRecord> records;
-		{
-			std::lock_guard lock(slowTasksMutex);
-			if (slowTaskRecords.empty()) {
-				return std::nullopt;
-			}
-			records = slowTaskRecords;
-		}
-
-		ParallelismStats stats;
-		stats.sampleCount = records.size();
-		for (const auto& rec : records) {
-			stats.workMs += rec.elapsedMs;
-			stats.spanMs = std::max(stats.spanMs, rec.elapsedMs);
-			stats.avgQueueWaitMs += rec.queueWaitMs;
-			stats.maxQueueWaitMs = std::max(stats.maxQueueWaitMs, rec.queueWaitMs);
-		}
-		stats.avgQueueWaitMs /= static_cast<double>(stats.sampleCount);
-
-		LARGE_INTEGER now;
-		QueryPerformanceCounter(&now);
-		int64_t endTime = completionTime.load(std::memory_order_relaxed);
-		if (endTime == 0) {
-			endTime = now.QuadPart;
-		}
-		stats.makespanMs = static_cast<double>(endTime - lastReset.QuadPart) * 1000.0 / frequency.QuadPart;
-
-		if (stats.spanMs > 0.0) {
-			stats.avgParallelism = stats.workMs / stats.spanMs;
-		}
-		if (stats.makespanMs > 0.0) {
-			stats.infiniteCoreEfficiency = stats.spanMs / stats.makespanMs;
-			stats.infiniteCoreGapPercent = std::max(0.0, 100.0 * (1.0 - stats.infiniteCoreEfficiency));
-		}
-
-		return stats;
-	}
-
-	std::optional<CompilationSet::ParallelismStats> ShaderCache::GetParallelismStats()
-	{
-		return compilationSet.GetParallelismStats();
-	}
-
 	void ShaderCache::ClearShaderMap(RE::BSShader::Type a_type)
 	{
 		std::string_view shaderTypeStr = magic_enum::enum_name(a_type);
@@ -2885,79 +2730,8 @@ namespace SIE
 			return;
 		}
 
-		const auto taskKey = task.GetString();
-
-		// Thread priority serves as a signal to Intel Thread Director and
-		// the Windows scheduler for P-core vs E-core placement on hybrid CPUs.
-		// Heavy shaders compile at normal priority (favouring P-cores); light
-		// shaders stay below-normal (allowing E-core placement).  On non-hybrid
-		// CPUs this still gives heavy compiles slightly more scheduler attention.
-		SetThreadPriority(GetCurrentThread(),
-			task.GetPriority() >= SIE::kHeavyPriorityThreshold ? THREAD_PRIORITY_NORMAL : THREAD_PRIORITY_BELOW_NORMAL);
-
-		LARGE_INTEGER start, end, freq;
-		QueryPerformanceFrequency(&freq);
-		QueryPerformanceCounter(&start);
-		const double queueWaitMs = task.GetEnqueuedQpc() > 0 ?
-		                               static_cast<double>(start.QuadPart - task.GetEnqueuedQpc()) * 1000.0 / freq.QuadPart :
-		                               0.0;
-
-		try {
-			task.Perform();
-		} catch (const std::exception& e) {
-			logger::error("Unhandled exception compiling shader task {}: {}", taskKey, e.what());
-			ResolvePendingFailure(taskKey);
-		} catch (...) {
-			logger::error("Unhandled non-standard exception compiling shader task {}", taskKey);
-			ResolvePendingFailure(taskKey);
-		}
-
-		QueryPerformanceCounter(&end);
-		const double elapsedMs = static_cast<double>(end.QuadPart - start.QuadPart) * 1000.0 / freq.QuadPart;
-		const uint64_t remaining = compilationSet.totalTasks - compilationSet.completedTasks.load(std::memory_order_relaxed) - compilationSet.failedTasks.load(std::memory_order_relaxed);
-
-		// Proxy for permutation complexity: descriptor low 32 bits from GetId(); popcount = active defines.
-		// Shader file size provides a secondary signal for source complexity.
-		const auto descriptorComplexity = std::popcount(static_cast<uint32_t>(task.GetId()));
-		uintmax_t sourceBytes = 0;
-		{
-			// GetString() format: "fxpFilename:ShaderClass:defines" — filename is before the first colon.
-			const auto taskStr = task.GetString();
-			const auto sep = taskStr.find(':');
-			if (sep != std::string::npos) {
-				const auto shaderName = taskStr.substr(0, sep);
-				if (auto path = SIE::SShaderCache::GetShaderPath(shaderName); !path.empty()) {
-					std::error_code ec;
-					sourceBytes = std::filesystem::file_size(path, ec);
-				}
-			}
-		}
-
-		// Debug: full per-task record for post-mortem straggler analysis.
-		logger::debug("[ShaderTiming] {:.0f}ms | queue_wait={:.0f}ms | remaining={} | defines={} | src={}B | prio={} | tid={} | {}",
-			elapsedMs, queueWaitMs, remaining, descriptorComplexity, sourceBytes,
-			task.GetPriority(), GetCurrentThreadId(), taskKey);
-
-		constexpr double kSlowMs = 2000.0;
-		constexpr double kVerySlowMs = 8000.0;
-
-		// Record every task for post-mortem analysis and developer UI (top-N display).
-		{
-			std::lock_guard lock(compilationSet.slowTasksMutex);
-			compilationSet.slowTaskRecords.push_back({ taskKey, elapsedMs, queueWaitMs, task.GetPriority(),
-				static_cast<int>(descriptorComplexity), sourceBytes });
-		}
-
-		if (elapsedMs >= kVerySlowMs) {
-			compilationSet.verySlowTasks++;
-			compilationSet.slowTasks++;
-			logger::info("[ShaderTiming] Very slow {:.0f}ms | queue_wait={:.0f}ms | remaining={} | defines={} | src={}B | prio={} | {}",
-				elapsedMs, queueWaitMs, remaining, descriptorComplexity, sourceBytes, task.GetPriority(), taskKey);
-		} else if (elapsedMs >= kSlowMs) {
-			compilationSet.slowTasks++;
-			logger::debug("[ShaderTiming] Slow {:.0f}ms | queue_wait={:.0f}ms | remaining={} | defines={} | src={}B | prio={} | {}",
-				elapsedMs, queueWaitMs, remaining, descriptorComplexity, sourceBytes, task.GetPriority(), taskKey);
-		}
+		SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_BELOW_NORMAL);
+		task.Perform();
 
 		if (stoken.stop_requested()) {
 			return;
@@ -2970,8 +2744,7 @@ namespace SIE
 		const RE::BSShader& aShader,
 		uint32_t aDescriptor) :
 		shaderClass(aShaderClass),
-		shader(aShader), descriptor(aDescriptor),
-		cachedPriority(ComputePriority(aShaderClass, aShader, aDescriptor))
+		shader(aShader), descriptor(aDescriptor)
 	{}
 
 	void ShaderCompilationTask::Perform() const
@@ -3004,72 +2777,6 @@ namespace SIE
 		return GetId() == other.GetId();
 	}
 
-	int ShaderCompilationTask::ComputePriority(ShaderClass shaderClass, const RE::BSShader& shader, uint32_t descriptor)
-	{
-		int priority = 0;
-		const auto type = shader.shaderType.get();
-
-		// Base priority by shader type — Lighting is consistently the slowest
-		// (123KB source, 12s+ compile), followed by Effect (~31KB, up to 12s).
-		switch (type) {
-		case RE::BSShader::Type::Lighting:
-			priority += 1000;
-			break;
-		case RE::BSShader::Type::Effect:
-			priority += 500;
-			break;
-		case RE::BSShader::Type::Water:
-			priority += 300;
-			break;
-		default:
-			break;
-		}
-
-		// Pixel shaders compile significantly slower than vertex shaders
-		if (shaderClass == ShaderClass::Pixel)
-			priority += 200;
-
-		// More active descriptor bits → more #defines → more code paths for the compiler
-		priority += std::popcount(descriptor) * 30;
-
-		// Known heavy Lighting techniques and flags from straggler analysis
-		if (type == RE::BSShader::Type::Lighting) {
-			const auto technique = static_cast<ShaderCache::LightingShaderTechniques>(0x3F & (descriptor >> 24));
-
-			// LANDSCAPE techniques (MTLand, MTLandLODBlend) are among the heaviest
-			// due to multi-texture blending codegen — regularly 60-130s compile times
-			if (technique == ShaderCache::LightingShaderTechniques::MTLand ||
-				technique == ShaderCache::LightingShaderTechniques::MTLandLODBlend)
-				priority += 500;
-			if (technique == ShaderCache::LightingShaderTechniques::Parallax ||
-				technique == ShaderCache::LightingShaderTechniques::ParallaxOcc)
-				priority += 300;
-			if (technique == ShaderCache::LightingShaderTechniques::Eye)
-				priority += 200;
-			if (technique == ShaderCache::LightingShaderTechniques::MultilayerParallax)
-				priority += 200;
-
-			// TRUE_PBR and ANISO_LIGHTING are the dominant cost drivers,
-			// especially in combination with LANDSCAPE (115-130s observed)
-			if (descriptor & static_cast<uint32_t>(ShaderCache::LightingShaderFlags::TruePbr))
-				priority += 500;
-			if (descriptor & static_cast<uint32_t>(ShaderCache::LightingShaderFlags::AnisoLighting))
-				priority += 300;
-			// Deferred adds extra codegen overhead
-			if (descriptor & static_cast<uint32_t>(ShaderCache::LightingShaderFlags::Deferred))
-				priority += 200;
-
-			// LANDSCAPE + TRUE_PBR combination triggers extreme register pressure
-			// (6x unrolled texture layers * PBR params = 30+ textures, 180s+ compile)
-			if ((technique == ShaderCache::LightingShaderTechniques::MTLand ||
-					technique == ShaderCache::LightingShaderTechniques::MTLandLODBlend) &&
-				(descriptor & static_cast<uint32_t>(ShaderCache::LightingShaderFlags::TruePbr)))
-				priority += 500;
-		}
-
-		return priority;
-	}
-
 	std::optional<ShaderCompilationTask> CompilationSet::WaitTake(std::stop_token stoken)
 	{
 		std::unique_lock lock(compilationMutex);
@@ -3077,9 +2784,8 @@ namespace SIE
 		if (!conditionVariable.wait(
 				lock, stoken,
 				[this, &shaderCache]() { return !availableTasks.empty() &&
-			                                    // Dispatch when pool has room. Use < (not <=) so that after
-			                                    // push_task() the total never exceeds the limit.
-			                                    (int)shaderCache->compilationPool.get_tasks_total() <
+			                                    // Throttle on the pool's total task count so that work trickles in. The running-task count alone cannot be used because it is capped by the pool's own thread count.
+			                                    (int)shaderCache->compilationPool.get_tasks_total() <=
 			                                        (!shaderCache->backgroundCompilation ? shaderCache->compilationThreadCount : shaderCache->backgroundCompilationThreadCount); })) {
 			/*Woke up because of a stop request. */
 			return std::nullopt;
@@ -3088,27 +2794,9 @@ namespace SIE
 			QueryPerformanceCounter(&lastReset);
 			lastCalculation = lastReset;
 		}
-
-		// Startup policy: keep dispatching the hardest queued work first.
-		// This preserves the existing priority score while preventing light tasks
-		// from bypassing queued heavy shaders and stretching the tail.
-		auto bestIt = availableTasks.end();
-		if (!availableTasks.empty()) {
-			bestIt = std::prev(availableTasks.end());
-		}
-
-		if (bestIt == availableTasks.end()) {
-			return std::nullopt;
-		}
-
-		ShaderCompilationTask task = *bestIt;
-		availableTasks.erase(bestIt);
-
-		if (task.GetPriority() >= kHeavyPriorityThreshold) {
-			heavyTasksInFlight.fetch_add(1, std::memory_order_relaxed);
-		}
-
-		tasksInProgress.insert(task);
+		auto node = availableTasks.extract(availableTasks.begin());  // predicate above guarantees non-empty
+		ShaderCompilationTask task = node.value();                   // copy first: references obtained from a node handle are invalidated by insert()
+		tasksInProgress.insert(std::move(node));                     // move the element between sets without reallocating it
 		return task;
 	}
 
@@ -3118,16 +2806,11 @@ namespace SIE
 		auto inProgressIt = tasksInProgress.find(task);
 		auto processedIt = processedTasks.find(task);
 		if (inProgressIt == tasksInProgress.end() && processedIt == processedTasks.end() && !globals::shaderCache->GetCompletedShader(task)) {
-			LARGE_INTEGER now;
-			QueryPerformanceCounter(&now);
-			auto queuedTask = task;
-			queuedTask.SetEnqueuedQpc(now.QuadPart);
-			auto [_, wasAdded] = availableTasks.insert(queuedTask);
+			auto [availableIt, wasAdded] = availableTasks.insert(task);
 			lock.unlock();
 			if (wasAdded) {
 				conditionVariable.notify_one();
 				totalTasks++;
-				totalPriorityWeight += static_cast<uint64_t>(task.GetPriority()) + 1;
 			}
 		}
 	}
@@ -3153,17 +2836,6 @@ namespace SIE
 				logger::debug("Compiling Task failed: {}", key);
 				failedTasks++;
 			}
-			completedPriorityWeight += static_cast<uint64_t>(task.GetPriority()) + 1;
-
-			// Track heavy task completion for P-core concurrency limiting
-			if (task.GetPriority() >= kHeavyPriorityThreshold) {
-				auto current = heavyTasksInFlight.load(std::memory_order_relaxed);
-				while (current > 0 &&
-					   !heavyTasksInFlight.compare_exchange_weak(current, current - 1,
-						   std::memory_order_relaxed,
-						   std::memory_order_relaxed)) {
-				}
-			}
 
 			// Update timing
 			LARGE_INTEGER now;
@@ -3201,49 +2873,36 @@ namespace SIE
 		completedTasks = 0;
 		failedTasks = 0;
 		cacheHitTasks = 0;
-		slowTasks = 0;
-		verySlowTasks = 0;
-		totalPriorityWeight = 0;
-		completedPriorityWeight = 0;
-		heavyTasksInFlight = 0;
 		QueryPerformanceCounter(&lastReset);
 		QueryPerformanceCounter(&lastCalculation);
 		completionTime = { 0 };  // Reset completion time
 		totalTime = { 0 };
-		{
-			std::lock_guard slowLock(slowTasksMutex);
-			slowTaskRecords.clear();
-		}
 	}
 
 	std::string CompilationSet::GetHumanTime(double a_totalMs)
 	{
-		return Util::FormatDuration(a_totalMs);
+		// Formats milliseconds as HH:MM:SS; negative, NaN and out-of-range (incl. infinite) inputs render as 00:00:00.
+		constexpr double kMaxMs = 9.0e18;                           // below INT64_MAX, so the cast below is always defined
+		const bool valid = a_totalMs >= 0.0 && a_totalMs < kMaxMs;  // comparisons are false for NaN
+		const int64_t totalSeconds = valid ? static_cast<int64_t>(a_totalMs) / 1000 : 0;
+		const int64_t hours = totalSeconds / 3600;  // intentionally unbounded: long runs show e.g. 125:34:56
+		const int64_t minutes = (totalSeconds % 3600) / 60;
+		const int64_t seconds = totalSeconds % 60;
+		return fmt::format("{:02}:{:02}:{:02}", hours, minutes, seconds);
 	}
 
 	double CompilationSet::GetEta()
 	{
-		// Use wall-clock elapsed time since compilation started
-		LARGE_INTEGER now;
-		QueryPerformanceCounter(&now);
-		int64_t endTime = (completionTime.load(std::memory_order_relaxed) != 0) ? completionTime.load(std::memory_order_relaxed) : now.QuadPart;
-		double elapsedMs = static_cast<double>(endTime - lastReset.QuadPart) * 1000.0 / frequency.QuadPart;
-
-		if (elapsedMs <= 0.0)
-			return 0.0;
+		// ETA in ms = remaining tasks * average active compile time per successfully completed task.
+		// Uses the accumulated compile time (totalTime), i.e. actual work time, not wall-clock time.
+		double totalMs = static_cast<double>(totalTime.QuadPart) * 1000.0 / frequency.QuadPart;
 
-		// Priority-weighted ETA: heavy tasks completing early should not inflate
-		// the estimate. We measure progress as a fraction of total priority weight
-		// completed, which accounts for the decreasing cost of remaining tasks.
-		double doneWeight = static_cast<double>(completedPriorityWeight.load(std::memory_order_relaxed));
-		double totalWeight = static_cast<double>(totalPriorityWeight.load(std::memory_order_relaxed));
-
-		if (doneWeight <= 0.0 || totalWeight <= 0.0)
-			return 0.0;
-
-		double fractionDone = doneWeight / totalWeight;
-		double estimatedTotalMs = elapsedMs / fractionDone;
-		return std::max(estimatedTotalMs - elapsedMs, 0.0);
+		const uint64_t done = completedTasks.load();          // snapshot each counter once; they change concurrently
+		const uint64_t finished = done + failedTasks.load();  // failed tasks no longer need compiling either
+		const uint64_t total = totalTasks.load();
+		if (totalMs <= 0.0 || done == 0 || total <= finished)
+			return 0.0;  // no throughput sample yet (would divide by zero) or nothing left; also avoids unsigned wrap-around
+		return static_cast<double>(total - finished) * (totalMs / static_cast<double>(done));
 	}
 
 	std::string CompilationSet::GetStatsString(bool a_timeOnly, bool a_elapsedOnly)
@@ -3268,7 +2927,7 @@ namespace SIE
 			}
 		}
 
-		return fmt::format("{}/{} (successful/total)\tfailed: {}\tdeduplicated: {}\nElapsed/Estimated Time: {}/{}",
+		return fmt::format("{}/{} (successful/total)\tfailed: {}\tcachehits: {}\nElapsed/Estimated Time: {}/{}",
 			(std::uint64_t)completedTasks,
 			(std::uint64_t)totalTasks,
 			(std::uint64_t)failedTasks,
diff --git a/src/ShaderCache.h b/src/ShaderCache.h
index 72d4c6cec8..89bcedf92d 100644
--- a/src/ShaderCache.h
+++ b/src/ShaderCache.h
@@ -4,8 +4,6 @@
 #include <efsw/efsw.hpp>
 #include <vector>
 
-#include "Utils/WinApi.h"
-
 using namespace std::chrono;
 
 namespace ShaderConstants
@@ -220,24 +218,12 @@ namespace SIE
 		size_t GetId() const;
 		std::string GetString() const;
 
-		/// LPT scheduling score: higher = more expensive = should be dispatched first.
-		/// Based on shader type, class, descriptor complexity, and known heavy defines.
-		/// Computed once at construction and cached.
-		int GetPriority() const { return cachedPriority; }
-		void SetEnqueuedQpc(int64_t qpc) { enqueuedQpc = qpc; }
-		int64_t GetEnqueuedQpc() const { return enqueuedQpc; }
-
 		bool operator==(const ShaderCompilationTask& other) const;
 
 	protected:
 		ShaderClass shaderClass;
 		const RE::BSShader& shader;
 		uint32_t descriptor;
-
-	private:
-		static int ComputePriority(ShaderClass shaderClass, const RE::BSShader& shader, uint32_t descriptor);
-		int cachedPriority;
-		int64_t enqueuedQpc = 0;
 	};
 }
 
@@ -250,24 +236,8 @@ struct std::hash<SIE::ShaderCompilationTask>
 	}
 };
 
-struct TaskPriorityLess
-{
-	bool operator()(const SIE::ShaderCompilationTask& a, const SIE::ShaderCompilationTask& b) const
-	{
-		if (a.GetPriority() != b.GetPriority()) {
-			return a.GetPriority() < b.GetPriority();
-		}
-		return a.GetId() < b.GetId();
-	}
-};
-
 namespace SIE
 {
-	/// Threshold above which a shader task is considered "heavy" and benefits
-	/// from P-core placement on hybrid CPUs. Used for thread-priority hints,
-	/// telemetry, and developer-facing diagnostics.
-	constexpr int kHeavyPriorityThreshold = 500;
-
 	class CompilationSet
 	{
 	public:
@@ -289,61 +259,19 @@ namespace SIE
 		void Add(const ShaderCompilationTask& task);
 		void Complete(const ShaderCompilationTask& task);
 		void Clear();
-		static std::string GetHumanTime(double a_totalMs);
+		std::string GetHumanTime(double a_totalMs);
 		double GetEta();
 		std::string GetStatsString(bool a_timeOnly = false, bool a_elapsedOnly = false);
 		std::atomic<uint64_t> completedTasks = 0;
 		std::atomic<uint64_t> totalTasks = 0;
 		std::atomic<uint64_t> failedTasks = 0;
-		std::atomic<uint64_t> cacheHitTasks = 0;            // number of compiles of a previously seen shader combo
-		std::atomic<uint64_t> slowTasks = 0;                // shaders taking >= 2s
-		std::atomic<uint64_t> verySlowTasks = 0;            // shaders taking >= 8s
-		std::atomic<uint64_t> totalPriorityWeight = 0;      // sum of (GetPriority()+1) for all queued tasks
-		std::atomic<uint64_t> completedPriorityWeight = 0;  // sum of (GetPriority()+1) for completed/failed tasks
-		std::atomic<uint32_t> heavyTasksInFlight = 0;       // number of dispatched heavy (>= kHeavyPriorityThreshold) tasks still running
+		std::atomic<uint64_t> cacheHitTasks = 0;  // number of compiles of a previously seen shader combo
 		std::mutex compilationMutex;
 
-		/// Per-task timing record stored for post-mortem analysis and developer UI.
-		struct SlowTaskRecord
-		{
-			std::string key;  // ShaderCompilationTask::GetString() — "fxpFile:Class:defines"
-			double elapsedMs = 0.0;
-			double queueWaitMs = 0.0;
-			int priority = 0;               // estimated compile weight (see ComputePriority)
-			int defineCount = 0;            // popcount of descriptor — active define permutations
-			uintmax_t sourceSizeBytes = 0;  // HLSL source file size at compile time
-		};
-
-		/// On-demand parallelism metrics derived from task timings.
-		struct ParallelismStats
-		{
-			double workMs = 0.0;                  // W = sum of all task times
-			double spanMs = 0.0;                  // S ~= longest single task
-			double makespanMs = 0.0;              // T_p = wall-clock compile duration
-			double avgParallelism = 0.0;          // W / S
-			double infiniteCoreEfficiency = 0.0;  // S / T_p
-			double infiniteCoreGapPercent = 0.0;  // 100 * (1 - S / T_p)
-			double avgQueueWaitMs = 0.0;          // average enqueue -> dispatch delay
-			double maxQueueWaitMs = 0.0;          // worst enqueue -> dispatch delay
-			size_t sampleCount = 0;
-		};
-
-		/// All per-task timing records for this build (appended from multiple threads).
-		/// Protected by slowTasksMutex.
-		std::vector<SlowTaskRecord> slowTaskRecords;
-		mutable std::mutex slowTasksMutex;
-
-		/// Returns a copy of the N records with the highest elapsedMs, sorted descending.
-		std::vector<SlowTaskRecord> GetTopSlowTasks(size_t n = 3) const;
-
-		/// Computes parallelism metrics on demand from collected task timings.
-		std::optional<ParallelismStats> GetParallelismStats() const;
-
 	private:
-		/// Tasks awaiting dispatch, ordered by cached priority and task id.
-		std::set<ShaderCompilationTask, TaskPriorityLess> availableTasks;
-		std::set<ShaderCompilationTask, TaskPriorityLess> tasksInProgress;
-		std::set<ShaderCompilationTask, TaskPriorityLess> processedTasks;  // completed or failed
+		std::unordered_set<ShaderCompilationTask> availableTasks;
+		std::unordered_set<ShaderCompilationTask> tasksInProgress;
+		std::unordered_set<ShaderCompilationTask> processedTasks;  // completed or failed
 		std::condition_variable_any conditionVariable;
 	};
 
@@ -466,15 +394,6 @@ namespace SIE
 		bool Clear(const std::string& a_path);
 
 		bool AddCompletedShader(ShaderClass shaderClass, const RE::BSShader& shader, uint32_t descriptor, ID3DBlob* a_blob);
-
-		enum class ClaimResult
-		{
-			CacheHit,  // Already compiled; use the returned blob
-			Claimed    // Claimed as Pending; caller must compile and call AddCompletedShader
-		};
-		std::pair<ClaimResult, ID3DBlob*> ClaimCompilation(const std::string& key);
-		void ResolvePendingFailure(const std::string& key);
-
 		ID3DBlob* GetCompletedShader(const std::string& a_key);
 		ID3DBlob* GetCompletedShader(const SIE::ShaderCompilationTask& a_task);
 		ID3DBlob* GetCompletedShader(ShaderClass shaderClass, const RE::BSShader& shader, uint32_t descriptor);
@@ -513,15 +432,6 @@ namespace SIE
 		void IterateShaderBlock(bool a_forward = true);
 		bool IsHideErrors();
 
-		// Overlay stats
-		int GetHeavyTasksInFlight();
-		uint64_t GetSlowTasks();
-		uint64_t GetVerySlowTasks();
-
-		/// Returns a copy of the top-N slowest task records from the last build, sorted descending.
-		std::vector<CompilationSet::SlowTaskRecord> GetTopSlowTasks(size_t n = 3);
-		std::optional<CompilationSet::ParallelismStats> GetParallelismStats();
-
 		/**
 		 * @brief Clears all shaders of a specific type from the shader map.
 		 *
@@ -535,13 +445,9 @@ namespace SIE
 
 		ShaderFileDependencyTracker* GetDependencyTracker() { return dependencyTracker.get(); }
 
-		// Use all logical cores minus one at startup for OS headroom (E-cores included).
-		// Management and file watcher run on dedicated jthreads, not pool slots.
-		// Background (in-game): half of P-cores only, to avoid starving the render thread.
-		int32_t compilationThreadCount = std::max(static_cast<int32_t>(std::thread::hardware_concurrency()) - 1, 1);
-		int32_t backgroundCompilationThreadCount = std::max(static_cast<int32_t>(Util::GetPerformanceCoreCount()) / 2, 1);
-		BS::thread_pool<> compilationPool{ static_cast<std::size_t>(compilationThreadCount) };
-		std::jthread managementJthread;  // dedicated thread for ManageCompilationSet (not in pool)
+		int32_t compilationThreadCount = std::max({ static_cast<int32_t>(std::thread::hardware_concurrency()) - 4, static_cast<int32_t>(std::thread::hardware_concurrency()) * 3 / 4, 1 });
+		int32_t backgroundCompilationThreadCount = std::max(static_cast<int32_t>(std::thread::hardware_concurrency()) / 2, 1);
+		BS::thread_pool<> compilationPool{};
 		bool backgroundCompilation = false;
 		bool menuLoaded = false;
 
@@ -811,7 +717,6 @@ namespace SIE
 		CompilationSet compilationSet;
 		ankerl::unordered_dense::map<std::string, ShaderCacheResult> shaderMap{};
 		std::mutex mapMutex;                                                                      // guard for shaderMap
-		std::condition_variable mapCV;                                                            // signalled when a Pending entry transitions to Completed/Failed
 		ankerl::unordered_dense::map<std::string, system_clock::time_point> modifiedShaderMap{};  // hashmap when a shader source file last modified
 		std::mutex modifiedMapMutex;                                                              // guard for modifiedShaderMap
 		ankerl::unordered_dense::map<std::string, std::set<hlslRecord>> hlslToShaderMap{};        // hashmap linking specific hlsl files to shader keys in shaderMap
@@ -851,8 +756,6 @@ namespace SIE
 		void processQueue();
 		void handleFileAction(efsw::WatchID, const std::string& dir, const std::string& filename, efsw::Action action, std::string) override;
 
-		std::jthread fileWatcherThread;  // dedicated thread for processQueue (not in pool)
-
 	private:
 		ShaderFileDependencyTracker* deps;
 		struct fileAction
diff --git a/src/Utils/Format.cpp b/src/Utils/Format.cpp
index 2b0d88905b..61f1d91195 100644
--- a/src/Utils/Format.cpp
+++ b/src/Utils/Format.cpp
@@ -158,21 +158,6 @@ namespace Util
 		}
 	}
 
-	std::string FormatDuration(double ms)
-	{
-		// Validate input: handle negative, NaN, and infinite values
-		if (!std::isfinite(ms) || ms < 0.0) {
-			return "00:00:00";
-		}
-
-		// Use int64_t to avoid overflow on long durations (>596 hours with int)
-		int64_t total_s = static_cast<int64_t>(ms) / 1000;
-		int64_t hours = total_s / 3600;
-		int64_t minutes = (total_s % 3600) / 60;
-		int64_t seconds = total_s % 60;
-		return fmt::format("{:02}:{:02}:{:02}", hours, minutes, seconds);
-	}
-
 	std::string TimeAgoString(std::chrono::steady_clock::time_point last)
 	{
 		using namespace std::chrono;
diff --git a/src/Utils/Format.h b/src/Utils/Format.h
index dcee025bce..5448581c7c 100644
--- a/src/Utils/Format.h
+++ b/src/Utils/Format.h
@@ -66,17 +66,6 @@ namespace Util
 	 */
 	std::string FormatTimeAgo(std::filesystem::file_time_type fileTime);
 
-	/**
-	 * Formats a duration given in milliseconds as HH:MM:SS.
-	 * Suitable for displaying long-running operation times (e.g. shader compilation).
-	 *
-	 * @param ms Duration in milliseconds. Fractional milliseconds are truncated.
-	 *           Non-finite (NaN/inf) or negative values are clamped to "00:00:00".
-	 *           Durations >= 24 hours display hours without limit (e.g., "125:34:56").
-	 * @return Formatted string like "00:02:35" or "00:00:00" for invalid inputs
-	 */
-	std::string FormatDuration(double ms);
-
 	/**
 	 * Formats a delta value with percentage difference for A/B test comparisons.
 	 * Returns a string like "+0.45 ms (+12.3%)" or "-0.23 ms (-8.1%)".
diff --git a/src/Utils/WinApi.cpp b/src/Utils/WinApi.cpp
index 7f3e810015..cef816e213 100644
--- a/src/Utils/WinApi.cpp
+++ b/src/Utils/WinApi.cpp
@@ -27,46 +27,4 @@ namespace Util
 
 		return std::nullopt;
 	}
-
-	uint32_t GetPerformanceCoreCount()
-	{
-		// Cache the result — CPU topology never changes at runtime.
-		// C++11 guarantees thread-safe initialisation of static locals.
-		static const uint32_t cached = []() -> uint32_t {
-			const uint32_t fallback = std::max(1u, std::thread::hardware_concurrency());
-
-			DWORD size = 0;
-			GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &size);
-			if (GetLastError() != ERROR_INSUFFICIENT_BUFFER || size == 0)
-				return fallback;
-
-			std::vector<uint8_t> buf(size);
-			auto* info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buf.data());
-			if (!GetLogicalProcessorInformationEx(RelationProcessorCore, info, &size))
-				return fallback;
-
-			// First pass: find the highest efficiency class present.
-			BYTE maxClass = 0;
-			for (DWORD offset = 0; offset < size;) {
-				auto* entry = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buf.data() + offset);
-				if (entry->Processor.EfficiencyClass > maxClass)
-					maxClass = entry->Processor.EfficiencyClass;
-				offset += entry->Size;
-			}
-
-			// Second pass: count logical processors on those (P-)cores.
-			uint32_t count = 0;
-			for (DWORD offset = 0; offset < size;) {
-				auto* entry = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buf.data() + offset);
-				if (entry->Processor.EfficiencyClass == maxClass) {
-					for (WORD g = 0; g < entry->Processor.GroupCount; ++g)
-						count += static_cast<uint32_t>(std::popcount(entry->Processor.GroupMask[g].Mask));
-				}
-				offset += entry->Size;
-			}
-
-			return count > 0 ? count : fallback;
-		}();
-		return cached;
-	}
 }  // namespace Util
diff --git a/src/Utils/WinApi.h b/src/Utils/WinApi.h
index 3b21ccd8a0..cf048b3d31 100644
--- a/src/Utils/WinApi.h
+++ b/src/Utils/WinApi.h
@@ -3,10 +3,4 @@
 namespace Util
 {
 	std::optional<REL::Version> GetDllVersion(const std::wstring& dllPath);
-
-	/// Returns the number of logical processors on the highest-efficiency cores
-	/// (P-cores on Intel hybrid CPUs). On non-hybrid CPUs all cores share the
-	/// same efficiency class, so this returns std::thread::hardware_concurrency().
-	/// Falls back to hardware_concurrency() on any API failure.
-	uint32_t GetPerformanceCoreCount();
 }  // namespace Util