diff --git a/lib/winapi/profiling.c b/lib/winapi/profiling.c index e2b735060..33316b7b2 100644 --- a/lib/winapi/profiling.c +++ b/lib/winapi/profiling.c @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: 2019 Stefan Schmidt +#define USE_RDTSC_FOR_FREQ #include #ifdef USE_RDTSC_FOR_FREQ #include @@ -19,6 +20,7 @@ static void __attribute__((constructor)) PrimeQueryPerformanceFrequency () #define NV_PTIMER_NUM *(volatile ULONG *)0xFD009200 #define NV_PTIMER_DEN *(volatile ULONG *)0xFD009210 #define NV_PTIMER_COUNT 0xFD009400 + #define ptr_PTIMER_COUNT *(volatile ULONG *)0xFD009400 #define ASM_LOOPS 1024 * 4 ULARGE_INTEGER rdtsc_count_1 = {{0, 0}}, rdtsc_count_2 = {{0, 0}}; @@ -33,60 +35,38 @@ static void __attribute__((constructor)) PrimeQueryPerformanceFrequency () KeEnterCriticalRegion(); + // Turn off caches __asm { - push eax - push edx - push ecx - cli sfence - - // Turn off caches mov eax, cr0 or eax, 1 << 30 // Set CD bit mov cr0, eax wbinvd - // Reset PTIMER - mov eax, [NV_PTIMER_COUNT] - and eax, ~(0xFFFFFFE0) // First 5 bits are not used - mov [NV_PTIMER_COUNT], eax - - rdtsc - mov rdtsc_count_1.LowPart, eax - mov rdtsc_count_1.HighPart, edx - - mov eax, [NV_PTIMER_COUNT] - mov ptimer_count_1, eax - - // Spin for a bit - mov eax, ASM_LOOPS - loop_1: - dec eax - jnz loop_1 - - rdtsc - mov rdtsc_count_2.LowPart, eax - mov rdtsc_count_2.HighPart, edx - - mov eax, [NV_PTIMER_COUNT] - mov ptimer_count_2, eax - - // Without this, invaldidating the cache below will crash the system - sfence + } + + ptr_PTIMER_COUNT &= ~(0xFFFFFFE0); // First 5 bits are not used + + rdtsc_count_1.QuadPart = __rdtsc(); + ptimer_count_1 = ptr_PTIMER_COUNT; + + KeStallExecutionProcessor(10); + rdtsc_count_2.QuadPart = __rdtsc(); + ptimer_count_2 = ptr_PTIMER_COUNT; + + __asm + { + sfence mov eax, cr0 and eax, ~(1 << 30) // Clear CD bit mov cr0, eax wbinvd - sti - - pop ecx - pop edx - pop eax } + KeLeaveCriticalRegion(); double ptimer_diff = (ptimer_count_2 >> 5) - (ptimer_count_1 >> 5); @@ -116,6 +96,7 @@ BOOL QueryPerformanceFrequency (LARGE_INTEGER *lpFrequency) assert(lpFrequency != NULL); #ifdef USE_RDTSC_FOR_FREQ + PrimeQueryPerformanceFrequency(); lpFrequency->QuadPart = frequency.QuadPart; #else lpFrequency->QuadPart = 733333333;