Skip to content

Commit fe917d0

Browse files
committed
Disable RecyclerWatsonTelemetry + Use RDTSC for GetTickCount
Brings a ~4% perf improvement (http-load test, measured on xplat): - Disables RecyclerWatsonTelemetry for ChakraCore, which reduces the number of calls to the system clock API (<= ~1.5%). - Uses RDTSC for GetTickCount; this affects only xplat (<= ~3%).
1 parent c84503c commit fe917d0

File tree

7 files changed

+120
-41
lines changed

7 files changed

+120
-41
lines changed

Build/Chakra.Build.props

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<?xml version="1.0" encoding="utf-8"?>
22
<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3-
<Import Project="Common.Build.props"/>
3+
<Import Project="Common.Build.props"/>
44
<PropertyGroup>
55
<Win32_WinNTVersion Condition="'$(NtTargetVersion)'=='$(NtTargetVersion_Win7)'">0x0601</Win32_WinNTVersion>
66
<Win32_WinNTVersion Condition="'$(NtTargetVersion)'=='$(NtTargetVersion_Win8)'">0x0602</Win32_WinNTVersion>

lib/Common/Memory/Recycler.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,9 @@ Recycler::Recycler(AllocationPolicyManager * policyManager, IdleDecommitPageAllo
242242
#ifdef HEAP_ENUMERATION_VALIDATION
243243
,pfPostHeapEnumScanCallback(nullptr)
244244
#endif
245+
#ifdef NTBUILD
245246
, telemetryBlock(&localTelemetryBlock)
247+
#endif
246248
#ifdef ENABLE_JS_ETW
247249
,bulkFreeMemoryWrittenCount(0)
248250
#endif
@@ -321,7 +323,9 @@ Recycler::Recycler(AllocationPolicyManager * policyManager, IdleDecommitPageAllo
321323
this->inDetachProcess = false;
322324
#endif
323325

326+
#ifdef NTBUILD
324327
memset(&localTelemetryBlock, 0, sizeof(localTelemetryBlock));
328+
#endif
325329

326330
#ifdef ENABLE_DEBUG_CONFIG_OPTIONS
327331
// recycler requires at least Recycler::PrimaryMarkStackReservedPageCount to function properly for the main mark context
@@ -3426,8 +3430,10 @@ Recycler::Collect()
34263430

34273431
{
34283432
RECORD_TIMESTAMP(initialCollectionStartTime);
3433+
#ifdef NTBUILD
34293434
this->telemetryBlock->initialCollectionStartProcessUsedBytes = PageAllocator::GetProcessUsedBytes();
34303435
this->telemetryBlock->exhaustiveRepeatedCount = 0;
3436+
#endif
34313437

34323438
return DoCollectWrapped(finalFlags);
34333439
}
@@ -3554,7 +3560,9 @@ Recycler::DoCollect(CollectionFlags flags)
35543560
{
35553561
INC_TIMESTAMP_FIELD(exhaustiveRepeatedCount);
35563562
RECORD_TIMESTAMP(currentCollectionStartTime);
3563+
#ifdef NTBUILD
35573564
this->telemetryBlock->currentCollectionStartProcessUsedBytes = PageAllocator::GetProcessUsedBytes();
3565+
#endif
35583566

35593567
#if ENABLE_CONCURRENT_GC
35603568
// DisposeObject may call script again and start another GC, so we may still be in concurrent GC state
@@ -6910,7 +6918,7 @@ Recycler::FillCheckPad(void * address, size_t size, size_t alignedAllocSize, boo
69106918
}
69116919
}
69126920

6913-
void
6921+
void
69146922
Recycler::FillPadNoCheck(void * address, size_t size, size_t alignedAllocSize, bool objectAlreadyInitialized)
69156923
{
69166924
// Ignore the first word
@@ -8187,4 +8195,3 @@ RecyclerHeapObjectInfo::GetSize() const
81878195
}
81888196

81898197
template char* Recycler::AllocWithAttributesInlined<(Memory::ObjectInfoBits)32, false>(size_t);
8190-

lib/Common/Memory/Recycler.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1009,9 +1009,10 @@ class Recycler
10091009
#if DBG || defined(RECYCLER_STATS)
10101010
bool isForceSweeping;
10111011
#endif
1012+
#ifdef NTBUILD
10121013
RecyclerWatsonTelemetryBlock localTelemetryBlock;
10131014
RecyclerWatsonTelemetryBlock * telemetryBlock;
1014-
1015+
#endif
10151016
#ifdef RECYCLER_STATS
10161017
RecyclerCollectionStats collectionStats;
10171018
void PrintHeapBlockStats(char16 const * name, HeapBlock::HeapBlockType type);
@@ -1072,7 +1073,9 @@ class Recycler
10721073
void LogMemProtectHeapSize(bool fromGC);
10731074

10741075
char* Realloc(void* buffer, DECLSPEC_GUARD_OVERFLOW size_t existingBytes, DECLSPEC_GUARD_OVERFLOW size_t requestedBytes, bool truncate = true);
1076+
#ifdef NTBUILD
10751077
void SetTelemetryBlock(RecyclerWatsonTelemetryBlock * telemetryBlock) { this->telemetryBlock = telemetryBlock; }
1078+
#endif
10761079

10771080
void Prime();
10781081

lib/Common/Memory/RecyclerWatsonTelemetry.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,13 @@
22
// Copyright (C) Microsoft. All rights reserved.
33
// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
44
//-------------------------------------------------------------------------------------------------------
5+
#ifndef NTBUILD
6+
7+
#define RECORD_TIMESTAMP(Field)
8+
#define INC_TIMESTAMP_FIELD(Field)
9+
#define AUTO_TIMESTAMP(Field)
10+
11+
#else // NTBUILD
512
namespace Memory
613
{
714
/*
@@ -46,3 +53,4 @@ namespace Memory
4653
DWORD exhaustiveRepeatedCount;
4754
};
4855
};
56+
#endif

lib/Runtime/Base/ThreadContext.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,9 @@ ThreadContext::ThreadContext(AllocationPolicyManager * allocationPolicyManager,
178178
#endif
179179
dynamicObjectEnumeratorCacheMap(&HeapAllocator::Instance, 16),
180180
//threadContextFlags(ThreadContextFlagNoFlag),
181+
#ifdef NTBUILD
181182
telemetryBlock(&localTelemetryBlock),
183+
#endif
182184
configuration(enableExperimentalFeatures),
183185
jsrtRuntime(nullptr),
184186
propertyMap(nullptr),
@@ -248,7 +250,9 @@ ThreadContext::ThreadContext(AllocationPolicyManager * allocationPolicyManager,
248250
this->threadId = ::GetCurrentThreadId();
249251
#endif
250252

253+
#ifdef NTBUILD
251254
memset(&localTelemetryBlock, 0, sizeof(localTelemetryBlock));
255+
#endif
252256

253257
AutoCriticalSection autocs(ThreadContext::GetCriticalSection());
254258
ThreadContext::LinkToBeginning(this, &ThreadContext::globalListFirst, &ThreadContext::globalListLast);

lib/Runtime/Base/ThreadContext.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -204,11 +204,13 @@ class IProjectionContextMemoryInfo abstract
204204
#endif
205205
#endif
206206

207+
#ifdef NTBUILD
207208
// Pair of FILETIME stamps grouped for telemetry. ThreadContext embeds one
// instance (localTelemetryBlock) and also keeps a pointer to the block in use,
// which can be redirected through SetTelemetryBlock().
struct ThreadContextWatsonTelemetryBlock
208209
{
209210
// NOTE(review): presumably when the most recent script run began - confirm at the write site.
FILETIME lastScriptStartTime;
210211
// NOTE(review): presumably when the most recent script run finished - confirm at the write site.
FILETIME lastScriptEndTime;
211212
};
213+
#endif
212214

213215
class NativeLibraryEntryRecord
214216
{
@@ -781,8 +783,10 @@ class ThreadContext sealed :
781783
typedef JsUtil::BaseDictionary<Js::DynamicType const *, void *, HeapAllocator, PowerOf2SizePolicy> DynamicObjectEnumeratorCacheMap;
782784
DynamicObjectEnumeratorCacheMap dynamicObjectEnumeratorCacheMap;
783785

786+
#ifdef NTBUILD
784787
ThreadContextWatsonTelemetryBlock localTelemetryBlock;
785788
ThreadContextWatsonTelemetryBlock * telemetryBlock;
789+
#endif
786790

787791
NativeLibraryEntryRecord nativeLibraryEntry;
788792

@@ -1002,8 +1006,6 @@ class ThreadContext sealed :
10021006
#endif
10031007
}
10041008

1005-
1006-
10071009
DateTime::HiResTimer * GetHiResTimer() { return &hTimer; }
10081010
ArenaAllocator* GetThreadAlloc() { return &threadAlloc; }
10091011
static CriticalSection * GetCriticalSection() { return &s_csThreadContext; }
@@ -1014,7 +1016,9 @@ class ThreadContext sealed :
10141016
ThreadConfiguration const * GetConfig() const { return &configuration; }
10151017

10161018
public:
1019+
#ifdef NTBUILD
10171020
void SetTelemetryBlock(ThreadContextWatsonTelemetryBlock * telemetryBlock) { this->telemetryBlock = telemetryBlock; }
1021+
#endif
10181022

10191023
static ThreadContext* GetContextForCurrentThread();
10201024

pal/src/misc/time.cpp

Lines changed: 88 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//
22
// Copyright (c) Microsoft. All rights reserved.
3-
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
3+
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
44
//
55

66
/*++
@@ -75,8 +75,8 @@ time. The system time is expressed in Coordinated Universal Time
7575
7676
Parameters
7777
78-
lpSystemTime
79-
[out] Pointer to a SYSTEMTIME structure to receive the current system date and time.
78+
lpSystemTime
79+
[out] Pointer to a SYSTEMTIME structure to receive the current system date and time.
8080
8181
Return Values
8282
@@ -101,10 +101,10 @@ GetSystemTime(
101101

102102
tt = time(NULL);
103103

104-
/* We can't get millisecond resolution from time(), so we get it from
104+
/* We can't get millisecond resolution from time(), so we get it from
105105
gettimeofday() */
106106
timeofday_retval = gettimeofday(&timeval,NULL);
107-
107+
108108
#if HAVE_GMTIME_R
109109
utPtr = &ut;
110110
if (gmtime_r(&tt, utPtr) == NULL)
@@ -134,20 +134,20 @@ GetSystemTime(
134134
{
135135
int old_seconds;
136136
int new_seconds;
137-
137+
138138
lpSystemTime->wMilliseconds = timeval.tv_usec/tccMillieSecondsToMicroSeconds;
139-
139+
140140
old_seconds = utPtr->tm_sec;
141141
new_seconds = timeval.tv_sec%60;
142-
143-
/* just in case we reached the next second in the interval between
142+
143+
/* just in case we reached the next second in the interval between
144144
time() and gettimeofday() */
145145
if( old_seconds!=new_seconds )
146146
{
147147
TRACE("crossed seconds boundary; setting milliseconds to 999\n");
148148
lpSystemTime->wMilliseconds = 999;
149-
}
150-
}
149+
}
150+
}
151151
EXIT:
152152
LOGEXIT("GetSystemTime returns void\n");
153153
PERF_EXIT(GetSystemTime);
@@ -164,7 +164,7 @@ use the GetSystemTimeAdjustment function.
164164
165165
Parameters
166166
167-
This function has no parameters.
167+
This function has no parameters.
168168
169169
Return Values
170170
@@ -212,7 +212,7 @@ QueryPerformanceCounter(
212212
retval = FALSE;
213213
break;
214214
}
215-
lpPerformanceCount->QuadPart =
215+
lpPerformanceCount->QuadPart =
216216
(LONGLONG)ts.tv_sec * (LONGLONG)tccSecondsToNanoSeconds + (LONGLONG)ts.tv_nsec;
217217
}
218218
#elif HAVE_MACH_ABSOLUTE_TIME
@@ -233,22 +233,22 @@ QueryPerformanceCounter(
233233
retval = FALSE;
234234
break;
235235
}
236-
lpPerformanceCount->QuadPart =
236+
lpPerformanceCount->QuadPart =
237237
(LONGLONG)tb.tb_high * (LONGLONG)tccSecondsToNanoSeconds + (LONGLONG)tb.tb_low;
238238
}
239239
#else
240240
{
241-
struct timeval tv;
241+
struct timeval tv;
242242
if (gettimeofday(&tv, NULL) == -1)
243243
{
244244
ASSERT("gettimeofday() failed; errno is %d (%s)\n", errno, strerror(errno));
245245
retval = FALSE;
246246
break;
247247
}
248-
lpPerformanceCount->QuadPart =
249-
(LONGLONG)tv.tv_sec * (LONGLONG)tccSecondsToMicroSeconds + (LONGLONG)tv.tv_usec;
248+
lpPerformanceCount->QuadPart =
249+
(LONGLONG)tv.tv_sec * (LONGLONG)tccSecondsToMicroSeconds + (LONGLONG)tv.tv_usec;
250250
}
251-
#endif // HAVE_CLOCK_MONOTONIC
251+
#endif // HAVE_CLOCK_MONOTONIC
252252
while (false);
253253

254254
LOGEXIT("QueryPerformanceCounter\n");
@@ -280,7 +280,7 @@ QueryPerformanceFrequency(
280280
}
281281
#else
282282
lpFrequency->QuadPart = (LONGLONG)tccSecondsToMicroSeconds;
283-
#endif // HAVE_GETHRTIME || HAVE_READ_REAL_TIME || HAVE_CLOCK_MONOTONIC
283+
#endif // HAVE_GETHRTIME || HAVE_READ_REAL_TIME || HAVE_CLOCK_MONOTONIC
284284
LOGEXIT("QueryPerformanceFrequency\n");
285285
PERF_EXIT(QueryPerformanceFrequency);
286286
return retval;
@@ -324,24 +324,14 @@ QueryThreadCycleTime(
324324
return retval;
325325
}
326326

327-
/*++
328-
Function:
329-
GetTickCount64
330-
331-
Returns a 64-bit tick count with a millisecond resolution. It tries its best
332-
to return monotonically increasing counts and avoid being affected by changes
333-
to the system clock (either due to drift or due to explicit changes to system
334-
time).
335-
--*/
336-
PALAPI
337-
ULONGLONG
338-
GetTickCount64()
327+
static ULONGLONG
328+
GetTickCount64Fallback()
339329
{
340330
ULONGLONG retval = 0;
341331

342332
#if HAVE_CLOCK_MONOTONIC_COARSE || HAVE_CLOCK_MONOTONIC
343333
{
344-
clockid_t clockType =
334+
clockid_t clockType =
345335
#if HAVE_CLOCK_MONOTONIC_COARSE
346336
CLOCK_MONOTONIC_COARSE; // good enough resolution, fastest speed
347337
#else
@@ -382,16 +372,79 @@ GetTickCount64()
382372
}
383373
#else
384374
{
385-
struct timeval tv;
375+
struct timeval tv;
386376
if (gettimeofday(&tv, NULL) == -1)
387377
{
388378
ASSERT("gettimeofday() failed; errno is %d (%s)\n", errno, strerror(errno));
389379
goto EXIT;
390380
}
391381
retval = (tv.tv_sec * tccSecondsToMillieSeconds) + (tv.tv_usec / tccMillieSecondsToMicroSeconds);
392382
}
393-
#endif // HAVE_CLOCK_MONOTONIC
394-
EXIT:
383+
#endif // HAVE_CLOCK_MONOTONIC
384+
EXIT:
395385
return retval;
396386
}
397387

388+
#if defined(_X86_) || defined(__AMD64__) || defined(__x86_64__)
389+
// Reads the CPU time-stamp counter with the RDTSC instruction and returns the
// full 64-bit value.
//
// RDTSC places the low 32 bits in EAX and the high 32 bits in EDX on both
// 32-bit and 64-bit x86, so each half is captured in a 32-bit variable and the
// two are combined explicitly. Returning only the low half would wrap every
// 2^32 ticks, which makes any tick count derived from it jump backwards.
inline ULONGLONG rdtsc()
{
    unsigned int lo, hi;
    __asm volatile ("rdtsc" : "=a"(lo), "=d"(hi));
    return (static_cast<ULONGLONG>(hi) << 32) | lo;
}
399+
400+
static double CPUFreq()
401+
{
402+
struct timeval tstart, tend;
403+
ULONGLONG start, end;
404+
405+
struct timezone tzone;
406+
memset(&tzone, 0, sizeof(tzone));
407+
408+
start = rdtsc();
409+
gettimeofday(&tstart, &tzone);
410+
411+
usleep(1000); // 1ms
412+
413+
end = rdtsc();
414+
gettimeofday(&tend, &tzone);
415+
416+
ULONGLONG usec = ((tend.tv_sec - tstart.tv_sec)*1e6)
417+
+ (tend.tv_usec - tstart.tv_usec);
418+
419+
if (!usec) return 0;
420+
return (end - start) / usec;
421+
}
422+
423+
// TSC ticks per millisecond: CPUFreq() yields ticks per microsecond, and the
// 1e3 factor rescales that to milliseconds. The double result is truncated to
// an integer here. Stays 0 when CPUFreq() returns 0.
static ULONGLONG cpu_speed = CPUFreq() * 1e3; // ticks/us * 1e3 => ticks/ms
424+
// Signature shared by the two tick sources selectable below.
typedef ULONGLONG (*GetTickCount64FallbackCB)(void);
425+
// Converts the raw time-stamp counter to milliseconds using cpu_speed.
// Divides by cpu_speed, so it must only be selected when cpu_speed is non-zero.
inline ULONGLONG FastTickCount()
426+
{
427+
return rdtsc() / cpu_speed;
428+
}
429+
// Chosen once during static initialization: the RDTSC-based source when a
// non-zero cpu_speed was measured, otherwise GetTickCount64Fallback.
static GetTickCount64FallbackCB getTickCount64FallbackCB = cpu_speed ? FastTickCount : GetTickCount64Fallback;
430+
#endif
431+
432+
/*++
433+
Function:
434+
GetTickCount64
435+
436+
Returns a 64-bit tick count with a millisecond resolution. It tries its best
437+
to return monotonically increasing counts and avoid being affected by changes
438+
to the system clock (either due to drift or due to explicit changes to system
439+
time).
440+
--*/
441+
PALAPI
ULONGLONG
GetTickCount64()
{
#if defined(_X86_) || defined(__AMD64__) || defined(__x86_64__)
    // getTickCount64FallbackCB is initialized dynamically (its initializer
    // reads cpu_speed, which is computed at startup). If this function is
    // reached from another translation unit's static initializer that runs
    // first, the pointer is still null; use the clock-based implementation
    // rather than calling through a null function pointer.
    GetTickCount64FallbackCB tickSource = getTickCount64FallbackCB;
    return tickSource ? tickSource() : GetTickCount64Fallback();
#else
    return GetTickCount64Fallback();
#endif
}

0 commit comments

Comments
 (0)