Skip to content

Commit c405019

Browse files
committed
Disable RecyclerWatsonTelemetry + Use RDTSC for GetTickCount
Brings a ~4% perf improvement (http-load test, measured on xplat): - Disables RecyclerWatsonTelemetry for ChakraCore (reduces the number of calls to the system clock API; up to ~1.5%) - Uses RDTSC for GetTickCount (affects only xplat; up to ~3%)
1 parent c84503c commit c405019

File tree

8 files changed

+115
-39
lines changed

8 files changed

+115
-39
lines changed

Build/Chakra.Build.props

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<?xml version="1.0" encoding="utf-8"?>
22
<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3-
<Import Project="Common.Build.props"/>
3+
<Import Project="Common.Build.props"/>
44
<PropertyGroup>
55
<Win32_WinNTVersion Condition="'$(NtTargetVersion)'=='$(NtTargetVersion_Win7)'">0x0601</Win32_WinNTVersion>
66
<Win32_WinNTVersion Condition="'$(NtTargetVersion)'=='$(NtTargetVersion_Win8)'">0x0602</Win32_WinNTVersion>
@@ -18,7 +18,8 @@
1818
%(PreprocessorDefinitions);
1919
_WIN32_WINNT=$(Win32_WinNTVersion);
2020
WINVER=$(Win32_WinNTVersion);
21-
WIN32_LEAN_AND_MEAN=1
21+
WIN32_LEAN_AND_MEAN=1;
22+
TARGET_CHAKRACORE
2223
</PreprocessorDefinitions>
2324
<PreprocessorDefinitions Condition="'$(BuildJIT)'=='false'">
2425
%(PreprocessorDefinitions);

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,7 @@ add_subdirectory (pal)
332332
add_definitions(
333333
-DNO_PAL_MINMAX
334334
-DPAL_STDCPP_COMPAT
335+
-DTARGET_CHAKRACORE
335336
)
336337
add_subdirectory (lib)
337338
add_subdirectory (bin)

lib/Common/Memory/Recycler.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,9 @@ Recycler::Recycler(AllocationPolicyManager * policyManager, IdleDecommitPageAllo
242242
#ifdef HEAP_ENUMERATION_VALIDATION
243243
,pfPostHeapEnumScanCallback(nullptr)
244244
#endif
245+
#ifndef TARGET_CHAKRACORE
245246
, telemetryBlock(&localTelemetryBlock)
247+
#endif
246248
#ifdef ENABLE_JS_ETW
247249
,bulkFreeMemoryWrittenCount(0)
248250
#endif
@@ -321,7 +323,9 @@ Recycler::Recycler(AllocationPolicyManager * policyManager, IdleDecommitPageAllo
321323
this->inDetachProcess = false;
322324
#endif
323325

326+
#ifndef TARGET_CHAKRACORE
324327
memset(&localTelemetryBlock, 0, sizeof(localTelemetryBlock));
328+
#endif
325329

326330
#ifdef ENABLE_DEBUG_CONFIG_OPTIONS
327331
// recycler requires at least Recycler::PrimaryMarkStackReservedPageCount to function properly for the main mark context
@@ -3426,8 +3430,10 @@ Recycler::Collect()
34263430

34273431
{
34283432
RECORD_TIMESTAMP(initialCollectionStartTime);
3433+
#ifndef TARGET_CHAKRACORE
34293434
this->telemetryBlock->initialCollectionStartProcessUsedBytes = PageAllocator::GetProcessUsedBytes();
34303435
this->telemetryBlock->exhaustiveRepeatedCount = 0;
3436+
#endif
34313437

34323438
return DoCollectWrapped(finalFlags);
34333439
}
@@ -3554,7 +3560,9 @@ Recycler::DoCollect(CollectionFlags flags)
35543560
{
35553561
INC_TIMESTAMP_FIELD(exhaustiveRepeatedCount);
35563562
RECORD_TIMESTAMP(currentCollectionStartTime);
3563+
#ifndef TARGET_CHAKRACORE
35573564
this->telemetryBlock->currentCollectionStartProcessUsedBytes = PageAllocator::GetProcessUsedBytes();
3565+
#endif
35583566

35593567
#if ENABLE_CONCURRENT_GC
35603568
// DisposeObject may call script again and start another GC, so we may still be in concurrent GC state
@@ -6910,7 +6918,7 @@ Recycler::FillCheckPad(void * address, size_t size, size_t alignedAllocSize, boo
69106918
}
69116919
}
69126920

6913-
void
6921+
void
69146922
Recycler::FillPadNoCheck(void * address, size_t size, size_t alignedAllocSize, bool objectAlreadyInitialized)
69156923
{
69166924
// Ignore the first word
@@ -8187,4 +8195,3 @@ RecyclerHeapObjectInfo::GetSize() const
81878195
}
81888196

81898197
template char* Recycler::AllocWithAttributesInlined<(Memory::ObjectInfoBits)32, false>(size_t);
8190-

lib/Common/Memory/Recycler.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1009,9 +1009,10 @@ class Recycler
10091009
#if DBG || defined(RECYCLER_STATS)
10101010
bool isForceSweeping;
10111011
#endif
1012+
#ifndef TARGET_CHAKRACORE
10121013
RecyclerWatsonTelemetryBlock localTelemetryBlock;
10131014
RecyclerWatsonTelemetryBlock * telemetryBlock;
1014-
1015+
#endif
10151016
#ifdef RECYCLER_STATS
10161017
RecyclerCollectionStats collectionStats;
10171018
void PrintHeapBlockStats(char16 const * name, HeapBlock::HeapBlockType type);
@@ -1072,7 +1073,9 @@ class Recycler
10721073
void LogMemProtectHeapSize(bool fromGC);
10731074

10741075
char* Realloc(void* buffer, DECLSPEC_GUARD_OVERFLOW size_t existingBytes, DECLSPEC_GUARD_OVERFLOW size_t requestedBytes, bool truncate = true);
1076+
#ifndef TARGET_CHAKRACORE
10751077
void SetTelemetryBlock(RecyclerWatsonTelemetryBlock * telemetryBlock) { this->telemetryBlock = telemetryBlock; }
1078+
#endif
10761079

10771080
void Prime();
10781081

lib/Common/Memory/RecyclerWatsonTelemetry.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,13 @@
22
// Copyright (C) Microsoft. All rights reserved.
33
// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
44
//-------------------------------------------------------------------------------------------------------
5+
#ifdef TARGET_CHAKRACORE
6+
7+
#define RECORD_TIMESTAMP(Field)
8+
#define INC_TIMESTAMP_FIELD(Field)
9+
#define AUTO_TIMESTAMP(Field)
10+
11+
#else // CHAKRA_FULL
512
namespace Memory
613
{
714
/*
@@ -46,3 +53,4 @@ namespace Memory
4653
DWORD exhaustiveRepeatedCount;
4754
};
4855
};
56+
#endif

lib/Runtime/Base/ThreadContext.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,9 @@ ThreadContext::ThreadContext(AllocationPolicyManager * allocationPolicyManager,
178178
#endif
179179
dynamicObjectEnumeratorCacheMap(&HeapAllocator::Instance, 16),
180180
//threadContextFlags(ThreadContextFlagNoFlag),
181+
#ifndef TARGET_CHAKRACORE
181182
telemetryBlock(&localTelemetryBlock),
183+
#endif
182184
configuration(enableExperimentalFeatures),
183185
jsrtRuntime(nullptr),
184186
propertyMap(nullptr),
@@ -248,7 +250,9 @@ ThreadContext::ThreadContext(AllocationPolicyManager * allocationPolicyManager,
248250
this->threadId = ::GetCurrentThreadId();
249251
#endif
250252

253+
#ifndef TARGET_CHAKRACORE
251254
memset(&localTelemetryBlock, 0, sizeof(localTelemetryBlock));
255+
#endif
252256

253257
AutoCriticalSection autocs(ThreadContext::GetCriticalSection());
254258
ThreadContext::LinkToBeginning(this, &ThreadContext::globalListFirst, &ThreadContext::globalListLast);

lib/Runtime/Base/ThreadContext.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,11 +204,13 @@ class IProjectionContextMemoryInfo abstract
204204
#endif
205205
#endif
206206

207+
#ifndef TARGET_CHAKRACORE
207208
struct ThreadContextWatsonTelemetryBlock
208209
{
209210
FILETIME lastScriptStartTime;
210211
FILETIME lastScriptEndTime;
211212
};
213+
#endif
212214

213215
class NativeLibraryEntryRecord
214216
{
@@ -781,8 +783,10 @@ class ThreadContext sealed :
781783
typedef JsUtil::BaseDictionary<Js::DynamicType const *, void *, HeapAllocator, PowerOf2SizePolicy> DynamicObjectEnumeratorCacheMap;
782784
DynamicObjectEnumeratorCacheMap dynamicObjectEnumeratorCacheMap;
783785

786+
#ifndef TARGET_CHAKRACORE
784787
ThreadContextWatsonTelemetryBlock localTelemetryBlock;
785788
ThreadContextWatsonTelemetryBlock * telemetryBlock;
789+
#endif
786790

787791
NativeLibraryEntryRecord nativeLibraryEntry;
788792

@@ -1014,7 +1018,9 @@ class ThreadContext sealed :
10141018
ThreadConfiguration const * GetConfig() const { return &configuration; }
10151019

10161020
public:
1021+
#ifndef TARGET_CHAKRACORE
10171022
void SetTelemetryBlock(ThreadContextWatsonTelemetryBlock * telemetryBlock) { this->telemetryBlock = telemetryBlock; }
1023+
#endif
10181024

10191025
static ThreadContext* GetContextForCurrentThread();
10201026

pal/src/misc/time.cpp

Lines changed: 80 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//
22
// Copyright (c) Microsoft. All rights reserved.
3-
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
3+
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
44
//
55

66
/*++
@@ -75,8 +75,8 @@ time. The system time is expressed in Coordinated Universal Time
7575
7676
Parameters
7777
78-
lpSystemTime
79-
[out] Pointer to a SYSTEMTIME structure to receive the current system date and time.
78+
lpSystemTime
79+
[out] Pointer to a SYSTEMTIME structure to receive the current system date and time.
8080
8181
Return Values
8282
@@ -101,10 +101,10 @@ GetSystemTime(
101101

102102
tt = time(NULL);
103103

104-
/* We can't get millisecond resolution from time(), so we get it from
104+
/* We can't get millisecond resolution from time(), so we get it from
105105
gettimeofday() */
106106
timeofday_retval = gettimeofday(&timeval,NULL);
107-
107+
108108
#if HAVE_GMTIME_R
109109
utPtr = &ut;
110110
if (gmtime_r(&tt, utPtr) == NULL)
@@ -134,20 +134,20 @@ GetSystemTime(
134134
{
135135
int old_seconds;
136136
int new_seconds;
137-
137+
138138
lpSystemTime->wMilliseconds = timeval.tv_usec/tccMillieSecondsToMicroSeconds;
139-
139+
140140
old_seconds = utPtr->tm_sec;
141141
new_seconds = timeval.tv_sec%60;
142-
143-
/* just in case we reached the next second in the interval between
142+
143+
/* just in case we reached the next second in the interval between
144144
time() and gettimeofday() */
145145
if( old_seconds!=new_seconds )
146146
{
147147
TRACE("crossed seconds boundary; setting milliseconds to 999\n");
148148
lpSystemTime->wMilliseconds = 999;
149-
}
150-
}
149+
}
150+
}
151151
EXIT:
152152
LOGEXIT("GetSystemTime returns void\n");
153153
PERF_EXIT(GetSystemTime);
@@ -164,7 +164,7 @@ use the GetSystemTimeAdjustment function.
164164
165165
Parameters
166166
167-
This function has no parameters.
167+
This function has no parameters.
168168
169169
Return Values
170170
@@ -212,7 +212,7 @@ QueryPerformanceCounter(
212212
retval = FALSE;
213213
break;
214214
}
215-
lpPerformanceCount->QuadPart =
215+
lpPerformanceCount->QuadPart =
216216
(LONGLONG)ts.tv_sec * (LONGLONG)tccSecondsToNanoSeconds + (LONGLONG)ts.tv_nsec;
217217
}
218218
#elif HAVE_MACH_ABSOLUTE_TIME
@@ -233,22 +233,22 @@ QueryPerformanceCounter(
233233
retval = FALSE;
234234
break;
235235
}
236-
lpPerformanceCount->QuadPart =
236+
lpPerformanceCount->QuadPart =
237237
(LONGLONG)tb.tb_high * (LONGLONG)tccSecondsToNanoSeconds + (LONGLONG)tb.tb_low;
238238
}
239239
#else
240240
{
241-
struct timeval tv;
241+
struct timeval tv;
242242
if (gettimeofday(&tv, NULL) == -1)
243243
{
244244
ASSERT("gettimeofday() failed; errno is %d (%s)\n", errno, strerror(errno));
245245
retval = FALSE;
246246
break;
247247
}
248-
lpPerformanceCount->QuadPart =
249-
(LONGLONG)tv.tv_sec * (LONGLONG)tccSecondsToMicroSeconds + (LONGLONG)tv.tv_usec;
248+
lpPerformanceCount->QuadPart =
249+
(LONGLONG)tv.tv_sec * (LONGLONG)tccSecondsToMicroSeconds + (LONGLONG)tv.tv_usec;
250250
}
251-
#endif // HAVE_CLOCK_MONOTONIC
251+
#endif // HAVE_CLOCK_MONOTONIC
252252
while (false);
253253

254254
LOGEXIT("QueryPerformanceCounter\n");
@@ -280,7 +280,7 @@ QueryPerformanceFrequency(
280280
}
281281
#else
282282
lpFrequency->QuadPart = (LONGLONG)tccSecondsToMicroSeconds;
283-
#endif // HAVE_GETHRTIME || HAVE_READ_REAL_TIME || HAVE_CLOCK_MONOTONIC
283+
#endif // HAVE_GETHRTIME || HAVE_READ_REAL_TIME || HAVE_CLOCK_MONOTONIC
284284
LOGEXIT("QueryPerformanceFrequency\n");
285285
PERF_EXIT(QueryPerformanceFrequency);
286286
return retval;
@@ -324,24 +324,52 @@ QueryThreadCycleTime(
324324
return retval;
325325
}
326326

327-
/*++
328-
Function:
329-
GetTickCount64
327+
#if defined(_X86_) || defined(__AMD64__)
328+
// Reads the CPU time-stamp counter with the RDTSC instruction.
//
// RDTSC returns the low 32 bits of the counter in EAX and the high 32 bits in
// EDX. Both halves are always combined into a 64-bit value, including on
// 32-bit x86: returning only the low word makes the result wrap every 2^32
// cycles (a second or two on a GHz-class CPU), which breaks any millisecond
// tick count derived from it.
inline uint64_t rdtsc()
{
    uint32_t lo, hi;
    __asm volatile ("rdtsc" : "=a"(lo), "=d"(hi));
    return (static_cast<uint64_t>(hi) << 32) | lo;
}
330338

331-
Returns a 64-bit tick count with a millisecond resolution. It tries its best
332-
to return monotonically increasing counts and avoid being affected by changes
333-
to the system clock (either due to drift or due to explicit changes to system
334-
time).
335-
--*/
336-
PALAPI
337-
ULONGLONG
338-
GetTickCount64()
339+
static double CPUFreq()
340+
{
341+
struct timeval tstart, tend;
342+
size_t start, end;
343+
344+
struct timezone tzone;
345+
memset(&tzone, 0, sizeof(tzone));
346+
347+
start = rdtsc();
348+
gettimeofday(&tstart, &tzone);
349+
350+
usleep(1000); // 1ms
351+
352+
end = rdtsc();
353+
gettimeofday(&tend, &tzone);
354+
355+
size_t usec = ((tend.tv_sec - tstart.tv_sec)*1e6)
356+
+ (tend.tv_usec - tstart.tv_usec);
357+
358+
if (!usec) return 0;
359+
return (end - start) / usec;
360+
}
361+
362+
static uint32_t cpu_speed = CPUFreq() * 1e3; // TSC ticks per millisecond: CPUFreq() returns ticks per microsecond, x1e3 => ticks per ms (0 when CPUFreq() returns 0)
363+
#endif
364+
365+
static ULONGLONG
366+
GetTickCount64Fallback()
339367
{
340368
ULONGLONG retval = 0;
341369

342370
#if HAVE_CLOCK_MONOTONIC_COARSE || HAVE_CLOCK_MONOTONIC
343371
{
344-
clockid_t clockType =
372+
clockid_t clockType =
345373
#if HAVE_CLOCK_MONOTONIC_COARSE
346374
CLOCK_MONOTONIC_COARSE; // good enough resolution, fastest speed
347375
#else
@@ -382,16 +410,34 @@ GetTickCount64()
382410
}
383411
#else
384412
{
385-
struct timeval tv;
413+
struct timeval tv;
386414
if (gettimeofday(&tv, NULL) == -1)
387415
{
388416
ASSERT("gettimeofday() failed; errno is %d (%s)\n", errno, strerror(errno));
389417
goto EXIT;
390418
}
391419
retval = (tv.tv_sec * tccSecondsToMillieSeconds) + (tv.tv_usec / tccMillieSecondsToMicroSeconds);
392420
}
393-
#endif // HAVE_CLOCK_MONOTONIC
394-
EXIT:
421+
#endif // HAVE_CLOCK_MONOTONIC
422+
EXIT:
395423
return retval;
396424
}
425+
/*++
426+
Function:
427+
GetTickCount64
397428
429+
Returns a 64-bit tick count with a millisecond resolution. It tries its best
430+
to return monotonically increasing counts and avoid being affected by changes
431+
to the system clock (either due to drift or due to explicit changes to system
432+
time).
433+
--*/
434+
PALAPI
ULONGLONG
GetTickCount64()
{
#if defined(_X86_) || defined(__AMD64__)
    // Fast path: derive milliseconds from the time-stamp counter when the
    // start-up calibration produced a non-zero ticks-per-millisecond value.
    if (cpu_speed != 0)
    {
        return rdtsc() / cpu_speed;
    }
#endif
    // No RDTSC on this target, or calibration yielded 0: use the OS clock.
    return GetTickCount64Fallback();
}

0 commit comments

Comments
 (0)