[scudo] Make local cache be agnostic to the type of node in freelist (llvm#67379)

This change moves `TransferBatch` and `BatchGroup` out of
SizeClassAllocatorLocalCache. It allows a node in the freelist to store
more blocks, instead of tying the node's capacity to the number of
blocks cached.

That means we will be able to store more blocks in each node of the
freelist and therefore reduce the memory used by BatchClass (with little
performance overhead). Note that this patch does not enable that
behavior yet; it is the first step of the transition.
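
Roughly, the decoupling looks like this (a toy sketch with hypothetical
`Toy*` names, not the scudo sources): the batch node type is owned by the
primary allocator, and the local cache refills from a plain array of
compact pointers, so it never needs to name the node type.

// Toy model of the layering after this patch. ToyTransferBatch and
// ToyPrimary are hypothetical stand-ins; the real popBlocks also takes
// the cache as a parameter.
#include <algorithm>
#include <cstdint>
#include <cstdio>

using u16 = std::uint16_t;
using uptr = std::uintptr_t;

// Free-standing node type keyed on the allocator, as in allocator_common.h.
template <class Allocator> struct ToyTransferBatch {
  using CompactPtrT = typename Allocator::CompactPtrT;
  static const u16 MaxNumCached = Allocator::MaxNumCachedHint;
  CompactPtrT Batch[MaxNumCached];
  u16 Count = 0;
};

struct ToyPrimary {
  using CompactPtrT = std::uint32_t;
  static const u16 MaxNumCachedHint = 8;

  // The cache only sees this array-refill interface; the node type above
  // stays an implementation detail of the primary.
  u16 popBlocks(uptr /*ClassId*/, CompactPtrT *ToArray) {
    ToyTransferBatch<ToyPrimary> B; // would come off the freelist
    B.Count = 4;
    for (u16 I = 0; I < B.Count; ++I)
      B.Batch[I] = static_cast<CompactPtrT>(0x1000 + I);
    std::copy(B.Batch, B.Batch + B.Count, ToArray); // cf. moveToArray()
    return B.Count;
  }
};

int main() {
  ToyPrimary Primary;
  ToyPrimary::CompactPtrT Chunks[ToyPrimary::MaxNumCachedHint];
  const u16 N = Primary.popBlocks(/*ClassId=*/3, Chunks);
  std::printf("refilled %u blocks, first = 0x%x\n", N, Chunks[0]);
  return 0;
}

Because the refill contract is now "fill this array", a future node type
can carry more blocks than the cache holds without the cache changing at
all.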
ChiaHungDuan authored Oct 9, 2023
1 parent 7a73da4 commit b9c6737
Showing 6 changed files with 167 additions and 118 deletions.
1 change: 1 addition & 0 deletions compiler-rt/lib/scudo/standalone/CMakeLists.txt
@@ -56,6 +56,7 @@ if(ANDROID)
 endif()
 
 set(SCUDO_HEADERS
+  allocator_common.h
   allocator_config.h
   atomic_helpers.h
   bytemap.h
85 changes: 85 additions & 0 deletions compiler-rt/lib/scudo/standalone/allocator_common.h
@@ -0,0 +1,85 @@
+//===-- allocator_common.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_ALLOCATOR_COMMON_H_
+#define SCUDO_ALLOCATOR_COMMON_H_
+
+#include "common.h"
+#include "list.h"
+
+namespace scudo {
+
+template <class SizeClassAllocator> struct TransferBatch {
+  typedef typename SizeClassAllocator::SizeClassMap SizeClassMap;
+  typedef typename SizeClassAllocator::CompactPtrT CompactPtrT;
+
+  static const u16 MaxNumCached = SizeClassMap::MaxNumCachedHint;
+  void setFromArray(CompactPtrT *Array, u16 N) {
+    DCHECK_LE(N, MaxNumCached);
+    Count = N;
+    memcpy(Batch, Array, sizeof(Batch[0]) * Count);
+  }
+  void appendFromArray(CompactPtrT *Array, u16 N) {
+    DCHECK_LE(N, MaxNumCached - Count);
+    memcpy(Batch + Count, Array, sizeof(Batch[0]) * N);
+    // u16 will be promoted to int by arithmetic type conversion.
+    Count = static_cast<u16>(Count + N);
+  }
+  void appendFromTransferBatch(TransferBatch *B, u16 N) {
+    DCHECK_LE(N, MaxNumCached - Count);
+    DCHECK_GE(B->Count, N);
+    // Append from the back of `B`.
+    memcpy(Batch + Count, B->Batch + (B->Count - N), sizeof(Batch[0]) * N);
+    // u16 will be promoted to int by arithmetic type conversion.
+    Count = static_cast<u16>(Count + N);
+    B->Count = static_cast<u16>(B->Count - N);
+  }
+  void clear() { Count = 0; }
+  void add(CompactPtrT P) {
+    DCHECK_LT(Count, MaxNumCached);
+    Batch[Count++] = P;
+  }
+  void moveToArray(CompactPtrT *Array) {
+    memcpy(Array, Batch, sizeof(Batch[0]) * Count);
+    clear();
+  }
+  u16 getCount() const { return Count; }
+  bool isEmpty() const { return Count == 0U; }
+  CompactPtrT get(u16 I) const {
+    DCHECK_LE(I, Count);
+    return Batch[I];
+  }
+  TransferBatch *Next;
+
+private:
+  CompactPtrT Batch[MaxNumCached];
+  u16 Count;
+};
+
+// A BatchGroup is used to collect blocks. Each group has a group id to
+// identify the group kind of contained blocks.
+template <class SizeClassAllocator> struct BatchGroup {
+  // `Next` is used by IntrusiveList.
+  BatchGroup *Next;
+  // The compact base address of each group
+  uptr CompactPtrGroupBase;
+  // Cache value of SizeClassAllocatorLocalCache::getMaxCached()
+  u16 MaxCachedPerBatch;
+  // Number of blocks pushed into this group. This is an increment-only
+  // counter.
+  uptr PushedBlocks;
+  // This is used to track how many bytes are not in-use since last time we
+  // tried to release pages.
+  uptr BytesInBGAtLastCheckpoint;
+  // Blocks are managed by TransferBatch in a list.
+  SinglyLinkedList<TransferBatch<SizeClassAllocator>> Batches;
+};
+
+} // namespace scudo
+
+#endif // SCUDO_ALLOCATOR_COMMON_H_
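
A minimal usage sketch of the API above (assuming it is compiled inside
the scudo standalone tree; `MiniAllocator` is a hypothetical trait
provider, not part of the patch):

#include "allocator_common.h"

// Hypothetical allocator exposing the two members TransferBatch expects.
struct MiniAllocator {
  struct SizeClassMap {
    static const scudo::u16 MaxNumCachedHint = 8;
  };
  typedef scudo::u32 CompactPtrT;
};

void example() {
  scudo::TransferBatch<MiniAllocator> TB;
  TB.clear();                  // Count is not zero-initialized
  TB.add(0x10);                // push one compact pointer
  MiniAllocator::CompactPtrT More[2] = {0x20, 0x30};
  TB.appendFromArray(More, 2); // bulk append; Count is now 3
  MiniAllocator::CompactPtrT Out[8];
  TB.moveToArray(Out);         // drain into a cache array and clear
}

Both node types are carved out of BatchClass blocks, which is why
primary32.h below re-asserts that sizeof(BatchGroup) <=
sizeof(TransferBatch).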
110 changes: 19 additions & 91 deletions compiler-rt/lib/scudo/standalone/local_cache.h
@@ -22,74 +22,6 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
   typedef typename SizeClassAllocator::SizeClassMap SizeClassMap;
   typedef typename SizeClassAllocator::CompactPtrT CompactPtrT;
 
-  struct TransferBatch {
-    static const u16 MaxNumCached = SizeClassMap::MaxNumCachedHint;
-    void setFromArray(CompactPtrT *Array, u16 N) {
-      DCHECK_LE(N, MaxNumCached);
-      Count = N;
-      memcpy(Batch, Array, sizeof(Batch[0]) * Count);
-    }
-    void appendFromArray(CompactPtrT *Array, u16 N) {
-      DCHECK_LE(N, MaxNumCached - Count);
-      memcpy(Batch + Count, Array, sizeof(Batch[0]) * N);
-      // u16 will be promoted to int by arithmetic type conversion.
-      Count = static_cast<u16>(Count + N);
-    }
-    void appendFromTransferBatch(TransferBatch *B, u16 N) {
-      DCHECK_LE(N, MaxNumCached - Count);
-      DCHECK_GE(B->Count, N);
-      // Append from the back of `B`.
-      memcpy(Batch + Count, B->Batch + (B->Count - N), sizeof(Batch[0]) * N);
-      // u16 will be promoted to int by arithmetic type conversion.
-      Count = static_cast<u16>(Count + N);
-      B->Count = static_cast<u16>(B->Count - N);
-    }
-    void clear() { Count = 0; }
-    void add(CompactPtrT P) {
-      DCHECK_LT(Count, MaxNumCached);
-      Batch[Count++] = P;
-    }
-    void copyToArray(CompactPtrT *Array) const {
-      memcpy(Array, Batch, sizeof(Batch[0]) * Count);
-    }
-    u16 getCount() const { return Count; }
-    bool isEmpty() const { return Count == 0U; }
-    CompactPtrT get(u16 I) const {
-      DCHECK_LE(I, Count);
-      return Batch[I];
-    }
-    static u16 getMaxCached(uptr Size) {
-      return Min(MaxNumCached, SizeClassMap::getMaxCachedHint(Size));
-    }
-    TransferBatch *Next;
-
-  private:
-    CompactPtrT Batch[MaxNumCached];
-    u16 Count;
-  };
-
-  // A BatchGroup is used to collect blocks. Each group has a group id to
-  // identify the group kind of contained blocks.
-  struct BatchGroup {
-    // `Next` is used by IntrusiveList.
-    BatchGroup *Next;
-    // The compact base address of each group
-    uptr CompactPtrGroupBase;
-    // Cache value of TransferBatch::getMaxCached()
-    u16 MaxCachedPerBatch;
-    // Number of blocks pushed into this group. This is an increment-only
-    // counter.
-    uptr PushedBlocks;
-    // This is used to track how many bytes are not in-use since last time we
-    // tried to release pages.
-    uptr BytesInBGAtLastCheckpoint;
-    // Blocks are managed by TransferBatch in a list.
-    SinglyLinkedList<TransferBatch> Batches;
-  };
-
-  static_assert(sizeof(BatchGroup) <= sizeof(TransferBatch),
-                "BatchGroup uses the same class size as TransferBatch");
-
   void init(GlobalStats *S, SizeClassAllocator *A) {
     DCHECK(isEmpty());
     Stats.init();
@@ -151,7 +83,7 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
   }
 
   void drain() {
-    // Drain BatchClassId last as createBatch can refill it.
+    // Drain BatchClassId last as it may be needed while draining normal blocks.
     for (uptr I = 0; I < NumClasses; ++I) {
       if (I == BatchClassId)
         continue;
@@ -163,19 +95,11 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
     DCHECK(isEmpty());
   }
 
-  TransferBatch *createBatch(uptr ClassId, void *B) {
-    if (ClassId != BatchClassId)
-      B = allocate(BatchClassId);
+  void *getBatchClassBlock() {
+    void *B = allocate(BatchClassId);
     if (UNLIKELY(!B))
       reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId));
-    return reinterpret_cast<TransferBatch *>(B);
-  }
-
-  BatchGroup *createGroup() {
-    void *Ptr = allocate(BatchClassId);
-    if (UNLIKELY(!Ptr))
-      reportOutOfMemory(SizeClassAllocator::getSizeByClassId(BatchClassId));
-    return reinterpret_cast<BatchGroup *>(Ptr);
+    return B;
   }
 
   LocalStats &getStats() { return Stats; }
@@ -203,6 +127,11 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
       Str->append("    No block is cached.\n");
   }
 
+  static u16 getMaxCached(uptr Size) {
+    return Min(SizeClassMap::MaxNumCachedHint,
+               SizeClassMap::getMaxCachedHint(Size));
+  }
+
 private:
   static const uptr NumClasses = SizeClassMap::NumClasses;
   static const uptr BatchClassId = SizeClassMap::BatchClassId;
@@ -211,7 +140,7 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
     u16 MaxCount;
     // Note: ClassSize is zero for the transfer batch.
    uptr ClassSize;
-    CompactPtrT Chunks[2 * TransferBatch::MaxNumCached];
+    CompactPtrT Chunks[2 * SizeClassMap::MaxNumCachedHint];
   };
   PerClass PerClassArray[NumClasses] = {};
   LocalStats Stats;
@@ -228,7 +157,7 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
     for (uptr I = 0; I < NumClasses; I++) {
       PerClass *P = &PerClassArray[I];
      const uptr Size = SizeClassAllocator::getSizeByClassId(I);
-      P->MaxCount = static_cast<u16>(2 * TransferBatch::getMaxCached(Size));
+      P->MaxCount = static_cast<u16>(2 * getMaxCached(Size));
       if (I != BatchClassId) {
         P->ClassSize = Size;
       } else {
@@ -246,15 +175,14 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
 
   NOINLINE bool refill(PerClass *C, uptr ClassId) {
     initCacheMaybe(C);
-    TransferBatch *B = Allocator->popBatch(this, ClassId);
-    if (UNLIKELY(!B))
-      return false;
-    DCHECK_GT(B->getCount(), 0);
-    C->Count = B->getCount();
-    B->copyToArray(C->Chunks);
-    B->clear();
-    destroyBatch(ClassId, B);
-    return true;
+
+    // TODO(chiahungduan): Pass the max number cached for each size class.
+    const u16 NumBlocksRefilled =
+        Allocator->popBlocks(this, ClassId, C->Chunks);
+    DCHECK_LE(NumBlocksRefilled,
+              getMaxCached(SizeClassAllocator::getSizeByClassId(ClassId)));
+    C->Count += NumBlocksRefilled;
+    return NumBlocksRefilled != 0;
   }
 
   NOINLINE void drain(PerClass *C, uptr ClassId) {
41 changes: 29 additions & 12 deletions compiler-rt/lib/scudo/standalone/primary32.h
@@ -9,6 +9,7 @@
 #ifndef SCUDO_PRIMARY32_H_
 #define SCUDO_PRIMARY32_H_
 
+#include "allocator_common.h"
 #include "bytemap.h"
 #include "common.h"
 #include "list.h"
@@ -53,8 +54,11 @@ template <typename Config> class SizeClassAllocator32 {
                 "");
   typedef SizeClassAllocator32<Config> ThisT;
   typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
-  typedef typename CacheT::TransferBatch TransferBatch;
-  typedef typename CacheT::BatchGroup BatchGroup;
+  typedef TransferBatch<ThisT> TransferBatch;
+  typedef BatchGroup<ThisT> BatchGroup;
+
+  static_assert(sizeof(BatchGroup) <= sizeof(TransferBatch),
+                "BatchGroup uses the same class size as TransferBatch");
 
   static uptr getSizeByClassId(uptr ClassId) {
     return (ClassId == SizeClassMap::BatchClassId)
@@ -187,6 +191,21 @@ template <typename Config> class SizeClassAllocator32 {
     return BlockSize > PageSize;
   }
 
+  u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray) {
+    TransferBatch *B = popBatch(C, ClassId);
+    if (!B)
+      return 0;
+
+    const u16 Count = B->getCount();
+    DCHECK_GT(Count, 0U);
+    B->moveToArray(ToArray);
+
+    if (ClassId != SizeClassMap::BatchClassId)
+      C->deallocate(SizeClassMap::BatchClassId, B);
+
+    return Count;
+  }
+
   TransferBatch *popBatch(CacheT *C, uptr ClassId) {
     DCHECK_LT(ClassId, NumClasses);
     SizeClassInfo *Sci = getSizeClassInfo(ClassId);
@@ -520,8 +539,8 @@ template <typename Config> class SizeClassAllocator32 {
     // from `CreateGroup` in `pushBlocksImpl`
     BG->PushedBlocks = 1;
     BG->BytesInBGAtLastCheckpoint = 0;
-    BG->MaxCachedPerBatch = TransferBatch::getMaxCached(
-        getSizeByClassId(SizeClassMap::BatchClassId));
+    BG->MaxCachedPerBatch =
+        CacheT::getMaxCached(getSizeByClassId(SizeClassMap::BatchClassId));
 
     Sci->FreeListInfo.BlockList.push_front(BG);
   }
@@ -600,17 +619,17 @@ template <typename Config> class SizeClassAllocator32 {
     DCHECK_GT(Size, 0U);
 
     auto CreateGroup = [&](uptr CompactPtrGroupBase) {
-      BatchGroup *BG = C->createGroup();
+      BatchGroup *BG = reinterpret_cast<BatchGroup *>(C->getBatchClassBlock());
       BG->Batches.clear();
-      TransferBatch *TB = C->createBatch(ClassId, nullptr);
+      TransferBatch *TB =
+          reinterpret_cast<TransferBatch *>(C->getBatchClassBlock());
       TB->clear();
 
       BG->CompactPtrGroupBase = CompactPtrGroupBase;
       BG->Batches.push_front(TB);
      BG->PushedBlocks = 0;
       BG->BytesInBGAtLastCheckpoint = 0;
-      BG->MaxCachedPerBatch =
-          TransferBatch::getMaxCached(getSizeByClassId(ClassId));
+      BG->MaxCachedPerBatch = CacheT::getMaxCached(getSizeByClassId(ClassId));
 
       return BG;
     };
@@ -625,9 +644,7 @@ template <typename Config> class SizeClassAllocator32 {
       u16 UnusedSlots =
           static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
       if (UnusedSlots == 0) {
-        CurBatch = C->createBatch(
-            ClassId,
-            reinterpret_cast<void *>(decompactPtr(ClassId, Array[I])));
+        CurBatch = reinterpret_cast<TransferBatch *>(C->getBatchClassBlock());
         CurBatch->clear();
         Batches.push_front(CurBatch);
         UnusedSlots = BG->MaxCachedPerBatch;
@@ -775,7 +792,7 @@ template <typename Config> class SizeClassAllocator32 {
     }
 
     const uptr Size = getSizeByClassId(ClassId);
-    const u16 MaxCount = TransferBatch::getMaxCached(Size);
+    const u16 MaxCount = CacheT::getMaxCached(Size);
     DCHECK_GT(MaxCount, 0U);
     // The maximum number of blocks we should carve in the region is dictated
     // by the maximum number of batches we want to fill, and the amount of