From 510b8d512de7793ae94ddc500170476a2462df2f Mon Sep 17 00:00:00 2001
From: "Gang Zhao (Hermes)"
Date: Mon, 21 Oct 2024 17:07:21 -0700
Subject: [PATCH] Allocate cards and boundaries array separately in CardTable
 for large segment (#1507)

Summary:
For a large segment, the fixed-size `cards` and `boundaries` arrays are not
large enough to manage the entire segment. This diff adds pointers to the two
arrays in SHSegmentInfo. For a normal segment, each pointer just points to the
corresponding inline array in CardTable; for a large segment, it points to a
separately allocated array.

Differential Revision: D61747510
---
 include/hermes/VM/AlignedHeapSegment.h  |  8 +-
 include/hermes/VM/CardTableNC.h         | 99 +++++++++++++++++++------
 include/hermes/VM/sh_segment_info.h     |  6 ++
 lib/VM/gcs/AlignedHeapSegment.cpp       |  2 +-
 lib/VM/gcs/CardTableNC.cpp              | 16 ++--
 unittests/VMRuntime/CardTableNCTest.cpp | 43 +++++++----
 6 files changed, 127 insertions(+), 47 deletions(-)

diff --git a/include/hermes/VM/AlignedHeapSegment.h b/include/hermes/VM/AlignedHeapSegment.h
index 8278fc0a8be..81acde4377e 100644
--- a/include/hermes/VM/AlignedHeapSegment.h
+++ b/include/hermes/VM/AlignedHeapSegment.h
@@ -80,6 +80,10 @@ class AlignedHeapSegmentBase {
     friend class AlignedHeapSegment;
     friend class AlignedHeapSegmentBase;
 
+    /// Pass the segment size to the CardTable constructor so that it can
+    /// allocate its data separately if \p segmentSize > kSegmentUnitSize.
+    Contents(size_t segmentSize) : cardTable_(segmentSize) {}
+
     /// Note that because of the Contents object, the first few bytes of the
     /// card table are unused, we instead use them to store a small
     /// SHSegmentInfo struct.
@@ -215,9 +219,9 @@ class AlignedHeapSegmentBase {
   AlignedHeapSegmentBase() = default;
 
   /// Construct Contents() at the address of \p lowLim.
-  AlignedHeapSegmentBase(void *lowLim)
+  AlignedHeapSegmentBase(void *lowLim, size_t segmentSize)
       : lowLim_(reinterpret_cast<char *>(lowLim)) {
-    new (contents()) Contents();
+    new (contents()) Contents(segmentSize);
     contents()->protectGuardPage(oscompat::ProtectMode::None);
   }
 
diff --git a/include/hermes/VM/CardTableNC.h b/include/hermes/VM/CardTableNC.h
index f4b2c62a5cf..f332954a5bb 100644
--- a/include/hermes/VM/CardTableNC.h
+++ b/include/hermes/VM/CardTableNC.h
@@ -22,10 +22,16 @@ namespace hermes {
 namespace vm {
 
 /// The card table optimizes young gen collections by restricting the amount of
-/// heap belonging to the old gen that must be scanned. The card table expects
-/// to be constructed inside an AlignedHeapSegment's storage, at some position
-/// before the allocation region, and covers the extent of that storage's
-/// memory.
+/// heap belonging to the old gen that must be scanned. The card table expects
+/// to be constructed at the beginning of a segment's storage, and covers the
+/// extent of that storage's memory. There are two cases:
+/// 1. For AlignedHeapSegment, the inline CardStatus array and Boundary array
+///    in the card table are large enough.
+/// 2. For JumboHeapSegment, the two arrays are allocated separately.
+/// In either case, the pointers to the CardStatus array and Boundary array are
+/// stored in the \c cards and \c boundaries fields of SHSegmentInfo, which
+/// occupies the prefix bytes of the card table that are reserved for the
+/// auxiliary data structures of a segment.
 ///
 /// Also supports the following query: Given a card in the heap that intersects
 /// with the used portion of its segment, find its "crossing object" -- the
@@ -58,14 +64,19 @@ class CardTable {
     const char *address_{nullptr};
   };
 
+  enum class CardStatus : char { Clean = 0, Dirty = 1 };
+
   /// The size (and base-two log of the size) of cards used in the card table.
   static constexpr size_t kLogCardSize = 9; // ==> 512-byte cards.
   static constexpr size_t kCardSize = 1 << kLogCardSize; // ==> 512-byte cards.
-  static constexpr size_t kSegmentSize = 1 << HERMESVM_LOG_HEAP_SEGMENT_SIZE;
+  /// Maximum size of a segment with inline cards and boundaries arrays.
+  static constexpr size_t kSegmentUnitSize = 1
+      << HERMESVM_LOG_HEAP_SEGMENT_SIZE;
 
   /// The size of the maximum inline card table. The CardStatus and boundary
   /// arrays for larger segments are bigger and are allocated separately.
-  static constexpr size_t kInlineCardTableSize = kSegmentSize >> kLogCardSize;
+  static constexpr size_t kInlineCardTableSize =
+      kSegmentUnitSize >> kLogCardSize;
 
   /// For convenience, this is a conversion factor to determine how many bytes
   /// in the heap correspond to a single byte in the card table. This is
@@ -93,9 +104,22 @@ class CardTable {
       sizeof(SHSegmentInfo), (2 * kInlineCardTableSize) >> kLogCardSize);
 
-  CardTable() {
-    // Preserve the segment size.
-    segmentInfo_.segmentSize = kSegmentSize;
+  CardTable(size_t segmentSize) {
+    assert(
+        segmentSize && segmentSize % kSegmentUnitSize == 0 &&
+        "segmentSize must be a multiple of kSegmentUnitSize");
+
+    segmentInfo_.segmentSize = segmentSize;
+    if (segmentSize == kSegmentUnitSize) {
+      // Just use the inline storage.
+      setCards(inlineCardStatusArray);
+      setBoundaries(inlineBoundaryArray_);
+    } else {
+      size_t cardTableSize = segmentSize >> kLogCardSize;
+      // CardStatus is Clean by default, so zero-initialize the array.
+      setCards(new AtomicIfConcurrentGC<CardStatus>[cardTableSize] {});
+      setBoundaries(new int8_t[cardTableSize]);
+    }
   }
 
   /// CardTable is not copyable or movable: It must be constructed in-place.
   CardTable(const CardTable &) = delete;
@@ -103,6 +127,16 @@ class CardTable {
   CardTable(CardTable &&) = delete;
   CardTable &operator=(const CardTable &) = delete;
   CardTable &operator=(CardTable &&) = delete;
 
+  ~CardTable() {
+    // If the CardStatus/Boundary arrays were allocated separately, free them.
+    if (cards() != inlineCardStatusArray) {
+      delete[] cards();
+    }
+    if (boundaries() != inlineBoundaryArray_) {
+      delete[] boundaries();
+    }
+  }
+
   /// Returns the card table index corresponding to a byte at the given
   /// address.
   /// \pre \p addr must be within the bounds of the segment owning this card
   /// table or at most 1 card after it, that is to say
@@ -115,8 +149,7 @@ class CardTable {
   /// of how this is used.
   inline size_t addressToIndex(const void *addr) const
       LLVM_NO_SANITIZE("null");
 
-  /// Returns the address corresponding to the given card table
-  /// index.
+  /// Returns the address corresponding to the given card table index.
   ///
   /// \pre \p index is bounded:
   ///
@@ -146,7 +179,7 @@ class CardTable {
   inline OptValue<size_t> findNextDirtyCard(size_t fromIndex, size_t endIndex)
       const;
 
-  /// If there is a card card at or after \p fromIndex, at an index less than
+  /// If there is a card at or after \p fromIndex, at an index less than
   /// \p endIndex, returns the index of the clean card, else returns none.
   inline OptValue<size_t> findNextCleanCard(size_t fromIndex, size_t endIndex)
       const;
@@ -197,7 +230,7 @@ class CardTable {
   /// for the given \p index.
   /// TODO(T48709128): remove this when the problem is diagnosed.
   int8_t cardObjectTableValue(unsigned index) const {
-    return boundaries_[index];
+    return boundaries()[index];
   }
 
   /// These methods protect and unprotect, respectively, the memory
@@ -234,7 +267,21 @@ class CardTable {
   }
 #endif
 
-  enum class CardStatus : char { Clean = 0, Dirty = 1 };
+  void setCards(AtomicIfConcurrentGC<CardStatus> *cards) {
+    segmentInfo_.cards = cards;
+  }
+
+  AtomicIfConcurrentGC<CardStatus> *cards() const {
+    return static_cast<AtomicIfConcurrentGC<CardStatus> *>(segmentInfo_.cards);
+  }
+
+  void setBoundaries(int8_t *boundaries) {
+    segmentInfo_.boundaries = boundaries;
+  }
+
+  int8_t *boundaries() const {
+    return segmentInfo_.boundaries;
+  }
 
   /// \return The lowest address whose card can be dirtied in this array. i.e.
   /// The smallest address such that
@@ -272,16 +319,24 @@ class CardTable {
   union {
     /// The bytes occupied by segmentInfo_ are guaranteed not to be overridden
    /// by writes to the cards array. See static assertions in AlignedHeapSegmentBase.
+    /// Pointers to the underlying CardStatus array and boundary array are
+    /// stored in it. Note that we could also store the boundary array in a
+    /// union along with inlineBoundaryArray_, since that array has unused
+    /// prefix bytes as well. That would save 8 bytes here, but it makes the
+    /// size check more complex, as we would need to ensure that the segment
+    /// size is large enough that inlineBoundaryArray_ has enough unused
+    /// prefix bytes to store the pointer.
     SHSegmentInfo segmentInfo_;
 
     /// This needs to be atomic so that the background thread in Hades can
    /// safely dirty cards when compacting.
-    std::array<AtomicIfConcurrentGC<CardStatus>, kInlineCardTableSize> cards_{};
+    AtomicIfConcurrentGC<CardStatus>
+        inlineCardStatusArray[kInlineCardTableSize]{};
   };
 
   /// See the comment at kHeapBytesPerCardByte above to see why this is
   /// necessary.
   static_assert(
-      sizeof(cards_[0]) == 1,
+      sizeof(inlineCardStatusArray[0]) == 1,
       "Validate assumption that card table entries are one byte");
 
   /// Each card has a corresponding signed byte in the boundaries_ table. A
@@ -294,7 +349,7 @@ class CardTable {
   /// time: If we allocate a large object that crosses many cards, the first
   /// crossed card gets a non-negative value, and each subsequent one uses the
   /// maximum exponent that stays within the card range for the object.
-  int8_t boundaries_[kInlineCardTableSize];
+  int8_t inlineBoundaryArray_[kInlineCardTableSize];
 };
 
 /// Implementations of inlines.
@@ -333,7 +388,7 @@ inline const char *CardTable::indexToAddress(size_t index) const {
 }
 
 inline void CardTable::dirtyCardForAddress(const void *addr) {
-  cards_[addressToIndex(addr)].store(
+  cards()[addressToIndex(addr)].store(
       CardStatus::Dirty, std::memory_order_relaxed);
 }
 
@@ -343,7 +398,7 @@ inline bool CardTable::isCardForAddressDirty(const void *addr) const {
 
 inline bool CardTable::isCardForIndexDirty(size_t index) const {
   assert(index < getEndIndex() && "index is required to be in range.");
-  return cards_[index].load(std::memory_order_relaxed) == CardStatus::Dirty;
+  return cards()[index].load(std::memory_order_relaxed) == CardStatus::Dirty;
 }
 
 inline OptValue<size_t> CardTable::findNextDirtyCard(
@@ -367,9 +422,9 @@ inline CardTable::Boundary CardTable::nextBoundary(const char *level) const {
 }
 
 inline const char *CardTable::base() const {
-  // As we know the card table is laid out inline before the allocation region
-  // of its aligned heap segment, we can use its own this pointer as the base
-  // address.
+  // As we know the card table is laid out inline at the beginning of the
+  // segment storage, which is before the allocation region, we can use its
+  // own this pointer as the base address.
   return reinterpret_cast<const char *>(this);
 }
 
diff --git a/include/hermes/VM/sh_segment_info.h b/include/hermes/VM/sh_segment_info.h
index 0bc4539c52d..7683afc7b9e 100644
--- a/include/hermes/VM/sh_segment_info.h
+++ b/include/hermes/VM/sh_segment_info.h
@@ -15,6 +15,12 @@ typedef struct SHSegmentInfo {
   /// The storage size for this segment. We practically don't support segments
   /// with size larger than UINT32_MAX.
   unsigned segmentSize;
+  /// Pointer to the CardStatus array for this segment. The actual type,
+  /// AtomicIfConcurrentGC<CardStatus>, is erased here to avoid using a C++
+  /// type and forward declaring a nested type in this C header.
+  void *cards;
+  /// Pointer to the boundary array for this segment.
+  int8_t *boundaries;
 } SHSegmentInfo;
 
 #endif
diff --git a/lib/VM/gcs/AlignedHeapSegment.cpp b/lib/VM/gcs/AlignedHeapSegment.cpp
index 62b0c416904..a1220b60d1c 100644
--- a/lib/VM/gcs/AlignedHeapSegment.cpp
+++ b/lib/VM/gcs/AlignedHeapSegment.cpp
@@ -61,7 +61,7 @@ llvh::ErrorOr<AlignedHeapSegment> AlignedHeapSegment::create(
 }
 
 AlignedHeapSegment::AlignedHeapSegment(StorageProvider *provider, void *lowLim)
-    : AlignedHeapSegmentBase(lowLim), provider_(provider) {
+    : AlignedHeapSegmentBase(lowLim, kSize), provider_(provider) {
   assert(
       storageStart(lowLim_) == lowLim_ &&
       "The lower limit of this storage must be aligned");
diff --git a/lib/VM/gcs/CardTableNC.cpp b/lib/VM/gcs/CardTableNC.cpp
index a3d94078364..10937e15192 100644
--- a/lib/VM/gcs/CardTableNC.cpp
+++ b/lib/VM/gcs/CardTableNC.cpp
@@ -31,7 +31,7 @@ OptValue<size_t> CardTable::findNextCardWithStatus(
     size_t fromIndex,
     size_t endIndex) const {
   for (size_t idx = fromIndex; idx < endIndex; idx++)
-    if (cards_[idx].load(std::memory_order_relaxed) == status)
+    if (cards()[idx].load(std::memory_order_relaxed) == status)
       return idx;
 
   return llvh::None;
@@ -66,7 +66,7 @@ void CardTable::cleanOrDirtyRange(
     size_t to,
     CardStatus cleanOrDirty) {
   for (size_t index = from; index < to; index++) {
-    cards_[index].store(cleanOrDirty, std::memory_order_relaxed);
+    cards()[index].store(cleanOrDirty, std::memory_order_relaxed);
   }
 }
 
@@ -87,7 +87,7 @@ void CardTable::updateBoundaries(
       "Precondition: must have crossed boundary.");
   // The object may be large, and may cross multiple cards, but first
   // handle the first card.
-  boundaries_[boundary->index()] =
+  boundaries()[boundary->index()] =
       (boundary->address() - start) >> LogHeapAlign;
 
   boundary->bump();
@@ -100,7 +100,7 @@ void CardTable::updateBoundaries(
   unsigned currentIndexDelta = 1;
   unsigned numWithCurrentExp = 0;
   while (boundary->address() < end) {
-    boundaries_[boundary->index()] = encodeExp(currentExp);
+    boundaries()[boundary->index()] = encodeExp(currentExp);
     numWithCurrentExp++;
     if (numWithCurrentExp == currentIndexDelta) {
       numWithCurrentExp = 0;
@@ -114,14 +114,14 @@ void CardTable::updateBoundaries(
 }
 
 GCCell *CardTable::firstObjForCard(unsigned index) const {
-  int8_t val = boundaries_[index];
+  int8_t val = boundaries()[index];
 
   // If val is negative, it means skip backwards some number of cards.
   // In general, for an object crossing 2^N cards, a query for one of
   // those cards will examine at most N entries in the table.
   while (val < 0) {
     index -= 1 << decodeExp(val);
-    val = boundaries_[index];
+    val = boundaries()[index];
   }
 
   char *boundary = const_cast<char *>(indexToAddress(index));
@@ -141,12 +141,12 @@ protectBoundaryTableWork(void *table, size_t sz, oscompat::ProtectMode mode) {
 
 void CardTable::protectBoundaryTable() {
   protectBoundaryTableWork(
-      &boundaries_[0], getEndIndex(), oscompat::ProtectMode::None);
+      boundaries(), getEndIndex(), oscompat::ProtectMode::None);
 }
 
 void CardTable::unprotectBoundaryTable() {
   protectBoundaryTableWork(
-      &boundaries_[0], getEndIndex(), oscompat::ProtectMode::ReadWrite);
+      boundaries(), getEndIndex(), oscompat::ProtectMode::ReadWrite);
 }
 
 #endif // HERMES_EXTRA_DEBUG
diff --git a/unittests/VMRuntime/CardTableNCTest.cpp b/unittests/VMRuntime/CardTableNCTest.cpp
index 67d1450e566..c5bdf04e4a6 100644
--- a/unittests/VMRuntime/CardTableNCTest.cpp
+++ b/unittests/VMRuntime/CardTableNCTest.cpp
@@ -22,7 +22,11 @@ using namespace hermes::vm;
 
 namespace {
 
-struct CardTableNCTest : public ::testing::Test {
+struct CardTableParam {
+  size_t segmentSize;
+};
+
+struct CardTableNCTest : public ::testing::TestWithParam<CardTableParam> {
   CardTableNCTest();
 
   /// Run a test scenario whereby we dirty [dirtyStart, dirtyEnd], and then test
@@ -38,7 +42,7 @@ struct CardTableNCTest : public ::testing::Test {
   std::unique_ptr<StorageProvider> provider{StorageProvider::mmapProvider()};
   AlignedHeapSegment seg{
       std::move(AlignedHeapSegment::create(provider.get()).get())};
-  CardTable *table{new (seg.lowLim()) CardTable()};
+  CardTable *table{nullptr};
 
   // Addresses in the aligned storage to interact with during the tests.
   std::vector<char *> addrs;
@@ -57,6 +61,9 @@ void CardTableNCTest::dirtyRangeTest(
 }
 
 CardTableNCTest::CardTableNCTest() {
+  auto &param = GetParam();
+  table = new (seg.lowLim()) CardTable(param.segmentSize);
+
   // For purposes of this test, we'll assume the first writeable byte of
   // the segment comes just after the memory region that can be mapped by
   // kFirstUsedIndex bytes.
@@ -77,7 +84,7 @@ CardTableNCTest::CardTableNCTest() {
   EXPECT_TRUE(std::is_sorted(addrs.begin(), addrs.end()));
 }
 
-TEST_F(CardTableNCTest, AddressToIndex) {
+TEST_P(CardTableNCTest, AddressToIndex) {
   // Expected indices in the card table corresponding to the probe
   // addresses into the storage.
   const size_t lastIx = table->getEndIndex() - 1;
@@ -100,7 +107,7 @@ TEST_F(CardTableNCTest, AddressToIndex) {
   }
 }
 
-TEST_F(CardTableNCTest, AddressToIndexBoundary) {
+TEST_P(CardTableNCTest, AddressToIndexBoundary) {
   // This test only works if the card table is laid out at the very beginning of
   // the storage.
   ASSERT_EQ(seg.lowLim(), reinterpret_cast<char *>(table));
   size_t hiLim = table->getEndIndex();
   EXPECT_EQ(hiLim, table->addressToIndex(seg.hiLim()));
 }
 
@@ -110,7 +117,7 @@ TEST_F(CardTableNCTest, AddressToIndexBoundary) {
-TEST_F(CardTableNCTest, DirtyAddress) {
+TEST_P(CardTableNCTest, DirtyAddress) {
   const size_t lastIx = table->getEndIndex() - 1;
 
   for (char *addr : addrs) {
@@ -135,7 +142,7 @@ TEST_F(CardTableNCTest, DirtyAddress) {
 }
 
 /// Dirty an empty range.
-TEST_F(CardTableNCTest, DirtyAddressRangeEmpty) {
+TEST_P(CardTableNCTest, DirtyAddressRangeEmpty) {
   char *addr = addrs.at(0);
   table->dirtyCardsForAddressRange(addr, addr);
   EXPECT_FALSE(table->findNextDirtyCard(
@@ -143,7 +150,7 @@ TEST_F(CardTableNCTest, DirtyAddressRangeEmpty) {
       CardTable::kFirstUsedIndex, table->getEndIndex()));
 }
 
 /// Dirty an address range smaller than a single card.
-TEST_F(CardTableNCTest, DirtyAddressRangeSmall) {
+TEST_P(CardTableNCTest, DirtyAddressRangeSmall) {
   char *addr = addrs.at(0);
   dirtyRangeTest(
       /* expectedStart */ addr,
@@ -153,7 +160,7 @@ TEST_F(CardTableNCTest, DirtyAddressRangeSmall) {
 }
 
 /// Dirty an address range corresponding exactly to a card.
-TEST_F(CardTableNCTest, DirtyAddressRangeCard) {
+TEST_P(CardTableNCTest, DirtyAddressRangeCard) {
   char *addr = addrs.at(0);
   dirtyRangeTest(
       /* expectedStart */ addr,
@@ -164,7 +171,7 @@ TEST_F(CardTableNCTest, DirtyAddressRangeCard) {
 
 /// Dirty an address range the width of a card but spread across a card
 /// boundary.
-TEST_F(CardTableNCTest, DirtyAddressRangeCardOverlapping) {
+TEST_P(CardTableNCTest, DirtyAddressRangeCardOverlapping) {
   char *addr = addrs.at(0);
   char *start = addr + CardTable::kCardSize / 2;
   dirtyRangeTest(
@@ -176,7 +183,7 @@ TEST_F(CardTableNCTest, DirtyAddressRangeCardOverlapping) {
 
 /// Dirty an address range spanning multiple cards, with overhang on either
 /// side.
-TEST_F(CardTableNCTest, DirtyAddressRangeLarge) {
+TEST_P(CardTableNCTest, DirtyAddressRangeLarge) {
   char *addr = addrs.at(0);
   char *start = addr + CardTable::kCardSize / 2;
   dirtyRangeTest(
@@ -186,13 +193,13 @@ TEST_F(CardTableNCTest, DirtyAddressRangeLarge) {
       /* expectedEnd */ addr + 4 * CardTable::kCardSize);
 }
 
-TEST_F(CardTableNCTest, Initial) {
+TEST_P(CardTableNCTest, Initial) {
   for (char *addr : addrs) {
     EXPECT_FALSE(table->isCardForAddressDirty(addr));
   }
 }
 
-TEST_F(CardTableNCTest, Clear) {
+TEST_P(CardTableNCTest, Clear) {
   for (char *addr : addrs) {
     ASSERT_FALSE(table->isCardForAddressDirty(addr));
   }
@@ -211,7 +218,7 @@ TEST_F(CardTableNCTest, Clear) {
   }
 }
 
-TEST_F(CardTableNCTest, NextDirtyCardImmediate) {
+TEST_P(CardTableNCTest, NextDirtyCardImmediate) {
   char *addr = addrs.at(addrs.size() / 2);
   size_t ind = table->addressToIndex(addr);
 
@@ -222,7 +229,7 @@ TEST_F(CardTableNCTest, NextDirtyCardImmediate) {
   EXPECT_EQ(ind, *dirty);
 }
 
-TEST_F(CardTableNCTest, NextDirtyCard) {
+TEST_P(CardTableNCTest, NextDirtyCard) {
   /// Empty case: No dirty cards
   EXPECT_FALSE(table->findNextDirtyCard(
       CardTable::kFirstUsedIndex, table->getEndIndex()));
@@ -246,6 +253,14 @@ TEST_F(CardTableNCTest, NextDirtyCard) {
   }
 }
 
+INSTANTIATE_TEST_CASE_P(
+    CardTableNCTests,
+    CardTableNCTest,
+    ::testing::Values(
+        CardTableParam{AlignedHeapSegmentBase::kSegmentUnitSize},
+        CardTableParam{AlignedHeapSegmentBase::kSegmentUnitSize * 8},
+        CardTableParam{AlignedHeapSegmentBase::kSegmentUnitSize * 128}));
+
 } // namespace
 
 #endif // HERMESVM_GC_MALLOC
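
For readers who want the allocation scheme in isolation: the essence of the
patch is that the card table keeps fixed-size inline arrays that are exactly
big enough for a one-unit segment, and switches to heap-allocated arrays for
anything larger, with the destructor freeing only the separately allocated
case. The following is a minimal, self-contained sketch of that scheme, not
the Hermes API: the names (CardTableSketch, kSegmentUnitSize here as an
assumed 4 MiB constant) are illustrative, and plain bytes stand in for the
atomic CardStatus entries the real table uses.

#include <cassert>
#include <cstddef>
#include <cstdint>

namespace sketch {

constexpr size_t kLogCardSize = 9;                   // 512-byte cards.
constexpr size_t kSegmentUnitSize = size_t{1} << 22; // Assumed 4 MiB unit.
constexpr size_t kInlineTableSize = kSegmentUnitSize >> kLogCardSize;

struct CardTableSketch {
  explicit CardTableSketch(size_t segmentSize) {
    assert(segmentSize && segmentSize % kSegmentUnitSize == 0);
    if (segmentSize == kSegmentUnitSize) {
      // Normal segment: the inline arrays are large enough.
      cards = inlineCards;
      boundaries = inlineBoundaries;
    } else {
      // Jumbo segment: one card byte per 512 heap bytes, so the arrays must
      // grow with the segment; allocate them separately on the heap.
      size_t n = segmentSize >> kLogCardSize;
      cards = new uint8_t[n]{}; // Zero-initialized: all cards start clean.
      boundaries = new int8_t[n];
    }
  }
  ~CardTableSketch() {
    // Free only what was separately allocated; the inline arrays are part
    // of the object itself.
    if (cards != inlineCards)
      delete[] cards;
    if (boundaries != inlineBoundaries)
      delete[] boundaries;
  }
  uint8_t *cards;
  int8_t *boundaries;
  uint8_t inlineCards[kInlineTableSize]{};
  int8_t inlineBoundaries[kInlineTableSize];
};

} // namespace sketch

int main() {
  sketch::CardTableSketch normal(sketch::kSegmentUnitSize);    // Inline arrays.
  sketch::CardTableSketch jumbo(8 * sketch::kSegmentUnitSize); // Heap arrays.
}

One difference from the sketch worth noting: in the actual patch the two
pointers live in SHSegmentInfo, which shares storage with the prefix of the
inline CardStatus array via a union, so normal segments pay no extra space
for them.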