Skip to content

Commit

Permalink
Only store partial ID in DictPropertyMap's hash table
Browse files Browse the repository at this point in the history
Summary:
Halve the size of each entry in the hash table of DictPropertyMap by instead of storing a SymbolID + a (32-bit) descriptor index, only storing part of the ID (for filtering) and using fewer bits for the descriptor index (as allowed by the max capacity of DPM). The partial symbol ID allows for a first fast filtering.

Introduce an abstraction for HashPair that also checks the allowed state transitions of an entry (empty -> valid -> deleted -> valid -> ...)

Reviewed By: tmikov

Differential Revision: D17327870

fbshipit-source-id: aeb3667a4dbfe23020046e6849aca935a3a3a86f
  • Loading branch information
kodafb authored and facebook-github-bot committed Sep 18, 2019
1 parent 4e33221 commit 41a8072
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 35 deletions.
112 changes: 99 additions & 13 deletions include/hermes/VM/DictPropertyMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,20 @@
namespace hermes {
namespace vm {

/// This class provides simple property metadata storage for JavaScript
/// DictPropertyMap provides simple property metadata storage for JavaScript
/// objects. It maps from SymbolID to PropertyDescriptor and provides
/// iteration in insertion order.
///
/// The object contains two data structures:
/// - an open addressing hash table mapping from SymbolID to an integer index.
/// - a descriptor array containing pairs of SymbolID and PropertyDescriptor.
/// The layout in memory is actually the opposite order (because it simplifies
/// iteration over the symbols).
///
/// Fast property lookup is supported by the hash table - it maps from a
/// SymbolID to an index in the descriptor array.
/// Fast property lookup is supported by the hash table - it conceptually maps
/// from a SymbolID to an index in the descriptor array. To save memory, only
/// part of the SymbolID is stored in the hash table itself, so the SymbolID
/// in the descriptor array entry is also checked on a match.
///
/// New properties are inserted in the hash table and appended sequentially to
/// the end of the descriptor array, thus encoding the original insertion order.
Expand Down Expand Up @@ -58,18 +62,96 @@ namespace vm {
/// - "invalid". It contains SymbolID::empty(). It used to be "deleted"
/// but its slot was re-used by a new property.
///
namespace detail {

/// A valid entry in the hash table holds an index into the descriptor array
/// and part of the SymbolID (for filtering). Entries transition as follows:
/// empty -> valid -> deleted -> valid -> ...
class DPMHashPair {
public:
DPMHashPair() : idpart_(0), desc_(0) {}
bool isEmpty() const {
return desc_ == EMPTY;
}
bool isDeleted() const {
return desc_ == DELETED;
}
bool isValid() const {
return desc_ >= FIRST_VALID;
}
uint32_t getDescIndex() const {
assert(isValid() && "asked for descriptor of invalid pair");
return desc_ - FIRST_VALID;
}
/// Returns false if this entry does not match \p id.
bool mayBe(SymbolID id) const {
assert(isValid() && "tried to match invalid pair");
return idpart_ == (id.unsafeGetRaw() & ID_MASK);
}
/// (Re)initialize an empty or deleted hash table entry.
/// Returns true iff the previous state was deleted.
bool setDescIndex(uint32_t idx, SymbolID id) {
assert(!isValid() && "overwriting a valid entry");
assert(canStore(idx) && "impossibly large descriptor index");
bool ret = isDeleted();
desc_ = idx + FIRST_VALID;
idpart_ = (id.unsafeGetRaw() & ID_MASK);
assert(isValid() && "failed to make a valid entry");
return ret;
}
/// Mark a valid hash table position as deleted.
/// Returns the descriptor index it held.
uint32_t setDeleted() {
assert(isValid() && "tried to delete an empty/deleted entry");
uint32_t ret = getDescIndex();
desc_ = DELETED;
return ret;
}
/// Returns true if idx is small enough to be stored as a descriptor index
/// in this class.
static constexpr bool canStore(uint32_t idx) {
return idx < ((1 << DESC_BITS) - FIRST_VALID);
}

private:
/// Encoding of desc_. Empty is 0 so that zeroed memory means all empty.
enum { EMPTY = 0, DELETED, FIRST_VALID };

/// Number of bits of SymbolID to store. A static_assert checks that
/// the max possible descriptor index can be stored in the other bits.
#ifdef HERMESVM_GC_MALLOC
/// MallocGC supports allocations up to 4 GB. Each descriptor consumes at
/// least 16 bytes. Thus at least log(16) = 4 bits remain for the ID.
static constexpr size_t ID_BITS = 4;
#else
/// Could be slightly higher, but single byte is efficient to access.
static constexpr size_t ID_BITS = 8;
#endif
static constexpr size_t ID_MASK = (1 << ID_BITS) - 1;

/// Bits that can hold (max possible descriptor index + FIRST_VALID).
static constexpr size_t DESC_BITS = 32 - ID_BITS;

struct {
/// Part of a SymbolID.
uint32_t idpart_ : ID_BITS;
/// Encoded descriptor index (or EMPTY/DELETED).
uint32_t desc_ : DESC_BITS;
};
};

} // namespace detail

class DictPropertyMap final : public VariableSizeRuntimeCell,
private llvm::TrailingObjects<
DictPropertyMap,
std::pair<SymbolID, NamedPropertyDescriptor>,
std::pair<SymbolID, uint32_t>> {
detail::DPMHashPair> {
friend TrailingObjects;
friend void DictPropertyMapBuildMeta(
const GCCell *cell,
Metadata::Builder &mb);

using HashPair = std::pair<SymbolID, uint32_t>;

public:
using size_type = uint32_t;
#ifdef HERMESVM_SERIALIZE
Expand Down Expand Up @@ -195,6 +277,8 @@ class DictPropertyMap final : public VariableSizeRuntimeCell,
void dump();

private:
using HashPair = detail::DPMHashPair;

/// Total size of the descriptor array.
const size_type descriptorCapacity_;
/// Total size of the hash table. It will always be a power of 2.
Expand Down Expand Up @@ -263,7 +347,7 @@ class DictPropertyMap final : public VariableSizeRuntimeCell,
descriptorCapacity_(descriptorCapacity),
hashCapacity_(hashCapacity) {
// Clear the hash table.
std::fill_n(getHashPairs(), hashCapacity_, HashPair{SymbolID::empty(), 0});
std::fill_n(getHashPairs(), hashCapacity_, HashPair{});
}

DescriptorPair *getDescriptorPairs() {
Expand Down Expand Up @@ -310,6 +394,12 @@ class DictPropertyMap final : public VariableSizeRuntimeCell,
DictPropertyMap *self,
SymbolID symbolID);

/// Given a valid HashPair, return whether it's an entry for the given ID.
bool isMatch(const HashPair *entry, SymbolID symbolID) const {
return entry->mayBe(symbolID) &&
getDescriptorPairs()[entry->getDescIndex()].first == symbolID;
}

/// Allocate a new property map with the specified capacity, copy the existing
/// valid entries into it. If the specified capacity exceeds the maximum
/// possible capacity, an exception will be raised.
Expand Down Expand Up @@ -463,15 +553,11 @@ inline DictPropertyMap::DescriptorPair *DictPropertyMap::getDescriptorPair(
pos.hashPairIndex < self->hashCapacity_ && "property pos out of range");

auto *hashPair = self->getHashPairs() + pos.hashPairIndex;
assert(hashPair->first.isValid() && "accessing invalid property");

auto descIndex = hashPair->second;
auto descIndex = hashPair->getDescIndex();
assert(descIndex < self->numDescriptors_ && "descriptor index out of range");

auto *res = self->getDescriptorPairs() + descIndex;
assert(
res->first == hashPair->first && "accessing incorrect descriptor pair");

assert(hashPair->mayBe(res->first) && "accessing incorrect descriptor pair");
return res;
}

Expand Down
54 changes: 32 additions & 22 deletions lib/VM/DictPropertyMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ struct DictPropertyMap::detail {
static_assert(
kMaxCapacity < (1u << 31),
"kMaxCapacity is unrealistically large");

static_assert(
DictPropertyMap::HashPair::canStore(kMaxCapacity),
"too few bits to store max possible descriptor index");
};

VTable DictPropertyMap::vt{CellKind::DictPropertyMapKind, 0};
Expand Down Expand Up @@ -153,19 +157,21 @@ std::pair<bool, DictPropertyMap::HashPair *> DictPropertyMap::lookupEntryFor(
for (;;) {
HashPair *curEntry = tableStart + index;

// Did we find it?
if (curEntry->first == symbolID)
return {true, curEntry};
if (curEntry->isValid()) {
if (self->isMatch(curEntry, symbolID))
return {true, curEntry};
} else if (curEntry->isEmpty()) {
// If we encountered an empty pair, the search is over - we failed.
// Return either this entry or a deleted one, if we encountered one.

// If we encountered an empty pair, the search is over - we failed.
// Return either this entry or a deleted one, if we encountered one.
if (curEntry->first == SymbolID::empty())
return {false, deleted ? deleted : curEntry};

// The first time we encounter a deleted entry, record it so we can
// potentially reuse it for insertion.
if (curEntry->first == SymbolID::deleted() && !deleted)
deleted = curEntry;
} else {
assert(curEntry->isDeleted() && "unexpected HashPair state");
// The first time we encounter a deleted entry, record it so we can
// potentially reuse it for insertion.
if (!deleted)
deleted = curEntry;
}

++NumExtraHashProbes;
index = (index + step) & mask;
Expand Down Expand Up @@ -202,8 +208,7 @@ ExecutionStatus DictPropertyMap::grow(

auto result = lookupEntryFor(newSelf, key);
assert(!result.first && "found duplicate entry while growing");
result.second->first = key;
result.second->second = count;
result.second->setDescIndex(count, key);

++dst;
++count;
Expand Down Expand Up @@ -252,7 +257,8 @@ DictPropertyMap::findOrAdd(
auto found = lookupEntryFor(self, id);
if (found.first) {
return std::make_pair(
&self->getDescriptorPairs()[found.second->second].second, false);
&self->getDescriptorPairs()[found.second->getDescIndex()].second,
false);
}

// We want to grow the hash table if the number of occupied hash entries
Expand Down Expand Up @@ -291,11 +297,10 @@ DictPropertyMap::findOrAdd(
}

++self->numProperties_;
if (found.second->first == SymbolID::deleted())
if (found.second->isDeleted())
self->decDeletedHashCount();

found.second->first = id;
found.second->second = self->numDescriptors_;
found.second->setDescIndex(self->numDescriptors_, id);

auto *descPair = self->getDescriptorPairs() + self->numDescriptors_;

Expand All @@ -307,17 +312,15 @@ DictPropertyMap::findOrAdd(

void DictPropertyMap::erase(DictPropertyMap *self, PropertyPos pos) {
auto *hashPair = self->getHashPairs() + pos.hashPairIndex;
assert(hashPair->first.isValid() && "erasing invalid property");

auto descIndex = hashPair->second;
auto descIndex = hashPair->getDescIndex();
assert(descIndex < self->numDescriptors_ && "descriptor index out of range");

auto *descPair = self->getDescriptorPairs() + descIndex;
assert(
descPair->first != SymbolID::empty() &&
"accessing deleted descriptor pair");

hashPair->first = SymbolID::deleted();
hashPair->setDeleted();
descPair->first = SymbolID::deleted();
// Add the descriptor to the deleted list.
setNextDeletedIndex(descPair, self->deletedListHead_);
Expand Down Expand Up @@ -357,7 +360,14 @@ void DictPropertyMap::dump() {
OS << " HashPairs[" << hashCapacity_ << "]:\n";
for (unsigned i = 0; i < hashCapacity_; ++i) {
auto *pair = getHashPairs() + i;
OS << " (" << pair->first << ", " << pair->second << ")\n";
if (pair->isValid()) {
OS << " " << pair->getDescIndex() << "\n";
} else if (pair->isEmpty()) {
OS << " (empty)\n";
} else {
assert(pair->isDeleted());
OS << " (deleted)\n";
}
}
OS << " Descriptors[" << descriptorCapacity_ << "]:\n";
for (unsigned i = 0; i < descriptorCapacity_; ++i) {
Expand Down

0 comments on commit 41a8072

Please sign in to comment.