Skip to content

Commit

Permalink
Add a rudimentary implementation to CacheNewObject
Browse files Browse the repository at this point in the history
Summary:
Add a first-attempt, fairly conservative set of rules to the
CacheNewObject optimization pass.

If a function's first uses of `this` are as the target objects
in property stores using literal keys,
and they occur in a sequence in which earlier users dominate later ones,
and there's no `try` surrounding them, we can add
the CacheNewObject instruction. Otherwise, we bail.

The optimization only applies if there are at least kMinPropertiesForCache
writes to distinct literal `this` properties in a row.

Reviewed By: neildhar

Differential Revision: D40532426

fbshipit-source-id: 5c1ad5748f71528ecedcab3ea621c0d9daf5be62
  • Loading branch information
avp authored and facebook-github-bot committed Dec 4, 2024
1 parent 7bee3d9 commit 3149ba9
Show file tree
Hide file tree
Showing 5 changed files with 495 additions and 1 deletion.
197 changes: 196 additions & 1 deletion lib/Optimizer/Scalar/CacheNewObject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,207 @@
* LICENSE file in the root directory of this source tree.
*/

//===----------------------------------------------------------------------===//
/// \file
///
/// The CacheNewObject optimization adds a CacheNewObjectInst at the start
/// of the function if it detects that the function has an "initialization
/// block", i.e. a sequence of writes to the `this` parameter of fixed
/// properties.
/// For example:
/// this.x = x;
/// this.y = y;
/// // etc.
/// constitutes an initialization block and we want to be able to cache the
/// hidden class and just fetch it so we don't have to rebuild it each time
/// this function is called to make a new object.
///
/// The instruction checks new.target to ensure that the function is being
/// called as a constructor, ensuring `this` is a new object.
///
/// This optimization has a few requirements:
/// 1. Property initializations must be ordered so that every user of 'this'
/// dominates subsequent users.
/// 2. The only users of "this" in the chain must be StorePropertyInsts
/// which store to it.
/// 3. Property writes to "this" must use literal keys (not use computed keys).
///
/// As a result, the optimization is fairly conservative but should account
/// for many of the most common construction patterns.
///
/// TODO: There are ways to make this optimization more complex:
/// * Use a more involved heuristic for determining if a function is likely
/// to be invoked as a constructor.
/// * When we have classes, we know which functions are constructors.

#define DEBUG_TYPE "cachenewobject"

#include "hermes/IR/CFG.h"
#include "hermes/IR/IRBuilder.h"
#include "hermes/IR/IRUtils.h"
#include "hermes/IR/Instrs.h"
#include "hermes/Optimizer/PassManager/Pass.h"

namespace hermes {
namespace {

/// The minimum number of unique cacheable property names that must be found
/// before the cache instruction is actually inserted. Functions yielding fewer
/// names than this are left unmodified.
constexpr size_t kMinPropertiesForCache = 2;

/// \param thisParam the instruction loading the 'this' param in the function.
/// \return vector of unique property names which can be cached, in the order
/// in which they must be cached.
static std::vector<LiteralString *> getPropsForCaching(
const DominanceInfo &domInfo,
Function *func,
Instruction *thisParam) {
BasicBlock *entryBlock = &func->front();

// Put the users of 'this' into a set for fast checking of which instructions
// we care about.
llvh::DenseSet<Instruction *> thisUsers{};
llvh::DenseSet<BasicBlock *> thisUsersBlocks{};
for (Instruction *user : thisParam->getUsers()) {
thisUsers.insert(user);
thisUsersBlocks.insert(user->getParent());
}

auto orderedBlocks = orderBlocksByDominance(
domInfo, entryBlock, [&thisUsersBlocks](BasicBlock *block) -> bool {
// Must dominate all returns as well, because the caller can read the
// value of 'this' after the constructor returns.
return thisUsersBlocks.count(block) != 0 ||
llvh::isa<ReturnInst>(block->getTerminator());
});

// Result vector of property keys to cache.
std::vector<LiteralString *> props{};

// Keep track of keys that we've seen, so we only add unique keys.
llvh::DenseSet<LiteralString *> seenProps{};

// Go through instructions in order to find the StorePropertyInsts we
// care about.
for (BasicBlock *block : orderedBlocks) {
for (Instruction &inst : *block) {
// From here on we only care about instructions that use 'this'.
if (!thisUsers.count(&inst))
continue;

StorePropertyInst *store = llvh::dyn_cast<StorePropertyInst>(&inst);

// 'this' used outside of a StorePropertyInst, bail.
if (!store)
return props;

auto *prop = llvh::dyn_cast<LiteralString>(store->getProperty());

// Property name is not a literal string, bail.
if (!prop)
return props;

// Check if "this" is being used in a non-Object operand position.
for (uint32_t i = 0, e = store->getNumOperands(); i < e; ++i) {
if (i != StorePropertyInst::ObjectIdx &&
store->getOperand(i) == thisParam) {
return props;
}
}

// Valid store for caching, append to the list if it's new.
if (!seenProps.count(prop)) {
props.push_back(prop);
seenProps.insert(prop);
}
}
}

return props;
}

/// Emit a GetNewTargetInst followed by a CacheNewObjectInst into \p func,
/// positioned directly after \p thisParam.
/// \param func the function receiving the new instructions.
/// \param keys the literal names of the keys of the this object.
/// \param thisParam the instruction producing the 'this' value in the
///   function; it is also passed as an operand of the CacheNewObjectInst.
static void insertCacheInstruction(
    Function *func,
    llvh::ArrayRef<LiteralString *> keys,
    Instruction *thisParam) {
  IRBuilder irBuilder{func};
  irBuilder.setInsertionPointAfter(thisParam);

  // new.target is created first so it can be passed as an operand of the
  // cache instruction.
  GetNewTargetInst *newTarget =
      irBuilder.createGetNewTargetInst(func->getNewTargetParam());
  irBuilder.createCacheNewObjectInst(thisParam, newTarget, keys);
}

/// Attempt to cache the new object in \p func.
///
/// The function is only modified when all of the following hold:
///  - it is an ES5Function,
///  - its 'this' dynamic param has exactly one user and that user is a
///    LoadParamInst which itself has users,
///  - getPropsForCaching() finds at least kMinPropertiesForCache keys.
/// In that case a CacheNewObjectInst is inserted via insertCacheInstruction().
///
/// \param func the function to analyze and possibly modify.
/// \return true if the function was modified.
static bool cacheNewObjectInFunction(Function *func) {
  LLVM_DEBUG(
      llvh::dbgs() << "Attempting to cache new object in function: '"
                   << func->getInternalNameStr() << "'\n");

  if (func->getDefinitionKind() != Function::DefinitionKind::ES5Function) {
    // Bail if the function is not a normal function.
    // TODO: Apply this optimization to ES6 constructors once they're
    // implemented.
    return false;
  }

  JSDynamicParam *thisDynParam = func->getJSDynamicParam(0);
  if (!thisDynParam->hasOneUser()) {
    // Bail if there's no users or if there's more than one LoadParam.
    return false;
  }

  // dyn_cast yields null when the single user is not a LoadParamInst, which is
  // handled together with the "no users" case below.
  Instruction *thisParam =
      llvh::dyn_cast<LoadParamInst>(thisDynParam->getUsers().front());

  if (!thisParam || !thisParam->hasUsers()) {
    // No usage of 'this', don't cache anything.
    return false;
  }

  // In loose functions, 'this' can also be coerced into an object,
  // so check for that and update to the 'this' that's actually used.
  // If the function is invoked as a constructor,
  // 'this' is already an object, CoerceThisNS is effectively a Mov,
  // and it actually is the same object.
  // NOTE(review): once the coerce is chosen, any other users of the original
  // LoadParamInst are not examined. Presumably every use of 'this' goes
  // through the coerce in loose functions; confirm.
  for (Instruction *user : thisParam->getUsers()) {
    if (auto *coerce = llvh::dyn_cast<CoerceThisNSInst>(user)) {
      thisParam = coerce;
      break;
    }
  }

  DominanceInfo domInfo{func};
  std::vector<LiteralString *> keys =
      getPropsForCaching(domInfo, func, thisParam);

  // Not enough stores to cache.
  if (keys.size() < kMinPropertiesForCache) {
    // The counts are size_t, so stream them directly instead of going through
    // a printf-style "%u" conversion, which expects an unsigned int.
    LLVM_DEBUG(
        llvh::dbgs() << "Not caching new object, needs at least "
                     << kMinPropertiesForCache << " keys, found "
                     << keys.size() << "\n");
    return false;
  }

  LLVM_DEBUG(llvh::dbgs() << "Caching " << keys.size() << " keys\n");

  static_assert(
      kMinPropertiesForCache > 0,
      "CacheNewObjectInst requires at least one key");

  // Actually insert the CacheNewObject instruction.
  insertCacheInstruction(func, keys, thisParam);

  return true;
}

} // namespace

Pass *createCacheNewObject() {
/// Inserts the CacheNewObjectInst if possible, to reduce the time spent
Expand All @@ -21,7 +216,7 @@ Pass *createCacheNewObject() {
~ThisPass() override = default;

bool runOnFunction(Function *F) override {
return false;
return cacheNewObjectInFunction(F);
}
};

Expand Down
80 changes: 80 additions & 0 deletions test/BCGen/HBC/cache-new-object.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/**
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

// RUN: %hermesc -O -dump-bytecode %s | %FileCheckOrRegen --match-full-lines %s

function simple(x, y) {
// Two literal-key stores to `this`: expect CacheNewObject to be emitted, referencing entry 0 of the Object Shape Table.
this.x = x;
this.y = y;
}

// Auto-generated content below. Please do not modify manually.

// CHECK:Bytecode File Information:
// CHECK-NEXT: Bytecode version number: {{.*}}
// CHECK-NEXT: Source hash: {{.*}}
// CHECK-NEXT: Function count: 2
// CHECK-NEXT: String count: 4
// CHECK-NEXT: BigInt count: 0
// CHECK-NEXT: String Kind Entry count: 2
// CHECK-NEXT: RegExp count: 0
// CHECK-NEXT: Segment ID: 0
// CHECK-NEXT: CommonJS module count: 0
// CHECK-NEXT: CommonJS module count (static): 0
// CHECK-NEXT: Function source count: 0
// CHECK-NEXT: Bytecode options:
// CHECK-NEXT: staticBuiltins: 0
// CHECK-NEXT: cjsModulesStaticallyResolved: 0

// CHECK:Global String Table:
// CHECK-NEXT:s0[ASCII, 0..5]: global
// CHECK-NEXT:i1[ASCII, 6..11] #147A1A16: simple
// CHECK-NEXT:i2[ASCII, 12..12] #0001E7F9: x
// CHECK-NEXT:i3[ASCII, 13..13] #0001E3E8: y

// CHECK:Object Key Buffer:
// CHECK-NEXT:[String 2]
// CHECK-NEXT:[String 3]
// CHECK-NEXT:Object Shape Table:
// CHECK-NEXT:0[0, 2]
// CHECK-NEXT:Function<global>(1 params, 3 registers, 0 numbers, 1 non-pointers):
// CHECK-NEXT:Offset in debug table: source 0x0000, lexical 0x0000
// CHECK-NEXT: DeclareGlobalVar "simple"
// CHECK-NEXT: CreateTopLevelEnvironment r1, 0
// CHECK-NEXT: CreateClosure r2, r1, Function<simple>
// CHECK-NEXT: GetGlobalObject r1
// CHECK-NEXT: PutByIdLoose r1, r2, 1, "simple"
// CHECK-NEXT: LoadConstUndefined r0
// CHECK-NEXT: Ret r0

// CHECK:Function<simple>(3 params, 3 registers, 0 numbers, 1 non-pointers):
// CHECK-NEXT:Offset in debug table: source 0x000a, lexical 0x0000
// CHECK-NEXT: LoadThisNS r2
// CHECK-NEXT: GetNewTarget r1
// CHECK-NEXT: CacheNewObject r2, 0
// CHECK-NEXT: LoadParam r1, 1
// CHECK-NEXT: PutByIdLoose r2, r1, 1, "x"
// CHECK-NEXT: LoadParam r1, 2
// CHECK-NEXT: PutByIdLoose r2, r1, 2, "y"
// CHECK-NEXT: LoadConstUndefined r0
// CHECK-NEXT: Ret r0

// CHECK:Debug filename table:
// CHECK-NEXT: 0: {{.*}}cache-new-object.js

// CHECK:Debug file table:
// CHECK-NEXT: source table offset 0x0000: filename id 0

// CHECK:Debug source table:
// CHECK-NEXT: 0x0000 function idx 0, starts at line 10 col 1
// CHECK-NEXT: bc 0: line 10 col 1
// CHECK-NEXT: bc 18: line 10 col 1
// CHECK-NEXT: 0x000a function idx 1, starts at line 10 col 1
// CHECK-NEXT: bc 13: line 12 col 10
// CHECK-NEXT: bc 22: line 13 col 10
// CHECK-NEXT: 0x0014 end of debug source table
56 changes: 56 additions & 0 deletions test/Optimizer/cache-new-object-analysis.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/**
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

// RUN: %hermesc -fno-inline -dump-ir %s -O | %FileCheckOrRegen --match-full-lines %s

// Wrapper that keeps `simple` local; the only visible use of `simple` is the
// `new` expression at the end.
function main() {

// Make sure allCallsitesKnownInStrictMode is true: the expected IR below marks
// `simple` with that attribute and must still contain CacheNewObjectInst.
function simple(x, y) {
this.x = x;
this.y = y;
}

return new simple(1, 2);

}

// Auto-generated content below. Please do not modify manually.

// CHECK:scope %VS0 []

// CHECK:function global(): undefined
// CHECK-NEXT:%BB0:
// CHECK-NEXT: %0 = CreateScopeInst (:environment) %VS0: any, empty: any
// CHECK-NEXT: DeclareGlobalVarInst "main": string
// CHECK-NEXT: %2 = CreateFunctionInst (:object) %0: environment, %main(): functionCode
// CHECK-NEXT: StorePropertyLooseInst %2: object, globalObject: object, "main": string
// CHECK-NEXT: ReturnInst undefined: undefined
// CHECK-NEXT:function_end

// CHECK:function main(): object
// CHECK-NEXT:%BB0:
// CHECK-NEXT: %0 = GetParentScopeInst (:environment) %VS0: any, %parentScope: environment
// CHECK-NEXT: %1 = CreateFunctionInst (:object) %0: environment, %simple(): functionCode
// CHECK-NEXT: %2 = CreateThisInst (:undefined|object) %1: object, empty: any
// CHECK-NEXT: %3 = CallInst (:undefined) %1: object, %simple(): functionCode, true: boolean, empty: any, undefined: undefined, %2: undefined|object, 1: number, 2: number
// CHECK-NEXT: %4 = GetConstructedObjectInst (:object) %2: undefined|object, undefined: undefined
// CHECK-NEXT: ReturnInst %4: object
// CHECK-NEXT:function_end

// CHECK:function simple(x: any, y: any): undefined [allCallsitesKnownInStrictMode]
// CHECK-NEXT:%BB0:
// CHECK-NEXT: %0 = LoadParamInst (:any) %<this>: any
// CHECK-NEXT: %1 = CoerceThisNSInst (:object) %0: any
// CHECK-NEXT: %2 = GetNewTargetInst (:undefined|object) %new.target: undefined|object
// CHECK-NEXT: CacheNewObjectInst %1: object, %2: undefined|object, "x": string, "y": string
// CHECK-NEXT: %4 = LoadParamInst (:any) %x: any
// CHECK-NEXT: %5 = LoadParamInst (:any) %y: any
// CHECK-NEXT: StorePropertyLooseInst %4: any, %1: object, "x": string
// CHECK-NEXT: StorePropertyLooseInst %5: any, %1: object, "y": string
// CHECK-NEXT: ReturnInst undefined: undefined
// CHECK-NEXT:function_end
40 changes: 40 additions & 0 deletions test/Optimizer/cache-new-object-bail.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/**
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

// RUN: %hermesc -dump-ir %s -O | %FileCheck --match-full-lines %s

// A variety of different functions which don't trigger CacheNewObjectInst
// to be emitted.

// Expected to bail: the store to `this.y` is conditional, so it does not
// dominate the later store to `this.x` (the pass requires every user of
// `this` to dominate subsequent users).
function cond(x, y) {
if (y)
this.y = y;
this.x = x;
}

// Expected to bail: `this` is used as the computed key of a store to `x`,
// not as the object being stored to.
function assignProp(x, y) {
x[this] = y;
}

// Expected to bail: the store to `this` uses a computed key (`this[x]`)
// rather than a literal property name.
function assignNotLit(x, y) {
this[x] = x;
}

// Expected to bail: only the store to `this.x` precedes the read of `this.z`.
// A non-store use of `this` ends the scan, leaving fewer keys than
// kMinPropertiesForCache.
function usesThis(x, y) {
this.x = x;
this.y = this.z;
}

// Expected to bail: `this` is never referenced, so there is nothing to cache.
function noUses(x, y) {
print(x);
}

// Expected to bail: `this` is passed as a call argument, so its first user is
// not a property store.
function callArg(x, y) {
print(this);
}

// CHECK-NOT: %{{.*}} CacheNewObjectInst {{.*}}
Loading

0 comments on commit 3149ba9

Please sign in to comment.