Skip to content

Commit

Permalink
[vm/compiler] Extend Assembler::AddScaled to take a base register.
Browse files Browse the repository at this point in the history
Add an additional base argument to Assembler::AddScaled so it now
computes:
  dest <- base + (index << scale) + offset

If base is kNoRegister (or ZR, when available), the assembler instead
emits instructions optimized for computing:
  dest <- (index << scale) + offset
(i.e., the previous behavior of AddScaled)

Add AddScaled to AssemblerBase to ensure the same interface across all
architectures.

Rework the backend of CalculateElementAddress to use AddScaled
appropriately, which unifies it across architectures.

TEST=ci (refactoring)

Cq-Include-Trybots: luci.dart.try:vm-aot-linux-debug-simarm_x64-try,vm-aot-linux-debug-simriscv64-try,vm-aot-linux-debug-x64-try,vm-aot-linux-debug-x64c-try,vm-linux-debug-ia32-try,vm-linux-debug-x64-try,vm-linux-debug-x64c-try,vm-linux-debug-simriscv64-try,vm-mac-debug-arm64-try,vm-aot-mac-release-arm64-try
Change-Id: I33c8f99604b68360f10b79050bd66ceb9d65ac9b
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/370504
Commit-Queue: Tess Strickland <[email protected]>
Reviewed-by: Alexander Markov <[email protected]>
  • Loading branch information
sstrickl authored and Commit Queue committed Jun 11, 2024
1 parent 9544fe9 commit 324ba0c
Show file tree
Hide file tree
Showing 14 changed files with 158 additions and 437 deletions.
3 changes: 1 addition & 2 deletions runtime/vm/compiler/assembler/assembler_arm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3539,9 +3539,8 @@ void Assembler::LoadAllocationTracingStateAddress(Register dest, Register cid) {
ldr(dest,
Address(dest,
target::ClassTable::allocation_tracing_state_table_offset()));
AddScaled(cid, cid, TIMES_1,
AddScaled(dest, dest, cid, TIMES_1,
target::ClassTable::AllocationTracingStateSlotOffsetFor(0));
AddRegisters(dest, cid);
}

void Assembler::LoadAllocationTracingStateAddress(Register dest, intptr_t cid) {
Expand Down
19 changes: 12 additions & 7 deletions runtime/vm/compiler/assembler/assembler_arm.h
Original file line number Diff line number Diff line change
Expand Up @@ -832,16 +832,21 @@ class Assembler : public AssemblerBase {
void AddRegisters(Register dest, Register src) {
add(dest, dest, Operand(src));
}
// [dest] = [src] << [scale] + [value].
void AddScaled(Register dest,
Register src,
Register base,
Register index,
ScaleFactor scale,
int32_t value) {
if (scale == 0) {
AddImmediate(dest, src, value);
int32_t disp) override {
if (base == kNoRegister) {
if (scale == TIMES_1) {
AddImmediate(dest, index, disp);
} else {
Lsl(dest, index, Operand(scale));
AddImmediate(dest, disp);
}
} else {
Lsl(dest, src, Operand(scale));
AddImmediate(dest, dest, value);
add(dest, base, compiler::Operand(index, LSL, scale));
AddImmediate(dest, disp);
}
}
void SubImmediate(Register rd,
Expand Down
19 changes: 12 additions & 7 deletions runtime/vm/compiler/assembler/assembler_arm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -1784,16 +1784,21 @@ class Assembler : public AssemblerBase {
void AddRegisters(Register dest, Register src) {
add(dest, dest, Operand(src));
}
// [dest] = [src] << [scale] + [value].
void AddScaled(Register dest,
Register src,
Register base,
Register index,
ScaleFactor scale,
int32_t value) {
if (scale == 0) {
AddImmediate(dest, src, value);
int32_t disp) override {
if (base == kNoRegister || base == ZR) {
if (scale == TIMES_1) {
AddImmediate(dest, index, disp);
} else {
orr(dest, ZR, Operand(index, LSL, scale));
AddImmediate(dest, disp);
}
} else {
orr(dest, ZR, Operand(src, LSL, scale));
AddImmediate(dest, dest, value);
add(dest, base, compiler::Operand(index, LSL, scale));
AddImmediate(dest, disp);
}
}
void SubImmediateSetFlags(Register dest,
Expand Down
11 changes: 11 additions & 0 deletions runtime/vm/compiler/assembler/assembler_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -1093,6 +1093,17 @@ class AssemblerBase : public StackResource {
/*Nullability*/ int8_t value,
Register scratch);

// [dst] = [base] + ([index] << [scale]) + [disp].
//
// [base] may be kNoRegister (or ZR, on architectures that provide it), in
// which case
//   [dst] = ([index] << [scale]) + [disp]
// is computed instead, with the emitted instructions optimized for that
// base-less case.
virtual void AddScaled(Register dst,
                       Register base,
                       Register index,
                       ScaleFactor scale,
                       int32_t disp) = 0;

virtual void LoadImmediate(Register dst, target::word imm) = 0;

virtual void CompareImmediate(Register reg,
Expand Down
13 changes: 8 additions & 5 deletions runtime/vm/compiler/assembler/assembler_ia32.h
Original file line number Diff line number Diff line change
Expand Up @@ -717,14 +717,17 @@ class Assembler : public AssemblerBase {
}
void AddImmediate(Register dest, Register src, int32_t value);
void AddRegisters(Register dest, Register src) { addl(dest, src); }
// [dest] = [src] << [scale] + [value].
void AddScaled(Register dest,
Register src,
Register base,
Register index,
ScaleFactor scale,
int32_t value) {
leal(dest, Address(src, scale, value));
int32_t disp) override {
if (base == kNoRegister) {
leal(dest, Address(index, scale, disp));
} else {
leal(dest, Address(base, index, scale, disp));
}
}

void SubImmediate(Register reg, const Immediate& imm);
void SubRegisters(Register dest, Register src) { subl(dest, src); }
void MulImmediate(Register reg,
Expand Down
19 changes: 12 additions & 7 deletions runtime/vm/compiler/assembler/assembler_riscv.h
Original file line number Diff line number Diff line change
Expand Up @@ -1039,16 +1039,21 @@ class Assembler : public MicroAssembler {
MulImmediate(dest, dest, imm, width);
}
void AddRegisters(Register dest, Register src) { add(dest, dest, src); }
// [dest] = [src] << [scale] + [value].
void AddScaled(Register dest,
Register src,
Register base,
Register index,
ScaleFactor scale,
int32_t value) {
if (scale == 0) {
AddImmediate(dest, src, value);
int32_t disp) override {
if (base == kNoRegister || base == ZR) {
if (scale == TIMES_1) {
AddImmediate(dest, index, disp);
} else {
slli(dest, index, scale);
AddImmediate(dest, disp);
}
} else {
slli(dest, src, scale);
AddImmediate(dest, dest, value);
AddShifted(dest, base, index, scale);
AddImmediate(dest, disp);
}
}
void AddShifted(Register dest, Register base, Register index, intx_t shift);
Expand Down
12 changes: 8 additions & 4 deletions runtime/vm/compiler/assembler/assembler_x64.h
Original file line number Diff line number Diff line change
Expand Up @@ -784,12 +784,16 @@ class Assembler : public AssemblerBase {
AddImmediate(reg, Immediate(value), width);
}
void AddRegisters(Register dest, Register src) { addq(dest, src); }
// [dest] = [src] << [scale] + [value].
void AddScaled(Register dest,
Register src,
Register base,
Register index,
ScaleFactor scale,
int32_t value) {
leaq(dest, Address(src, scale, value));
int32_t disp) override {
if (base == kNoRegister) {
leaq(dest, Address(index, scale, disp));
} else {
leaq(dest, Address(base, index, scale, disp));
}
}
void AddImmediate(Register dest, Register src, int64_t value);
void AddImmediate(const Address& address, const Immediate& imm);
Expand Down
92 changes: 92 additions & 0 deletions runtime/vm/compiler/backend/il.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7802,6 +7802,98 @@ void StoreFieldInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
}
}

// Chooses register/constant locations for the base, index, and offset inputs
// of a CalculateElementAddress instruction.
LocationSummary* CalculateElementAddressInstr::MakeLocationSummary(
    Zone* zone,
    bool opt) const {
  const intptr_t kNumInputs = 3;
  const intptr_t kNumTemps = 0;
  auto* const locs = new (zone)
      LocationSummary(zone, kNumInputs, kNumTemps, LocationSummary::kNoCall);

  locs->set_in(kBasePos, Location::RequiresRegister());

  // The index may be a Smi constant, but only if scaling it by index_scale()
  // still yields an int32 value.
  const intptr_t shift = Utils::ShiftForPowerOfTwo(index_scale());
  locs->set_in(kIndexPos,
               LocationRegisterOrSmiConstant(index(), kMinInt32 >> shift,
                                             kMaxInt32 >> shift));
  // The offset may be a Smi constant, but only if it is an int32 value.
  locs->set_in(kOffsetPos,
               LocationRegisterOrSmiConstant(offset(), kMinInt32, kMaxInt32));

  // If both inputs became constants, the folded byte displacement
  // (index * scale + offset) must also fit in a 32-bit immediate; if it
  // does not, fall back to passing the index in a register.
  const bool both_constant =
      locs->in(kIndexPos).IsConstant() && locs->in(kOffsetPos).IsConstant();
  if (both_constant) {
    const int64_t scaled_index = Utils::MulWithWrapAround<int64_t>(
        index()->BoundSmiConstant(), index_scale());
    const int64_t disp = Utils::AddWithWrapAround<int64_t>(
        scaled_index, offset()->BoundSmiConstant());
    if (!Utils::IsInt(32, disp)) {
      locs->set_in(kIndexPos, Location::RequiresRegister());
    }
  }

  // Currently this instruction can only be used in optimized mode as it takes
  // and puts untagged values on the stack, and the canonicalization pass
  // should always remove no-op uses of this instruction. Flag this for
  // handling if this ever changes.
  ASSERT(opt && !IsNoop());
  locs->set_out(0, Location::RequiresRegister());

  return locs;
}

// Emits code computing result = base + (index * index_scale()) + offset,
// where the index and offset inputs may each be in a register or be a Smi
// constant, as decided by MakeLocationSummary.
void CalculateElementAddressInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
  const Register base_reg = locs()->in(kBasePos).reg();
  const Location& index_loc = locs()->in(kIndexPos);
  const Location& offset_loc = locs()->in(kOffsetPos);
  const Register result_reg = locs()->out(0).reg();

  // No-op uses are removed during canonicalization (see MakeLocationSummary).
  ASSERT(!IsNoop());

  if (index_loc.IsConstant()) {
    const int64_t index = Smi::Cast(index_loc.constant()).Value();
    ASSERT(Utils::IsInt(32, index));
    const int64_t scaled_index = index * index_scale();
    ASSERT(Utils::IsInt(32, scaled_index));
    if (offset_loc.IsConstant()) {
      // Both inputs are constants: fold them into one displacement.
      // MakeLocationSummary forced the index into a register whenever the
      // folded value would not fit in 32 bits, so the assert must hold.
      const int64_t disp =
          scaled_index + Smi::Cast(offset_loc.constant()).Value();
      ASSERT(Utils::IsInt(32, disp));
      // With base == kNoRegister, AddScaled computes
      // (base_reg << TIMES_1) + disp, i.e., base_reg + disp.
      __ AddScaled(result_reg, kNoRegister, base_reg, TIMES_1, disp);
    } else {
      // result = base_reg + offset_reg + scaled_index.
      __ AddScaled(result_reg, base_reg, offset_loc.reg(), TIMES_1,
                   scaled_index);
    }
  } else {
    Register index_reg = index_loc.reg();
    ASSERT(RepresentationUtils::IsUnboxedInteger(
        RequiredInputRepresentation(kIndexPos)));
    auto scale = ToScaleFactor(index_scale(), /*index_unboxed=*/true);
#if defined(TARGET_ARCH_X64) || defined(TARGET_ARCH_IA32)
    if (scale == TIMES_16) {
      COMPILE_ASSERT(kSmiTagShift == 1);
      // A ScaleFactor of TIMES_16 is invalid for x86, so box the index as a
      // Smi (using the result register to store it to avoid allocating a
      // writable register for the index) to reduce the ScaleFactor to
      // TIMES_8.
      __ MoveAndSmiTagRegister(result_reg, index_reg);
      index_reg = result_reg;
      scale = TIMES_8;
    }
#endif
    if (offset_loc.IsConstant()) {
      // result = base_reg + (index_reg << scale) + constant offset.
      const intptr_t disp = Smi::Cast(offset_loc.constant()).Value();
      ASSERT(Utils::IsInt(32, disp));
      __ AddScaled(result_reg, base_reg, index_reg, scale, disp);
    } else {
      // No architecture can do this case in a single instruction.
      __ AddScaled(result_reg, base_reg, index_reg, scale, /*disp=*/0);
      __ AddRegisters(result_reg, offset_loc.reg());
    }
  }
}

const Code& DartReturnInstr::GetReturnStub(FlowGraphCompiler* compiler) const {
const Function& function = compiler->parsed_function().function();
ASSERT(function.IsSuspendableFunction());
Expand Down
76 changes: 0 additions & 76 deletions runtime/vm/compiler/backend/il_arm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -449,82 +449,6 @@ void MemoryCopyInstr::EmitComputeStartPointer(FlowGraphCompiler* compiler,
__ AddImmediate(payload_reg, offset);
}

// ARM-specific location selection for CalculateElementAddress: base in a
// register; index and offset each in a register or as a Smi constant.
LocationSummary* CalculateElementAddressInstr::MakeLocationSummary(
    Zone* zone,
    bool opt) const {
  const intptr_t kNumInputs = 3;
  const intptr_t kNumTemps = 0;
  auto* const summary = new (zone)
      LocationSummary(zone, kNumInputs, kNumTemps, LocationSummary::kNoCall);

  summary->set_in(kBasePos, Location::RequiresRegister());
  // Only use a Smi constant for the index if multiplying it by the index
  // scale would be an int32 constant.
  const intptr_t scale_shift = Utils::ShiftForPowerOfTwo(index_scale());
  summary->set_in(kIndexPos, LocationRegisterOrSmiConstant(
                                 index(), kMinInt32 >> scale_shift,
                                 kMaxInt32 >> scale_shift));
  summary->set_in(kOffsetPos, LocationRegisterOrSmiConstant(offset()));
  // Special case for when both inputs are appropriate constants.
  if (summary->in(kIndexPos).IsConstant() &&
      summary->in(kOffsetPos).IsConstant()) {
    const int64_t offset_in_bytes = Utils::AddWithWrapAround<int64_t>(
        Utils::MulWithWrapAround<int64_t>(index()->BoundSmiConstant(),
                                          index_scale()),
        offset()->BoundSmiConstant());
    if (!Utils::IsInt(32, offset_in_bytes)) {
      // The offset in bytes calculated from the index and offset cannot
      // fit in a 32-bit immediate, so pass the index as a register instead.
      summary->set_in(kIndexPos, Location::RequiresRegister());
    }
  }

  // A no-op instance just forwards the base, so alias the output with it
  // rather than requiring a fresh register.
  if (IsNoop()) {
    summary->set_out(0, Location::SameAsFirstInput());
  } else {
    summary->set_out(0, Location::RequiresRegister());
  }

  return summary;
}

// ARM-specific emission of result = base + (index * index_scale()) + offset,
// with index and offset each either in a register or a Smi constant.
void CalculateElementAddressInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
  const Register base_reg = locs()->in(kBasePos).reg();
  const Location& index_loc = locs()->in(kIndexPos);
  const Location& offset_loc = locs()->in(kOffsetPos);
  const Register result_reg = locs()->out(0).reg();

  // No-op: MakeLocationSummary aliased the output with the base, so there is
  // nothing to emit.
  if (IsNoop()) {
    ASSERT_EQUAL(base_reg, result_reg);
    return;
  }

  if (index_loc.IsConstant()) {
    const intptr_t scaled_index =
        Smi::Cast(index_loc.constant()).Value() * index_scale();
    if (offset_loc.IsConstant()) {
      // Both inputs are constants: fold into a single immediate addition.
      const intptr_t offset_in_bytes =
          scaled_index + Smi::Cast(offset_loc.constant()).Value();
      __ AddImmediate(result_reg, base_reg, offset_in_bytes);
    } else {
      // result = base + offset_reg, then add the constant scaled index.
      __ add(result_reg, base_reg, compiler::Operand(offset_loc.reg()));
      // Don't need wrap-around as the index is constant only if multiplying
      // it by the scale is an int32.
      __ AddImmediate(result_reg, scaled_index);
    }
  } else {
    // result = base + (index_reg << log2(scale)), using ARM's shifted-register
    // operand form.
    __ add(result_reg, base_reg,
           compiler::Operand(index_loc.reg(), LSL,
                             Utils::ShiftForPowerOfTwo(index_scale())));
    if (offset_loc.IsConstant()) {
      const int32_t offset_value = Smi::Cast(offset_loc.constant()).Value();
      __ AddImmediate(result_reg, offset_value);
    } else {
      __ AddRegisters(result_reg, offset_loc.reg());
    }
  }
}

LocationSummary* MoveArgumentInstr::MakeLocationSummary(Zone* zone,
bool opt) const {
const intptr_t kNumInputs = 1;
Expand Down
Loading

0 comments on commit 324ba0c

Please sign in to comment.