Skip to content

Commit fa83375

Browse files
authored
Optimize bulk memory.copy (#3038)
Replace it with a load and a store when the size is a small constant and remove it entirely when it would be a nop.
1 parent 7d1c174 commit fa83375

File tree

3 files changed

+208
-0
lines changed

3 files changed

+208
-0
lines changed

src/passes/OptimizeInstructions.cpp

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -710,6 +710,11 @@ struct OptimizeInstructions
710710
store->value = unary->value;
711711
}
712712
}
713+
} else if (auto* memCopy = curr->dynCast<MemoryCopy>()) {
714+
assert(features.hasBulkMemory());
715+
if (auto* ret = optimizeMemoryCopy(memCopy)) {
716+
return ret;
717+
}
713718
}
714719
return nullptr;
715720
}
@@ -1393,6 +1398,69 @@ struct OptimizeInstructions
13931398
return binary;
13941399
}
13951400

1401+
// Tries to replace a bulk `memory.copy` with something cheaper.
//
// Rewrites performed:
//   * memory.copy(x, x, sz)   ==> nop, or drop(sz) when sz has side effects
//   * memory.copy(dst, src, 0) ==> { drop(dst), drop(src) }
//   * memory.copy(dst, src, C) ==> store(dst, load(src)) for C in {1, 2, 4, 8},
//     and for C == 16 when SIMD is enabled and shrinkLevel is 0.
//
// The first two rewrites delete the instruction entirely. memory.copy traps
// when either range is out of bounds, even if no byte would be written, so
// those rewrites are only applied when the pass options say implicit traps
// may be ignored. The load/store rewrites keep both accesses and therefore
// need no such guard.
//
// @param memCopy the memory.copy expression to optimize.
// @return the replacement expression, or nullptr when nothing was changed.
Expression* optimizeMemoryCopy(MemoryCopy* memCopy) {
  const auto& options = getPassOptions();
  FeatureSet features = getModule()->features;
  Builder builder(*getModule());

  // memory.copy(x, x, sz)  ==>  nop
  // dest must be free of side effects: two structurally equal expressions
  // with effects (e.g. two calls) need not produce the same address.
  if (options.ignoreImplicitTraps &&
      !EffectAnalyzer(options, features, memCopy->dest).hasSideEffects() &&
      ExpressionAnalyzer::equal(memCopy->dest, memCopy->source)) {
    // The size operand is still evaluated by the original instruction, so
    // keep it alive whenever evaluating it is observable.
    if (EffectAnalyzer(options, features, memCopy->size).hasSideEffects()) {
      return builder.makeDrop(memCopy->size);
    }
    return ExpressionManipulator::nop(memCopy);
  }

  // memory.copy(dst, src, C)  ==>  store(dst, load(src))
  auto* csize = memCopy->size->dynCast<Const>();
  if (!csize) {
    return nullptr;
  }
  auto bytes = csize->value.geti32();

  switch (bytes) {
    case 0: {
      if (options.ignoreImplicitTraps) {
        // Nothing is copied; only the operands' own effects remain.
        return builder.makeBlock(
          {builder.makeDrop(memCopy->dest), builder.makeDrop(memCopy->source)});
      }
      break;
    }
    case 1:
    case 2:
    case 4: {
      return builder.makeStore(
        bytes, // bytes
        0,     // offset
        1,     // align
        memCopy->dest,
        builder.makeLoad(bytes, false, 0, 1, memCopy->source, Type::i32),
        Type::i32);
    }
    case 8: {
      return builder.makeStore(
        bytes, // bytes
        0,     // offset
        1,     // align
        memCopy->dest,
        builder.makeLoad(bytes, false, 0, 1, memCopy->source, Type::i64),
        Type::i64);
    }
    case 16: {
      // This adds an extra 2 bytes so apply it only for
      // minimal shrink level
      if (options.shrinkLevel == 0 && features.hasSIMD()) {
        return builder.makeStore(
          bytes, // bytes
          0,     // offset
          1,     // align
          memCopy->dest,
          builder.makeLoad(bytes, false, 0, 1, memCopy->source, Type::v128),
          Type::v128);
      }
      break;
    }
    default: {
      break;
    }
  }
  return nullptr;
}
1463+
13961464
// given a binary expression with equal children and no side effects in
13971465
// either, we can fold various things
13981466
// TODO: trinaries, things like (x & (y & x)) ?

test/passes/optimize-instructions_all-features.txt

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
(type $i64_=>_i64 (func (param i64) (result i64)))
1111
(type $i32_i64_f32_=>_none (func (param i32 i64 f32)))
1212
(type $i32_i64_f32_f64_=>_none (func (param i32 i64 f32 f64)))
13+
(type $i32_i32_i32_=>_none (func (param i32 i32 i32)))
1314
(type $i32_i32_f64_f64_=>_none (func (param i32 i32 f64 f64)))
1415
(type $i32_i64_f64_i32_=>_none (func (param i32 i64 f64 i32)))
1516
(type $none_=>_f64 (func (result f64)))
@@ -3727,6 +3728,72 @@
37273728
)
37283729
)
37293730
)
3731+
(func $optimize-bulk-memory-copy (param $dst i32) (param $src i32) (param $sz i32)
3732+
(nop)
3733+
(block
3734+
(drop
3735+
(local.get $dst)
3736+
)
3737+
(drop
3738+
(local.get $src)
3739+
)
3740+
)
3741+
(i32.store8
3742+
(local.get $dst)
3743+
(i32.load8_u
3744+
(local.get $src)
3745+
)
3746+
)
3747+
(i32.store16 align=1
3748+
(local.get $dst)
3749+
(i32.load16_u align=1
3750+
(local.get $src)
3751+
)
3752+
)
3753+
(memory.copy
3754+
(local.get $dst)
3755+
(local.get $src)
3756+
(i32.const 3)
3757+
)
3758+
(i32.store align=1
3759+
(local.get $dst)
3760+
(i32.load align=1
3761+
(local.get $src)
3762+
)
3763+
)
3764+
(memory.copy
3765+
(local.get $dst)
3766+
(local.get $src)
3767+
(i32.const 5)
3768+
)
3769+
(memory.copy
3770+
(local.get $dst)
3771+
(local.get $src)
3772+
(i32.const 6)
3773+
)
3774+
(memory.copy
3775+
(local.get $dst)
3776+
(local.get $src)
3777+
(i32.const 7)
3778+
)
3779+
(i64.store align=1
3780+
(local.get $dst)
3781+
(i64.load align=1
3782+
(local.get $src)
3783+
)
3784+
)
3785+
(v128.store align=1
3786+
(local.get $dst)
3787+
(v128.load align=1
3788+
(local.get $src)
3789+
)
3790+
)
3791+
(memory.copy
3792+
(local.get $dst)
3793+
(local.get $src)
3794+
(local.get $sz)
3795+
)
3796+
)
37303797
)
37313798
(module
37323799
(type $none_=>_none (func))

test/passes/optimize-instructions_all-features.wast

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4233,6 +4233,79 @@
42334233
)
42344234
))
42354235
)
4236+
;; Input for the memory.copy rewrites: same-address copy, constant sizes 0-8
;; and 16, and a non-constant size.
(func $optimize-bulk-memory-copy (param $dst i32) (param $src i32) (param $sz i32)
4237+
(memory.copy ;; nop
4238+
(local.get $dst)
4239+
(local.get $dst)
4240+
(local.get $sz)
4241+
)
4242+
4243+
(memory.copy ;; nop
4244+
(local.get $dst)
4245+
(local.get $src)
4246+
(i32.const 0)
4247+
)
4248+
4249+
(memory.copy ;; 1 byte: expected output is i32.store8 of i32.load8_u
4250+
(local.get $dst)
4251+
(local.get $src)
4252+
(i32.const 1)
4253+
)
4254+
4255+
(memory.copy ;; 2 bytes: expected output is i32.store16 of i32.load16_u, align=1
4256+
(local.get $dst)
4257+
(local.get $src)
4258+
(i32.const 2)
4259+
)
4260+
4261+
(memory.copy ;; 3 bytes: expected output keeps the memory.copy
4262+
(local.get $dst)
4263+
(local.get $src)
4264+
(i32.const 3)
4265+
)
4266+
4267+
(memory.copy ;; 4 bytes: expected output is i32.store of i32.load, align=1
4268+
(local.get $dst)
4269+
(local.get $src)
4270+
(i32.const 4)
4271+
)
4272+
4273+
(memory.copy ;; 5 bytes: expected output keeps the memory.copy
4274+
(local.get $dst)
4275+
(local.get $src)
4276+
(i32.const 5)
4277+
)
4278+
4279+
(memory.copy ;; 6 bytes: expected output keeps the memory.copy
4280+
(local.get $dst)
4281+
(local.get $src)
4282+
(i32.const 6)
4283+
)
4284+
4285+
(memory.copy ;; 7 bytes: expected output keeps the memory.copy
4286+
(local.get $dst)
4287+
(local.get $src)
4288+
(i32.const 7)
4289+
)
4290+
4291+
(memory.copy ;; 8 bytes: expected output is i64.store of i64.load, align=1
4292+
(local.get $dst)
4293+
(local.get $src)
4294+
(i32.const 8)
4295+
)
4296+
4297+
(memory.copy ;; 16 bytes: expected output is v128.store of v128.load, align=1
4298+
(local.get $dst)
4299+
(local.get $src)
4300+
(i32.const 16)
4301+
)
4302+
4303+
(memory.copy ;; skip
4304+
(local.get $dst)
4305+
(local.get $src)
4306+
(local.get $sz)
4307+
)
4308+
)
42364309
)
42374310
(module
42384311
(import "env" "memory" (memory $0 (shared 256 256)))

0 commit comments

Comments
 (0)