Skip to content

Commit 738c2e9

Browse files
authored
[VM][Adreno] Fix using buffers for weights in VM (#15671)
* [VM][Adreno] Fix using buffers for weights in VM. In VM, `fn->attrs` doesn't contain information about `kernel_layout`, so we can get this value from `expr_attrib` instead. In this PR, the function `CanUseBuffers` was modified to work with VM, and a new test which checks memory scope for VM was added. * Fix CI
1 parent 5d3f3dd commit 738c2e9

File tree

3 files changed

+76
-27
lines changed

3 files changed

+76
-27
lines changed

src/relay/transforms/annotate_texture_storage.cc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,8 +174,11 @@ class StorageInfo : private transform::DeviceAwareExprVisitor {
174174
for (const auto& ttype : FlattenTupleType(fn->params[i]->checked_type())) {
175175
std::string scope = Scope(ttype->shape, GetVirtualDevice(GetRef<Expr>(call)));
176176
if (expr_attrib.as<Conv2DAttrs>() || expr_attrib.as<Conv2DWinogradAttrs>()) {
177+
String kernel_layout = expr_attrib.as<Conv2DAttrs>()
178+
? expr_attrib.as<Conv2DAttrs>()->kernel_layout
179+
: expr_attrib.as<Conv2DWinogradAttrs>()->kernel_layout;
177180
if ((i == weights_pos) && !ttype->dtype.is_float16() &&
178-
CanUseBuffers(call->args[i], ttype->shape, fn->attrs)) {
181+
CanUseBuffers(call->args[i], ttype->shape, kernel_layout)) {
179182
buffers_params.insert(fn->params[i]);
180183
buffers_args.insert(call->args[i]);
181184
scope = "global";
@@ -426,10 +429,9 @@ class StorageInfo : private transform::DeviceAwareExprVisitor {
426429
}
427430

428431
bool CanUseBuffers(const Expr param, const Array<PrimExpr> shape,
429-
const tvm::DictAttrs param_attrs) const {
432+
const String kernel_layout) const {
430433
bool use_buffer = false;
431434
if (param.as<ConstantNode>() && shape.size() == 5) {
432-
auto kernel_layout = param_attrs.GetAttr<String>("kernel_layout");
433435
if (kernel_layout == "HWOI4o" || kernel_layout == "HWIO4o") {
434436
int a0 = shape[0].as<IntImmNode>()->value;
435437
int a1 = shape[1].as<IntImmNode>()->value;

tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py

Lines changed: 66 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -692,7 +692,6 @@ def test_residual_block(remote, target, executor_type, dtype):
692692
{"data": input_shape},
693693
{"data": dtype},
694694
target,
695-
static_memory_scope,
696695
)
697696

698697

@@ -790,11 +789,12 @@ def test_concat(remote, target, executor_type, dtype):
790789

791790
static_memory_scope = [
792791
"",
792+
"global.texture",
793793
"global",
794794
"global.texture-weight",
795-
"global.texture-weight",
796795
"global",
797-
"global.texture-weight",
796+
"global.texture-nhwc",
797+
"global",
798798
"global.texture-weight",
799799
"",
800800
"",
@@ -803,8 +803,6 @@ def test_concat(remote, target, executor_type, dtype):
803803
"",
804804
]
805805

806-
static_memory_scope = []
807-
808806
if executor_type == "ge":
809807
build_run_compare(
810808
remote,
@@ -823,7 +821,6 @@ def test_concat(remote, target, executor_type, dtype):
823821
{"data": input_shape},
824822
{"data": dtype},
825823
target,
826-
static_memory_scope,
827824
)
828825

829826

@@ -968,7 +965,6 @@ def test_pooling_branching_texture_params(remote, target, executor_type, dtype):
968965
{"data": input_shape},
969966
{"data": dtype},
970967
target,
971-
static_memory_scope,
972968
)
973969

974970

@@ -1111,7 +1107,6 @@ def test_branching_texture_params(remote, target, executor_type, dtype):
11111107
{"data": input_shape},
11121108
{"data": dtype},
11131109
target,
1114-
static_memory_scope,
11151110
)
11161111

11171112

@@ -1212,7 +1207,6 @@ def test_conv2d_different_lowering_same_op(remote, target, executor_type, dtype)
12121207
{"data": input_shape},
12131208
{"data": dtype},
12141209
target,
1215-
static_memory_scope,
12161210
)
12171211

12181212

@@ -1380,7 +1374,6 @@ def test_injective_nwo_inputs1(remote, target, executor_type, dtype):
13801374
{"data": input_shape},
13811375
{"data": dtype},
13821376
target,
1383-
static_memory_scope,
13841377
)
13851378

13861379

@@ -1495,7 +1488,6 @@ def test_injective_nwo_inputs2(remote, target, executor_type, dtype):
14951488
{"data": input_shape},
14961489
{"data": dtype},
14971490
target,
1498-
static_memory_scope,
14991491
)
15001492

15011493

@@ -1534,5 +1526,68 @@ def test_conv2d_to_3_channels(remote, target, executor_type, dtype):
15341526
)
15351527

15361528

1529+
@tvm.testing.requires_opencl
1530+
@tvm.testing.parametrize_targets("opencl -device=adreno")
1531+
def test_conv2d_weight_on_buffers(remote, target, executor_type, dtype):
1532+
target = "opencl -device=adreno"
1533+
input_shape = (1, 64, 75, 75)
1534+
filter_shape = (64, 64, 3, 3)
1535+
bias_shape = (64,)
1536+
A = relay.var("data", shape=input_shape, dtype=dtype)
1537+
W = relay.var("weight", shape=filter_shape, dtype=dtype)
1538+
BS = relay.var("bias", shape=bias_shape, dtype=dtype)
1539+
conv = relay.nn.conv2d(A, W, padding=[1, 1, 1, 1], channels=64, kernel_size=(3, 3))
1540+
conv = relay.nn.bias_add(conv, BS)
1541+
conv = relay.op.nn.relu(conv)
1542+
1543+
mod = relay.Function([A, W, BS], conv)
1544+
np.random.seed(0)
1545+
initializer = relay.testing.init.Xavier()
1546+
filter_data = np.zeros(filter_shape).astype(dtype)
1547+
bias_data = np.zeros(bias_shape).astype(dtype)
1548+
initializer("weight", filter_data)
1549+
initializer("bias", bias_data)
1550+
params1 = {
1551+
"weight": tvm.nd.array(filter_data),
1552+
"bias": tvm.nd.array(bias_data),
1553+
}
1554+
1555+
if executor_type == "ge":
1556+
static_memory_scope = [
1557+
"",
1558+
"global.texture",
1559+
"global",
1560+
"global.texture-weight",
1561+
"",
1562+
"",
1563+
]
1564+
build_run_compare(
1565+
remote,
1566+
mod,
1567+
params1,
1568+
{"data": input_shape},
1569+
{"data": dtype},
1570+
target,
1571+
static_memory_scope,
1572+
)
1573+
else:
1574+
static_memory_scope = """
1575+
VM VirtualDevice[0]: device type 1, id 0 and mem_scope
1576+
VM VirtualDevice[1]: device type 4, id 0 and mem_scope
1577+
VM VirtualDevice[2]: device type 4, id 0 and mem_scope global.texture
1578+
VM VirtualDevice[3]: device type 4, id 0 and mem_scope global
1579+
VM VirtualDevice[4]: device type 4, id 0 and mem_scope global.texture-weight
1580+
"""
1581+
build_run_compare_vm(
1582+
remote,
1583+
mod,
1584+
params1,
1585+
{"data": input_shape},
1586+
{"data": dtype},
1587+
target,
1588+
static_memory_scope,
1589+
)
1590+
1591+
15371592
if __name__ == "__main__":
15381593
tvm.testing.main()

tests/python/relay/opencl_texture/utils/adreno_utils.py

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -161,19 +161,11 @@ def build_run_compare_vm(
161161
tvm_mod_nchwc, target=target, target_host=target_host, params=params1
162162
)
163163

164-
# TODO(echuraev): enable scope checking
165-
## verification that storage_scope has expected textures scopes
166-
# graph_json = json.loads(graph)
167-
# if "storage_scope" in graph_json["attrs"]:
168-
# assert (
169-
# len(static_mem_scopes) == len(graph_json["attrs"]["storage_scope"][1])
170-
# or len(static_mem_scopes) == 0
171-
# )
172-
# else:
173-
# assert len(static_mem_scopes) == 0
174-
175-
# for i in range(0, len(static_mem_scopes)):
176-
# assert static_mem_scopes[i] == graph_json["attrs"]["storage_scope"][1][i]
164+
if len(static_mem_scopes) > 0:
165+
mem_scopes_lines = static_mem_scopes.strip().split("\n")
166+
vm_lines = vmc._get_virtual_devices().strip().split("\n")
167+
for i in range(0, len(mem_scopes_lines)):
168+
assert mem_scopes_lines[i].strip() == vm_lines[i].strip()
177169

178170
if remote is None:
179171
dev = tvm.opencl()

0 commit comments

Comments (0)