
Backport #16796 to 1.6 #16892

Merged
merged 1 commit into from Nov 23, 2019
10 changes: 9 additions & 1 deletion src/operator/fusion/fused_op.cu
@@ -53,6 +53,8 @@ inline std::string mshadowTypeToString(int type) {
       return "int";
     case mshadow::kInt64:
       return "long long";
+    case mshadow::kBool:
+      return "bool";
     default:
       LOG(FATAL) << "Unknown type enum " << type;
   }
@@ -75,6 +77,8 @@ inline int mshadowTypeToVectorLength(int type) {
       return 1;
     case mshadow::kInt64:
       return 1;
+    case mshadow::kBool:
+      return 4 / sizeof(bool);
     default:
       LOG(FATAL) << "Unknown type enum " << type;
   }
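
mshadowTypeToVectorLength feeds the nvec vectorization factor used when generating the fused kernels (see the *nvec = max(...) lines further down in this file). Judging from the values in this switch, it is the number of elements of a given dtype that fit in a 4-byte vector word, with a floor of one for wider types, which is why the new kBool case returns 4 / sizeof(bool). A minimal standalone sketch (not MXNet code) of the assumption behind that expression:

#include <cstddef>

// Illustration only: on the platforms the fused kernels target, bool occupies
// a single byte, so 4 / sizeof(bool) evaluates to 4 -- four boolean elements
// per 4-byte vectorized access. Wider types in the table above fall back to 1.
static_assert(sizeof(bool) == 1, "fused-op vectorization assumes a 1-byte bool");
static_assert(4 / sizeof(bool) == 4, "bool maps to a vector length of 4");

int main() { return 0; }
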
@@ -159,7 +163,7 @@ void AddPointerAndShape(const TBlob& data,
                         std::vector<std::vector<int>>* shapes,
                         mshadow::Stream<gpu> * s) {
   using namespace mshadow;
-  MSHADOW_TYPE_SWITCH(data.type_flag_, DType, {
+  MSHADOW_TYPE_SWITCH_WITH_BOOL(data.type_flag_, DType, {
     Tensor<gpu, 1, DType> tensor = data.FlatTo1D<gpu, DType>(s);
     ptrs->push_back(tensor.dptr_);
     AddShape(data.shape_, shapes);
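
The one-line change above swaps MSHADOW_TYPE_SWITCH for MSHADOW_TYPE_SWITCH_WITH_BOOL so that boolean blobs can also be flattened and have their data pointers collected. Conceptually, such a type-switch macro expands into a switch over the runtime type flag and instantiates its body with DType bound to the matching C++ type; the _WITH_BOOL variant adds a kBool case. A hand-rolled sketch of that dispatch pattern (illustrative only, with made-up names; the real macro covers every MXNet dtype):

#include <iostream>
#include <typeinfo>

// Hypothetical, simplified stand-in for mshadow's runtime type flags.
enum TypeFlag { kFloat32 = 0, kInt64 = 6, kBool = 7 };

// Hand-rolled equivalent of a type-switch macro: dispatch on the runtime flag
// and invoke the functor with the matching static type.
template <typename Functor>
void TypeSwitchWithBool(int type_flag, Functor&& body) {
  switch (type_flag) {
    case kFloat32: body.template operator()<float>(); break;
    case kInt64:   body.template operator()<long long>(); break;
    case kBool:    body.template operator()<bool>(); break;  // the case the _WITH_BOOL variant adds
    default:       std::cerr << "Unknown type enum " << type_flag << "\n";
  }
}

// Example body: print the name of the type the switch selected.
struct PrintType {
  template <typename DType>
  void operator()() const { std::cout << typeid(DType).name() << "\n"; }
};

int main() {
  TypeSwitchWithBool(kBool, PrintType{});  // runs the bool instantiation
  return 0;
}
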
@@ -650,7 +654,9 @@ void FusedOp::CheckShapesAndTypes(const std::vector<TBlob> &inputs,
     in_ndims->push_back(blob.ndim());
     in_shapes.push_back(blob.shape_);
     initialized_ = initialized_ && blob.type_flag_ == inputs_[counter].dtype;
+    initialized_ = initialized_ && blob.ndim() == inputs_[counter].ndim;
     inputs_[counter].dtype = blob.type_flag_;
+    inputs_[counter].ndim = blob.ndim();
     *nvec = max(*nvec, mshadowTypeToVectorLength(blob.type_flag_));
   }

@@ -660,7 +666,9 @@ void FusedOp::CheckShapesAndTypes(const std::vector<TBlob> &inputs,
     out_ndims->push_back(blob.ndim());
     out_shapes.push_back(blob.shape_);
     initialized_ = initialized_ && blob.type_flag_ == outputs_[counter].dtype;
+    initialized_ = initialized_ && blob.ndim() == outputs_[counter].ndim;
     outputs_[counter].dtype = blob.type_flag_;
+    outputs_[counter].ndim = blob.ndim();
     *nvec = max(*nvec, mshadowTypeToVectorLength(blob.type_flag_));
   }

3 changes: 2 additions & 1 deletion src/operator/fusion/fused_op.h
@@ -50,8 +50,9 @@ struct FusedOpConfig : public dmlc::Parameter<FusedOpConfig> {
 };
 
 struct FusedOpEntry {
-  FusedOpEntry() : dtype(-1) {}
+  FusedOpEntry() : dtype(-1), ndim(-1) {}
   int dtype;
+  int ndim;
 };
 
 class FusedOp {
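
Together, the new ndim member of FusedOpEntry and the extra checks in CheckShapesAndTypes make the cached fused kernel sensitive to input dimensionality as well as dtype: if a later call passes a blob whose number of dimensions differs from what was recorded, initialized_ drops to false and the kernel is regenerated instead of being reused with stale shape assumptions (the situation exercised by test_fusion_different_dimensions below). A minimal sketch of that check-and-update pattern, with hypothetical names standing in for the FusedOp bookkeeping:

#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

// Hypothetical cache entry mirroring FusedOpEntry: remember the dtype and
// ndim observed when the kernel was generated.
struct CachedEntry {
  int dtype = -1;
  int ndim = -1;
};

// Sketch of the check-and-update step: the cached kernel stays valid only if
// every input still matches both recorded fields; either way, record what was
// seen so the next call compares against it.
bool StillInitialized(const std::vector<std::pair<int, int>>& seen,  // (dtype, ndim) per input
                      std::vector<CachedEntry>* cached,
                      bool initialized) {
  for (std::size_t i = 0; i < seen.size(); ++i) {
    initialized = initialized && seen[i].first == (*cached)[i].dtype;
    initialized = initialized && seen[i].second == (*cached)[i].ndim;
    (*cached)[i].dtype = seen[i].first;
    (*cached)[i].ndim = seen[i].second;
  }
  return initialized;  // false => regenerate the fused kernel
}

int main() {
  std::vector<CachedEntry> cache(1);
  bool ok = StillInitialized({{0, 1}}, &cache, true);     // nothing cached yet -> mismatch
  std::printf("matches cache: %s\n", ok ? "yes" : "no");  // "no" -> generate kernel
  ok = StillInitialized({{0, 2}}, &cache, true);          // same dtype, ndim changed 1 -> 2
  std::printf("matches cache: %s\n", ok ? "yes" : "no");  // "no" -> regenerate kernel
  return 0;
}
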
43 changes: 43 additions & 0 deletions tests/python/gpu/test_fusion.py
@@ -238,6 +238,49 @@ def test_fusion_compiler_cache():
     if num_gpus > 1:
         check_fused_symbol(a+b, ctx=mx.gpu(1), a=arr1, b=arr2)
 
+@with_seed()
+@use_np
+def test_fusion_boolean_inputs():
+    from mxnet.gluon import HybridBlock
+
+    class Foo(HybridBlock):
+        def __init__(self, prefix=None, params=None):
+            super(Foo, self).__init__(prefix=prefix, params=params)
+
+        def hybrid_forward(self, F, valid_length):
+            mask = valid_length.astype(np.float32)
+            mask2 = valid_length.astype(np.float32)
+            mask = mask * F.np.expand_dims(mask2, axis=-1)
+            return mask
+
+    foo = Foo()
+    foo.hybridize(static_alloc=True)
+    out = foo(mx.np.ones((10,), ctx=mx.gpu(), dtype=np.bool))
+    mx.npx.waitall()
+
+@with_seed()
+def test_fusion_different_dimensions():
+    from mxnet.gluon import HybridBlock
+
+    class Foo(HybridBlock):
+        def __init__(self, prefix=None, params=None):
+            super(Foo, self).__init__(prefix=prefix, params=params)
+
+        def hybrid_forward(self, F, x):
+            mask2 = x.astype(np.float32)
+            mask = F.expand_dims(mask2, axis=-1)
+            return mask
+
+    foo = Foo()
+    foo.hybridize(static_alloc=True)
+    # Pass 1-D data
+    out = foo(mx.nd.ones((10,), ctx=mx.gpu()))
+    assert np.all(out.asnumpy() == np.ones((10,1)))
+    assert out.shape == (10,1)
+    # Pass 2-D data
+    out = foo(mx.nd.ones((10,10), ctx=mx.gpu()))
+    assert np.all(out.asnumpy() == np.ones((10,10)))
+    assert out.shape == (10,10,1)
 
 @with_seed()
 def test_fusion_reshape_executor():