Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

add graph_compact. #13436

Merged
merged 7 commits into from
Dec 1, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/api/python/ndarray/contrib.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ In the rest of this document, we list routines provided by the `ndarray.contrib`
index_copy
getnnz
edge_id
dgl_csr_neighbor_uniform_sample
dgl_csr_neighbor_non_uniform_sample
dgl_subgraph
dgl_adjacency
dgl_graph_compact
```

## API Reference
Expand Down
11 changes: 11 additions & 0 deletions docs/api/python/symbol/contrib.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,17 @@ In the rest of this document, we list routines provided by the `symbol.contrib`
foreach
while_loop
cond
isinf
isfinite
isnan
index_copy
getnnz
edge_id
dgl_csr_neighbor_uniform_sample
dgl_csr_neighbor_non_uniform_sample
dgl_subgraph
dgl_adjacency
dgl_graph_compact
```

## API Reference
Expand Down
222 changes: 220 additions & 2 deletions src/operator/contrib/dgl_graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,10 @@ static void CSRNeighborUniformSampleComputeExCPU(const nnvm::NodeAttrs& attrs,
NNVM_REGISTER_OP(_contrib_dgl_csr_neighbor_uniform_sample)
.describe(R"code(This operator samples sub-graph from a csr graph via an
uniform probability.
Example::

Example:

.. code:: python

shape = (5, 5)
data_np = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20], dtype=np.int64)
Expand Down Expand Up @@ -850,7 +853,10 @@ static void CSRNeighborNonUniformSampleComputeExCPU(const nnvm::NodeAttrs& attrs
NNVM_REGISTER_OP(_contrib_dgl_csr_neighbor_non_uniform_sample)
.describe(R"code(This operator samples sub-graph from a csr graph via an
uniform probability.
Example::

Example:

.. code:: python

shape = (5, 5)
prob = mx.nd.array([0.9, 0.8, 0.2, 0.4, 0.1], dtype=np.float32)
Expand Down Expand Up @@ -1379,6 +1385,8 @@ the data value of float32.

Example:

.. code:: python

x = [[ 1, 0, 0 ],
[ 0, 2, 0 ],
[ 0, 0, 3 ]]
Expand All @@ -1400,5 +1408,215 @@ the data value of float32.
.set_attr<FComputeEx>("FComputeEx<cpu>", DGLAdjacencyForwardEx<cpu>)
.add_argument("data", "NDArray-or-Symbol", "Input ndarray");

///////////////////////// Compact subgraphs ///////////////////////////

struct SubgraphCompactParam : public dmlc::Parameter<SubgraphCompactParam> {
int num_args;
bool return_mapping;
nnvm::Tuple<nnvm::dim_t> graph_sizes;
DMLC_DECLARE_PARAMETER(SubgraphCompactParam) {
DMLC_DECLARE_FIELD(num_args).set_lower_bound(2)
.describe("Number of input arguments.");
DMLC_DECLARE_FIELD(return_mapping)
.describe("Return mapping of vid and eid between the subgraph and the parent graph.");
DMLC_DECLARE_FIELD(graph_sizes)
.describe("the number of vertices in each graph.");
}
}; // struct SubgraphCompactParam

DMLC_REGISTER_PARAMETER(SubgraphCompactParam);

static inline size_t get_num_graphs(const SubgraphCompactParam &params) {
// Each CSR needs a 1D array to store the original vertex Id for each row.
return params.num_args / 2;
}

static void CompactSubgraph(const NDArray &csr, const NDArray &vids,
const NDArray &out_csr, size_t graph_size) {
TBlob in_idx_data = csr.aux_data(csr::kIdx);
TBlob in_ptr_data = csr.aux_data(csr::kIndPtr);
const dgl_id_t *indices_in = in_idx_data.dptr<dgl_id_t>();
const dgl_id_t *indptr_in = in_ptr_data.dptr<dgl_id_t>();
const dgl_id_t *row_ids = vids.data().dptr<dgl_id_t>();
size_t num_elems = csr.aux_data(csr::kIdx).shape_.Size();
// The last element in vids is the actual number of vertices in the subgraph.
CHECK_EQ(vids.shape()[0], in_ptr_data.shape_[0]);
CHECK_EQ(static_cast<size_t>(row_ids[vids.shape()[0] - 1]), graph_size);

// Prepare the Id map from the original graph to the subgraph.
std::unordered_map<dgl_id_t, dgl_id_t> id_map;
id_map.reserve(graph_size);
for (size_t i = 0; i < graph_size; i++) {
id_map.insert(std::pair<dgl_id_t, dgl_id_t>(row_ids[i], i));
CHECK_NE(row_ids[i], -1);
}

TShape nz_shape(1);
nz_shape[0] = num_elems;
TShape indptr_shape(1);
CHECK_EQ(out_csr.shape()[0], graph_size);
indptr_shape[0] = graph_size + 1;
CHECK_GE(in_ptr_data.shape_[0], indptr_shape[0]);

out_csr.CheckAndAllocData(nz_shape);
out_csr.CheckAndAllocAuxData(csr::kIdx, nz_shape);
out_csr.CheckAndAllocAuxData(csr::kIndPtr, indptr_shape);

dgl_id_t *indices_out = out_csr.aux_data(csr::kIdx).dptr<dgl_id_t>();
dgl_id_t *indptr_out = out_csr.aux_data(csr::kIndPtr).dptr<dgl_id_t>();
dgl_id_t *sub_eids = out_csr.data().dptr<dgl_id_t>();
std::copy(indptr_in, indptr_in + indptr_shape[0], indptr_out);
for (int64_t i = 0; i < nz_shape[0]; i++) {
dgl_id_t old_id = indices_in[i];
auto it = id_map.find(old_id);
CHECK(it != id_map.end());
indices_out[i] = it->second;
sub_eids[i] = i;
}
}

static void SubgraphCompactComputeExCPU(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<NDArray>& inputs,
const std::vector<OpReqType>& req,
const std::vector<NDArray>& outputs) {
const SubgraphCompactParam& params = nnvm::get<SubgraphCompactParam>(attrs.parsed);
int num_g = get_num_graphs(params);
#pragma omp parallel for
for (int i = 0; i < num_g; i++) {
CompactSubgraph(inputs[i], inputs[i + num_g], outputs[i], params.graph_sizes[i]);
}
}

static bool SubgraphCompactStorageType(const nnvm::NodeAttrs& attrs,
const int dev_mask,
DispatchMode* dispatch_mode,
std::vector<int> *in_attrs,
std::vector<int> *out_attrs) {
const SubgraphCompactParam& params = nnvm::get<SubgraphCompactParam>(attrs.parsed);
size_t num_g = get_num_graphs(params);
CHECK_EQ(num_g * 2, in_attrs->size());
// These are the input subgraphs.
for (size_t i = 0; i < num_g; i++)
CHECK_EQ(in_attrs->at(i), kCSRStorage);
// These are the vertex Ids in the original graph.
for (size_t i = 0; i < num_g; i++)
CHECK_EQ(in_attrs->at(i + num_g), kDefaultStorage);

bool success = true;
*dispatch_mode = DispatchMode::kFComputeEx;
for (size_t i = 0; i < out_attrs->size(); i++) {
if (!type_assign(&(*out_attrs)[i], mxnet::kCSRStorage))
success = false;
}
return success;
}

static bool SubgraphCompactShape(const nnvm::NodeAttrs& attrs,
std::vector<TShape> *in_attrs,
std::vector<TShape> *out_attrs) {
const SubgraphCompactParam& params = nnvm::get<SubgraphCompactParam>(attrs.parsed);
size_t num_g = get_num_graphs(params);
CHECK_EQ(num_g * 2, in_attrs->size());
// These are the input subgraphs.
for (size_t i = 0; i < num_g; i++) {
CHECK_EQ(in_attrs->at(i).ndim(), 2U);
CHECK_GE(in_attrs->at(i)[0], params.graph_sizes[i]);
CHECK_GE(in_attrs->at(i)[1], params.graph_sizes[i]);
}
// These are the vertex Ids in the original graph.
for (size_t i = 0; i < num_g; i++) {
CHECK_EQ(in_attrs->at(i + num_g).ndim(), 1U);
CHECK_GE(in_attrs->at(i + num_g)[0], params.graph_sizes[i]);
}

for (size_t i = 0; i < num_g; i++) {
TShape gshape(2);
gshape[0] = params.graph_sizes[i];
gshape[1] = params.graph_sizes[i];
out_attrs->at(i) = gshape;
if (params.return_mapping)
out_attrs->at(i + num_g) = gshape;
}
return true;
}

static bool SubgraphCompactType(const nnvm::NodeAttrs& attrs,
std::vector<int> *in_attrs,
std::vector<int> *out_attrs) {
for (size_t i = 0; i < in_attrs->size(); i++) {
CHECK_EQ(in_attrs->at(i), mshadow::kInt64);
}
for (size_t i = 0; i < out_attrs->size(); i++) {
out_attrs->at(i) = mshadow::kInt64;
}
return true;
}

NNVM_REGISTER_OP(_contrib_dgl_graph_compact)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please also update ndarray/contrib.md and symbol/contrib.md

.describe(R"code(This operator compacts a CSR matrix generated by
dgl_csr_neighbor_uniform_sample and dgl_csr_neighbor_non_uniform_sample.
The CSR matrices generated by these two operators may have many empty
rows at the end and many empty columns. This operator removes these
empty rows and empty columns.

Example:

.. code:: python

shape = (5, 5)
data_np = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20], dtype=np.int64)
indices_np = np.array([1,2,3,4,0,2,3,4,0,1,3,4,0,1,2,4,0,1,2,3], dtype=np.int64)
indptr_np = np.array([0,4,8,12,16,20], dtype=np.int64)
a = mx.nd.sparse.csr_matrix((data_np, indices_np, indptr_np), shape=shape)
seed = mx.nd.array([0,1,2,3,4], dtype=np.int64)
out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=1,
num_neighbor=2, max_num_vertices=6)
subg_v = out[0]
subg = out[1]
compact = mx.nd.contrib.dgl_graph_compact(subg, subg_v,
graph_sizes=(subg_v[-1].asnumpy()[0]), return_mapping=False)

compact.asnumpy()
array([[0, 0, 0, 1, 0],
[2, 0, 3, 0, 0],
[0, 4, 0, 0, 5],
[0, 6, 0, 0, 7],
[8, 9, 0, 0, 0]])

)code" ADD_FILELINE)
.set_attr_parser(ParamParser<SubgraphCompactParam>)
.set_num_inputs([](const NodeAttrs& attrs) {
const SubgraphCompactParam& params = nnvm::get<SubgraphCompactParam>(attrs.parsed);
return params.num_args;
})
.set_num_outputs([](const NodeAttrs& attrs) {
const SubgraphCompactParam& params = nnvm::get<SubgraphCompactParam>(attrs.parsed);
int num_varray = get_num_graphs(params);
if (params.return_mapping)
return num_varray * 2;
else
return num_varray;
})
.set_attr<nnvm::FListInputNames>("FListInputNames",
[](const NodeAttrs& attrs) {
const SubgraphCompactParam& params = nnvm::get<SubgraphCompactParam>(attrs.parsed);
std::vector<std::string> names;
names.reserve(params.num_args);
size_t num_graphs = get_num_graphs(params);
for (size_t i = 0; i < num_graphs; i++)
names.push_back("graph" + std::to_string(i));
for (size_t i = 0; i < num_graphs; ++i)
names.push_back("varray" + std::to_string(i));
return names;
})
.set_attr<FInferStorageType>("FInferStorageType", SubgraphCompactStorageType)
.set_attr<nnvm::FInferShape>("FInferShape", SubgraphCompactShape)
.set_attr<nnvm::FInferType>("FInferType", SubgraphCompactType)
.set_attr<FComputeEx>("FComputeEx<cpu>", SubgraphCompactComputeExCPU)
.set_attr<std::string>("key_var_num_args", "num_args")
.add_argument("graph_data", "NDArray-or-Symbol[]", "Input graphs and input vertex Ids.")
.add_arguments(SubgraphCompactParam::__FIELDS__());

} // namespace op
} // namespace mxnet
40 changes: 40 additions & 0 deletions tests/python/unittest/test_dgl_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,18 @@ def check_non_uniform(out, num_hops, max_num_vertices):
for data in layer:
assert(data <= num_hops)

def check_compact(csr, id_arr, num_nodes):
compact = mx.nd.contrib.dgl_graph_compact(csr, id_arr, graph_sizes=num_nodes, return_mapping=False)
assert compact.shape[0] == num_nodes
assert compact.shape[1] == num_nodes
assert mx.nd.sum(compact.indptr == csr.indptr[0:(num_nodes + 1)]).asnumpy() == num_nodes + 1
sub_indices = compact.indices.asnumpy()
indices = csr.indices.asnumpy()
id_arr = id_arr.asnumpy()
for i in range(len(sub_indices)):
sub_id = sub_indices[i]
assert id_arr[sub_id] == indices[i]

def test_uniform_sample():
shape = (5, 5)
data_np = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20], dtype=np.int64)
Expand All @@ -74,36 +86,64 @@ def test_uniform_sample():
out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=1, num_neighbor=2, max_num_vertices=5)
assert (len(out) == 3)
check_uniform(out, num_hops=1, max_num_vertices=5)
num_nodes = out[0][-1].asnumpy()
assert num_nodes > 0
assert num_nodes < len(out[0])
check_compact(out[1], out[0], num_nodes)

seed = mx.nd.array([0], dtype=np.int64)
out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=1, num_neighbor=1, max_num_vertices=4)
assert (len(out) == 3)
check_uniform(out, num_hops=1, max_num_vertices=4)
num_nodes = out[0][-1].asnumpy()
assert num_nodes > 0
assert num_nodes < len(out[0])
check_compact(out[1], out[0], num_nodes)

seed = mx.nd.array([0], dtype=np.int64)
out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=2, num_neighbor=1, max_num_vertices=4)
assert (len(out) == 3)
check_uniform(out, num_hops=2, max_num_vertices=4)
num_nodes = out[0][-1].asnumpy()
assert num_nodes > 0
assert num_nodes < len(out[0])
check_compact(out[1], out[0], num_nodes)

seed = mx.nd.array([0,2,4], dtype=np.int64)
out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=1, num_neighbor=2, max_num_vertices=5)
assert (len(out) == 3)
check_uniform(out, num_hops=1, max_num_vertices=5)
num_nodes = out[0][-1].asnumpy()
assert num_nodes > 0
assert num_nodes < len(out[0])
check_compact(out[1], out[0], num_nodes)

seed = mx.nd.array([0,4], dtype=np.int64)
out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=1, num_neighbor=2, max_num_vertices=5)
assert (len(out) == 3)
check_uniform(out, num_hops=1, max_num_vertices=5)
num_nodes = out[0][-1].asnumpy()
assert num_nodes > 0
assert num_nodes < len(out[0])
check_compact(out[1], out[0], num_nodes)

seed = mx.nd.array([0,4], dtype=np.int64)
out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=2, num_neighbor=2, max_num_vertices=5)
assert (len(out) == 3)
check_uniform(out, num_hops=2, max_num_vertices=5)
num_nodes = out[0][-1].asnumpy()
assert num_nodes > 0
assert num_nodes < len(out[0])
check_compact(out[1], out[0], num_nodes)

seed = mx.nd.array([0,4], dtype=np.int64)
out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=1, num_neighbor=2, max_num_vertices=5)
assert (len(out) == 3)
check_uniform(out, num_hops=1, max_num_vertices=5)
num_nodes = out[0][-1].asnumpy()
assert num_nodes > 0
assert num_nodes < len(out[0])
check_compact(out[1], out[0], num_nodes)

def test_non_uniform_sample():
shape = (5, 5)
Expand Down