From 7ceaaf427ab84018e904d9bf5fc9344377f7345c Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Wed, 28 Nov 2018 12:06:32 +0800 Subject: [PATCH 1/7] add graph_compact. --- src/operator/contrib/dgl_graph.cc | 180 ++++++++++++++++++++++++++++++ 1 file changed, 180 insertions(+) diff --git a/src/operator/contrib/dgl_graph.cc b/src/operator/contrib/dgl_graph.cc index 74ad3d435648..4b0578c48014 100644 --- a/src/operator/contrib/dgl_graph.cc +++ b/src/operator/contrib/dgl_graph.cc @@ -1400,5 +1400,185 @@ the data value of float32. .set_attr("FComputeEx", DGLAdjacencyForwardEx) .add_argument("data", "NDArray-or-Symbol", "Input ndarray"); +///////////////////////// Compact subgraphs /////////////////////////// + +struct SubgraphCompactParam : public dmlc::Parameter { + int num_args; + bool return_mapping; + nnvm::Tuple graph_sizes; + DMLC_DECLARE_PARAMETER(SubgraphCompactParam) { + DMLC_DECLARE_FIELD(num_args).set_lower_bound(2) + .describe("Number of input arguments."); + DMLC_DECLARE_FIELD(return_mapping) + .describe("Return mapping of vid and eid between the subgraph and the parent graph."); + DMLC_DECLARE_FIELD(graph_sizes) + .describe("the number of vertices in each graph."); + } +}; // struct SubgraphCompactParam + +DMLC_REGISTER_PARAMETER(SubgraphCompactParam); + +static inline size_t get_num_graphs(const SubgraphCompactParam &params) { + // Each CSR needs a 1D array to store the original vertex Id for each row. + return params.num_args / 2; +} + +static void CompactSubgraph(const NDArray &csr, const NDArray &vids, + const NDArray &out_csr, size_t graph_size) { + TBlob in_idx_data = csr.aux_data(csr::kIdx); + TBlob in_ptr_data = csr.aux_data(csr::kIndPtr); + const dgl_id_t *indices_in = in_idx_data.dptr(); + const dgl_id_t *indptr_in = in_ptr_data.dptr(); + const dgl_id_t *row_ids = vids.data().dptr(); + size_t num_elems = csr.aux_data(csr::kIdx).shape_.Size(); + // The last element in vids is the actual number of vertices in the subgraph. 
+ CHECK_EQ(vids.shape()[0], in_ptr_data.shape_[0]); + CHECK_EQ((size_t) row_ids[vids.shape()[0] - 1], graph_size); + + // Prepare the Id map from the original graph to the subgraph. + std::unordered_map id_map; + id_map.reserve(graph_size); + for (size_t i = 0; i < graph_size; i++) { + id_map.insert(std::pair(row_ids[i], i)); + CHECK_NE(row_ids[i], -1); + } + + TShape nz_shape(1); + nz_shape[0] = num_elems; + TShape indptr_shape(1); + CHECK_EQ(out_csr.shape()[0], graph_size); + indptr_shape[0] = graph_size + 1; + CHECK_GE(in_ptr_data.shape_[0], indptr_shape[0]); + + out_csr.CheckAndAllocData(nz_shape); + out_csr.CheckAndAllocAuxData(csr::kIdx, nz_shape); + out_csr.CheckAndAllocAuxData(csr::kIndPtr, indptr_shape); + + dgl_id_t *indices_out = out_csr.aux_data(csr::kIdx).dptr(); + dgl_id_t *indptr_out = out_csr.aux_data(csr::kIndPtr).dptr(); + dgl_id_t *sub_eids = out_csr.data().dptr(); + std::copy(indptr_in, indptr_in + indptr_shape[0], indptr_out); + for (int64_t i = 0; i < nz_shape[0]; i++) { + dgl_id_t old_id = indices_in[i]; + auto it = id_map.find(old_id); + CHECK(it != id_map.end()); + indices_out[i] = it->second; + sub_eids[i] = i; + } +} + +static void SubgraphCompactComputeExCPU(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + const SubgraphCompactParam& params = nnvm::get(attrs.parsed); + size_t num_g = get_num_graphs(params); +#pragma omp parallel for + for (size_t i = 0; i < num_g; i++) { + CompactSubgraph(inputs[i], inputs[i + num_g], outputs[i], params.graph_sizes[i]); + } +} + +static bool SubgraphCompactStorageType(const nnvm::NodeAttrs& attrs, + const int dev_mask, + DispatchMode* dispatch_mode, + std::vector *in_attrs, + std::vector *out_attrs) { + const SubgraphCompactParam& params = nnvm::get(attrs.parsed); + size_t num_g = get_num_graphs(params); + CHECK_EQ(num_g * 2, in_attrs->size()); + // These are the input subgraphs. 
+ for (size_t i = 0; i < num_g; i++) + CHECK_EQ(in_attrs->at(i), kCSRStorage); + // These are the vertex Ids in the original graph. + for (size_t i = 0; i < num_g; i++) + CHECK_EQ(in_attrs->at(i + num_g), kDefaultStorage); + + bool success = true; + *dispatch_mode = DispatchMode::kFComputeEx; + for (size_t i = 0; i < out_attrs->size(); i++) { + if (!type_assign(&(*out_attrs)[i], mxnet::kCSRStorage)) + success = false; + } + return success; +} + +static bool SubgraphCompactShape(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + const SubgraphCompactParam& params = nnvm::get(attrs.parsed); + size_t num_g = get_num_graphs(params); + CHECK_EQ(num_g * 2, in_attrs->size()); + // These are the input subgraphs. + for (size_t i = 0; i < num_g; i++) { + CHECK_EQ(in_attrs->at(i).ndim(), 2U); + CHECK_GE(in_attrs->at(i)[0], params.graph_sizes[i]); + CHECK_GE(in_attrs->at(i)[1], params.graph_sizes[i]); + } + // These are the vertex Ids in the original graph. + for (size_t i = 0; i < num_g; i++) { + CHECK_EQ(in_attrs->at(i + num_g).ndim(), 1U); + CHECK_GE(in_attrs->at(i + num_g)[0], params.graph_sizes[i]); + } + + for (size_t i = 0; i < num_g; i++) { + TShape gshape(2); + gshape[0] = params.graph_sizes[i]; + gshape[1] = params.graph_sizes[i]; + out_attrs->at(i) = gshape; + if (params.return_mapping) + out_attrs->at(i + num_g) = gshape; + } + return true; +} + +static bool SubgraphCompactType(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + for (size_t i = 0; i < in_attrs->size(); i++) { + CHECK_EQ(in_attrs->at(i), mshadow::kInt64); + } + for (size_t i = 0; i < out_attrs->size(); i++) { + out_attrs->at(i) = mshadow::kInt64; + } + return true; +} + +NNVM_REGISTER_OP(_contrib_dgl_graph_compact) +.MXNET_DESCRIBE("") +.set_attr_parser(ParamParser) +.set_num_inputs([](const NodeAttrs& attrs) { + const SubgraphCompactParam& params = nnvm::get(attrs.parsed); + return params.num_args; +}) +.set_num_outputs([](const 
NodeAttrs& attrs) { + const SubgraphCompactParam& params = nnvm::get(attrs.parsed); + int num_varray = get_num_graphs(params); + if (params.return_mapping) + return num_varray * 2; + else + return num_varray; +}) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + const SubgraphCompactParam& params = nnvm::get(attrs.parsed); + std::vector names; + names.reserve(params.num_args); + size_t num_graphs = get_num_graphs(params); + for (size_t i = 0; i < num_graphs; i++) + names.push_back("graph" + std::to_string(i)); + for (size_t i = 0; i < num_graphs; ++i) + names.push_back("varray" + std::to_string(i)); + return names; +}) +.set_attr("FInferStorageType", SubgraphCompactStorageType) +.set_attr("FInferShape", SubgraphCompactShape) +.set_attr("FInferType", SubgraphCompactType) +.set_attr("FComputeEx", SubgraphCompactComputeExCPU) +.set_attr("key_var_num_args", "num_args") +.add_argument("graph_data", "NDArray-or-Symbol[]", "Input graphs and input vertex Ids.") +.add_arguments(SubgraphCompactParam::__FIELDS__()); + } // namespace op } // namespace mxnet From b52d5a80c9398699174c363a00d411c9fcd1df91 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Wed, 28 Nov 2018 14:05:55 +0800 Subject: [PATCH 2/7] fix. 
--- src/operator/contrib/dgl_graph.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/operator/contrib/dgl_graph.cc b/src/operator/contrib/dgl_graph.cc index 4b0578c48014..153590323e17 100644 --- a/src/operator/contrib/dgl_graph.cc +++ b/src/operator/contrib/dgl_graph.cc @@ -1473,9 +1473,9 @@ static void SubgraphCompactComputeExCPU(const nnvm::NodeAttrs& attrs, const std::vector& req, const std::vector& outputs) { const SubgraphCompactParam& params = nnvm::get(attrs.parsed); - size_t num_g = get_num_graphs(params); + int num_g = get_num_graphs(params); #pragma omp parallel for - for (size_t i = 0; i < num_g; i++) { + for (int i = 0; i < num_g; i++) { CompactSubgraph(inputs[i], inputs[i + num_g], outputs[i], params.graph_sizes[i]); } } From 29a5d17c4451877dcb799fbb240cbb87c88d9509 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Wed, 28 Nov 2018 14:32:44 +0800 Subject: [PATCH 3/7] add doc. --- src/operator/contrib/dgl_graph.cc | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/operator/contrib/dgl_graph.cc b/src/operator/contrib/dgl_graph.cc index 153590323e17..2184b048022c 100644 --- a/src/operator/contrib/dgl_graph.cc +++ b/src/operator/contrib/dgl_graph.cc @@ -1546,7 +1546,18 @@ static bool SubgraphCompactType(const nnvm::NodeAttrs& attrs, } NNVM_REGISTER_OP(_contrib_dgl_graph_compact) -.MXNET_DESCRIBE("") +.describe(R"code(This operator compacts a CSR matrix generated by +csr_neighbor_uniform_sample and csr_neighbor_non_uniform_sample. +The CSR matrices generated by these two operators may have many empty +rows at the end. This operator removes these empty rows and empty columns. 
+Example:: + subgs = mx.nd.contrib.csr_neighbor_uniform_sample(csr, seed, num_hops=1, + num_neighbor=2, max_num_vertices=5) + subg_v = subgs[0] + subg = subgs[1] + compacts = mx.nd.contrib.dgl_graph_compact(subg, subg_v, + graph_sizes=(subg_v[-1].asnumpy()[0])) +)code" ADD_FILELINE) .set_attr_parser(ParamParser) .set_num_inputs([](const NodeAttrs& attrs) { const SubgraphCompactParam& params = nnvm::get(attrs.parsed); From c089024ec5ca0a0d73dddcdcb8927e9ed7606b9a Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Thu, 29 Nov 2018 08:26:39 +0000 Subject: [PATCH 4/7] add tests for graph_compact. --- tests/python/unittest/test_dgl_graph.py | 40 +++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/python/unittest/test_dgl_graph.py b/tests/python/unittest/test_dgl_graph.py index f996d7f38de8..069fef6e32f0 100644 --- a/tests/python/unittest/test_dgl_graph.py +++ b/tests/python/unittest/test_dgl_graph.py @@ -63,6 +63,18 @@ def check_non_uniform(out, num_hops, max_num_vertices): for data in layer: assert(data <= num_hops) +def check_compact(csr, id_arr, num_nodes): + compact = mx.nd.contrib.dgl_graph_compact(csr, id_arr, graph_sizes=num_nodes, return_mapping=False) + assert compact.shape[0] == num_nodes + assert compact.shape[1] == num_nodes + assert mx.nd.sum(compact.indptr == csr.indptr[0:(num_nodes + 1)]).asnumpy() == num_nodes + 1 + sub_indices = compact.indices.asnumpy() + indices = csr.indices.asnumpy() + id_arr = id_arr.asnumpy() + for i in range(len(sub_indices)): + sub_id = sub_indices[i] + assert id_arr[sub_id] == indices[i] + def test_uniform_sample(): shape = (5, 5) data_np = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20], dtype=np.int64) @@ -74,36 +86,64 @@ def test_uniform_sample(): out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=1, num_neighbor=2, max_num_vertices=5) assert (len(out) == 3) check_uniform(out, num_hops=1, max_num_vertices=5) + num_nodes = out[0][-1].asnumpy() + assert 
num_nodes > 0 + assert num_nodes < len(out[0]) + check_compact(out[1], out[0], num_nodes) seed = mx.nd.array([0], dtype=np.int64) out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=1, num_neighbor=1, max_num_vertices=4) assert (len(out) == 3) check_uniform(out, num_hops=1, max_num_vertices=4) + num_nodes = out[0][-1].asnumpy() + assert num_nodes > 0 + assert num_nodes < len(out[0]) + check_compact(out[1], out[0], num_nodes) seed = mx.nd.array([0], dtype=np.int64) out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=2, num_neighbor=1, max_num_vertices=4) assert (len(out) == 3) check_uniform(out, num_hops=2, max_num_vertices=4) + num_nodes = out[0][-1].asnumpy() + assert num_nodes > 0 + assert num_nodes < len(out[0]) + check_compact(out[1], out[0], num_nodes) seed = mx.nd.array([0,2,4], dtype=np.int64) out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=1, num_neighbor=2, max_num_vertices=5) assert (len(out) == 3) check_uniform(out, num_hops=1, max_num_vertices=5) + num_nodes = out[0][-1].asnumpy() + assert num_nodes > 0 + assert num_nodes < len(out[0]) + check_compact(out[1], out[0], num_nodes) seed = mx.nd.array([0,4], dtype=np.int64) out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=1, num_neighbor=2, max_num_vertices=5) assert (len(out) == 3) check_uniform(out, num_hops=1, max_num_vertices=5) + num_nodes = out[0][-1].asnumpy() + assert num_nodes > 0 + assert num_nodes < len(out[0]) + check_compact(out[1], out[0], num_nodes) seed = mx.nd.array([0,4], dtype=np.int64) out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=2, num_neighbor=2, max_num_vertices=5) assert (len(out) == 3) check_uniform(out, num_hops=2, max_num_vertices=5) + num_nodes = out[0][-1].asnumpy() + assert num_nodes > 0 + assert num_nodes < len(out[0]) + check_compact(out[1], out[0], num_nodes) seed = mx.nd.array([0,4], dtype=np.int64) out = 
mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=1, num_neighbor=2, max_num_vertices=5) assert (len(out) == 3) check_uniform(out, num_hops=1, max_num_vertices=5) + num_nodes = out[0][-1].asnumpy() + assert num_nodes > 0 + assert num_nodes < len(out[0]) + check_compact(out[1], out[0], num_nodes) def test_non_uniform_sample(): shape = (5, 5) From 2f7175f3acfe1ad6095b92f730a55e3a18c99847 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Sat, 1 Dec 2018 09:42:49 +0800 Subject: [PATCH 5/7] address comments. --- docs/api/python/ndarray/contrib.md | 5 +++++ docs/api/python/symbol/contrib.md | 11 +++++++++++ src/operator/contrib/dgl_graph.cc | 4 ++-- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/docs/api/python/ndarray/contrib.md b/docs/api/python/ndarray/contrib.md index 709ddae007c5..d7c9021b5957 100644 --- a/docs/api/python/ndarray/contrib.md +++ b/docs/api/python/ndarray/contrib.md @@ -61,6 +61,11 @@ In the rest of this document, we list routines provided by the `ndarray.contrib` index_copy getnnz edge_id + dgl_csr_neighbor_uniform_sample + dgl_csr_neighbor_non_uniform_sample + dgl_subgraph + dgl_adjacency + dgl_graph_compact ``` ## API Reference diff --git a/docs/api/python/symbol/contrib.md b/docs/api/python/symbol/contrib.md index c0a4da54cbde..35cd11c89a70 100644 --- a/docs/api/python/symbol/contrib.md +++ b/docs/api/python/symbol/contrib.md @@ -55,6 +55,17 @@ In the rest of this document, we list routines provided by the `symbol.contrib` foreach while_loop cond + isinf + isfinite + isnan + index_copy + getnnz + edge_id + dgl_csr_neighbor_uniform_sample + dgl_csr_neighbor_non_uniform_sample + dgl_subgraph + dgl_adjacency + dgl_graph_compact ``` ## API Reference diff --git a/src/operator/contrib/dgl_graph.cc b/src/operator/contrib/dgl_graph.cc index 2184b048022c..89ba4fc4b40d 100644 --- a/src/operator/contrib/dgl_graph.cc +++ b/src/operator/contrib/dgl_graph.cc @@ -1433,7 +1433,7 @@ static void CompactSubgraph(const NDArray 
&csr, const NDArray &vids, size_t num_elems = csr.aux_data(csr::kIdx).shape_.Size(); // The last element in vids is the actual number of vertices in the subgraph. CHECK_EQ(vids.shape()[0], in_ptr_data.shape_[0]); - CHECK_EQ((size_t) row_ids[vids.shape()[0] - 1], graph_size); + CHECK_EQ(static_cast(row_ids[vids.shape()[0] - 1]), graph_size); // Prepare the Id map from the original graph to the subgraph. std::unordered_map id_map; @@ -1547,7 +1547,7 @@ static bool SubgraphCompactType(const nnvm::NodeAttrs& attrs, NNVM_REGISTER_OP(_contrib_dgl_graph_compact) .describe(R"code(This operator compacts a CSR matrix generated by -csr_neighbor_uniform_sample and csr_neighbor_non_uniform_sample. +dgl_csr_neighbor_uniform_sample and dgl_csr_neighbor_non_uniform_sample. The CSR matrices generated by these two operators may have many empty rows at the end. This operator removes these empty rows and empty columns. Example:: From ebbcb4aeaa03925ae59ab0f35ae3f351ebbe0f3d Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Sat, 1 Dec 2018 10:37:32 +0800 Subject: [PATCH 6/7] update docs. --- src/operator/contrib/dgl_graph.cc | 47 ++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/src/operator/contrib/dgl_graph.cc b/src/operator/contrib/dgl_graph.cc index 89ba4fc4b40d..ed7caacfdbae 100644 --- a/src/operator/contrib/dgl_graph.cc +++ b/src/operator/contrib/dgl_graph.cc @@ -768,7 +768,10 @@ static void CSRNeighborUniformSampleComputeExCPU(const nnvm::NodeAttrs& attrs, NNVM_REGISTER_OP(_contrib_dgl_csr_neighbor_uniform_sample) .describe(R"code(This operator samples sub-graph from a csr graph via an uniform probability. -Example:: + +Example: + + .. 
code:: python shape = (5, 5) data_np = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20], dtype=np.int64) @@ -850,7 +853,10 @@ static void CSRNeighborNonUniformSampleComputeExCPU(const nnvm::NodeAttrs& attrs NNVM_REGISTER_OP(_contrib_dgl_csr_neighbor_non_uniform_sample) .describe(R"code(This operator samples sub-graph from a csr graph via an uniform probability. -Example:: + +Example: + + .. code:: python shape = (5, 5) prob = mx.nd.array([0.9, 0.8, 0.2, 0.4, 0.1], dtype=np.float32) @@ -1379,6 +1385,8 @@ the data value of float32. Example: + .. code:: python + x = [[ 1, 0, 0 ], [ 0, 2, 0 ], [ 0, 0, 3 ]] @@ -1549,14 +1557,33 @@ NNVM_REGISTER_OP(_contrib_dgl_graph_compact) .describe(R"code(This operator compacts a CSR matrix generated by dgl_csr_neighbor_uniform_sample and dgl_csr_neighbor_non_uniform_sample. The CSR matrices generated by these two operators may have many empty -rows at the end. This operator removes these empty rows and empty columns. -Example:: - subgs = mx.nd.contrib.csr_neighbor_uniform_sample(csr, seed, num_hops=1, - num_neighbor=2, max_num_vertices=5) - subg_v = subgs[0] - subg = subgs[1] - compacts = mx.nd.contrib.dgl_graph_compact(subg, subg_v, - graph_sizes=(subg_v[-1].asnumpy()[0])) +rows at the end and many empty columns. This operator removes these +empty rows and empty columns. + +Example: + + .. 
code:: python + + shape = (5, 5) + data_np = np.array([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20], dtype=np.int64) + indices_np = np.array([1,2,3,4,0,2,3,4,0,1,3,4,0,1,2,4,0,1,2,3], dtype=np.int64) + indptr_np = np.array([0,4,8,12,16,20], dtype=np.int64) + a = mx.nd.sparse.csr_matrix((data_np, indices_np, indptr_np), shape=shape) + seed = mx.nd.array([0,1,2,3,4], dtype=np.int64) + out = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(a, seed, num_args=2, num_hops=1, + num_neighbor=2, max_num_vertices=6) + subg_v = out[0] + subg = out[1] + compact = mx.nd.contrib.dgl_graph_compact(subg, subg_v, + graph_sizes=(subg_v[-1].asnumpy()[0]), return_mapping=False) + + compact.asnumpy() + array([[0, 0, 0, 1, 0], + [2, 0, 3, 0, 0], + [0, 4, 0, 0, 5], + [0, 6, 0, 0, 7], + [8, 9, 0, 0, 0]]) + )code" ADD_FILELINE) .set_attr_parser(ParamParser) .set_num_inputs([](const NodeAttrs& attrs) { From b72ec27e90686515717024009041c110898b875e Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Sat, 1 Dec 2018 13:48:03 +0800 Subject: [PATCH 7/7] trigger CI