Skip to content

Commit de7dc90

Browse files
authored
Merge branch 'dmlc:master' into add-igbh-to-rgcn
2 parents d11f815 + e8022e9 commit de7dc90

File tree

10 files changed

+144
-54
lines changed

10 files changed

+144
-54
lines changed

graphbolt/include/graphbolt/cuda_ops.h

+8-3
Original file line numberDiff line numberDiff line change
@@ -288,14 +288,17 @@ torch::Tensor IndptrEdgeIdsImpl(
288288
* @param rank The rank of the current GPU.
289289
* @param world_size The total number of GPUs, i.e. the world size.
290290
*
291-
* @return
291+
* @return (unique_ids, compacted_src_ids, compacted_dst_ids, unique_offsets)
292292
* - A tensor representing all unique elements in 'src_ids' and 'dst_ids' after
293293
* removing duplicates. The indices in this tensor precisely match the compacted
294294
* IDs of the corresponding elements.
295295
* - The tensor corresponding to the 'src_ids' tensor, where the entries are
296296
* mapped to compacted IDs.
297297
* - The tensor corresponding to the 'dst_ids' tensor, where the entries are
298298
* mapped to compacted IDs.
299+
* - The tensor corresponding to the offsets into the unique_ids tensor. Has
300+
* size `world_size + 1` and unique_ids[offsets[i]: offsets[i + 1]] belongs to
301+
* the rank `(rank + i) % world_size`.
299302
*
300303
* @example
301304
* torch::Tensor src_ids = src
@@ -306,7 +309,8 @@ torch::Tensor IndptrEdgeIdsImpl(
306309
* torch::Tensor compacted_src_ids = std::get<1>(result);
307310
* torch::Tensor compacted_dst_ids = std::get<2>(result);
308311
*/
309-
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> UniqueAndCompact(
312+
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
313+
UniqueAndCompact(
310314
const torch::Tensor src_ids, const torch::Tensor dst_ids,
311315
const torch::Tensor unique_dst_ids, const int64_t rank,
312316
const int64_t world_size);
@@ -316,7 +320,8 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> UniqueAndCompact(
316320
* value is equal to the passing the ith elements of the input arguments to
317321
* UniqueAndCompact.
318322
*/
319-
std::vector<std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>>
323+
std::vector<
324+
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>>
320325
UniqueAndCompactBatched(
321326
const std::vector<torch::Tensor>& src_ids,
322327
const std::vector<torch::Tensor>& dst_ids,

graphbolt/include/graphbolt/unique_and_compact.h

+10-5
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,17 @@ namespace sampling {
3838
* @param rank The rank of the current GPU.
3939
* @param world_size The total number of GPUs, i.e. the world size.
4040
*
41-
* @return
41+
* @return (unique_ids, compacted_src_ids, compacted_dst_ids, unique_offsets)
4242
* - A tensor representing all unique elements in 'src_ids' and 'dst_ids' after
4343
* removing duplicates. The indices in this tensor precisely match the compacted
4444
* IDs of the corresponding elements.
4545
* - The tensor corresponding to the 'src_ids' tensor, where the entries are
4646
* mapped to compacted IDs.
4747
* - The tensor corresponding to the 'dst_ids' tensor, where the entries are
4848
* mapped to compacted IDs.
49+
* - The tensor corresponding to the offsets into the unique_ids tensor. Has
50+
* size `world_size + 1` and unique_ids[offsets[i]: offsets[i + 1]] belongs to
51+
* the rank `(rank + i) % world_size`.
4952
*
5053
* @example
5154
* torch::Tensor src_ids = src
@@ -56,20 +59,22 @@ namespace sampling {
5659
* torch::Tensor compacted_src_ids = std::get<1>(result);
5760
* torch::Tensor compacted_dst_ids = std::get<2>(result);
5861
*/
59-
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> UniqueAndCompact(
62+
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
63+
UniqueAndCompact(
6064
const torch::Tensor& src_ids, const torch::Tensor& dst_ids,
6165
const torch::Tensor unique_dst_ids, const int64_t rank,
6266
const int64_t world_size);
6367

64-
std::vector<std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>>
68+
std::vector<
69+
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>>
6570
UniqueAndCompactBatched(
6671
const std::vector<torch::Tensor>& src_ids,
6772
const std::vector<torch::Tensor>& dst_ids,
6873
const std::vector<torch::Tensor> unique_dst_ids, const int64_t rank,
6974
const int64_t world_size);
7075

71-
c10::intrusive_ptr<Future<
72-
std::vector<std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>>>>
76+
c10::intrusive_ptr<Future<std::vector<
77+
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>>>>
7378
UniqueAndCompactBatchedAsync(
7479
const std::vector<torch::Tensor>& src_ids,
7580
const std::vector<torch::Tensor>& dst_ids,

graphbolt/src/cuda/unique_and_compact_impl.cu

+20-11
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,8 @@ UniqueAndCompactBatchedSortBased(
272272
}));
273273
}
274274

275-
std::vector<std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>>
275+
std::vector<
276+
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>>
276277
UniqueAndCompactBatched(
277278
const std::vector<torch::Tensor>& src_ids,
278279
const std::vector<torch::Tensor>& dst_ids,
@@ -282,15 +283,8 @@ UniqueAndCompactBatched(
282283
// Utilizes a hash table based implementation, the mapped id of a vertex
283284
// will be monotonically increasing as the first occurrence index of it in
284285
// torch.cat([unique_dst_ids, src_ids]). Thus, it is deterministic.
285-
auto results4 = UniqueAndCompactBatchedHashMapBased(
286+
return UniqueAndCompactBatchedHashMapBased(
286287
src_ids, dst_ids, unique_dst_ids, rank, world_size);
287-
std::vector<std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>>
288-
results3;
289-
// TODO @mfbalin: expose the `d` result in a later PR.
290-
for (const auto& [a, b, c, d] : results4) {
291-
results3.emplace_back(a, b, c);
292-
}
293-
return results3;
294288
}
295289
TORCH_CHECK(
296290
world_size <= 1,
@@ -299,10 +293,25 @@ UniqueAndCompactBatched(
299293
// Utilizes a sort based algorithm, the mapped id of a vertex part of the
300294
// src_ids but not part of the unique_dst_ids will be monotonically increasing
301295
// as the actual vertex id increases. Thus, it is deterministic.
302-
return UniqueAndCompactBatchedSortBased(src_ids, dst_ids, unique_dst_ids);
296+
auto results3 =
297+
UniqueAndCompactBatchedSortBased(src_ids, dst_ids, unique_dst_ids);
298+
std::vector<
299+
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>>
300+
results4;
301+
auto offsets = torch::zeros(
302+
2 * results3.size(),
303+
c10::TensorOptions().dtype(torch::kInt64).pinned_memory(true));
304+
for (const auto& [a, b, c] : results3) {
305+
auto d = offsets.slice(0, 0, 2);
306+
d.data_ptr<int64_t>()[1] = a.size(0);
307+
results4.emplace_back(a, b, c, d);
308+
offsets = offsets.slice(0, 2);
309+
}
310+
return results4;
303311
}
304312

305-
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> UniqueAndCompact(
313+
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
314+
UniqueAndCompact(
306315
const torch::Tensor src_ids, const torch::Tensor dst_ids,
307316
const torch::Tensor unique_dst_ids, const int64_t rank,
308317
const int64_t world_size) {

graphbolt/src/python_binding.cc

+5-4
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,14 @@ TORCH_LIBRARY(graphbolt, m) {
5151
m.class_<Future<c10::intrusive_ptr<FusedSampledSubgraph>>>(
5252
"FusedSampledSubgraphFuture")
5353
.def("wait", &Future<c10::intrusive_ptr<FusedSampledSubgraph>>::Wait);
54-
m.class_<Future<
55-
std::vector<std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>>>>(
54+
m.class_<Future<std::vector<
55+
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>>>>(
5656
"UniqueAndCompactBatchedFuture")
5757
.def(
5858
"wait",
59-
&Future<std::vector<
60-
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>>>::Wait);
59+
&Future<std::vector<std::tuple<
60+
torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>>>::
61+
Wait);
6162
m.class_<Future<std::tuple<torch::Tensor, torch::Tensor, int64_t, int64_t>>>(
6263
"GpuGraphCacheQueryFuture")
6364
.def(

graphbolt/src/unique_and_compact.cc

+13-6
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414

1515
namespace graphbolt {
1616
namespace sampling {
17-
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> UniqueAndCompact(
17+
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
18+
UniqueAndCompact(
1819
const torch::Tensor& src_ids, const torch::Tensor& dst_ids,
1920
const torch::Tensor unique_dst_ids, const int64_t rank,
2021
const int64_t world_size) {
@@ -31,16 +32,20 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> UniqueAndCompact(
3132
"Cooperative Minibatching (arXiv:2310.12403) is supported only on GPUs.");
3233
auto num_dst = unique_dst_ids.size(0);
3334
torch::Tensor ids = torch::cat({unique_dst_ids, src_ids});
34-
return AT_DISPATCH_INDEX_TYPES(
35+
auto [unique_ids, compacted_src, compacted_dst] = AT_DISPATCH_INDEX_TYPES(
3536
ids.scalar_type(), "unique_and_compact", ([&] {
3637
ConcurrentIdHashMap<index_t> id_map(ids, num_dst);
3738
return std::make_tuple(
3839
id_map.GetUniqueIds(), id_map.MapIds(src_ids),
3940
id_map.MapIds(dst_ids));
4041
}));
42+
auto offsets = torch::zeros(2, c10::TensorOptions().dtype(torch::kInt64));
43+
offsets.data_ptr<int64_t>()[1] = unique_ids.size(0);
44+
return {unique_ids, compacted_src, compacted_dst, offsets};
4145
}
4246

43-
std::vector<std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>>
47+
std::vector<
48+
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>>
4449
UniqueAndCompactBatched(
4550
const std::vector<torch::Tensor>& src_ids,
4651
const std::vector<torch::Tensor>& dst_ids,
@@ -64,7 +69,9 @@ UniqueAndCompactBatched(
6469
src_ids, dst_ids, unique_dst_ids, rank, world_size);
6570
});
6671
}
67-
std::vector<std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>> results;
72+
std::vector<
73+
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>>
74+
results;
6875
results.reserve(src_ids.size());
6976
for (std::size_t i = 0; i < src_ids.size(); i++) {
7077
results.emplace_back(UniqueAndCompact(
@@ -73,8 +80,8 @@ UniqueAndCompactBatched(
7380
return results;
7481
}
7582

76-
c10::intrusive_ptr<Future<
77-
std::vector<std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>>>>
83+
c10::intrusive_ptr<Future<std::vector<
84+
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>>>>
7885
UniqueAndCompactBatchedAsync(
7986
const std::vector<torch::Tensor>& src_ids,
8087
const std::vector<torch::Tensor>& dst_ids,

python/dgl/graphbolt/impl/in_subgraph_sampler.py

+1
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ def sample_subgraphs(
7474
(
7575
original_row_node_ids,
7676
compacted_csc_formats,
77+
_,
7778
) = unique_and_compact_csc_formats(subgraph.sampled_csc, seeds)
7879
subgraph = SampledSubgraphImpl(
7980
sampled_csc=compacted_csc_formats,

python/dgl/graphbolt/impl/neighbor_sampler.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,7 @@ def _compact_per_layer(self, minibatch):
471471
(
472472
original_row_node_ids,
473473
compacted_csc_format,
474+
_,
474475
) = unique_and_compact_csc_formats(subgraph.sampled_csc, seeds)
475476
subgraph = SampledSubgraphImpl(
476477
sampled_csc=compacted_csc_format,
@@ -506,7 +507,11 @@ def _compact_per_layer_async(self, minibatch):
506507
def _compact_per_layer_wait_future(minibatch):
507508
subgraph = minibatch.sampled_subgraphs[0]
508509
seeds = minibatch._seed_nodes
509-
original_row_node_ids, compacted_csc_format = minibatch._future.wait()
510+
(
511+
original_row_node_ids,
512+
compacted_csc_format,
513+
_,
514+
) = minibatch._future.wait()
510515
delattr(minibatch, "_future")
511516
subgraph = SampledSubgraphImpl(
512517
sampled_csc=compacted_csc_format,

0 commit comments

Comments (0)