diff --git a/Cargo.lock b/Cargo.lock index da3f471414..40d0471572 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -269,6 +269,7 @@ dependencies = [ "num-integer", "num-traits", "rawpointer", + "rayon", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 2711be5807..0142e56f33 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,6 @@ crate-type = ["cdylib"] petgraph = "0.5.1" fixedbitset = "0.2.0" numpy = "0.12.1" -ndarray = "0.13.0" rand = "0.7" rand_pcg = "0.2" rayon = "1.5" @@ -32,3 +31,7 @@ features = ["extension-module", "hashbrown"] [dependencies.hashbrown] version = "0.9" features = ["rayon"] + +[dependencies.ndarray] +version = "0.13.0" +features = ["rayon"] diff --git a/docs/source/api.rst b/docs/source/api.rst index 9a4bda0f68..d5b7ad92bc 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -57,6 +57,8 @@ Algorithm Functions retworkx.descendants retworkx.ancestors retworkx.lexicographical_topological_sort + retworkx.graph_distance_matrix + retworkx.digraph_distance_matrix retworkx.floyd_warshall retworkx.graph_floyd_warshall_numpy retworkx.digraph_floyd_warshall_numpy diff --git a/src/lib.rs b/src/lib.rs index b29545ef75..50ec1e2a7f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1010,6 +1010,163 @@ fn layers( Ok(PyList::new(py, output).into()) } +/// Get the distance matrix for a directed graph +/// +/// This differs from functions like digraph_floyd_warshall_numpy in that the +/// edge weight/data payload is not used and each edge is treated as a +/// distance of 1. +/// +/// This function is also multithreaded and will run in parallel if the number +/// of nodes in the graph is at or above the value of ``parallel_threshold`` (it +/// defaults to 300). If the function will be running in parallel the env var +/// ``RAYON_NUM_THREADS`` can be used to adjust how many threads will be used. 
+/// +/// :param PyDiGraph graph: The graph to get the distance matrix for +/// :param int parallel_threshold: The number of nodes at which the +/// distance matrix is calculated in parallel. It defaults to 300, but this +/// can be tuned +/// :param bool as_undirected: If set to ``True`` the input directed graph +/// will be treated as if each edge was bidirectional/undirected in the +/// output distance matrix. +/// +/// :returns: The distance matrix; unreachable node pairs are left as 0 +/// :rtype: numpy.ndarray +#[pyfunction(parallel_threshold = "300", as_undirected = "false")] +#[text_signature = "(graph, /, parallel_threshold=300, as_undirected=False)"] +pub fn digraph_distance_matrix( + py: Python, + graph: &digraph::PyDiGraph, + parallel_threshold: usize, + as_undirected: bool, +) -> PyResult { + let n = graph.node_count(); + let mut matrix = Array2::::zeros((n, n)); + let bfs_traversal = |index: usize, mut row: ArrayViewMut1| { + let mut seen: HashMap = HashMap::new(); + let start_index = NodeIndex::new(index); + let mut level = 0; + let mut next_level: HashSet = HashSet::new(); + next_level.insert(start_index); + while !next_level.is_empty() { + let this_level = next_level; + next_level = HashSet::new(); + let mut found: Vec = Vec::new(); + for v in this_level { + if !seen.contains_key(&v) { + seen.insert(v, level); + found.push(v); + row[[v.index()]] = level as f64; + } + } + if seen.len() == n { + return; + } + for node in found { + for v in graph + .graph + .neighbors_directed(node, petgraph::Direction::Outgoing) + { + next_level.insert(v); + } + if as_undirected { + for v in graph + .graph + .neighbors_directed(node, petgraph::Direction::Incoming) + { + next_level.insert(v); + } + } + } + level += 1 + } + }; + if n < parallel_threshold { + matrix + .axis_iter_mut(Axis(0)) + .enumerate() + .for_each(|(index, row)| bfs_traversal(index, row)); + } else { + // Parallelize by row and iterate from each row index in BFS order + matrix + .axis_iter_mut(Axis(0)) + .into_par_iter() + .enumerate() 
+ .for_each(|(index, row)| bfs_traversal(index, row)); + } + Ok(matrix.into_pyarray(py).into()) +} + +/// Get the distance matrix for an undirected graph +/// +/// This differs from functions like graph_floyd_warshall_numpy in that the +/// edge weight/data payload is not used and each edge is treated as a +/// distance of 1. +/// +/// This function is also multithreaded and will run in parallel if the number +/// of nodes in the graph is at or above the value of ``parallel_threshold`` (it +/// defaults to 300). If the function will be running in parallel the env var +/// ``RAYON_NUM_THREADS`` can be used to adjust how many threads will be used. +/// +/// :param PyGraph graph: The graph to get the distance matrix for +/// :param int parallel_threshold: The number of nodes at which the +/// distance matrix is calculated in parallel. It defaults to 300, but this +/// can be tuned +/// +/// :returns: The distance matrix; unreachable node pairs are left as 0 +/// :rtype: numpy.ndarray +#[pyfunction(parallel_threshold = "300")] +#[text_signature = "(graph, /, parallel_threshold=300)"] +pub fn graph_distance_matrix( + py: Python, + graph: &graph::PyGraph, + parallel_threshold: usize, +) -> PyResult { + let n = graph.node_count(); + let mut matrix = Array2::::zeros((n, n)); + let bfs_traversal = |index: usize, mut row: ArrayViewMut1| { + let mut seen: HashMap = HashMap::new(); + let start_index = NodeIndex::new(index); + let mut level = 0; + let mut next_level: HashSet = HashSet::new(); + next_level.insert(start_index); + while !next_level.is_empty() { + let this_level = next_level; + next_level = HashSet::new(); + let mut found: Vec = Vec::new(); + for v in this_level { + if !seen.contains_key(&v) { + seen.insert(v, level); + found.push(v); + row[[v.index()]] = level as f64; + } + } + if seen.len() == n { + return; + } + for node in found { + for v in graph.graph.neighbors(node) { + next_level.insert(v); + } + } + level += 1 + } + }; + if n < parallel_threshold { + matrix + .axis_iter_mut(Axis(0)) + .enumerate() 
.for_each(|(index, row)| bfs_traversal(index, row)); + } else { + // Parallelize by row and iterate from each row index in BFS order + matrix + .axis_iter_mut(Axis(0)) + .into_par_iter() + .enumerate() + .for_each(|(index, row)| bfs_traversal(index, row)); + } + Ok(matrix.into_pyarray(py).into()) +} + fn weight_callable( py: Python, weight_fn: &Option, @@ -2071,6 +2228,8 @@ fn retworkx(py: Python<'_>, m: &PyModule) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(graph_floyd_warshall_numpy))?; m.add_wrapped(wrap_pyfunction!(digraph_floyd_warshall_numpy))?; m.add_wrapped(wrap_pyfunction!(layers))?; + m.add_wrapped(wrap_pyfunction!(graph_distance_matrix))?; + m.add_wrapped(wrap_pyfunction!(digraph_distance_matrix))?; m.add_wrapped(wrap_pyfunction!(digraph_adjacency_matrix))?; m.add_wrapped(wrap_pyfunction!(graph_adjacency_matrix))?; m.add_wrapped(wrap_pyfunction!(graph_all_simple_paths))?; diff --git a/tests/test_dist_matrix.py b/tests/test_dist_matrix.py new file mode 100644 index 0000000000..a29a17c86a --- /dev/null +++ b/tests/test_dist_matrix.py @@ -0,0 +1,111 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import unittest + +import numpy + +import retworkx + + +class TestDistanceMatrix(unittest.TestCase): + + def test_graph_distance_matrix(self): + graph = retworkx.PyGraph() + graph.add_nodes_from(list(range(7))) + graph.add_edges_from_no_data( + [(0, 1), (0, 6), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6)]) + dist = retworkx.graph_distance_matrix(graph) + expected = numpy.array([[0., 1., 2., 3., 3., 2., 1.], + [1., 0., 1., 2., 3., 3., 2.], + [2., 1., 0., 1., 2., 3., 3.], + [3., 2., 1., 0., 1., 2., 3.], + [3., 3., 2., 1., 0., 1., 2.], + [2., 3., 3., 2., 1., 0., 1.], + [1., 2., 3., 3., 2., 1., 0.]]) + self.assertTrue(numpy.array_equal(dist, expected)) + + def test_graph_distance_matrix_parallel(self): + graph = retworkx.PyGraph() + graph.add_nodes_from(list(range(7))) + graph.add_edges_from_no_data( + [(0, 1), (0, 6), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6)]) + dist = retworkx.graph_distance_matrix(graph, parallel_threshold=5) + expected = numpy.array([[0., 1., 2., 3., 3., 2., 1.], + [1., 0., 1., 2., 3., 3., 2.], + [2., 1., 0., 1., 2., 3., 3.], + [3., 2., 1., 0., 1., 2., 3.], + [3., 3., 2., 1., 0., 1., 2.], + [2., 3., 3., 2., 1., 0., 1.], + [1., 2., 3., 3., 2., 1., 0.]]) + self.assertTrue(numpy.array_equal(dist, expected)) + + def test_digraph_distance_matrix(self): + graph = retworkx.PyDiGraph() + graph.add_nodes_from(list(range(7))) + graph.add_edges_from_no_data( + [(0, 1), (0, 6), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6)]) + dist = retworkx.digraph_distance_matrix(graph) + expected = numpy.array([[0., 1., 2., 3., 4., 5., 1.], + [0., 0., 1., 2., 3., 4., 5.], + [0., 0., 0., 1., 2., 3., 4.], + [0., 0., 0., 0., 1., 2., 3.], + [0., 0., 0., 0., 0., 1., 2.], + [0., 0., 0., 0., 0., 0., 1.], + [0., 0., 0., 0., 0., 0., 0.]]) + self.assertTrue(numpy.array_equal(dist, expected)) + + def test_digraph_distance_matrix_parallel(self): + graph = retworkx.PyDiGraph() + graph.add_nodes_from(list(range(7))) + graph.add_edges_from_no_data( + [(0, 1), (0, 6), (1, 2), (2, 3), (3, 4), (4, 5), 
(5, 6)]) + dist = retworkx.digraph_distance_matrix(graph, parallel_threshold=5) + expected = numpy.array([[0., 1., 2., 3., 4., 5., 1.], + [0., 0., 1., 2., 3., 4., 5.], + [0., 0., 0., 1., 2., 3., 4.], + [0., 0., 0., 0., 1., 2., 3.], + [0., 0., 0., 0., 0., 1., 2.], + [0., 0., 0., 0., 0., 0., 1.], + [0., 0., 0., 0., 0., 0., 0.]]) + self.assertTrue(numpy.array_equal(dist, expected)) + + def test_digraph_distance_matrix_as_undirected(self): + graph = retworkx.PyDiGraph() + graph.add_nodes_from(list(range(7))) + graph.add_edges_from_no_data( + [(0, 1), (0, 6), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6)]) + dist = retworkx.digraph_distance_matrix(graph, as_undirected=True) + expected = numpy.array([[0., 1., 2., 3., 3., 2., 1.], + [1., 0., 1., 2., 3., 3., 2.], + [2., 1., 0., 1., 2., 3., 3.], + [3., 2., 1., 0., 1., 2., 3.], + [3., 3., 2., 1., 0., 1., 2.], + [2., 3., 3., 2., 1., 0., 1.], + [1., 2., 3., 3., 2., 1., 0.]]) + self.assertTrue(numpy.array_equal(dist, expected)) + + def test_digraph_distance_matrix_parallel_as_undirected(self): + graph = retworkx.PyDiGraph() + graph.add_nodes_from(list(range(7))) + graph.add_edges_from_no_data( + [(0, 1), (0, 6), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6)]) + dist = retworkx.digraph_distance_matrix(graph, parallel_threshold=5, + as_undirected=True) + expected = numpy.array([[0., 1., 2., 3., 3., 2., 1.], + [1., 0., 1., 2., 3., 3., 2.], + [2., 1., 0., 1., 2., 3., 3.], + [3., 2., 1., 0., 1., 2., 3.], + [3., 3., 2., 1., 0., 1., 2.], + [2., 3., 3., 2., 1., 0., 1.], + [1., 2., 3., 3., 2., 1., 0.]]) + self.assertTrue(numpy.array_equal(dist, expected))