From 155f2f902bbc16252a719b280dc4979ec7bff3db Mon Sep 17 00:00:00 2001 From: Jake Lishman Date: Tue, 5 Sep 2023 13:08:25 +0100 Subject: [PATCH 1/2] Use `SmallVec` in `NeighborTable` for cache locality A reasonable chunk of our time in Sabre is spent reading through the `NeighborTable` to find the candidate swaps for a given layout. Most coupling maps that we care about have a relatively low number of edges between qubits, yet we needed to redirect to the heap for each individual physical-qubit lookup currently. This switches from using a `Vec` (which is always a fat pointer to heap memory) to `SmallVec` with an inline buffer space of four qubits. With the qubit type being `u32`, the `SmallVec` now takes up the same stack size as a `Vec` but can store (usually) all the swaps directly inline in the outer `Vec` of qubits. This means that most lookups of the available swaps are looking in the same (up to relatively small offsets) in memory, which makes the access patterns much easier for prefetching to optimise for. --- Cargo.lock | 1 + crates/accelerate/Cargo.toml | 4 ++ .../src/sabre_swap/neighbor_table.rs | 45 +++++++++++-------- 3 files changed, 32 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 97ca32bae717..78421193ca79 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -442,6 +442,7 @@ dependencies = [ "rand_pcg", "rayon", "rustworkx-core", + "smallvec", ] [[package]] diff --git a/crates/accelerate/Cargo.toml b/crates/accelerate/Cargo.toml index a0b59e6460de..a5db75b52102 100644 --- a/crates/accelerate/Cargo.toml +++ b/crates/accelerate/Cargo.toml @@ -25,6 +25,10 @@ num-complex = "0.4" num-bigint = "0.4" rustworkx-core = "0.13" +[dependencies.smallvec] +version = "1.11" +features = ["union"] + [dependencies.pyo3] workspace = true features = ["hashbrown", "indexmap", "num-complex", "num-bigint"] diff --git a/crates/accelerate/src/sabre_swap/neighbor_table.rs b/crates/accelerate/src/sabre_swap/neighbor_table.rs index 6cb44536dc0e..56ababd43ed9 100644 --- a/crates/accelerate/src/sabre_swap/neighbor_table.rs +++ b/crates/accelerate/src/sabre_swap/neighbor_table.rs @@ -16,6 +16,7 @@ use numpy::PyReadonlyArray2; use pyo3::prelude::*; use rayon::prelude::*; use rustworkx_core::petgraph::prelude::*; +use smallvec::SmallVec; use crate::nlayout::PhysicalQubit; @@ -32,7 +33,11 @@ use crate::nlayout::PhysicalQubit; #[pyclass(module = "qiskit._accelerate.sabre_swap")] #[derive(Clone, Debug)] pub struct NeighborTable { - neighbors: Vec>, + // The choice of 4 `PhysicalQubit`s in the stack-allocated region is because a) this causes the + // `SmallVec` to be the same width as a `Vec` on 64-bit systems (three machine words == 24 + // bytes); b) the majority of coupling maps we're likely to encounter have a degree of 3 (heavy + // hex) or 4 (grid / heavy square). + neighbors: Vec>, } impl NeighborTable { @@ -63,21 +68,22 @@ impl NeighborTable { let neighbors = match adjacency_matrix { Some(adjacency_matrix) => { let adj_mat = adjacency_matrix.as_array(); - let build_neighbors = |row: ArrayView1| -> PyResult> { - row.iter() - .enumerate() - .filter_map(|(row_index, value)| { - if *value == 0. { - None - } else { - Some(match row_index.try_into() { - Ok(index) => Ok(PhysicalQubit::new(index)), - Err(err) => Err(err.into()), - }) - } - }) - .collect() - }; + let build_neighbors = + |row: ArrayView1| -> PyResult> { + row.iter() + .enumerate() + .filter_map(|(row_index, value)| { + if *value == 0. { + None + } else { + Some(match row_index.try_into() { + Ok(index) => Ok(PhysicalQubit::new(index)), + Err(err) => Err(err.into()), + }) + } + }) + .collect() + }; if run_in_parallel { adj_mat .axis_iter(Axis(0)) @@ -97,10 +103,13 @@ impl NeighborTable { } fn __getstate__(&self) -> Vec> { - self.neighbors.clone() + self.neighbors + .iter() + .map(|v| v.iter().copied().collect()) + .collect() } fn __setstate__(&mut self, state: Vec>) { - self.neighbors = state + self.neighbors = state.into_iter().map(|v| v.into()).collect() } } From 5f313e22d645f715ddae2d702259e84fb1d41cd4 Mon Sep 17 00:00:00 2001 From: Jake Lishman Date: Wed, 6 Sep 2023 17:30:20 +0100 Subject: [PATCH 2/2] Pickle via `PyList` instead of duplicate conversion `SmallVec` doesn't have implementations of the PyO3 conversion trait, so it needs to be done manually. The previous state used to convert to a Rust-space `Vec` that then needed to have its data moved from the Python heap to the Rust heap. This instead changes the conversions to interact directly with Python lists, rather than using intermediary structures. --- .../src/sabre_swap/neighbor_table.rs | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/crates/accelerate/src/sabre_swap/neighbor_table.rs b/crates/accelerate/src/sabre_swap/neighbor_table.rs index 56ababd43ed9..577466b514db 100644 --- a/crates/accelerate/src/sabre_swap/neighbor_table.rs +++ b/crates/accelerate/src/sabre_swap/neighbor_table.rs @@ -14,6 +14,7 @@ use crate::getenv_use_multiple_threads; use ndarray::prelude::*; use numpy::PyReadonlyArray2; use pyo3::prelude::*; +use pyo3::types::PyList; use rayon::prelude::*; use rustworkx_core::petgraph::prelude::*; use smallvec::SmallVec; @@ -102,14 +103,26 @@ impl NeighborTable { Ok(NeighborTable { neighbors }) } - fn __getstate__(&self) -> Vec> { - self.neighbors - .iter() - .map(|v| v.iter().copied().collect()) - .collect() + fn __getstate__(&self, py: Python<'_>) -> Py { + PyList::new( + py, + self.neighbors + .iter() + .map(|v| PyList::new(py, v.iter()).to_object(py)), + ) + .into() } - fn __setstate__(&mut self, state: Vec>) { - self.neighbors = state.into_iter().map(|v| v.into()).collect() + fn __setstate__(&mut self, state: &PyList) -> PyResult<()> { + self.neighbors = state + .iter() + .map(|v| { + v.downcast::()? + .iter() + .map(PyAny::extract) + .collect::>() + }) + .collect::>()?; + Ok(()) } }