Skip to content
Open
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
0fbfe21
add basic cpu rotation kernel
ryan-berger Oct 15, 2025
9b26511
make code more straight line for better vectorization
ryan-berger Oct 15, 2025
b174811
commit for test
ryan-berger Oct 15, 2025
a48c2fb
add a bunch more documentation
ryan-berger Oct 15, 2025
2b2722f
clean up lints, speed up????
ryan-berger Oct 19, 2025
0f61852
revert rotation kernel from clippy-fied rotation due to bugs
ryan-berger Oct 19, 2025
89173a7
add lots of documentation, do very efficient simd calculation doublin…
ryan-berger Oct 20, 2025
979c243
fix docs
ryan-berger Oct 20, 2025
bfceed3
fix small vector bug
ryan-berger Oct 20, 2025
6753c71
fix all lints
ryan-berger Oct 21, 2025
00fb982
use nightly channel
ryan-berger Oct 21, 2025
51b5774
install nightly
ryan-berger Oct 21, 2025
dfebb59
install nightly
ryan-berger Oct 21, 2025
0198d59
add clippy component
ryan-berger Oct 21, 2025
f046bfa
add cargo fmt
ryan-berger Oct 21, 2025
d4b162d
add nightly and regular version
ryan-berger Oct 21, 2025
efa779f
fmt, add components to toolchain file
ryan-berger Oct 21, 2025
1105858
redo toolchain actions
ryan-berger Oct 21, 2025
36050c3
fix expect
ryan-berger Oct 21, 2025
115cc36
add rustfmt
ryan-berger Oct 21, 2025
2e16cb3
improve perf by getting rid of prefix max buffer
ryan-berger Oct 24, 2025
8d77224
Revert "improve perf by getting rid of prefix max buffer"
ryan-berger Oct 24, 2025
59726a6
add correct kernel
ryan-berger Oct 25, 2025
7172491
add cargo config for x86, crazy dependent types/trait specialization …
ryan-berger Oct 28, 2025
b5a4a45
use 4-wide, 16-unrolled
ryan-berger Oct 28, 2025
4868ad2
refactor to foldable method
ryan-berger Oct 30, 2025
81438b5
Changes to at least compile on my M1 (#13)
tombh Oct 30, 2025
8a26730
Merge branch 'main' into cpu-clean
ryan-berger Oct 30, 2025
1a14cf9
add lots of documentation, clean up lints
ryan-berger Oct 30, 2025
4726c16
cargo fmt
ryan-berger Oct 30, 2025
bcfd18c
remove unused packages
ryan-berger Oct 30, 2025
0f18b25
try avx512 kernel implementation
ryan-berger Nov 1, 2025
768bb7e
add longest line of sight heatmap
ryan-berger Nov 9, 2025
0d59d32
WIP: use rayon/mutex for accumulation, basic bitmap code
ryan-berger Nov 12, 2025
b60677e
thread sector data option through full kernel
ryan-berger Nov 26, 2025
4e296c8
fix lots of lints
ryan-berger Nov 28, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ jobs:

steps:
- uses: actions/checkout@v4
- run: rustup toolchain install $RUST_VERSION --profile minimal
- run: |
rustup toolchain add nightly-2025-10-20 --profile minimal
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that we have a rust-toolchain.toml it's enough just to have rustup toolchain install --profile minimal here and it automatically gets the version from the file. And we don't need the stable toolchain either right?

rustup toolchain install $RUST_VERSION --profile minimal
- uses: Swatinem/rust-cache@v2
with:
cache-on-failure: true
Expand All @@ -46,7 +48,9 @@ jobs:

steps:
- uses: actions/checkout@v4
- run: rustup toolchain install $RUST_VERSION --profile minimal
- run: |
rustup toolchain add nightly-2025-10-20 --profile minimal
rustup toolchain install $RUST_VERSION --profile minimal
- uses: Swatinem/rust-cache@v2
with:
cache-on-failure: true
Expand All @@ -68,7 +72,9 @@ jobs:

steps:
- uses: actions/checkout@v4
- run: rustup toolchain install $RUST_VERSION --profile minimal
- run: |
rustup toolchain add nightly-2025-10-20 --profile minimal
rustup toolchain install $RUST_VERSION --profile minimal
- uses: taiki-e/install-action@v2
with:
tool: cargo-shear, cargo-msrv
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,9 @@ non_ascii_literal = "allow"
std_instead_of_alloc = "allow"
std_instead_of_core = "allow"
float_arithmetic = "allow"

# We're not writing cryptographic code, so timing side channels from integer
# division and remainder are not a concern.
integer_division_remainder_used = "allow"

# We're not concerned about how `%` behaves with negative operands.
modulo_arithmetic = "allow"
1 change: 1 addition & 0 deletions crates/total-viewsheds/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ tracing = { version = "0.1.41" }
tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }
wgpu = { version = "27", default-features = false, features = ["spirv", "vulkan"] }
radsort = "0.1.1"
itertools = "0.11.0"

[dev-dependencies]
googletest = "0.14.2"
Expand Down
38 changes: 33 additions & 5 deletions crates/total-viewsheds/src/compute.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! The main entrypoint for running computations.

use crate::cpu;
use color_eyre::{eyre::Ok, Result};

/// Handles all the computations.
Expand Down Expand Up @@ -30,6 +31,11 @@ pub struct Compute<'compute> {
pub longest_lines: Vec<f32>,
}

/// `NUM_CORES` is the number of physical CPU cores handed to the CPU kernel.
///
/// It is currently hardcoded to 8, which is what an Intel Core i9-9900K has and is a
/// common configuration.
///
/// TODO: detect this at runtime instead of hardcoding it. `std::thread::available_parallelism`
/// is a candidate, but it may report logical rather than physical cores; confirm before
/// switching.
const NUM_CORES: usize = 8;

impl<'compute> Compute<'compute> {
/// Instantiate.
pub fn new(
Expand Down Expand Up @@ -76,10 +82,6 @@ impl<'compute> Compute<'compute> {
..Default::default()
};

#[expect(
clippy::if_then_some_else_none,
reason = "The `?` is hard to use in the closure"
)]
let vulkan = if matches!(backend, crate::config::Backend::Vulkan) {
let elevations = dem.elevations.clone();
dem.elevations = Vec::new(); // Free up some RAM.
Expand Down Expand Up @@ -189,6 +191,32 @@ impl<'compute> Compute<'compute> {
Vec::new()
};

if matches!(self.backend, crate::config::Backend::CPU) {
#[expect(
clippy::as_conversions,
clippy::cast_possible_truncation,
reason = "elevations start out as i16s, and i16 -> f32 -> i16 is lossless"
)]
let elevations = self
.dem
.elevations
.iter()
.map(|&x| x as i16)
.collect::<Vec<i16>>();

#[expect(clippy::as_conversions, reason = "u32 -> usize is valid")]
let surfaces = cpu::multithreaded_kernel(
&elevations,
self.dem.max_los_as_points as usize,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can actually just do: usize::try_from(self.dem.max_los_as_points)? here and no need for the lint exception.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, can do that

360,
NUM_CORES,
);

self.add_sector_surfaces_to_running_total(&surfaces);
self.render_total_surfaces()?;
return Ok(());
}

for angle in 0..crate::axes::SECTOR_STEPS {
self.load_or_compute_cache(angle)?;
let mut sector_ring_data = vec![0; self.total_reserved_rings];
Expand Down Expand Up @@ -537,7 +565,7 @@ pub mod test {
compute.total_surfaces,
[
2687.689, 2546.9956, 2622.3494,
2564.7678, 3231.647, 2239.714,
2564.7678, 3231.647, 2239.714,
2604.2551, 2186.5012, 1768.3433
]
);
Expand Down
Loading
Loading