Skip to content

Commit 5560c1e

Browse files
committed
runtime: use a more efficient swizzling implementation without array_chunks_mut
1 parent f8c0555 commit 5560c1e

File tree

3 files changed

+82
-100
lines changed

3 files changed

+82
-100
lines changed

librashader-runtime/src/array_chunks_mut.rs

-87
This file was deleted.

librashader-runtime/src/image.rs

+82-10
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ pub use image::ImageError;
22
use librashader_common::Size;
33
use std::marker::PhantomData;
44

5-
use crate::array_chunks_mut::ArrayChunksMut;
65
use std::path::Path;
76

87
/// An uncompressed raw image ready to upload to GPU buffers.
@@ -43,20 +42,15 @@ impl PixelFormat for RGBA8 {
4342

4443
impl PixelFormat for BGRA8 {
4544
fn convert(pixels: &mut Vec<u8>) {
46-
assert!(pixels.len() % 4 == 0);
47-
for [r, _g, b, _a] in ArrayChunksMut::new(pixels) {
48-
std::mem::swap(b, r)
49-
}
45+
const BGRA_SWIZZLE: &[usize; 32] = &generate_swizzle([2, 1, 0, 3]);
46+
swizzle_pixels(pixels, BGRA_SWIZZLE);
5047
}
5148
}
5249

5350
impl PixelFormat for ARGB8 {
5451
fn convert(pixels: &mut Vec<u8>) {
55-
assert!(pixels.len() % 4 == 0);
56-
for [r, _g, b, a] in ArrayChunksMut::new(pixels) {
57-
std::mem::swap(r, a); // abgr
58-
std::mem::swap(b, r); // argb
59-
}
52+
const ARGB_SWIZZLE: &[usize; 32] = &generate_swizzle([3, 0, 1, 2]);
53+
swizzle_pixels(pixels, ARGB_SWIZZLE);
6054
}
6155
}
6256

@@ -97,3 +91,81 @@ impl<P: PixelFormat> Image<P> {
9791
})
9892
}
9993
}
94+
95+
/// Reorders the bytes of every 4-byte pixel in `pixels` in place.
///
/// `swizzle` is a table of source indices for one 32-byte (8 pixel) chunk:
/// after the call, byte `i` of each full chunk holds what was previously at
/// `chunk[swizzle[i]]`. Any trailing pixels that do not fill a whole 32-byte
/// chunk are reordered one at a time using the first four entries of the
/// table, which for a table built by `generate_swizzle` are exactly the
/// per-pixel channel order.
///
/// # Panics
///
/// Panics if `pixels.len()` is not a multiple of 4, or if the table contains
/// an index that is out of range for the chunk it is applied to.
fn swizzle_pixels(pixels: &mut Vec<u8>, swizzle: &'static [usize; 32]) {
    assert!(pixels.len() % 4 == 0);
    let mut chunks = pixels.chunks_exact_mut(32);

    // Gathering a whole fixed-size chunk through a constant index table
    // should vectorize better than a naive per-pixel mem swap.
    for chunk in &mut chunks {
        let tmp = swizzle.map(|i| chunk[i]);
        chunk.copy_from_slice(&tmp[..])
    }

    // Fewer than 32 bytes are left; handle them one pixel at a time. The
    // order must come from the supplied table (not a fixed ARGB order),
    // otherwise the tail of the image is swizzled differently from the rest.
    let remainder = chunks.into_remainder();
    for pixel in remainder.chunks_exact_mut(4) {
        let tmp = [
            pixel[swizzle[0]],
            pixel[swizzle[1]],
            pixel[swizzle[2]],
            pixel[swizzle[3]],
        ];
        pixel.copy_from_slice(&tmp[..])
    }
}
111+
112+
/// Builds, at compile time, an index table that applies a 4-channel
/// `swizzle` to every consecutive group of four entries in a `LEN`-entry
/// chunk.
///
/// For each group starting at offset `base`, entry `base + lane` of the
/// result is `base + swizzle[lane]`, so the table can be used to gather the
/// bytes of several pixels at once.
///
/// # Panics
///
/// Panics if `LEN` is not divisible by 4, or if any element of `swizzle` is
/// not in `0..4`.
const fn generate_swizzle<const LEN: usize>(swizzle: [usize; 4]) -> [usize; LEN] {
    assert!(LEN % 4 == 0, "length of swizzle must be divisible by 4");
    let mut table = [0usize; LEN];

    let mut base = 0;
    while base < LEN {
        // Absolute indices of the four channels of the pixel at `base`;
        // looking them up through `swizzle` keeps out-of-range entries an error.
        let pixel = [base, base + 1, base + 2, base + 3];

        let mut lane = 0;
        while lane < 4 {
            table[base + lane] = pixel[swizzle[lane]];
            lane += 1;
        }

        base += 4;
    }

    table
}
129+
130+
#[cfg(test)]
mod test {
    use crate::image::generate_swizzle;

    /// The identity swizzle must produce the index sequence `0..32`.
    #[test]
    pub fn generate_normal_swizzle() {
        let swizzle = generate_swizzle::<32>([0, 1, 2, 3]);
        assert_eq!(
            swizzle,
            #[rustfmt::skip]
            [
                0, 1, 2, 3,
                4, 5, 6, 7,
                8, 9, 10, 11,
                12, 13, 14, 15,
                16, 17, 18, 19,
                20, 21, 22, 23,
                24, 25, 26, 27,
                28, 29, 30, 31
            ]
        )
    }

    /// The `[3, 0, 1, 2]` swizzle must rotate every group of four indices.
    #[test]
    pub fn generate_argb_swizzle() {
        let swizzle = generate_swizzle::<32>([3, 0, 1, 2]);
        assert_eq!(
            swizzle,
            #[rustfmt::skip]
            [
                3, 0, 1, 2,
                7, 4, 5, 6,
                11, 8, 9, 10,
                15, 12, 13, 14,
                19, 16, 17, 18,
                23, 20, 21, 22,
                27, 24, 25, 26,
                31, 28, 29, 30
            ]
        )
    }

    /// The `[2, 1, 0, 3]` swizzle must swap the first and third index of
    /// every group of four and leave the other two in place.
    #[test]
    pub fn generate_bgra_swizzle() {
        let swizzle = generate_swizzle::<32>([2, 1, 0, 3]);
        assert_eq!(
            swizzle,
            #[rustfmt::skip]
            [
                2, 1, 0, 3,
                6, 5, 4, 7,
                10, 9, 8, 11,
                14, 13, 12, 15,
                18, 17, 16, 19,
                22, 21, 20, 23,
                26, 25, 24, 27,
                30, 29, 28, 31
            ]
        )
    }
}

librashader-runtime/src/lib.rs

-3
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,3 @@ pub mod render_target;
3636

3737
/// Helpers for handling framebuffers.
3838
pub mod framebuffer;
39-
40-
/// array_chunks_mut polyfill
41-
mod array_chunks_mut;

0 commit comments

Comments
 (0)