Skip to content

Commit

Permalink
Avoid use of Plane::p in Plane::downsampled
Browse files Browse the repository at this point in the history
Plane::p accesses pixels in a fashion that the compiler
cannot assume to be linear. This is harmful to both
compiler optimization and cache locality.

Although we cannot completely eliminate this problem
since two rows must be accessed at a time for downsampling,
we can alleviate it by accessing the data slice directly.

Improves performance of av-scenechange by 15%.
rav1e was not benchmarked, but this presumably helps by 1-2% there as well.
  • Loading branch information
shssoichiro committed Sep 20, 2020
1 parent f55d508 commit edfc865
Showing 1 changed file with 15 additions and 13 deletions.
28 changes: 15 additions & 13 deletions v_frame/src/plane.rs
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,6 @@ impl<T: Pixel> Plane<T> {
}
}

#[allow(clippy::needless_range_loop)]
pub fn downsampled(
&self, frame_width: usize, frame_height: usize,
) -> Plane<T> {
Expand All @@ -443,23 +442,26 @@ impl<T: Pixel> Plane<T> {

let width = new.cfg.width;
let height = new.cfg.height;
let xorigin = new.cfg.xorigin;
let yorigin = new.cfg.yorigin;
let stride = new.cfg.stride;

assert!(width * 2 <= src.cfg.stride - src.cfg.xorigin);
assert!(height * 2 <= src.cfg.alloc_height - src.cfg.yorigin);

for row in 0..height {
let base = (yorigin + row) * stride + xorigin;
let dst = &mut new.data[base..base + width];

for (col, dst) in dst.iter_mut().enumerate() {
let data_origin = src.data_origin();
for (row_idx, dst_row) in new
.mut_slice(PlaneOffset::default())
.rows_iter_mut()
.enumerate()
.take(height)
{
let src_top_row = &data_origin[(src.cfg.stride * row_idx * 2)..];
let src_bottom_row =
&data_origin[(src.cfg.stride * (row_idx * 2 + 1))..];
for (col, dst) in dst_row.iter_mut().take(width).enumerate() {
let mut sum = 0;
sum += u32::cast_from(src.p(2 * col, 2 * row));
sum += u32::cast_from(src.p(2 * col + 1, 2 * row));
sum += u32::cast_from(src.p(2 * col, 2 * row + 1));
sum += u32::cast_from(src.p(2 * col + 1, 2 * row + 1));
sum += u32::cast_from(src_top_row[2 * col]);
sum += u32::cast_from(src_top_row[2 * col + 1]);
sum += u32::cast_from(src_bottom_row[2 * col]);
sum += u32::cast_from(src_bottom_row[2 * col + 1]);
let avg = (sum + 2) >> 2;
*dst = T::cast_from(avg);
}
Expand Down

0 comments on commit edfc865

Please sign in to comment.