Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix playback of HDR AV1 videos in native viewer #7978

Merged
merged 5 commits into from
Nov 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 103 additions & 86 deletions crates/store/re_video/src/decode/av1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ impl SyncDav1dDecoder {
};
match picture {
Ok(picture) => {
output_picture(&self.debug_name, &picture, on_output);
let frame = create_frame(&self.debug_name, &picture);
on_output(frame);
count += 1;
}
Err(dav1d::Error::Again) => {
Expand All @@ -121,98 +122,115 @@ impl SyncDav1dDecoder {
}
}

fn output_picture(
debug_name: &str,
picture: &dav1d::Picture,
on_output: &(dyn Fn(Result<Frame>) + Send + Sync),
) {
let data = {
re_tracing::profile_scope!("copy_picture_data");

match picture.pixel_layout() {
PixelLayout::I400 => picture.plane(PlanarImageComponent::Y).to_vec(),
PixelLayout::I420 | PixelLayout::I422 | PixelLayout::I444 => {
// TODO(#7594): If `picture.bit_depth()` isn't 8 we have a problem:
// We can't handle high bit depths yet and the YUV converter at the other side
// bases its opinion on what an acceptable number of incoming bytes is on this.
// So we just clamp to that expectation, ignoring `picture.stride(PlanarImageComponent::Y)` & friends.
// Note that `bit_depth` is either 8 or 16, which is semi-independent of `bits_per_component` (which is None/8/10/12).
if picture.bit_depth() != 8 {
re_log::warn_once!(
"Video {debug_name:?} uses {} bits per component. Only a bit depth of 8 bits is currently supported.",
picture.bits_per_component().map_or(picture.bit_depth(), |bpc| bpc.0)
);
}
fn create_frame(debug_name: &str, picture: &dav1d::Picture) -> Result<Frame> {
re_tracing::profile_function!();

let bits_per_component = picture
.bits_per_component()
.map_or(picture.bit_depth(), |bpc| bpc.0);

let bytes_per_component = if bits_per_component == 8 {
1
} else if 8 < bits_per_component && bits_per_component <= 16 {
// TODO(#7594): Support HDR video.
// We currently handle HDR videos by throwing away the lowest bits,
// and doing so rather slowly on CPU. It works, but the colors won't be perfectly correct.
re_log::warn_once!(
"{debug_name:?} is a High-Dynamic-Range (HDR) video with {bits_per_component} bits per component. Rerun does not support this fully. Color accuracy and performance may suffer.",
);
// Note that `bit_depth` is either 8 or 16, which is semi-independent of `bits_per_component` (which is None/8/10/12).
2
} else {
return Err(Error::BadBitsPerComponent(bits_per_component));
};

let mut data = match picture.pixel_layout() {
// Monochrome
PixelLayout::I400 => picture.plane(PlanarImageComponent::Y).to_vec(),

PixelLayout::I420 | PixelLayout::I422 | PixelLayout::I444 => {
let height_y = picture.height() as usize;
let height_uv = match picture.pixel_layout() {
PixelLayout::I400 => 0,
PixelLayout::I420 => height_y / 2,
PixelLayout::I422 | PixelLayout::I444 => height_y,
};

let packed_stride_y = bytes_per_component * picture.width() as usize;
let actual_stride_y = picture.stride(PlanarImageComponent::Y) as usize;

let height_y = picture.height() as usize;
let height_uv = match picture.pixel_layout() {
PixelLayout::I400 => 0,
PixelLayout::I420 => height_y / 2,
PixelLayout::I422 | PixelLayout::I444 => height_y,
};

let packed_stride_y = picture.width() as usize;
let actual_stride_y = picture.stride(PlanarImageComponent::Y) as usize;

let packed_stride_uv = match picture.pixel_layout() {
PixelLayout::I400 => 0,
PixelLayout::I420 | PixelLayout::I422 => packed_stride_y / 2,
PixelLayout::I444 => packed_stride_y,
};
let actual_stride_uv = picture.stride(PlanarImageComponent::U) as usize; // U / V stride is always the same.

let num_packed_bytes_y = packed_stride_y * height_y;
let num_packed_bytes_uv = packed_stride_uv * height_uv;

if actual_stride_y == packed_stride_y && actual_stride_uv == packed_stride_uv {
// Best case scenario: There's no additional strides at all, so we can just copy the data directly.
// TODO(andreas): This still has *significant* overhead for 8k video. Can we take ownership of the data instead without a copy?
re_tracing::profile_scope!("fast path");
let plane_y = &picture.plane(PlanarImageComponent::Y)[0..num_packed_bytes_y];
let plane_u = &picture.plane(PlanarImageComponent::U)[0..num_packed_bytes_uv];
let plane_v = &picture.plane(PlanarImageComponent::V)[0..num_packed_bytes_uv];
[plane_y, plane_u, plane_v].concat()
} else {
// At least either y or u/v have strides.
//
// We could make our image ingestion pipeline even more sophisticated and pass that stride information through.
// But given that this is a matter of replacing a single large memcpy with a few hundred _still_ quite large ones,
// this should not make a lot of difference (citation needed!).

let mut data = Vec::with_capacity(num_packed_bytes_y + num_packed_bytes_uv * 2);
{
let plane = picture.plane(PlanarImageComponent::Y);
if packed_stride_y == actual_stride_y {
data.extend_from_slice(&plane[0..num_packed_bytes_y]);
} else {
re_tracing::profile_scope!("slow path, y-plane");

for y in 0..height_y {
let offset = y * actual_stride_y;
data.extend_from_slice(&plane[offset..(offset + packed_stride_y)]);
}
let packed_stride_uv = match picture.pixel_layout() {
PixelLayout::I400 => 0,
PixelLayout::I420 | PixelLayout::I422 => packed_stride_y / 2,
PixelLayout::I444 => packed_stride_y,
};
let actual_stride_uv = picture.stride(PlanarImageComponent::U) as usize; // U / V stride is always the same.

let num_packed_bytes_y = packed_stride_y * height_y;
let num_packed_bytes_uv = packed_stride_uv * height_uv;

if actual_stride_y == packed_stride_y && actual_stride_uv == packed_stride_uv {
// Best case scenario: There's no additional strides at all, so we can just copy the data directly.
// TODO(andreas): This still has *significant* overhead for 8k video. Can we take ownership of the data instead without a copy?
re_tracing::profile_scope!("fast path");
let plane_y = &picture.plane(PlanarImageComponent::Y)[0..num_packed_bytes_y];
let plane_u = &picture.plane(PlanarImageComponent::U)[0..num_packed_bytes_uv];
let plane_v = &picture.plane(PlanarImageComponent::V)[0..num_packed_bytes_uv];
[plane_y, plane_u, plane_v].concat()
} else {
// At least either y or u/v have strides.
//
// We could make our image ingestion pipeline even more sophisticated and pass that stride information through.
// But given that this is a matter of replacing a single large memcpy with a few hundred _still_ quite large ones,
// this should not make a lot of difference (citation needed!).

let mut data = Vec::with_capacity(num_packed_bytes_y + num_packed_bytes_uv * 2);
{
let plane = picture.plane(PlanarImageComponent::Y);
if packed_stride_y == actual_stride_y {
data.extend_from_slice(&plane[0..num_packed_bytes_y]);
} else {
re_tracing::profile_scope!("slow path, y-plane");

for y in 0..height_y {
let offset = y * actual_stride_y;
data.extend_from_slice(&plane[offset..(offset + packed_stride_y)]);
}
}
for comp in [PlanarImageComponent::U, PlanarImageComponent::V] {
let plane = picture.plane(comp);
if actual_stride_uv == packed_stride_uv {
data.extend_from_slice(&plane[0..num_packed_bytes_uv]);
} else {
re_tracing::profile_scope!("slow path, u/v-plane");

for y in 0..height_uv {
let offset = y * actual_stride_uv;
data.extend_from_slice(&plane[offset..(offset + packed_stride_uv)]);
}
}
for comp in [PlanarImageComponent::U, PlanarImageComponent::V] {
let plane = picture.plane(comp);
if actual_stride_uv == packed_stride_uv {
data.extend_from_slice(&plane[0..num_packed_bytes_uv]);
} else {
re_tracing::profile_scope!("slow path, u/v-plane");

for y in 0..height_uv {
let offset = y * actual_stride_uv;
data.extend_from_slice(&plane[offset..(offset + packed_stride_uv)]);
}
}

data
}

data
}
}
};

if bytes_per_component == 2 {
re_tracing::profile_scope!("Truncate HDR"); // costs around 1.5ms per megapixel on MacBook Pro M3 Max
let rshift = bits_per_component - 8; // we throw away the low bits
data = data
.chunks(2)
.map(|c| {
let lo = c[0] as u16;
let hi = c[1] as u16;
let full = (hi << 8) | lo;
(full >> rshift) as u8
})
.collect();
}

let format = PixelFormat::Yuv {
layout: match picture.pixel_layout() {
PixelLayout::I400 => YuvPixelLayout::Y400,
Expand All @@ -227,7 +245,7 @@ fn output_picture(
coefficients: yuv_matrix_coefficients(debug_name, picture),
};

let frame = Frame {
Ok(Frame {
content: FrameContent {
data,
width: picture.width(),
Expand All @@ -238,8 +256,7 @@ fn output_picture(
presentation_timestamp: Time(picture.timestamp().unwrap_or(0)),
duration: Time(picture.duration()),
},
};
on_output(Ok(frame));
})
}

fn yuv_matrix_coefficients(debug_name: &str, picture: &dav1d::Picture) -> YuvMatrixCoefficients {
Expand Down
19 changes: 11 additions & 8 deletions crates/store/re_video/src/decode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,10 @@ pub enum Error {

#[cfg(target_arch = "wasm32")]
#[error(transparent)]
WebDecoderError(#[from] webcodecs::Error),
WebDecoder(#[from] webcodecs::Error),

#[error("Unsupported bits per component: {0}")]
BadBitsPerComponent(usize),
}

pub type Result<T = (), E = Error> = std::result::Result<T, E>;
Expand Down Expand Up @@ -167,14 +170,14 @@ pub fn new_decoder(
{
if cfg!(debug_assertions) {
return Err(Error::NoNativeAv1Debug); // because debug builds of rav1d is EXTREMELY slow
} else {
re_log::trace!("Decoding AV1…");
return Ok(Box::new(async_decoder_wrapper::AsyncDecoderWrapper::new(
debug_name.to_owned(),
Box::new(av1::SyncDav1dDecoder::new(debug_name.to_owned())?),
on_output,
)));
}

re_log::trace!("Decoding AV1…");
return Ok(Box::new(async_decoder_wrapper::AsyncDecoderWrapper::new(
debug_name.to_owned(),
Box::new(av1::SyncDav1dDecoder::new(debug_name.to_owned())?),
on_output,
)));
}
}

Expand Down
2 changes: 1 addition & 1 deletion crates/store/re_video/src/decode/webcodecs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ fn init_video_decoder(
};

let on_error = Closure::wrap(Box::new(move |err: js_sys::Error| {
on_output_callback(Err(super::Error::WebDecoderError(Error::Decoding(
on_output_callback(Err(super::Error::WebDecoder(Error::Decoding(
js_error_to_string(&err),
))));
}) as Box<dyn Fn(js_sys::Error)>);
Expand Down
25 changes: 17 additions & 8 deletions crates/viewer/re_data_ui/src/video.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,23 @@ pub fn show_video_blob_info(
)),
);
if let Some(bit_depth) = data.config.stsd.contents.bit_depth() {
let mut bit_depth = bit_depth.to_string();
if data.is_monochrome() == Some(true) {
bit_depth = format!("{bit_depth} (monochrome)");
}

ui.list_item_flat_noninteractive(
PropertyContent::new("Bit depth").value_text(bit_depth),
);
ui.list_item_flat_noninteractive(PropertyContent::new("Bit depth").value_fn(
|ui, _| {
ui.label(bit_depth.to_string());
if 8 < bit_depth {
// TODO(#7594): HDR videos
ui.warning_label("HDR").on_hover_ui(|ui| {
ui.label(
"High-dynamic-range videos not yet supported by Rerun",
);
ui.hyperlink("https://github.com/rerun-io/rerun/issues/7594");
});
}
if data.is_monochrome() == Some(true) {
ui.label("(monochrome)");
}
},
));
}
if let Some(subsampling_mode) = data.subsampling_mode() {
// Don't show subsampling mode for monochrome, doesn't make sense usually.
Expand Down
6 changes: 6 additions & 0 deletions crates/viewer/re_ui/src/ui_ext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ pub trait UiExt {
fn ui(&self) -> &egui::Ui;
fn ui_mut(&mut self) -> &mut egui::Ui;

/// Shows a warning label.
fn warning_label(&mut self, warning_text: &str) -> egui::Response {
let label = egui::Label::new(self.ui().ctx().warning_text(warning_text));
self.ui_mut().add(label)
}

/// Shows a small error label with the given text on hover and copies the text to the clipboard on click.
fn error_label(&mut self, error_text: &str) -> egui::Response {
let label = egui::Label::new(self.ui().ctx().error_text("Error"))
Expand Down
2 changes: 1 addition & 1 deletion crates/viewer/re_viewer/src/web_tools.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ pub fn url_to_receiver(
re_rrdp_comms::stream_recording(url, Some(ui_waker)).map_err(|err| err.into())
}
#[cfg(not(feature = "rrdp"))]
EndpointCategory::DataPlatform(url) => {
EndpointCategory::DataPlatform(_url) => {
anyhow::bail!("Missing 'rrdp' feature flag");
}
EndpointCategory::WebEventListener(url) => {
Expand Down
Loading