Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix some videos having offsetted (incorrect) timestamps #8029

Merged
merged 14 commits into from
Nov 8, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ table VideoFrameReference (
///
/// Note that this uses the closest video frame instead of the latest at this timestamp
/// in order to be more forgiving of rounding errors for imprecise timestamp types.
///
/// Timestamps are relative to the start of the video, i.e. a timestamp of 0 always corresponds to the first frame.
/// This is oftentimes equivalent to presentation timestamps (known as PTS), but in the presence of B-frames
/// (bidirectionally predicted frames) there may be an offset on the first presentation timestamp in the video.
timestamp: rerun.components.VideoTimestamp ("attr.rerun.component_required", required, order: 1000);

// --- Optional ---
Expand Down
4 changes: 4 additions & 0 deletions crates/store/re_types/src/archetypes/video_frame_reference.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions crates/store/re_video/src/decode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,7 @@ pub fn new_decoder(

#[cfg(target_arch = "wasm32")]
return Ok(Box::new(webcodecs::WebVideoDecoder::new(
video.config.clone(),
video.timescale,
video,
hw_acceleration,
on_output,
)?));
Expand Down Expand Up @@ -282,7 +281,7 @@ impl Default for FrameInfo {

impl FrameInfo {
/// Presentation timestamp range in which this frame is valid.
pub fn time_range(&self) -> std::ops::Range<Time> {
pub fn presentation_time_range(&self) -> std::ops::Range<Time> {
self.presentation_timestamp..self.presentation_timestamp + self.duration
}
}
Expand Down
46 changes: 34 additions & 12 deletions crates/store/re_video/src/decode/webcodecs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use web_sys::{
use super::{
AsyncDecoder, Chunk, DecodeHardwareAcceleration, Frame, FrameInfo, OutputCallback, Result,
};
use crate::{Config, Time, Timescale};
use crate::{Config, Time, Timescale, VideoData};

#[derive(Clone)]
#[repr(transparent)]
Expand All @@ -34,6 +34,7 @@ impl std::ops::Deref for WebVideoFrame {
pub struct WebVideoDecoder {
video_config: Config,
timescale: Timescale,
minimum_presentation_timestamp: Time,
decoder: web_sys::VideoDecoder,
hw_acceleration: DecodeHardwareAcceleration,
on_output: Arc<OutputCallback>,
Expand Down Expand Up @@ -102,17 +103,21 @@ impl Drop for WebVideoDecoder {

impl WebVideoDecoder {
pub fn new(
video_config: Config,
timescale: Timescale,
video: &VideoData,
hw_acceleration: DecodeHardwareAcceleration,
on_output: impl Fn(Result<Frame>) + Send + Sync + 'static,
) -> Result<Self, Error> {
let on_output = Arc::new(on_output);
let decoder = init_video_decoder(on_output.clone(), timescale)?;
let decoder = init_video_decoder(
on_output.clone(),
video.timescale,
video.samples_statistics.minimum_presentation_timestamp,
)?;

Ok(Self {
video_config,
timescale,
video_config: video.config.clone(),
timescale: video.timescale,
minimum_presentation_timestamp: video.samples_statistics.minimum_presentation_timestamp,
decoder,
hw_acceleration,
on_output,
Expand All @@ -133,10 +138,13 @@ impl AsyncDecoder for WebVideoDecoder {
&data,
video_chunk
.presentation_timestamp
.into_micros(self.timescale),
.into_micros_since_start(self.timescale, self.minimum_presentation_timestamp),
type_,
);
web_chunk.set_duration(video_chunk.duration.into_micros(self.timescale));

let duration_millis =
1e-3 * video_chunk.duration.duration(self.timescale).as_nanos() as f64;
web_chunk.set_duration(duration_millis);
let web_chunk = EncodedVideoChunk::new(&web_chunk)
.map_err(|err| Error::CreateChunk(js_error_to_string(&err)))?;
self.decoder
Expand All @@ -154,7 +162,11 @@ impl AsyncDecoder for WebVideoDecoder {
// At least on Firefox, it can happen that reset on a previous error fails.
// In that case, start over completely and try again!
re_log::debug!("Video decoder reset failed, recreating decoder.");
self.decoder = init_video_decoder(self.on_output.clone(), self.timescale)?;
self.decoder = init_video_decoder(
self.on_output.clone(),
self.timescale,
self.minimum_presentation_timestamp,
)?;
};

self.decoder
Expand All @@ -171,13 +183,23 @@ impl AsyncDecoder for WebVideoDecoder {
fn init_video_decoder(
on_output_callback: Arc<OutputCallback>,
timescale: Timescale,
minimum_presentation_timestamp: Time,
) -> Result<web_sys::VideoDecoder, Error> {
let on_output = {
let on_output = on_output_callback.clone();
Closure::wrap(Box::new(move |frame: web_sys::VideoFrame| {
let presentation_timestamp =
Time::from_micros(frame.timestamp().unwrap_or(0.0), timescale);
let duration = Time::from_micros(frame.duration().unwrap_or(0.0), timescale);
// We assume that the timestamp returned by the decoder is in time since start,
// and does not represent demuxed "raw" presentation timestamps.
let presentation_timestamp = Time::from_micros_since_start(
frame.timestamp().unwrap_or(0.0),
timescale,
minimum_presentation_timestamp,
);
let duration = Time::from_micros_since_start(
frame.duration().unwrap_or(0.0),
timescale,
minimum_presentation_timestamp,
);

on_output(Ok(Frame {
content: WebVideoFrame(frame),
Expand Down
59 changes: 56 additions & 3 deletions crates/store/re_video/src/demux/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,57 @@ pub struct VideoData {
/// and should be presented in composition-timestamp order.
pub samples: Vec<Sample>,

/// Meta information about the samples.
pub samples_statistics: SamplesStatistics,

/// All the tracks in the mp4; not just the video track.
///
/// Can be nice to show in a UI.
pub mp4_tracks: BTreeMap<TrackId, Option<TrackKind>>,
}

/// Meta information about the video samples.
///
/// Computed once from the full list of samples via [`SamplesStatistics::new`].
#[derive(Clone, Debug)]
pub struct SamplesStatistics {
/// The smallest presentation timestamp observed in this video.
///
/// This is typically 0, but in the presence of B-frames, it may be non-zero.
/// In fact, many formats don't require this to be zero, but video players typically
/// normalize the shown time to start at zero.
/// Note that timestamps in the [`Sample`]s are *not* automatically adjusted with this value.
// This is roughly equivalent to FFmpeg's internal `min_corrected_pts`
// https://github.com/FFmpeg/FFmpeg/blob/4047b887fc44b110bccb1da09bcb79d6e454b88b/libavformat/isom.h#L202
// (unlike us, this handles a bunch more edge cases but it fulfills the same role)
// To learn more about this I recommend reading the patch that introduced this in FFmpeg:
// https://patchwork.ffmpeg.org/project/ffmpeg/patch/[email protected]/#12592
pub minimum_presentation_timestamp: Time,

/// Whether all decode timestamps are equal to presentation timestamps.
///
/// If true, the video typically has no B-frames as those require frame reordering.
pub dts_always_equal_pts: bool,
}

impl SamplesStatistics {
    /// Gathers statistics over all `samples` of a video.
    ///
    /// For an empty slice, `minimum_presentation_timestamp` falls back to the
    /// default [`Time`] and `dts_always_equal_pts` is `true`.
    pub fn new(samples: &[Sample]) -> Self {
        re_tracing::profile_function!();

        // A single sample whose decode and presentation timestamps differ is enough
        // to know that the two orderings are not identical.
        let has_dts_pts_mismatch = samples
            .iter()
            .any(|sample| sample.decode_timestamp != sample.presentation_timestamp);

        // Samples are not required to be sorted by presentation timestamp,
        // so every one of them has to be looked at.
        let earliest_pts = samples
            .iter()
            .map(|sample| sample.presentation_timestamp)
            .min();

        Self {
            minimum_presentation_timestamp: earliest_pts.unwrap_or_default(),
            dts_always_equal_pts: !has_dts_pts_mismatch,
        }
    }
}

impl VideoData {
/// Loads a video from the given data.
///
Expand Down Expand Up @@ -93,7 +138,7 @@ impl VideoData {
/// Length of the video.
#[inline]
pub fn duration(&self) -> std::time::Duration {
std::time::Duration::from_nanos(self.duration.into_nanos(self.timescale) as _)
self.duration.duration(self.timescale)
}

/// Natural width and height of the video
Expand Down Expand Up @@ -229,17 +274,25 @@ impl VideoData {
}
}

/// Determines the presentation timestamps of all frames inside a video, returning raw time values.
/// Determines the video timestamps of all frames inside a video, returning raw time values.
///
/// Returned timestamps are in nanoseconds since start and are guaranteed to be monotonically increasing.
/// These are *not* necessarily the same as the presentation timestamps, as the returned timestamps are
/// normalized with respect to the start of the video, see [`SamplesStatistics::minimum_presentation_timestamp`].
pub fn frame_timestamps_ns(&self) -> impl Iterator<Item = i64> + '_ {
// Segments are guaranteed to be sorted among each other, but within a segment,
// presentation timestamps may not be sorted since this is sorted by decode timestamps.
self.gops.iter().flat_map(|seg| {
self.samples[seg.range()]
.iter()
.map(|sample| sample.presentation_timestamp.into_nanos(self.timescale))
.map(|sample| sample.presentation_timestamp)
.sorted()
.map(|pts| {
pts.into_nanos_since_start(
self.timescale,
self.samples_statistics.minimum_presentation_timestamp,
)
})
})
}
}
Expand Down
57 changes: 32 additions & 25 deletions crates/store/re_video/src/demux/mp4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

use super::{Config, GroupOfPictures, Sample, VideoData, VideoLoadError};

use crate::{Time, Timescale};
use crate::{demux::SamplesStatistics, Time, Timescale};

impl VideoData {
pub fn load_mp4(bytes: &[u8]) -> Result<Self, VideoLoadError> {
Expand Down Expand Up @@ -42,32 +42,36 @@ impl VideoData {
let mut gops = Vec::<GroupOfPictures>::new();
let mut gop_sample_start_index = 0;

for sample in &track.samples {
if sample.is_sync && !samples.is_empty() {
let start = samples[gop_sample_start_index].decode_timestamp;
let sample_range = gop_sample_start_index as u32..samples.len() as u32;
gops.push(GroupOfPictures {
start,
sample_range,
{
re_tracing::profile_scope!("copy samples & build gops");

for sample in &track.samples {
if sample.is_sync && !samples.is_empty() {
let start = samples[gop_sample_start_index].decode_timestamp;
let sample_range = gop_sample_start_index as u32..samples.len() as u32;
gops.push(GroupOfPictures {
start,
sample_range,
});
gop_sample_start_index = samples.len();
}

let decode_timestamp = Time::new(sample.decode_timestamp as i64);
let presentation_timestamp = Time::new(sample.composition_timestamp as i64);
let duration = Time::new(sample.duration as i64);

let byte_offset = sample.offset as u32;
let byte_length = sample.size as u32;

samples.push(Sample {
is_sync: sample.is_sync,
decode_timestamp,
presentation_timestamp,
duration,
byte_offset,
byte_length,
});
gop_sample_start_index = samples.len();
}

let decode_timestamp = Time::new(sample.decode_timestamp as i64);
let presentation_timestamp = Time::new(sample.composition_timestamp as i64);
let duration = Time::new(sample.duration as i64);

let byte_offset = sample.offset as u32;
let byte_length = sample.size as u32;

samples.push(Sample {
is_sync: sample.is_sync,
decode_timestamp,
presentation_timestamp,
duration,
byte_offset,
byte_length,
});
}

if !samples.is_empty() {
Expand All @@ -79,10 +83,13 @@ impl VideoData {
});
}

let samples_statistics = SamplesStatistics::new(&samples);

Ok(Self {
config,
timescale,
duration,
samples_statistics,
gops,
samples,
mp4_tracks,
Expand Down
2 changes: 1 addition & 1 deletion crates/store/re_video/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ pub use re_mp4::{TrackId, TrackKind};

pub use self::{
decode::{Chunk, Frame, PixelFormat},
demux::{Config, Sample, VideoData, VideoLoadError},
demux::{Config, Sample, SamplesStatistics, VideoData, VideoLoadError},
time::{Time, Timescale},
};

Expand Down
Loading
Loading