@@ -89,7 +89,7 @@ class VideoClips:
         video_paths (List[str]): paths to the video files
         clip_length_in_frames (int): size of a clip in number of frames
         frames_between_clips (int): step (in frames) between each clip
-        frame_rate (int, optional): if specified, it will resample the video
+        frame_rate (float, optional): if specified, it will resample the video
             so that it has `frame_rate`, and then the clips will be defined
             on the resampled video
         num_workers (int): how many subprocesses to use for data loading.
@@ -102,7 +102,7 @@ def __init__(
         video_paths: List[str],
         clip_length_in_frames: int = 16,
         frames_between_clips: int = 1,
-        frame_rate: Optional[int] = None,
+        frame_rate: Optional[float] = None,
         _precomputed_metadata: Optional[Dict[str, Any]] = None,
         num_workers: int = 0,
         _video_width: int = 0,
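With the widened annotation, fractional frame rates such as NTSC's 29.97 fps now type-check against the constructor. A minimal usage sketch (the file paths are hypothetical placeholders; any decodable videos would do):

    from torchvision.datasets.video_utils import VideoClips

    clips = VideoClips(
        ["clip_a.mp4", "clip_b.mp4"],  # hypothetical paths
        clip_length_in_frames=16,
        frames_between_clips=8,
        frame_rate=29.97,  # previously rejected by type checkers as Optional[int]
    )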
@@ -136,7 +136,7 @@ def __init__(

     def _compute_frame_pts(self) -> None:
         self.video_pts = []  # len = num_videos. Each entry is a tensor of shape (num_frames_in_video,)
-        self.video_fps: List[int] = []  # len = num_videos
+        self.video_fps: List[float] = []  # len = num_videos

         # strategy: use a DataLoader to parallelize read_video_timestamps
         # so need to create a dummy dataset first
@@ -203,15 +203,15 @@ def subset(self, indices: List[int]) -> "VideoClips":

     @staticmethod
     def compute_clips_for_video(
-        video_pts: torch.Tensor, num_frames: int, step: int, fps: int, frame_rate: Optional[int] = None
+        video_pts: torch.Tensor, num_frames: int, step: int, fps: Optional[float], frame_rate: Optional[float] = None
     ) -> Tuple[torch.Tensor, Union[List[slice], torch.Tensor]]:
         if fps is None:
             # if for some reason the video doesn't have fps (because doesn't have a video stream)
             # set the fps to 1. The value doesn't matter, because video_pts is empty anyway
             fps = 1
         if frame_rate is None:
             frame_rate = fps
-        total_frames = len(video_pts) * (float(frame_rate) / fps)
+        total_frames = len(video_pts) * frame_rate / fps
         _idxs = VideoClips._resample_video_idx(int(math.floor(total_frames)), fps, frame_rate)
         video_pts = video_pts[_idxs]
         clips = unfold(video_pts, num_frames, step)
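Dropping the `float(...)` cast is safe: `frame_rate` is now a float (or falls back to `fps`), and Python 3's `/` performs true division in any case. A quick numeric check of the resampling arithmetic, with made-up values:

    import math

    num_video_frames = 300   # len(video_pts) for a hypothetical ~10 s clip
    fps = 29.97              # native rate reported for the video
    frame_rate = 15.0        # requested resampling rate
    total_frames = num_video_frames * frame_rate / fps  # ~150.15
    print(int(math.floor(total_frames)))                # 150 frames survive resampling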
@@ -227,7 +227,7 @@ def compute_clips_for_video(
             idxs = unfold(_idxs, num_frames, step)
         return clips, idxs

-    def compute_clips(self, num_frames: int, step: int, frame_rate: Optional[int] = None) -> None:
+    def compute_clips(self, num_frames: int, step: int, frame_rate: Optional[float] = None) -> None:
         """
         Compute all consecutive sequences of clips from video_pts.
         Always returns clips of size `num_frames`, meaning that the
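`compute_clips` applies `compute_clips_for_video` to every video in the collection, so clips can be re-sliced at a new rate without re-reading the files. A short usage sketch, reusing the hypothetical `clips` object from the earlier example:

    clips.compute_clips(num_frames=16, step=8, frame_rate=15.0)  # float now accepted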
@@ -275,8 +275,8 @@ def get_clip_location(self, idx: int) -> Tuple[int, int]:
         return video_idx, clip_idx

     @staticmethod
-    def _resample_video_idx(num_frames: int, original_fps: int, new_fps: int) -> Union[slice, torch.Tensor]:
-        step = float(original_fps) / new_fps
+    def _resample_video_idx(num_frames: int, original_fps: float, new_fps: float) -> Union[slice, torch.Tensor]:
+        step = original_fps / new_fps
         if step.is_integer():
             # optimization: if step is integer, don't need to perform
             # advanced indexing
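Since true division always yields a float, `step.is_integer()` remains valid without the explicit cast. A self-contained sketch of the resampling logic this hunk touches; the tensor branch after the `is_integer()` check is an assumption reconstructed from the "advanced indexing" comment, as the diff does not show it:

    from typing import Union
    import torch

    def resample_video_idx(num_frames: int, original_fps: float, new_fps: float) -> Union[slice, torch.Tensor]:
        step = original_fps / new_fps  # true division, so .is_integer() is available
        if step.is_integer():
            # integer step: a plain slice avoids advanced indexing
            return slice(None, None, int(step))
        # non-integer step (assumed branch): map each target frame back to a source frame index
        idxs = torch.arange(num_frames, dtype=torch.float32) * step
        return idxs.floor().to(torch.int64)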