Skip to content

Commit d110303

Browse files
authored
Fix audio segment miscount (#604)
2 parents 5c8260b + b505019 commit d110303

File tree

2 files changed

+103
-11
lines changed

2 files changed

+103
-11
lines changed

Diff for: transcoder/src/info.go

+6-2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"path/filepath"
1010
"strconv"
1111
"strings"
12+
"sync"
1213
"time"
1314

1415
"golang.org/x/text/language"
@@ -36,7 +37,7 @@ type MediaInfo struct {
3637
/// The file size of the video file.
3738
Size int64 `json:"size"`
3839
/// The length of the media in seconds.
39-
Duration float32 `json:"duration"`
40+
Duration float64 `json:"duration"`
4041
/// The container of the video file of this episode.
4142
Container *string `json:"container"`
4243
/// Version of the metadata. This can be used to invalidate older metadata from db if the extraction code has changed.
@@ -55,6 +56,9 @@ type MediaInfo struct {
5556
Fonts []string `json:"fonts"`
5657
/// The list of chapters. See Chapter for more information.
5758
Chapters []Chapter `json:"chapters"`
59+
60+
/// lock used to read/set keyframes of video/audio
61+
lock sync.Mutex
5862
}
5963

6064
type Video struct {
@@ -238,7 +242,7 @@ func RetriveMediaInfo(path string, sha string) (*MediaInfo, error) {
238242
// Remove leading .
239243
Extension: filepath.Ext(path)[1:],
240244
Size: ParseInt64(mi.Format.Size),
241-
Duration: float32(mi.Format.DurationSeconds),
245+
Duration: mi.Format.DurationSeconds,
242246
Container: OrNull(mi.Format.FormatName),
243247
Versions: Versions{
244248
Info: InfoVersion,

Diff for: transcoder/src/keyframes.go

+97-9
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package src
22

33
import (
44
"bufio"
5+
"errors"
56
"fmt"
67
"log"
78
"os/exec"
@@ -88,11 +89,17 @@ type KeyframeKey struct {
8889
}
8990

9091
func (s *MetadataService) GetKeyframes(info *MediaInfo, isVideo bool, idx uint32) (*Keyframe, error) {
92+
info.lock.Lock()
93+
var ret *Keyframe
9194
if isVideo && info.Videos[idx].Keyframes != nil {
92-
return info.Videos[idx].Keyframes, nil
95+
ret = info.Videos[idx].Keyframes
9396
}
9497
if !isVideo && info.Audios[idx].Keyframes != nil {
95-
return info.Audios[idx].Keyframes, nil
98+
ret = info.Audios[idx].Keyframes
99+
}
100+
info.lock.Unlock()
101+
if ret != nil {
102+
return ret, nil
96103
}
97104

98105
get_running, set := s.keyframeLock.Start(KeyframeKey{
@@ -110,6 +117,14 @@ func (s *MetadataService) GetKeyframes(info *MediaInfo, isVideo bool, idx uint32
110117
}
111118
kf.info.ready.Add(1)
112119

120+
info.lock.Lock()
121+
if isVideo {
122+
info.Videos[idx].Keyframes = kf
123+
} else {
124+
info.Audios[idx].Keyframes = kf
125+
}
126+
info.lock.Unlock()
127+
113128
go func() {
114129
var table string
115130
var err error
@@ -122,7 +137,7 @@ func (s *MetadataService) GetKeyframes(info *MediaInfo, isVideo bool, idx uint32
122137
}
123138

124139
if err != nil {
125-
log.Printf("Couldn't retrive keyframes for %s %s %d: %v", info.Path, table, idx, err)
140+
log.Printf("Couldn't retrieve keyframes for %s %s %d: %v", info.Path, table, idx, err)
126141
return
127142
}
128143

@@ -235,16 +250,89 @@ func getVideoKeyframes(path string, video_idx uint32, kf *Keyframe) error {
235250
return nil
236251
}
237252

253+
const DummyKeyframeDuration = float64(4)
254+
238255
// we can pretty much cut audio at any point so no need to get specific frames, just cut every 4s
239256
func getAudioKeyframes(info *MediaInfo, audio_idx uint32, kf *Keyframe) error {
240-
dummyKeyframeDuration := float64(4)
241-
segmentCount := int((float64(info.Duration) / dummyKeyframeDuration) + 1)
242-
kf.Keyframes = make([]float64, segmentCount)
243-
for segmentIndex := 0; segmentIndex < segmentCount; segmentIndex += 1 {
244-
kf.Keyframes[segmentIndex] = float64(segmentIndex) * dummyKeyframeDuration
257+
defer printExecTime("ffprobe keyframe analysis for %s audio n%d", info.Path, audio_idx)()
258+
// Format's duration CAN be different than audio's duration. To make sure we do not
259+
// miss a segment or make one more, we need to check the audio's duration.
260+
//
261+
// Since fetching the duration requires reading packets and is SLOW, we start by generating
262+
// keyframes until a reasonably safe point of the file (if the format has a 20min duration, audio
263+
// probably has a close duration).
264+
// You can read why duration retrieval is slow on the comment below.
265+
safe_duration := info.Duration - 20
266+
segment_count := int((safe_duration / DummyKeyframeDuration) + 1)
267+
if segment_count > 0 {
268+
kf.Keyframes = make([]float64, segment_count)
269+
for i := 0; i < segment_count; i += 1 {
270+
kf.Keyframes[i] = float64(i) * DummyKeyframeDuration
271+
}
272+
kf.info.ready.Done()
273+
} else {
274+
segment_count = 0
275+
}
276+
277+
// Some formats DO NOT contain a duration metadata, we need to manually fetch it
278+
// from the packets.
279+
//
280+
// We could use the same command to retrieve all packets and know when we can cut PRECISELY
281+
// but since packets always contain only a few ms we don't need this precision.
282+
cmd := exec.Command(
283+
"ffprobe",
284+
"-select_streams", fmt.Sprintf("a:%d", audio_idx),
285+
"-show_entries", "packet=pts_time",
286+
// some avi files don't have pts, we use this to ask ffmpeg to generate them (it uses the dts under the hood)
287+
"-fflags", "+genpts",
288+
// We use a read_interval LARGER than the file (at least we estimate)
289+
// This allows us to only decode the LAST packets
290+
"-read_intervals", fmt.Sprintf("%f", info.Duration+10_000),
291+
"-of", "csv=print_section=0",
292+
info.Path,
293+
)
294+
stdout, err := cmd.StdoutPipe()
295+
if err != nil {
296+
return err
297+
}
298+
err = cmd.Start()
299+
if err != nil {
300+
return err
301+
}
302+
303+
scanner := bufio.NewScanner(stdout)
304+
var duration float64
305+
for scanner.Scan() {
306+
pts := scanner.Text()
307+
if pts == "" || pts == "N/A" {
308+
continue
309+
}
310+
311+
duration, err = strconv.ParseFloat(pts, 64)
312+
if err != nil {
313+
return err
314+
}
315+
316+
}
317+
if err := scanner.Err(); err != nil {
318+
return err
319+
}
320+
if duration <= 0 {
321+
return errors.New("could not find audio's duration")
322+
}
323+
324+
new_seg_count := int((duration / DummyKeyframeDuration) + 1)
325+
if new_seg_count > segment_count {
326+
new_segments := make([]float64, new_seg_count-segment_count)
327+
for i := segment_count; i < new_seg_count; i += 1 {
328+
new_segments[i-segment_count] = float64(i) * DummyKeyframeDuration
329+
}
330+
kf.add(new_segments)
331+
if segment_count == 0 {
332+
kf.info.ready.Done()
333+
}
245334
}
246335

247336
kf.IsDone = true
248-
kf.info.ready.Done()
249337
return nil
250338
}

0 commit comments

Comments
 (0)