Skip to content

Commit

Permalink
fix(text): Fix webvtt offset in sequence mode
Browse files Browse the repository at this point in the history
When running in sequence mode, we ignore the normal timestamps
of video and audio segments. This led to problems in some Apple-
encoded webvtt content, which used the X-TIMESTAMP-MAP tag to account
for the timestamp offsets in their video. Thus, those subtitles would
end up 10 seconds offset.
This changes the webvtt parser to ignore the X-TIMESTAMP-MAP when in
sequence mode.

Issue shaka-project#2337

Change-Id: I1596e064be42ad8fb536729816a64514dac07c3e
  • Loading branch information
theodab committed Feb 15, 2022
1 parent f1c1585 commit 82fe602
Show file tree
Hide file tree
Showing 14 changed files with 102 additions and 11 deletions.
7 changes: 7 additions & 0 deletions externs/shaka/text.js
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,13 @@ shaka.extern.TextParser = class {
* @exportDoc
*/
parseMedia(data, timeContext) {}

/**
* Notifies the parser whether the manifest is in sequence mode or not.
* Implementations that do not depend on sequence mode may ignore the value.
*
* @param {boolean} sequenceMode True if the manifest is in sequence mode.
*/
setSequenceMode(sequenceMode) {}
};


Expand Down
11 changes: 6 additions & 5 deletions lib/media/media_source_engine.js
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ shaka.media.MediaSourceEngine = class {
let mimeType = shaka.util.MimeUtils.getFullType(
stream.mimeType, stream.codecs);
if (contentType == ContentType.TEXT) {
this.reinitText(mimeType);
this.reinitText(mimeType, sequenceMode);
} else {
if ((forceTransmuxTS || !MediaSource.isTypeSupported(mimeType)) &&
shaka.media.Transmuxer.isSupported(mimeType, contentType)) {
Expand Down Expand Up @@ -361,12 +361,13 @@ shaka.media.MediaSourceEngine = class {
/**
* Reinitialize the TextEngine for a new text type.
* @param {string} mimeType
* @param {boolean} sequenceMode
*/
reinitText(mimeType) {
reinitText(mimeType, sequenceMode) {
if (!this.textEngine_) {
this.textEngine_ = new shaka.text.TextEngine(this.textDisplayer_);
}
this.textEngine_.initParser(mimeType);
this.textEngine_.initParser(mimeType, sequenceMode);
}

/**
Expand Down Expand Up @@ -535,7 +536,7 @@ shaka.media.MediaSourceEngine = class {
// For HLS CEA-608/708 CLOSED-CAPTIONS, text data is embedded in
// the video stream, so textEngine may not have been initialized.
if (!this.textEngine_) {
this.reinitText('text/vtt');
this.reinitText('text/vtt', sequenceMode || false);
}

if (transmuxedData.metadata) {
Expand All @@ -562,7 +563,7 @@ shaka.media.MediaSourceEngine = class {
contentType, () => this.append_(contentType, transmuxedSegment));
} else if (hasClosedCaptions) {
if (!this.textEngine_) {
this.reinitText('text/vtt');
this.reinitText('text/vtt', sequenceMode || false);
}
// If it is the init segment for closed captions, initialize the closed
// caption parser.
Expand Down
6 changes: 4 additions & 2 deletions lib/media/streaming_engine.js
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,8 @@ shaka.media.StreamingEngine = class {

const mimeType = shaka.util.MimeUtils.getFullType(
stream.mimeType, stream.codecs);
this.playerInterface_.mediaSourceEngine.reinitText(mimeType);
this.playerInterface_.mediaSourceEngine.reinitText(
mimeType, this.manifest_.sequenceMode);

const textDisplayer =
this.playerInterface_.mediaSourceEngine.getTextDisplayer();
Expand Down Expand Up @@ -428,7 +429,8 @@ shaka.media.StreamingEngine = class {
// init segment again.
const fullMimeType = shaka.util.MimeUtils.getFullType(
stream.mimeType, stream.codecs);
this.playerInterface_.mediaSourceEngine.reinitText(fullMimeType);
this.playerInterface_.mediaSourceEngine.reinitText(
fullMimeType, this.manifest_.sequenceMode);
}

// Releases the segmentIndex of the old stream.
Expand Down
8 changes: 8 additions & 0 deletions lib/text/lrc_text_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ shaka.text.LrcTextParser = class {
goog.asserts.assert(false, 'LRC does not have init segments');
}

/**
* Part of the TextParser interface. The LRC parser does not store or act on
* the sequence mode flag.
*
* @override
* @export
*/
setSequenceMode(sequenceMode) {
// Unused: intentionally empty, the value is discarded.
}

/**
* @override
* @export
Expand Down
8 changes: 8 additions & 0 deletions lib/text/mp4_ttml_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,14 @@ shaka.text.Mp4TtmlParser = class {
}
}

/**
* Part of the TextParser interface. The MP4 TTML parser does not store or act
* on the sequence mode flag.
*
* @override
* @export
*/
setSequenceMode(sequenceMode) {
// Unused: intentionally empty, the value is discarded.
}

/**
* @override
* @export
Expand Down
8 changes: 8 additions & 0 deletions lib/text/mp4_vtt_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,14 @@ shaka.text.Mp4VttParser = class {
}
}

/**
* Part of the TextParser interface. The MP4 VTT parser does not store or act
* on the sequence mode flag.
*
* @override
* @export
*/
setSequenceMode(sequenceMode) {
// Unused: intentionally empty, the value is discarded.
}

/**
* @override
* @export
Expand Down
8 changes: 8 additions & 0 deletions lib/text/sbv_text_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@ shaka.text.SbvTextParser = class {
goog.asserts.assert(false, 'SubViewer does not have init segments');
}

/**
* Part of the TextParser interface. The SubViewer (SBV) parser does not store
* or act on the sequence mode flag.
*
* @override
* @export
*/
setSequenceMode(sequenceMode) {
// Unused: intentionally empty, the value is discarded.
}

/**
* @override
* @export
Expand Down
8 changes: 8 additions & 0 deletions lib/text/srt_text_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,14 @@ shaka.text.SrtTextParser = class {
goog.asserts.assert(false, 'SRT does not have init segments');
}

/**
* Part of the TextParser interface. The SRT parser does not store or act on
* the sequence mode flag.
*
* @override
* @export
*/
setSequenceMode(sequenceMode) {
// Unused: intentionally empty, the value is discarded.
}

/**
* @override
* @export
Expand Down
8 changes: 8 additions & 0 deletions lib/text/ssa_text_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ shaka.text.SsaTextParser = class {
goog.asserts.assert(false, 'SSA does not have init segments');
}

/**
* Part of the TextParser interface. The SSA parser does not store or act on
* the sequence mode flag.
*
* @override
* @export
*/
setSequenceMode(sequenceMode) {
// Unused: intentionally empty, the value is discarded.
}

/**
* @override
* @export
Expand Down
4 changes: 3 additions & 1 deletion lib/text/text_engine.js
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,9 @@ shaka.text.TextEngine = class {
* called at least once before appendBuffer.
*
* @param {string} mimeType
* @param {boolean} sequenceMode
*/
initParser(mimeType) {
initParser(mimeType, sequenceMode) {
// No parser for CEA, which is extracted from video and side-loaded
// into TextEngine and TextDisplayer.
if (mimeType == shaka.util.MimeUtils.CEA608_CLOSED_CAPTION_MIMETYPE ||
Expand All @@ -141,6 +142,7 @@ shaka.text.TextEngine = class {
goog.asserts.assert(
factory, 'Text type negotiation should have happened already');
this.parser_ = shaka.util.Functional.callFactory(factory);
this.parser_.setSequenceMode(sequenceMode);
}

/**
Expand Down
8 changes: 8 additions & 0 deletions lib/text/ttml_text_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,14 @@ shaka.text.TtmlTextParser = class {
goog.asserts.assert(false, 'TTML does not have init segments');
}

/**
* Part of the TextParser interface. The TTML parser does not store or act on
* the sequence mode flag.
*
* @override
* @export
*/
setSequenceMode(sequenceMode) {
// Unused: intentionally empty, the value is discarded.
}

/**
* @override
* @export
Expand Down
20 changes: 19 additions & 1 deletion lib/text/vtt_text_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ goog.require('shaka.util.XmlUtils');
* @export
*/
shaka.text.VttTextParser = class {
/** Constructs a VTT parser. Sequence mode is off until setSequenceMode(). */
constructor() {
/**
 * Whether the manifest is in sequence mode. When true, the
 * 'X-TIMESTAMP-MAP' header is not honored while parsing media.
 * @private {boolean}
 */
this.sequenceMode_ = false;
}

/**
* @override
* @export
Expand All @@ -30,6 +36,14 @@ shaka.text.VttTextParser = class {
goog.asserts.assert(false, 'VTT does not have init segments');
}

/**
* Records whether the manifest is in sequence mode. The stored flag is
* consulted when deciding whether to honor the 'X-TIMESTAMP-MAP' header.
*
* @override
* @export
*/
setSequenceMode(sequenceMode) {
// Stored rather than applied immediately; it is read during media parsing.
this.sequenceMode_ = sequenceMode;
}

/**
* @override
* @export
Expand All @@ -52,7 +66,11 @@ shaka.text.VttTextParser = class {
// It is no longer closely tied to periods, but the name stuck around.
let offset = time.periodStart;

if (blocks[0].includes('X-TIMESTAMP-MAP')) {
// Do not honor the 'X-TIMESTAMP-MAP' value when in sequence mode.
// That is because it is used mainly (solely?) to account for the timestamp
// offset of the video/audio; when in sequence mode, we normalize that
// timestamp offset to 0, so we should not account for it.
if (blocks[0].includes('X-TIMESTAMP-MAP') && !this.sequenceMode_) {
// https://bit.ly/2K92l7y
// The 'X-TIMESTAMP-MAP' header is used in HLS to align text with
// the rest of the media.
Expand Down
2 changes: 1 addition & 1 deletion test/media/media_source_engine_unit.js
Original file line number Diff line number Diff line change
Expand Up @@ -1120,7 +1120,7 @@ describe('MediaSourceEngine', () => {
});

it('destroys text engines', async () => {
mediaSourceEngine.reinitText('text/vtt');
mediaSourceEngine.reinitText('text/vtt', false);

await mediaSourceEngine.destroy();
expect(mockTextEngine).toBeTruthy();
Expand Down
7 changes: 6 additions & 1 deletion test/text/text_engine_unit.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ describe('TextEngine', () => {
/** @type {!jasmine.Spy} */
let mockParseInit;

/** @type {!jasmine.Spy} */
let mockSetSequenceMode;

/** @type {!jasmine.Spy} */
let mockParseMedia;

Expand All @@ -32,11 +35,13 @@ describe('TextEngine', () => {

beforeEach(() => {
mockParseInit = jasmine.createSpy('mockParseInit');
mockSetSequenceMode = jasmine.createSpy('mockSetSequenceMode');
mockParseMedia = jasmine.createSpy('mockParseMedia');
// eslint-disable-next-line no-restricted-syntax
mockParserPlugIn = function() {
return {
parseInit: mockParseInit,
setSequenceMode: mockSetSequenceMode,
parseMedia: mockParseMedia,
};
};
Expand All @@ -46,7 +51,7 @@ describe('TextEngine', () => {

TextEngine.registerParser(dummyMimeType, mockParserPlugIn);
textEngine = new TextEngine(mockDisplayer);
textEngine.initParser(dummyMimeType);
textEngine.initParser(dummyMimeType, false);
});

afterEach(() => {
Expand Down

0 comments on commit 82fe602

Please sign in to comment.