From 6eb61e84600eec7022c2b4eac0ad0ab64446d858 Mon Sep 17 00:00:00 2001
From: Justin Taylor <j.taylor8727@gmail.com>
Date: Tue, 17 Dec 2024 13:09:57 -0800
Subject: [PATCH] Allow for more lenient duration parsing

Change the SRT and WebVTT time-boundary separator constants from " --> "
to the bare "-->" token, and tokenize the text that follows the separator
with strings.Fields instead of splitting on a single space. The byte-slice
separator variables keep the space-padded " --> " form.

The new tests cover a cue whose timestamps have no spaces around "-->" and
a WebVTT cue whose settings are separated from the end timestamp by a tab.
---
NOTE(review): unlike strings.Split, strings.Fields returns an empty slice
when its input is empty or whitespace-only. The code that indexes s2 (SRT)
and right (WebVTT) lies outside these hunks; confirm it tolerates a line
that ends right after "-->".

 srt.go         |  6 +++---
 srt_test.go    | 17 +++++++++++++++++
 webvtt.go      |  6 +++---
 webvtt_test.go | 24 ++++++++++++++++++++++++
 4 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/srt.go b/srt.go
index a47321a..0803a58 100644
--- a/srt.go
+++ b/srt.go
@@ -14,12 +14,12 @@ import (
 
 // Constants
 const (
-	srtTimeBoundariesSeparator = " --> "
+	srtTimeBoundariesSeparator = "-->"
 )
 
 // Vars
 var (
-	bytesSRTTimeBoundariesSeparator = []byte(srtTimeBoundariesSeparator)
+	bytesSRTTimeBoundariesSeparator = []byte(" "+srtTimeBoundariesSeparator+" ")
 )
 
 // parseDurationSRT parses an .srt duration
@@ -106,7 +106,7 @@ func ReadFromSRT(i io.Reader) (o *Subtitles, err error) {
 				return
 			}
 			// We do this to eliminate extra stuff like positions which are not documented anywhere
-			s2 := strings.Split(s1[1], " ")
+			s2 := strings.Fields(s1[1])
 
 			// Parse time boundaries
 			if s.StartAt, err = parseDurationSRT(s1[0]); err != nil {
diff --git a/srt_test.go b/srt_test.go
index b7bd4bd..256e548 100644
--- a/srt_test.go
+++ b/srt_test.go
@@ -4,11 +4,13 @@ import (
 	"bytes"
 	"io/ioutil"
 	"os"
+	"strings"
 	"testing"
 	"time"
 
 	"github.com/asticode/go-astisub"
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 )
 
 func TestSRT(t *testing.T) {
@@ -138,3 +140,18 @@ func TestSRTStyled(t *testing.T) {
 	assert.NoError(t, err)
 	assert.Equal(t, string(c), w.String())
 }
+
+func TestSRTParseDuration(t *testing.T) {
+	testData := `
+	1
+	00:00:01.876-->00:0:03.390
+	Duration without enclosing space`
+
+	s, err := astisub.ReadFromSRT(strings.NewReader(testData))
+	require.NoError(t, err)
+
+	require.Len(t, s.Items, 1)
+	assert.Equal(t, 1*time.Second+876*time.Millisecond, s.Items[0].StartAt)
+	assert.Equal(t, 3*time.Second+390*time.Millisecond, s.Items[0].EndAt)
+	assert.Equal(t, "Duration without enclosing space", s.Items[0].Lines[0].String())
+}
diff --git a/webvtt.go b/webvtt.go
index af75a93..c40850b 100644
--- a/webvtt.go
+++ b/webvtt.go
@@ -25,7 +25,7 @@ const (
 	webvttBlockNameStyle          = "style"
 	webvttBlockNameText           = "text"
 	webvttDefaultStyleID          = "astisub-webvtt-default-style-id"
-	webvttTimeBoundariesSeparator = " --> "
+	webvttTimeBoundariesSeparator = "-->"
 	webvttTimestampMapHeader      = "X-TIMESTAMP-MAP"
 )
 
@@ -33,7 +33,7 @@ const (
 var (
 	bytesWebVTTItalicEndTag            = []byte("</i>")
 	bytesWebVTTItalicStartTag          = []byte("<i>")
-	bytesWebVTTTimeBoundariesSeparator = []byte(webvttTimeBoundariesSeparator)
+	bytesWebVTTTimeBoundariesSeparator = []byte(" "+webvttTimeBoundariesSeparator+" ")
 	webVTTRegexpInlineTimestamp        = regexp.MustCompile(`<((?:\d{2,}:)?\d{2}:\d{2}\.\d{3})>`)
 	webVTTRegexpTag                    = regexp.MustCompile(`(</*\s*([^\.\s]+)(\.[^\s/]*)*\s*([^/]*)\s*/*>)`)
 )
@@ -237,7 +237,7 @@ func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
 			var left = strings.Split(line, webvttTimeBoundariesSeparator)
 
 			// Split line on space to get remaining of time data
-			var right = strings.Split(left[1], " ")
+			var right = strings.Fields(left[1])
 
 			// Parse time boundaries
 			if item.StartAt, err = parseDurationWebVTT(left[0]); err != nil {
diff --git a/webvtt_test.go b/webvtt_test.go
index 7d8a21c..11a1f26 100644
--- a/webvtt_test.go
+++ b/webvtt_test.go
@@ -224,3 +224,27 @@ Text with a <00:06:30.000>timestamp in the middle
 Do no fall to the next item
 `, b.String())
 }
+
+func TestWebVTTParseDuration(t *testing.T) {
+	testData := `WEBVTT
+	1
+	00:00:01.876-->00:0:03.390
+	Duration without enclosing space
+
+	2
+	00:00:03.391-->00:00:06.567	align:middle
+	Duration with tab spaced styles`
+
+	s, err := astisub.ReadFromWebVTT(strings.NewReader(testData))
+	require.NoError(t, err)
+
+	require.Len(t, s.Items, 2)
+	assert.Equal(t, 1*time.Second+876*time.Millisecond, s.Items[0].StartAt)
+	assert.Equal(t, 3*time.Second+390*time.Millisecond, s.Items[0].EndAt)
+	assert.Equal(t, "Duration without enclosing space", s.Items[0].Lines[0].String())
+	assert.Equal(t, 3*time.Second+391*time.Millisecond, s.Items[1].StartAt)
+	assert.Equal(t, 6*time.Second+567*time.Millisecond, s.Items[1].EndAt)
+	assert.Equal(t, "Duration with tab spaced styles", s.Items[1].Lines[0].String())
+	require.NotNil(t, s.Items[1].InlineStyle)
+	assert.Equal(t, "middle", s.Items[1].InlineStyle.WebVTTAlign)
+}