From 6eb61e84600eec7022c2b4eac0ad0ab64446d858 Mon Sep 17 00:00:00 2001
From: Justin Taylor <j.taylor8727@gmail.com>
Date: Tue, 17 Dec 2024 13:09:57 -0800
Subject: [PATCH] Allow for more lenient duration parsing

---
 srt.go         |  6 +++---
 srt_test.go    | 17 +++++++++++++++++
 webvtt.go      |  6 +++---
 webvtt_test.go | 24 ++++++++++++++++++++++++
 4 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/srt.go b/srt.go
index a47321a..0803a58 100644
--- a/srt.go
+++ b/srt.go
@@ -14,12 +14,12 @@ import (
 
 // Constants
 const (
-	srtTimeBoundariesSeparator = " --> "
+	srtTimeBoundariesSeparator = "-->" // bare arrow, no surrounding spaces
 )
 
 // Vars
 var (
-	bytesSRTTimeBoundariesSeparator = []byte(srtTimeBoundariesSeparator)
+	bytesSRTTimeBoundariesSeparator = []byte(" " + srtTimeBoundariesSeparator + " ") // arrow padded with one space on each side
 )
 
 // parseDurationSRT parses an .srt duration
@@ -106,7 +106,7 @@ func ReadFromSRT(i io.Reader) (o *Subtitles, err error) {
 				return
 			}
 			// We do this to eliminate extra stuff like positions which are not documented anywhere
-			s2 := strings.Split(s1[1], " ")
+			s2 := strings.Fields(s1[1])
 
 			// Parse time boundaries
 			if s.StartAt, err = parseDurationSRT(s1[0]); err != nil {
diff --git a/srt_test.go b/srt_test.go
index b7bd4bd..256e548 100644
--- a/srt_test.go
+++ b/srt_test.go
@@ -4,11 +4,13 @@ import (
 	"bytes"
 	"io/ioutil"
 	"os"
+	"strings"
 	"testing"
 	"time"
 
 	"github.com/asticode/go-astisub"
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 )
 
 func TestSRT(t *testing.T) {
@@ -138,3 +140,18 @@ func TestSRTStyled(t *testing.T) {
 	assert.NoError(t, err)
 	assert.Equal(t, string(c), w.String())
 }
+
+// TestSRTParseDuration checks that a time line with no spaces around "-->" is parsed.
+func TestSRTParseDuration(t *testing.T) {
+	const input = `
+	1
+	00:00:01.876-->00:0:03.390
+	Duration without enclosing space`
+
+	subs, err := astisub.ReadFromSRT(strings.NewReader(input))
+	require.NoError(t, err)
+	require.Len(t, subs.Items, 1)
+	assert.Equal(t, 1876*time.Millisecond, subs.Items[0].StartAt)
+	assert.Equal(t, 3390*time.Millisecond, subs.Items[0].EndAt)
+	assert.Equal(t, "Duration without enclosing space", subs.Items[0].Lines[0].String())
+}
diff --git a/webvtt.go b/webvtt.go
index af75a93..c40850b 100644
--- a/webvtt.go
+++ b/webvtt.go
@@ -25,7 +25,7 @@ const (
 	webvttBlockNameStyle          = "style"
 	webvttBlockNameText           = "text"
 	webvttDefaultStyleID          = "astisub-webvtt-default-style-id"
-	webvttTimeBoundariesSeparator = " --> "
+	webvttTimeBoundariesSeparator = "-->"
 	webvttTimestampMapHeader      = "X-TIMESTAMP-MAP"
 )
 
@@ -33,7 +33,7 @@ const (
 var (
 	bytesWebVTTItalicEndTag            = []byte("</i>")
 	bytesWebVTTItalicStartTag          = []byte("<i>")
-	bytesWebVTTTimeBoundariesSeparator = []byte(webvttTimeBoundariesSeparator)
+	bytesWebVTTTimeBoundariesSeparator = []byte(" " + webvttTimeBoundariesSeparator + " ") // arrow padded with one space on each side
 	webVTTRegexpInlineTimestamp        = regexp.MustCompile(`<((?:\d{2,}:)?\d{2}:\d{2}\.\d{3})>`)
 	webVTTRegexpTag                    = regexp.MustCompile(`(</*\s*([^\.\s]+)(\.[^\s/]*)*\s*([^/]*)\s*/*>)`)
 )
@@ -237,7 +237,7 @@ func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
 			var left = strings.Split(line, webvttTimeBoundariesSeparator)
 
 			// Split line on space to get remaining of time data
-			var right = strings.Split(left[1], " ")
+			var right = strings.Fields(left[1])
 
 			// Parse time boundaries
 			if item.StartAt, err = parseDurationWebVTT(left[0]); err != nil {
diff --git a/webvtt_test.go b/webvtt_test.go
index 7d8a21c..11a1f26 100644
--- a/webvtt_test.go
+++ b/webvtt_test.go
@@ -224,3 +224,27 @@ Text with a <00:06:30.000>timestamp in the middle
 Do no fall to the next item
 `, b.String())
 }
+
+// TestWebVTTParseDuration checks timing lines with no spaces around "-->", including one whose cue settings follow a tab.
+func TestWebVTTParseDuration(t *testing.T) {
+	testData := `WEBVTT
+	1
+	00:00:01.876-->00:0:03.390
+	Duration without enclosing space
+
+	2
+	00:00:03.391-->00:00:06.567	align:middle
+	Duration with tab spaced styles`
+
+	s, err := astisub.ReadFromWebVTT(strings.NewReader(testData))
+	require.NoError(t, err)
+	require.Len(t, s.Items, 2)
+	assert.Equal(t, 1*time.Second+876*time.Millisecond, s.Items[0].StartAt)
+	assert.Equal(t, 3*time.Second+390*time.Millisecond, s.Items[0].EndAt)
+	assert.Equal(t, "Duration without enclosing space", s.Items[0].Lines[0].String())
+	assert.Equal(t, 3*time.Second+391*time.Millisecond, s.Items[1].StartAt)
+	assert.Equal(t, 6*time.Second+567*time.Millisecond, s.Items[1].EndAt)
+	assert.Equal(t, "Duration with tab spaced styles", s.Items[1].Lines[0].String())
+	require.NotNil(t, s.Items[1].InlineStyle) // stop here on nil: the next line dereferences InlineStyle
+	assert.Equal(t, "middle", s.Items[1].InlineStyle.WebVTTAlign)
+}