Skip to content

Commit

Permalink
Allow for styles to persist over lines (#123)
Browse files — browse the repository at this point in the history
* Allow for styles to persist over lines

* Update to pass style attributes by reference
  • Loading branch information
justin-taylor authored Dec 17, 2024
1 parent f285923 commit 646078d
Show file tree
Hide file tree
Showing 8 changed files with 134 additions and 63 deletions.
50 changes: 23 additions & 27 deletions srt.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ func ReadFromSRT(i io.Reader) (o *Subtitles, err error) {
var line string
var lineNum int
var s = &Item{}
var sa = &StyleAttributes{}
for scanner.Scan() {
// Fetch line
line = strings.TrimSpace(scanner.Text())
Expand All @@ -58,6 +59,9 @@ func ReadFromSRT(i io.Reader) (o *Subtitles, err error) {

// Line contains time boundaries
if strings.Contains(line, srtTimeBoundariesSeparator) {
// Reset style attributes
sa = &StyleAttributes{}

// Remove last item of previous subtitle since it should be the index.
// If the last line is empty then the item is missing an index.
var index string
Expand Down Expand Up @@ -118,7 +122,7 @@ func ReadFromSRT(i io.Reader) (o *Subtitles, err error) {
o.Items = append(o.Items, s)
} else {
// Add text
if l := parseTextSrt(strings.TrimSpace(line)); len(l.Items) > 0 {
if l := parseTextSrt(line, sa); len(l.Items) > 0 {
s.Lines = append(s.Lines, l)
}
}
Expand All @@ -127,7 +131,7 @@ func ReadFromSRT(i io.Reader) (o *Subtitles, err error) {
}

// parseTextSrt parses the input line to fill the Line
func parseTextSrt(i string) (o Line) {
func parseTextSrt(i string, sa *StyleAttributes) (o Line) {
// special handling needed for empty line
if strings.TrimSpace(i) == "" {
o.Items = []LineItem{{Text: ""}}
Expand All @@ -138,13 +142,6 @@ func parseTextSrt(i string) (o Line) {
tr := html.NewTokenizer(strings.NewReader(i))

// Loop
var (
bold bool
italic bool
underline bool
color *string
pos byte
)
for {
// Get next tag
t := tr.Next()
Expand All @@ -164,46 +161,45 @@ func parseTextSrt(i string) (o Line) {
// Parse italic/bold/underline
switch token.Data {
case "b":
bold = false
sa.SRTBold = false
case "i":
italic = false
sa.SRTItalics = false
case "u":
underline = false
sa.SRTUnderline = false
case "font":
color = nil
sa.SRTColor = nil
}
case html.StartTagToken:
// Parse italic/bold/underline
switch token.Data {
case "b":
bold = true
sa.SRTBold = true
case "i":
italic = true
sa.SRTItalics = true
case "u":
underline = true
sa.SRTUnderline = true
case "font":
if c := htmlTokenAttribute(&token, "color"); c != nil {
color = c
sa.SRTColor = c
}
}
case html.TextToken:
if s := strings.TrimSpace(raw); s != "" {
// Get style attribute
var sa *StyleAttributes
if bold || italic || underline || color != nil || pos != 0 {
sa = &StyleAttributes{
SRTBold: bold,
SRTColor: color,
SRTItalics: italic,
SRTPosition: pos,
SRTUnderline: underline,
var styleAttributes *StyleAttributes
if sa.SRTBold || sa.SRTColor != nil || sa.SRTItalics || sa.SRTUnderline {
styleAttributes = &StyleAttributes{
SRTBold: sa.SRTBold,
SRTColor: sa.SRTColor,
SRTItalics: sa.SRTItalics,
SRTUnderline: sa.SRTUnderline,
}
sa.propagateSRTAttributes()
styleAttributes.propagateSRTAttributes()
}

// Append item
o.Items = append(o.Items, LineItem{
InlineStyle: sa,
InlineStyle: styleAttributes,
Text: unescapeHTML(s),
})
}
Expand Down
25 changes: 18 additions & 7 deletions srt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func TestSRTStyled(t *testing.T) {
assert.NoError(t, err)

// assert the items are properly parsed
assert.Len(t, s.Items, 6)
assert.Len(t, s.Items, 9)
assert.Equal(t, 17*time.Second+985*time.Millisecond, s.Items[0].StartAt)
assert.Equal(t, 20*time.Second+521*time.Millisecond, s.Items[0].EndAt)
assert.Equal(t, "[instrumental music]", s.Items[0].Lines[0].String())
Expand All @@ -80,36 +80,47 @@ func TestSRTStyled(t *testing.T) {
assert.Equal(t, 1*time.Minute+31*time.Second+992*time.Millisecond, s.Items[5].EndAt)
assert.Equal(t, "[automated]", s.Items[5].Lines[0].String())
assert.Equal(t, "'The time is 7:35.'", s.Items[5].Lines[1].String())
assert.Equal(t, "Test with multi line italics", s.Items[6].Lines[0].String())
assert.Equal(t, "Terminated on the next line", s.Items[6].Lines[1].String())
assert.Equal(t, "Unterminated styles", s.Items[7].Lines[0].String())
assert.Equal(t, "Do no fall to the next item", s.Items[8].Lines[0].String())

// assert the styles of the items
assert.Len(t, s.Items, 6)
assert.Equal(t, "#00ff00", *s.Items[0].Lines[0].Items[0].InlineStyle.SRTColor)
assert.Zero(t, s.Items[0].Lines[0].Items[0].InlineStyle.SRTPosition)
assert.True(t, s.Items[0].Lines[0].Items[0].InlineStyle.SRTBold)
assert.False(t, s.Items[0].Lines[0].Items[0].InlineStyle.SRTItalics)
assert.False(t, s.Items[0].Lines[0].Items[0].InlineStyle.SRTUnderline)
assert.Equal(t, "#ff00ff", *s.Items[1].Lines[0].Items[0].InlineStyle.SRTColor)
assert.Zero(t, s.Items[1].Lines[0].Items[0].InlineStyle.SRTPosition)
assert.False(t, s.Items[1].Lines[0].Items[0].InlineStyle.SRTBold)
assert.False(t, s.Items[1].Lines[0].Items[0].InlineStyle.SRTItalics)
assert.False(t, s.Items[1].Lines[0].Items[0].InlineStyle.SRTUnderline)
assert.Equal(t, "#00ff00", *s.Items[2].Lines[0].Items[0].InlineStyle.SRTColor)
assert.Zero(t, s.Items[2].Lines[0].Items[0].InlineStyle.SRTPosition)
assert.False(t, s.Items[2].Lines[0].Items[0].InlineStyle.SRTBold)
assert.False(t, s.Items[2].Lines[0].Items[0].InlineStyle.SRTItalics)
assert.False(t, s.Items[2].Lines[0].Items[0].InlineStyle.SRTUnderline)
assert.Nil(t, s.Items[3].Lines[0].Items[0].InlineStyle.SRTColor)
assert.Zero(t, s.Items[3].Lines[0].Items[0].InlineStyle.SRTPosition)
assert.True(t, s.Items[3].Lines[0].Items[0].InlineStyle.SRTBold)
assert.False(t, s.Items[3].Lines[0].Items[0].InlineStyle.SRTItalics)
assert.True(t, s.Items[3].Lines[0].Items[0].InlineStyle.SRTUnderline)
assert.Nil(t, s.Items[4].Lines[0].Items[0].InlineStyle)
assert.Nil(t, s.Items[5].Lines[0].Items[0].InlineStyle)
assert.Nil(t, s.Items[5].Lines[1].Items[0].InlineStyle.SRTColor)
assert.Zero(t, s.Items[5].Lines[1].Items[0].InlineStyle.SRTPosition)
assert.False(t, s.Items[5].Lines[1].Items[0].InlineStyle.SRTBold)
assert.True(t, s.Items[5].Lines[1].Items[0].InlineStyle.SRTItalics)
assert.False(t, s.Items[5].Lines[1].Items[0].InlineStyle.SRTUnderline)
assert.True(t, s.Items[6].Lines[0].Items[0].InlineStyle.SRTItalics)
assert.False(t, s.Items[6].Lines[0].Items[0].InlineStyle.SRTUnderline)
assert.False(t, s.Items[6].Lines[0].Items[0].InlineStyle.SRTBold)
assert.Nil(t, s.Items[6].Lines[0].Items[0].InlineStyle.SRTColor)
assert.True(t, s.Items[6].Lines[1].Items[0].InlineStyle.SRTItalics)
assert.False(t, s.Items[6].Lines[1].Items[0].InlineStyle.SRTUnderline)
assert.False(t, s.Items[6].Lines[1].Items[0].InlineStyle.SRTBold)
assert.Nil(t, s.Items[6].Lines[1].Items[0].InlineStyle.SRTColor)
assert.True(t, s.Items[7].Lines[0].Items[0].InlineStyle.SRTItalics)
assert.False(t, s.Items[7].Lines[0].Items[0].InlineStyle.SRTUnderline)
assert.False(t, s.Items[7].Lines[0].Items[0].InlineStyle.SRTBold)
assert.Nil(t, s.Items[7].Lines[0].Items[0].InlineStyle.SRTColor)
assert.Nil(t, s.Items[8].Lines[0].Items[0].InlineStyle)

// Write to srt
w := &bytes.Buffer{}
Expand Down
13 changes: 13 additions & 0 deletions testdata/example-in-styled.srt
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,16 @@
00:01:29,590 --> 00:01:31,992
[automated]
<i>'The time is 7:35.'</i>

7
00:08:00,000 --> 00:09:00,000
<i>Test with multi line italics
Terminated on the next line</i>

8
00:09:00,000 --> 00:10:00,000
<i>Unterminated styles
9
00:10:00,000 --> 00:11:00,000
Do no fall to the next item
13 changes: 13 additions & 0 deletions testdata/example-out-styled.srt
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,16 @@
00:01:29,590 --> 00:01:31,992
[automated]
<i>'The time is 7:35.'</i>

7
00:08:00,000 --> 00:09:00,000
<i>Test with multi line italics</i>
<i>Terminated on the next line</i>

8
00:09:00,000 --> 00:10:00,000
<i>Unterminated styles</i>

9
00:10:00,000 --> 00:11:00,000
Do no fall to the next item
13 changes: 13 additions & 0 deletions testdata/example-out-styled.vtt
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,16 @@ WEBVTT
00:01:29.590 --> 00:01:31.992
[automated]
<i>'The time is 7:35.'</i>

7
00:08:00.000 --> 00:09:00.000
<i>Test with multi line italics</i>
<i>Terminated on the next line</i>

8
00:09:00.000 --> 00:10:00.000
<i>Unterminated styles</i>

9
00:10:00.000 --> 00:11:00.000
Do no fall to the next item
44 changes: 23 additions & 21 deletions webvtt.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
var blockName string
var comments []string
var index int
var webVTTStyles *StyleAttributes
var sa = &StyleAttributes{}

for scanner.Scan() {
// Fetch line
Expand All @@ -162,11 +162,15 @@ func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
// Reset the block name unless we are in the middle of CSS.
// If we are in a STYLE block and the CSS is empty, or the last CSS line ends with a right brace,
// then we are no longer in CSS and can switch to parsing the next WebVTT block.
if blockName != webvttBlockNameStyle || webVTTStyles == nil ||
len(webVTTStyles.WebVTTStyles) == 0 ||
strings.HasSuffix(webVTTStyles.WebVTTStyles[len(webVTTStyles.WebVTTStyles)-1], "}") {
if blockName != webvttBlockNameStyle || sa == nil ||
len(sa.WebVTTStyles) == 0 ||
strings.HasSuffix(sa.WebVTTStyles[len(sa.WebVTTStyles)-1], "}") {
blockName = ""
}

// Reset WebVTTTags
sa.WebVTTTags = []WebVTTTag{}

// Region
case strings.HasPrefix(line, "Region: "):
// Add region styles
Expand Down Expand Up @@ -207,9 +211,9 @@ func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
blockName = webvttBlockNameStyle

if _, ok := o.Styles[webvttDefaultStyleID]; !ok {
webVTTStyles = &StyleAttributes{}
sa = &StyleAttributes{}
o.Styles[webvttDefaultStyleID] = &Style{
InlineStyle: webVTTStyles,
InlineStyle: sa,
ID: webvttDefaultStyleID,
}
}
Expand Down Expand Up @@ -314,10 +318,10 @@ func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
case webvttBlockNameComment:
comments = append(comments, line)
case webvttBlockNameStyle:
webVTTStyles.WebVTTStyles = append(webVTTStyles.WebVTTStyles, line)
sa.WebVTTStyles = append(sa.WebVTTStyles, line)
case webvttBlockNameText:
// Parse line
if l := parseTextWebVTT(line); len(l.Items) > 0 {
if l := parseTextWebVTT(line, sa); len(l.Items) > 0 {
item.Lines = append(item.Lines, l)
}
default:
Expand All @@ -330,12 +334,10 @@ func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
}

// parseTextWebVTT parses the input line to fill the Line
func parseTextWebVTT(i string) (o Line) {
func parseTextWebVTT(i string, sa *StyleAttributes) (o Line) {
// Create tokenizer
tr := html.NewTokenizer(strings.NewReader(i))

webVTTTagStack := make([]WebVTTTag, 0, 16)

// Loop
for {
// Get next tag
Expand All @@ -348,8 +350,8 @@ func parseTextWebVTT(i string) (o Line) {
switch t {
case html.EndTagToken:
// Pop the top of stack if we meet end tag
if len(webVTTTagStack) > 0 {
webVTTTagStack = webVTTTagStack[:len(webVTTTagStack)-1]
if len(sa.WebVTTTags) > 0 {
sa.WebVTTTags = sa.WebVTTTags[:len(sa.WebVTTTags)-1]
}
case html.StartTagToken:
if matches := webVTTRegexpTag.FindStringSubmatch(string(tr.Raw())); len(matches) > 4 {
Expand Down Expand Up @@ -377,7 +379,7 @@ func parseTextWebVTT(i string) (o Line) {
}

// Push the tag to stack
webVTTTagStack = append(webVTTTagStack, WebVTTTag{
sa.WebVTTTags = append(sa.WebVTTTags, WebVTTTag{
Name: tagName,
Classes: classes,
Annotation: annotation,
Expand All @@ -386,18 +388,18 @@ func parseTextWebVTT(i string) (o Line) {

case html.TextToken:
// Get style attribute
var sa *StyleAttributes
if len(webVTTTagStack) > 0 {
tags := make([]WebVTTTag, len(webVTTTagStack))
copy(tags, webVTTTagStack)
sa = &StyleAttributes{
var styleAttributes *StyleAttributes
if len(sa.WebVTTTags) > 0 {
tags := make([]WebVTTTag, len(sa.WebVTTTags))
copy(tags, sa.WebVTTTags)
styleAttributes = &StyleAttributes{
WebVTTTags: tags,
}
sa.propagateWebVTTAttributes()
styleAttributes.propagateWebVTTAttributes()
}

// Append items
o.Items = append(o.Items, parseTextWebVTTTextToken(sa, string(tr.Raw()))...)
o.Items = append(o.Items, parseTextWebVTTTextToken(styleAttributes, string(tr.Raw()))...)
}
}
return
Expand Down
12 changes: 6 additions & 6 deletions webvtt_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ func TestParseTextWebVTT(t *testing.T) {
t.Run("When both voice tags are available", func(t *testing.T) {
testData := `<v Bob>Correct tag</v>`

s := parseTextWebVTT(testData)
s := parseTextWebVTT(testData, &StyleAttributes{})
assert.Equal(t, "Bob", s.VoiceName)
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "Correct tag", s.Items[0].Text)
Expand All @@ -22,7 +22,7 @@ func TestParseTextWebVTT(t *testing.T) {
t.Run("When there is no end tag", func(t *testing.T) {
testData := `<v Bob> Text without end tag`

s := parseTextWebVTT(testData)
s := parseTextWebVTT(testData, &StyleAttributes{})
assert.Equal(t, "Bob", s.VoiceName)
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "Text without end tag", s.Items[0].Text)
Expand All @@ -31,7 +31,7 @@ func TestParseTextWebVTT(t *testing.T) {
t.Run("When the end tag is correct", func(t *testing.T) {
testData := `<v Bob>Incorrect end tag</vi>`

s := parseTextWebVTT(testData)
s := parseTextWebVTT(testData, &StyleAttributes{})
assert.Equal(t, "Bob", s.VoiceName)
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "Incorrect end tag", s.Items[0].Text)
Expand All @@ -40,7 +40,7 @@ func TestParseTextWebVTT(t *testing.T) {
t.Run("When inline timestamps are included", func(t *testing.T) {
testData := `<00:01:01.000>With inline <00:01:02.000>timestamps`

s := parseTextWebVTT(testData)
s := parseTextWebVTT(testData, &StyleAttributes{})
assert.Equal(t, 2, len(s.Items))
assert.Equal(t, "With inline", s.Items[0].Text)
assert.Equal(t, time.Minute+time.Second, s.Items[0].StartAt)
Expand All @@ -51,7 +51,7 @@ func TestParseTextWebVTT(t *testing.T) {
t.Run("When inline timestamps together", func(t *testing.T) {
testData := `<00:01:01.000><00:01:02.000>With timestamp tags together`

s := parseTextWebVTT(testData)
s := parseTextWebVTT(testData, &StyleAttributes{})
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "With timestamp tags together", s.Items[0].Text)
assert.Equal(t, time.Minute+2*time.Second, s.Items[0].StartAt)
Expand All @@ -60,7 +60,7 @@ func TestParseTextWebVTT(t *testing.T) {
t.Run("When inline timestamps is at end", func(t *testing.T) {
testData := `With end timestamp<00:01:02.000>`

s := parseTextWebVTT(testData)
s := parseTextWebVTT(testData, &StyleAttributes{})
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "With end timestamp", s.Items[0].Text)
assert.Equal(t, time.Duration(0), s.Items[0].StartAt)
Expand Down
Loading

0 comments on commit 646078d

Please sign in to comment.