Skip to content

Commit

Permalink
refactor and export api
Browse files Browse the repository at this point in the history
  • Loading branch information
martinlindhe committed Jun 18, 2017
1 parent 77513ff commit 9228f87
Show file tree
Hide file tree
Showing 20 changed files with 265 additions and 144 deletions.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
The MIT License (MIT)

Copyright (c) 2015 Martin Lindhe
Copyright (c) 2015-2017 Martin Lindhe

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
25 changes: 23 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,31 @@ WARNING: The API is unstable, work in progress!
go get -u github.com/martinlindhe/subtitles
```

# Example - convert srt to vtt

# Example
```go
in := "1\n" +
"00:00:04,630 --> 00:00:06,018\n" +
"Go ninja!\n" +
"\n" +
"2\n" +
"00:01:09,630 --> 00:01:11,005\n" +
"No ninja!\n"

res, _ := NewFromSRT(in)

// Output: WEBVTT
//
// 00:00:04.630 --> 00:00:06.018
// Go ninja!
//
// 00:01:09.630 --> 00:01:11.005
// No ninja!
fmt.Println(res.AsVTT())
```

# Example - download subtitle from thesubdb.com

Fetch subtitle from thesubdb.com:
```go
f, _ := os.Open(fileName)
defer f.Close()
Expand Down
11 changes: 0 additions & 11 deletions caption.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package subtitles

import (
"fmt"
"time"
)

Expand All @@ -12,13 +11,3 @@ type Caption struct {
End time.Time
Text []string
}

// AsSrt renders the caption as a single srt entry: the sequence number,
// the "start --> end" timing line, every text line, and a trailing empty
// line, each terminated by eol.
func (c Caption) AsSrt() string {
	header := fmt.Sprintf("%d", c.Seq) + eol
	timing := SrtTime(c.Start) + " --> " + SrtTime(c.End) + eol

	out := header + timing
	for i := range c.Text {
		out += c.Text[i] + eol
	}
	out += eol
	return out
}
2 changes: 1 addition & 1 deletion caption_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@ func TestRenderTime(t *testing.T) {
assert.Equal(t, "1\n"+
"18:40:22,110 --> 18:41:20,123\n"+
"<i>Go ninja!</i>\n\n",
cap.AsSrt())
cap.AsSRT())
}
44 changes: 23 additions & 21 deletions cleaner.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package subtitles

import (
"fmt"
"strings"
"time"

Expand All @@ -10,44 +9,45 @@ import (

// CleanupSub parses .srt or .ssa, performs cleanup and renders to a .srt, returning a string. The caller is responsible for passing a UTF-8 string.
func CleanupSub(utf8 string, filterName string, keepAds bool, sync int) (string, error) {
var captions []Caption
var subtitle Subtitle
var err error

if looksLikeSrt(utf8) {
captions = parseSrt(utf8)
subtitle, err = NewFromSRT(utf8)
} else {
// falls back on .ssa decoding, for now
captions = parseSsa(utf8)
subtitle, err = NewFromSSA(utf8)
}
if err != nil {
return "", err
}

if !keepAds {
captions = removeAds(captions)
subtitle.removeAds()
}

if sync != 0 {
captions = resyncSubs(captions, sync)
subtitle.resyncSubs(sync)
}

captions = filterSubs(captions, filterName)
out := renderSrt(captions)
subtitle.filterSubs(filterName)
out := subtitle.AsSRT()

return out, nil
}

func resyncSubs(subs []Caption, sync int) []Caption {

// var res []caption
fmt.Printf("resyncing with %d\n", sync)

for i := range subs {
subs[i].Start = subs[i].Start.Add(time.Duration(sync) * time.Millisecond)
subs[i].End = subs[i].End.Add(time.Duration(sync) * time.Millisecond)
func (subtitle *Subtitle) resyncSubs(sync int) {
// log.Printf("resyncing with %d\n", sync)
for i := range subtitle.Captions {
subtitle.Captions[i].Start = subtitle.Captions[i].Start.
Add(time.Duration(sync) * time.Millisecond)
subtitle.Captions[i].End = subtitle.Captions[i].End.
Add(time.Duration(sync) * time.Millisecond)
}

return subs
}

// removeAds removes advertisements from the subtitles
func removeAds(subs []Caption) (res []Caption) {
func (subtitle *Subtitle) removeAds() *Subtitle {
ads := []string{
// english:
"captions paid for by",
Expand Down Expand Up @@ -109,7 +109,8 @@ func removeAds(subs []Caption) (res []Caption) {
}

seq := 1
for orgSeq, sub := range subs {
res := []Caption{}
for orgSeq, sub := range subtitle.Captions {

isAd := false

Expand All @@ -130,5 +131,6 @@ func removeAds(subs []Caption) (res []Caption) {
seq++
}
}
return
subtitle.Captions = res
return subtitle
}
10 changes: 5 additions & 5 deletions cleaner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (

func TestRemoveAds(t *testing.T) {

in := []Caption{{
in := Subtitle{[]Caption{{
1,
MakeTime(0, 0, 4, 630),
MakeTime(0, 0, 6, 18),
Expand All @@ -23,9 +23,9 @@ func TestRemoveAds(t *testing.T) {
MakeTime(0, 1, 9, 630),
MakeTime(0, 1, 11, 005),
[]string{"No ninja!"},
}}
}}}

expected := []Caption{{
expected := Subtitle{[]Caption{{
1,
MakeTime(0, 0, 4, 630),
MakeTime(0, 0, 6, 18),
Expand All @@ -35,7 +35,7 @@ func TestRemoveAds(t *testing.T) {
MakeTime(0, 1, 9, 630),
MakeTime(0, 1, 11, 005),
[]string{"No ninja!"},
}}
}}}

assert.Equal(t, expected, removeAds(in))
assert.Equal(t, &expected, in.removeAds())
}
7 changes: 3 additions & 4 deletions filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@ import (
)

// filterSubs passes the captions through a filter function
func filterSubs(captions []Caption, filter string) []Caption {
func (subtitle *Subtitle) filterSubs(filter string) {
if filter == "caps" {
return filterCapitalization(captions)
subtitle.filterCapitalization()
}
if filter == "html" {
return filterHTML(captions)
subtitle.filterHTML()
}
if filter != "none" {
fmt.Printf("Unrecognized filter name: %s\n", filter)
}
return captions
}
14 changes: 4 additions & 10 deletions caps.go → filter_caps.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,34 +7,28 @@ import (
)

// filterCapitalization converts "ALL CAPS" text into "Initial letter capped"
func filterCapitalization(captions []Caption) []Caption {

for _, cap := range captions {
func (subtitle *Subtitle) filterCapitalization() *Subtitle {
for _, cap := range subtitle.Captions {
for i, line := range cap.Text {

clean := ucFirst(line)

if clean != cap.Text[i] {
log.Printf("[caps] %s -> %s\n", cap.Text[i], clean)
cap.Text[i] = clean
cap.Text[i] = clean // XXX updated?!
}
}
}

return captions
return subtitle
}

// ucFirst returns s with its first character upper-cased and every
// following character lower-cased, e.g. "GO NINJA!" becomes "Go ninja!".
// An empty string is returned unchanged.
func ucFirst(s string) string {
	// Ranging over a string yields the byte offset of each rune, so the
	// first offset greater than zero is where the second rune starts.
	// Splitting there lets each part be converted in one call instead of
	// growing the result one rune at a time.
	for i := range s {
		if i > 0 {
			return strings.ToUpper(s[:i]) + strings.ToLower(s[i:])
		}
	}

	// Zero or one rune: there is no tail to lower-case.
	return strings.ToUpper(s)
}
10 changes: 5 additions & 5 deletions caps_test.go → filter_caps_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,19 @@ import (

func TestFilterCapitalization(t *testing.T) {

in := []Caption{{
in := Subtitle{Captions: []Caption{{
Seq: 1,
Start: MakeTime(0, 0, 4, 630),
End: MakeTime(0, 0, 6, 18),
Text: []string{"GO NINJA!", "NINJA GO!"},
}}
}}}

expected := []Caption{{
expected := Subtitle{[]Caption{{
1,
MakeTime(0, 0, 4, 630),
MakeTime(0, 0, 6, 18),
[]string{"Go ninja!", "Ninja go!"},
}}
}}}

assert.Equal(t, expected, filterCapitalization(in))
assert.Equal(t, &expected, in.filterCapitalization())
}
8 changes: 4 additions & 4 deletions html.go → filter_html.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@ import (
)

// filterHTML removes all html tags from captions
func filterHTML(captions []Caption) []Caption {
for _, cap := range captions {
func (subtitle *Subtitle) filterHTML() *Subtitle {
for _, cap := range subtitle.Captions {
for i, line := range cap.Text {
clean := sanitize.HTML(line)
if clean != cap.Text[i] {
log.Printf("[html] %s -> %s\n", cap.Text[i], clean)
cap.Text[i] = clean
cap.Text[i] = clean // XXX works?!
}
}
}
return captions
return subtitle
}
10 changes: 5 additions & 5 deletions html_test.go → filter_html_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,19 @@ import (

func TestFilterHTML(t *testing.T) {

in := []Caption{{
in := Subtitle{[]Caption{{
1,
MakeTime(0, 0, 4, 630),
MakeTime(0, 0, 6, 18),
[]string{"<b>GO NINJA!</b>", "NINJA&nbsp;GO!"},
}}
}}}

expected := []Caption{{
expected := Subtitle{[]Caption{{
1,
MakeTime(0, 0, 4, 630),
MakeTime(0, 0, 6, 18),
[]string{"GO NINJA!", "NINJA GO!"},
}}
}}}

assert.Equal(t, expected, filterHTML(in))
assert.Equal(t, &expected, in.filterHTML())
}
2 changes: 1 addition & 1 deletion thesubdb.go → finder_thesubdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func (f SubFinder) TheSubDb(args ...string) ([]byte, error) {
}

req.Header.Set("User-Agent",
"SubDB/1.0 (GoSubber/1.0; https://github.com/martinlindhe/subber)")
"SubDB/1.0 (GoSubber/1.0; https://github.com/martinlindhe/subtitles)")

resp, err := client.Do(req)
if err != nil {
Expand Down
2 changes: 2 additions & 0 deletions thesubdb_test.go → finder_thesubdb_test.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
// +build network

package subtitles

import (
Expand Down
8 changes: 4 additions & 4 deletions parser.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
package subtitles

// parse tries to parse a subtitle from the data stream
func parse(b []byte) []Caption {
func parse(b []byte) (Subtitle, error) {

s := convertToUTF8(b)

if s[0] == '[' {
// looks like ssa
return parseSsa(s)
return NewFromSSA(s)
}

// XXXX
return parseSrt(s)
// XXX
return NewFromSRT(s)
}
Loading

0 comments on commit 9228f87

Please sign in to comment.