Skip to content

Commit

Permalink
api breakage: rework the subtitle dl code, added usage example
Browse files Browse the repository at this point in the history
  • Loading branch information
martinlindhe committed Feb 20, 2016
1 parent e2ac6cb commit 20ea91b
Show file tree
Hide file tree
Showing 7 changed files with 118 additions and 79 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
conformance-files/
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,18 @@ go get github.com/martinlindhe/subtitles
```


# Example

Fetch a subtitle from thesubdb.com:
```go
f, _ := os.Open(fileName)

finder := NewSubFinder(f, fileName, "en")

text, err := finder.TheSubDb()
```


# See also

- [subber](https://github.com/martinlindhe/subber) command line tool for subtitles
Expand Down
7 changes: 7 additions & 0 deletions dl-conformance-files
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/sh
#
# Downloads the thesubdb.com sample videos used by the hash conformance
# tests into conformance-files/thesubdb/.

# Stop at the first failing command so a broken download is not mistaken
# for a complete set of conformance files.
set -e

dest=conformance-files/thesubdb

mkdir -p "$dest"

# fetch conformance check videos from http://thesubdb.com/api/
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error page under the .mp4 name.
for name in dexter justified; do
    curl --fail "http://thesubdb.com/api/samples/$name.mp4" -o "$dest/$name.mp4"
done
21 changes: 21 additions & 0 deletions finder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package subtitles

import "os"

// SubFinder represents a video being queried for subtitles.
type SubFinder struct {
// FileName is the video's file name as supplied to NewSubFinder.
FileName string
// Language is sent as the "language" query parameter by TheSubDb.
Language string
// VideoFile is the opened video; TheSubDb passes it to SubDbHashFromFile.
VideoFile *os.File
// Quiet, when true, stops TheSubDb from printing the URL it fetches.
Quiet bool
}

// NewSubFinder returns a SubFinder for the given opened video, its file name
// and the wanted subtitle language. Quiet is left at its zero value (false).
func NewSubFinder(video *os.File, fileName string, language string) *SubFinder {
	finder := new(SubFinder)
	finder.VideoFile = video
	finder.FileName = fileName
	finder.Language = language
	return finder
}
4 changes: 4 additions & 0 deletions testextras.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,20 @@ import (
const tempFilePrefix = "moviehash-temp"

// check is a test helper: a nil error is a no-op, any other error is
// printed to stdout and then used as the panic value.
func check(e error) {
	if e == nil {
		return
	}

	fmt.Println(e)
	panic(e)
}

// makeTime builds a UTC time on January 1 of year 0 from the given hour,
// minute, second and millisecond values.
func makeTime(h int, m int, s int, ms int) time.Time {
	// time.Date takes nanoseconds, so scale the milliseconds up.
	const nanosPerMilli = 1000 * 1000
	nanos := ms * nanosPerMilli

	return time.Date(0, time.January, 1, h, m, s, nanos, time.UTC)
}

func createTempFile(byteSize int) string {

data := make([]byte, byteSize)

cnt := uint8(0)
Expand All @@ -39,6 +42,7 @@ func createTempFile(byteSize int) string {
}

func createZeroedTempFile(byteSize int) string {

data := make([]byte, byteSize)

f, err := ioutil.TempFile("/tmp", tempFilePrefix)
Expand Down
109 changes: 40 additions & 69 deletions thesubdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,72 +6,80 @@ import (
"io/ioutil"
"net/http"
"os"

log "github.com/Sirupsen/logrus"
)

// FindSub finds subtitle online, returns untouched data
func FindSub(videoFileName string, language string) ([]byte, error) {
if !exists(videoFileName) {
return nil, fmt.Errorf("%s not found", videoFileName)
}
// TheSubDb downloads a subtitle from thesubdb.com
func (f SubFinder) TheSubDb(args ...string) ([]byte, error) {

if isDirectory(videoFileName) {
return nil, fmt.Errorf("%s is not a file", videoFileName)
apiHost := ""
if len(args) > 0 {
apiHost = args[0]
} else {
apiHost = "api.thesubdb.com"
}

text, err := fromTheSubDb(videoFileName, language)
hash, err := SubDbHashFromFile(f.VideoFile)
if err != nil {
return nil, err
}

return text, nil
}
client := &http.Client{}

// FromTheSubDb downloads a subtitle from thesubdb.com
func fromTheSubDb(videoFileName string, language string, optional ...string) ([]byte, error) {
query := "http://" + apiHost +
"/?action=download" +
"&hash=" + hash +
"&language=" + f.Language

_apiHost := "api.thesubdb.com"
if len(optional) > 0 {
_apiHost = optional[0]
if !f.Quiet {
fmt.Println("Fetching", query, "...")
}

hash, err := createMovieHashFromMovieFile(videoFileName)
req, err := http.NewRequest("GET", query, nil)
if err != nil {
return nil, err
}

actualText, err := downloadSubtitleByHash(hash, language, _apiHost)
req.Header.Set("User-Agent",
"SubDB/1.0 (GoSubber/1.0; https://github.com/martinlindhe/subber)")

resp, err := client.Do(req)
if err != nil {
return nil, err
}

return actualText, nil
}
if resp.StatusCode == 404 {
return nil, fmt.Errorf("Subtitle not found")
}

// returns a md5-sum in hex-string representation
func createMovieHashFromMovieFile(fileName string) (string, error) {
if resp.StatusCode != 200 {
return nil, fmt.Errorf("Server error %s", resp.Status)
}

if !exists(fileName) {
return "", fmt.Errorf("File %s not found", fileName)
slurp, err := ioutil.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("Server reading request body: %v", err)
}

return slurp, nil
}

// SubDbHashFromFile returns a checksum in hex-string representation
// conforming to http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
func SubDbHashFromFile(f *os.File) (string, error) {

// rewind
f.Seek(0, 0)

// block size which is required for the API call
readSize := int64(64 * 1024)

f, err := os.Open(fileName)
if err != nil {
return "", err
}
defer f.Close()

fi, err := f.Stat()
if err != nil {
return "", err
}

if fi.Size() < readSize {
return "", fmt.Errorf("File is too small: %s", fileName)
return "", fmt.Errorf("Stream is too small: %d", fi.Size())
}

// read first part
Expand Down Expand Up @@ -99,40 +107,3 @@ func createMovieHashFromMovieFile(fileName string) (string, error) {

return fmt.Sprintf("%x", md5.Sum(combined)), nil
}

// downloadSubtitleByHash requests the subtitle identified by hash in the
// given language from apiHost and returns the response body untouched.
//
// The request is a GET to http://<apiHost>/?action=download&hash=...&language=...
// with the SubDB User-Agent header set. A 404 response yields a
// "Subtitle not found" error, any other non-200 status yields a
// "Server error" error carrying the status line, and transport or read
// failures are returned as errors as well.
func downloadSubtitleByHash(hash string, language string, apiHost string) ([]byte, error) {

	query := "http://" + apiHost + "/?action=download&hash=" + hash + "&language=" + language

	log.Printf("Fetching %s ...\n", query)

	req, err := http.NewRequest("GET", query, nil)
	if err != nil {
		return nil, err
	}

	req.Header.Set("User-Agent",
		"SubDB/1.0 (GoSubber/1.0; https://github.com/martinlindhe/subber)")

	client := &http.Client{}

	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	// The caller owns the body once Do succeeds; close it on every return
	// path (including the error statuses below) so the connection is not
	// leaked.
	defer resp.Body.Close()

	switch resp.StatusCode {
	case http.StatusOK:
		// fall out of the switch and read the subtitle below
	case http.StatusNotFound:
		return nil, fmt.Errorf("Subtitle not found")
	default:
		return nil, fmt.Errorf("Server error %s", resp.Status)
	}

	slurp, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("Server reading request body: %v", err)
	}

	return slurp, nil
}
43 changes: 33 additions & 10 deletions thesubdb_test.go
Original file line number Diff line number Diff line change
@@ -1,30 +1,53 @@
package subtitles

import (
"fmt"
"os"
"testing"

"github.com/stretchr/testify/assert"
)

func TestCreateMovieHashFromMovieFile(t *testing.T) {
func TestDownloadFromTheSubDb(t *testing.T) {

fileName := createTempFile(1024 * 1024 * 2)
fileName := createZeroedTempFile(1024 * 1024 * 4)
defer os.Remove(fileName)

hash, err := createMovieHashFromMovieFile(fileName)
f, err := os.Open(fileName)
assert.Equal(t, nil, err)

hash, err := SubDbHashFromFile(f)
assert.Equal(t, nil, err)
assert.Equal(t, "38a503307786991a982f8ded498b90e0", hash)
assert.Equal(t, "0dfbe8aa4c20b52e1b8bf3cb6cbdf193", hash)

os.Remove(fileName)
finder := NewSubFinder(f, fileName, "en")

text, err := finder.TheSubDb("sandbox.thesubdb.com")
assert.Equal(t, nil, err)
assert.True(t, len(text) > 1000)
}

func TestDownloadFromTheSubDb(t *testing.T) {
fileName := createZeroedTempFile(1024 * 1024 * 2)
func subDbConformTest(t *testing.T, fileName string, expectedHash string) {
if !exists(fileName) {
fmt.Println("ERROR thesubdb.com conformance tests missing, run ./hash-conformance-deps if you want to run these tests")
return
}

text, err := fromTheSubDb(fileName, "en", "sandbox.thesubdb.com")
f, err := os.Open(fileName)
assert.Equal(t, nil, err)
assert.True(t, len(text) > 1000)

os.Remove(fileName)
hash, err := SubDbHashFromFile(f)
assert.Equal(t, nil, err)
assert.Equal(t, expectedHash, hash)
}

// TestSubDbHashFromFile runs subDbConformTest on the two thesubdb.com sample
// videos, comparing the computed hash with the hard-coded expected value.
func TestSubDbHashFromFile(t *testing.T) {

// NOTE for this to work, run "./dl-conformance-files" to fetch needed files

// http://thesubdb.com/api/samples/dexter.mp4
subDbConformTest(t, "conformance-files/thesubdb/dexter.mp4", "ffd8d4aa68033dc03d1c8ef373b9028c")

// http://thesubdb.com/api/samples/justified.mp4
subDbConformTest(t, "conformance-files/thesubdb/justified.mp4", "edc1981d6459c6111fe36205b4aff6c2")
}

0 comments on commit 20ea91b

Please sign in to comment.