Skip to content

Commit

Permalink
🎨 Improve EPUB asset file content parsing siyuan-note/siyuan#9072
Browse files Browse the repository at this point in the history
  • Loading branch information
88250 committed Aug 30, 2023
1 parent 85070ec commit 059c34f
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 19 deletions.
15 changes: 15 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib

# Test binary, built with `go test -c`
*.test

# Output of the go coverage tool, specifically when used with LiteIDE
*.out

# IDE
.idea
22 changes: 19 additions & 3 deletions book.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ import (
"encoding/xml"
"errors"
"io"
"io/ioutil"
"path"
"strings"
)

type Book struct {
Expand Down Expand Up @@ -55,7 +55,7 @@ func (p *Book) readBytes(n string) ([]byte, error) {
}
defer fd.Close()

return ioutil.ReadAll(fd)
return io.ReadAll(fd)

}

Expand All @@ -65,5 +65,21 @@ func (p *Book) open(n string) (io.ReadCloser, error) {
return f.Open()
}
}
return nil, errors.New(n + " not found!")

if !strings.ContainsAny(n, "-_") {
return nil, errors.New(n + " not found")
}

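// Fall back to comparing names with every "-" and "_" removed from both the requested name and the archive entry name.
// Improve EPUB asset file content parsing https://github.com/siyuan-note/siyuan/issues/9072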
nn := strings.ReplaceAll(n, "-", "")
nn = strings.ReplaceAll(nn, "_", "")
for _, f := range p.fd.File {
fn := strings.ReplaceAll(f.Name, "-", "")
fn = strings.ReplaceAll(fn, "_", "")

if fn == nn {
return f.Open()
}
}
return nil, errors.New(n + " not found")
}
18 changes: 5 additions & 13 deletions epub.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@ package epub
import (
"archive/zip"
"bytes"
"io"
"io/ioutil"

"github.com/wmentor/html"
"io"
)

func Open(fn string) (*Book, error) {
Expand Down Expand Up @@ -58,36 +56,31 @@ func Reader(filename string, onChapter func(chapter string, data []byte) bool) e
return nil, err
}
defer fd.Close()

return ioutil.ReadAll(fd)
return io.ReadAll(fd)
}

for _, pt := range bk.Ncx.Points {
for _, np := range pt.Points {

name := np.Text

data, err := readerF(np.Content.Src)
if err != nil {
return err
// Improve EPUB asset file content parsing https://github.com/siyuan-note/siyuan/issues/9072
// Ignore the error: stop reading here and report success, so any remaining chapters are skipped
return nil
}

if !onChapter(name, data) {
return nil
}

}
}

return nil
}

func ToTxt(filename string, output io.Writer) error {

notFirst := false

return Reader(filename, func(chapter string, data []byte) bool {

parser := html.New()
parser.Parse(bytes.NewReader(data))

Expand All @@ -98,7 +91,6 @@ func ToTxt(filename string, output io.Writer) error {
}

output.Write(parser.Text())

return true
})
}
8 changes: 5 additions & 3 deletions epub_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ import (
)

func TestReader(t *testing.T) {
bk, err := Open("./data/test.epub")
fn := "./data/test.epub"

bk, err := Open(fn)
if err != nil {
t.Fatal(err)
}
Expand All @@ -19,7 +21,7 @@ func TestReader(t *testing.T) {

i := 0

Reader("./data/test.epub", func(n string, data []byte) bool {
Reader(fn, func(n string, data []byte) bool {
i++
if data == nil {
t.Fatal("reader failed")
Expand All @@ -33,7 +35,7 @@ func TestReader(t *testing.T) {

buf := bytes.NewBuffer(nil)

ToTxt("./data/test.epub", buf)
ToTxt(fn, buf)

if buf.Len() == 0 {
t.Fatal("ToTxt failed")
Expand Down

0 comments on commit 059c34f

Please sign in to comment.