Skip to content

Commit 1f66178

Browse files
authored
Improve Search Result Parsing / Show Errors In Web UI (#43)
* WIP * Add testify for better test output. * Don't share a logger in the irc / dcc mock servers. * Rewrite search parser to be more maintainable. - Don't mutate the line as we parse. It makes it difficult to reason about the current state. - Split each piece into a separate function that takes the original line and extracts only what we are looking for - Handle and add tests for special cases like when the file size isn't present. Fixes #41. * Handle cases when author has weird %\w% text. - Example: %F77FE9FF1CCD% - Look for the above format and remove it if present. - Fixes #42. * Add additional file extensions. * Display parse errors in Web UI. - Allow users to manually copy and paste the result string so they can still download the files. - When there are parse errors, a button appears at the top of the grid, allowing users to view all errors and the raw search result string in a separate errors grid. They can copy the search result manually to the top input box and click download. * Remove fmt.Println() call. * Update packages.
1 parent ef086d3 commit 1f66178

29 files changed

+1302
-471
lines changed

.idea/.gitignore

+8
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/codeStyles/Project.xml

+48
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/codeStyles/codeStyleConfig.xml

+5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/modules.xml

+8
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/openbooks.iml

+12
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/prettier.xml

+7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/vcs.xml

+6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

core/search_parser.go

+147-8
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ package core
22

33
import (
44
"bufio"
5+
"encoding/json"
56
"errors"
67
"fmt"
78
"io"
@@ -11,7 +12,9 @@ import (
1112
"strings"
1213
)
1314

14-
// List of common file extensions
15+
// List of file extensions that I've encountered.
16+
// Some of them aren't eBooks, but they were returned
17+
// in previous search results.
1518
var fileTypes = [...]string{
1619
"epub",
1720
"mobi",
@@ -20,7 +23,13 @@ var fileTypes = [...]string{
2023
"rtf",
2124
"pdf",
2225
"cdr",
23-
"rar",
26+
"lit",
27+
"cbr",
28+
"doc",
29+
"htm",
30+
"jpg",
31+
"txt",
32+
"rar", // Compressed extensions should always be last 2 items
2433
"zip",
2534
}
2635

@@ -35,8 +44,19 @@ type BookDetail struct {
3544
}
3645

3746
// ParseError pairs a search result line that could not be parsed with the
// error describing why parsing failed.
type ParseError struct {
	Line  string `json:"line"`
	Error error  `json:"error"`
}

// MarshalJSON encodes the ParseError as {"line": "...", "error": "..."},
// rendering the error as its message string. An error interface value has no
// exported fields, so the default encoding would emit "{}" for it.
//
// The method uses a value receiver so that it is applied to both ParseError
// and *ParseError values; with a pointer receiver encoding/json skips the
// method for non-addressable values. A nil Error is encoded as an empty
// string instead of panicking.
func (p ParseError) MarshalJSON() ([]byte, error) {
	item := struct {
		Line  string `json:"line"`
		Error string `json:"error"`
	}{
		Line: p.Line,
	}
	if p.Error != nil {
		item.Error = p.Error.Error()
	}
	return json.Marshal(item)
}
4161

4262
func (p ParseError) String() string {
@@ -51,20 +71,20 @@ func ParseSearchFile(filePath string) ([]BookDetail, []ParseError) {
5171
}
5272
defer file.Close()
5373

54-
return ParseSearch(file)
74+
return ParseSearchV2(file)
5575
}
5676

5777
func ParseSearch(reader io.Reader) ([]BookDetail, []ParseError) {
5878
var books []BookDetail
59-
var errors []ParseError
79+
var parseErrors []ParseError
6080

6181
scanner := bufio.NewScanner(reader)
6282
for scanner.Scan() {
6383
line := scanner.Text()
6484
if strings.HasPrefix(line, "!") {
6585
dat, err := parseLine(line)
6686
if err != nil {
67-
errors = append(errors, ParseError{Line: line, Error: err})
87+
parseErrors = append(parseErrors, ParseError{Line: line, Error: err})
6888
} else {
6989
books = append(books, dat)
7090
}
@@ -73,7 +93,7 @@ func ParseSearch(reader io.Reader) ([]BookDetail, []ParseError) {
7393

7494
sort.Slice(books, func(i, j int) bool { return books[i].Server < books[j].Server })
7595

76-
return books, errors
96+
return books, parseErrors
7797
}
7898

7999
// Parse line extracts data from a single line
@@ -138,3 +158,122 @@ func parseLine(line string) (BookDetail, error) {
138158

139159
return book, nil
140160
}
161+
162+
func ParseSearchV2(reader io.Reader) ([]BookDetail, []ParseError) {
163+
var books []BookDetail
164+
var parseErrors []ParseError
165+
166+
scanner := bufio.NewScanner(reader)
167+
for scanner.Scan() {
168+
line := scanner.Text()
169+
if strings.HasPrefix(line, "!") {
170+
dat, err := parseLineV2(line)
171+
if err != nil {
172+
parseErrors = append(parseErrors, ParseError{Line: line, Error: err})
173+
} else {
174+
books = append(books, dat)
175+
}
176+
}
177+
}
178+
179+
sort.Slice(books, func(i, j int) bool { return books[i].Server < books[j].Server })
180+
181+
return books, parseErrors
182+
}
183+
184+
func parseLineV2(line string) (BookDetail, error) {
185+
getServer := func(line string) (string, error) {
186+
if line[0] != '!' {
187+
return "", errors.New("result lines must start with '!'")
188+
}
189+
190+
firstSpace := strings.Index(line, " ")
191+
if firstSpace == -1 {
192+
return "", errors.New("unable parse server name")
193+
}
194+
195+
return line[1:firstSpace], nil
196+
}
197+
198+
getAuthor := func(line string) (string, error) {
199+
firstSpace := strings.Index(line, " ")
200+
dashChar := strings.Index(line, " - ")
201+
if dashChar == -1 {
202+
return "", errors.New("unable to parse author")
203+
}
204+
author := line[firstSpace+len(" ") : dashChar]
205+
206+
// Handles case with weird author characters %\w% ("%F77FE9FF1CCD% Michael Haag")
207+
if strings.Contains(author, "%") {
208+
split := strings.SplitAfterN(author, " ", 2)
209+
return split[1], nil
210+
}
211+
212+
return author, nil
213+
}
214+
215+
getTitle := func(line string) (string, string, int) {
216+
title := ""
217+
fileFormat := ""
218+
endIndex := -1
219+
// Get the Title
220+
for _, ext := range fileTypes { //Loop through each possible file extension we've got on record
221+
endTitle := strings.Index(line, "."+ext) // check if it contains our extension
222+
if endTitle == -1 {
223+
continue
224+
}
225+
fileFormat = ext
226+
if ext == "rar" || ext == "zip" { // If the extension is .rar or .zip the actual format is contained in ()
227+
for _, ext2 := range fileTypes[:len(fileTypes)-2] { // Range over the eBook formats (exclude archives)
228+
if strings.Contains(strings.ToLower(line[:endTitle]), ext2) {
229+
fileFormat = ext2
230+
}
231+
}
232+
}
233+
startIndex := strings.Index(line, " - ")
234+
title = line[startIndex+len(" - ") : endTitle]
235+
endIndex = endTitle
236+
}
237+
238+
return title, fileFormat, endIndex
239+
}
240+
241+
getSize := func(line string) (string, int) {
242+
const delimiter = " ::INFO:: "
243+
infoIndex := strings.LastIndex(line, delimiter)
244+
245+
if infoIndex != -1 {
246+
// Handle cases when there is additional info after the file size (ex ::HASH:: )
247+
parts := strings.Split(line[infoIndex+len(delimiter):], " ")
248+
return parts[0], infoIndex
249+
}
250+
251+
return "N/A", len(line)
252+
}
253+
254+
server, err := getServer(line)
255+
if err != nil {
256+
return BookDetail{}, err
257+
}
258+
259+
author, err := getAuthor(line)
260+
if err != nil {
261+
return BookDetail{}, err
262+
}
263+
264+
title, format, titleIndex := getTitle(line)
265+
if titleIndex == -1 {
266+
return BookDetail{}, errors.New("unable to parse title")
267+
}
268+
269+
size, endIndex := getSize(line)
270+
271+
return BookDetail{
272+
Server: server,
273+
Author: author,
274+
Title: title,
275+
Format: format,
276+
Size: size,
277+
Full: strings.TrimSpace(line[:endIndex]),
278+
}, nil
279+
}

0 commit comments

Comments
 (0)