Skip to content

Commit dc020e4

Browse files
authored
Merge pull request #181 from nlnwa/refactor/error-handling
Consistent error handling between subcommands
2 parents 28bf016 + 2fef93f commit dc020e4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+1492
-1591
lines changed

arcreader/arcreader.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ type ArcFileReader struct {
1717
}
1818

1919
func NewArcFileReader(fs afero.Fs, filename string, offset int64, opts ...gowarc.WarcRecordOption) (*ArcFileReader, error) {
20-
file, err := fs.Open(filename) // For read access.
20+
file, err := fs.Open(filename)
2121
if err != nil {
2222
return nil, err
2323
}

arcreader/arcreader_test.go

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package arcreader
22

33
import (
4-
"context"
54
"path/filepath"
65
"testing"
76

@@ -43,9 +42,9 @@ func TestArcReader(t *testing.T) {
4342
t.Fatalf("unexpected error: %v", err)
4443
}
4544

46-
for record := range warc.NewIterator(context.Background(), arcFileReader, nil, 0, 0) {
47-
if record.Err != nil {
48-
t.Errorf("unexpected error: %v", record.Err)
45+
for _, err := range warc.Records(arcFileReader, nil, 0, 0) {
46+
if err != nil {
47+
t.Errorf("unexpected error: %v", err)
4948
}
5049
}
5150
})

arcreader/unmarshaler.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ func (u *unmarshaler) parseRecord(r *bufio.Reader, l1 string) (gowarc.WarcRecord
246246
func (u *unmarshaler) parseUrlRecordV1(l string) (gowarc.RecordType, string, string, time.Time, string, int64, error) {
247247
reg := regexp.MustCompile(`([^ ]*) ([^ ]*) (\d*) ([^ ]*) (\d*)`)
248248
subs := reg.FindStringSubmatch(l)
249-
if subs == nil || len(subs) < 4 {
249+
if len(subs) < 4 {
250250
return 0, "", "", time.Time{}, "", 0, fmt.Errorf("could not parse ARC record from: %s", l)
251251
}
252252
url := subs[1]

cmd/aart/aart.go

+33-21
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package aart
22

33
import (
44
"bytes"
5-
"context"
65
"errors"
76
"fmt"
87
"image"
@@ -11,7 +10,6 @@ import (
1110
_ "image/jpeg"
1211
_ "image/png"
1312
"io"
14-
"os"
1513

1614
"github.com/nfnt/resize"
1715
"github.com/nlnwa/gowarc/v2"
@@ -72,7 +70,7 @@ func NewCmdAart() *cobra.Command {
7270
flags := AartFlags{}
7371

7472
cmd := &cobra.Command{
75-
Use: "aart",
73+
Use: "aart FILE",
7674
Short: "Show images",
7775
Long: ``,
7876
Hidden: true,
@@ -123,30 +121,44 @@ func (o *AartOptions) Run() error {
123121
return fmt.Errorf("failed to create WARC reader: %v", err)
124122
}
125123

126-
for record := range warc.NewIterator(context.TODO(), wf, o.filter, o.recordNum, o.recordCount) {
127-
if record.Err != nil {
128-
return fmt.Errorf("failed reading records: %w", record.Err)
124+
for record, err := range warc.Records(wf, o.filter, o.recordNum, o.recordCount) {
125+
if err != nil {
126+
return err
129127
}
130-
wr := record.WarcRecord
131-
132-
if b, ok := wr.Block().(gowarc.HttpResponseBlock); ok {
133-
fmt.Printf("\u001B[2J\u001B[HUrl: %s\n\n", wr.WarcHeader().Get(gowarc.WarcTargetURI))
134-
r, err := b.PayloadBytes()
135-
if err != nil {
136-
fmt.Fprintf(os.Stderr, "Failed to get payload bytes: %v\n", err)
137-
}
138-
err = display(r, o.width)
139-
if err != nil {
140-
fmt.Fprintf(os.Stderr, "Failed to display: %v\n", err)
141-
continue
142-
}
143-
fmt.Printf("Hit enter to continue\n")
144-
_, _ = fmt.Scanln()
128+
err = o.handleRecord(record)
129+
if err != nil {
130+
return err
145131
}
146132
}
147133
return nil
148134
}
149135

136+
const ansiClearScreenEscapeSequence = "\u001B[2J\u001B[H"
137+
138+
func (o *AartOptions) handleRecord(record warc.Record) error {
139+
wr := record.WarcRecord
140+
141+
block, ok := wr.Block().(gowarc.HttpResponseBlock)
142+
if !ok {
143+
return nil
144+
}
145+
146+
fmt.Print(ansiClearScreenEscapeSequence)
147+
fmt.Printf("Url: %s\n\n", wr.WarcHeader().Get(gowarc.WarcTargetURI))
148+
b, err := block.PayloadBytes()
149+
if err != nil {
150+
return fmt.Errorf("failed to get payload bytes: %w", err)
151+
}
152+
err = display(b, o.width)
153+
if err != nil {
154+
return fmt.Errorf("failed to display: %w", err)
155+
}
156+
fmt.Printf("Hit enter to continue\n")
157+
_, _ = fmt.Scanln()
158+
159+
return nil
160+
}
161+
150162
var asciiChar = "MND8OZ$7I?+=~:,.."
151163

152164
func asciiArt(img image.Image, w, h int) []byte {

cmd/cat/cat.go

+62-33
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import (
1313

1414
"github.com/nlnwa/gowarc/v2"
1515
"github.com/nlnwa/warchaeology/v3/cmd/internal/flag"
16-
"github.com/nlnwa/warchaeology/v3/cmd/internal/log"
1716
"github.com/nlnwa/warchaeology/v3/internal/filewalker"
1817
"github.com/nlnwa/warchaeology/v3/internal/filter"
1918
"github.com/nlnwa/warchaeology/v3/internal/warc"
@@ -34,14 +33,20 @@ const (
3433
ShowPayload = "payload"
3534
ShowPayloadShort = "P"
3635
ShowPayloadHelp = "show payload"
36+
37+
Compression = "compress"
38+
CompressionShort = "z"
39+
CompressionHelp = "compress output (per record)"
3740
)
3841

3942
type CatOptions struct {
4043
paths []string
4144
offset int64
4245
recordNum int
4346
recordCount int
47+
force bool
4448
compress bool
49+
continueOnError bool
4550
filter *filter.RecordFilter
4651
writer *writer
4752
fileWalker *filewalker.FileWalker
@@ -53,6 +58,7 @@ type CatFlags struct {
5358
FilterFlags flag.FilterFlags
5459
WarcIteratorFlags flag.WarcIteratorFlags
5560
WarcRecordOptionFlags flag.WarcRecordOptionFlags
61+
ErrorFlags flag.ErrorFlags
5662
}
5763

5864
func (f CatFlags) AddFlags(cmd *cobra.Command) {
@@ -61,11 +67,12 @@ func (f CatFlags) AddFlags(cmd *cobra.Command) {
6167
f.FilterFlags.AddFlags(cmd)
6268
f.WarcIteratorFlags.AddFlags(cmd)
6369
f.WarcRecordOptionFlags.AddFlags(cmd)
70+
f.ErrorFlags.AddFlags(cmd)
6471

6572
flags.BoolP(ShowWarcHeader, ShowWarcHeaderShort, false, ShowWarcHeaderHelp)
6673
flags.BoolP(ShowProtocolHeader, ShowProtocolHeaderShort, false, ShowProtocolHeaderHelp)
6774
flags.BoolP(ShowPayload, ShowPayloadShort, false, ShowPayloadHelp)
68-
flags.BoolP("compress", "z", false, "output is compressed (per record)")
75+
flags.BoolP(Compression, CompressionShort, false, CompressionHelp)
6976
}
7077

7178
func (f CatFlags) ShowWarcHeader() bool {
@@ -108,11 +115,13 @@ func (f CatFlags) ToOptions() (*CatOptions, error) {
108115

109116
return &CatOptions{
110117
paths: fileList,
111-
fileWalker: fileWalker,
112-
filter: filter,
113118
offset: f.WarcIteratorFlags.Offset(),
114119
recordCount: f.WarcIteratorFlags.Limit(),
115120
recordNum: f.WarcIteratorFlags.RecordNum(),
121+
force: f.WarcIteratorFlags.Force(),
122+
filter: filter,
123+
continueOnError: f.ErrorFlags.ContinueOnError(),
124+
fileWalker: fileWalker,
116125
compress: f.Compress(),
117126
writer: writer,
118127
warcRecordOptions: f.WarcRecordOptionFlags.ToWarcRecordOptions(),
@@ -126,10 +135,11 @@ func NewCmdCat() *cobra.Command {
126135
Use: "cat FILE/DIR ...",
127136
Short: "Concatenate and print warc files",
128137
Long: ``,
129-
Example: `Print all content from a WARC file
138+
Example: `
139+
# Print all content from a WARC file (in principle the same as zcat)
130140
warc cat file1.warc.gz
131141
132-
# Pipe payload from record #4 into the image viewer feh
142+
# Pipe the payload of the 4th record into the image viewer feh
133143
warc cat -n4 -P file1.warc.gz | feh -`,
134144
RunE: func(cmd *cobra.Command, args []string) error {
135145
o, err := flags.ToOptions()
@@ -145,7 +155,11 @@ warc cat -n4 -P file1.warc.gz | feh -`,
145155
return err
146156
}
147157
cmd.SilenceUsage = true
148-
return o.Run()
158+
err = o.Run()
159+
if errors.Is(err, context.Canceled) {
160+
os.Exit(1)
161+
}
162+
return err
149163
},
150164
}
151165

@@ -181,33 +195,36 @@ func (o *CatOptions) Run() error {
181195
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT)
182196
defer cancel()
183197

184-
closer, err := log.InitLogger(os.Stderr)
185-
if err != nil {
186-
return err
187-
}
188-
defer closer.Close()
198+
for _, path := range o.paths {
199+
err := o.fileWalker.Walk(ctx, path, func(fs afero.Fs, path string, err error) error {
200+
if err != nil {
201+
return err
202+
}
189203

190-
walkFn := func(fs afero.Fs, path string, err error) error {
191-
if err != nil {
192-
return err
193-
}
194-
if err := o.catFile(ctx, fs, path); err != nil {
195-
slog.Error(err.Error(), "path", path)
196-
}
197-
return nil
198-
}
204+
err = o.handleFile(ctx, fs, path)
205+
if err != nil {
206+
if !o.continueOnError {
207+
cancel()
208+
}
209+
var recordErr warc.RecordError
210+
if errors.As(err, &recordErr) {
211+
slog.Error(recordErr.Error(), "path", path, "offset", recordErr.Offset())
212+
} else {
213+
slog.Error(err.Error(), "path", path)
214+
}
215+
}
199216

200-
for _, file := range o.paths {
201-
err := o.fileWalker.Walk(file, walkFn)
217+
return nil
218+
})
202219
if err != nil {
203220
return err
204221
}
205222
}
206223
return nil
207224
}
208225

209-
// catFile reads a WARC file and writes the content to stdout
210-
func (o *CatOptions) catFile(ctx context.Context, fs afero.Fs, path string) error {
226+
// handleFile reads a WARC file and writes the content to stdout
227+
func (o *CatOptions) handleFile(ctx context.Context, fs afero.Fs, path string) error {
211228
f, err := fs.Open(path)
212229
if err != nil {
213230
return err
@@ -216,24 +233,36 @@ func (o *CatOptions) catFile(ctx context.Context, fs afero.Fs, path string) erro
216233
if err != nil {
217234
return err
218235
}
219-
defer func() { _ = warcFileReader.Close() }()
236+
defer func() {
237+
_ = warcFileReader.Close()
238+
}()
220239

221-
ctx, cancel := context.WithCancel(ctx)
222-
defer cancel()
240+
var lastOffset int64 = -1
223241

224-
for record := range warc.NewIterator(ctx, warcFileReader, o.filter, o.recordNum, o.recordCount) {
242+
for record, err := range warc.Records(warcFileReader, o.filter, o.recordNum, o.recordCount) {
243+
select {
244+
case <-ctx.Done():
245+
return ctx.Err()
246+
default:
247+
}
248+
if err != nil {
249+
// When forcing, avoid infinite loop by ensuring the iterator moves forward
250+
if o.force && lastOffset != record.Offset {
251+
slog.Warn(err.Error(), "offset", record.Offset, "path", path)
252+
lastOffset = record.Offset
253+
continue
254+
}
255+
return warc.Error(record, err)
256+
}
225257
if err := o.handleRecord(record); err != nil {
226-
return err
258+
return warc.Error(record, err)
227259
}
228260
}
229261
return nil
230262
}
231263

232264
func (o *CatOptions) handleRecord(record warc.Record) error {
233265
defer record.Close()
234-
if record.Err != nil {
235-
return record.Err
236-
}
237266
var w io.Writer
238267
if o.compress {
239268
gw := gzip.NewWriter(os.Stdout)

cmd/cat/cat_test.go

+26-16
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,12 @@ package cat
33
import (
44
"bytes"
55
"compress/gzip"
6-
"context"
6+
"errors"
77
"io"
88
"math"
99
"os"
1010
"path/filepath"
1111
"testing"
12-
"time"
1312

1413
"github.com/nlnwa/gowarc/v2"
1514
"github.com/nlnwa/warchaeology/v3/internal/warc"
@@ -25,15 +24,31 @@ var testFiles = map[string]string{
2524
"samsung-with-error": filepath.Join(testDataDir, "warc", "samsung-with-error", "rec-33318048d933-20240317162652059-0.warc.gz"),
2625
}
2726

27+
var tests = []struct {
28+
name string
29+
err error
30+
}{
31+
{
32+
name: "empty",
33+
},
34+
{
35+
name: "single-record",
36+
},
37+
{
38+
name: "samsung-with-error",
39+
err: io.ErrUnexpectedEOF,
40+
},
41+
}
42+
2843
// TestWriteWarcRecord tests the writeWarcRecord function.
2944
func TestWriteWarcRecord(t *testing.T) {
30-
for name, testFile := range testFiles {
31-
t.Run(name, func(t *testing.T) {
45+
for _, test := range tests {
46+
t.Run(test.name, func(t *testing.T) {
3247
// capture testFile variable from outer scope
33-
testFile := testFile
48+
test := test
3449

3550
// resolve test file path
36-
testFile, err := filepath.Abs(testFile)
51+
testFile, err := filepath.Abs(testFiles[test.name])
3752
if err != nil {
3853
t.Fatal(err)
3954
}
@@ -114,8 +129,11 @@ func TestWriteWarcRecord(t *testing.T) {
114129
got := new(bytes.Buffer)
115130
var currentOffset int64
116131

117-
for record := range warc.NewIterator(context.Background(), warcFileReader, nil, 0, 0) {
118-
if record.Err != nil {
132+
for record, err := range warc.Records(warcFileReader, nil, 0, 0) {
133+
if err != nil {
134+
if !errors.Is(err, test.err) {
135+
t.Fatal(err)
136+
}
119137
break
120138
}
121139

@@ -155,11 +173,3 @@ want is %d bytes, got is %d bytes
155173
})
156174
}
157175
}
158-
159-
func BenchmarkDummy(b *testing.B) {
160-
// This is a dummy test, it should be replaced with something more
161-
// meaningful in a later commit
162-
for i := 0; i < b.N; i++ {
163-
time.Sleep(1 * time.Nanosecond)
164-
}
165-
}

0 commit comments

Comments
 (0)