@@ -13,7 +13,6 @@ import (
13
13
14
14
"github.com/nlnwa/gowarc/v2"
15
15
"github.com/nlnwa/warchaeology/v3/cmd/internal/flag"
16
- "github.com/nlnwa/warchaeology/v3/cmd/internal/log"
17
16
"github.com/nlnwa/warchaeology/v3/internal/filewalker"
18
17
"github.com/nlnwa/warchaeology/v3/internal/filter"
19
18
"github.com/nlnwa/warchaeology/v3/internal/warc"
@@ -34,14 +33,20 @@ const (
34
33
ShowPayload = "payload"
35
34
ShowPayloadShort = "P"
36
35
ShowPayloadHelp = "show payload"
36
+
37
+ Compression = "compress"
38
+ CompressionShort = "z"
39
+ CompressionHelp = "compress output (per record)"
37
40
)
38
41
39
42
type CatOptions struct {
40
43
paths []string
41
44
offset int64
42
45
recordNum int
43
46
recordCount int
47
+ force bool
44
48
compress bool
49
+ continueOnError bool
45
50
filter * filter.RecordFilter
46
51
writer * writer
47
52
fileWalker * filewalker.FileWalker
@@ -53,6 +58,7 @@ type CatFlags struct {
53
58
FilterFlags flag.FilterFlags
54
59
WarcIteratorFlags flag.WarcIteratorFlags
55
60
WarcRecordOptionFlags flag.WarcRecordOptionFlags
61
+ ErrorFlags flag.ErrorFlags
56
62
}
57
63
58
64
func (f CatFlags ) AddFlags (cmd * cobra.Command ) {
@@ -61,11 +67,12 @@ func (f CatFlags) AddFlags(cmd *cobra.Command) {
61
67
f .FilterFlags .AddFlags (cmd )
62
68
f .WarcIteratorFlags .AddFlags (cmd )
63
69
f .WarcRecordOptionFlags .AddFlags (cmd )
70
+ f .ErrorFlags .AddFlags (cmd )
64
71
65
72
flags .BoolP (ShowWarcHeader , ShowWarcHeaderShort , false , ShowWarcHeaderHelp )
66
73
flags .BoolP (ShowProtocolHeader , ShowProtocolHeaderShort , false , ShowProtocolHeaderHelp )
67
74
flags .BoolP (ShowPayload , ShowPayloadShort , false , ShowPayloadHelp )
68
- flags .BoolP ("compress" , "z" , false , "output is compressed (per record)" )
75
+ flags .BoolP (Compression , CompressionShort , false , CompressionHelp )
69
76
}
70
77
71
78
func (f CatFlags ) ShowWarcHeader () bool {
@@ -108,11 +115,13 @@ func (f CatFlags) ToOptions() (*CatOptions, error) {
108
115
109
116
return & CatOptions {
110
117
paths : fileList ,
111
- fileWalker : fileWalker ,
112
- filter : filter ,
113
118
offset : f .WarcIteratorFlags .Offset (),
114
119
recordCount : f .WarcIteratorFlags .Limit (),
115
120
recordNum : f .WarcIteratorFlags .RecordNum (),
121
+ force : f .WarcIteratorFlags .Force (),
122
+ filter : filter ,
123
+ continueOnError : f .ErrorFlags .ContinueOnError (),
124
+ fileWalker : fileWalker ,
116
125
compress : f .Compress (),
117
126
writer : writer ,
118
127
warcRecordOptions : f .WarcRecordOptionFlags .ToWarcRecordOptions (),
@@ -126,10 +135,11 @@ func NewCmdCat() *cobra.Command {
126
135
Use : "cat FILE/DIR ..." ,
127
136
Short : "Concatenate and print warc files" ,
128
137
Long : `` ,
129
- Example : `Print all content from a WARC file
138
+ Example : `
139
+ # Print all content from a WARC file (in principle the same as zcat)
130
140
warc cat file1.warc.gz
131
141
132
- # Pipe payload from record #4 into the image viewer feh
142
+ # Pipe the payload of the 4th record into the image viewer feh
133
143
warc cat -n4 -P file1.warc.gz | feh -` ,
134
144
RunE : func (cmd * cobra.Command , args []string ) error {
135
145
o , err := flags .ToOptions ()
@@ -145,7 +155,11 @@ warc cat -n4 -P file1.warc.gz | feh -`,
145
155
return err
146
156
}
147
157
cmd .SilenceUsage = true
148
- return o .Run ()
158
+ err = o .Run ()
159
+ if errors .Is (err , context .Canceled ) {
160
+ os .Exit (1 )
161
+ }
162
+ return err
149
163
},
150
164
}
151
165
@@ -181,33 +195,36 @@ func (o *CatOptions) Run() error {
181
195
ctx , cancel := signal .NotifyContext (context .Background (), syscall .SIGTERM , syscall .SIGINT )
182
196
defer cancel ()
183
197
184
- closer , err := log . InitLogger ( os . Stderr )
185
- if err != nil {
186
- return err
187
- }
188
- defer closer . Close ()
198
+ for _ , path := range o . paths {
199
+ err := o . fileWalker . Walk ( ctx , path , func ( fs afero. Fs , path string , err error ) error {
200
+ if err != nil {
201
+ return err
202
+ }
189
203
190
- walkFn := func (fs afero.Fs , path string , err error ) error {
191
- if err != nil {
192
- return err
193
- }
194
- if err := o .catFile (ctx , fs , path ); err != nil {
195
- slog .Error (err .Error (), "path" , path )
196
- }
197
- return nil
198
- }
204
+ err = o .handleFile (ctx , fs , path )
205
+ if err != nil {
206
+ if ! o .continueOnError {
207
+ cancel ()
208
+ }
209
+ var recordErr warc.RecordError
210
+ if errors .As (err , & recordErr ) {
211
+ slog .Error (recordErr .Error (), "path" , path , "offset" , recordErr .Offset ())
212
+ } else {
213
+ slog .Error (err .Error (), "path" , path )
214
+ }
215
+ }
199
216
200
- for _ , file := range o . paths {
201
- err := o . fileWalker . Walk ( file , walkFn )
217
+ return nil
218
+ } )
202
219
if err != nil {
203
220
return err
204
221
}
205
222
}
206
223
return nil
207
224
}
208
225
209
- // catFile reads a WARC file and writes the content to stdout
210
- func (o * CatOptions ) catFile (ctx context.Context , fs afero.Fs , path string ) error {
226
+ // handleFile reads a WARC file and writes the content to stdout
227
+ func (o * CatOptions ) handleFile (ctx context.Context , fs afero.Fs , path string ) error {
211
228
f , err := fs .Open (path )
212
229
if err != nil {
213
230
return err
@@ -216,24 +233,36 @@ func (o *CatOptions) catFile(ctx context.Context, fs afero.Fs, path string) erro
216
233
if err != nil {
217
234
return err
218
235
}
219
- defer func () { _ = warcFileReader .Close () }()
236
+ defer func () {
237
+ _ = warcFileReader .Close ()
238
+ }()
220
239
221
- ctx , cancel := context .WithCancel (ctx )
222
- defer cancel ()
240
+ var lastOffset int64 = - 1
223
241
224
- for record := range warc .NewIterator (ctx , warcFileReader , o .filter , o .recordNum , o .recordCount ) {
242
+ for record , err := range warc .Records (warcFileReader , o .filter , o .recordNum , o .recordCount ) {
243
+ select {
244
+ case <- ctx .Done ():
245
+ return ctx .Err ()
246
+ default :
247
+ }
248
+ if err != nil {
249
+ // When forcing, avoid infinite loop by ensuring the iterator moves forward
250
+ if o .force && lastOffset != record .Offset {
251
+ slog .Warn (err .Error (), "offset" , record .Offset , "path" , path )
252
+ lastOffset = record .Offset
253
+ continue
254
+ }
255
+ return warc .Error (record , err )
256
+ }
225
257
if err := o .handleRecord (record ); err != nil {
226
- return err
258
+ return warc . Error ( record , err )
227
259
}
228
260
}
229
261
return nil
230
262
}
231
263
232
264
func (o * CatOptions ) handleRecord (record warc.Record ) error {
233
265
defer record .Close ()
234
- if record .Err != nil {
235
- return record .Err
236
- }
237
266
var w io.Writer
238
267
if o .compress {
239
268
gw := gzip .NewWriter (os .Stdout )
0 commit comments