This repository was archived by the owner on Nov 19, 2024. It is now read-only.

Commit f9dfd58

Refactor and simplify interfaces
Split Archival into Archival/Extraction since some archive formats can't do both. Rar is proprietary for creating, and there's no pure-Go 7z writing implementation that I know of.

- Extractor no longer requires a filename filter (kind of pointless at best, confusing at worst)
- CompressedArchive renamed to Archive
- Archival is now just creating archives
- New Extraction interface is for reading archives
- Archive format can compose compression, archival, and extraction
1 parent 76ea0d6 commit f9dfd58

8 files changed: +98 −98 lines changed

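To make the new composition concrete before the per-file diffs, here is a minimal sketch written as if it lived in the same package as the code below. The package name, the Tar and Gz format types, and the exampleTarGz helper are assumptions for illustration only; they are not part of this commit.

package archiver // package name assumed for illustration

import (
	"context"
	"io"
)

// Sketch only: shows how the Archive type introduced by this commit composes
// the three roles. Tar and Gz are assumed format types; exampleTarGz is a
// hypothetical helper, not part of the library.
func exampleTarGz(ctx context.Context, out io.Writer, files []FileInfo) error {
	format := Archive{
		Compression: Gz{},  // optional outer compression layer
		Archival:    Tar{}, // used when writing (Archive / ArchiveAsync)
		Extraction:  Tar{}, // used when reading (Extract)
	}
	// Writing goes through Archival and is wrapped by the compressor.
	return format.Archive(ctx, out, files)
}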

7z.go

+5 −8
@@ -51,18 +51,15 @@ func (z SevenZip) Match(_ context.Context, filename string, stream io.Reader) (M
 	return mr, nil
 }
 
-// Archive is not implemented for 7z, but the method exists so that SevenZip satisfies the ArchiveFormat interface.
-func (z SevenZip) Archive(_ context.Context, _ io.Writer, _ []FileInfo) error {
-	return fmt.Errorf("not implemented for 7z because there is no pure Go implementation found")
-}
+// Archive is not implemented for 7z because I do not know of a pure-Go 7z writer.
 
 // Extract extracts files from z, implementing the Extractor interface. Uniquely, however,
 // sourceArchive must be an io.ReaderAt and io.Seeker, which are oddly disjoint interfaces
 // from io.Reader which is what the method signature requires. We chose this signature for
 // the interface because we figure you can Read() from anything you can ReadAt() or Seek()
 // with. Due to the nature of the zip archive format, if sourceArchive is not an io.Seeker
 // and io.ReaderAt, an error is returned.
-func (z SevenZip) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchive []string, handleFile FileHandler) error {
+func (z SevenZip) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error {
 	sra, ok := sourceArchive.(seekReaderAt)
 	if !ok {
 		return fmt.Errorf("input type must be an io.ReaderAt and io.Seeker because of zip format constraints")
@@ -87,9 +84,6 @@ func (z SevenZip) Extract(ctx context.Context, sourceArchive io.Reader, pathsInA
 			return err // honor context cancellation
 		}
 
-		if !fileIsIncluded(pathsInArchive, f.Name) {
-			continue
-		}
 		if fileIsIncluded(skipDirs, f.Name) {
 			continue
 		}
@@ -130,3 +124,6 @@ func (z SevenZip) Extract(ctx context.Context, sourceArchive io.Reader, pathsInA
 
 // https://py7zr.readthedocs.io/en/latest/archive_format.html#signature
 var sevenZipHeader = []byte("7z\xBC\xAF\x27\x1C")
+
+// Interface guard
+var _ Extractor = SevenZip{}
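
For callers, a rough usage sketch of the updated Extract signature follows, in the same assumed in-package style as the earlier sketch. The extractOne7z helper is hypothetical, and the FileInfo.NameInArchive field and Open method are assumed from the rest of the library rather than shown in this diff; since the pathsInArchive parameter is gone, any filtering now happens inside the FileHandler.

package archiver // package name assumed for illustration

import (
	"context"
	"io"
	"os"
)

// Sketch only: extract a single entry from a .7z file with the new
// two-argument Extract. An *os.File satisfies the io.ReaderAt and io.Seeker
// requirement described above. NameInArchive is assumed to be a FileInfo field.
func extractOne7z(ctx context.Context, archivePath, want string, dst io.Writer) error {
	f, err := os.Open(archivePath)
	if err != nil {
		return err
	}
	defer f.Close()

	return SevenZip{}.Extract(ctx, f, func(ctx context.Context, fi FileInfo) error {
		if fi.NameInArchive != want {
			return nil // no filter parameter anymore, so skip here
		}
		rc, err := fi.Open()
		if err != nil {
			return err
		}
		defer rc.Close()
		_, err = io.Copy(dst, rc)
		return err
	})
}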

formats.go

+65 −51
@@ -42,13 +42,14 @@ func RegisterFormat(format Format) {
 func Identify(ctx context.Context, filename string, stream io.Reader) (Format, io.Reader, error) {
 	var compression Compression
 	var archival Archival
+	var extraction Extraction
 
 	rewindableStream, err := newRewindReader(stream)
 	if err != nil {
 		return nil, nil, err
 	}
 
-	// try compression format first, since that's the outer "layer"
+	// try compression format first, since that's the outer "layer" if combined
 	for name, format := range formats {
 		cf, isCompression := format.(Compression)
 		if !isCompression {
@@ -68,10 +69,11 @@ func Identify(ctx context.Context, filename string, stream io.Reader) (Format, i
 		}
 	}
 
-	// try archive format next
+	// try archival and extraction format next
 	for name, format := range formats {
-		af, isArchive := format.(Archival)
-		if !isArchive {
+		ar, isArchive := format.(Archival)
+		ex, isExtract := format.(Extraction)
+		if !isArchive && !isExtract {
 			continue
 		}
 
@@ -81,20 +83,23 @@ func Identify(ctx context.Context, filename string, stream io.Reader) (Format, i
 		}
 
 		if matchResult.Matched() {
-			archival = af
+			archival = ar
+			extraction = ex
 			break
 		}
 	}
 
-	// the stream should be rewound by identifyOne
+	// the stream should be rewound by identifyOne; then return the most specific type of match
 	bufferedStream := rewindableStream.reader()
 	switch {
-	case compression != nil && archival == nil:
+	case compression != nil && archival == nil && extraction == nil:
 		return compression, bufferedStream, nil
-	case compression == nil && archival != nil:
+	case compression == nil && archival != nil && extraction == nil:
 		return archival, bufferedStream, nil
-	case compression != nil && archival != nil:
-		return CompressedArchive{compression, archival}, bufferedStream, nil
+	case compression == nil && archival == nil && extraction != nil:
		return extraction, bufferedStream, nil
+	case archival != nil || extraction != nil:
+		return Archive{compression, archival, extraction}, bufferedStream, nil
 	default:
 		return nil, bufferedStream, NoMatch
 	}
@@ -161,44 +166,44 @@ func readAtMost(stream io.Reader, n int) ([]byte, error) {
 		return nil, err
 	}
 
-// CompressedArchive combines a compression format on top of an archive
-// format (e.g. "tar.gz") and provides both functionalities in a single
-// type. It ensures that archive functions are wrapped by compressors and
+// Archive represents an archive which may be compressed at the outer layer.
+// It combines a compression format on top of an archive/extraction
+// format (e.g. ".tar.gz") and provides both functionalities in a single
+// type. It ensures that archival functions are wrapped by compressors and
 // decompressors. However, compressed archives have some limitations; for
 // example, files cannot be inserted/appended because of complexities with
 // modifying existing compression state (perhaps this could be overcome,
 // but I'm not about to try it).
 //
-// As this type is intended to compose compression and archive formats,
-// both must be specified in order for this value to be valid, or its
-// methods will return errors.
-type CompressedArchive struct {
+// The embedded Archival and Extraction values are used for writing and
+// reading, respectively. Compression is optional and is only needed if the
+// format is compressed externally (for example, tar archives).
+type Archive struct {
 	Compression
 	Archival
+	Extraction
 }
 
-// Name returns a concatenation of the archive format name
-// and the compression format name.
-func (caf CompressedArchive) Extension() string {
-	if caf.Compression == nil && caf.Archival == nil {
-		panic("missing both compression and archive formats")
-	}
+// Name returns a concatenation of the archive and compression format extensions.
+func (ar Archive) Extension() string {
 	var name string
-	if caf.Archival != nil {
-		name += caf.Archival.Extension()
+	if ar.Archival != nil {
+		name += ar.Archival.Extension()
+	} else if ar.Extraction != nil {
+		name += ar.Extraction.Extension()
 	}
-	if caf.Compression != nil {
-		name += caf.Compression.Extension()
+	if ar.Compression != nil {
+		name += ar.Compression.Extension()
 	}
 	return name
 }
 
-// Match matches if the input matches both the compression and archive format.
-func (caf CompressedArchive) Match(ctx context.Context, filename string, stream io.Reader) (MatchResult, error) {
+// Match matches if the input matches both the compression and archival/extraction format.
+func (ar Archive) Match(ctx context.Context, filename string, stream io.Reader) (MatchResult, error) {
 	var conglomerate MatchResult
 
-	if caf.Compression != nil {
-		matchResult, err := caf.Compression.Match(ctx, filename, stream)
+	if ar.Compression != nil {
+		matchResult, err := ar.Compression.Match(ctx, filename, stream)
 		if err != nil {
 			return MatchResult{}, err
 		}
@@ -208,7 +213,7 @@ func (caf CompressedArchive) Match(ctx context.Context, filename string, stream
 
 		// wrap the reader with the decompressor so we can
 		// attempt to match the archive by reading the stream
-		rc, err := caf.Compression.OpenReader(stream)
+		rc, err := ar.Compression.OpenReader(stream)
 		if err != nil {
 			return matchResult, err
 		}
@@ -218,8 +223,8 @@ func (caf CompressedArchive) Match(ctx context.Context, filename string, stream
 		conglomerate = matchResult
 	}
 
-	if caf.Archival != nil {
-		matchResult, err := caf.Archival.Match(ctx, filename, stream)
+	if ar.Archival != nil {
+		matchResult, err := ar.Archival.Match(ctx, filename, stream)
 		if err != nil {
 			return MatchResult{}, err
 		}
@@ -234,26 +239,32 @@ func (caf CompressedArchive) Match(ctx context.Context, filename string, stream
 }
 
 // Archive adds files to the output archive while compressing the result.
-func (caf CompressedArchive) Archive(ctx context.Context, output io.Writer, files []FileInfo) error {
-	if caf.Compression != nil {
-		wc, err := caf.Compression.OpenWriter(output)
+func (ar Archive) Archive(ctx context.Context, output io.Writer, files []FileInfo) error {
+	if ar.Archival == nil {
+		return fmt.Errorf("no archival format")
+	}
+	if ar.Compression != nil {
+		wc, err := ar.Compression.OpenWriter(output)
 		if err != nil {
 			return err
 		}
 		defer wc.Close()
 		output = wc
 	}
-	return caf.Archival.Archive(ctx, output, files)
+	return ar.Archival.Archive(ctx, output, files)
 }
 
 // ArchiveAsync adds files to the output archive while compressing the result asynchronously.
-func (caf CompressedArchive) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-chan ArchiveAsyncJob) error {
-	do, ok := caf.Archival.(ArchiverAsync)
+func (ar Archive) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-chan ArchiveAsyncJob) error {
+	if ar.Archival == nil {
+		return fmt.Errorf("no archival format")
+	}
+	do, ok := ar.Archival.(ArchiverAsync)
 	if !ok {
-		return fmt.Errorf("%s archive does not support async writing", caf.Extension())
+		return fmt.Errorf("%T archive does not support async writing", ar.Archival)
 	}
-	if caf.Compression != nil {
-		wc, err := caf.Compression.OpenWriter(output)
+	if ar.Compression != nil {
+		wc, err := ar.Compression.OpenWriter(output)
 		if err != nil {
 			return err
 		}
@@ -264,16 +275,19 @@ func (caf CompressedArchive) ArchiveAsync(ctx context.Context, output io.Writer,
 }
 
 // Extract reads files out of an archive while decompressing the results.
-func (caf CompressedArchive) Extract(ctx context.Context, sourceArchive io.Reader, pathsInArchive []string, handleFile FileHandler) error {
-	if caf.Compression != nil {
-		rc, err := caf.Compression.OpenReader(sourceArchive)
+func (ar Archive) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error {
+	if ar.Extraction == nil {
+		return fmt.Errorf("no extraction format")
+	}
+	if ar.Compression != nil {
+		rc, err := ar.Compression.OpenReader(sourceArchive)
 		if err != nil {
 			return err
 		}
 		defer rc.Close()
 		sourceArchive = rc
 	}
-	return caf.Archival.Extract(ctx, sourceArchive, pathsInArchive, handleFile)
+	return ar.Extraction.Extract(ctx, sourceArchive, handleFile)
 }
 
 // MatchResult returns true if the format was matched either
@@ -408,8 +422,8 @@ var formats = make(map[string]Format)
 
 // Interface guards
 var (
-	_ Format = (*CompressedArchive)(nil)
-	_ Archiver = (*CompressedArchive)(nil)
-	_ ArchiverAsync = (*CompressedArchive)(nil)
-	_ Extractor = (*CompressedArchive)(nil)
+	_ Format = (*Archive)(nil)
+	_ Archiver = (*Archive)(nil)
+	_ ArchiverAsync = (*Archive)(nil)
+	_ Extractor = (*Archive)(nil)
 )
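
As a usage-level illustration of the Identify change, here is a minimal sketch in the same assumed in-package style (identifyAndExtract is hypothetical). Identify now returns the most specific match, which may be a bare Compression, a bare Archival or Extraction format, or an Archive composing them; a caller that only wants to read can simply assert Extractor.

package archiver // package name assumed for illustration

import (
	"context"
	"fmt"
	"io"
)

// Sketch only: identify a stream and extract it if the matched format can be
// read. The returned reader must be used in place of the original stream,
// since Identify consumed bytes from it while matching.
func identifyAndExtract(ctx context.Context, filename string, input io.Reader, handle FileHandler) error {
	format, reader, err := Identify(ctx, filename, input)
	if err != nil {
		return err
	}
	if ex, ok := format.(Extractor); ok {
		return ex.Extract(ctx, reader, handle)
	}
	return fmt.Errorf("%s: identified format %s is not extractable", filename, format.Extension())
}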

formats_test.go

+2 −2
@@ -111,7 +111,7 @@ func checkErr(t *testing.T, err error, msgFmt string, args ...any) {
 		return
 	}
 	args = append(args, err)
-	t.Errorf(msgFmt+": %s", args...)
+	t.Fatalf(msgFmt+": %s", args...)
 }
 
 func TestIdentifyDoesNotMatchContentFromTrimmedKnownHeaderHaving0Suffix(t *testing.T) {
@@ -418,7 +418,7 @@ func TestIdentifyAndOpenZip(t *testing.T) {
 		t.Errorf("unexpected format found: expected=.zip actual=%s", format.Extension())
 	}
 
-	err = format.(Extractor).Extract(context.Background(), reader, nil, func(ctx context.Context, f FileInfo) error {
+	err = format.(Extractor).Extract(context.Background(), reader, func(ctx context.Context, f FileInfo) error {
		rc, err := f.Open()
		if err != nil {
			return err

fs.go

+11 −13
@@ -350,14 +350,12 @@ func (f ArchiveFS) Open(name string) (fs.File, error) {
 	}
 
 	var decompressor io.ReadCloser
-	if caf, ok := f.Format.(CompressedArchive); ok {
-		if caf.Compression != nil {
-			decompressor, err = caf.Compression.OpenReader(inputStream)
-			if err != nil {
-				return nil, err
-			}
-			inputStream = decompressor
+	if decomp, ok := f.Format.(Decompressor); ok {
+		decompressor, err = decomp.OpenReader(inputStream)
+		if err != nil {
+			return nil, err
 		}
+		inputStream = decompressor
 	}
 
 	// prepare the handler that we'll need if we have to iterate the
@@ -413,13 +411,13 @@ func (f ArchiveFS) Open(name string) (fs.File, error) {
 	// files may have a "." component in them, and the underlying format doesn't
 	// know about our file system semantics, so we need to filter ourselves (it's
 	// not significantly less efficient).
-	if caf, ok := f.Format.(CompressedArchive); ok {
+	if ar, ok := f.Format.(Archive); ok {
 		// bypass the CompressedArchive format's opening of the decompressor, since
-		// we already did it, since we need to keep it open after returning
+		// we already did it because we need to keep it open after returning.
 		// "I BYPASSED THE COMPRESSOR!" -Rey
-		err = caf.Archival.Extract(f.context(), inputStream, nil, handler)
+		err = ar.Extraction.Extract(f.context(), inputStream, handler)
 	} else {
-		err = f.Format.Extract(f.context(), inputStream, nil, handler)
+		err = f.Format.Extract(f.context(), inputStream, handler)
 	}
 	if err != nil {
 		return nil, &fs.PathError{Op: "open", Path: name, Err: fmt.Errorf("extract: %w", err)}
@@ -486,7 +484,7 @@ func (f ArchiveFS) Stat(name string) (fs.FileInfo, error) {
 	if f.Stream != nil {
 		inputStream = io.NewSectionReader(f.Stream, 0, f.Stream.Size())
 	}
-	err = f.Format.Extract(f.context(), inputStream, nil, handler)
+	err = f.Format.Extract(f.context(), inputStream, handler)
 	if err != nil && result.FileInfo == nil {
 		return nil, err
 	}
@@ -601,7 +599,7 @@ func (f *ArchiveFS) ReadDir(name string) ([]fs.DirEntry, error) {
 		inputStream = io.NewSectionReader(f.Stream, 0, f.Stream.Size())
 	}
 
-	err = f.Format.Extract(f.context(), inputStream, nil, handler)
+	err = f.Format.Extract(f.context(), inputStream, handler)
 	if err != nil {
 		// these being non-nil implies that we have indexed the archive,
 		// but if an error occurred, we likely only got part of the way
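
The switch from asserting CompressedArchive to asserting Decompressor is worth spelling out: both a standalone compression format and an Archive value (through its embedded Compression interface) satisfy Decompressor, so ArchiveFS can peel off the outer compression layer without special-casing the composite type. A rough sketch of that pattern in the same assumed in-package style, where openDecompressed is a hypothetical helper and the Compression is assumed non-nil when present:

package archiver // package name assumed for illustration

import "io"

// Sketch only: the pattern ArchiveFS.Open now uses. Any format that exposes
// OpenReader is unwrapped before the archive entries are read.
func openDecompressed(format Format, input io.Reader) (io.Reader, io.ReadCloser, error) {
	if decomp, ok := format.(Decompressor); ok {
		rc, err := decomp.OpenReader(input)
		if err != nil {
			return nil, nil, err
		}
		return rc, rc, nil // caller closes the ReadCloser when finished
	}
	return input, nil, nil // no outer compression layer to remove
}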

interfaces.go

+8 −8
@@ -33,10 +33,15 @@ type Compression interface {
 	Decompressor
 }
 
-// Archival is an archival format with both archive and extract methods.
+// Archival is an archival format that can create/write archives.
 type Archival interface {
 	Format
 	Archiver
+}
+
+// Extraction is an archival format that extract from (read) archives.
+type Extraction interface {
+	Format
 	Extractor
 }
 
@@ -86,19 +91,14 @@ type ArchiverAsync interface {
 // Extractor can extract files from an archive.
 type Extractor interface {
 	// Extract walks entries in the archive and calls handleFile for each
-	// entry that matches the pathsInArchive filter by path/name.
-	//
-	// If pathsInArchive is nil, all files are extracted without discretion.
-	// If pathsInArchive is empty, no files are extracted.
-	// If a path refers to a directory, all files within it are extracted.
-	// Extracted files are passed to the handleFile callback for handling.
+	// entry in the archive.
 	//
 	// Any files opened in the FileHandler should be closed when it returns,
 	// as there is no guarantee the files can be read outside the handler
 	// or after the walk has proceeded to the next file.
 	//
 	// Context cancellation must be honored.
-	Extract(ctx context.Context, archive io.Reader, pathsInArchive []string, handleFile FileHandler) error
+	Extract(ctx context.Context, archive io.Reader, handleFile FileHandler) error
 }
 
 // Inserter can insert files into an existing archive.
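
One practical consequence of the split is that read-only formats no longer have to stub out an Archive method, as 7z did above. A hedged sketch of the compile-time guards a format might now declare, in the same assumed in-package style; Tar and Rar are assumed format types used purely for illustration:

package archiver // package name assumed for illustration

// Sketch only: with Archival and Extraction separated, a read/write format
// asserts both, while a read-only format asserts Extraction alone.
var (
	_ Archival   = Tar{} // can create archives
	_ Extraction = Tar{} // can also read them
	_ Extraction = Rar{} // read-only: no Archival guard is possible
)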
