From 24fa33e9b6a0b17e8418ffc90a94a06ab79bd5c2 Mon Sep 17 00:00:00 2001 From: Dan Garrick <59327926+dpgarrick@users.noreply.github.com> Date: Tue, 12 Sep 2023 14:00:37 +1200 Subject: [PATCH] zlib: More precise matching (#386) * demo bug * check 2 bytes of zlib header * add .zz test --- formats_test.go | 35 +++++++++++++++++++++++++++++++++++ zlib.go | 30 +++++++++++++++++++++++++----- 2 files changed, 60 insertions(+), 5 deletions(-) diff --git a/formats_test.go b/formats_test.go index 2531ceb1..106f48a0 100644 --- a/formats_test.go +++ b/formats_test.go @@ -3,6 +3,7 @@ package archiver import ( "bytes" "context" + "errors" "io" "io/fs" "math/rand" @@ -370,6 +371,13 @@ func TestIdentifyFindFormatByStreamContent(t *testing.T) { compressorName: "", wantFormatName: ".rar", }, + { + name: "should recognize zz", + openCompressionWriter: Zlib{}.OpenWriter, + content: []byte("this is text"), + compressorName: ".zz", + wantFormatName: ".zz", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -410,3 +418,30 @@ func TestIdentifyAndOpenZip(t *testing.T) { }) checkErr(t, err, "extracting zip") } + +func TestIdentifyASCIIFileStartingWithX(t *testing.T) { + // Create a temporary file starting with the letter 'x' + tmpFile, err := os.CreateTemp("", "TestIdentifyASCIIFileStartingWithX-tmp-*.txt") + if err != nil { + t.Fatalf("fail to create tmp test file for archive tests: err=%v", err) + } + + _, err = tmpFile.Write([]byte("xThis is a test file")) + if err != nil { + t.Fatalf("Failed to write to temp file: %v", err) + } + tmpFile.Close() + + // Open the file and use the Identify function + file, err := os.Open(tmpFile.Name()) + if err != nil { + t.Fatalf("Failed to open temp file: %v", err) + } + defer file.Close() + + _, _, err = Identify(tmpFile.Name(), file) + if !errors.Is(err, ErrNoMatch) { + t.Fatalf("Identify failed: %v", err) + } + +} diff --git a/zlib.go b/zlib.go index ce07890d..84275186 100644 --- a/zlib.go +++ b/zlib.go @@ -1,7 +1,6 @@ package archiver import ( - "bytes" "io" "strings" @@ -28,11 +27,13 @@ func (zz Zlib) Match(filename string, stream io.Reader) (MatchResult, error) { } // match file header - buf, err := readAtMost(stream, len(ZlibHeader)) - if err != nil { + buf, err := readAtMost(stream, 2) + // If an error occurred or buf is not 2 bytes we can't check the header + if err != nil || len(buf) < 2 { return mr, err } - mr.ByStream = bytes.Equal(buf, ZlibHeader) + + mr.ByStream = isValidZlibHeader(buf[0], buf[1]) return mr, nil } @@ -49,4 +50,23 @@ func (Zlib) OpenReader(r io.Reader) (io.ReadCloser, error) { return zlib.NewReader(r) } -var ZlibHeader = []byte{0x78} +func isValidZlibHeader(first, second byte) bool { + // Define all 32 valid zlib headers, see https://stackoverflow.com/questions/9050260/what-does-a-zlib-header-look-like/54915442#54915442 + validHeaders := map[uint16]struct{}{ + 0x081D: {}, 0x085B: {}, 0x0899: {}, 0x08D7: {}, + 0x1819: {}, 0x1857: {}, 0x1895: {}, 0x18D3: {}, + 0x2815: {}, 0x2853: {}, 0x2891: {}, 0x28CF: {}, + 0x3811: {}, 0x384F: {}, 0x388D: {}, 0x38CB: {}, + 0x480D: {}, 0x484B: {}, 0x4889: {}, 0x48C7: {}, + 0x5809: {}, 0x5847: {}, 0x5885: {}, 0x58C3: {}, + 0x6805: {}, 0x6843: {}, 0x6881: {}, 0x68DE: {}, + 0x7801: {}, 0x785E: {}, 0x789C: {}, 0x78DA: {}, + } + + // Combine the first and second bytes into a single 16-bit, big-endian value + header := uint16(first)<<8 | uint16(second) + + // Check if the header is in the map of valid headers + _, isValid := validHeaders[header] + return isValid +}