diff --git a/block.go b/block.go index f19f128..8681c78 100644 --- a/block.go +++ b/block.go @@ -18,9 +18,9 @@ package gowarc import ( "errors" - "github.com/nlnwa/gowarc/internal/diskbuffer" "io" - "io/ioutil" + + "github.com/nlnwa/gowarc/internal/diskbuffer" ) // Block is the interface used to represent the content of a WARC record as specified by the WARC specification: @@ -124,7 +124,7 @@ func (block *genericBlock) BlockDigest() string { if block.filterReader == nil { block.filterReader = newDigestFilterReader(block.rawBytes, block.blockDigest) } - _, _ = io.Copy(ioutil.Discard, block.filterReader) + _, _ = io.Copy(io.Discard, block.filterReader) block.blockDigestString = block.blockDigest.format() } return block.blockDigestString diff --git a/example_test.go b/example_test.go index e1541f3..9e8fbcb 100644 --- a/example_test.go +++ b/example_test.go @@ -20,8 +20,9 @@ import ( "bufio" "bytes" "fmt" - "github.com/nlnwa/gowarc" "io" + + "github.com/nlnwa/gowarc" ) func ExampleNewRecordBuilder() { @@ -69,6 +70,7 @@ func ExampleUnmarshaler() { // gowarc: Validation errors: // 1: gowarc: record was found 2 bytes after expected offset // 2: block: wrong digest: expected sha1:af4d582b4ffc017d07a947d841e392a821f754f3, computed: sha1:8a936f9fd60d664cf95b1ffb40f1c4093e65bb40 + // 3: too few bytes in end of record marker. Expected "\r\n\r\n", was "" } func ExampleNewWarcFileWriter() { diff --git a/go.mod b/go.mod index 0248b3b..415fedd 100644 --- a/go.mod +++ b/go.mod @@ -1,23 +1,25 @@ module github.com/nlnwa/gowarc -go 1.17 +go 1.22.0 + +toolchain go1.22.7 require ( - github.com/google/uuid v1.3.0 - github.com/klauspost/compress v1.15.12 - github.com/nlnwa/whatwg-url v0.4.0 - github.com/prometheus/prometheus v0.40.3 - github.com/stretchr/testify v1.8.1 + github.com/google/uuid v1.6.0 + github.com/klauspost/compress v1.17.11 + github.com/nlnwa/whatwg-url v0.5.0 + github.com/prometheus/prometheus v0.55.0 + github.com/stretchr/testify v1.9.0 ) require ( - github.com/bits-and-blooms/bitset v1.5.0 // indirect - github.com/davecgh/go-spew v1.1.1 // indirect + github.com/bits-and-blooms/bitset v1.14.3 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/kr/pretty v0.3.1 // indirect github.com/pkg/errors v0.9.1 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect - golang.org/x/net v0.23.0 // indirect - golang.org/x/sys v0.18.0 // indirect - golang.org/x/text v0.14.0 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + golang.org/x/net v0.30.0 // indirect + golang.org/x/sys v0.26.0 // indirect + golang.org/x/text v0.19.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 09ca4b4..4a3c633 100644 --- a/go.sum +++ b/go.sum @@ -331,6 +331,9 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/bits-and-blooms/bitset v1.5.0 h1:NpE8frKRLGHIcEzkR+gZhiioW1+WbYV6fKwD6ZIpQT8= github.com/bits-and-blooms/bitset v1.5.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= +github.com/bits-and-blooms/bitset v1.13.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= +github.com/bits-and-blooms/bitset v1.14.3 h1:Gd2c8lSNf9pKXom5JtD7AaKO8o7fGQ2LtFj1436qilA= +github.com/bits-and-blooms/bitset v1.14.3/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= github.com/casbin/casbin/v2 v2.1.2/go.mod h1:YcPU1XXisHhLzuxH9coDNf2FbKpjGlbCg3n9yuLkIJQ= github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= github.com/cenkalti/backoff/v4 v4.1.2/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= @@ -373,6 +376,8 @@ github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dennwc/varint v1.0.0/go.mod h1:hnItb35rvZvJrbTALZtY/iQfDs48JKRG1RPpgziApxA= github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= github.com/dgryski/go-sip13 v0.0.0-20200911182023-62edffca9245/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= @@ -592,6 +597,8 @@ github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+ github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/enterprise-certificate-proxy v0.0.0-20220520183353-fd19c99a87aa/go.mod h1:17drOmN3MwGY7t0e+Ei9b45FFGA3fBs3x36SsCg1hq8= github.com/googleapis/enterprise-certificate-proxy v0.1.0/go.mod h1:17drOmN3MwGY7t0e+Ei9b45FFGA3fBs3x36SsCg1hq8= github.com/googleapis/enterprise-certificate-proxy v0.2.0/go.mod h1:8C0jb7/mgJe/9KK8Lm7X9ctZC2t60YyIpYEI16jx0Qg= @@ -706,6 +713,8 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= github.com/klauspost/compress v1.15.12 h1:YClS/PImqYbn+UILDnqxQCZ3RehC9N318SU3kElDUEM= github.com/klauspost/compress v1.15.12/go.mod h1:QPwzmACJjUTFsnSHH934V6woptycfrDDJnH7hvFVbGM= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b/go.mod h1:pcaDhQK0/NJZEvtCO0qQPPropqV0sJOJ6YW7X+9kRwM= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -795,6 +804,8 @@ github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OS github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/nlnwa/whatwg-url v0.4.0 h1:B3kFb5EL7KILeBkhrlQvFi41Ex0p4ropVA9brt5ungI= github.com/nlnwa/whatwg-url v0.4.0/go.mod h1:pLzpJjFPtA+n7RCLvp0GBxvDHa/2ckNCBK9mfEeNOMQ= +github.com/nlnwa/whatwg-url v0.5.0 h1:l71cqfqG44+VCQZQX3wD4bwheFWicPxuwaCimLEfpDo= +github.com/nlnwa/whatwg-url v0.5.0/go.mod h1:X/ejnFFVbaOWdSul+cnlsSHviCzGZJdvPkgc9zD8IY8= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= github.com/oklog/oklog v0.3.2/go.mod h1:FCV+B7mhrz4o+ueLpx+KqkyXRGMWOYEvfiXtdGtbWGs= @@ -847,6 +858,8 @@ github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZ github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qRg= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s= github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= @@ -891,6 +904,8 @@ github.com/prometheus/procfs v0.7.3/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1 github.com/prometheus/procfs v0.8.0/go.mod h1:z7EfXMXOkbkqb9IINtpCn86r/to3BnA0uaxHdg830/4= github.com/prometheus/prometheus v0.40.3 h1:oMw1vVyrxHTigXAcFY6QHrGUnQEbKEOKo737cPgYBwY= github.com/prometheus/prometheus v0.40.3/go.mod h1:/UhsWkOXkO11wqTW2Bx5YDOwRweSDcaFBlTIzFe7P0Y= +github.com/prometheus/prometheus v0.55.0 h1:ITinOi1zr3HemoVWHf679PfRRmpxZOcR4nEvsze6eB0= +github.com/prometheus/prometheus v0.55.0/go.mod h1:GGS7QlWKCqCbcEzWsVahYIfQwiGhcExkarHyLJTsv6I= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= @@ -949,6 +964,8 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= @@ -1036,6 +1053,7 @@ golang.org/x/crypto v0.0.0-20221012134737-56aed061732a/go.mod h1:IxCIyHEi3zRg3s0 golang.org/x/crypto v0.1.0/go.mod h1:RecgLatLF4+eUMCP1PoPZQb+cVrJcOPbHkTkbkB9sbw= golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -1154,6 +1172,9 @@ golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= +golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -1307,6 +1328,9 @@ golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -1315,6 +1339,7 @@ golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -1330,6 +1355,9 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= +golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= diff --git a/internal/diskbuffer/filebuffer.go b/internal/diskbuffer/filebuffer.go index eca2cdf..85bc498 100644 --- a/internal/diskbuffer/filebuffer.go +++ b/internal/diskbuffer/filebuffer.go @@ -4,7 +4,6 @@ import ( "errors" "io" "io/fs" - "io/ioutil" "os" ) @@ -24,7 +23,7 @@ func newFileBuffer(maxSize int64, tmpDir string) (*fileBuffer, error) { b := &fileBuffer{ max: maxSize, } - if b.diskFile, err = ioutil.TempFile(tmpDir, tmpFilePrefix); err != nil { + if b.diskFile, err = os.CreateTemp(tmpDir, tmpFilePrefix); err != nil { return nil, err } diff --git a/record_test.go b/record_test.go index 4d4a483..c1a41aa 100644 --- a/record_test.go +++ b/record_test.go @@ -17,11 +17,12 @@ package gowarc import ( - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "io/ioutil" + "io" "net/http" "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func Test_warcRecord_ToRevisitRecord(t *testing.T) { @@ -250,7 +251,7 @@ func Test_warcRecord_ToRevisitRecord(t *testing.T) { assert.IsType(&revisitBlock{}, got.Block()) r, err := got.Block().RawBytes() assert.Nil(err) - b, err := ioutil.ReadAll(r) + b, err := io.ReadAll(r) assert.Nil(err) assert.Equal(tt.want.data, string(b)) @@ -489,7 +490,7 @@ func Test_warcRecord_Merge(t *testing.T) { r, err := got.Block().RawBytes() assert.Nil(err) - b, err := ioutil.ReadAll(r) + b, err := io.ReadAll(r) assert.Nil(err) assert.Equal(tt.want.data, string(b)) diff --git a/recordbuilder_test.go b/recordbuilder_test.go index d2062b8..40c07f3 100644 --- a/recordbuilder_test.go +++ b/recordbuilder_test.go @@ -17,10 +17,11 @@ package gowarc import ( - "github.com/stretchr/testify/assert" - "io/ioutil" + "io" "testing" "time" + + "github.com/stretchr/testify/assert" ) func TestRecordBuilder(t *testing.T) { @@ -500,7 +501,7 @@ func TestRecordBuilder(t *testing.T) { assert.Equal(tt.want.validation, validation) r, err := wr.Block().RawBytes() assert.Nil(err) - b, err := ioutil.ReadAll(r) + b, err := io.ReadAll(r) assert.Nil(err) assert.Equal(tt.want.data, string(b)) diff --git a/unmarshaler.go b/unmarshaler.go index b33a5ba..40a6a4f 100644 --- a/unmarshaler.go +++ b/unmarshaler.go @@ -20,9 +20,10 @@ import ( "bufio" "bytes" "fmt" + "io" + "github.com/klauspost/compress/gzip" "github.com/nlnwa/gowarc/internal/countingreader" - "io" ) // Unmarshaler is the interface implemented by types that can unmarshal a WARC record. A new instance of Unmarshaler is created by calling [NewUnmarshaler]. @@ -152,68 +153,72 @@ func (u *unmarshaler) Unmarshal(b *bufio.Reader) (WarcRecord, int64, *Validation closer: nil, } - length, _ := record.headers.GetInt64(ContentLength) - - content := countingreader.NewLimited(r, length) record.closer = func() error { - defer func() { - _ = record.block.Close() - }() - - _, err := io.Copy(io.Discard, content) - - // Discarding 4 bytes which makes up the end of record marker (\r\n\r\n) - b, e := r.Peek(4) - switch { - case string(b) == crlfcrlf: - _, _ = r.Discard(4) - case len(b) == 0: - e = fmt.Errorf("too few bytes in end of record marker. Expected %q, was %q", crlfcrlf, b) - case len(b) == 1 && b[0] == lf: - e = fmt.Errorf("missing carriage return in end of record marker. Expected %q, was %q", crlfcrlf, b) - _, _ = r.Discard(1) - case len(b) == 2 && b[0] == lf && b[1] == lf: - e = fmt.Errorf("missing carriage return in end of record marker. Expected %q, was %q", crlfcrlf, b) - _, _ = r.Discard(2) - case len(b) < 4: - e = fmt.Errorf("too few bytes in end of record marker. Expected %q, was %q", crlfcrlf, b) - _, _ = r.Discard(len(b)) - case e == io.EOF: - _, _ = r.Discard(len(b)) - } - if e != nil { - switch u.opts.errSpec { - case ErrFail: - err = e - case ErrWarn: - validation.addError(e) - } - } - if isGzip { - // Empty gzip reader to ensure gzip checksum is validated - b := make([]byte, 10) - var err error - for err == nil { - _, err = u.gz.Read(b) - } - if err != io.EOF { - _ = u.gz.Close() - return err - } - if err := u.gz.Close(); err != nil { - return err - } + if record.block != nil { + return record.block.Close() } - return err + return nil } + length, _ := record.headers.GetInt64(ContentLength) + content := countingreader.NewLimited(r, length) + err = record.parseBlock(bufio.NewReader(content), validation) if err != nil { return record, offset, validation, err } + err = record.ValidateDigest(validation) + if err != nil { + return record, offset, validation, err + } + + // Discard any remaining bytes in block not read by parseBlock + _, err = io.Copy(io.Discard, content) + if err != nil { + return record, offset, validation, err + } + + // Validate end of record marker + buf, err := r.Peek(4) + if string(buf) == crlfcrlf { + _, _ = r.Discard(4) + } else if len(buf) == 0 { + err = fmt.Errorf("too few bytes in end of record marker. Expected %q, was %q", crlfcrlf, buf) + } else if len(buf) == 1 && buf[0] == lf { + err = fmt.Errorf("missing carriage return in end of record marker. Expected %q, was %q", crlfcrlf, buf) + _, _ = r.Discard(1) + } else if len(buf) == 2 && buf[0] == lf && buf[1] == lf { + err = fmt.Errorf("missing carriage return in end of record marker. Expected %q, was %q", crlfcrlf, buf) + _, _ = r.Discard(2) + } else if len(buf) < 4 { + err = fmt.Errorf("too few bytes in end of record marker. Expected %q, was %q", crlfcrlf, buf) + _, _ = r.Discard(len(buf)) + } else if err == io.EOF { + err = fmt.Errorf("unexpected end of record. Expected %q, was %q", crlfcrlf, buf) + _, _ = r.Discard(len(buf)) + } + if err != nil { + switch u.opts.errSpec { + case ErrFail: + return record, offset, validation, err + case ErrWarn: + validation.addError(err) + } + } + if isGzip { + // Empty gzip reader to ensure gzip checksum is validated + _, err = io.Copy(io.Discard, u.gz) + if err != io.EOF { + _ = u.gz.Close() + return record, offset, validation, err + } + if err := u.gz.Close(); err != nil { + return record, offset, validation, err + } + } - return record, offset, validation, err + return record, offset, validation, nil } func (u *unmarshaler) resolveRecordVersion(s string, validation *Validation) (*WarcVersion, error) { diff --git a/warcfile.go b/warcfile.go index 041cf1c..177aa66 100644 --- a/warcfile.go +++ b/warcfile.go @@ -20,11 +20,6 @@ import ( "bufio" "errors" "fmt" - "github.com/klauspost/compress/gzip" - "github.com/nlnwa/gowarc/internal" - "github.com/nlnwa/gowarc/internal/countingreader" - "github.com/nlnwa/gowarc/internal/timestamp" - "github.com/prometheus/prometheus/tsdb/fileutil" "io" "log" "os" @@ -33,6 +28,12 @@ import ( "sync" "sync/atomic" "time" + + "github.com/klauspost/compress/gzip" + "github.com/nlnwa/gowarc/internal" + "github.com/nlnwa/gowarc/internal/countingreader" + "github.com/nlnwa/gowarc/internal/timestamp" + "github.com/prometheus/prometheus/tsdb/fileutil" ) // WarcFileNameGenerator is the interface that wraps the NewWarcfileName function. @@ -482,11 +483,9 @@ func (w *singleWarcFileWriter) close() error { type WarcFileReader struct { file io.Reader initialOffset int64 - offset int64 warcReader Unmarshaler countingReader *countingreader.Reader bufferedReader *bufio.Reader - currentRecord WarcRecord } var inputBufPool = sync.Pool{ @@ -530,7 +529,6 @@ func NewWarcFileReaderFromStream(r io.Reader, offset int64, opts ...WarcRecordOp wf := &WarcFileReader{ file: r, initialOffset: offset, - offset: offset, warcReader: NewUnmarshaler(opts...), countingReader: countingreader.New(r), } @@ -560,18 +558,11 @@ func NewWarcFileReaderFromStream(r io.Reader, offset int64, opts ...WarcRecordOp // // When at end of file, returned offset is equal to length of file, WarcRecord is nil and err is [io.EOF]. func (wf *WarcFileReader) Next() (WarcRecord, int64, *Validation, error) { - var validation *Validation - if wf.currentRecord != nil { - if err := wf.currentRecord.Close(); err != nil { - return nil, wf.offset, validation, err - } - } - wf.offset = wf.initialOffset + wf.countingReader.N() - int64(wf.bufferedReader.Buffered()) + offset := wf.initialOffset + wf.countingReader.N() - int64(wf.bufferedReader.Buffered()) + + record, recordOffset, validation, err := wf.warcReader.Unmarshal(wf.bufferedReader) - var err error - var recordOffset int64 - wf.currentRecord, recordOffset, validation, err = wf.warcReader.Unmarshal(wf.bufferedReader) - return wf.currentRecord, wf.offset + recordOffset, validation, err + return record, offset + recordOffset, validation, err } // Close closes the WarcFileReader.