@@ -45,11 +45,25 @@ type WarcRecord interface {
45
45
// RevisitRef extracts a RevisitRef from the current record if it is a revisit record.
46
46
RevisitRef () (* RevisitRef , error )
47
47
// CreateRevisitRef creates a RevisitRef which references the current record.
48
+ //
48
49
// The RevisitRef might be used by another record's ToRevisitRecord to create a revisit record referencing this record.
49
50
CreateRevisitRef (profile string ) (* RevisitRef , error )
50
51
// Merge merges this record with its referenced record(s).
52
+ //
51
53
// It is implemented only for revisit records, but this function will be enhanced to also support segmented records.
52
54
Merge (record ... WarcRecord ) (WarcRecord , error )
55
+ // ValidateDigest validates block and payload digests if present.
56
+ //
57
+ // If option FixDigest is set, an invalid or missing digest will be corrected in the header.
58
+ // Digest validation requires the whole content block to be read. As a side effect the Content-Length field is also validated
59
+ // and if option FixContentLength is set, a wrong content length will be corrected in the header.
60
+ //
61
+ // If the record is not cached, it might not be possible to read any content from this record after validation.
62
+ //
63
+ // The result is dependent on the SpecViolationPolicy option:
64
+ // ErrIgnore: only fatal errors are returned.
65
+ // ErrWarn: all errors found will be added to the Validation.
66
+ // ErrFail: the first error is returned and no more validation is done.
53
67
ValidateDigest (validation * Validation ) error
54
68
}
55
69
@@ -369,11 +383,35 @@ func (wr *warcRecord) parseBlock(reader io.Reader, validation *Validation) (err
369
383
}
370
384
371
385
// ValidateDigest validates block and payload digests if present.
386
+ //
372
387
// If option FixDigest is set, an invalid or missing digest will be corrected in the header.
388
+ // Digest validation requires the whole content block to be read. As a side effect the Content-Length field is also validated
389
+ // and if option FixContentLength is set, a wrong content length will be corrected in the header.
390
+ //
373
391
// If the record is not cached, it might not be possible to read any content from this record after validation.
392
+ //
393
+ // The result is dependent on the SpecViolationPolicy option:
394
+ // ErrIgnore: only fatal errors are returned.
395
+ // ErrWarn: all errors found will be added to the Validation.
396
+ // ErrFail: the first error is returned and no more validation is done.
374
397
func (wr * warcRecord ) ValidateDigest (validation * Validation ) error {
375
398
wr .Block ().BlockDigest ()
376
399
400
+ size := strconv .FormatInt (wr .block .Size (), 10 )
401
+ if wr .opts .errSpec > ErrIgnore {
402
+ if wr .WarcHeader ().Has (ContentLength ) && size != wr .headers .Get (ContentLength ) {
403
+ switch wr .opts .errSpec {
404
+ case ErrWarn :
405
+ validation .addError (fmt .Errorf ("content length mismatch. header: %v, actual: %v" , wr .headers .Get (ContentLength ), size ))
406
+ if wr .opts .fixContentLength {
407
+ wr .WarcHeader ().Set (ContentLength , size )
408
+ }
409
+ case ErrFail :
410
+ return fmt .Errorf ("content length mismatch. header: %v, actual: %v" , wr .headers .Get (ContentLength ), size )
411
+ }
412
+ }
413
+ }
414
+
377
415
var blockDigest , payloadDigest * digest
378
416
switch v := wr .Block ().(type ) {
379
417
case * genericBlock :
0 commit comments