Skip to content

Commit

Permalink
Skip chunks already verified in previous runs for check -chunks.
Browse files Browse the repository at this point in the history
This is done by storing the list of verified chunks in a file
`.duplicacy/cache/<storage>/verified_chunks`.
  • Loading branch information
gilbertchen committed Oct 8, 2020
1 parent 7da58c6 commit d7c1903
Showing 1 changed file with 84 additions and 31 deletions.
115 changes: 84 additions & 31 deletions src/duplicacy_snapshotmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -1017,50 +1017,103 @@ func (manager *SnapshotManager) CheckSnapshots(snapshotID string, revisionsToChe
manager.ShowStatistics(snapshotMap, chunkSizeMap, chunkUniqueMap, chunkSnapshotMap)
}

if checkChunks && !checkFiles {
manager.chunkDownloader.snapshotCache = nil
LOG_INFO("SNAPSHOT_VERIFY", "Verifying %d chunks", len(*allChunkHashes))
// Don't verify chunks with -files
if !checkChunks || checkFiles {
return true
}

startTime := time.Now()
var chunkHashes []string
// This contains chunks that have been verified in previous checks and is loaded from
// .duplicacy/cache/storage/verified_chunks. Note that it contains the chunk IDs, not the
// chunk hashes.
verifiedChunks := make(map[string]int64)
verifiedChunksFile := "verified_chunks"

// The index of the first chunk to add to the downloader, which may have already downloaded
// some metadata chunks so the index doesn't start with 0.
chunkIndex := -1
manager.fileChunk.Reset(false)
err = manager.snapshotCache.DownloadFile(0, verifiedChunksFile, manager.fileChunk)
if err != nil && !os.IsNotExist(err) {
LOG_WARN("SNAPSHOT_VERIFY", "Failed to load the file containing verified chunks: %v", err)
} else {

This comment has been minimized.

Copy link
@gboudreau

gboudreau Oct 9, 2020

Contributor

If the file doesn't exist (i.e. os.IsNotExist(err) is true), the else branch below is still executed, and a warning is logged: SNAPSHOT_VERIFY Failed to parse the file containing verified chunks: unexpected end of JSON input
Maybe refactor the `if` above to read:

	if err != nil {
		if !os.IsNotExist(err) {
			LOG_WARN("SNAPSHOT_VERIFY", "Failed to load the file containing verified chunks: %v", err)
		}
	} else {

This comment has been minimized.

Copy link
@gilbertchen

gilbertchen Oct 9, 2020

Author Owner

You're right. I'll fix it.

err = json.Unmarshal(manager.fileChunk.GetBytes(), &verifiedChunks)
if err != nil {
LOG_WARN("SNAPSHOT_VERIFY", "Failed to parse the file containing verified chunks: %v", err)
}
}
numberOfVerifiedChunks := len(verifiedChunks)

for chunkHash := range *allChunkHashes {
chunkHashes = append(chunkHashes, chunkHash)
if chunkIndex == -1 {
chunkIndex = manager.chunkDownloader.AddChunk(chunkHash)
saveVerifiedChunks := func() {
if len(verifiedChunks) > numberOfVerifiedChunks {
var description []byte
description, err = json.Marshal(verifiedChunks)
if err != nil {
LOG_WARN("SNAPSHOT_VERIFY", "Failed to create a json file for the set of verified chunks: %v", err)
} else {
manager.chunkDownloader.AddChunk(chunkHash)
err = manager.snapshotCache.UploadFile(0, verifiedChunksFile, description)
if err != nil {
LOG_WARN("SNAPSHOT_VERIFY", "Failed to save the verified chunks file: %v", err)
} else {
LOG_INFO("SNAPSHOT_VERIFY", "Added %d chunks to the list of verified chunks", len(verifiedChunks) - numberOfVerifiedChunks)
}
}
}
}
defer saveVerifiedChunks()
RunAtError = saveVerifiedChunks

This comment has been minimized.

Copy link
@gboudreau

gboudreau Oct 9, 2020

Contributor

I'm not sure exactly when saveVerifiedChunks is called. I ran a compiled build of duplicacy and, after seeing a few "VERIFY_PROGRESS Verified chunk <hash>" log lines, ran killall -HUP duplicacy. That killed duplicacy, but no verified_chunks file was found in the .duplicacy/cache/default/ folder afterwards.

This comment has been minimized.

Copy link
@gilbertchen

gilbertchen Oct 9, 2020

Author Owner

HUP is not handled. Try killall -INT duplicacy instead.


manager.chunkDownloader.snapshotCache = nil
LOG_INFO("SNAPSHOT_VERIFY", "Verifying %d chunks", len(*allChunkHashes))

var downloadedChunkSize int64
totalChunks := len(*allChunkHashes)
for i := 0; i < totalChunks; i++ {
chunk := manager.chunkDownloader.WaitForChunk(i + chunkIndex)
if chunk.isBroken {
startTime := time.Now()
var chunkHashes []string

// The index of the first chunk to add to the downloader, which may have already downloaded
// some metadata chunks so the index doesn't start with 0.
chunkIndex := -1

skippedChunks := 0
for chunkHash := range *allChunkHashes {
if len(verifiedChunks) > 0 {
chunkID := manager.config.GetChunkIDFromHash(chunkHash)
if _, found := verifiedChunks[chunkID]; found {
skippedChunks++
continue
}
downloadedChunkSize += int64(chunk.GetLength())

elapsedTime := time.Now().Sub(startTime).Seconds()
speed := int64(float64(downloadedChunkSize) / elapsedTime)
remainingTime := int64(float64(totalChunks - i - 1) / float64(i + 1) * elapsedTime)
percentage := float64(i + 1) / float64(totalChunks) * 100.0
LOG_INFO("VERIFY_PROGRESS", "Verified chunk %s (%d/%d), %sB/s %s %.1f%%",
manager.config.GetChunkIDFromHash(chunkHashes[i]), i + 1, totalChunks,
PrettySize(speed), PrettyTime(remainingTime), percentage)
}

if manager.chunkDownloader.NumberOfFailedChunks > 0 {
LOG_ERROR("SNAPSHOT_VERIFY", "%d out of %d chunks are corrupted", manager.chunkDownloader.NumberOfFailedChunks, totalChunks)
chunkHashes = append(chunkHashes, chunkHash)
if chunkIndex == -1 {
chunkIndex = manager.chunkDownloader.AddChunk(chunkHash)
} else {
LOG_INFO("SNAPSHOT_VERIFY", "All %d chunks have been successfully verified", totalChunks)
manager.chunkDownloader.AddChunk(chunkHash)
}
}

if skippedChunks > 0 {
LOG_INFO("SNAPSHOT_VERIFY", "Skipped %d chunks that have already been verified before", skippedChunks)
}

var downloadedChunkSize int64
totalChunks := len(chunkHashes)
for i := 0; i < totalChunks; i++ {
chunk := manager.chunkDownloader.WaitForChunk(i + chunkIndex)
chunkID := manager.config.GetChunkIDFromHash(chunkHashes[i])
if chunk.isBroken {
continue
}
verifiedChunks[chunkID] = startTime.Unix()
downloadedChunkSize += int64(chunk.GetLength())

elapsedTime := time.Now().Sub(startTime).Seconds()
speed := int64(float64(downloadedChunkSize) / elapsedTime)
remainingTime := int64(float64(totalChunks - i - 1) / float64(i + 1) * elapsedTime)
percentage := float64(i + 1) / float64(totalChunks) * 100.0
LOG_INFO("VERIFY_PROGRESS", "Verified chunk %s (%d/%d), %sB/s %s %.1f%%",
chunkID, i + 1, totalChunks, PrettySize(speed), PrettyTime(remainingTime), percentage)
}

if manager.chunkDownloader.NumberOfFailedChunks > 0 {
LOG_ERROR("SNAPSHOT_VERIFY", "%d out of %d chunks are corrupted", manager.chunkDownloader.NumberOfFailedChunks, len(*allChunkHashes))
} else {
LOG_INFO("SNAPSHOT_VERIFY", "All %d chunks have been successfully verified", len(*allChunkHashes))
}
return true
}

Expand Down

2 comments on commit d7c1903

@gilbertchen
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This commit has been mentioned on Duplicacy Forum. There might be relevant details there:

https://forum.duplicacy.com/t/check-chunks-on-google-drive-failed-to-decrypt-the-chunk-hash-cipher-message-authentication-failed/4300/4

@gilbertchen
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This commit has been mentioned on Duplicacy Forum. There might be relevant details there:

https://forum.duplicacy.com/t/cli-2-7-2-is-now-available/4483/1

Please sign in to comment.