Add error when chunkSize is greater than the strict memory limit of the CacheLimiter. (#2228)

Scenario:

    export AZCOPY_BUFFER_GB=5
    # Copy a single 5GB file to an Azure Blob Storage Container with a block size of 4000MB
    ./azcopy [...] --block-size-mb=4000 [...]

Since the chunk size in that case is greater than the strict memory limit (75% of 5GB),
this would end up in a deadlock: CacheLimiter.TryAdd would loop forever, waiting for
memory that would never be freed.

This patch remedies that by making it an error if a chunk is at least as large as the
strict memory limit of the associated CacheLimiter.
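
For context, here is a minimal standalone sketch of the failure mode (simplified names
and numbers, not the actual AzCopy source): once a single chunk is bigger than the strict
limit, the admission check fails even with zero bytes in use, so any loop that waits for
TryAdd to succeed spins forever.

    package main

    import (
        "fmt"
        "sync/atomic"
        "time"
    )

    // Simplified model of the CacheLimiter's strict-limit admission check.
    type cacheLimiter struct {
        value int64 // bytes currently cached
        limit int64 // total limit, e.g. AZCOPY_BUFFER_GB
    }

    func (c *cacheLimiter) tryAdd(count int64) bool {
        strictLimit := int64(float32(c.limit) * 0.75)
        if atomic.AddInt64(&c.value, count) <= strictLimit {
            return true
        }
        atomic.AddInt64(&c.value, -count) // over the limit: back out and fail
        return false
    }

    func (c *cacheLimiter) waitUntilAdd(count int64) {
        for !c.tryAdd(count) {
            // With count > strictLimit this can never succeed, even at value == 0:
            // the deadlock described above.
            time.Sleep(time.Millisecond)
        }
    }

    func main() {
        c := &cacheLimiter{limit: 5 << 30} // 5 GiB buffer
        chunk := int64(4000) << 20         // 4000 MiB block size (~3.91 GiB)
        fmt.Println(c.tryAdd(chunk))       // false: exceeds the 3.75 GiB strict limit
        // c.waitUntilAdd(chunk) would never return.
    }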
mkrautz authored and nakulkar-msft committed May 31, 2023
1 parent 691f114 commit 9bc28d3
Showing 2 changed files with 26 additions and 11 deletions.
27 changes: 18 additions & 9 deletions common/cacheLimiter.go
@@ -27,6 +27,18 @@ import (
 	"time"
 )
 
+// The percentage of a CacheLimiter's Limit that is considered
+// the strict limit.
+var cacheLimiterStrictLimitPercentage = float32(0.75)
+// Rationale for the level of the strict limit: as at Jan 2018, we are using 0.75 of the total as the strict
+// limit, leaving the other 0.25 of the total accessible under the "relaxed" limit.
+// That last 25% gets used for two things: in downloads it is used for things where we KNOW there's
+// no backlogging of new chunks behind slow ones (i.e. these "good" cases are allowed to proceed without
+// interruption), and for uploads it's used for re-doing the prefetches when we do retries (i.e. so these are
+// not blocked by other chunks using up RAM).
+// TODO: now that cacheLimiter is used for multiple purposes, the hard-coding of the distinction between
+// relaxed and strict limits is less appropriate. Refactor to make it a configuration param of the instance?
+
 type Predicate func() bool
 
 // Used to limit the amounts of things. E.g. amount of in-flight data in RAM, to keep it at an acceptable level.
@@ -39,6 +51,7 @@ type CacheLimiter interface {
 	WaitUntilAdd(ctx context.Context, count int64, useRelaxedLimit Predicate) error
 	Remove(count int64)
 	Limit() int64
+	StrictLimit() int64
 }

type cacheLimiter struct {
@@ -58,15 +71,7 @@ func (c *cacheLimiter) TryAdd(count int64, useRelaxedLimit bool) (added bool) {
 	// for high-priority things (i.e. things we deem to be allowable under a relaxed (non-strict) limit)
 	strict := !useRelaxedLimit
 	if strict {
-		lim = int64(float32(lim) * 0.75)
-		// Rationale for the level of the strict limit: as at Jan 2018, we are using 0.75 of the total as the strict
-		// limit, leaving the other 0.25 of the total accessible under the "relaxed" limit.
-		// That last 25% gets use for two things: in downloads it is used for things where we KNOW there's
-		// no backlogging of new chunks behind slow ones (i.e. these "good" cases are allowed to proceed without
-		// interruption) and for uploads its used for re-doing the prefetches when we do retries (i.e. so these are
-		// not blocked by other chunks using up RAM).
-		// TODO: now that cacheLimiter is used for multiple purposes, the hard-coding of the distinction between
-		// relaxed and strict limits is less appropriate. Refactor to make it a configuration param of the instance?
+		lim = c.StrictLimit()
 	}
 
 	if atomic.AddInt64(&c.value, count) <= lim {
@@ -109,3 +114,7 @@ func (c *cacheLimiter) Remove(count int64) {
 func (c *cacheLimiter) Limit() int64 {
 	return c.limit
 }
+
+func (c *cacheLimiter) StrictLimit() int64 {
+	return int64(float32(c.limit) * cacheLimiterStrictLimitPercentage)
+}
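
As a back-of-the-envelope illustration of the 75%/25% split documented above (assumed
numbers, not taken from the diff): the same request can be rejected under the strict
limit yet admitted under the relaxed one, which is the headroom that retries and
non-backlogging downloads rely on.

    package main

    import "fmt"

    func main() {
        limit := int64(4) << 30                // 4 GiB total limit
        strict := int64(float32(limit) * 0.75) // 3 GiB strict limit
        inUse := int64(2560) << 20             // 2.5 GiB already cached
        request := int64(768) << 20            // 768 MiB new chunk

        fmt.Println(inUse+request <= strict) // false: 3.25 GiB breaches the strict limit
        fmt.Println(inUse+request <= limit)  // true:  3.25 GiB still fits the relaxed limit
    }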
10 changes: 8 additions & 2 deletions ste/sender-blockBlob.go
@@ -67,7 +67,7 @@ type blockBlobSenderBase struct {
 	completedBlockList map[int]string
 }
 
-func getVerifiedChunkParams(transferInfo TransferInfo, memLimit int64) (chunkSize int64, numChunks uint32, err error) {
+func getVerifiedChunkParams(transferInfo TransferInfo, memLimit int64, strictMemLimit int64) (chunkSize int64, numChunks uint32, err error) {
 	chunkSize = transferInfo.BlockSize
 	srcSize := transferInfo.SourceSize
 	numChunks = getNumChunks(srcSize, chunkSize)
@@ -93,6 +93,12 @@ func getVerifiedChunkParams(transferInfo TransferInfo, memLimit int64) (chunkSize int64, numChunks uint32, err error) {
 		return
 	}
 
+	if chunkSize >= strictMemLimit {
+		err = fmt.Errorf("Cannot use a block size of %.2fGiB. AzCopy is limited to use only %.2fGiB of memory, and only %.2fGiB of these are available for chunks.",
+			toGiB(chunkSize), toGiB(memLimit), toGiB(strictMemLimit))
+		return
+	}
+
 	if chunkSize > common.MaxBlockBlobBlockSize {
 		// mercy, please
 		err = fmt.Errorf("block size of %.2fGiB for file %s of size %.2fGiB exceeds maximum allowed block size for a BlockBlob",
@@ -121,7 +127,7 @@ func getBlockNamePrefix(jobID common.JobID, partNum uint32, transferIndex uint32
 
 func newBlockBlobSenderBase(jptm IJobPartTransferMgr, destination string, p pipeline.Pipeline, pacer pacer, srcInfoProvider ISourceInfoProvider, inferredAccessTierType azblob.AccessTierType) (*blockBlobSenderBase, error) {
 	// compute chunk count
-	chunkSize, numChunks, err := getVerifiedChunkParams(jptm.Info(), jptm.CacheLimiter().Limit())
+	chunkSize, numChunks, err := getVerifiedChunkParams(jptm.Info(), jptm.CacheLimiter().Limit(), jptm.CacheLimiter().StrictLimit())
 	if err != nil {
 		return nil, err
 	}
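
Plugging the commit-message scenario into the new guard (a standalone sketch; toGiB
here is a stand-in for the helper the real file uses, assumed to convert bytes to GiB):

    package main

    import "fmt"

    // Stand-in for the toGiB helper referenced by the diff (assumption: bytes -> GiB).
    func toGiB(bytes int64) float64 {
        return float64(bytes) / (1 << 30)
    }

    func main() {
        memLimit := int64(5) << 30                        // AZCOPY_BUFFER_GB=5
        strictMemLimit := int64(float32(memLimit) * 0.75) // 3.75 GiB
        chunkSize := int64(4000) << 20                    // --block-size-mb=4000

        if chunkSize >= strictMemLimit {
            // Same shape as the new error: fail fast instead of deadlocking.
            fmt.Printf("Cannot use a block size of %.2fGiB. AzCopy is limited to use only %.2fGiB of memory, and only %.2fGiB of these are available for chunks.\n",
                toGiB(chunkSize), toGiB(memLimit), toGiB(strictMemLimit))
        }
    }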
