From 3ce7ed7e32ea8604267068118f982489c827af75 Mon Sep 17 00:00:00 2001 From: afkbluey Date: Sat, 1 Nov 2025 22:05:08 +1100 Subject: [PATCH 1/4] feat: part size, parallel and explicit behaviour of 5TB cp --- cmd/client-s3.go | 12 +++++++++-- cmd/client.go | 1 + cmd/common-methods.go | 35 ++++++++++++++++++++++++++++++-- cmd/cp-main.go | 47 ++++++++++++++++++++++++++++++++++++------- 4 files changed, 84 insertions(+), 11 deletions(-) diff --git a/cmd/client-s3.go b/cmd/client-s3.go index ff95a79300..97eb561036 100644 --- a/cmd/client-s3.go +++ b/cmd/client-s3.go @@ -892,8 +892,16 @@ func (c *S3Client) Get(ctx context.Context, opts GetOptions) (io.ReadCloser, *Cl if opts.Zip { o.Set("x-minio-extract", "true") } - if opts.RangeStart != 0 { - err := o.SetRange(opts.RangeStart, 0) + if opts.RangeStart != 0 || opts.RangeEnd != 0 { + var err error + // SetRange expects offset and length, not start and end + // But when length is 0, it reads to EOF + // For explicit range: bytes=start-end (inclusive) + if opts.RangeEnd != 0 { + err = o.SetRange(opts.RangeStart, opts.RangeEnd) + } else { + err = o.SetRange(opts.RangeStart, 0) + } if err != nil { return nil, nil, probe.NewError(err) } diff --git a/cmd/client.go b/cmd/client.go index cb81f57348..b11cc81586 100644 --- a/cmd/client.go +++ b/cmd/client.go @@ -60,6 +60,7 @@ type GetOptions struct { VersionID string Zip bool RangeStart int64 + RangeEnd int64 PartNumber int Preserve bool } diff --git a/cmd/common-methods.go b/cmd/common-methods.go index 5472b4b46c..ca79629503 100644 --- a/cmd/common-methods.go +++ b/cmd/common-methods.go @@ -20,6 +20,7 @@ package cmd import ( "context" "errors" + "fmt" "io" "net/http" "os" @@ -36,6 +37,7 @@ import ( "github.com/minio/minio-go/v7" "github.com/minio/minio-go/v7/pkg/encrypt" "github.com/minio/minio-go/v7/pkg/tags" + "github.com/minio/pkg/v3/console" "github.com/minio/pkg/v3/env" ) @@ -353,8 +355,30 @@ func uploadSourceToTargetURL(ctx context.Context, uploadOpts uploadSourceToTarge metadata[http.CanonicalHeaderKey(k)] = v } - // Optimize for server side copy if the host is same. - if sourceAlias == targetAlias && !uploadOpts.isZip && !uploadOpts.urls.checksum.IsSet() { + // Debug: Print alias information + if !globalQuiet && !globalJSON { + console.Infof("DEBUG: Copy operation analysis:\n") + console.Infof(" Source alias: '%s'\n", sourceAlias) + console.Infof(" Target alias: '%s'\n", targetAlias) + console.Infof(" Aliases match: %v\n", sourceAlias == targetAlias) + console.Infof(" Is zip: %v\n", uploadOpts.isZip) + console.Infof(" Has checksum: %v\n", uploadOpts.urls.checksum.IsSet()) + console.Infof(" File size: %d bytes (%.2f GB)\n", length, float64(length)/(1024*1024*1024)) + } + + // Server-side copy has a 5TiB limit due to S3 ComposeObject API limitation + // For files > 5TiB, we must use stream copy (download + upload) even for same-alias + const maxServerSideCopySize = 5 * 1024 * 1024 * 1024 * 1024 // 5 TiB + canUseServerSideCopy := sourceAlias == targetAlias && + !uploadOpts.isZip && + !uploadOpts.urls.checksum.IsSet() && + length < maxServerSideCopySize + + // Optimize for server side copy if the host is same and file size allows it + if canUseServerSideCopy { + if !globalQuiet && !globalJSON { + console.Infof("DEBUG: Using server-side copy (fast, no client bandwidth)\n") + } // preserve new metadata and save existing ones.
if uploadOpts.preserve { currentMetadata, err := getAllMetadata(ctx, sourceAlias, sourceURL.String(), srcSSE, uploadOpts.urls) @@ -394,6 +418,13 @@ func uploadSourceToTargetURL(ctx context.Context, uploadOpts uploadSourceToTarge err = copySourceToTargetURL(ctx, targetAlias, targetURL.String(), sourcePath, sourceVersion, mode, until, legalHold, length, uploadOpts.progress, opts) } else { + if !globalQuiet && !globalJSON { + console.Infof("DEBUG: Using stream copy (downloads + uploads through client)\n") + if length > 5*1024*1024*1024*1024 { // > 5TB + console.Errorln(fmt.Sprintf("WARNING: File size %.2f TB exceeds 5TB. Stream copy may fail!", float64(length)/(1024*1024*1024*1024))) + console.Errorln("Recommendation: Use --checksum flag or ensure same-alias for server-side copy") + } + } if uploadOpts.urls.SourceContent.RetentionEnabled { // preserve new metadata and save existing ones. if uploadOpts.preserve { diff --git a/cmd/cp-main.go b/cmd/cp-main.go index 9ec393ef6d..0e2ba22b54 100644 --- a/cmd/cp-main.go +++ b/cmd/cp-main.go @@ -23,6 +23,7 @@ import ( "fmt" "io" "path/filepath" + "strconv" "strings" "github.com/fatih/color" @@ -72,6 +73,14 @@ var ( Name: "disable-multipart", Usage: "disable multipart upload feature", }, + cli.StringFlag{ + Name: "part-size", + Usage: "part size for multipart uploads (e.g. 16MiB, 64MiB, 128MiB). Max 5GiB. Max file size = part-size × 10000", + }, + cli.IntFlag{ + Name: "parallel", + Usage: "number of parts to upload in parallel for multipart uploads", + }, cli.BoolFlag{ Name: "md5", Usage: "force all upload(s) to calculate md5sum checksum", @@ -194,6 +203,12 @@ EXAMPLES: 19. Set tags to the uploaded objects {{.Prompt}} {{.HelpName}} -r --tags "category=prod&type=backup" ./data/ play/another-bucket/ + 20. Copy a large file with custom part size and parallel uploads + {{.Prompt}} {{.HelpName}} --part-size 128MiB --parallel 8 largefile.bin play/mybucket/ + + 21. Copy a very large file (12TB+) with CRC32C checksum and optimized multipart settings + {{.Prompt}} {{.HelpName}} --checksum CRC32C --part-size 5GiB --parallel 8 verylargefile.bin play/mybucket/ + `, } @@ -441,17 +456,35 @@ func doCopySession(ctx context.Context, cancelCopy context.CancelFunc, cli *cli. 
return doCopyFake(cpURLs, pg) }, 0) } else { + // Determine if this will be a server-side copy (no data transfer through client) + // Server-side copy is used when source and target are on the same server AND file < 5TiB + // Note: S3 ComposeObject API has a 5TiB limit, so files >= 5TiB must use stream copy + const maxServerSideCopySize = 5 * 1024 * 1024 * 1024 * 1024 // 5 TiB + isServerSideCopy := cpURLs.SourceAlias == cpURLs.TargetAlias && + !isZip && + !checksum.IsSet() && + cpURLs.SourceContent.Size < maxServerSideCopySize + + // For server-side copy, pass size=0 to parallel manager since no data flows through client + // For stream copy (including large files > 5TiB), pass actual size for progress tracking + queueSize := cpURLs.SourceContent.Size + if isServerSideCopy { + queueSize = 0 // No client bandwidth used for server-side copy + } + // Print the copy resume summary once in start parallel.queueTask(func() URLs { return doCopy(ctx, doCopyOpts{ - cpURLs: cpURLs, - pg: pg, - encryptionKeys: encryptionKeys, - isMvCmd: isMvCmd, - preserve: preserve, - isZip: isZip, + cpURLs: cpURLs, + pg: pg, + encryptionKeys: encryptionKeys, + isMvCmd: isMvCmd, + preserve: preserve, + isZip: isZip, + multipartSize: cli.String("part-size"), + multipartThreads: strconv.Itoa(cli.Int("parallel")), }) - }, cpURLs.SourceContent.Size) + }, queueSize) } } } From f5333b403f5bd83a18a815f37601d85c219137ad Mon Sep 17 00:00:00 2001 From: afkbluey Date: Sat, 1 Nov 2025 23:10:11 +1100 Subject: [PATCH 2/4] feat: server-side copy debug logs --- cmd/client-s3.go | 6 +++++- cmd/common-methods.go | 40 +++++++++++----------------------------- cmd/cp-main.go | 21 +++++++++++++++++---- 3 files changed, 33 insertions(+), 34 deletions(-) diff --git a/cmd/client-s3.go b/cmd/client-s3.go index 97eb561036..98ca51abc7 100644 --- a/cmd/client-s3.go +++ b/cmd/client-s3.go @@ -993,7 +993,11 @@ func (c *S3Client) Copy(ctx context.Context, source string, opts CopyOptions, pr destOpts.ReplaceMetadata = len(metadata) > 0 var e error - if opts.disableMultipart || opts.size < 64*1024*1024 { + // Use CopyObject for files < 5GiB (its maximum size limit) + // Use ComposeObject for files >= 5GiB (supports multipart copy up to 5TiB) + const maxCopyObjectSize = 5 * 1024 * 1024 * 1024 // 5GiB + + if opts.disableMultipart || opts.size < maxCopyObjectSize { _, e = c.api.CopyObject(ctx, destOpts, srcOpts) } else { _, e = c.api.ComposeObject(ctx, destOpts, srcOpts) } diff --git a/cmd/common-methods.go b/cmd/common-methods.go index ca79629503..353eae236a 100644 --- a/cmd/common-methods.go +++ b/cmd/common-methods.go @@ -20,7 +20,6 @@ package cmd import ( "context" "errors" - "fmt" "io" "net/http" "os" @@ -292,7 +291,7 @@ func getAllMetadata(ctx context.Context, sourceAlias, sourceURLStr string, srcSS } // uploadSourceToTargetURL - uploads to targetURL from source. -// optionally optimizes copy for object sizes <= 5GiB by using +// optionally optimizes copy for object sizes <= 5TiB by using // server side copy operation.
func uploadSourceToTargetURL(ctx context.Context, uploadOpts uploadSourceToTargetURLOpts) URLs { sourceAlias := uploadOpts.urls.SourceAlias @@ -355,19 +354,8 @@ func uploadSourceToTargetURL(ctx context.Context, uploadOpts uploadSourceToTarge metadata[http.CanonicalHeaderKey(k)] = v } - // Debug: Print alias information - if !globalQuiet && !globalJSON { - console.Infof("DEBUG: Copy operation analysis:\n") - console.Infof(" Source alias: '%s'\n", sourceAlias) - console.Infof(" Target alias: '%s'\n", targetAlias) - console.Infof(" Aliases match: %v\n", sourceAlias == targetAlias) - console.Infof(" Is zip: %v\n", uploadOpts.isZip) - console.Infof(" Has checksum: %v\n", uploadOpts.urls.checksum.IsSet()) - console.Infof(" File size: %d bytes (%.2f GB)\n", length, float64(length)/(1024*1024*1024)) - } - - // Server-side copy has a 5TiB limit due to S3 ComposeObject API limitation - // For files > 5TiB, we must use stream copy (download + upload) even for same-alias + // Server-side copy using ComposeObject API has a 5TiB limit + // For files >= 5TiB, we must use stream copy (download + upload) even for same-alias const maxServerSideCopySize = 5 * 1024 * 1024 * 1024 * 1024 // 5 TiB canUseServerSideCopy := sourceAlias == targetAlias && !uploadOpts.isZip && @@ -376,9 +364,6 @@ func uploadSourceToTargetURL(ctx context.Context, uploadOpts uploadSourceToTarge // Optimize for server side copy if the host is same and file size allows it if canUseServerSideCopy { - if !globalQuiet && !globalJSON { - console.Infof("DEBUG: Using server-side copy (fast, no client bandwidth)\n") - } // preserve new metadata and save existing ones. if uploadOpts.preserve { currentMetadata, err := getAllMetadata(ctx, sourceAlias, sourceURL.String(), srcSSE, uploadOpts.urls) @@ -401,10 +386,6 @@ func uploadSourceToTargetURL(ctx context.Context, uploadOpts uploadSourceToTarge } sourcePath := filepath.ToSlash(sourceURL.Path) - if uploadOpts.urls.SourceContent.RetentionEnabled { - err = putTargetRetention(ctx, targetAlias, targetURL.String(), metadata) - return uploadOpts.urls.WithError(err.Trace(sourceURL.String())) - } opts := CopyOptions{ srcSSE: srcSSE, @@ -417,14 +398,12 @@ func uploadSourceToTargetURL(ctx context.Context, uploadOpts uploadSourceToTarge err = copySourceToTargetURL(ctx, targetAlias, targetURL.String(), sourcePath, sourceVersion, mode, until, legalHold, length, uploadOpts.progress, opts) - } else { - if !globalQuiet && !globalJSON { - console.Infof("DEBUG: Using stream copy (downloads + uploads through client)\n") - if length > 5*1024*1024*1024*1024 { // > 5TB - console.Errorln(fmt.Sprintf("WARNING: File size %.2f TB exceeds 5TB. Stream copy may fail!", float64(length)/(1024*1024*1024*1024))) - console.Errorln("Recommendation: Use --checksum flag or ensure same-alias for server-side copy") - } + + // Can apply retention after copy if enabled + if err == nil && uploadOpts.urls.SourceContent.RetentionEnabled { + err = putTargetRetention(ctx, targetAlias, targetURL.String(), metadata) } + } else { if uploadOpts.urls.SourceContent.RetentionEnabled { // preserve new metadata and save existing ones. 
if uploadOpts.preserve { @@ -526,6 +505,9 @@ func uploadSourceToTargetURL(ctx context.Context, uploadOpts uploadSourceToTarge return uploadOpts.urls.WithError(probe.NewError(e)) } + // Debug logs for multipart configuration + console.Debugln("DEBUG: multipart configuration - part-size:", humanize.IBytes(multipartSize), "parallel:", multipartThreads, "file size:", humanize.IBytes(uint64(length))) + putOpts := PutOptions{ metadata: filterMetadata(metadata), sse: tgtSSE, diff --git a/cmd/cp-main.go b/cmd/cp-main.go index 0e2ba22b54..c79bca1579 100644 --- a/cmd/cp-main.go +++ b/cmd/cp-main.go @@ -26,6 +26,7 @@ import ( "strconv" "strings" + "github.com/dustin/go-humanize" "github.com/fatih/color" "github.com/minio/cli" json "github.com/minio/colorjson" @@ -457,21 +458,33 @@ func doCopySession(ctx context.Context, cancelCopy context.CancelFunc, cli *cli. }, 0) } else { // Determine if this will be a server-side copy (no data transfer through client) - // Server-side copy is used when source and target are on the same server AND file < 5TiB - // Note: S3 ComposeObject API has a 5TiB limit, so files >= 5TiB must use stream copy + // ComposeObject API supports server-side copy up to 5TiB + // Files >= 5TiB must use stream copy (download + upload) const maxServerSideCopySize = 5 * 1024 * 1024 * 1024 * 1024 // 5 TiB isServerSideCopy := cpURLs.SourceAlias == cpURLs.TargetAlias && !isZip && !checksum.IsSet() && cpURLs.SourceContent.Size < maxServerSideCopySize - // For server-side copy, pass size=0 to parallel manager since no data flows through client - // For stream copy (including large files > 5TiB), pass actual size for progress tracking + // For server-side copy (< 5TiB), pass size=0 to parallel manager + // For stream copy, pass actual size for progress tracking queueSize := cpURLs.SourceContent.Size if isServerSideCopy { queueSize = 0 // No client bandwidth used for server-side copy } + // Debug log for multipart configuration + if globalDebug { + partSizeStr := cli.String("part-size") + parallelCount := cli.Int("parallel") + copyType := "stream copy" + if isServerSideCopy { + copyType = "server-side copy" + } + console.Debugln(fmt.Sprintf("DEBUG: Starting %s - file: %s, size: %s, part-size: %s, parallel: %d", + copyType, cpURLs.SourceContent.URL.Path, humanize.IBytes(uint64(cpURLs.SourceContent.Size)), partSizeStr, parallelCount)) + } + // Print the copy resume summary once in start parallel.queueTask(func() URLs { return doCopy(ctx, doCopyOpts{ From bdb6b2fcba11aea1c787c095aa4fe67f3c2af15d Mon Sep 17 00:00:00 2001 From: afkbluey Date: Sun, 2 Nov 2025 14:57:39 +1100 Subject: [PATCH 3/4] feat: tests and cp doCopy docstring --- cmd/client-s3.go | 13 +- cmd/client.go | 1 - cmd/common-methods.go | 17 +-- cmd/cp-main.go | 57 ++++++- cmd/cp-main_test.go | 334 ++++++++++++++++++++++++++++++++++++++++++ cmd/put-main.go | 2 +- 6 files changed, 395 insertions(+), 29 deletions(-) diff --git a/cmd/client-s3.go b/cmd/client-s3.go index 98ca51abc7..92dc54e454 100644 --- a/cmd/client-s3.go +++ b/cmd/client-s3.go @@ -892,16 +892,9 @@ func (c *S3Client) Get(ctx context.Context, opts GetOptions) (io.ReadCloser, *Cl if opts.Zip { o.Set("x-minio-extract", "true") } - if opts.RangeStart != 0 || opts.RangeEnd != 0 { - var err error - // SetRange expects offset and length, not start and end - // But when length is 0, it reads to EOF - // For explicit range: bytes=start-end (inclusive) - if opts.RangeEnd != 0 { - err = o.SetRange(opts.RangeStart, opts.RangeEnd) - } else { - err = o.SetRange(opts.RangeStart, 0) - } + 
if opts.RangeStart != 0 { + // SetRange with length 0 reads from RangeStart to EOF + err := o.SetRange(opts.RangeStart, 0) if err != nil { return nil, nil, probe.NewError(err) } diff --git a/cmd/client.go b/cmd/client.go index b11cc81586..cb81f57348 100644 --- a/cmd/client.go +++ b/cmd/client.go @@ -60,7 +60,6 @@ type GetOptions struct { VersionID string Zip bool RangeStart int64 - RangeEnd int64 PartNumber int Preserve bool } diff --git a/cmd/common-methods.go b/cmd/common-methods.go index 353eae236a..9d1bc71b95 100644 --- a/cmd/common-methods.go +++ b/cmd/common-methods.go @@ -21,6 +21,7 @@ import ( "context" "errors" "io" + "maps" "net/http" "os" "path/filepath" @@ -457,9 +458,7 @@ func uploadSourceToTargetURL(ctx context.Context, uploadOpts uploadSourceToTarge } metadata := make(map[string]string, len(content.Metadata)) - for k, v := range content.Metadata { - metadata[k] = v - } + maps.Copy(metadata, content.Metadata) // Get metadata from target content as well for k, v := range uploadOpts.urls.TargetContent.Metadata { @@ -496,13 +495,13 @@ func uploadSourceToTargetURL(ctx context.Context, uploadOpts uploadSourceToTarge } } - if uploadOpts.multipartThreads == "" { + if uploadOpts.multipartThreads == 0 { multipartThreads, e = strconv.Atoi(env.Get("MC_UPLOAD_MULTIPART_THREADS", "4")) + if e != nil { + return uploadOpts.urls.WithError(probe.NewError(e)) + } } else { - multipartThreads, e = strconv.Atoi(uploadOpts.multipartThreads) - } - if e != nil { - return uploadOpts.urls.WithError(probe.NewError(e)) + multipartThreads = uploadOpts.multipartThreads } // Debug logs for multipart configuration @@ -599,7 +598,7 @@ type uploadSourceToTargetURLOpts struct { encKeyDB map[string][]prefixSSEPair preserve, isZip bool multipartSize string - multipartThreads string + multipartThreads int updateProgressTotal bool ifNotExists bool } diff --git a/cmd/cp-main.go b/cmd/cp-main.go index c79bca1579..4d65b317f2 100644 --- a/cmd/cp-main.go +++ b/cmd/cp-main.go @@ -23,7 +23,6 @@ import ( "fmt" "io" "path/filepath" - "strconv" "strings" "github.com/dustin/go-humanize" @@ -322,6 +321,37 @@ func printCopyURLsError(cpURLs *URLs) { } } +// doCopySession manages the copy session and determines copy strategy. +// +// 1. SERVER-SIDE COPY - preferred; no data is transferred through the client +// Used when ALL of these conditions are met: +// - Source and target are on the same alias (same MinIO/S3 server) +// - File size is < 5 TiB (ComposeObject API limit) +// - Not extracting from zip (--zip not used) +// - No checksum verification requested (--checksum not used) +// +// Multipart behavior: Uses ComposeObject API with X-Amz-Copy-Source-Range headers. +// Part size and parallel settings ARE applied via --part-size and --parallel flags. +// +// 2. STREAM COPY (download + upload through client) +// Used when ANY of these conditions are met: +// - Source and target are on different aliases (cross-server copy) +// - File size is >= 5 TiB +// - Extracting from zip (--zip flag used) +// - Checksum verification requested (--checksum flag used) +// +// Multipart behavior: Uses standard multipart upload API. +// Part size and parallel settings ARE applied via --part-size and --parallel flags. +// +// 3.
PUT without multipart +// Used when: +// - File size is < 64 MiB (default threshold) +// - OR --disable-multipart flag is used +// +// Notes: +// - The 5 TiB limit is a limitation of the S3 ComposeObject API +// - Default part size: 128 MiB (the MinIO SDK default) for multipart uploads +// - Default parallel: 4 threads if parallel is not set func doCopySession(ctx context.Context, cancelCopy context.CancelFunc, cli *cli.Context, encryptionKeys map[string][]prefixSSEPair, isMvCmd bool) error { var isCopied func(string) bool var totalObjects, totalBytes int64 @@ -475,14 +505,25 @@ func doCopySession(ctx context.Context, cancelCopy context.CancelFunc, cli *cli. // Debug log for multipart configuration if globalDebug { + copyType := "server-side copy" + if !isServerSideCopy { + copyType = "stream copy" + if checksum.IsSet() { + console.Debugln(fmt.Sprintf("DEBUG: Checksum %v requested - forcing stream copy for verification", checksum)) + } + } + partSizeStr := cli.String("part-size") - parallelCount := cli.Int("parallel") - copyType := "stream copy" - if isServerSideCopy { - copyType = "server-side copy" + if partSizeStr == "" { + partSizeStr = "default" } + console.Debugln(fmt.Sprintf("DEBUG: Starting %s - file: %s, size: %s, part-size: %s, parallel: %d", - copyType, cpURLs.SourceContent.URL.Path, humanize.IBytes(uint64(cpURLs.SourceContent.Size)), partSizeStr, parallelCount)) + copyType, + cpURLs.SourceContent.URL.Path, + humanize.IBytes(uint64(cpURLs.SourceContent.Size)), + partSizeStr, + cli.Int("parallel"))) } // Print the copy resume summary once in start @@ -495,7 +536,7 @@ func doCopySession(ctx context.Context, cancelCopy context.CancelFunc, cli *cli. preserve: preserve, isZip: isZip, multipartSize: cli.String("part-size"), - multipartThreads: strconv.Itoa(cli.Int("parallel")), + multipartThreads: cli.Int("parallel"), }) }, queueSize) } @@ -615,6 +656,6 @@ type doCopyOpts struct { isMvCmd, preserve, isZip bool updateProgressTotal bool multipartSize string - multipartThreads string + multipartThreads int ifNotExists bool } diff --git a/cmd/cp-main_test.go b/cmd/cp-main_test.go index ade868685f..88ae55d9c1 100644 --- a/cmd/cp-main_test.go +++ b/cmd/cp-main_test.go @@ -74,3 +74,337 @@ func TestParseMetaData(t *testing.T) { } } } + +// TestCopyStrategyDecision tests the decision logic for determining which copy strategy to use +// based on various conditions like alias, file size, zip flag, and checksum flag.
+func TestCopyStrategyDecision(t *testing.T) { + const maxServerSideCopySize = 5 * 1024 * 1024 * 1024 * 1024 // 5 TiB + + testCases := []struct { + name string + sourceAlias string + targetAlias string + fileSize int64 + isZip bool + checksumSet bool + expectedStrategy string // "server-side" or "stream" + description string + }{ + // Server-side copy cases + { + name: "Same alias, small file, no flags", + sourceAlias: "s3", + targetAlias: "s3", + fileSize: 1024 * 1024 * 1024, // 1 GiB + isZip: false, + checksumSet: false, + expectedStrategy: "server-side", + description: "Should use server-side copy for same alias and small file", + }, + { + name: "Same alias, 4.9 TiB file, no flags", + sourceAlias: "s3", + targetAlias: "s3", + fileSize: 5393554080768, // ~4.9 TiB (< 5 TiB limit) + isZip: false, + checksumSet: false, + expectedStrategy: "server-side", + description: "Should use server-side copy for files just under 5 TiB limit", + }, + { + name: "Same alias, exactly 5 TiB minus 1 byte", + sourceAlias: "minio1", + targetAlias: "minio1", + fileSize: maxServerSideCopySize - 1, + isZip: false, + checksumSet: false, + expectedStrategy: "server-side", + description: "Should use server-side copy for files exactly at limit boundary", + }, + + // Stream copy cases - different alias + { + name: "Different alias, small file", + sourceAlias: "s3", + targetAlias: "minio", + fileSize: 1024 * 1024 * 1024, // 1 GiB + isZip: false, + checksumSet: false, + expectedStrategy: "stream", + description: "Should use stream copy for cross-alias transfers", + }, + { + name: "Different alias, large file", + sourceAlias: "minio1", + targetAlias: "minio2", + fileSize: 10 * 1024 * 1024 * 1024 * 1024, // 10 TiB + isZip: false, + checksumSet: false, + expectedStrategy: "stream", + description: "Should use stream copy for cross-alias even with large files", + }, + + // Stream copy cases - file size >= 5 TiB + { + name: "Same alias, exactly 5 TiB", + sourceAlias: "s3", + targetAlias: "s3", + fileSize: maxServerSideCopySize, + isZip: false, + checksumSet: false, + expectedStrategy: "stream", + description: "Should use stream copy for files >= 5 TiB (ComposeObject limit)", + }, + { + name: "Same alias, 6 TiB file", + sourceAlias: "minio", + targetAlias: "minio", + fileSize: 6 * 1024 * 1024 * 1024 * 1024, // 6 TiB + isZip: false, + checksumSet: false, + expectedStrategy: "stream", + description: "Should use stream copy for files over 5 TiB limit", + }, + { + name: "Same alias, 100 TiB file", + sourceAlias: "s3", + targetAlias: "s3", + fileSize: 100 * 1024 * 1024 * 1024 * 1024, // 100 TiB + isZip: false, + checksumSet: false, + expectedStrategy: "stream", + description: "Should use stream copy for very large files", + }, + + // Stream copy cases - zip flag + { + name: "Same alias, small file, zip enabled", + sourceAlias: "s3", + targetAlias: "s3", + fileSize: 100 * 1024 * 1024, // 100 MiB + isZip: true, + checksumSet: false, + expectedStrategy: "stream", + description: "Should use stream copy when extracting from zip", + }, + { + name: "Same alias, 1 TiB file, zip enabled", + sourceAlias: "minio", + targetAlias: "minio", + fileSize: 1024 * 1024 * 1024 * 1024, // 1 TiB + isZip: true, + checksumSet: false, + expectedStrategy: "stream", + description: "Should use stream copy for zip extraction regardless of size", + }, + + // Stream copy cases - checksum flag + { + name: "Same alias, small file, checksum enabled", + sourceAlias: "s3", + targetAlias: "s3", + fileSize: 500 * 1024 * 1024, // 500 MiB + isZip: false, + checksumSet: 
true, + expectedStrategy: "stream", + description: "Should use stream copy when checksum verification is requested", + }, + { + name: "Same alias, 2 TiB file, checksum enabled", + sourceAlias: "minio", + targetAlias: "minio", + fileSize: 2 * 1024 * 1024 * 1024 * 1024, // 2 TiB + isZip: false, + checksumSet: true, + expectedStrategy: "stream", + description: "Should use stream copy for checksum verification on large files", + }, + + // Edge cases - multiple conditions forcing stream copy + { + name: "Different alias, large file, zip enabled", + sourceAlias: "s3", + targetAlias: "minio", + fileSize: 10 * 1024 * 1024 * 1024 * 1024, // 10 TiB + isZip: true, + checksumSet: false, + expectedStrategy: "stream", + description: "Should use stream copy when multiple conditions require it", + }, + { + name: "Same alias, over 5 TiB, checksum enabled", + sourceAlias: "s3", + targetAlias: "s3", + fileSize: 7 * 1024 * 1024 * 1024 * 1024, // 7 TiB + isZip: false, + checksumSet: true, + expectedStrategy: "stream", + description: "Should use stream copy when both size and checksum require it", + }, + { + name: "Different alias, zip, checksum", + sourceAlias: "minio1", + targetAlias: "minio2", + fileSize: 100 * 1024 * 1024, // 100 MiB + isZip: true, + checksumSet: true, + expectedStrategy: "stream", + description: "Should use stream copy when all stream conditions are met", + }, + + // Additional edge cases + { + name: "Same alias, zero byte file", + sourceAlias: "s3", + targetAlias: "s3", + fileSize: 0, + isZip: false, + checksumSet: false, + expectedStrategy: "server-side", + description: "Should use server-side copy for zero byte files", + }, + { + name: "Same alias, 64 MiB file (multipart threshold)", + sourceAlias: "minio", + targetAlias: "minio", + fileSize: 64 * 1024 * 1024, // 64 MiB + isZip: false, + checksumSet: false, + expectedStrategy: "server-side", + description: "Should use server-side copy for files at multipart threshold", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Simulate the decision logic from doCopySession + isServerSideCopy := tc.sourceAlias == tc.targetAlias && + !tc.isZip && + !tc.checksumSet && + tc.fileSize < maxServerSideCopySize + + var actualStrategy string + if isServerSideCopy { + actualStrategy = "server-side" + } else { + actualStrategy = "stream" + } + + if actualStrategy != tc.expectedStrategy { + t.Errorf("Test '%s' failed:\n"+ + " Description: %s\n"+ + " Source Alias: %s\n"+ + " Target Alias: %s\n"+ + " File Size: %d bytes (%.2f TiB)\n"+ + " Zip Flag: %v\n"+ + " Checksum Set: %v\n"+ + " Expected Strategy: %s\n"+ + " Actual Strategy: %s", + tc.name, + tc.description, + tc.sourceAlias, + tc.targetAlias, + tc.fileSize, + float64(tc.fileSize)/(1024*1024*1024*1024), + tc.isZip, + tc.checksumSet, + tc.expectedStrategy, + actualStrategy, + ) + } + }) + } +} + +// copyStrategyConditions represents the conditions that determine copy strategy +type copyStrategyConditions struct { + sameAlias bool + underLimit bool + isZip bool + checksumSet bool +} + +// TestCopyStrategyMatrix tests all combinations of copy strategy decision factors +func TestCopyStrategyMatrix(t *testing.T) { + const maxServerSideCopySize = 5 * 1024 * 1024 * 1024 * 1024 // 5 TiB + + // Test matrix: all combinations + testCases := []copyStrategyConditions{ + // All conditions met for server-side copy + {sameAlias: true, underLimit: true, isZip: false, checksumSet: false}, + + // One condition fails resulting in stream copy + {sameAlias: false, underLimit: true, isZip: 
false, checksumSet: false}, + {sameAlias: true, underLimit: false, isZip: false, checksumSet: false}, + {sameAlias: true, underLimit: true, isZip: true, checksumSet: false}, + {sameAlias: true, underLimit: true, isZip: false, checksumSet: true}, + + // Multiple conditions fail + {sameAlias: false, underLimit: false, isZip: false, checksumSet: false}, + {sameAlias: false, underLimit: true, isZip: true, checksumSet: false}, + {sameAlias: false, underLimit: true, isZip: false, checksumSet: true}, + {sameAlias: true, underLimit: false, isZip: true, checksumSet: false}, + {sameAlias: true, underLimit: false, isZip: false, checksumSet: true}, + {sameAlias: true, underLimit: true, isZip: true, checksumSet: true}, + + // All conditions unfavorable + {sameAlias: false, underLimit: false, isZip: true, checksumSet: true}, + } + + for _, cond := range testCases { + t.Run(formatConditionsName(cond), func(t *testing.T) { + // Set up test parameters based on conditions + sourceAlias := "source" + targetAlias := "target" + if cond.sameAlias { + targetAlias = "source" + } + + fileSize := int64(1024 * 1024 * 1024) // 1 GiB + if !cond.underLimit { + fileSize = maxServerSideCopySize + 1 + } + + // Determine expected strategy + // Server-side copy only if ALL conditions are true + expectedServerSide := cond.sameAlias && cond.underLimit && !cond.isZip && !cond.checksumSet + + // Simulate the decision logic + isServerSideCopy := sourceAlias == targetAlias && + !cond.isZip && + !cond.checksumSet && + fileSize < maxServerSideCopySize + + if isServerSideCopy != expectedServerSide { + t.Errorf("Strategy mismatch for conditions %+v: expected server-side=%v, got=%v", + cond, expectedServerSide, isServerSideCopy) + } + }) + } +} + +// Helper function to format test names for matrix tests +func formatConditionsName(cond copyStrategyConditions) string { + name := "" + if cond.sameAlias { + name += "SameAlias_" + } else { + name += "DiffAlias_" + } + if cond.underLimit { + name += "Under5TiB_" + } else { + name += "Over5TiB_" + } + if cond.isZip { + name += "Zip_" + } else { + name += "NoZip_" + } + if cond.checksumSet { + name += "Checksum" + } else { + name += "NoChecksum" + } + return name +} diff --git a/cmd/put-main.go b/cmd/put-main.go index 14c4c3e94e..ba519c889b 100644 --- a/cmd/put-main.go +++ b/cmd/put-main.go @@ -206,7 +206,7 @@ func mainPut(cliCtx *cli.Context) (e error) { pg: pg, encryptionKeys: encryptionKeys, multipartSize: size, - multipartThreads: strconv.Itoa(threads), + multipartThreads: threads, ifNotExists: cliCtx.Bool("if-not-exists"), }) if urls.Error != nil { From e690ef2a995a642d200b3b9f8153dea5b0a96d1e Mon Sep 17 00:00:00 2001 From: afkbluey Date: Sun, 2 Nov 2025 15:46:07 +1100 Subject: [PATCH 4/4] chore: docstrings and tidy ready for PR review --- cmd/client-s3.go | 1 - cmd/common-methods.go | 5 +++-- cmd/cp-main.go | 6 +----- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/cmd/client-s3.go b/cmd/client-s3.go index 92dc54e454..660f2775b2 100644 --- a/cmd/client-s3.go +++ b/cmd/client-s3.go @@ -893,7 +893,6 @@ func (c *S3Client) Get(ctx context.Context, opts GetOptions) (io.ReadCloser, *Cl o.Set("x-minio-extract", "true") } if opts.RangeStart != 0 { - // SetRange with length 0 reads from RangeStart to EOF err := o.SetRange(opts.RangeStart, 0) if err != nil { return nil, nil, probe.NewError(err) diff --git a/cmd/common-methods.go b/cmd/common-methods.go index 9d1bc71b95..f5dd5cdd03 100644 --- a/cmd/common-methods.go +++ b/cmd/common-methods.go @@ -504,8 +504,9 @@ func 
uploadSourceToTargetURL(ctx context.Context, uploadOpts uploadSourceToTarge multipartThreads = uploadOpts.multipartThreads } - // Debug logs for multipart configuration - console.Debugln("DEBUG: multipart configuration - part-size:", humanize.IBytes(multipartSize), "parallel:", multipartThreads, "file size:", humanize.IBytes(uint64(length))) + if globalDebug { + console.Debugln("DEBUG: multipart configuration - part-size:", humanize.IBytes(multipartSize), "parallel:", multipartThreads, "file size:", humanize.IBytes(uint64(length))) + } putOpts := PutOptions{ metadata: filterMetadata(metadata), sse: tgtSSE, diff --git a/cmd/cp-main.go b/cmd/cp-main.go index 4d65b317f2..fd3393e276 100644 --- a/cmd/cp-main.go +++ b/cmd/cp-main.go @@ -351,7 +351,7 @@ func printCopyURLsError(cpURLs *URLs) { // Notes: // - The 5 TiB limit is a limitation of the S3 ComposeObject API // - Default part size: 128 MiB (the MinIO SDK default) for multipart uploads -// - Default parallel: 4 threads if parallel is not set +// - Default parallel: 4 threads if parallel is not set and the MC_UPLOAD_MULTIPART_THREADS env var is unset func doCopySession(ctx context.Context, cancelCopy context.CancelFunc, cli *cli.Context, encryptionKeys map[string][]prefixSSEPair, isMvCmd bool) error { var isCopied func(string) bool var totalObjects, totalBytes int64 @@ -487,9 +487,6 @@ func doCopySession(ctx context.Context, cancelCopy context.CancelFunc, cli *cli. return doCopyFake(cpURLs, pg) }, 0) } else { - // Determine if this will be a server-side copy (no data transfer through client) - // ComposeObject API supports server-side copy up to 5TiB - // Files >= 5TiB must use stream copy (download + upload) const maxServerSideCopySize = 5 * 1024 * 1024 * 1024 * 1024 // 5 TiB isServerSideCopy := cpURLs.SourceAlias == cpURLs.TargetAlias && !isZip && !checksum.IsSet() && cpURLs.SourceContent.Size < maxServerSideCopySize @@ -503,7 +500,6 @@ func doCopySession(ctx context.Context, cancelCopy context.CancelFunc, cli *cli. queueSize = 0 // No client bandwidth used for server-side copy } - // Debug log for multipart configuration if globalDebug { copyType := "server-side copy" if !isServerSideCopy {
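
Notes for reviewers: the sketches below are illustrative companions to the series, not code from the patches themselves.

The copy-strategy predicate introduced in PATCH 1 and exercised by TestCopyStrategyDecision and TestCopyStrategyMatrix reduces to four conditions. A minimal standalone sketch (the helper name and sample aliases are illustrative only):

package main

import "fmt"

// maxServerSideCopySize mirrors the 5 TiB S3 ComposeObject ceiling used across the patches.
const maxServerSideCopySize = 5 * 1024 * 1024 * 1024 * 1024

// canUseServerSideCopy restates the predicate from doCopySession and
// uploadSourceToTargetURL: same alias, no zip extraction, no checksum
// verification, and a size strictly below 5 TiB.
func canUseServerSideCopy(sourceAlias, targetAlias string, size int64, isZip, checksumSet bool) bool {
	return sourceAlias == targetAlias &&
		!isZip &&
		!checksumSet &&
		size < maxServerSideCopySize
}

func main() {
	fmt.Println(canUseServerSideCopy("minio", "minio", 1<<30, false, false))                 // true: 1 GiB on the same alias
	fmt.Println(canUseServerSideCopy("minio", "minio", maxServerSideCopySize, false, false)) // false: exactly 5 TiB falls back to stream copy
	fmt.Println(canUseServerSideCopy("minio1", "minio2", 1<<30, false, false))               // false: cross-alias always streams
}

Note the strict comparison: an object of exactly 5 TiB already streams, which is what the "Same alias, exactly 5 TiB" test case pins down.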
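Within the server-side path, PATCH 2 splits the work between two minio-go v7 calls: CopyObject for anything under its 5 GiB single-copy ceiling, and ComposeObject (multipart server-side copy, up to 5 TiB) above it. A sketch of that dispatch, assuming a plain minio-go client (the endpoint, credentials, and bucket/object names are placeholders, and this is not the actual S3Client.Copy implementation):

package main

import (
	"context"
	"log"

	"github.com/minio/minio-go/v7"
	"github.com/minio/minio-go/v7/pkg/credentials"
)

// maxCopyObjectSize mirrors the 5 GiB CopyObject ceiling from PATCH 2.
const maxCopyObjectSize = 5 * 1024 * 1024 * 1024

// serverSideCopy dispatches the same way the patched S3Client.Copy does:
// a single CopyObject below 5 GiB (or when multipart is disabled),
// a multipart ComposeObject otherwise.
func serverSideCopy(ctx context.Context, api *minio.Client, dst minio.CopyDestOptions, src minio.CopySrcOptions, size int64, disableMultipart bool) error {
	var err error
	if disableMultipart || size < maxCopyObjectSize {
		_, err = api.CopyObject(ctx, dst, src)
	} else {
		_, err = api.ComposeObject(ctx, dst, src)
	}
	return err
}

func main() {
	// Placeholder endpoint and credentials, for illustration only.
	api, err := minio.New("play.min.io", &minio.Options{
		Creds:  credentials.NewStaticV4("ACCESS-KEY", "SECRET-KEY", ""),
		Secure: true,
	})
	if err != nil {
		log.Fatal(err)
	}
	src := minio.CopySrcOptions{Bucket: "src-bucket", Object: "large.bin"}
	dst := minio.CopyDestOptions{Bucket: "dst-bucket", Object: "large.bin"}
	// 6 GiB exceeds maxCopyObjectSize, so this takes the ComposeObject path.
	if err := serverSideCopy(context.Background(), api, dst, src, 6<<30, false); err != nil {
		log.Fatal(err)
	}
}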
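The --part-size help text encodes the S3 multipart invariants: at most 10000 parts per upload and at most 5 GiB per part, so the largest addressable object is part-size × 10000. The arithmetic behind choosing a part size, as a small sketch (minPartSize is a hypothetical helper, not in the patch):

package main

import (
	"fmt"

	"github.com/dustin/go-humanize"
)

const (
	maxParts    = 10000                  // S3 cap on parts per multipart upload
	maxPartSize = 5 * 1024 * 1024 * 1024 // S3 cap per part (5 GiB), the flag's stated maximum
)

// minPartSize returns the smallest part size (rounded up) that fits
// objectSize into the 10000-part budget.
func minPartSize(objectSize uint64) uint64 {
	return (objectSize + maxParts - 1) / maxParts
}

func main() {
	twelveTiB := uint64(12) << 40 // the "12TB+" case from help example 21
	need := minPartSize(twelveTiB)
	fmt.Printf("12 TiB needs parts of at least %s (per-part ceiling: %s)\n",
		humanize.IBytes(need), humanize.IBytes(uint64(maxPartSize)))
	// Roughly 1.2 GiB per part, comfortably under the 5 GiB ceiling,
	// which is why example 21 can simply pass --part-size 5GiB.
}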
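On the range handling that PATCH 1 touched and PATCH 3 reverted: in minio-go v7, GetObjectOptions.SetRange(start, end) takes an inclusive byte range, and a positive start with end == 0 means "from start to EOF" (Range: bytes=<start>-). That is why the reverted code can keep calling SetRange(opts.RangeStart, 0); the PATCH 1 comment describing the arguments as "offset and length" mislabels them. A small sketch of the semantics (the values are arbitrary):

package main

import (
	"fmt"

	"github.com/minio/minio-go/v7"
)

func main() {
	var o minio.GetObjectOptions

	// As used in S3Client.Get: read everything from byte 1024 to EOF,
	// i.e. the Range header becomes "bytes=1024-".
	if err := o.SetRange(1024, 0); err != nil {
		fmt.Println("unexpected:", err)
	}

	// An explicit inclusive range: the first 500 bytes, "bytes=0-499".
	if err := o.SetRange(0, 499); err != nil {
		fmt.Println("unexpected:", err)
	}

	fmt.Println("both ranges accepted")
}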