diff --git a/.gitignore b/.gitignore
index b3de4e903..9c6f4a231 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,6 @@
 cmd/gcrane/gcrane
 cmd/krane/krane
 .DS_Store
+coverage.txt
+examples/*/main
+examples/resumable-download/resumable-download
diff --git a/examples/resumable-download/README.md b/examples/resumable-download/README.md
new file mode 100644
index 000000000..471078582
--- /dev/null
+++ b/examples/resumable-download/README.md
@@ -0,0 +1,33 @@
+# Resumable Download Example
+
+This example demonstrates how to use the resumable download feature to fetch specific byte ranges from container registry layers using HTTP range requests.
+
+## Usage
+
+```bash
+go run main.go <image@digest> <start-byte> <end-byte>
+```
+
+## Example
+
+```bash
+# Fetch the first 1024 bytes from a layer
+go run main.go gcr.io/my-repo/my-image@sha256:abc123... 0 1023
+
+# Resume a download starting from byte 1024
+go run main.go gcr.io/my-repo/my-image@sha256:abc123... 1024 2047
+```
+
+## Use Cases
+
+- **Resumable Downloads**: If a download is interrupted, you can resume from where it left off
+- **Partial Content Access**: Access only the portion of a layer you need
+- **Progressive Loading**: Load content incrementally for better user experience
+- **Bandwidth Optimization**: Download only the required portions of large layers
+
+## Notes
+
+- Range requests require a digest reference (not a tag)
+- The byte offsets are inclusive (start and end bytes are both included)
+- Hash verification is not performed on partial content
+- Not all registries support range requests (though most modern ones do)
diff --git a/examples/resumable-download/main.go b/examples/resumable-download/main.go
new file mode 100644
index 000000000..f5823be5d
--- /dev/null
+++ b/examples/resumable-download/main.go
@@ -0,0 +1,66 @@
+// Copyright 2024 Google LLC All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This example demonstrates how to use resumable downloads to fetch
+// specific byte ranges from container registry layers.
+package main
+
+import (
+	"fmt"
+	"io"
+	"log"
+	"os"
+
+	"github.com/google/go-containerregistry/pkg/name"
+	"github.com/google/go-containerregistry/pkg/v1/remote"
+)
+
+func main() {
+	if len(os.Args) < 4 {
+		fmt.Fprintf(os.Stderr, "Usage: %s <image@digest> <start-byte> <end-byte>\n", os.Args[0])
+		fmt.Fprintf(os.Stderr, "Example: %s gcr.io/my-repo/my-image@sha256:abc123... 0 1023\n", os.Args[0])
0 1023\n", os.Args[0]) + os.Exit(1) + } + + // Parse the digest reference + ref, err := name.NewDigest(os.Args[1]) + if err != nil { + log.Fatalf("Failed to parse digest: %v", err) + } + + // Parse start and end byte offsets + var start, end int64 + if _, err := fmt.Sscanf(os.Args[2], "%d", &start); err != nil { + log.Fatalf("Failed to parse start byte: %v", err) + } + if _, err := fmt.Sscanf(os.Args[3], "%d", &end); err != nil { + log.Fatalf("Failed to parse end byte: %v", err) + } + + // Fetch the byte range + log.Printf("Fetching bytes %d-%d from %s...", start, end, ref.Name()) + rc, err := remote.LayerRange(ref, start, end) + if err != nil { + log.Fatalf("Failed to fetch byte range: %v", err) + } + defer rc.Close() + + // Copy the range to stdout + n, err := io.Copy(os.Stdout, rc) + if err != nil { + log.Fatalf("Failed to read bytes: %v", err) + } + + log.Printf("\nSuccessfully read %d bytes", n) +} diff --git a/pkg/v1/remote/README.md b/pkg/v1/remote/README.md index c1e81b310..a6a774013 100644 --- a/pkg/v1/remote/README.md +++ b/pkg/v1/remote/README.md @@ -34,6 +34,46 @@ func main() { } ``` +### Resumable Downloads + +The `remote` package supports resumable downloads via HTTP range requests. This is useful for downloading large layers or for resuming interrupted downloads: + +```go +package main + +import ( + "io" + "os" + + "github.com/google/go-containerregistry/pkg/name" + "github.com/google/go-containerregistry/pkg/v1/remote" +) + +func main() { + // Parse a blob reference (digest) + ref, err := name.NewDigest("gcr.io/my-repo/my-image@sha256:abcd...") + if err != nil { + panic(err) + } + + // Download a specific byte range (bytes 1000-9999) + // This is useful for resuming downloads or fetching specific portions + rc, err := remote.LayerRange(ref, 1000, 9999) + if err != nil { + panic(err) + } + defer rc.Close() + + // Copy the range to a file or process it + _, err = io.Copy(os.Stdout, rc) + if err != nil { + panic(err) + } +} +``` + +Note: When using range requests, hash verification is not performed on the partial content. To verify the integrity of the full blob, download it completely using `remote.Layer()` instead. + ## Structure

diff --git a/pkg/v1/remote/doc.go b/pkg/v1/remote/doc.go
index 846ba07cd..d79689de7 100644
--- a/pkg/v1/remote/doc.go
+++ b/pkg/v1/remote/doc.go
@@ -14,4 +14,8 @@
 
 // Package remote provides facilities for reading/writing v1.Images from/to
 // a remote image registry.
+//
+// This package supports resumable downloads via HTTP range requests. Use
+// LayerRange to download specific byte ranges of layer blobs, which is useful
+// for resuming interrupted downloads or implementing progressive loading.
 package remote
diff --git a/pkg/v1/remote/fetcher.go b/pkg/v1/remote/fetcher.go
index d77b37c0c..791c0a20e 100644
--- a/pkg/v1/remote/fetcher.go
+++ b/pkg/v1/remote/fetcher.go
@@ -246,18 +246,41 @@ func (f *fetcher) headManifest(ctx context.Context, ref name.Reference, acceptab
 }
 
 func (f *fetcher) fetchBlob(ctx context.Context, size int64, h v1.Hash) (io.ReadCloser, error) {
+	return f.fetchBlobRange(ctx, size, h, nil)
+}
+
+// ByteRange represents a byte range for partial blob downloads.
+type ByteRange struct {
+	Start int64 // Starting byte offset (inclusive)
+	End   int64 // Ending byte offset (inclusive)
+}
+
+// fetchBlobRange fetches a blob or a byte range of a blob.
+// If byteRange is nil, fetches the entire blob.
+func (f *fetcher) fetchBlobRange(ctx context.Context, size int64, h v1.Hash, byteRange *ByteRange) (io.ReadCloser, error) {
 	u := f.url("blobs", h.String())
 	req, err := http.NewRequest(http.MethodGet, u.String(), nil)
 	if err != nil {
 		return nil, err
 	}
 
+	// Add Range header if byte range is specified
+	if byteRange != nil {
+		req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", byteRange.Start, byteRange.End))
+	}
+
 	resp, err := f.client.Do(req.WithContext(ctx))
 	if err != nil {
 		return nil, redact.Error(err)
 	}
 
-	if err := transport.CheckError(resp, http.StatusOK); err != nil {
+	// For range requests, we expect either 200 (full content) or 206 (partial content)
+	expectedStatus := http.StatusOK
+	if byteRange != nil {
+		expectedStatus = http.StatusPartialContent
+	}
+
+	if err := transport.CheckError(resp, http.StatusOK, expectedStatus); err != nil {
 		resp.Body.Close()
 		return nil, err
 	}
@@ -266,13 +289,27 @@ func (f *fetcher) fetchBlob(ctx context.Context, size int64, h v1.Hash) (io.Read
 	// If we have an expected size and Content-Length doesn't match, return an error.
 	// If we don't have an expected size and we do have a Content-Length, use Content-Length.
 	if hsize := resp.ContentLength; hsize != -1 {
-		if size == verify.SizeUnknown {
-			size = hsize
-		} else if hsize != size {
-			return nil, fmt.Errorf("GET %s: Content-Length header %d does not match expected size %d", u.String(), hsize, size)
+		// For range requests, Content-Length is the size of the range, not the full blob
+		if byteRange != nil {
+			expectedRangeSize := byteRange.End - byteRange.Start + 1
+			if hsize != expectedRangeSize {
+				return nil, fmt.Errorf("GET %s: Content-Length header %d does not match expected range size %d", u.String(), hsize, expectedRangeSize)
+			}
+		} else {
+			if size == verify.SizeUnknown {
+				size = hsize
+			} else if hsize != size {
+				return nil, fmt.Errorf("GET %s: Content-Length header %d does not match expected size %d", u.String(), hsize, size)
+			}
 		}
 	}
 
+	// For range requests, we cannot verify the hash of partial content
+	if byteRange != nil {
+		// Just return the response body without hash verification
+		return resp.Body, nil
+	}
+
 	return verify.ReadCloser(resp.Body, size, h)
 }
 
diff --git a/pkg/v1/remote/layer.go b/pkg/v1/remote/layer.go
index 39c205950..24c397e69 100644
--- a/pkg/v1/remote/layer.go
+++ b/pkg/v1/remote/layer.go
@@ -27,16 +27,17 @@ import (
 
 // remoteImagelayer implements partial.CompressedLayer
 type remoteLayer struct {
-	ctx     context.Context
-	fetcher fetcher
-	digest  v1.Hash
+	ctx       context.Context
+	fetcher   fetcher
+	digest    v1.Hash
+	byteRange *ByteRange
 }
 
 // Compressed implements partial.CompressedLayer
 func (rl *remoteLayer) Compressed() (io.ReadCloser, error) {
 	// We don't want to log binary layers -- this can break terminals.
 	ctx := redact.NewContext(rl.ctx, "omitting binary blobs from logs")
-	return rl.fetcher.fetchBlob(ctx, verify.SizeUnknown, rl.digest)
+	return rl.fetcher.fetchBlobRange(ctx, verify.SizeUnknown, rl.digest, rl.byteRange)
 }
 
 // Compressed implements partial.CompressedLayer
@@ -75,3 +76,30 @@ func Layer(ref name.Digest, options ...Option) (v1.Layer, error) {
 	}
 	return newPuller(o).Layer(o.context, ref)
 }
+
+// LayerRange reads a byte range of the given blob reference from a registry as an io.ReadCloser.
+// A blob reference here is just a punned name.Digest where the digest portion is the
+// digest of the blob to be read and the repository portion is the repo where that blob lives.
+//
+// The byte range is specified with start and end offsets (both inclusive).
+// This is useful for resumable downloads where you want to download a specific portion of a layer.
+//
+// Note: Since this returns partial content, hash verification is not performed on the returned data.
+func LayerRange(ref name.Digest, start, end int64, options ...Option) (io.ReadCloser, error) {
+	o, err := makeOptions(options...)
+	if err != nil {
+		return nil, err
+	}
+
+	f, err := makeFetcher(o.context, ref.Context(), o)
+	if err != nil {
+		return nil, err
+	}
+
+	h, err := v1.NewHash(ref.Identifier())
+	if err != nil {
+		return nil, err
+	}
+
+	return f.fetchBlobRange(o.context, verify.SizeUnknown, h, &ByteRange{Start: start, End: end})
+}
diff --git a/pkg/v1/remote/layer_test.go b/pkg/v1/remote/layer_test.go
index fc1dc6759..fc0637220 100644
--- a/pkg/v1/remote/layer_test.go
+++ b/pkg/v1/remote/layer_test.go
@@ -16,6 +16,7 @@ package remote
 
 import (
 	"fmt"
+	"io"
 	"net/http/httptest"
 	"net/url"
 	"testing"
@@ -146,3 +147,151 @@ func TestRemoteLayerDescriptor(t *testing.T) {
 		t.Errorf("Exists() = %t != %t", got, want)
 	}
 }
+
+func TestLayerRange(t *testing.T) {
+	// Create a layer with known content
+	layer, err := random.Layer(1024, types.DockerLayer)
+	if err != nil {
+		t.Fatal(err)
+	}
+	digest, err := layer.Digest()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Set up a fake registry and write the layer to it
+	s := httptest.NewServer(registry.New())
+	defer s.Close()
+	u, err := url.Parse(s.URL)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	dst := fmt.Sprintf("%s/test/range@%s", u.Host, digest)
+	ref, err := name.NewDigest(dst)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if err := WriteLayer(ref.Context(), layer); err != nil {
+		t.Fatalf("failed to WriteLayer: %v", err)
+	}
+
+	// Get the full layer content for comparison
+	rc, err := layer.Compressed()
+	if err != nil {
+		t.Fatal(err)
+	}
+	fullContent, err := io.ReadAll(rc)
+	rc.Close()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Test fetching a range of bytes
+	start := int64(10)
+	end := int64(99) // Inclusive, so this is 90 bytes
+	rangeRC, err := LayerRange(ref, start, end)
+	if err != nil {
+		t.Fatalf("LayerRange failed: %v", err)
+	}
+	defer rangeRC.Close()
+
+	rangeContent, err := io.ReadAll(rangeRC)
+	if err != nil {
+		t.Fatalf("reading range content: %v", err)
+	}
+
+	// Verify the range content matches the expected slice
+	expectedContent := fullContent[start : end+1]
+	if len(rangeContent) != len(expectedContent) {
+		t.Errorf("range content length = %d, want %d", len(rangeContent), len(expectedContent))
+	}
+
+	for i := 0; i < len(expectedContent) && i < len(rangeContent); i++ {
+		if rangeContent[i] != expectedContent[i] {
+			t.Errorf("byte at offset %d: got %d, want %d", i, rangeContent[i], expectedContent[i])
+			break
+		}
+	}
+}
+
+func TestLayerRangeMultiple(t *testing.T) {
+	// Create a layer with known content
+	layer, err := random.Layer(2048, types.DockerLayer)
+	if err != nil {
+		t.Fatal(err)
+	}
+	digest, err := layer.Digest()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Set up a fake registry and write the layer to it
+	s := httptest.NewServer(registry.New())
+	defer s.Close()
+	u, err := url.Parse(s.URL)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	dst := fmt.Sprintf("%s/test/multirange@%s", u.Host, digest)
+	ref, err := name.NewDigest(dst)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if err := WriteLayer(ref.Context(), layer); err != nil {
+		t.Fatalf("failed to WriteLayer: %v", err)
+	}
+
+	// Get the full layer content for comparison
+	rc, err := layer.Compressed()
+	if err != nil {
+		t.Fatal(err)
+	}
+	fullContent, err := io.ReadAll(rc)
+	rc.Close()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Test fetching multiple different ranges
+	testCases := []struct {
+		name  string
+		start int64
+		end   int64
+	}{
+		{"first_100_bytes", 0, 99},
+		{"middle_range", 500, 699},
+		{"last_100_bytes", int64(len(fullContent) - 100), int64(len(fullContent) - 1)},
+		{"single_byte", 42, 42},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			rangeRC, err := LayerRange(ref, tc.start, tc.end)
+			if err != nil {
+				t.Fatalf("LayerRange failed: %v", err)
+			}
+			defer rangeRC.Close()
+
+			rangeContent, err := io.ReadAll(rangeRC)
+			if err != nil {
+				t.Fatalf("reading range content: %v", err)
+			}
+
+			expectedContent := fullContent[tc.start : tc.end+1]
+			if len(rangeContent) != len(expectedContent) {
+				t.Errorf("range content length = %d, want %d", len(rangeContent), len(expectedContent))
+			}
+
+			for i := 0; i < len(expectedContent) && i < len(rangeContent); i++ {
+				if rangeContent[i] != expectedContent[i] {
+					t.Errorf("byte at offset %d: got %d, want %d", i, rangeContent[i], expectedContent[i])
+					break
+				}
+			}
+		})
+	}
+}
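For context, here is a minimal sketch of how the `LayerRange` API added above might be used to resume an interrupted layer download into a local file. It assumes the blob's total size is already known to the caller (for example, from the manifest's layer descriptor) and that the partial file's current length marks the offset to resume from; the `resumeInto` helper and the placeholder reference and size in `main` are illustrative only and not part of the patch.

```go
package main

import (
	"io"
	"log"
	"os"

	"github.com/google/go-containerregistry/pkg/name"
	"github.com/google/go-containerregistry/pkg/v1/remote"
)

// resumeInto appends the missing tail of a blob to a partial local file.
// totalSize is the blob's full size, assumed to be known by the caller
// (e.g. from the manifest's layer descriptor).
func resumeInto(refStr, path string, totalSize int64) error {
	ref, err := name.NewDigest(refStr)
	if err != nil {
		return err
	}

	// Open (or create) the partial file; its current length is the resume offset.
	f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644)
	if err != nil {
		return err
	}
	defer f.Close()

	info, err := f.Stat()
	if err != nil {
		return err
	}
	start := info.Size()
	if start >= totalSize {
		return nil // nothing left to fetch
	}

	// Offsets are inclusive, so the last byte of the blob is totalSize-1.
	rc, err := remote.LayerRange(ref, start, totalSize-1)
	if err != nil {
		return err
	}
	defer rc.Close()

	// Note: range responses are not hash-verified; check the assembled file
	// against the layer digest separately once the download completes.
	_, err = io.Copy(f, rc)
	return err
}

func main() {
	// Placeholder reference and size, for illustration only.
	if err := resumeInto("gcr.io/my-repo/my-image@sha256:abc123...", "layer.tar.gz", 4*1024*1024); err != nil {
		log.Fatal(err)
	}
}
```

Because partial content is returned unverified (see the Notes section of the example README above), the assembled file should still be checked against the layer digest after the final range completes.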