Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
324 changes: 224 additions & 100 deletions go/store/blobstore/git_blobstore.go

Large diffs are not rendered by default.

91 changes: 91 additions & 0 deletions go/store/blobstore/git_blobstore_cache_merge_semantics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// Copyright 2026 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package blobstore

import (
"context"
"testing"

"github.com/stretchr/testify/require"
)

// TestGitBlobstore_CacheMerge_ImmutableKeyStableOnceCached verifies that once a
// key has been read through the blobstore, a later rewrite of that key on the
// remote does not change the bytes or the version returned for it.
func TestGitBlobstore_CacheMerge_ImmutableKeyStableOnceCached(t *testing.T) {
	requireGitOnPath(t)

	ctx := context.Background()
	remoteRepo, localRepo, _ := newRemoteAndLocalRepos(t, ctx)

	// Publish the initial contents of "k" on the remote. The key stands in for a
	// tablefile key, which is expected to be immutable.
	initial := map[string][]byte{"k": []byte("A\n")}
	_, err := remoteRepo.SetRefToTree(ctx, DoltDataRef, initial, "seed A")
	require.NoError(t, err)

	opts := GitBlobstoreOptions{RemoteName: "origin"}
	bs, err := NewGitBlobstoreWithOptions(localRepo.GitDir, DoltDataRef, opts)
	require.NoError(t, err)

	// First read: observes "A" and records the version it was served at.
	firstBytes, firstVer, err := GetBytes(ctx, bs, "k", AllRange)
	require.NoError(t, err)
	require.Equal(t, []byte("A\n"), firstBytes)
	require.NotEmpty(t, firstVer)

	// Simulate an external rewrite that repoints the same key to different bytes.
	// With merge-only cache semantics, the cached "k" must not be overwritten.
	rewritten := map[string][]byte{"k": []byte("B\n")}
	_, err = remoteRepo.SetRefToTree(ctx, DoltDataRef, rewritten, "rewrite B")
	require.NoError(t, err)

	// Second read: must still see the original bytes at the original version.
	secondBytes, secondVer, err := GetBytes(ctx, bs, "k", AllRange)
	require.NoError(t, err)
	require.Equal(t, []byte("A\n"), secondBytes)
	require.Equal(t, firstVer, secondVer)
}

// TestGitBlobstore_CacheMerge_ManifestUpdatesAcrossFetches verifies that the
// "manifest" key, unlike an immutable key, reflects the remote's newest
// contents on a subsequent read and is reported under a different version.
func TestGitBlobstore_CacheMerge_ManifestUpdatesAcrossFetches(t *testing.T) {
	requireGitOnPath(t)

	ctx := context.Background()
	remoteRepo, localRepo, _ := newRemoteAndLocalRepos(t, ctx)

	// Remote starts out with manifest contents M1.
	manifestV1 := map[string][]byte{"manifest": []byte("M1\n")}
	_, err := remoteRepo.SetRefToTree(ctx, DoltDataRef, manifestV1, "seed M1")
	require.NoError(t, err)

	opts := GitBlobstoreOptions{RemoteName: "origin"}
	bs, err := NewGitBlobstoreWithOptions(localRepo.GitDir, DoltDataRef, opts)
	require.NoError(t, err)

	beforeBytes, beforeVer, err := GetBytes(ctx, bs, "manifest", AllRange)
	require.NoError(t, err)
	require.Equal(t, []byte("M1\n"), beforeBytes)
	require.NotEmpty(t, beforeVer)

	// Remote moves the manifest forward to M2.
	manifestV2 := map[string][]byte{"manifest": []byte("M2\n")}
	_, err = remoteRepo.SetRefToTree(ctx, DoltDataRef, manifestV2, "advance M2")
	require.NoError(t, err)

	// The next read must pick up M2 and report a new version.
	afterBytes, afterVer, err := GetBytes(ctx, bs, "manifest", AllRange)
	require.NoError(t, err)
	require.Equal(t, []byte("M2\n"), afterBytes)
	require.NotEmpty(t, afterVer)
	require.NotEqual(t, beforeVer, afterVer)
}
84 changes: 84 additions & 0 deletions go/store/blobstore/git_blobstore_cache_reads_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Copyright 2026 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package blobstore

import (
"context"
"errors"
"sync/atomic"
"testing"

"github.com/stretchr/testify/require"

git "github.com/dolthub/dolt/go/store/blobstore/internal/git"
)

// forbidPathResolutionGitAPI wraps a git.GitAPI and overrides its
// path-resolution methods (ResolvePathObject and ListTree) so that every call
// to them is counted and rejected with an error. All remaining methods are
// promoted from the embedded GitAPI.
type forbidPathResolutionGitAPI struct {
git.GitAPI
// resolvePathObjectCalls counts invocations of ResolvePathObject.
resolvePathObjectCalls atomic.Int64
// listTreeCalls counts invocations of ListTree.
listTreeCalls atomic.Int64
}

// ResolvePathObject bumps the ResolvePathObject call counter and always fails,
// so a test can detect any attempt to resolve a path through the git API.
func (f *forbidPathResolutionGitAPI) ResolvePathObject(ctx context.Context, commit git.OID, path string) (git.OID, git.ObjectType, error) {
	f.resolvePathObjectCalls.Add(1)

	forbidden := errors.New("forbidden call: ResolvePathObject")
	return git.OID(""), git.ObjectTypeUnknown, forbidden
}

// ListTree bumps the ListTree call counter and always fails, so a test can
// detect any attempt to list a tree through the git API.
func (f *forbidPathResolutionGitAPI) ListTree(ctx context.Context, commit git.OID, treePath string) ([]git.TreeEntry, error) {
	f.listTreeCalls.Add(1)

	forbidden := errors.New("forbidden call: ListTree")
	return nil, forbidden
}

// TestGitBlobstore_ReadsUseCacheOnly_NoPathResolutionPlumbing verifies that
// Exists and GetBytes succeed for both a plain blob key and a chunked-tree key
// without ever invoking ResolvePathObject or ListTree on the underlying git API.
func TestGitBlobstore_ReadsUseCacheOnly_NoPathResolutionPlumbing(t *testing.T) {
	requireGitOnPath(t)

	ctx := context.Background()
	remoteRepo, localRepo, _ := newRemoteAndLocalRepos(t, ctx)

	// Seed the remote with:
	// - inline blobs ("manifest", "dir/file")
	// - a chunked-tree representation (chunk/0001, chunk/0002, chunk/0003)
	seed := map[string][]byte{
		"manifest":   []byte("hello\n"),
		"dir/file":   []byte("abc"),
		"chunk/0001": []byte("abc"),
		"chunk/0002": []byte("def"),
		"chunk/0003": []byte("ghi"),
	}
	_, err := remoteRepo.SetRefToTree(ctx, DoltDataRef, seed, "seed remote")
	require.NoError(t, err)

	opts := GitBlobstoreOptions{RemoteName: "origin"}
	bs, err := NewGitBlobstoreWithOptions(localRepo.GitDir, DoltDataRef, opts)
	require.NoError(t, err)

	// Swap in a guard that fails (and counts) any path-resolution call while
	// delegating everything else to the real API.
	guard := &forbidPathResolutionGitAPI{GitAPI: bs.api}
	bs.api = guard

	exists, err := bs.Exists(ctx, "manifest")
	require.NoError(t, err)
	require.True(t, exists)

	_, _, err = GetBytes(ctx, bs, "manifest", AllRange)
	require.NoError(t, err)

	// Chunked tree read: should be served via cache for path resolution (and stream part blobs by OID).
	chunkBytes, _, err := GetBytes(ctx, bs, "chunk", AllRange)
	require.NoError(t, err)
	require.Equal(t, []byte("abcdefghi"), chunkBytes)

	resolveCalls := guard.resolvePathObjectCalls.Load()
	listCalls := guard.listTreeCalls.Load()
	require.Equal(t, int64(0), resolveCalls, "expected no ResolvePathObject calls during reads")
	require.Equal(t, int64(0), listCalls, "expected no ListTree calls during reads")
}
100 changes: 36 additions & 64 deletions go/store/blobstore/git_blobstore_helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ type fakeGitAPI struct {
resolvePathBlob func(ctx context.Context, commit git.OID, path string) (git.OID, error)
resolvePathObject func(ctx context.Context, commit git.OID, path string) (git.OID, git.ObjectType, error)
listTree func(ctx context.Context, commit git.OID, treePath string) ([]git.TreeEntry, error)
listTreeRecursive func(ctx context.Context, commit git.OID) ([]git.TreeEntry, error)
blobSize func(ctx context.Context, oid git.OID) (int64, error)
blobReader func(ctx context.Context, oid git.OID) (io.ReadCloser, error)
fetchRef func(ctx context.Context, remote string, srcRef string, dstRef string) error
Expand All @@ -52,6 +53,12 @@ func (f fakeGitAPI) ResolvePathObject(ctx context.Context, commit git.OID, path
// ListTree forwards to the listTree hook. If the hook has not been set, it
// panics with "unexpected call" (matching ListTreeRecursive) rather than with
// an opaque nil-function dereference, so a test that reaches this method
// unintentionally fails with a recognizable message.
func (f fakeGitAPI) ListTree(ctx context.Context, commit git.OID, treePath string) ([]git.TreeEntry, error) {
	if f.listTree == nil {
		panic("unexpected call")
	}
	return f.listTree(ctx, commit, treePath)
}
// ListTreeRecursive forwards to the listTreeRecursive hook when one is set and
// panics with "unexpected call" otherwise.
func (f fakeGitAPI) ListTreeRecursive(ctx context.Context, commit git.OID) ([]git.TreeEntry, error) {
	if hook := f.listTreeRecursive; hook != nil {
		return hook(ctx, commit)
	}
	panic("unexpected call")
}
// CatFileType has no configurable hook on this fake; any call panics with
// "unexpected call".
func (f fakeGitAPI) CatFileType(ctx context.Context, oid git.OID) (string, error) {
panic("unexpected call")
}
Expand Down Expand Up @@ -160,41 +167,6 @@ func TestGitBlobstoreHelpers_resolveCommitForGet(t *testing.T) {
})
}

// TestGitBlobstoreHelpers_resolveObjectForGet exercises
// GitBlobstore.resolveObjectForGet against a fakeGitAPI whose
// resolvePathObject hook is stubbed separately in each subtest.
func TestGitBlobstoreHelpers_resolveObjectForGet(t *testing.T) {
ctx := context.Background()
commit := git.OID("0123456789abcdef0123456789abcdef01234567")

// Happy path: the hook receives the requested commit and key, and the OID and
// object type it reports are returned unchanged.
t.Run("ok", func(t *testing.T) {
api := fakeGitAPI{
resolvePathObject: func(ctx context.Context, gotCommit git.OID, path string) (git.OID, git.ObjectType, error) {
require.Equal(t, commit, gotCommit)
require.Equal(t, "k", path)
return git.OID("89abcdef0123456789abcdef0123456789abcdef"), git.ObjectTypeBlob, nil
},
}
gbs := &GitBlobstore{api: api}

oid, typ, err := gbs.resolveObjectForGet(ctx, commit, "k")
require.NoError(t, err)
require.Equal(t, git.ObjectTypeBlob, typ)
require.Equal(t, git.OID("89abcdef0123456789abcdef0123456789abcdef"), oid)
})

// A git.PathNotFoundError from the API must surface as a NotFound error that
// carries the requested key.
t.Run("pathNotFoundMapsToNotFound", func(t *testing.T) {
api := fakeGitAPI{
resolvePathObject: func(ctx context.Context, gotCommit git.OID, path string) (git.OID, git.ObjectType, error) {
return git.OID(""), git.ObjectTypeUnknown, &git.PathNotFoundError{Commit: gotCommit.String(), Path: path}
},
}
gbs := &GitBlobstore{api: api}

_, _, err := gbs.resolveObjectForGet(ctx, commit, "k")
var nf NotFound
require.ErrorAs(t, err, &nf)
require.Equal(t, "k", nf.Key)
})
}

func TestGitBlobstoreHelpers_resolveBlobSizeForGet(t *testing.T) {
ctx := context.Background()
commit := git.OID("0123456789abcdef0123456789abcdef01234567")
Expand Down Expand Up @@ -253,37 +225,23 @@ func TestGitBlobstoreHelpers_sizeAtCommit(t *testing.T) {

t.Run("blob", func(t *testing.T) {
api := fakeGitAPI{
resolvePathObject: func(ctx context.Context, gotCommit git.OID, path string) (git.OID, git.ObjectType, error) {
require.Equal(t, commit, gotCommit)
require.Equal(t, "k", path)
return git.OID("89abcdef0123456789abcdef0123456789abcdef"), git.ObjectTypeBlob, nil
},
blobSize: func(ctx context.Context, gotOID git.OID) (int64, error) {
require.Equal(t, git.OID("89abcdef0123456789abcdef0123456789abcdef"), gotOID)
return 123, nil
},
}
gbs := &GitBlobstore{api: api}
gbs := &GitBlobstore{
api: api,
cacheHead: commit,
cacheObjects: map[string]cachedGitObject{"k": {oid: git.OID("89abcdef0123456789abcdef0123456789abcdef"), typ: git.ObjectTypeBlob}},
}
sz, err := gbs.sizeAtCommit(ctx, commit, "k")
require.NoError(t, err)
require.Equal(t, uint64(123), sz)
})

t.Run("chunkedTree", func(t *testing.T) {
api := fakeGitAPI{
resolvePathObject: func(ctx context.Context, gotCommit git.OID, path string) (git.OID, git.ObjectType, error) {
require.Equal(t, commit, gotCommit)
require.Equal(t, "k", path)
return git.OID("treeoid"), git.ObjectTypeTree, nil
},
listTree: func(ctx context.Context, gotCommit git.OID, treePath string) ([]git.TreeEntry, error) {
require.Equal(t, commit, gotCommit)
require.Equal(t, "k", treePath)
return []git.TreeEntry{
{Name: "0001", Type: git.ObjectTypeBlob, OID: "0123456789abcdef0123456789abcdef01234567"},
{Name: "0002", Type: git.ObjectTypeBlob, OID: "89abcdef0123456789abcdef0123456789abcdef"},
}, nil
},
blobSize: func(ctx context.Context, oid git.OID) (int64, error) {
switch oid {
case "0123456789abcdef0123456789abcdef01234567":
Expand All @@ -295,19 +253,29 @@ func TestGitBlobstoreHelpers_sizeAtCommit(t *testing.T) {
}
},
}
gbs := &GitBlobstore{api: api}
gbs := &GitBlobstore{
api: api,
cacheHead: commit,
cacheObjects: map[string]cachedGitObject{
"k": {oid: git.OID("treeoid"), typ: git.ObjectTypeTree},
},
cacheChildren: map[string][]git.TreeEntry{
"k": {
{Name: "0001", Type: git.ObjectTypeBlob, OID: "0123456789abcdef0123456789abcdef01234567"},
{Name: "0002", Type: git.ObjectTypeBlob, OID: "89abcdef0123456789abcdef0123456789abcdef"},
},
},
}
sz, err := gbs.sizeAtCommit(ctx, commit, "k")
require.NoError(t, err)
require.Equal(t, uint64(8), sz)
})

t.Run("notFound", func(t *testing.T) {
api := fakeGitAPI{
resolvePathObject: func(ctx context.Context, gotCommit git.OID, path string) (git.OID, git.ObjectType, error) {
return git.OID(""), git.ObjectTypeUnknown, &git.PathNotFoundError{Commit: gotCommit.String(), Path: path}
},
gbs := &GitBlobstore{
cacheHead: commit,
cacheObjects: map[string]cachedGitObject{},
}
gbs := &GitBlobstore{api: api}
_, err := gbs.sizeAtCommit(ctx, commit, "missing")
var nf NotFound
require.ErrorAs(t, err, &nf)
Expand All @@ -320,9 +288,6 @@ func TestGitBlobstoreHelpers_totalSizeAtCommit_overflowInt64(t *testing.T) {
commit := git.OID("0123456789abcdef0123456789abcdef01234567")

api := fakeGitAPI{
resolvePathObject: func(ctx context.Context, gotCommit git.OID, path string) (git.OID, git.ObjectType, error) {
return git.OID(path + "_oid"), git.ObjectTypeBlob, nil
},
blobSize: func(ctx context.Context, gotOID git.OID) (int64, error) {
// Make the total exceed int64 max with two sources.
if gotOID == "a_oid" {
Expand All @@ -331,7 +296,14 @@ func TestGitBlobstoreHelpers_totalSizeAtCommit_overflowInt64(t *testing.T) {
return 1, nil
},
}
gbs := &GitBlobstore{api: api}
gbs := &GitBlobstore{
api: api,
cacheHead: commit,
cacheObjects: map[string]cachedGitObject{
"a": {oid: git.OID("a_oid"), typ: git.ObjectTypeBlob},
"b": {oid: git.OID("b_oid"), typ: git.ObjectTypeBlob},
},
}
_, err := gbs.totalSizeAtCommit(ctx, commit, []string{"a", "b"})
require.Error(t, err)
}
Expand Down
5 changes: 5 additions & 0 deletions go/store/blobstore/internal/git/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ type GitAPI interface {
// It returns PathNotFoundError if |treePath| does not exist.
ListTree(ctx context.Context, commit OID, treePath string) ([]TreeEntry, error)

// ListTreeRecursive lists all entries under |commit|'s root tree recursively.
// Returned entries include both blobs and trees, and each entry Name is the full
// path from the root (e.g. "dir/file.txt", "dir/sub").
ListTreeRecursive(ctx context.Context, commit OID) ([]TreeEntry, error)

// CatFileType returns the git object type for |oid| (e.g. "blob", "tree", "commit").
CatFileType(ctx context.Context, oid OID) (string, error)

Expand Down
25 changes: 25 additions & 0 deletions go/store/blobstore/internal/git/impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,31 @@ func (a *GitAPIImpl) ListTree(ctx context.Context, commit OID, treePath string)
return entries, nil
}

// ListTreeRecursive lists every entry under |commit|'s root tree with a single
// `git ls-tree` invocation, parsing each non-blank output line with
// parseLsTreeLine. Blank output yields a nil slice and a nil error; a command
// failure or an unparseable line is returned as the error.
func (a *GitAPIImpl) ListTreeRecursive(ctx context.Context, commit OID) ([]TreeEntry, error) {
	// -r recurses so the full snapshot arrives in one invocation; -t also emits
	// the tree entries themselves so callers can resolve directory paths as
	// tree objects.
	treeish := commit.String() + "^{tree}"
	raw, err := a.r.Run(ctx, RunOptions{}, "ls-tree", "-r", "-t", treeish)
	if err != nil {
		return nil, err
	}

	rows := strings.Split(strings.TrimRight(string(raw), "\n"), "\n")
	if len(rows) == 1 && strings.TrimSpace(rows[0]) == "" {
		// Nothing was printed: there are no entries to report.
		return nil, nil
	}

	parsed := make([]TreeEntry, 0, len(rows))
	for _, row := range rows {
		if strings.TrimSpace(row) != "" {
			entry, parseErr := parseLsTreeLine(row)
			if parseErr != nil {
				return nil, parseErr
			}
			parsed = append(parsed, entry)
		}
	}
	return parsed, nil
}

func (a *GitAPIImpl) CatFileType(ctx context.Context, oid OID) (string, error) {
out, err := a.r.Run(ctx, RunOptions{}, "cat-file", "-t", oid.String())
if err != nil {
Expand Down
Loading
Loading