diff --git a/go/store/blobstore/git_blobstore.go b/go/store/blobstore/git_blobstore.go index c1e7055f2f3..23a4dc96725 100644 --- a/go/store/blobstore/git_blobstore.go +++ b/go/store/blobstore/git_blobstore.go @@ -34,6 +34,7 @@ type GitBlobstore struct { gitDir string ref string runner *git.Runner + api git.GitAPI } var _ Blobstore = (*GitBlobstore)(nil) @@ -45,7 +46,7 @@ func NewGitBlobstore(gitDir, ref string) (*GitBlobstore, error) { if err != nil { return nil, err } - return &GitBlobstore{gitDir: gitDir, ref: ref, runner: r}, nil + return &GitBlobstore{gitDir: gitDir, ref: ref, runner: r, api: git.NewGitAPIImpl(r)}, nil } func (gbs *GitBlobstore) Path() string { @@ -57,14 +58,14 @@ func (gbs *GitBlobstore) Exists(ctx context.Context, key string) (bool, error) { if err != nil { return false, err } - commit, ok, err := git.TryResolveRefCommit(ctx, gbs.runner, gbs.ref) + commit, ok, err := gbs.api.TryResolveRefCommit(ctx, gbs.ref) if err != nil { return false, err } if !ok { return false, nil } - _, err = git.ResolvePathBlob(ctx, gbs.runner, commit, key) + _, err = gbs.api.ResolvePathBlob(ctx, commit, key) if err != nil { if git.IsPathNotFound(err) { return false, nil @@ -79,7 +80,7 @@ func (gbs *GitBlobstore) Get(ctx context.Context, key string, br BlobRange) (io. if err != nil { return nil, 0, "", err } - commit, ok, err := git.TryResolveRefCommit(ctx, gbs.runner, gbs.ref) + commit, ok, err := gbs.api.TryResolveRefCommit(ctx, gbs.ref) if err != nil { return nil, 0, "", err } @@ -92,7 +93,7 @@ func (gbs *GitBlobstore) Get(ctx context.Context, key string, br BlobRange) (io. return nil, 0, "", &git.RefNotFoundError{Ref: gbs.ref} } - blobOID, err := git.ResolvePathBlob(ctx, gbs.runner, commit, key) + blobOID, err := gbs.api.ResolvePathBlob(ctx, commit, key) if err != nil { if git.IsPathNotFound(err) { return nil, 0, commit.String(), NotFound{Key: key} @@ -100,7 +101,7 @@ func (gbs *GitBlobstore) Get(ctx context.Context, key string, br BlobRange) (io. return nil, 0, commit.String(), err } - sz, err := git.BlobSize(ctx, gbs.runner, blobOID) + sz, err := gbs.api.BlobSize(ctx, blobOID) if err != nil { return nil, 0, commit.String(), err } @@ -108,7 +109,7 @@ func (gbs *GitBlobstore) Get(ctx context.Context, key string, br BlobRange) (io. // TODO(gitblobstore): This streaming implementation is correct but may be slow for workloads // that do many small ranged reads (e.g. table index/footer reads). Consider caching/materializing // blobs to a local file (or using a batched git cat-file mode) to serve ranges efficiently. - rc, err := git.BlobReader(ctx, gbs.runner, blobOID) + rc, err := gbs.api.BlobReader(ctx, blobOID) if err != nil { return nil, 0, commit.String(), err } diff --git a/go/store/blobstore/internal/git/write.go b/go/store/blobstore/internal/git/api.go similarity index 57% rename from go/store/blobstore/internal/git/write.go rename to go/store/blobstore/internal/git/api.go index a3d7a09c1ac..fc4f8d22269 100644 --- a/go/store/blobstore/internal/git/write.go +++ b/go/store/blobstore/internal/git/api.go @@ -16,16 +16,33 @@ package git import ( "context" - "fmt" + "io" ) -// WriteAPI defines the git plumbing operations needed for Approach A (temporary index -// via GIT_INDEX_FILE) to perform updates without a working tree checkout. -// -// This file intentionally does not implement these operations yet; the current -// GitBlobstore milestone is read-only. All methods on the default implementation -// return ErrUnimplemented. -type WriteAPI interface { +// GitAPI defines the git plumbing operations needed by GitBlobstore. It includes both +// read and write operations to allow swapping implementations (e.g. git CLI vs a Go git +// library) while keeping callers stable. +type GitAPI interface { + // TryResolveRefCommit resolves |ref| to a commit OID. Returns ok=false if the ref does not exist. + TryResolveRefCommit(ctx context.Context, ref string) (oid OID, ok bool, err error) + + // ResolveRefCommit resolves |ref| to a commit OID, returning RefNotFoundError if missing. + ResolveRefCommit(ctx context.Context, ref string) (OID, error) + + // ResolvePathBlob resolves |path| within |commit| to a blob OID. + // It returns PathNotFoundError if the path does not exist, and NotBlobError if it + // resolves to a non-blob object. + ResolvePathBlob(ctx context.Context, commit OID, path string) (OID, error) + + // CatFileType returns the git object type for |oid| (e.g. "blob", "tree", "commit"). + CatFileType(ctx context.Context, oid OID) (string, error) + + // BlobSize returns the size in bytes of the blob object |oid|. + BlobSize(ctx context.Context, oid OID) (int64, error) + + // BlobReader returns a reader for blob contents. + BlobReader(ctx context.Context, oid OID) (io.ReadCloser, error) + // ReadTree populates |indexFile| with the entries from |commit|'s root tree. // Equivalent plumbing: // GIT_DIR=... GIT_INDEX_FILE= git read-tree ^{tree} @@ -62,43 +79,9 @@ type WriteAPI interface { UpdateRef(ctx context.Context, ref string, newOID OID, msg string) error } -// Identity represents git author/committer metadata. A future implementation -// may set this via environment variables (GIT_AUTHOR_NAME, etc.). +// Identity represents git author/committer metadata. A future implementation may set +// this via environment variables (GIT_AUTHOR_NAME, etc.). type Identity struct { Name string Email string } - -// UnimplementedWriteAPI is the default write API for the read-only milestone. -// It can be embedded or returned by constructors to make write paths fail fast. -type UnimplementedWriteAPI struct{} - -var _ WriteAPI = UnimplementedWriteAPI{} - -func (UnimplementedWriteAPI) ReadTree(ctx context.Context, commit OID, indexFile string) error { - return fmt.Errorf("%w: ReadTree", ErrUnimplemented) -} - -func (UnimplementedWriteAPI) ReadTreeEmpty(ctx context.Context, indexFile string) error { - return fmt.Errorf("%w: ReadTreeEmpty", ErrUnimplemented) -} - -func (UnimplementedWriteAPI) UpdateIndexCacheInfo(ctx context.Context, indexFile string, mode string, oid OID, path string) error { - return fmt.Errorf("%w: UpdateIndexCacheInfo", ErrUnimplemented) -} - -func (UnimplementedWriteAPI) WriteTree(ctx context.Context, indexFile string) (OID, error) { - return "", fmt.Errorf("%w: WriteTree", ErrUnimplemented) -} - -func (UnimplementedWriteAPI) CommitTree(ctx context.Context, tree OID, parent *OID, message string, author *Identity) (OID, error) { - return "", fmt.Errorf("%w: CommitTree", ErrUnimplemented) -} - -func (UnimplementedWriteAPI) UpdateRefCAS(ctx context.Context, ref string, newOID OID, oldOID OID, msg string) error { - return fmt.Errorf("%w: UpdateRefCAS", ErrUnimplemented) -} - -func (UnimplementedWriteAPI) UpdateRef(ctx context.Context, ref string, newOID OID, msg string) error { - return fmt.Errorf("%w: UpdateRef", ErrUnimplemented) -} diff --git a/go/store/blobstore/internal/git/errors.go b/go/store/blobstore/internal/git/errors.go index e72bd28977c..522582fec5a 100644 --- a/go/store/blobstore/internal/git/errors.go +++ b/go/store/blobstore/internal/git/errors.go @@ -56,11 +56,6 @@ func (e *NotBlobError) Error() string { return fmt.Sprintf("git path is not a blob (%s): %s:%s", e.Type, e.Commit, e.Path) } -func IsRefNotFound(err error) bool { - var e *RefNotFoundError - return errors.As(err, &e) -} - func IsPathNotFound(err error) bool { var e *PathNotFoundError return errors.As(err, &e) diff --git a/go/store/blobstore/internal/git/impl.go b/go/store/blobstore/internal/git/impl.go new file mode 100644 index 00000000000..3f6739c3264 --- /dev/null +++ b/go/store/blobstore/internal/git/impl.go @@ -0,0 +1,233 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package git + +import ( + "bytes" + "context" + "fmt" + "io" + "strconv" + "strings" +) + +// GitAPIImpl implements GitAPI using the git CLI plumbing commands, via Runner. +// It supports reads and writes (temporary index via GIT_INDEX_FILE) +// without requiring a working tree checkout. +type GitAPIImpl struct { + r *Runner +} + +var _ GitAPI = (*GitAPIImpl)(nil) + +func NewGitAPIImpl(r *Runner) *GitAPIImpl { + return &GitAPIImpl{r: r} +} + +func (a *GitAPIImpl) TryResolveRefCommit(ctx context.Context, ref string) (oid OID, ok bool, err error) { + out, err := a.r.Run(ctx, RunOptions{}, "rev-parse", "--verify", "--quiet", ref+"^{commit}") + if err == nil { + s := strings.TrimSpace(string(out)) + if s == "" { + // Shouldn't happen, but treat as missing. + return "", false, nil + } + return OID(s), true, nil + } + + if isRefNotFoundErr(err) { + return "", false, nil + } + return "", false, err +} + +func (a *GitAPIImpl) ResolveRefCommit(ctx context.Context, ref string) (OID, error) { + oid, ok, err := a.TryResolveRefCommit(ctx, ref) + if err != nil { + return "", err + } + if !ok { + return "", &RefNotFoundError{Ref: ref} + } + return oid, nil +} + +func (a *GitAPIImpl) ResolvePathBlob(ctx context.Context, commit OID, path string) (OID, error) { + spec := commit.String() + ":" + path + out, err := a.r.Run(ctx, RunOptions{}, "rev-parse", "--verify", spec) + if err != nil { + if isPathNotFoundErr(err) { + return "", &PathNotFoundError{Commit: commit.String(), Path: path} + } + return "", err + } + oid := strings.TrimSpace(string(out)) + if oid == "" { + return "", fmt.Errorf("git rev-parse returned empty oid for %q", spec) + } + + typ, err := a.CatFileType(ctx, OID(oid)) + if err != nil { + return "", err + } + if typ != "blob" { + return "", &NotBlobError{Commit: commit.String(), Path: path, Type: typ} + } + return OID(oid), nil +} + +func (a *GitAPIImpl) CatFileType(ctx context.Context, oid OID) (string, error) { + out, err := a.r.Run(ctx, RunOptions{}, "cat-file", "-t", oid.String()) + if err != nil { + return "", err + } + return strings.TrimSpace(string(out)), nil +} + +func (a *GitAPIImpl) BlobSize(ctx context.Context, oid OID) (int64, error) { + out, err := a.r.Run(ctx, RunOptions{}, "cat-file", "-s", oid.String()) + if err != nil { + return 0, err + } + s := strings.TrimSpace(string(out)) + n, err := strconv.ParseInt(s, 10, 64) + if err != nil { + return 0, fmt.Errorf("git cat-file -s parse error (%q): %w", s, err) + } + return n, nil +} + +func (a *GitAPIImpl) BlobReader(ctx context.Context, oid OID) (io.ReadCloser, error) { + rc, _, err := a.r.Start(ctx, RunOptions{}, "cat-file", "blob", oid.String()) + return rc, err +} + +func (a *GitAPIImpl) ReadTree(ctx context.Context, commit OID, indexFile string) error { + _, err := a.r.Run(ctx, RunOptions{IndexFile: indexFile}, "read-tree", commit.String()+"^{tree}") + return err +} + +func (a *GitAPIImpl) ReadTreeEmpty(ctx context.Context, indexFile string) error { + _, err := a.r.Run(ctx, RunOptions{IndexFile: indexFile}, "read-tree", "--empty") + return err +} + +func (a *GitAPIImpl) UpdateIndexCacheInfo(ctx context.Context, indexFile string, mode string, oid OID, path string) error { + _, err := a.r.Run(ctx, RunOptions{IndexFile: indexFile}, "update-index", "--add", "--cacheinfo", mode, oid.String(), path) + return err +} + +func (a *GitAPIImpl) WriteTree(ctx context.Context, indexFile string) (OID, error) { + out, err := a.r.Run(ctx, RunOptions{IndexFile: indexFile}, "write-tree") + if err != nil { + return "", err + } + oid := strings.TrimSpace(string(out)) + if oid == "" { + return "", fmt.Errorf("git write-tree returned empty oid") + } + return OID(oid), nil +} + +func (a *GitAPIImpl) CommitTree(ctx context.Context, tree OID, parent *OID, message string, author *Identity) (OID, error) { + args := []string{"commit-tree", tree.String(), "-m", message} + if parent != nil && parent.String() != "" { + args = append(args, "-p", parent.String()) + } + + var env []string + if author != nil { + if author.Name != "" { + env = append(env, + "GIT_AUTHOR_NAME="+author.Name, + "GIT_COMMITTER_NAME="+author.Name, + ) + } + if author.Email != "" { + env = append(env, + "GIT_AUTHOR_EMAIL="+author.Email, + "GIT_COMMITTER_EMAIL="+author.Email, + ) + } + } + + out, err := a.r.Run(ctx, RunOptions{Env: env}, args...) + if err != nil { + return "", err + } + oid := strings.TrimSpace(string(out)) + if oid == "" { + return "", fmt.Errorf("git commit-tree returned empty oid") + } + return OID(oid), nil +} + +func (a *GitAPIImpl) UpdateRefCAS(ctx context.Context, ref string, newOID OID, oldOID OID, msg string) error { + args := []string{"update-ref"} + if msg != "" { + args = append(args, "-m", msg) + } + args = append(args, ref, newOID.String(), oldOID.String()) + _, err := a.r.Run(ctx, RunOptions{}, args...) + return err +} + +func (a *GitAPIImpl) UpdateRef(ctx context.Context, ref string, newOID OID, msg string) error { + args := []string{"update-ref"} + if msg != "" { + args = append(args, "-m", msg) + } + args = append(args, ref, newOID.String()) + _, err := a.r.Run(ctx, RunOptions{}, args...) + return err +} + +func isRefNotFoundErr(err error) bool { + ce, ok := err.(*CmdError) + if !ok { + return false + } + // For `git rev-parse --verify --quiet ^{commit}`, a missing ref typically yields exit 1 and no output. + if ce.ExitCode == 1 && len(bytes.TrimSpace(ce.Output)) == 0 { + return true + } + // Some git versions may still emit "fatal: Needed a single revision" without --quiet; keep a defensive check. + msg := strings.ToLower(string(ce.Output)) + return strings.Contains(msg, "needed a single revision") || + strings.Contains(msg, "unknown revision") || + strings.Contains(msg, "not a valid object name") +} + +func isPathNotFoundErr(err error) bool { + ce, ok := err.(*CmdError) + if !ok { + return false + } + if ce.ExitCode == 128 || ce.ExitCode == 1 { + msg := strings.ToLower(string(ce.Output)) + // Common patterns: + // - "fatal: Path 'x' does not exist in 'HEAD'" + // - "fatal: invalid object name 'HEAD:x'" + // - "fatal: Needed a single revision" + // - "fatal: ambiguous argument '...': unknown revision or path not in the working tree." + if strings.Contains(msg, "does not exist in") || + strings.Contains(msg, "invalid object name") || + strings.Contains(msg, "needed a single revision") || + strings.Contains(msg, "unknown revision or path not in the working tree") { + return true + } + } + return false +} diff --git a/go/store/blobstore/internal/git/oid.go b/go/store/blobstore/internal/git/oid.go new file mode 100644 index 00000000000..2b3827faa27 --- /dev/null +++ b/go/store/blobstore/internal/git/oid.go @@ -0,0 +1,20 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package git + +// OID is a git object id in hex (typically 40-char SHA1). +type OID string + +func (o OID) String() string { return string(o) } diff --git a/go/store/blobstore/internal/git/read.go b/go/store/blobstore/internal/git/read.go deleted file mode 100644 index 4a6b16f09ab..00000000000 --- a/go/store/blobstore/internal/git/read.go +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2026 Dolthub, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package git - -import ( - "bytes" - "context" - "errors" - "fmt" - "io" - "strconv" - "strings" -) - -// OID is a git object id in hex (typically 40-char SHA1). -type OID string - -func (o OID) String() string { return string(o) } - -// TryResolveRefCommit resolves |ref| to a commit OID. Returns ok=false if the ref does not exist. -func TryResolveRefCommit(ctx context.Context, r *Runner, ref string) (oid OID, ok bool, err error) { - out, err := r.Run(ctx, RunOptions{}, "rev-parse", "--verify", "--quiet", ref+"^{commit}") - if err == nil { - s := strings.TrimSpace(string(out)) - if s == "" { - // Shouldn't happen, but treat as missing. - return "", false, nil - } - return OID(s), true, nil - } - - if isRefNotFoundErr(err) { - return "", false, nil - } - return "", false, err -} - -// ResolveRefCommit resolves |ref| to a commit OID. -func ResolveRefCommit(ctx context.Context, r *Runner, ref string) (OID, error) { - oid, ok, err := TryResolveRefCommit(ctx, r, ref) - if err != nil { - return "", err - } - if !ok { - return "", &RefNotFoundError{Ref: ref} - } - return oid, nil -} - -// ResolvePathBlob resolves |path| within |commit| to a blob OID. -// It returns PathNotFoundError if the path does not exist, and NotBlobError if the -// path resolves to a non-blob object (e.g. a tree). -func ResolvePathBlob(ctx context.Context, r *Runner, commit OID, path string) (OID, error) { - spec := commit.String() + ":" + path - out, err := r.Run(ctx, RunOptions{}, "rev-parse", "--verify", spec) - if err != nil { - if isPathNotFoundErr(err) { - return "", &PathNotFoundError{Commit: commit.String(), Path: path} - } - return "", err - } - oid := strings.TrimSpace(string(out)) - if oid == "" { - return "", fmt.Errorf("git rev-parse returned empty oid for %q", spec) - } - - typ, err := CatFileType(ctx, r, OID(oid)) - if err != nil { - return "", err - } - if typ != "blob" { - return "", &NotBlobError{Commit: commit.String(), Path: path, Type: typ} - } - return OID(oid), nil -} - -// CatFileType returns the git object type for |oid| (e.g. "blob", "tree", "commit"). -func CatFileType(ctx context.Context, r *Runner, oid OID) (string, error) { - out, err := r.Run(ctx, RunOptions{}, "cat-file", "-t", oid.String()) - if err != nil { - return "", err - } - return strings.TrimSpace(string(out)), nil -} - -// BlobSize returns the size in bytes of the blob object |oid|. -func BlobSize(ctx context.Context, r *Runner, oid OID) (int64, error) { - out, err := r.Run(ctx, RunOptions{}, "cat-file", "-s", oid.String()) - if err != nil { - return 0, err - } - s := strings.TrimSpace(string(out)) - n, err := strconv.ParseInt(s, 10, 64) - if err != nil { - return 0, fmt.Errorf("git cat-file -s parse error (%q): %w", s, err) - } - return n, nil -} - -// BlobReader returns a reader for blob contents. The returned ReadCloser will wait for -// the git process to exit when closed, returning a CmdError if the process fails. -func BlobReader(ctx context.Context, r *Runner, oid OID) (io.ReadCloser, error) { - rc, _, err := r.Start(ctx, RunOptions{}, "cat-file", "blob", oid.String()) - return rc, err -} - -func isRefNotFoundErr(err error) bool { - ce, ok := err.(*CmdError) - if !ok { - return false - } - // For `git rev-parse --verify --quiet ^{commit}`, a missing ref typically yields exit 1 and no output. - if ce.ExitCode == 1 && len(bytes.TrimSpace(ce.Output)) == 0 { - return true - } - // Some git versions may still emit "fatal: Needed a single revision" without --quiet; keep a defensive check. - msg := strings.ToLower(string(ce.Output)) - return strings.Contains(msg, "needed a single revision") || - strings.Contains(msg, "unknown revision") || - strings.Contains(msg, "not a valid object name") -} - -func isPathNotFoundErr(err error) bool { - ce, ok := err.(*CmdError) - if !ok { - return false - } - if ce.ExitCode == 128 || ce.ExitCode == 1 { - msg := strings.ToLower(string(ce.Output)) - // Common patterns: - // - "fatal: Path 'x' does not exist in 'HEAD'" - // - "fatal: invalid object name 'HEAD:x'" - // - "fatal: Needed a single revision" - // - "fatal: ambiguous argument '...': unknown revision or path not in the working tree." - if strings.Contains(msg, "does not exist in") || - strings.Contains(msg, "invalid object name") || - strings.Contains(msg, "needed a single revision") || - strings.Contains(msg, "unknown revision or path not in the working tree") { - return true - } - } - return false -} - -// ReadAllBytes is a small helper for read-path callers that want a whole object. -// This is not used by GitBlobstore.Get (which must support BlobRange), but it is useful in tests. -func ReadAllBytes(ctx context.Context, r *Runner, oid OID) ([]byte, error) { - rc, err := BlobReader(ctx, r, oid) - if err != nil { - return nil, err - } - defer rc.Close() - return io.ReadAll(rc) -} - -// NormalizeGitPlumbingError unwraps CmdError wrappers, returning the underlying error. -// Mostly useful for callers that want to compare against context cancellation. -func NormalizeGitPlumbingError(err error) error { - var ce *CmdError - if errors.As(err, &ce) && ce.Cause != nil { - return ce.Cause - } - return err -}