diff --git a/.gitignore b/.gitignore index b333592153c..1e6b24365d0 100644 --- a/.gitignore +++ b/.gitignore @@ -21,4 +21,7 @@ integration-tests/bats/batsee_results CLAUDE.md *~ -.dir-locals.el \ No newline at end of file +.dir-locals.el +.beads +.gitattributes + diff --git a/go/store/blobstore/git_blobstore.go b/go/store/blobstore/git_blobstore.go new file mode 100644 index 00000000000..c1e7055f2f3 --- /dev/null +++ b/go/store/blobstore/git_blobstore.go @@ -0,0 +1,217 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package blobstore + +import ( + "context" + "fmt" + "io" + "strings" + + git "github.com/dolthub/dolt/go/store/blobstore/internal/git" +) + +// GitBlobstore is a Blobstore implementation backed by a git repository's object +// database (bare repo or .git directory). It stores keys as paths within the tree +// of the commit referenced by a git ref (e.g. refs/dolt/data). +// +// This initial implementation is intentionally READ-ONLY. Write-path methods +// (Put / CheckAndPut / Concatenate) return an explicit unimplemented error while +// we lock down read behavior for manifests and table files. +type GitBlobstore struct { + gitDir string + ref string + runner *git.Runner +} + +var _ Blobstore = (*GitBlobstore)(nil) + +// NewGitBlobstore creates a new read-only GitBlobstore rooted at |gitDir| and |ref|. +// |gitDir| should point at a bare repo directory or a .git directory. 
+func NewGitBlobstore(gitDir, ref string) (*GitBlobstore, error) { + r, err := git.NewRunner(gitDir) + if err != nil { + return nil, err + } + return &GitBlobstore{gitDir: gitDir, ref: ref, runner: r}, nil +} + +func (gbs *GitBlobstore) Path() string { + return fmt.Sprintf("%s@%s", gbs.gitDir, gbs.ref) +} + +func (gbs *GitBlobstore) Exists(ctx context.Context, key string) (bool, error) { + key, err := normalizeGitTreePath(key) + if err != nil { + return false, err + } + commit, ok, err := git.TryResolveRefCommit(ctx, gbs.runner, gbs.ref) + if err != nil { + return false, err + } + if !ok { + return false, nil + } + _, err = git.ResolvePathBlob(ctx, gbs.runner, commit, key) + if err != nil { + if git.IsPathNotFound(err) { + return false, nil + } + return false, err + } + return true, nil +} + +func (gbs *GitBlobstore) Get(ctx context.Context, key string, br BlobRange) (io.ReadCloser, uint64, string, error) { + key, err := normalizeGitTreePath(key) + if err != nil { + return nil, 0, "", err + } + commit, ok, err := git.TryResolveRefCommit(ctx, gbs.runner, gbs.ref) + if err != nil { + return nil, 0, "", err + } + if !ok { + // If the ref doesn't exist, treat the manifest as missing (empty store), + // but surface a hard error for other keys: the store itself is missing. + if key == "manifest" { + return nil, 0, "", NotFound{Key: key} + } + return nil, 0, "", &git.RefNotFoundError{Ref: gbs.ref} + } + + blobOID, err := git.ResolvePathBlob(ctx, gbs.runner, commit, key) + if err != nil { + if git.IsPathNotFound(err) { + return nil, 0, commit.String(), NotFound{Key: key} + } + return nil, 0, commit.String(), err + } + + sz, err := git.BlobSize(ctx, gbs.runner, blobOID) + if err != nil { + return nil, 0, commit.String(), err + } + + // TODO(gitblobstore): This streaming implementation is correct but may be slow for workloads + // that do many small ranged reads (e.g. table index/footer reads). 
Consider caching/materializing + // blobs to a local file (or using a batched git cat-file mode) to serve ranges efficiently. + rc, err := git.BlobReader(ctx, gbs.runner, blobOID) + if err != nil { + return nil, 0, commit.String(), err + } + + // Implement BlobRange by slicing the streamed blob contents. + if br.isAllRange() { + return rc, uint64(sz), commit.String(), nil + } + + pos := br.positiveRange(sz) + if pos.offset < 0 || pos.offset > sz { + _ = rc.Close() + return nil, uint64(sz), commit.String(), fmt.Errorf("invalid BlobRange offset %d for blob of size %d", pos.offset, sz) + } + if pos.length < 0 { + _ = rc.Close() + return nil, uint64(sz), commit.String(), fmt.Errorf("invalid BlobRange length %d", pos.length) + } + if pos.length == 0 { + // Read from offset to end. + pos.length = sz - pos.offset + } + // Clamp to end (defensive; positiveRange should already do this). + if pos.offset+pos.length > sz { + pos.length = sz - pos.offset + } + + // Skip to offset. + if pos.offset > 0 { + if _, err := io.CopyN(io.Discard, rc, pos.offset); err != nil { + _ = rc.Close() + return nil, uint64(sz), commit.String(), err + } + } + + return &limitReadCloser{r: io.LimitReader(rc, pos.length), c: rc}, uint64(sz), commit.String(), nil +} + +type limitReadCloser struct { + r io.Reader + c io.Closer +} + +func (l *limitReadCloser) Read(p []byte) (int, error) { return l.r.Read(p) } +func (l *limitReadCloser) Close() error { return l.c.Close() } + +func (gbs *GitBlobstore) Put(ctx context.Context, key string, totalSize int64, reader io.Reader) (string, error) { + if _, err := normalizeGitTreePath(key); err != nil { + return "", err + } + return "", fmt.Errorf("%w: GitBlobstore.Put", git.ErrUnimplemented) +} + +func (gbs *GitBlobstore) CheckAndPut(ctx context.Context, expectedVersion, key string, totalSize int64, reader io.Reader) (string, error) { + if _, err := normalizeGitTreePath(key); err != nil { + return "", err + } + return "", fmt.Errorf("%w: GitBlobstore.CheckAndPut", 
git.ErrUnimplemented) +} + +func (gbs *GitBlobstore) Concatenate(ctx context.Context, key string, sources []string) (string, error) { + if _, err := normalizeGitTreePath(key); err != nil { + return "", err + } + for _, src := range sources { + if _, err := normalizeGitTreePath(src); err != nil { + return "", err + } + } + return "", fmt.Errorf("%w: GitBlobstore.Concatenate", git.ErrUnimplemented) +} + +// normalizeGitTreePath normalizes and validates a blobstore key for use as a git tree path. +// +// Rules: +// - convert Windows-style separators: "\" -> "/" +// - disallow absolute paths (leading "/") +// - disallow empty segments and trailing "/" +// - disallow "." and ".." segments +// - disallow NUL bytes +func normalizeGitTreePath(key string) (string, error) { + if strings.ContainsRune(key, '\x00') { + return "", fmt.Errorf("invalid git blobstore key (NUL byte): %q", key) + } + key = strings.ReplaceAll(key, "\\", "/") + if key == "" { + return "", fmt.Errorf("invalid git blobstore key (empty)") + } + if strings.HasPrefix(key, "/") { + return "", fmt.Errorf("invalid git blobstore key (absolute path): %q", key) + } + + parts := strings.Split(key, "/") + for _, p := range parts { + if p == "" { + return "", fmt.Errorf("invalid git blobstore key (empty path segment): %q", key) + } + if p == "." || p == ".." { + return "", fmt.Errorf("invalid git blobstore key (path traversal): %q", key) + } + if strings.ContainsRune(p, '\x00') { + return "", fmt.Errorf("invalid git blobstore key (NUL byte): %q", key) + } + } + return key, nil +} diff --git a/go/store/blobstore/git_blobstore_test.go b/go/store/blobstore/git_blobstore_test.go new file mode 100644 index 00000000000..1d1b920f88a --- /dev/null +++ b/go/store/blobstore/git_blobstore_test.go @@ -0,0 +1,210 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package blobstore + +import ( + "context" + "errors" + "os/exec" + "testing" + + "github.com/stretchr/testify/require" + + git "github.com/dolthub/dolt/go/store/blobstore/internal/git" + "github.com/dolthub/dolt/go/store/testutils/gitrepo" +) + +func TestGitBlobstore_RefMissingIsNotFound(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not found on PATH") + } + + ctx := context.Background() + repo, err := gitrepo.InitBare(ctx, t.TempDir()+"/repo.git") + require.NoError(t, err) + + bs, err := NewGitBlobstore(repo.GitDir, "refs/dolt/data") + require.NoError(t, err) + + ok, err := bs.Exists(ctx, "manifest") + require.NoError(t, err) + require.False(t, ok) + + _, _, err = GetBytes(ctx, bs, "manifest", AllRange) + require.Error(t, err) + require.True(t, IsNotFoundError(err)) + + // For non-manifest keys, missing the ref is a hard error. 
+ _, _, _, err = bs.Get(ctx, "table", AllRange) + require.Error(t, err) + require.False(t, IsNotFoundError(err)) + var rnf *git.RefNotFoundError + require.True(t, errors.As(err, &rnf)) +} + +func TestGitBlobstore_ExistsAndGet_AllRange(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not found on PATH") + } + + ctx := context.Background() + repo, err := gitrepo.InitBare(ctx, t.TempDir()+"/repo.git") + require.NoError(t, err) + + want := []byte("hello manifest\n") + commit, err := repo.SetRefToTree(ctx, "refs/dolt/data", map[string][]byte{ + "manifest": want, + "dir/file": []byte("abc"), + }, "seed") + require.NoError(t, err) + + bs, err := NewGitBlobstore(repo.GitDir, "refs/dolt/data") + require.NoError(t, err) + + ok, err := bs.Exists(ctx, "manifest") + require.NoError(t, err) + require.True(t, ok) + + ok, err = bs.Exists(ctx, "missing") + require.NoError(t, err) + require.False(t, ok) + + // Validate key normalization: backslash -> slash. + ok, err = bs.Exists(ctx, "dir\\file") + require.NoError(t, err) + require.True(t, ok) + + got, ver, err := GetBytes(ctx, bs, "manifest", AllRange) + require.NoError(t, err) + require.Equal(t, commit, ver) + require.Equal(t, want, got) + + // Validate size + version on Get. 
+ rc, sz, ver2, err := bs.Get(ctx, "manifest", NewBlobRange(0, 5)) + require.NoError(t, err) + require.Equal(t, uint64(len(want)), sz) + require.Equal(t, commit, ver2) + _ = rc.Close() +} + +func TestGitBlobstore_Get_NotFoundMissingKey(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not found on PATH") + } + + ctx := context.Background() + repo, err := gitrepo.InitBare(ctx, t.TempDir()+"/repo.git") + require.NoError(t, err) + + _, err = repo.SetRefToTree(ctx, "refs/dolt/data", map[string][]byte{ + "present": []byte("x"), + }, "seed") + require.NoError(t, err) + + bs, err := NewGitBlobstore(repo.GitDir, "refs/dolt/data") + require.NoError(t, err) + + _, _, err = GetBytes(ctx, bs, "missing", AllRange) + require.Error(t, err) + require.True(t, IsNotFoundError(err)) +} + +func TestGitBlobstore_BlobRangeSemantics(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not found on PATH") + } + + ctx := context.Background() + repo, err := gitrepo.InitBare(ctx, t.TempDir()+"/repo.git") + require.NoError(t, err) + + maxValue := int64(16 * 1024) + testData := rangeData(0, maxValue) + + commit, err := repo.SetRefToTree(ctx, "refs/dolt/data", map[string][]byte{ + "range": testData, + }, "range fixture") + require.NoError(t, err) + + bs, err := NewGitBlobstore(repo.GitDir, "refs/dolt/data") + require.NoError(t, err) + + // full range + got, ver, err := GetBytes(ctx, bs, "range", AllRange) + require.NoError(t, err) + require.Equal(t, commit, ver) + require.Equal(t, rangeData(0, maxValue), got) + + // first 2048 bytes (1024 shorts) + got, ver, err = GetBytes(ctx, bs, "range", NewBlobRange(0, 2048)) + require.NoError(t, err) + require.Equal(t, commit, ver) + require.Equal(t, rangeData(0, 1024), got) + + // bytes 2048..4096 of original + got, ver, err = GetBytes(ctx, bs, "range", NewBlobRange(2*1024, 2*1024)) + require.NoError(t, err) + require.Equal(t, commit, ver) + require.Equal(t, rangeData(1024, 2048), got) + + // last 
2048 bytes + got, ver, err = GetBytes(ctx, bs, "range", NewBlobRange(-2*1024, 0)) + require.NoError(t, err) + require.Equal(t, commit, ver) + require.Equal(t, rangeData(maxValue-1024, maxValue), got) + + // tail slice: beginning 2048 bytes from end, size 512 + got, ver, err = GetBytes(ctx, bs, "range", NewBlobRange(-2*1024, 512)) + require.NoError(t, err) + require.Equal(t, commit, ver) + require.Equal(t, rangeData(maxValue-1024, maxValue-768), got) +} + +func TestGitBlobstore_InvalidKeysError(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not found on PATH") + } + + ctx := context.Background() + repo, err := gitrepo.InitBare(ctx, t.TempDir()+"/repo.git") + require.NoError(t, err) + + _, err = repo.SetRefToTree(ctx, "refs/dolt/data", map[string][]byte{"ok": []byte("x")}, "seed") + require.NoError(t, err) + + bs, err := NewGitBlobstore(repo.GitDir, "refs/dolt/data") + require.NoError(t, err) + + invalid := []string{ + "", + "/abs", + "../x", + "a/../b", + "a//b", + "a/", + ".", + "..", + "a/./b", + "a/\x00/b", + } + + for _, k := range invalid { + _, err := bs.Exists(ctx, k) + require.Error(t, err, "expected error for key %q", k) + + _, _, _, err = bs.Get(ctx, k, AllRange) + require.Error(t, err, "expected error for key %q", k) + } +} diff --git a/go/store/blobstore/internal/git/errors.go b/go/store/blobstore/internal/git/errors.go new file mode 100644 index 00000000000..e72bd28977c --- /dev/null +++ b/go/store/blobstore/internal/git/errors.go @@ -0,0 +1,67 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package git + +import ( + "errors" + "fmt" +) + +// ErrUnimplemented is returned by stubbed write-path APIs. It is intentionally +// exported so higher layers (e.g. GitBlobstore) can wrap or match it. +var ErrUnimplemented = errors.New("unimplemented") + +// RefNotFoundError indicates that a ref (e.g. refs/dolt/data) could not be resolved. +type RefNotFoundError struct { + Ref string +} + +func (e *RefNotFoundError) Error() string { + return fmt.Sprintf("git ref not found: %s", e.Ref) +} + +// PathNotFoundError indicates that a tree path could not be resolved within a commit. +type PathNotFoundError struct { + Commit string + Path string +} + +func (e *PathNotFoundError) Error() string { + return fmt.Sprintf("git path not found: %s:%s", e.Commit, e.Path) +} + +// NotBlobError indicates that a resolved path did not refer to a blob object. 
+type NotBlobError struct { + Commit string + Path string + Type string +} + +func (e *NotBlobError) Error() string { + if e.Type == "" { + return fmt.Sprintf("git path is not a blob: %s:%s", e.Commit, e.Path) + } + return fmt.Sprintf("git path is not a blob (%s): %s:%s", e.Type, e.Commit, e.Path) +} + +func IsRefNotFound(err error) bool { + var e *RefNotFoundError + return errors.As(err, &e) +} + +func IsPathNotFound(err error) bool { + var e *PathNotFoundError + return errors.As(err, &e) +} diff --git a/go/store/blobstore/internal/git/read.go b/go/store/blobstore/internal/git/read.go new file mode 100644 index 00000000000..4a6b16f09ab --- /dev/null +++ b/go/store/blobstore/internal/git/read.go @@ -0,0 +1,176 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package git + +import ( + "bytes" + "context" + "errors" + "fmt" + "io" + "strconv" + "strings" +) + +// OID is a git object id in hex (typically 40-char SHA1). +type OID string + +func (o OID) String() string { return string(o) } + +// TryResolveRefCommit resolves |ref| to a commit OID. Returns ok=false if the ref does not exist. +func TryResolveRefCommit(ctx context.Context, r *Runner, ref string) (oid OID, ok bool, err error) { + out, err := r.Run(ctx, RunOptions{}, "rev-parse", "--verify", "--quiet", ref+"^{commit}") + if err == nil { + s := strings.TrimSpace(string(out)) + if s == "" { + // Shouldn't happen, but treat as missing. 
+ return "", false, nil + } + return OID(s), true, nil + } + + if isRefNotFoundErr(err) { + return "", false, nil + } + return "", false, err +} + +// ResolveRefCommit resolves |ref| to a commit OID. +func ResolveRefCommit(ctx context.Context, r *Runner, ref string) (OID, error) { + oid, ok, err := TryResolveRefCommit(ctx, r, ref) + if err != nil { + return "", err + } + if !ok { + return "", &RefNotFoundError{Ref: ref} + } + return oid, nil +} + +// ResolvePathBlob resolves |path| within |commit| to a blob OID. +// It returns PathNotFoundError if the path does not exist, and NotBlobError if the +// path resolves to a non-blob object (e.g. a tree). +func ResolvePathBlob(ctx context.Context, r *Runner, commit OID, path string) (OID, error) { + spec := commit.String() + ":" + path + out, err := r.Run(ctx, RunOptions{}, "rev-parse", "--verify", spec) + if err != nil { + if isPathNotFoundErr(err) { + return "", &PathNotFoundError{Commit: commit.String(), Path: path} + } + return "", err + } + oid := strings.TrimSpace(string(out)) + if oid == "" { + return "", fmt.Errorf("git rev-parse returned empty oid for %q", spec) + } + + typ, err := CatFileType(ctx, r, OID(oid)) + if err != nil { + return "", err + } + if typ != "blob" { + return "", &NotBlobError{Commit: commit.String(), Path: path, Type: typ} + } + return OID(oid), nil +} + +// CatFileType returns the git object type for |oid| (e.g. "blob", "tree", "commit"). +func CatFileType(ctx context.Context, r *Runner, oid OID) (string, error) { + out, err := r.Run(ctx, RunOptions{}, "cat-file", "-t", oid.String()) + if err != nil { + return "", err + } + return strings.TrimSpace(string(out)), nil +} + +// BlobSize returns the size in bytes of the blob object |oid|. 
+func BlobSize(ctx context.Context, r *Runner, oid OID) (int64, error) { + out, err := r.Run(ctx, RunOptions{}, "cat-file", "-s", oid.String()) + if err != nil { + return 0, err + } + s := strings.TrimSpace(string(out)) + n, err := strconv.ParseInt(s, 10, 64) + if err != nil { + return 0, fmt.Errorf("git cat-file -s parse error (%q): %w", s, err) + } + return n, nil +} + +// BlobReader returns a reader for blob contents. The returned ReadCloser will wait for +// the git process to exit when closed, returning a CmdError if the process fails. +func BlobReader(ctx context.Context, r *Runner, oid OID) (io.ReadCloser, error) { + rc, _, err := r.Start(ctx, RunOptions{}, "cat-file", "blob", oid.String()) + return rc, err +} + +func isRefNotFoundErr(err error) bool { + ce, ok := err.(*CmdError) + if !ok { + return false + } + // For `git rev-parse --verify --quiet ^{commit}`, a missing ref typically yields exit 1 and no output. + if ce.ExitCode == 1 && len(bytes.TrimSpace(ce.Output)) == 0 { + return true + } + // Some git versions may still emit "fatal: Needed a single revision" without --quiet; keep a defensive check. + msg := strings.ToLower(string(ce.Output)) + return strings.Contains(msg, "needed a single revision") || + strings.Contains(msg, "unknown revision") || + strings.Contains(msg, "not a valid object name") +} + +func isPathNotFoundErr(err error) bool { + ce, ok := err.(*CmdError) + if !ok { + return false + } + if ce.ExitCode == 128 || ce.ExitCode == 1 { + msg := strings.ToLower(string(ce.Output)) + // Common patterns: + // - "fatal: Path 'x' does not exist in 'HEAD'" + // - "fatal: invalid object name 'HEAD:x'" + // - "fatal: Needed a single revision" + // - "fatal: ambiguous argument '...': unknown revision or path not in the working tree." 
+ if strings.Contains(msg, "does not exist in") || + strings.Contains(msg, "invalid object name") || + strings.Contains(msg, "needed a single revision") || + strings.Contains(msg, "unknown revision or path not in the working tree") { + return true + } + } + return false +} + +// ReadAllBytes is a small helper for read-path callers that want a whole object. +// This is not used by GitBlobstore.Get (which must support BlobRange), but it is useful in tests. +func ReadAllBytes(ctx context.Context, r *Runner, oid OID) ([]byte, error) { + rc, err := BlobReader(ctx, r, oid) + if err != nil { + return nil, err + } + defer rc.Close() + return io.ReadAll(rc) +} + +// NormalizeGitPlumbingError unwraps CmdError wrappers, returning the underlying error. +// Mostly useful for callers that want to compare against context cancellation. +func NormalizeGitPlumbingError(err error) error { + var ce *CmdError + if errors.As(err, &ce) && ce.Cause != nil { + return ce.Cause + } + return err +} diff --git a/go/store/blobstore/internal/git/runner.go b/go/store/blobstore/internal/git/runner.go new file mode 100644 index 00000000000..02a4353709b --- /dev/null +++ b/go/store/blobstore/internal/git/runner.go @@ -0,0 +1,255 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package git provides helpers for invoking git plumbing commands against a bare +// repository or .git directory without a working tree checkout. 
+package git + +import ( + "bytes" + "context" + "errors" + "fmt" + "io" + "os" + "os/exec" + "strings" +) + +const maxCapturedOutputBytes = 64 * 1024 + +// Runner executes git commands with GIT_DIR set (and optionally GIT_INDEX_FILE). +// It is intended for git plumbing usage and should not require a working tree. +type Runner struct { + gitPath string + gitDir string + // extraEnv is appended to os.Environ() for every command. + extraEnv []string +} + +// NewRunner creates a Runner using the git binary on PATH. +func NewRunner(gitDir string) (*Runner, error) { + p, err := exec.LookPath("git") + if err != nil { + return nil, fmt.Errorf("git not found on PATH: %w", err) + } + return NewRunnerWithGitPath(gitDir, p), nil +} + +// NewRunnerWithGitPath creates a Runner using an explicit git binary path. +func NewRunnerWithGitPath(gitDir, gitPath string) *Runner { + return &Runner{ + gitPath: gitPath, + gitDir: gitDir, + } +} + +// WithExtraEnv returns a copy of r that appends env entries (e.g. "K=V") to all commands. +func (r *Runner) WithExtraEnv(env ...string) *Runner { + cp := *r + cp.extraEnv = append(append([]string(nil), r.extraEnv...), env...) + return &cp +} + +// RunOptions control a single git invocation. +type RunOptions struct { + // Dir is the working directory for the git process. Optional. + Dir string + // IndexFile sets GIT_INDEX_FILE for the git process. Optional. + IndexFile string + // Stdin provides stdin to the git process. Optional. + Stdin io.Reader + // Stdout and Stderr override output destinations. If both are nil, output is captured and returned. + Stdout io.Writer + Stderr io.Writer + // Env is appended to the process environment. + Env []string +} + +// CmdError represents a failed git invocation with captured output. 
+type CmdError struct { + Args []string + Dir string + ExitCode int + Output []byte + Cause error +} + +func (e *CmdError) Error() string { + var b strings.Builder + b.WriteString("git command failed") + if e.ExitCode != 0 { + b.WriteString(fmt.Sprintf(" (exit %d)", e.ExitCode)) + } + if len(e.Args) > 0 { + b.WriteString("\ncommand: git ") + b.WriteString(strings.Join(e.Args, " ")) + } + if e.Dir != "" { + b.WriteString("\ndir: ") + b.WriteString(e.Dir) + } + b.WriteString("\noutput:\n") + b.WriteString(formatOutput(e.Output)) + if e.Cause != nil { + b.WriteString("\nerror: ") + b.WriteString(e.Cause.Error()) + } + return b.String() +} + +func (e *CmdError) Unwrap() error { return e.Cause } + +// Run executes "git " with GIT_DIR set and returns captured combined output +// when Stdout/Stderr are not supplied. +func (r *Runner) Run(ctx context.Context, opts RunOptions, args ...string) ([]byte, error) { + cmd := exec.CommandContext(ctx, r.gitPath, args...) //nolint:gosec // args are controlled by caller; used for internal plumbing. + if opts.Dir != "" { + cmd.Dir = opts.Dir + } + cmd.Env = r.env(opts) + + if opts.Stdin != nil { + cmd.Stdin = opts.Stdin + } + + // Capture combined output unless caller provided destinations. + var buf bytes.Buffer + if opts.Stdout == nil && opts.Stderr == nil { + cmd.Stdout = &buf + cmd.Stderr = &buf + } else { + if opts.Stdout != nil { + cmd.Stdout = opts.Stdout + } + if opts.Stderr != nil { + cmd.Stderr = opts.Stderr + } else if opts.Stdout != nil { + // Reasonable default: if only Stdout is set, send stderr there too. + cmd.Stderr = opts.Stdout + } + } + + err := cmd.Run() + out := buf.Bytes() + if err == nil { + return out, nil + } + + exitCode := 0 + var ee *exec.ExitError + if errors.As(err, &ee) { + exitCode = ee.ExitCode() + } + return out, &CmdError{ + Args: append([]string(nil), args...), + Dir: cmd.Dir, + ExitCode: exitCode, + Output: out, + Cause: err, + } +} + +// Start starts "git " and returns a ReadCloser for stdout. 
+// +// Resource management: +// - Call Close() on the returned ReadCloser to ensure the underlying git process +// is waited (cmd.Wait()) and resources are released. +// - The returned *exec.Cmd is provided for advanced uses (e.g. signals), but most +// callers should not call Wait() directly. +func (r *Runner) Start(ctx context.Context, opts RunOptions, args ...string) (io.ReadCloser, *exec.Cmd, error) { + cmd := exec.CommandContext(ctx, r.gitPath, args...) //nolint:gosec // args are controlled by caller; used for internal plumbing. + if opts.Dir != "" { + cmd.Dir = opts.Dir + } + cmd.Env = r.env(opts) + if opts.Stdin != nil { + cmd.Stdin = opts.Stdin + } + + stdout, err := cmd.StdoutPipe() + if err != nil { + return nil, nil, err + } + // Capture stderr into a buffer so failures have actionable output. + var stderr bytes.Buffer + cmd.Stderr = &stderr + + if err := cmd.Start(); err != nil { + _ = stdout.Close() + return nil, nil, err + } + + // Wrap stdout so that Close also waits to avoid zombies if callers bail early. + rc := &cmdReadCloser{ + r: stdout, + cmd: cmd, + stderr: &stderr, + args: append([]string(nil), args...), + dir: cmd.Dir, + } + return rc, cmd, nil +} + +type cmdReadCloser struct { + r io.ReadCloser + cmd *exec.Cmd + stderr *bytes.Buffer + args []string + dir string +} + +func (c *cmdReadCloser) Read(p []byte) (int, error) { return c.r.Read(p) } + +func (c *cmdReadCloser) Close() error { + _ = c.r.Close() + err := c.cmd.Wait() + if err == nil { + return nil + } + exitCode := 0 + var ee *exec.ExitError + if errors.As(err, &ee) { + exitCode = ee.ExitCode() + } + return &CmdError{ + Args: c.args, + Dir: c.dir, + ExitCode: exitCode, + Output: c.stderr.Bytes(), + Cause: err, + } +} + +func (r *Runner) env(opts RunOptions) []string { + env := append([]string(nil), os.Environ()...) + env = append(env, "GIT_DIR="+r.gitDir) + if opts.IndexFile != "" { + env = append(env, "GIT_INDEX_FILE="+opts.IndexFile) + } + env = append(env, r.extraEnv...) 
+ env = append(env, opts.Env...) + return env +} + +func formatOutput(out []byte) string { + if len(out) == 0 { + return "(no output)" + } + if len(out) <= maxCapturedOutputBytes { + return strings.TrimRight(string(out), "\n") + } + trimmed := out[len(out)-maxCapturedOutputBytes:] + return fmt.Sprintf("... (truncated; showing last %d bytes)\n%s", maxCapturedOutputBytes, strings.TrimRight(string(trimmed), "\n")) +} diff --git a/go/store/blobstore/internal/git/write.go b/go/store/blobstore/internal/git/write.go new file mode 100644 index 00000000000..a3d7a09c1ac --- /dev/null +++ b/go/store/blobstore/internal/git/write.go @@ -0,0 +1,104 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package git + +import ( + "context" + "fmt" +) + +// WriteAPI defines the git plumbing operations needed for Approach A (temporary index +// via GIT_INDEX_FILE) to perform updates without a working tree checkout. +// +// This file intentionally does not implement these operations yet; the current +// GitBlobstore milestone is read-only. All methods on the default implementation +// return ErrUnimplemented. +type WriteAPI interface { + // ReadTree populates |indexFile| with the entries from |commit|'s root tree. + // Equivalent plumbing: + // GIT_DIR=... GIT_INDEX_FILE= git read-tree ^{tree} + ReadTree(ctx context.Context, commit OID, indexFile string) error + + // ReadTreeEmpty initializes |indexFile| to an empty index. 
+ // Equivalent plumbing: + // GIT_DIR=... GIT_INDEX_FILE= git read-tree --empty + ReadTreeEmpty(ctx context.Context, indexFile string) error + + // UpdateIndexCacheInfo adds or replaces |path| in |indexFile| with the given blob |oid| and filemode. + // Equivalent plumbing: + // GIT_DIR=... GIT_INDEX_FILE= git update-index --add --cacheinfo + UpdateIndexCacheInfo(ctx context.Context, indexFile string, mode string, oid OID, path string) error + + // WriteTree writes a tree object from the contents of |indexFile| and returns its oid. + // Equivalent plumbing: + // GIT_DIR=... GIT_INDEX_FILE= git write-tree + WriteTree(ctx context.Context, indexFile string) (OID, error) + + // CommitTree creates a commit object from |tree| with optional |parent| and returns its oid. + // Equivalent plumbing: + // GIT_DIR=... git commit-tree [-p ] -m + CommitTree(ctx context.Context, tree OID, parent *OID, message string, author *Identity) (OID, error) + + // UpdateRefCAS atomically updates |ref| from |old| to |new|. + // Equivalent plumbing: + // GIT_DIR=... git update-ref -m + UpdateRefCAS(ctx context.Context, ref string, newOID OID, oldOID OID, msg string) error + + // UpdateRef updates |ref| to |new| without a compare-and-swap. + // Equivalent plumbing: + // GIT_DIR=... git update-ref -m + UpdateRef(ctx context.Context, ref string, newOID OID, msg string) error +} + +// Identity represents git author/committer metadata. A future implementation +// may set this via environment variables (GIT_AUTHOR_NAME, etc.). +type Identity struct { + Name string + Email string +} + +// UnimplementedWriteAPI is the default write API for the read-only milestone. +// It can be embedded or returned by constructors to make write paths fail fast. 
+type UnimplementedWriteAPI struct{} + +var _ WriteAPI = UnimplementedWriteAPI{} + +func (UnimplementedWriteAPI) ReadTree(ctx context.Context, commit OID, indexFile string) error { + return fmt.Errorf("%w: ReadTree", ErrUnimplemented) +} + +func (UnimplementedWriteAPI) ReadTreeEmpty(ctx context.Context, indexFile string) error { + return fmt.Errorf("%w: ReadTreeEmpty", ErrUnimplemented) +} + +func (UnimplementedWriteAPI) UpdateIndexCacheInfo(ctx context.Context, indexFile string, mode string, oid OID, path string) error { + return fmt.Errorf("%w: UpdateIndexCacheInfo", ErrUnimplemented) +} + +func (UnimplementedWriteAPI) WriteTree(ctx context.Context, indexFile string) (OID, error) { + return "", fmt.Errorf("%w: WriteTree", ErrUnimplemented) +} + +func (UnimplementedWriteAPI) CommitTree(ctx context.Context, tree OID, parent *OID, message string, author *Identity) (OID, error) { + return "", fmt.Errorf("%w: CommitTree", ErrUnimplemented) +} + +func (UnimplementedWriteAPI) UpdateRefCAS(ctx context.Context, ref string, newOID OID, oldOID OID, msg string) error { + return fmt.Errorf("%w: UpdateRefCAS", ErrUnimplemented) +} + +func (UnimplementedWriteAPI) UpdateRef(ctx context.Context, ref string, newOID OID, msg string) error { + return fmt.Errorf("%w: UpdateRef", ErrUnimplemented) +} diff --git a/go/store/nbs/git_blobstore_read_smoke_test.go b/go/store/nbs/git_blobstore_read_smoke_test.go new file mode 100644 index 00000000000..590f39588df --- /dev/null +++ b/go/store/nbs/git_blobstore_read_smoke_test.go @@ -0,0 +1,110 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package nbs + +import ( + "bytes" + "context" + "io" + "os/exec" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/dolthub/dolt/go/store/blobstore" + "github.com/dolthub/dolt/go/store/hash" + "github.com/dolthub/dolt/go/store/testutils/gitrepo" + "github.com/dolthub/dolt/go/store/types" +) + +func TestGitBlobstoreReadSmoke_ManifestAndTableAccessPatterns(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not found on PATH") + } + + ctx := context.Background() + repo, err := gitrepo.InitBare(ctx, t.TempDir()+"/repo.git") + require.NoError(t, err) + + // Seed a valid v5 manifest with no tables. This should allow NBS to open + // without triggering any write paths. 
+ mc := manifestContents{ + nbfVers: types.Format_DOLT.VersionString(), + lock: hash.Of([]byte("lock")), + root: hash.Of([]byte("root")), + gcGen: hash.Of([]byte("gcgen")), + specs: nil, + } + var buf bytes.Buffer + require.NoError(t, writeManifest(&buf, mc)) + + // Seed a "table-like" blob to exercise the same access patterns NBS uses: + // - tail reads via negative BlobRange offsets + // - ReadAt-style ranged reads (ReadAtWithStats) + table := make([]byte, 64*1024) + for i := range table { + table[i] = byte(i % 251) + } + + commit, err := repo.SetRefToTree(ctx, "refs/dolt/data", map[string][]byte{ + "manifest": buf.Bytes(), + "table": table, + }, "seed refs/dolt/data for smoke test") + require.NoError(t, err) + require.NotEmpty(t, commit) + + bs, err := blobstore.NewGitBlobstore(repo.GitDir, "refs/dolt/data") + require.NoError(t, err) + + // 1) Manifest read path via blobstoreManifest.ParseIfExists. + stats := NewStats() + exists, got, err := blobstoreManifest{bs: bs}.ParseIfExists(ctx, stats, nil) + require.NoError(t, err) + require.True(t, exists) + require.Equal(t, mc.nbfVers, got.nbfVers) + require.Equal(t, mc.root, got.root) + require.Equal(t, mc.lock, got.lock) + require.Equal(t, mc.gcGen, got.gcGen) + require.Len(t, got.specs, 0) + + // 2) Tail-read pattern used by table index/footer loads: + // bs.Get(key, NewBlobRange(-N, 0)) and io.ReadFull. + const tailN = 1024 + rc, totalSz, ver, err := bs.Get(ctx, "table", blobstore.NewBlobRange(-tailN, 0)) + require.NoError(t, err) + require.Equal(t, uint64(len(table)), totalSz) + require.Equal(t, commit, ver) + tail := make([]byte, tailN) + _, err = io.ReadFull(rc, tail) + require.NoError(t, err) + require.NoError(t, rc.Close()) + require.Equal(t, table[len(table)-tailN:], tail) + + // 3) ReadAt-style ranged reads used by table readers. 
+ tr := &bsTableReaderAt{bs: bs, key: "table"} + out := make([]byte, 4096) + n, err := tr.ReadAtWithStats(ctx, out, 1234, stats) + require.NoError(t, err) + require.Equal(t, len(out), n) + require.Equal(t, table[1234:1234+int64(len(out))], out) + + // Near-end reads should return short read without error. + out2 := make([]byte, 4096) + start := int64(len(table) - 100) + n, err = tr.ReadAtWithStats(ctx, out2, start, stats) + require.NoError(t, err) + require.Equal(t, 100, n) + require.Equal(t, table[start:], out2[:n]) +} diff --git a/go/store/testutils/gitrepo/gitrepo.go b/go/store/testutils/gitrepo/gitrepo.go new file mode 100644 index 00000000000..31113c356d1 --- /dev/null +++ b/go/store/testutils/gitrepo/gitrepo.go @@ -0,0 +1,211 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package gitrepo contains test helpers for creating and manipulating git repositories +// using plumbing commands without requiring a working tree checkout. +// +// This package is intended for tests of GitBlobstore and related read paths. It +// deliberately uses the git CLI (not a Go git library) to keep the harness small +// and to match how the initial GitBlobstore implementation interacts with git. +package gitrepo + +import ( + "bytes" + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + "sort" + "strings" +) + +// Repo is a test-only handle to a bare git repository (its directory is the GIT_DIR). 
// Repo is a test-only handle to a bare git repository (its directory is the GIT_DIR).
type Repo struct {
	// GitDir is the path to the bare repository directory.
	GitDir string
}

// InitBare initializes a new bare git repository at |dir| by running
// `git init --bare <dir>` and returns a handle whose GitDir is |dir|.
// For portability across git versions, callers should generally pass a path that
// does not exist yet.
func InitBare(ctx context.Context, dir string) (*Repo, error) {
	if err := runGit(ctx, "", "", "", "init", "--bare", dir); err != nil {
		return nil, err
	}
	return &Repo{GitDir: dir}, nil
}

// InitBareTemp creates and initializes a new bare git repository under |parentDir|
// (or os.TempDir if empty). The repository lives at "repo.git" inside a freshly
// created "gitrepo-bare-*" directory. If initialization fails, that scratch
// directory is removed again so failed attempts do not accumulate on disk.
func InitBareTemp(ctx context.Context, parentDir string) (*Repo, error) {
	if parentDir == "" {
		parentDir = os.TempDir()
	}
	dir, err := os.MkdirTemp(parentDir, "gitrepo-bare-")
	if err != nil {
		return nil, err
	}
	// git init --bare expects the target directory to not exist in some versions;
	// to avoid that, create a child directory.
	repo, err := InitBare(ctx, filepath.Join(dir, "repo.git"))
	if err != nil {
		// Best-effort cleanup: the init failure is the error worth reporting.
		_ = os.RemoveAll(dir)
		return nil, err
	}
	return repo, nil
}

// SetRefToTree writes a commit whose tree contains |files| and updates |ref| to point at it.
// This is done without a working tree checkout using a temporary index (GIT_INDEX_FILE).
//
//   - |ref| example: "refs/dolt/data"
//   - |files| keys are tree paths (e.g. "manifest", "a/b/c")
//   - |message| becomes the commit message (defaults to "test commit" if empty)
//
// The commit is created without a parent. On success the new commit's oid is
// returned with surrounding whitespace removed.
func (r *Repo) SetRefToTree(ctx context.Context, ref string, files map[string][]byte, message string) (commitOID string, err error) {
	if message == "" {
		message = "test commit"
	}

	// The temporary index lives in its own directory, removed when we return.
	indexDir, err := os.MkdirTemp("", "gitrepo-index-")
	if err != nil {
		return "", err
	}
	defer func() {
		_ = os.RemoveAll(indexDir)
	}()

	indexFile := filepath.Join(indexDir, "index")

	// Empty index.
	if err := runGit(ctx, r.GitDir, indexFile, "", "read-tree", "--empty"); err != nil {
		return "", err
	}

	// Add paths. Sort for determinism.
	paths := make([]string, 0, len(files))
	for p := range files {
		paths = append(paths, p)
	}
	sort.Strings(paths)

	for _, p := range paths {
		oid, err := hashObject(ctx, r.GitDir, files[p])
		if err != nil {
			return "", err
		}
		if err := runGit(ctx, r.GitDir, indexFile, "", "update-index", "--add", "--cacheinfo", "100644", oid, p); err != nil {
			return "", err
		}
	}

	treeOID, err := outputGit(ctx, r.GitDir, indexFile, nil, "write-tree")
	if err != nil {
		return "", err
	}
	treeOID = strings.TrimSpace(treeOID)
	if treeOID == "" {
		return "", fmt.Errorf("write-tree returned empty oid")
	}

	commitOID, err = outputGit(ctx, r.GitDir, "", commitEnv(), "commit-tree", treeOID, "-m", message)
	if err != nil {
		return "", err
	}
	commitOID = strings.TrimSpace(commitOID)
	if commitOID == "" {
		return "", fmt.Errorf("commit-tree returned empty oid")
	}

	if err := runGit(ctx, r.GitDir, "", "", "update-ref", ref, commitOID); err != nil {
		return "", err
	}
	return commitOID, nil
}

// commitEnv returns the environment entries that fix the author and committer
// name/email for commits created by this package.
func commitEnv() []string {
	// Deterministic-ish author/committer identity for tests.
	return []string{
		"GIT_AUTHOR_NAME=gitrepo test",
		"GIT_AUTHOR_EMAIL=gitrepo@test.invalid",
		"GIT_COMMITTER_NAME=gitrepo test",
		"GIT_COMMITTER_EMAIL=gitrepo@test.invalid",
	}
}

// hashObject writes |data| into the object database of |gitDir| via
// `git hash-object -w --stdin` and returns the resulting oid.
func hashObject(ctx context.Context, gitDir string, data []byte) (string, error) {
	out, err := outputGitWithStdin(ctx, gitDir, "", "", bytes.NewReader(data), "hash-object", "-w", "--stdin")
	if err != nil {
		return "", err
	}
	oid := strings.TrimSpace(out)
	if oid == "" {
		return "", fmt.Errorf("hash-object returned empty oid")
	}
	return oid, nil
}

// runGit runs git with |args| and discards its output. |extraEnv| is a
// newline-separated list of "K=V" entries (see splitEnv).
func runGit(ctx context.Context, gitDir, indexFile string, extraEnv string, args ...string) error {
	_, err := outputGit(ctx, gitDir, indexFile, splitEnv(extraEnv), args...)
	return err
}

// outputGit runs git with |args| and returns what it wrote to stdout.
// |extraEnv| entries are appended to the environment built by envForGit.
func outputGit(ctx context.Context, gitDir, indexFile string, extraEnv []string, args ...string) (string, error) {
	return execGit(ctx, gitDir, indexFile, extraEnv, nil, args...)
}

// outputGitWithStdin is like outputGit but feeds |stdin| to the command and
// takes |extraEnv| as a newline-separated string (see splitEnv).
func outputGitWithStdin(ctx context.Context, gitDir, indexFile string, extraEnv string, stdin *bytes.Reader, args ...string) (string, error) {
	return execGit(ctx, gitDir, indexFile, splitEnv(extraEnv), stdin, args...)
}

// execGit is the single place where the git CLI is invoked.
//
// stdout and stderr are captured separately: on success only stdout is returned,
// so anything git prints on stderr cannot leak into values parsed by callers
// (object ids). On failure the returned error wraps the exec error and includes
// the command line plus both captured streams.
func execGit(ctx context.Context, gitDir, indexFile string, env []string, stdin *bytes.Reader, args ...string) (string, error) {
	cmd := exec.CommandContext(ctx, "git", args...) //nolint:gosec // test harness invokes git with controlled args.
	cmd.Env = envForGit(gitDir, indexFile, env)
	if stdin != nil {
		// Assign only when non-nil: a nil *bytes.Reader stored in the io.Reader
		// field would be a non-nil interface holding a nil pointer.
		cmd.Stdin = stdin
	}

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if err := cmd.Run(); err != nil {
		captured := strings.TrimRight(stdout.String()+stderr.String(), "\n")
		return "", fmt.Errorf("%w\ncommand: %s\noutput:\n%s", err, cmd.String(), captured)
	}
	return stdout.String(), nil
}

// envForGit builds the environment for a git invocation: a copy of the current
// process environment, followed by GIT_DIR and GIT_INDEX_FILE when the
// corresponding argument is non-empty, followed by |extra|.
func envForGit(gitDir, indexFile string, extra []string) []string {
	env := append([]string(nil), os.Environ()...)
	if gitDir != "" {
		env = append(env, "GIT_DIR="+gitDir)
	}
	if indexFile != "" {
		env = append(env, "GIT_INDEX_FILE="+indexFile)
	}
	env = append(env, extra...)
	return env
}

// splitEnv turns a newline-separated "K=V\nK2=V2" string into a slice of
// entries. Each line is trimmed of surrounding whitespace and blank lines are
// dropped. An empty input yields nil.
func splitEnv(extraEnv string) []string {
	if extraEnv == "" {
		return nil
	}
	// Allow callers to pass "K=V\nK2=V2" style strings.
	lines := strings.Split(extraEnv, "\n")
	// Filter in place, reusing the backing array of |lines|.
	out := lines[:0]
	for _, l := range lines {
		l = strings.TrimSpace(l)
		if l != "" {
			out = append(out, l)
		}
	}
	return out
}

// TestInitBareAndSetRefToTree creates a bare repository, commits a small tree to
// refs/dolt/data with SetRefToTree, and verifies with `git cat-file -e` that the
// "manifest" path exists in the returned commit. Skipped when git is not on PATH.
func TestInitBareAndSetRefToTree(t *testing.T) {
	if _, err := exec.LookPath("git"); err != nil {
		t.Skip("git not found on PATH")
	}

	ctx := context.Background()
	root := t.TempDir()
	bareDir := filepath.Join(root, "repo.git")

	repo, err := InitBare(ctx, bareDir)
	if err != nil {
		t.Fatalf("InitBare failed: %v", err)
	}

	commit, err := repo.SetRefToTree(ctx, "refs/dolt/data", map[string][]byte{
		"manifest":   []byte("hello\n"),
		"dir/file":   []byte("abc"),
		"dir/file2":  []byte("def"),
		"dir2/x.txt": []byte("xyz"),
	}, "seed refs/dolt/data")
	if err != nil {
		t.Fatalf("SetRefToTree failed: %v", err)
	}
	if len(strings.TrimSpace(commit)) == 0 {
		t.Fatalf("expected non-empty commit oid")
	}

	// Validate the path exists in the commit.
	cmd := exec.CommandContext(ctx, "git", "--git-dir", repo.GitDir, "cat-file", "-e", commit+":manifest") //nolint:gosec
	if out, err := cmd.CombinedOutput(); err != nil {
		t.Fatalf("cat-file -e failed: %v\n%s", err, string(out))
	}
}