From a33f7cb26b73bd639b2dd6729ae1646f4020827f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?coffeegoddd=E2=98=95=EF=B8=8F=E2=9C=A8?= Date: Tue, 10 Feb 2026 16:41:28 -0800 Subject: [PATCH 01/20] /go/{cmd,libraries}: dbfactory stuff and url stuff --- go/cmd/dolt/commands/clone.go | 7 +- go/cmd/dolt/commands/read_tables.go | 6 +- go/libraries/doltcore/dbfactory/factory.go | 28 +- go/libraries/doltcore/dbfactory/git_remote.go | 239 ++++++++++++++++++ .../doltcore/dbfactory/git_remote_test.go | 89 +++++++ go/libraries/doltcore/env/git_remote_url.go | 185 ++++++++++++++ .../doltcore/env/git_remote_url_test.go | 67 +++++ go/libraries/doltcore/env/remotes.go | 6 + .../doltcore/sqle/dprocedures/dolt_clone.go | 6 +- 9 files changed, 620 insertions(+), 13 deletions(-) create mode 100644 go/libraries/doltcore/dbfactory/git_remote.go create mode 100644 go/libraries/doltcore/dbfactory/git_remote_test.go create mode 100644 go/libraries/doltcore/env/git_remote_url.go create mode 100644 go/libraries/doltcore/env/git_remote_url_test.go diff --git a/go/cmd/dolt/commands/clone.go b/go/cmd/dolt/commands/clone.go index 3381382d608..00bf311527c 100644 --- a/go/cmd/dolt/commands/clone.go +++ b/go/cmd/dolt/commands/clone.go @@ -187,9 +187,12 @@ func parseArgs(apr *argparser.ArgParseResults) (string, string, errhand.VerboseE urlStr := apr.Arg(0) _, err := earl.Parse(urlStr) - if err != nil { - return "", "", errhand.BuildDError("error: invalid remote url: %s", urlStr).Build() + if normalized, ok, nerr := env.NormalizeGitRemoteUrl(urlStr); nerr == nil && ok { + urlStr = normalized + } else { + return "", "", errhand.BuildDError("error: invalid remote url: %s", urlStr).Build() + } } var dir string diff --git a/go/cmd/dolt/commands/read_tables.go b/go/cmd/dolt/commands/read_tables.go index 80faa375995..06e31a3a392 100644 --- a/go/cmd/dolt/commands/read_tables.go +++ b/go/cmd/dolt/commands/read_tables.go @@ -99,7 +99,11 @@ func (cmd ReadTablesCmd) Exec(ctx context.Context, commandStr string, args []str _, err := earl.Parse(urlStr) if err != nil { - return HandleVErrAndExitCode(errhand.BuildDError("Invalid remote url").AddCause(err).Build(), usage) + if normalized, ok, nerr := env.NormalizeGitRemoteUrl(urlStr); nerr == nil && ok { + urlStr = normalized + } else { + return HandleVErrAndExitCode(errhand.BuildDError("Invalid remote url").AddCause(err).Build(), usage) + } } dir := apr.GetValueOrDefault(dirParamName, path.Base(urlStr)) diff --git a/go/libraries/doltcore/dbfactory/factory.go b/go/libraries/doltcore/dbfactory/factory.go index dcb19f14386..5cf26b65be6 100644 --- a/go/libraries/doltcore/dbfactory/factory.go +++ b/go/libraries/doltcore/dbfactory/factory.go @@ -53,6 +53,12 @@ const ( OSSScheme = "oss" + // Git remote dbfactory schemes (Git remotes as Dolt remotes) + GitFileScheme = "git+file" + GitHTTPScheme = "git+http" + GitHTTPSScheme = "git+https" + GitSSHScheme = "git+ssh" + defaultScheme = HTTPSScheme defaultMemTableSize = 256 * 1024 * 1024 ) @@ -69,15 +75,19 @@ type DBFactory interface { // DBFactories is a map from url scheme name to DBFactory. Additional factories can be added to the DBFactories map // from external packages. var DBFactories = map[string]DBFactory{ - AWSScheme: AWSFactory{}, - OSSScheme: OSSFactory{}, - GSScheme: GSFactory{}, - OCIScheme: OCIFactory{}, - FileScheme: FileFactory{}, - MemScheme: MemFactory{}, - LocalBSScheme: LocalBSFactory{}, - HTTPScheme: NewDoltRemoteFactory(true), - HTTPSScheme: NewDoltRemoteFactory(false), + AWSScheme: AWSFactory{}, + OSSScheme: OSSFactory{}, + GSScheme: GSFactory{}, + OCIScheme: OCIFactory{}, + FileScheme: FileFactory{}, + MemScheme: MemFactory{}, + LocalBSScheme: LocalBSFactory{}, + HTTPScheme: NewDoltRemoteFactory(true), + HTTPSScheme: NewDoltRemoteFactory(false), + GitFileScheme: GitRemoteFactory{}, + GitHTTPScheme: GitRemoteFactory{}, + GitHTTPSScheme: GitRemoteFactory{}, + GitSSHScheme: GitRemoteFactory{}, } // CreateDB creates a database based on the supplied urlStr, and creation params. The DBFactory used for creation is diff --git a/go/libraries/doltcore/dbfactory/git_remote.go b/go/libraries/doltcore/dbfactory/git_remote.go new file mode 100644 index 00000000000..7e0ba1fcc3e --- /dev/null +++ b/go/libraries/doltcore/dbfactory/git_remote.go @@ -0,0 +1,239 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dbfactory + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "errors" + "fmt" + "net/url" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/dolthub/dolt/go/store/blobstore" + "github.com/dolthub/dolt/go/store/datas" + "github.com/dolthub/dolt/go/store/nbs" + "github.com/dolthub/dolt/go/store/prolly/tree" + "github.com/dolthub/dolt/go/store/types" +) + +const ( + gitCacheDirParam = "git_cache_dir" + gitCacheDirEnv = "DOLT_GIT_REMOTE_CACHE_DIR" + defaultGitRef = "refs/dolt/data" +) + +// GitRemoteFactory opens a Dolt database backed by a Git remote, using a local bare +// repository as an object cache and remote configuration store. +// +// Supported schemes (registered in factory.go): +// - git+file +// - git+http +// - git+https +// - git+ssh +type GitRemoteFactory struct{} + +var _ DBFactory = GitRemoteFactory{} + +func (fact GitRemoteFactory) PrepareDB(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) error { + switch strings.ToLower(urlObj.Scheme) { + case GitFileScheme: + remoteURL, _, err := parseGitRemoteFactoryURL(urlObj) + if err != nil { + return err + } + if remoteURL.Scheme != "file" { + return fmt.Errorf("git+file: expected underlying file URL, got %q", remoteURL.Scheme) + } + p := filepath.Join(remoteURL.Host, filepath.FromSlash(remoteURL.Path)) + if p == "" { + return fmt.Errorf("git+file: empty remote path") + } + if _, err := os.Stat(p); err == nil { + return nil + } else if !errors.Is(err, os.ErrNotExist) { + return err + } + return runGitInitBare(ctx, p) + default: + return fmt.Errorf("prepare not supported for scheme %q", urlObj.Scheme) + } +} + +func (fact GitRemoteFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (datas.Database, types.ValueReadWriter, tree.NodeStore, error) { + remoteURL, ref, err := parseGitRemoteFactoryURL(urlObj) + if err != nil { + return nil, nil, nil, err + } + + cacheBase, err := resolveGitCacheBase(params) + if err != nil { + return nil, nil, nil, err + } + + cacheRepo, err := cacheRepoPath(cacheBase, remoteURL.String(), ref) + if err != nil { + return nil, nil, nil, err + } + if err := ensureBareRepo(ctx, cacheRepo); err != nil { + return nil, nil, nil, err + } + + // Ensure remote "origin" exists and points to the underlying git remote URL. + if err := ensureGitRemoteURL(ctx, cacheRepo, "origin", remoteURL.String()); err != nil { + return nil, nil, nil, err + } + + q := nbs.NewUnlimitedMemQuotaProvider() + cs, err := nbs.NewGitStore(ctx, nbf.VersionString(), cacheRepo, ref, blobstore.GitBlobstoreOptions{RemoteName: "origin"}, defaultMemTableSize, q) + if err != nil { + return nil, nil, nil, err + } + + vrw := types.NewValueStore(cs) + ns := tree.NewNodeStore(cs) + db := datas.NewTypesDatabase(vrw, ns) + return db, vrw, ns, nil +} + +func parseGitRemoteFactoryURL(urlObj *url.URL) (remoteURL *url.URL, ref string, err error) { + if urlObj == nil { + return nil, "", fmt.Errorf("nil url") + } + scheme := strings.ToLower(urlObj.Scheme) + if !strings.HasPrefix(scheme, "git+") { + return nil, "", fmt.Errorf("expected git+ scheme, got %q", urlObj.Scheme) + } + underlyingScheme := strings.TrimPrefix(scheme, "git+") + if underlyingScheme == "" { + return nil, "", fmt.Errorf("invalid git+ scheme %q", urlObj.Scheme) + } + + ref = urlObj.Query().Get("ref") + if ref == "" { + ref = defaultGitRef + } + + cp := *urlObj + cp.Scheme = underlyingScheme + cp.RawQuery = "" + cp.Fragment = "" + return &cp, ref, nil +} + +func resolveGitCacheBase(params map[string]interface{}) (string, error) { + if params != nil { + if v, ok := params[gitCacheDirParam]; ok && v != nil { + s, ok := v.(string) + if !ok { + return "", fmt.Errorf("%s must be a string", gitCacheDirParam) + } + if strings.TrimSpace(s) == "" { + return "", fmt.Errorf("%s cannot be empty", gitCacheDirParam) + } + return s, nil + } + } + if v := strings.TrimSpace(os.Getenv(gitCacheDirEnv)); v != "" { + return v, nil + } + base, err := os.UserCacheDir() + if err != nil { + return "", err + } + return filepath.Join(base, "dolt", "git-remote-cache"), nil +} + +func cacheRepoPath(cacheBase, remoteURL, ref string) (string, error) { + if strings.TrimSpace(cacheBase) == "" { + return "", fmt.Errorf("empty git cache base") + } + sum := sha256.Sum256([]byte(remoteURL + "|" + ref)) + h := hex.EncodeToString(sum[:]) + return filepath.Join(cacheBase, h, "repo.git"), nil +} + +func ensureBareRepo(ctx context.Context, gitDir string) error { + if gitDir == "" { + return fmt.Errorf("empty gitDir") + } + if st, err := os.Stat(gitDir); err == nil { + if !st.IsDir() { + return fmt.Errorf("git cache repo path is not a directory: %s", gitDir) + } + return nil + } else if !errors.Is(err, os.ErrNotExist) { + return err + } + if err := os.MkdirAll(filepath.Dir(gitDir), 0o755); err != nil { + return err + } + return runGitInitBare(ctx, gitDir) +} + +func ensureGitRemoteURL(ctx context.Context, gitDir string, remoteName string, remoteURL string) error { + if strings.TrimSpace(remoteName) == "" { + return fmt.Errorf("empty remote name") + } + if strings.TrimSpace(remoteURL) == "" { + return fmt.Errorf("empty remote url") + } + got, err := runGitInDir(ctx, gitDir, "remote", "get-url", remoteName) + if err != nil { + // Remote likely doesn't exist; attempt to add. + return runGitInDirNoOutput(ctx, gitDir, "remote", "add", remoteName, remoteURL) + } + got = strings.TrimSpace(got) + if got == remoteURL { + return nil + } + return runGitInDirNoOutput(ctx, gitDir, "remote", "set-url", remoteName, remoteURL) +} + +func runGitInitBare(ctx context.Context, dir string) error { + _, err := exec.LookPath("git") + if err != nil { + return fmt.Errorf("git not found on PATH: %w", err) + } + cmd := exec.CommandContext(ctx, "git", "init", "--bare", dir) //nolint:gosec // controlled args + out, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("git init --bare failed: %w\noutput:\n%s", err, strings.TrimSpace(string(out))) + } + return nil +} + +func runGitInDir(ctx context.Context, gitDir string, args ...string) (string, error) { + _, err := exec.LookPath("git") + if err != nil { + return "", fmt.Errorf("git not found on PATH: %w", err) + } + all := append([]string{"--git-dir", gitDir}, args...) + cmd := exec.CommandContext(ctx, "git", all...) //nolint:gosec // controlled args + out, err := cmd.CombinedOutput() + if err != nil { + return "", fmt.Errorf("git %s failed: %w\noutput:\n%s", strings.Join(args, " "), err, strings.TrimSpace(string(out))) + } + return string(out), nil +} + +func runGitInDirNoOutput(ctx context.Context, gitDir string, args ...string) error { + _, err := runGitInDir(ctx, gitDir, args...) + return err +} diff --git a/go/libraries/doltcore/dbfactory/git_remote_test.go b/go/libraries/doltcore/dbfactory/git_remote_test.go new file mode 100644 index 00000000000..588091b5fcc --- /dev/null +++ b/go/libraries/doltcore/dbfactory/git_remote_test.go @@ -0,0 +1,89 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dbfactory + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/dolthub/dolt/go/store/chunks" + "github.com/dolthub/dolt/go/store/hash" + "github.com/dolthub/dolt/go/store/testutils/gitrepo" + "github.com/dolthub/dolt/go/store/types" +) + +func TestGitRemoteFactory_GitFile_UsesConfiguredCacheDirAndCanWrite(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not found on PATH") + } + + ctx := context.Background() + remoteRepo, err := gitrepo.InitBare(ctx, t.TempDir()+"/remote.git") + require.NoError(t, err) + + cacheDir := t.TempDir() + + remotePath := filepath.ToSlash(remoteRepo.GitDir) + remoteURL := "file://" + remotePath + urlStr := "git+file://" + remotePath + "?ref=refs/dolt/data" + + params := map[string]interface{}{ + gitCacheDirParam: cacheDir, + } + + db, vrw, _, err := CreateDB(ctx, types.Format_Default, urlStr, params) + require.NoError(t, err) + require.NotNil(t, db) + require.NotNil(t, vrw) + + // Ensure cache repo created under configured cache dir. + sum := sha256.Sum256([]byte(remoteURL + "|" + "refs/dolt/data")) + h := hex.EncodeToString(sum[:]) + cacheRepo := filepath.Join(cacheDir, h, "repo.git") + _, err = os.Stat(filepath.Join(cacheRepo, "HEAD")) + require.NoError(t, err) + + vs, ok := vrw.(*types.ValueStore) + require.True(t, ok, "expected ValueReadWriter to be *types.ValueStore, got %T", vrw) + cs := vs.ChunkStore() + + // Minimal write: put one chunk and commit its hash as the root. + c := chunks.NewChunk([]byte("hello\n")) + err = cs.Put(ctx, c, func(chunks.Chunk) chunks.GetAddrsCb { + return func(context.Context, hash.HashSet, chunks.PendingRefExists) error { return nil } + }) + require.NoError(t, err) + + last, err := cs.Root(ctx) + require.NoError(t, err) + okCommit, err := cs.Commit(ctx, c.Hash(), last) + require.NoError(t, err) + require.True(t, okCommit) + + require.NoError(t, db.Close()) + + // Remote should now have refs/dolt/data. + cmd := exec.CommandContext(ctx, "git", "--git-dir", remoteRepo.GitDir, "rev-parse", "--verify", "--quiet", "refs/dolt/data^{commit}") + out, err := cmd.CombinedOutput() + require.NoError(t, err, "git rev-parse failed: %s", strings.TrimSpace(string(out))) +} diff --git a/go/libraries/doltcore/env/git_remote_url.go b/go/libraries/doltcore/env/git_remote_url.go new file mode 100644 index 00000000000..d28cb291d07 --- /dev/null +++ b/go/libraries/doltcore/env/git_remote_url.go @@ -0,0 +1,185 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package env + +import ( + "fmt" + "net/url" + "path/filepath" + "strings" +) + +const defaultGitRemoteRef = "refs/dolt/data" + +var supportedGitPlusSchemes = map[string]struct{}{ + "git+file": {}, + "git+http": {}, + "git+https": {}, + "git+ssh": {}, +} + +var supportedUnderlyingGitSchemes = map[string]struct{}{ + "file": {}, + "http": {}, + "https": {}, + "ssh": {}, +} + +// NormalizeGitRemoteUrl translates user-provided git remote strings into a canonical dbfactory URL +// using git+* schemes. +// +// It accepts: +// - Explicit dbfactory URLs: git+file/http/https/ssh://... +// - URLs ending in .git: file/http/https/ssh URLs +// - scp-style ssh: [user@]host:path/repo.git +// - schemeless host/path: host/org/repo.git (defaults to git+https) +// - local paths ending in .git (absolute or relative) (translated to git+file) +// +// It returns ok=false when the input is not recognized as a git remote URL (so callers can fall back +// to existing remote handling). +func NormalizeGitRemoteUrl(urlArg string) (normalized string, ok bool, err error) { + urlArg = strings.TrimSpace(urlArg) + if urlArg == "" { + return "", false, fmt.Errorf("empty remote url") + } + + // Fast-path: explicit git+* dbfactory URL. + if strings.HasPrefix(strings.ToLower(urlArg), "git+") { + u, err := url.Parse(urlArg) + if err != nil { + return "", false, err + } + if _, ok := supportedGitPlusSchemes[strings.ToLower(u.Scheme)]; !ok { + return "", false, fmt.Errorf("unsupported git dbfactory scheme %q", u.Scheme) + } + ensureDefaultRefQuery(u) + return u.String(), true, nil + } + + // Only translate obvious git remote strings (must end in .git). + base := stripQueryAndFragment(urlArg) + if !strings.HasSuffix(base, ".git") { + return "", false, nil + } + + // scp-like ssh: [user@]host:path/repo.git (no scheme, no ://) + if isScpLikeGitRemote(urlArg) { + host, p := splitScpLike(urlArg) + ssh := "git+ssh://" + host + "/" + strings.TrimPrefix(p, "/") + u, err := url.Parse(ssh) + if err != nil { + return "", false, err + } + ensureDefaultRefQuery(u) + return u.String(), true, nil + } + + // file/http/https/ssh url with a scheme. + if strings.Contains(urlArg, "://") { + u, err := url.Parse(urlArg) + if err != nil { + return "", false, err + } + s := strings.ToLower(u.Scheme) + if _, ok := supportedUnderlyingGitSchemes[s]; !ok { + return "", false, nil + } + u.Scheme = "git+" + s + ensureDefaultRefQuery(u) + return u.String(), true, nil + } + + // Local filesystem path (absolute or relative). + if looksLikeLocalPath(urlArg) { + abs, err := filepath.Abs(urlArg) + if err != nil { + return "", false, err + } + abs = filepath.ToSlash(abs) + u, err := url.Parse("git+file://" + abs) + if err != nil { + return "", false, err + } + ensureDefaultRefQuery(u) + return u.String(), true, nil + } + + // Schemeless host/path.git defaults to https. + u, err := url.Parse("git+https://" + urlArg) + if err != nil { + return "", false, err + } + ensureDefaultRefQuery(u) + return u.String(), true, nil +} + +func stripQueryAndFragment(s string) string { + // Order matters: strip fragment then query. + if i := strings.IndexByte(s, '#'); i >= 0 { + s = s[:i] + } + if i := strings.IndexByte(s, '?'); i >= 0 { + s = s[:i] + } + return s +} + +func looksLikeLocalPath(s string) bool { + return strings.HasPrefix(s, "/") || strings.HasPrefix(s, "./") || strings.HasPrefix(s, "../") +} + +func isScpLikeGitRemote(s string) bool { + // This intentionally keeps the matcher simple: + // - no scheme (no "://") + // - contains a single ':' separating host from path + // - host part contains no '/' + // - path ends in .git (already checked by caller) + if strings.Contains(s, "://") { + return false + } + colon := strings.IndexByte(s, ':') + if colon < 0 { + return false + } + host := s[:colon] + path := s[colon+1:] + if host == "" || path == "" { + return false + } + if strings.Contains(host, "/") { + return false + } + // Avoid misclassifying Windows paths; host must contain a dot or an '@' (git@host:...). + if !strings.Contains(host, ".") && !strings.Contains(host, "@") { + return false + } + return true +} + +func splitScpLike(s string) (host string, path string) { + i := strings.IndexByte(s, ':') + if i < 0 { + return "", s + } + return s[:i], s[i+1:] +} + +func ensureDefaultRefQuery(u *url.URL) { + q := u.Query() + if q.Get("ref") == "" { + q.Set("ref", defaultGitRemoteRef) + u.RawQuery = q.Encode() + } +} diff --git a/go/libraries/doltcore/env/git_remote_url_test.go b/go/libraries/doltcore/env/git_remote_url_test.go new file mode 100644 index 00000000000..7d867451a2f --- /dev/null +++ b/go/libraries/doltcore/env/git_remote_url_test.go @@ -0,0 +1,67 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package env + +import ( + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestNormalizeGitRemoteUrl(t *testing.T) { + t.Run("explicit git+https keeps scheme and adds default ref", func(t *testing.T) { + got, ok, err := NormalizeGitRemoteUrl("git+https://example.com/org/repo.git") + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, "git+https://example.com/org/repo.git?ref=refs%2Fdolt%2Fdata", got) + }) + + t.Run("https .git becomes git+https and adds default ref", func(t *testing.T) { + got, ok, err := NormalizeGitRemoteUrl("https://example.com/org/repo.git") + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, "git+https://example.com/org/repo.git?ref=refs%2Fdolt%2Fdata", got) + }) + + t.Run("scp-style becomes git+ssh and adds default ref", func(t *testing.T) { + got, ok, err := NormalizeGitRemoteUrl("git@github.com:org/repo.git") + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, "git+ssh://git@github.com/org/repo.git?ref=refs%2Fdolt%2Fdata", got) + }) + + t.Run("schemeless host/path defaults to git+https and adds default ref", func(t *testing.T) { + got, ok, err := NormalizeGitRemoteUrl("github.com/org/repo.git") + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, "git+https://github.com/org/repo.git?ref=refs%2Fdolt%2Fdata", got) + }) + + t.Run("local absolute path becomes git+file and adds default ref", func(t *testing.T) { + p := filepath.ToSlash(filepath.Join(t.TempDir(), "remote.git")) + got, ok, err := NormalizeGitRemoteUrl(p) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, "git+file://"+p+"?ref=refs%2Fdolt%2Fdata", got) + }) + + t.Run("non .git url not recognized", func(t *testing.T) { + got, ok, err := NormalizeGitRemoteUrl("https://example.com/not-git") + require.NoError(t, err) + require.False(t, ok) + require.Empty(t, got) + }) +} diff --git a/go/libraries/doltcore/env/remotes.go b/go/libraries/doltcore/env/remotes.go index 3ea65724421..bd2b52d12a0 100644 --- a/go/libraries/doltcore/env/remotes.go +++ b/go/libraries/doltcore/env/remotes.go @@ -643,6 +643,12 @@ func NewPullSpec[C doltdb.Context]( } func GetAbsRemoteUrl(fs filesys2.Filesys, cfg config.ReadableConfig, urlArg string) (string, string, error) { + if normalized, ok, nerr := NormalizeGitRemoteUrl(urlArg); nerr != nil { + return "", "", nerr + } else if ok { + urlArg = normalized + } + u, err := earl.Parse(urlArg) if err != nil { return "", "", err diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go b/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go index 043a980e6c1..12b4856b535 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go @@ -82,7 +82,11 @@ func getDirectoryAndUrlString(apr *argparser.ArgParseResults) (string, string, e urlStr := apr.Arg(0) _, err := earl.Parse(urlStr) if err != nil { - return "", "", errhand.BuildDError("error: invalid remote url: %s", urlStr).Build() + if normalized, ok, nerr := env.NormalizeGitRemoteUrl(urlStr); nerr == nil && ok { + urlStr = normalized + } else { + return "", "", errhand.BuildDError("error: invalid remote url: %s", urlStr).Build() + } } var dir string From b262057542ec1d0e3ff6cd7cda8798262463e007 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?coffeegoddd=E2=98=95=EF=B8=8F=E2=9C=A8?= Date: Tue, 10 Feb 2026 16:56:50 -0800 Subject: [PATCH 02/20] /go/{cmd,libraries}: wip --- go/cmd/dolt/cli/arg_parser_helpers.go | 3 +++ go/cmd/dolt/commands/remote.go | 9 +++++++++ go/cmd/dolt/commands/remote_test.go | 11 +++++++++++ go/libraries/doltcore/dbfactory/git_remote.go | 12 ++++++------ .../doltcore/dbfactory/git_remote_test.go | 2 +- go/libraries/doltcore/env/git_remote_url.go | 2 +- go/libraries/doltcore/env/git_remote_url_test.go | 7 +++++++ .../doltcore/sqle/dprocedures/dolt_backup.go | 8 ++++++++ .../doltcore/sqle/dprocedures/dolt_clone.go | 7 +++++++ .../doltcore/sqle/dprocedures/dolt_remote.go | 16 ++++++++++++++-- 10 files changed, 67 insertions(+), 10 deletions(-) diff --git a/go/cmd/dolt/cli/arg_parser_helpers.go b/go/cmd/dolt/cli/arg_parser_helpers.go index 5e0a3e7c588..4a6b4f23e4e 100644 --- a/go/cmd/dolt/cli/arg_parser_helpers.go +++ b/go/cmd/dolt/cli/arg_parser_helpers.go @@ -146,6 +146,7 @@ func CreateCloneArgParser() *argparser.ArgParser { ap.SupportsString(RemoteParam, "", "name", "Name of the remote to be added to the cloned database. The default is 'origin'.") ap.SupportsString(BranchParam, "b", "branch", "The branch to be cloned. If not specified all branches will be cloned.") ap.SupportsString(DepthFlag, "", "depth", "Clone a single branch and limit history to the given commit depth.") + ap.SupportsString("git-cache-dir", "", "dir", "Local directory to use for git remote cache (sets remote param git_cache_dir).") ap.SupportsString(dbfactory.AWSRegionParam, "", "region", "") ap.SupportsValidatedString(dbfactory.AWSCredsTypeParam, "", "creds-type", "", argparser.ValidatorFromStrList(dbfactory.AWSCredsTypeParam, dbfactory.AWSCredTypes)) ap.SupportsString(dbfactory.AWSCredsFileParam, "", "file", "AWS credentials file.") @@ -166,6 +167,7 @@ func CreateResetArgParser() *argparser.ArgParser { func CreateRemoteArgParser() *argparser.ArgParser { ap := argparser.NewArgParserWithVariableArgs("remote") + ap.SupportsString("git-cache-dir", "", "dir", "Local directory to use for git remote cache (sets remote param git_cache_dir).") return ap } @@ -266,6 +268,7 @@ func CreateBackupArgParser() *argparser.ArgParser { ap.ArgListHelp = append(ap.ArgListHelp, [2]string{"profile", "AWS profile to use."}) ap.SupportsFlag(VerboseFlag, "v", "When printing the list of backups adds additional details.") ap.SupportsFlag(ForceFlag, "f", "When restoring a backup, overwrite the contents of the existing database with the same name.") + ap.SupportsString("git-cache-dir", "", "dir", "Local directory to use for git remote cache (sets remote param git_cache_dir).") ap.SupportsString(dbfactory.AWSRegionParam, "", "region", "") ap.SupportsValidatedString(dbfactory.AWSCredsTypeParam, "", "creds-type", "", argparser.ValidatorFromStrList(dbfactory.AWSCredsTypeParam, dbfactory.AWSCredTypes)) ap.SupportsString(dbfactory.AWSCredsFileParam, "", "file", "AWS credentials file") diff --git a/go/cmd/dolt/commands/remote.go b/go/cmd/dolt/commands/remote.go index 60dcca8a3a8..1e85b2835f2 100644 --- a/go/cmd/dolt/commands/remote.go +++ b/go/cmd/dolt/commands/remote.go @@ -71,6 +71,7 @@ const ( addRemoteId = "add" removeRemoteId = "remove" removeRemoteShortId = "rm" + gitCacheDirFlag = "git-cache-dir" ) type RemoteCmd struct{} @@ -212,6 +213,14 @@ func parseRemoteArgs(apr *argparser.ArgParseResults, scheme, remoteUrl string) ( err = cli.AddAWSParams(remoteUrl, apr, params) case dbfactory.OSSScheme: err = cli.AddOSSParams(remoteUrl, apr, params) + case dbfactory.GitFileScheme, dbfactory.GitHTTPScheme, dbfactory.GitHTTPSScheme, dbfactory.GitSSHScheme: + if v, ok := apr.GetValue(gitCacheDirFlag); ok { + v = strings.TrimSpace(v) + if v == "" { + return nil, errhand.BuildDError("error: --%s cannot be empty", gitCacheDirFlag).Build() + } + params[dbfactory.GitCacheDirParam] = v + } default: err = cli.VerifyNoAwsParams(apr) } diff --git a/go/cmd/dolt/commands/remote_test.go b/go/cmd/dolt/commands/remote_test.go index ef5c6d3ae72..e3fd1aa4f34 100644 --- a/go/cmd/dolt/commands/remote_test.go +++ b/go/cmd/dolt/commands/remote_test.go @@ -21,6 +21,7 @@ import ( "github.com/stretchr/testify/assert" + "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory" "github.com/dolthub/dolt/go/libraries/doltcore/env" "github.com/dolthub/dolt/go/libraries/utils/config" "github.com/dolthub/dolt/go/libraries/utils/filesys" @@ -135,3 +136,13 @@ func TestGetAbsRemoteUrl(t *testing.T) { }) } } + +func TestParseRemoteArgs_GitCacheDir(t *testing.T) { + ap := RemoteCmd{}.ArgParser() + apr, err := ap.Parse([]string{"add", "origin", "git+file:///tmp/remote.git", "--" + gitCacheDirFlag, "/tmp/cache"}) + assert.NoError(t, err) + + params, verr := parseRemoteArgs(apr, dbfactory.GitFileScheme, "git+file:///tmp/remote.git") + assert.Nil(t, verr) + assert.Equal(t, "/tmp/cache", params[dbfactory.GitCacheDirParam]) +} diff --git a/go/libraries/doltcore/dbfactory/git_remote.go b/go/libraries/doltcore/dbfactory/git_remote.go index 7e0ba1fcc3e..c6e4e1d4408 100644 --- a/go/libraries/doltcore/dbfactory/git_remote.go +++ b/go/libraries/doltcore/dbfactory/git_remote.go @@ -34,8 +34,8 @@ import ( ) const ( - gitCacheDirParam = "git_cache_dir" - gitCacheDirEnv = "DOLT_GIT_REMOTE_CACHE_DIR" + GitCacheDirParam = "git_cache_dir" + GitCacheDirEnv = "DOLT_GIT_REMOTE_CACHE_DIR" defaultGitRef = "refs/dolt/data" ) @@ -139,18 +139,18 @@ func parseGitRemoteFactoryURL(urlObj *url.URL) (remoteURL *url.URL, ref string, func resolveGitCacheBase(params map[string]interface{}) (string, error) { if params != nil { - if v, ok := params[gitCacheDirParam]; ok && v != nil { + if v, ok := params[GitCacheDirParam]; ok && v != nil { s, ok := v.(string) if !ok { - return "", fmt.Errorf("%s must be a string", gitCacheDirParam) + return "", fmt.Errorf("%s must be a string", GitCacheDirParam) } if strings.TrimSpace(s) == "" { - return "", fmt.Errorf("%s cannot be empty", gitCacheDirParam) + return "", fmt.Errorf("%s cannot be empty", GitCacheDirParam) } return s, nil } } - if v := strings.TrimSpace(os.Getenv(gitCacheDirEnv)); v != "" { + if v := strings.TrimSpace(os.Getenv(GitCacheDirEnv)); v != "" { return v, nil } base, err := os.UserCacheDir() diff --git a/go/libraries/doltcore/dbfactory/git_remote_test.go b/go/libraries/doltcore/dbfactory/git_remote_test.go index 588091b5fcc..55b99a4d6a4 100644 --- a/go/libraries/doltcore/dbfactory/git_remote_test.go +++ b/go/libraries/doltcore/dbfactory/git_remote_test.go @@ -48,7 +48,7 @@ func TestGitRemoteFactory_GitFile_UsesConfiguredCacheDirAndCanWrite(t *testing.T urlStr := "git+file://" + remotePath + "?ref=refs/dolt/data" params := map[string]interface{}{ - gitCacheDirParam: cacheDir, + GitCacheDirParam: cacheDir, } db, vrw, _, err := CreateDB(ctx, types.Format_Default, urlStr, params) diff --git a/go/libraries/doltcore/env/git_remote_url.go b/go/libraries/doltcore/env/git_remote_url.go index d28cb291d07..266f885badd 100644 --- a/go/libraries/doltcore/env/git_remote_url.go +++ b/go/libraries/doltcore/env/git_remote_url.go @@ -52,7 +52,7 @@ var supportedUnderlyingGitSchemes = map[string]struct{}{ func NormalizeGitRemoteUrl(urlArg string) (normalized string, ok bool, err error) { urlArg = strings.TrimSpace(urlArg) if urlArg == "" { - return "", false, fmt.Errorf("empty remote url") + return "", false, nil } // Fast-path: explicit git+* dbfactory URL. diff --git a/go/libraries/doltcore/env/git_remote_url_test.go b/go/libraries/doltcore/env/git_remote_url_test.go index 7d867451a2f..a02d66ed94c 100644 --- a/go/libraries/doltcore/env/git_remote_url_test.go +++ b/go/libraries/doltcore/env/git_remote_url_test.go @@ -22,6 +22,13 @@ import ( ) func TestNormalizeGitRemoteUrl(t *testing.T) { + t.Run("empty not recognized", func(t *testing.T) { + got, ok, err := NormalizeGitRemoteUrl("") + require.NoError(t, err) + require.False(t, ok) + require.Empty(t, got) + }) + t.Run("explicit git+https keeps scheme and adds default ref", func(t *testing.T) { got, ok, err := NormalizeGitRemoteUrl("git+https://example.com/org/repo.git") require.NoError(t, err) diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go b/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go index 4e0f4f8a9db..95eeabae96d 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go @@ -315,6 +315,14 @@ func newParams(apr *argparser.ArgParseResults, url string, urlScheme string) (ma // TODO(elianddb): This func mainly interfaces with apr to set the OSS key-vals in params, but the backup arg // parser does not include any OSS-related flags? I'm guessing they must be processed elsewhere? err = cli.AddOSSParams(url, apr, params) + case dbfactory.GitFileScheme, dbfactory.GitHTTPScheme, dbfactory.GitHTTPSScheme, dbfactory.GitSSHScheme: + err = cli.VerifyNoAwsParams(apr) + if dir, ok := apr.GetValue("git-cache-dir"); ok { + dir = strings.TrimSpace(dir) + if dir != "" { + params[dbfactory.GitCacheDirParam] = dir + } + } default: err = cli.VerifyNoAwsParams(apr) } diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go b/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go index 12b4856b535..46379777b81 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go @@ -16,6 +16,7 @@ package dprocedures import ( "path" + "strings" "github.com/dolthub/go-mysql-server/sql" @@ -56,6 +57,12 @@ func doltClone(ctx *sql.Context, args ...string) (sql.RowIter, error) { if user, hasUser := apr.GetValue(cli.UserFlag); hasUser { remoteParms[dbfactory.GRPCUsernameAuthParam] = user } + if dir, ok := apr.GetValue("git-cache-dir"); ok { + dir = strings.TrimSpace(dir) + if dir != "" { + remoteParms[dbfactory.GitCacheDirParam] = dir + } + } depth, ok := apr.GetInt(cli.DepthFlag) if !ok { diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go b/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go index b9c8dc1d530..eaca264ca74 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go @@ -22,6 +22,7 @@ import ( "github.com/dolthub/dolt/go/cmd/dolt/cli" "github.com/dolthub/dolt/go/libraries/doltcore/branch_control" + "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory" "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" "github.com/dolthub/dolt/go/libraries/doltcore/env" "github.com/dolthub/dolt/go/libraries/doltcore/ref" @@ -97,12 +98,23 @@ func addRemote(_ *sql.Context, dbName string, dbd env.DbData[*sql.Context], apr return err } - _, absRemoteUrl, err := env.GetAbsRemoteUrl(dbFs, &config.MapConfig{}, remoteUrl) + scheme, absRemoteUrl, err := env.GetAbsRemoteUrl(dbFs, &config.MapConfig{}, remoteUrl) if err != nil { return err } - r := env.NewRemote(remoteName, absRemoteUrl, map[string]string{}) + params := map[string]string{} + switch scheme { + case dbfactory.GitFileScheme, dbfactory.GitHTTPScheme, dbfactory.GitHTTPSScheme, dbfactory.GitSSHScheme: + if dir, ok := apr.GetValue("git-cache-dir"); ok { + dir = strings.TrimSpace(dir) + if dir != "" { + params[dbfactory.GitCacheDirParam] = dir + } + } + } + + r := env.NewRemote(remoteName, absRemoteUrl, params) return dbd.Rsw.AddRemote(r) } From c431af33fe7cd6046efcaffdaf1e95e0f3f07146 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?coffeegoddd=E2=98=95=EF=B8=8F=E2=9C=A8?= Date: Tue, 10 Feb 2026 19:11:46 -0800 Subject: [PATCH 03/20] /go/{cmd,libraries}: make ref an argument --- go/cmd/dolt/cli/arg_parser_helpers.go | 3 ++ go/cmd/dolt/commands/clone.go | 10 ++++-- go/cmd/dolt/commands/clone_test.go | 11 +++++++ go/cmd/dolt/commands/remote.go | 17 ++++++++++ go/cmd/dolt/commands/remote_test.go | 10 ++++++ go/libraries/doltcore/dbfactory/git_remote.go | 31 ++++++++++++++----- .../doltcore/dbfactory/git_remote_test.go | 18 ++++++++++- go/libraries/doltcore/env/git_remote_url.go | 19 +++--------- .../doltcore/env/git_remote_url_test.go | 27 ++++++++++------ .../doltcore/sqle/dprocedures/dolt_backup.go | 6 ++++ .../doltcore/sqle/dprocedures/dolt_clone.go | 10 +++++- .../doltcore/sqle/dprocedures/dolt_remote.go | 6 ++++ 12 files changed, 132 insertions(+), 36 deletions(-) diff --git a/go/cmd/dolt/cli/arg_parser_helpers.go b/go/cmd/dolt/cli/arg_parser_helpers.go index 4a6b4f23e4e..8c6e8e89d2d 100644 --- a/go/cmd/dolt/cli/arg_parser_helpers.go +++ b/go/cmd/dolt/cli/arg_parser_helpers.go @@ -147,6 +147,7 @@ func CreateCloneArgParser() *argparser.ArgParser { ap.SupportsString(BranchParam, "b", "branch", "The branch to be cloned. If not specified all branches will be cloned.") ap.SupportsString(DepthFlag, "", "depth", "Clone a single branch and limit history to the given commit depth.") ap.SupportsString("git-cache-dir", "", "dir", "Local directory to use for git remote cache (sets remote param git_cache_dir).") + ap.SupportsString("ref", "", "ref", "Git ref to use as the Dolt data ref for git remotes (sets remote param git_ref).") ap.SupportsString(dbfactory.AWSRegionParam, "", "region", "") ap.SupportsValidatedString(dbfactory.AWSCredsTypeParam, "", "creds-type", "", argparser.ValidatorFromStrList(dbfactory.AWSCredsTypeParam, dbfactory.AWSCredTypes)) ap.SupportsString(dbfactory.AWSCredsFileParam, "", "file", "AWS credentials file.") @@ -168,6 +169,7 @@ func CreateResetArgParser() *argparser.ArgParser { func CreateRemoteArgParser() *argparser.ArgParser { ap := argparser.NewArgParserWithVariableArgs("remote") ap.SupportsString("git-cache-dir", "", "dir", "Local directory to use for git remote cache (sets remote param git_cache_dir).") + ap.SupportsString("ref", "", "ref", "Git ref to use as the Dolt data ref for git remotes (sets remote param git_ref).") return ap } @@ -269,6 +271,7 @@ func CreateBackupArgParser() *argparser.ArgParser { ap.SupportsFlag(VerboseFlag, "v", "When printing the list of backups adds additional details.") ap.SupportsFlag(ForceFlag, "f", "When restoring a backup, overwrite the contents of the existing database with the same name.") ap.SupportsString("git-cache-dir", "", "dir", "Local directory to use for git remote cache (sets remote param git_cache_dir).") + ap.SupportsString("ref", "", "ref", "Git ref to use as the Dolt data ref for git remotes (sets remote param git_ref).") ap.SupportsString(dbfactory.AWSRegionParam, "", "region", "") ap.SupportsValidatedString(dbfactory.AWSCredsTypeParam, "", "creds-type", "", argparser.ValidatorFromStrList(dbfactory.AWSCredsTypeParam, dbfactory.AWSCredTypes)) ap.SupportsString(dbfactory.AWSCredsFileParam, "", "file", "AWS credentials file") diff --git a/go/cmd/dolt/commands/clone.go b/go/cmd/dolt/commands/clone.go index 00bf311527c..3585c311a92 100644 --- a/go/cmd/dolt/commands/clone.go +++ b/go/cmd/dolt/commands/clone.go @@ -199,9 +199,15 @@ func parseArgs(apr *argparser.ArgParseResults) (string, string, errhand.VerboseE if apr.NArg() == 2 { dir = apr.Arg(1) } else { - dir = path.Base(urlStr) + // Infer directory name from the URL path only (not including query/fragment). + // This avoids creating directories like "repo.git?foo=bar". + pathStr := urlStr + if u, err := earl.Parse(urlStr); err == nil && u != nil && u.Path != "" { + pathStr = strings.TrimPrefix(u.Path, "/") + } + dir = path.Base(pathStr) if dir == "." { - dir = path.Dir(urlStr) + dir = path.Dir(pathStr) } else if dir == "/" { return "", "", errhand.BuildDError("Could not infer repo name. Please explicitly define a directory for this url").Build() } diff --git a/go/cmd/dolt/commands/clone_test.go b/go/cmd/dolt/commands/clone_test.go index d67e1d52fef..1b05b19be65 100644 --- a/go/cmd/dolt/commands/clone_test.go +++ b/go/cmd/dolt/commands/clone_test.go @@ -65,3 +65,14 @@ func TestParseDolthubRepos(t *testing.T) { } } + +func TestCloneParseArgs_InferDirStripsQuery(t *testing.T) { + ap := CloneCmd{}.ArgParser() + apr, err := ap.Parse([]string{"https://example.com/org/repo.git?foo=bar"}) + require.NoError(t, err) + + dir, urlStr, verr := parseArgs(apr) + require.Nil(t, verr) + require.Equal(t, "repo.git", dir) + require.Equal(t, "https://example.com/org/repo.git?foo=bar", urlStr) +} diff --git a/go/cmd/dolt/commands/remote.go b/go/cmd/dolt/commands/remote.go index 1e85b2835f2..21366cdb263 100644 --- a/go/cmd/dolt/commands/remote.go +++ b/go/cmd/dolt/commands/remote.go @@ -72,6 +72,7 @@ const ( removeRemoteId = "remove" removeRemoteShortId = "rm" gitCacheDirFlag = "git-cache-dir" + gitRefFlag = "ref" ) type RemoteCmd struct{} @@ -221,6 +222,13 @@ func parseRemoteArgs(apr *argparser.ArgParseResults, scheme, remoteUrl string) ( } params[dbfactory.GitCacheDirParam] = v } + if v, ok := apr.GetValue(gitRefFlag); ok { + v = strings.TrimSpace(v) + if v == "" { + return nil, errhand.BuildDError("error: --%s cannot be empty", gitRefFlag).Build() + } + params[dbfactory.GitRefParam] = v + } default: err = cli.VerifyNoAwsParams(apr) } @@ -228,6 +236,15 @@ func parseRemoteArgs(apr *argparser.ArgParseResults, scheme, remoteUrl string) ( return nil, errhand.VerboseErrorFromError(err) } + // Flags that are only meaningful for git remotes should not be accepted for other schemes. + switch scheme { + case dbfactory.GitFileScheme, dbfactory.GitHTTPScheme, dbfactory.GitHTTPSScheme, dbfactory.GitSSHScheme: + default: + if _, ok := apr.GetValue(gitRefFlag); ok { + return nil, errhand.BuildDError("error: --%s is only supported for git remotes", gitRefFlag).Build() + } + } + return params, nil } diff --git a/go/cmd/dolt/commands/remote_test.go b/go/cmd/dolt/commands/remote_test.go index e3fd1aa4f34..50fd064c272 100644 --- a/go/cmd/dolt/commands/remote_test.go +++ b/go/cmd/dolt/commands/remote_test.go @@ -146,3 +146,13 @@ func TestParseRemoteArgs_GitCacheDir(t *testing.T) { assert.Nil(t, verr) assert.Equal(t, "/tmp/cache", params[dbfactory.GitCacheDirParam]) } + +func TestParseRemoteArgs_GitRef(t *testing.T) { + ap := RemoteCmd{}.ArgParser() + apr, err := ap.Parse([]string{"add", "origin", "git+file:///tmp/remote.git", "--" + gitRefFlag, "refs/dolt/custom"}) + assert.NoError(t, err) + + params, verr := parseRemoteArgs(apr, dbfactory.GitFileScheme, "git+file:///tmp/remote.git") + assert.Nil(t, verr) + assert.Equal(t, "refs/dolt/custom", params[dbfactory.GitRefParam]) +} diff --git a/go/libraries/doltcore/dbfactory/git_remote.go b/go/libraries/doltcore/dbfactory/git_remote.go index c6e4e1d4408..3651e094141 100644 --- a/go/libraries/doltcore/dbfactory/git_remote.go +++ b/go/libraries/doltcore/dbfactory/git_remote.go @@ -35,6 +35,7 @@ import ( const ( GitCacheDirParam = "git_cache_dir" + GitRefParam = "git_ref" GitCacheDirEnv = "DOLT_GIT_REMOTE_CACHE_DIR" defaultGitRef = "refs/dolt/data" ) @@ -54,7 +55,7 @@ var _ DBFactory = GitRemoteFactory{} func (fact GitRemoteFactory) PrepareDB(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) error { switch strings.ToLower(urlObj.Scheme) { case GitFileScheme: - remoteURL, _, err := parseGitRemoteFactoryURL(urlObj) + remoteURL, _, err := parseGitRemoteFactoryURL(urlObj, params) if err != nil { return err } @@ -77,7 +78,7 @@ func (fact GitRemoteFactory) PrepareDB(ctx context.Context, nbf *types.NomsBinFo } func (fact GitRemoteFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (datas.Database, types.ValueReadWriter, tree.NodeStore, error) { - remoteURL, ref, err := parseGitRemoteFactoryURL(urlObj) + remoteURL, ref, err := parseGitRemoteFactoryURL(urlObj, params) if err != nil { return nil, nil, nil, err } @@ -112,7 +113,7 @@ func (fact GitRemoteFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFor return db, vrw, ns, nil } -func parseGitRemoteFactoryURL(urlObj *url.URL) (remoteURL *url.URL, ref string, err error) { +func parseGitRemoteFactoryURL(urlObj *url.URL, params map[string]interface{}) (remoteURL *url.URL, ref string, err error) { if urlObj == nil { return nil, "", fmt.Errorf("nil url") } @@ -124,12 +125,13 @@ func parseGitRemoteFactoryURL(urlObj *url.URL) (remoteURL *url.URL, ref string, if underlyingScheme == "" { return nil, "", fmt.Errorf("invalid git+ scheme %q", urlObj.Scheme) } - - ref = urlObj.Query().Get("ref") - if ref == "" { - ref = defaultGitRef + // Ref selection is configured via dolt remote parameters (e.g. `--ref`), not via URL query. + if qref := strings.TrimSpace(urlObj.Query().Get("ref")); qref != "" { + return nil, "", fmt.Errorf("git remote ref must be provided via git remote param %q (not URL query ref=)", GitRefParam) } + ref = resolveGitRemoteRef(urlObj, params) + cp := *urlObj cp.Scheme = underlyingScheme cp.RawQuery = "" @@ -137,6 +139,21 @@ func parseGitRemoteFactoryURL(urlObj *url.URL) (remoteURL *url.URL, ref string, return &cp, ref, nil } +func resolveGitRemoteRef(urlObj *url.URL, params map[string]interface{}) string { + // Prefer an explicit remote parameter (e.g. from `--ref`) over any URL query. + if params != nil { + if v, ok := params[GitRefParam]; ok && v != nil { + s, ok := v.(string) + if ok { + if s = strings.TrimSpace(s); s != "" { + return s + } + } + } + } + return defaultGitRef +} + func resolveGitCacheBase(params map[string]interface{}) (string, error) { if params != nil { if v, ok := params[GitCacheDirParam]; ok && v != nil { diff --git a/go/libraries/doltcore/dbfactory/git_remote_test.go b/go/libraries/doltcore/dbfactory/git_remote_test.go index 55b99a4d6a4..fde477e6bb0 100644 --- a/go/libraries/doltcore/dbfactory/git_remote_test.go +++ b/go/libraries/doltcore/dbfactory/git_remote_test.go @@ -45,7 +45,7 @@ func TestGitRemoteFactory_GitFile_UsesConfiguredCacheDirAndCanWrite(t *testing.T remotePath := filepath.ToSlash(remoteRepo.GitDir) remoteURL := "file://" + remotePath - urlStr := "git+file://" + remotePath + "?ref=refs/dolt/data" + urlStr := "git+file://" + remotePath params := map[string]interface{}{ GitCacheDirParam: cacheDir, @@ -87,3 +87,19 @@ func TestGitRemoteFactory_GitFile_UsesConfiguredCacheDirAndCanWrite(t *testing.T out, err := cmd.CombinedOutput() require.NoError(t, err, "git rev-parse failed: %s", strings.TrimSpace(string(out))) } + +func TestGitRemoteFactory_RejectsRefQueryParam(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not found on PATH") + } + + ctx := context.Background() + remoteRepo, err := gitrepo.InitBare(ctx, t.TempDir()+"/remote.git") + require.NoError(t, err) + + remotePath := filepath.ToSlash(remoteRepo.GitDir) + urlStr := "git+file://" + remotePath + "?ref=refs/dolt/data" + + _, _, _, err = CreateDB(ctx, types.Format_Default, urlStr, nil) + require.Error(t, err) +} diff --git a/go/libraries/doltcore/env/git_remote_url.go b/go/libraries/doltcore/env/git_remote_url.go index 266f885badd..deca71cbb5d 100644 --- a/go/libraries/doltcore/env/git_remote_url.go +++ b/go/libraries/doltcore/env/git_remote_url.go @@ -21,8 +21,6 @@ import ( "strings" ) -const defaultGitRemoteRef = "refs/dolt/data" - var supportedGitPlusSchemes = map[string]struct{}{ "git+file": {}, "git+http": {}, @@ -64,7 +62,10 @@ func NormalizeGitRemoteUrl(urlArg string) (normalized string, ok bool, err error if _, ok := supportedGitPlusSchemes[strings.ToLower(u.Scheme)]; !ok { return "", false, fmt.Errorf("unsupported git dbfactory scheme %q", u.Scheme) } - ensureDefaultRefQuery(u) + // Ref selection is configured via dolt remote parameters (e.g. `--ref`), not via URL query. + if qref := strings.TrimSpace(u.Query().Get("ref")); qref != "" { + return "", false, fmt.Errorf("git remote ref must be provided via --ref (not URL query ref=)") + } return u.String(), true, nil } @@ -82,7 +83,6 @@ func NormalizeGitRemoteUrl(urlArg string) (normalized string, ok bool, err error if err != nil { return "", false, err } - ensureDefaultRefQuery(u) return u.String(), true, nil } @@ -97,7 +97,6 @@ func NormalizeGitRemoteUrl(urlArg string) (normalized string, ok bool, err error return "", false, nil } u.Scheme = "git+" + s - ensureDefaultRefQuery(u) return u.String(), true, nil } @@ -112,7 +111,6 @@ func NormalizeGitRemoteUrl(urlArg string) (normalized string, ok bool, err error if err != nil { return "", false, err } - ensureDefaultRefQuery(u) return u.String(), true, nil } @@ -121,7 +119,6 @@ func NormalizeGitRemoteUrl(urlArg string) (normalized string, ok bool, err error if err != nil { return "", false, err } - ensureDefaultRefQuery(u) return u.String(), true, nil } @@ -175,11 +172,3 @@ func splitScpLike(s string) (host string, path string) { } return s[:i], s[i+1:] } - -func ensureDefaultRefQuery(u *url.URL) { - q := u.Query() - if q.Get("ref") == "" { - q.Set("ref", defaultGitRemoteRef) - u.RawQuery = q.Encode() - } -} diff --git a/go/libraries/doltcore/env/git_remote_url_test.go b/go/libraries/doltcore/env/git_remote_url_test.go index a02d66ed94c..9b640a82ee2 100644 --- a/go/libraries/doltcore/env/git_remote_url_test.go +++ b/go/libraries/doltcore/env/git_remote_url_test.go @@ -29,40 +29,47 @@ func TestNormalizeGitRemoteUrl(t *testing.T) { require.Empty(t, got) }) - t.Run("explicit git+https keeps scheme and adds default ref", func(t *testing.T) { + t.Run("explicit git+https keeps scheme", func(t *testing.T) { got, ok, err := NormalizeGitRemoteUrl("git+https://example.com/org/repo.git") require.NoError(t, err) require.True(t, ok) - require.Equal(t, "git+https://example.com/org/repo.git?ref=refs%2Fdolt%2Fdata", got) + require.Equal(t, "git+https://example.com/org/repo.git", got) }) - t.Run("https .git becomes git+https and adds default ref", func(t *testing.T) { + t.Run("explicit git+https with ref query is rejected", func(t *testing.T) { + got, ok, err := NormalizeGitRemoteUrl("git+https://example.com/org/repo.git?ref=refs/dolt/data") + require.Error(t, err) + require.False(t, ok) + require.Empty(t, got) + }) + + t.Run("https .git becomes git+https", func(t *testing.T) { got, ok, err := NormalizeGitRemoteUrl("https://example.com/org/repo.git") require.NoError(t, err) require.True(t, ok) - require.Equal(t, "git+https://example.com/org/repo.git?ref=refs%2Fdolt%2Fdata", got) + require.Equal(t, "git+https://example.com/org/repo.git", got) }) - t.Run("scp-style becomes git+ssh and adds default ref", func(t *testing.T) { + t.Run("scp-style becomes git+ssh", func(t *testing.T) { got, ok, err := NormalizeGitRemoteUrl("git@github.com:org/repo.git") require.NoError(t, err) require.True(t, ok) - require.Equal(t, "git+ssh://git@github.com/org/repo.git?ref=refs%2Fdolt%2Fdata", got) + require.Equal(t, "git+ssh://git@github.com/org/repo.git", got) }) - t.Run("schemeless host/path defaults to git+https and adds default ref", func(t *testing.T) { + t.Run("schemeless host/path defaults to git+https", func(t *testing.T) { got, ok, err := NormalizeGitRemoteUrl("github.com/org/repo.git") require.NoError(t, err) require.True(t, ok) - require.Equal(t, "git+https://github.com/org/repo.git?ref=refs%2Fdolt%2Fdata", got) + require.Equal(t, "git+https://github.com/org/repo.git", got) }) - t.Run("local absolute path becomes git+file and adds default ref", func(t *testing.T) { + t.Run("local absolute path becomes git+file", func(t *testing.T) { p := filepath.ToSlash(filepath.Join(t.TempDir(), "remote.git")) got, ok, err := NormalizeGitRemoteUrl(p) require.NoError(t, err) require.True(t, ok) - require.Equal(t, "git+file://"+p+"?ref=refs%2Fdolt%2Fdata", got) + require.Equal(t, "git+file://"+p, got) }) t.Run("non .git url not recognized", func(t *testing.T) { diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go b/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go index 95eeabae96d..c33648bc004 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go @@ -323,6 +323,12 @@ func newParams(apr *argparser.ArgParseResults, url string, urlScheme string) (ma params[dbfactory.GitCacheDirParam] = dir } } + if ref, ok := apr.GetValue("ref"); ok { + ref = strings.TrimSpace(ref) + if ref != "" { + params[dbfactory.GitRefParam] = ref + } + } default: err = cli.VerifyNoAwsParams(apr) } diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go b/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go index 46379777b81..e2d509ef27f 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go @@ -46,7 +46,7 @@ func doltClone(ctx *sql.Context, args ...string) (sql.RowIter, error) { } sess := dsess.DSessFromSess(ctx.Session) - _, remoteUrl, err := env.GetAbsRemoteUrl(sess.Provider().FileSystem(), emptyConfig(), urlStr) + scheme, remoteUrl, err := env.GetAbsRemoteUrl(sess.Provider().FileSystem(), emptyConfig(), urlStr) if err != nil { return nil, errhand.BuildDError("error: '%s' is not valid.", urlStr).Build() } @@ -63,6 +63,14 @@ func doltClone(ctx *sql.Context, args ...string) (sql.RowIter, error) { remoteParms[dbfactory.GitCacheDirParam] = dir } } + if ref, ok := apr.GetValue("ref"); ok { + ref = strings.TrimSpace(ref) + if ref != "" && (scheme == dbfactory.GitFileScheme || scheme == dbfactory.GitHTTPScheme || scheme == dbfactory.GitHTTPSScheme || scheme == dbfactory.GitSSHScheme) { + remoteParms[dbfactory.GitRefParam] = ref + } else if ref != "" { + return nil, errhand.BuildDError("error: --ref is only supported for git remotes").Build() + } + } depth, ok := apr.GetInt(cli.DepthFlag) if !ok { diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go b/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go index eaca264ca74..8d74c06df1e 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go @@ -112,6 +112,12 @@ func addRemote(_ *sql.Context, dbName string, dbd env.DbData[*sql.Context], apr params[dbfactory.GitCacheDirParam] = dir } } + if ref, ok := apr.GetValue("ref"); ok { + ref = strings.TrimSpace(ref) + if ref != "" { + params[dbfactory.GitRefParam] = ref + } + } } r := env.NewRemote(remoteName, absRemoteUrl, params) From 9a70ea0b221660187569697d8b088b9e97a80bb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?coffeegoddd=E2=98=95=EF=B8=8F=E2=9C=A8?= Date: Tue, 10 Feb 2026 19:18:02 -0800 Subject: [PATCH 04/20] /go/{cmd,libraries}: more fixes --- go/cmd/dolt/commands/read_tables.go | 2 ++ go/cmd/dolt/commands/read_tables_test.go | 33 +++++++++++++++++++ go/libraries/doltcore/env/git_remote_url.go | 22 +++++++++++-- .../doltcore/env/git_remote_url_test.go | 14 ++++++++ 4 files changed, 68 insertions(+), 3 deletions(-) create mode 100644 go/cmd/dolt/commands/read_tables_test.go diff --git a/go/cmd/dolt/commands/read_tables.go b/go/cmd/dolt/commands/read_tables.go index 06e31a3a392..e4c169a635b 100644 --- a/go/cmd/dolt/commands/read_tables.go +++ b/go/cmd/dolt/commands/read_tables.go @@ -78,6 +78,8 @@ func (cmd ReadTablesCmd) ArgParser() *argparser.ArgParser { {"table", " Optional tables to retrieve. If omitted, all tables are retrieved."}, } ap.SupportsString(dirParamName, "d", "directory", "directory to create and put retrieved table data.") + ap.SupportsString(gitCacheDirFlag, "", "dir", "Local directory to use for git remote cache (sets remote param git_cache_dir).") + ap.SupportsString(gitRefFlag, "", "ref", "Git ref to use as the Dolt data ref for git remotes (sets remote param git_ref).") return ap } diff --git a/go/cmd/dolt/commands/read_tables_test.go b/go/cmd/dolt/commands/read_tables_test.go new file mode 100644 index 00000000000..47a1a6563a6 --- /dev/null +++ b/go/cmd/dolt/commands/read_tables_test.go @@ -0,0 +1,33 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package commands + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestReadTablesArgParser_AcceptsGitFlags(t *testing.T) { + ap := ReadTablesCmd{}.ArgParser() + apr, err := ap.Parse([]string{ + "--" + gitCacheDirFlag, "/tmp/cache", + "--" + gitRefFlag, "refs/dolt/custom", + "git+file:///tmp/remote.git", "main", + }) + require.NoError(t, err) + require.Equal(t, "/tmp/cache", apr.GetValueOrDefault(gitCacheDirFlag, "")) + require.Equal(t, "refs/dolt/custom", apr.GetValueOrDefault(gitRefFlag, "")) +} diff --git a/go/libraries/doltcore/env/git_remote_url.go b/go/libraries/doltcore/env/git_remote_url.go index deca71cbb5d..1e7a3974c13 100644 --- a/go/libraries/doltcore/env/git_remote_url.go +++ b/go/libraries/doltcore/env/git_remote_url.go @@ -62,9 +62,8 @@ func NormalizeGitRemoteUrl(urlArg string) (normalized string, ok bool, err error if _, ok := supportedGitPlusSchemes[strings.ToLower(u.Scheme)]; !ok { return "", false, fmt.Errorf("unsupported git dbfactory scheme %q", u.Scheme) } - // Ref selection is configured via dolt remote parameters (e.g. `--ref`), not via URL query. - if qref := strings.TrimSpace(u.Query().Get("ref")); qref != "" { - return "", false, fmt.Errorf("git remote ref must be provided via --ref (not URL query ref=)") + if err := rejectGitRefQuery(u); err != nil { + return "", false, err } return u.String(), true, nil } @@ -96,6 +95,9 @@ func NormalizeGitRemoteUrl(urlArg string) (normalized string, ok bool, err error if _, ok := supportedUnderlyingGitSchemes[s]; !ok { return "", false, nil } + if err := rejectGitRefQuery(u); err != nil { + return "", false, err + } u.Scheme = "git+" + s return u.String(), true, nil } @@ -119,6 +121,9 @@ func NormalizeGitRemoteUrl(urlArg string) (normalized string, ok bool, err error if err != nil { return "", false, err } + if err := rejectGitRefQuery(u); err != nil { + return "", false, err + } return u.String(), true, nil } @@ -172,3 +177,14 @@ func splitScpLike(s string) (host string, path string) { } return s[:i], s[i+1:] } + +func rejectGitRefQuery(u *url.URL) error { + // Ref selection is configured via dolt remote parameters (e.g. `--ref`), not via URL query. + if u == nil { + return nil + } + if qref := strings.TrimSpace(u.Query().Get("ref")); qref != "" { + return fmt.Errorf("git remote ref must be provided via --ref (not URL query ref=)") + } + return nil +} diff --git a/go/libraries/doltcore/env/git_remote_url_test.go b/go/libraries/doltcore/env/git_remote_url_test.go index 9b640a82ee2..a4a8093f4bd 100644 --- a/go/libraries/doltcore/env/git_remote_url_test.go +++ b/go/libraries/doltcore/env/git_remote_url_test.go @@ -43,6 +43,20 @@ func TestNormalizeGitRemoteUrl(t *testing.T) { require.Empty(t, got) }) + t.Run("https .git with ref query is rejected", func(t *testing.T) { + got, ok, err := NormalizeGitRemoteUrl("https://example.com/org/repo.git?ref=refs/dolt/data") + require.Error(t, err) + require.False(t, ok) + require.Empty(t, got) + }) + + t.Run("schemeless host/path with ref query is rejected", func(t *testing.T) { + got, ok, err := NormalizeGitRemoteUrl("github.com/org/repo.git?ref=refs/dolt/data") + require.Error(t, err) + require.False(t, ok) + require.Empty(t, got) + }) + t.Run("https .git becomes git+https", func(t *testing.T) { got, ok, err := NormalizeGitRemoteUrl("https://example.com/org/repo.git") require.NoError(t, err) From 05a68ac1ae6b6bf9d389fdef11a7159c0b68e708 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?coffeegoddd=E2=98=95=EF=B8=8F=E2=9C=A8?= Date: Tue, 10 Feb 2026 19:53:55 -0800 Subject: [PATCH 05/20] /{go,integration-tests}: add tests --- go/cmd/dolt/commands/remote.go | 2 + go/cmd/dolt/commands/remote_test.go | 2 +- go/libraries/doltcore/dbfactory/git_remote.go | 10 +- .../doltcore/dbfactory/git_remote_test.go | 74 +++++ .../doltcore/sqle/dprocedures/dolt_backup.go | 7 +- .../doltcore/sqle/dprocedures/dolt_clone.go | 15 +- .../doltcore/sqle/dprocedures/dolt_remote.go | 7 +- integration-tests/bats/remotes-git.bats | 260 ++++++++++++++++++ integration-tests/bats/sql-remotes-git.bats | 120 ++++++++ 9 files changed, 480 insertions(+), 17 deletions(-) create mode 100644 integration-tests/bats/remotes-git.bats create mode 100644 integration-tests/bats/sql-remotes-git.bats diff --git a/go/cmd/dolt/commands/remote.go b/go/cmd/dolt/commands/remote.go index 21366cdb263..fcfee3c9326 100644 --- a/go/cmd/dolt/commands/remote.go +++ b/go/cmd/dolt/commands/remote.go @@ -221,6 +221,8 @@ func parseRemoteArgs(apr *argparser.ArgParseResults, scheme, remoteUrl string) ( return nil, errhand.BuildDError("error: --%s cannot be empty", gitCacheDirFlag).Build() } params[dbfactory.GitCacheDirParam] = v + } else { + return nil, errhand.BuildDError("error: --%s is required for git remotes", gitCacheDirFlag).Build() } if v, ok := apr.GetValue(gitRefFlag); ok { v = strings.TrimSpace(v) diff --git a/go/cmd/dolt/commands/remote_test.go b/go/cmd/dolt/commands/remote_test.go index 50fd064c272..566a591beec 100644 --- a/go/cmd/dolt/commands/remote_test.go +++ b/go/cmd/dolt/commands/remote_test.go @@ -149,7 +149,7 @@ func TestParseRemoteArgs_GitCacheDir(t *testing.T) { func TestParseRemoteArgs_GitRef(t *testing.T) { ap := RemoteCmd{}.ArgParser() - apr, err := ap.Parse([]string{"add", "origin", "git+file:///tmp/remote.git", "--" + gitRefFlag, "refs/dolt/custom"}) + apr, err := ap.Parse([]string{"add", "origin", "git+file:///tmp/remote.git", "--" + gitCacheDirFlag, "/tmp/cache", "--" + gitRefFlag, "refs/dolt/custom"}) assert.NoError(t, err) params, verr := parseRemoteArgs(apr, dbfactory.GitFileScheme, "git+file:///tmp/remote.git") diff --git a/go/libraries/doltcore/dbfactory/git_remote.go b/go/libraries/doltcore/dbfactory/git_remote.go index 3651e094141..4c630116914 100644 --- a/go/libraries/doltcore/dbfactory/git_remote.go +++ b/go/libraries/doltcore/dbfactory/git_remote.go @@ -36,7 +36,6 @@ import ( const ( GitCacheDirParam = "git_cache_dir" GitRefParam = "git_ref" - GitCacheDirEnv = "DOLT_GIT_REMOTE_CACHE_DIR" defaultGitRef = "refs/dolt/data" ) @@ -167,14 +166,7 @@ func resolveGitCacheBase(params map[string]interface{}) (string, error) { return s, nil } } - if v := strings.TrimSpace(os.Getenv(GitCacheDirEnv)); v != "" { - return v, nil - } - base, err := os.UserCacheDir() - if err != nil { - return "", err - } - return filepath.Join(base, "dolt", "git-remote-cache"), nil + return "", fmt.Errorf("%s is required for git remotes", GitCacheDirParam) } func cacheRepoPath(cacheBase, remoteURL, ref string) (string, error) { diff --git a/go/libraries/doltcore/dbfactory/git_remote_test.go b/go/libraries/doltcore/dbfactory/git_remote_test.go index fde477e6bb0..a0ba277a11c 100644 --- a/go/libraries/doltcore/dbfactory/git_remote_test.go +++ b/go/libraries/doltcore/dbfactory/git_remote_test.go @@ -27,6 +27,7 @@ import ( "github.com/stretchr/testify/require" "github.com/dolthub/dolt/go/store/chunks" + "github.com/dolthub/dolt/go/store/datas" "github.com/dolthub/dolt/go/store/hash" "github.com/dolthub/dolt/go/store/testutils/gitrepo" "github.com/dolthub/dolt/go/store/types" @@ -103,3 +104,76 @@ func TestGitRemoteFactory_RejectsRefQueryParam(t *testing.T) { _, _, _, err = CreateDB(ctx, types.Format_Default, urlStr, nil) require.Error(t, err) } + +func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not found on PATH") + } + + ctx := context.Background() + remoteRepo, err := gitrepo.InitBare(ctx, t.TempDir()+"/remote.git") + require.NoError(t, err) + + remotePath := filepath.ToSlash(remoteRepo.GitDir) + urlStr := "git+file://" + remotePath + + noopGetAddrs := func(chunks.Chunk) chunks.GetAddrsCb { + return func(context.Context, hash.HashSet, chunks.PendingRefExists) error { return nil } + } + + open := func(cacheDir string) (db datas.Database, cs chunks.ChunkStore) { + params := map[string]interface{}{ + GitCacheDirParam: cacheDir, + } + d, vrw, _, err := CreateDB(ctx, types.Format_Default, urlStr, params) + require.NoError(t, err) + require.NotNil(t, d) + require.NotNil(t, vrw) + + vs, ok := vrw.(*types.ValueStore) + require.True(t, ok, "expected ValueReadWriter to be *types.ValueStore, got %T", vrw) + return d, vs.ChunkStore() + } + + cacheA := t.TempDir() + cacheB := t.TempDir() + + // Client A writes a root pointing at chunk A. + dbA, csA := open(cacheA) + cA := chunks.NewChunk([]byte("clientA\n")) + require.NoError(t, csA.Put(ctx, cA, noopGetAddrs)) + lastA, err := csA.Root(ctx) + require.NoError(t, err) + okCommitA, err := csA.Commit(ctx, cA.Hash(), lastA) + require.NoError(t, err) + require.True(t, okCommitA) + require.NoError(t, dbA.Close()) + + // Client B reads chunk A, then writes chunk B and updates the root. + dbB, csB := open(cacheB) + require.NoError(t, csB.Rebase(ctx)) + rootB, err := csB.Root(ctx) + require.NoError(t, err) + require.Equal(t, cA.Hash(), rootB) + gotA, err := csB.Get(ctx, cA.Hash()) + require.NoError(t, err) + require.Equal(t, "clientA\n", string(gotA.Data())) + + cB := chunks.NewChunk([]byte("clientB\n")) + require.NoError(t, csB.Put(ctx, cB, noopGetAddrs)) + okCommitB, err := csB.Commit(ctx, cB.Hash(), rootB) + require.NoError(t, err) + require.True(t, okCommitB) + require.NoError(t, dbB.Close()) + + // Client A re-opens (with the same cache dir as before) and should see B's update. + dbA2, csA2 := open(cacheA) + require.NoError(t, csA2.Rebase(ctx)) + rootA2, err := csA2.Root(ctx) + require.NoError(t, err) + require.Equal(t, cB.Hash(), rootA2) + gotB, err := csA2.Get(ctx, cB.Hash()) + require.NoError(t, err) + require.Equal(t, "clientB\n", string(gotB.Data())) + require.NoError(t, dbA2.Close()) +} diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go b/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go index c33648bc004..21f1f2435a6 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go @@ -319,9 +319,12 @@ func newParams(apr *argparser.ArgParseResults, url string, urlScheme string) (ma err = cli.VerifyNoAwsParams(apr) if dir, ok := apr.GetValue("git-cache-dir"); ok { dir = strings.TrimSpace(dir) - if dir != "" { - params[dbfactory.GitCacheDirParam] = dir + if dir == "" { + return nil, fmt.Errorf("error: --git-cache-dir cannot be empty") } + params[dbfactory.GitCacheDirParam] = dir + } else { + return nil, fmt.Errorf("error: --git-cache-dir is required for git remotes") } if ref, ok := apr.GetValue("ref"); ok { ref = strings.TrimSpace(ref) diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go b/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go index e2d509ef27f..7cf08f65201 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go @@ -57,10 +57,19 @@ func doltClone(ctx *sql.Context, args ...string) (sql.RowIter, error) { if user, hasUser := apr.GetValue(cli.UserFlag); hasUser { remoteParms[dbfactory.GRPCUsernameAuthParam] = user } - if dir, ok := apr.GetValue("git-cache-dir"); ok { - dir = strings.TrimSpace(dir) - if dir != "" { + if scheme == dbfactory.GitFileScheme || scheme == dbfactory.GitHTTPScheme || scheme == dbfactory.GitHTTPSScheme || scheme == dbfactory.GitSSHScheme { + if dir, ok := apr.GetValue("git-cache-dir"); ok { + dir = strings.TrimSpace(dir) + if dir == "" { + return nil, errhand.BuildDError("error: --git-cache-dir cannot be empty").Build() + } remoteParms[dbfactory.GitCacheDirParam] = dir + } else { + return nil, errhand.BuildDError("error: --git-cache-dir is required for git remotes").Build() + } + } else { + if _, ok := apr.GetValue("git-cache-dir"); ok { + return nil, errhand.BuildDError("error: --git-cache-dir is only supported for git remotes").Build() } } if ref, ok := apr.GetValue("ref"); ok { diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go b/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go index 8d74c06df1e..4312e35f2e4 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go @@ -108,9 +108,12 @@ func addRemote(_ *sql.Context, dbName string, dbd env.DbData[*sql.Context], apr case dbfactory.GitFileScheme, dbfactory.GitHTTPScheme, dbfactory.GitHTTPSScheme, dbfactory.GitSSHScheme: if dir, ok := apr.GetValue("git-cache-dir"); ok { dir = strings.TrimSpace(dir) - if dir != "" { - params[dbfactory.GitCacheDirParam] = dir + if dir == "" { + return fmt.Errorf("error: --git-cache-dir cannot be empty") } + params[dbfactory.GitCacheDirParam] = dir + } else { + return fmt.Errorf("error: --git-cache-dir is required for git remotes") } if ref, ok := apr.GetValue("ref"); ok { ref = strings.TrimSpace(ref) diff --git a/integration-tests/bats/remotes-git.bats b/integration-tests/bats/remotes-git.bats new file mode 100644 index 00000000000..85fc5a99e2c --- /dev/null +++ b/integration-tests/bats/remotes-git.bats @@ -0,0 +1,260 @@ +#!/usr/bin/env bats +load $BATS_TEST_DIRNAME/helper/common.bash + +setup() { + skiponwindows "tests are flaky on Windows" + skip_if_remote + setup_common + cd $BATS_TMPDIR + cd dolt-repo-$$ + mkdir "dolt-repo-clones" +} + +teardown() { + assert_feature_version + teardown_common +} + +@test "remotes-git: smoke push/clone/push-back/pull" { + if ! command -v git >/dev/null 2>&1; then + skip "git not installed" + fi + + mkdir remote.git + git init --bare remote.git + + cache1=$(mktemp -d) + cache2=$(mktemp -d) + + mkdir repo1 + cd repo1 + dolt init + dolt sql -q "create table test(pk int primary key, v int);" + dolt add . + dolt commit -m "create table" + + dolt remote add --git-cache-dir "$cache1" origin ../remote.git + run dolt push --set-upstream origin main + [ "$status" -eq 0 ] + [[ ! "$output" =~ "panic:" ]] || false + + cd .. + cd dolt-repo-clones + run dolt clone --git-cache-dir "$cache2" ../remote.git repo2 + [ "$status" -eq 0 ] + + cd repo2 + dolt sql -q "insert into test values (1, 10);" + dolt add . + dolt commit -m "add row" + run dolt push origin main + [ "$status" -eq 0 ] + + cd ../../repo1 + run dolt pull + [ "$status" -eq 0 ] + + run dolt sql -q "select v from test where pk = 1;" -r csv + [ "$status" -eq 0 ] + [[ "$output" =~ "10" ]] || false + + rm -rf "$cache1" "$cache2" +} + +@test "remotes-git: empty remote bootstrap creates refs/dolt/data" { + if ! command -v git >/dev/null 2>&1; then + skip "git not installed" + fi + + mkdir remote.git + git init --bare remote.git + + # Assert the dolt data ref doesn't exist yet. + run git --git-dir remote.git show-ref refs/dolt/data + [ "$status" -eq 1 ] + + cache1=$(mktemp -d) + + mkdir repo1 + cd repo1 + dolt init + dolt sql -q "create table test(pk int primary key);" + dolt add . + dolt commit -m "create table" + + dolt remote add --git-cache-dir "$cache1" origin ../remote.git + run dolt push --set-upstream origin main + [ "$status" -eq 0 ] + + run git --git-dir ../remote.git show-ref refs/dolt/data + [ "$status" -eq 0 ] + + rm -rf "$cache1" +} + +@test "remotes-git: pull also fetches branches from git remote" { + if ! command -v git >/dev/null 2>&1; then + skip "git not installed" + fi + + mkdir remote.git + git init --bare remote.git + + cache1=$(mktemp -d) + cache2=$(mktemp -d) + + mkdir repo1 + cd repo1 + dolt init + dolt remote add --git-cache-dir "$cache1" origin ../remote.git + dolt push origin main + + cd .. + cd dolt-repo-clones + run dolt clone --git-cache-dir "$cache2" ../remote.git repo2 + [ "$status" -eq 0 ] + + cd repo2 + run dolt branch -va + [[ "$output" =~ "main" ]] || false + [[ ! "$output" =~ "other" ]] || false + + cd ../../repo1 + dolt checkout -b other + dolt commit --allow-empty -m "first commit on other" + dolt push origin other + + cd ../dolt-repo-clones/repo2 + dolt pull + run dolt branch -va + [[ "$output" =~ "main" ]] || false + [[ "$output" =~ "other" ]] || false + + rm -rf "$cache1" "$cache2" +} + +@test "remotes-git: pull fetches but does not merge other branches" { + if ! command -v git >/dev/null 2>&1; then + skip "git not installed" + fi + + mkdir remote.git + git init --bare remote.git + + cache1=$(mktemp -d) + cache2=$(mktemp -d) + + mkdir repo1 + cd repo1 + dolt init + dolt remote add --git-cache-dir "$cache1" origin ../remote.git + dolt push --set-upstream origin main + dolt checkout -b other + dolt commit --allow-empty -m "first commit on other" + dolt push --set-upstream origin other + + cd .. + cd dolt-repo-clones + run dolt clone --git-cache-dir "$cache2" ../remote.git repo2 + [ "$status" -eq 0 ] + + cd repo2 + main_state1=$(get_head_commit) + + run dolt pull + [ "$status" -eq 0 ] + + main_state2=$(get_head_commit) + [[ "$main_state1" = "$main_state2" ]] || false + + run dolt branch -va + [[ "$output" =~ "main" ]] || false + [[ "$output" =~ "other" ]] || false + + run dolt checkout other + [ "$status" -eq 0 ] + [[ "$output" =~ "branch 'other' set up to track 'origin/other'." ]] || false + + run dolt log --oneline -n 1 + [ "$status" -eq 0 ] + [[ "$output" =~ "first commit on other" ]] || false + + rm -rf "$cache1" "$cache2" +} + +@test "remotes-git: custom --ref writes to configured dolt data ref" { + if ! command -v git >/dev/null 2>&1; then + skip "git not installed" + fi + + mkdir remote.git + git init --bare remote.git + + cache1=$(mktemp -d) + cache2=$(mktemp -d) + + mkdir repo1 + cd repo1 + dolt init + dolt sql -q "create table test(pk int primary key, v int);" + dolt sql -q "insert into test values (1, 111);" + dolt add . + dolt commit -m "seed" + + dolt remote add --git-cache-dir "$cache1" --ref refs/dolt/custom origin ../remote.git + run dolt push --set-upstream origin main + [ "$status" -eq 0 ] + + run git --git-dir ../remote.git show-ref refs/dolt/custom + [ "$status" -eq 0 ] + run git --git-dir ../remote.git show-ref refs/dolt/data + [ "$status" -ne 0 ] + + cd .. + cd dolt-repo-clones + run dolt clone --git-cache-dir "$cache2" --ref refs/dolt/custom ../remote.git repo2 + [ "$status" -eq 0 ] + + cd repo2 + run dolt sql -q "select v from test where pk = 1;" -r csv + [ "$status" -eq 0 ] + [[ "$output" =~ "111" ]] || false + + run git --git-dir ../../remote.git show-ref refs/dolt/data + [ "$status" -ne 0 ] + + rm -rf "$cache1" "$cache2" +} + +@test "remotes-git: --git-cache-dir is required and creates cache base content" { + if ! command -v git >/dev/null 2>&1; then + skip "git not installed" + fi + + mkdir remote.git + git init --bare remote.git + + mkdir repo1 + cd repo1 + dolt init + dolt commit --allow-empty -m "init" + + run dolt remote add origin ../remote.git + [ "$status" -ne 0 ] + [[ "$output" =~ "git-cache-dir" ]] || false + [[ "$output" =~ "required" ]] || false + + cache1=$(mktemp -d) + dolt remote add --git-cache-dir "$cache1" origin ../remote.git + run dolt push --set-upstream origin main + [ "$status" -eq 0 ] + + run ls -A "$cache1" + [ "$status" -eq 0 ] + [ "$output" != "" ] + + run bash -c 'ls -d "$1"/*/repo.git >/dev/null 2>&1' _ "$cache1" + [ "$status" -eq 0 ] + + rm -rf "$cache1" +} diff --git a/integration-tests/bats/sql-remotes-git.bats b/integration-tests/bats/sql-remotes-git.bats new file mode 100644 index 00000000000..e4abe147204 --- /dev/null +++ b/integration-tests/bats/sql-remotes-git.bats @@ -0,0 +1,120 @@ +#!/usr/bin/env bats +load $BATS_TEST_DIRNAME/helper/common.bash + +setup() { + skiponwindows "tests are flaky on Windows" + skip_if_remote + setup_common + cd $BATS_TMPDIR + cd dolt-repo-$$ +} + +teardown() { + assert_feature_version + teardown_common +} + +@test "sql-remotes-git: dolt_remote add supports --ref for git remotes" { + if ! command -v git >/dev/null 2>&1; then + skip "git not installed" + fi + + mkdir remote.git + git init --bare remote.git + + cache1=$(mktemp -d) + + mkdir repo1 + cd repo1 + dolt init + dolt sql -q "create table test(pk int primary key, v int);" + dolt sql -q "insert into test values (1, 111);" + dolt add . + dolt commit -m "seed" + + run dolt sql </dev/null 2>&1; then + skip "git not installed" + fi + + mkdir remote.git + git init --bare remote.git + + cache1=$(mktemp -d) + cache2=$(mktemp -d) + + mkdir repo1 + cd repo1 + dolt init + dolt sql -q "create table test(pk int primary key, v int);" + dolt sql -q "insert into test values (1, 111);" + dolt add . + dolt commit -m "seed" + + dolt remote add --git-cache-dir "$cache1" --ref refs/dolt/custom origin ../remote.git + dolt push --set-upstream origin main + + cd .. + mkdir host + cd host + dolt init + + run dolt sql -q "call dolt_clone('--git-cache-dir', '$cache2', '--ref', 'refs/dolt/custom', '../remote.git', 'repo2');" + [ "$status" -eq 0 ] + + cd repo2 + run dolt sql -q "select v from test where pk = 1;" -r csv + [ "$status" -eq 0 ] + [[ "$output" =~ "111" ]] || false + + run git --git-dir ../../remote.git show-ref refs/dolt/custom + [ "$status" -eq 0 ] + run git --git-dir ../../remote.git show-ref refs/dolt/data + [ "$status" -ne 0 ] + + rm -rf "$cache1" "$cache2" +} + +@test "sql-remotes-git: dolt_backup sync-url supports --ref for git remotes" { + if ! command -v git >/dev/null 2>&1; then + skip "git not installed" + fi + + mkdir remote.git + git init --bare remote.git + + cache1=$(mktemp -d) + + mkdir repo1 + cd repo1 + dolt init + dolt sql -q "create table test(pk int primary key, v int);" + dolt sql -q "insert into test values (1, 111);" + dolt add . + dolt commit -m "seed" + + run dolt sql -q "call dolt_backup('sync-url', '--git-cache-dir', '$cache1', '--ref', 'refs/dolt/custom', '../remote.git');" + [ "$status" -eq 0 ] + + run git --git-dir ../remote.git show-ref refs/dolt/custom + [ "$status" -eq 0 ] + run git --git-dir ../remote.git show-ref refs/dolt/data + [ "$status" -ne 0 ] + + rm -rf "$cache1" +} + From 5a012bfe0de9e85c15fb8b1148ab19bb6384257a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?coffeegoddd=E2=98=95=EF=B8=8F=E2=9C=A8?= Date: Tue, 10 Feb 2026 20:00:47 -0800 Subject: [PATCH 06/20] /go/cmd/dolt/commands: update clone --- go/cmd/dolt/commands/clone.go | 11 +++-------- go/cmd/dolt/commands/clone_test.go | 6 +++--- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/go/cmd/dolt/commands/clone.go b/go/cmd/dolt/commands/clone.go index 3585c311a92..686748e16ec 100644 --- a/go/cmd/dolt/commands/clone.go +++ b/go/cmd/dolt/commands/clone.go @@ -199,15 +199,10 @@ func parseArgs(apr *argparser.ArgParseResults) (string, string, errhand.VerboseE if apr.NArg() == 2 { dir = apr.Arg(1) } else { - // Infer directory name from the URL path only (not including query/fragment). - // This avoids creating directories like "repo.git?foo=bar". - pathStr := urlStr - if u, err := earl.Parse(urlStr); err == nil && u != nil && u.Path != "" { - pathStr = strings.TrimPrefix(u.Path, "/") - } - dir = path.Base(pathStr) + // Infer directory name from the URL. + dir = path.Base(urlStr) if dir == "." { - dir = path.Dir(pathStr) + dir = path.Dir(urlStr) } else if dir == "/" { return "", "", errhand.BuildDError("Could not infer repo name. Please explicitly define a directory for this url").Build() } diff --git a/go/cmd/dolt/commands/clone_test.go b/go/cmd/dolt/commands/clone_test.go index 1b05b19be65..a0947612063 100644 --- a/go/cmd/dolt/commands/clone_test.go +++ b/go/cmd/dolt/commands/clone_test.go @@ -66,13 +66,13 @@ func TestParseDolthubRepos(t *testing.T) { } -func TestCloneParseArgs_InferDirStripsQuery(t *testing.T) { +func TestCloneParseArgs_InferDir(t *testing.T) { ap := CloneCmd{}.ArgParser() - apr, err := ap.Parse([]string{"https://example.com/org/repo.git?foo=bar"}) + apr, err := ap.Parse([]string{"https://example.com/org/repo.git"}) require.NoError(t, err) dir, urlStr, verr := parseArgs(apr) require.Nil(t, verr) require.Equal(t, "repo.git", dir) - require.Equal(t, "https://example.com/org/repo.git?foo=bar", urlStr) + require.Equal(t, "https://example.com/org/repo.git", urlStr) } From 76881c22645e358287b42240bac347e625dd530a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?coffeegoddd=E2=98=95=EF=B8=8F=E2=9C=A8?= Date: Tue, 10 Feb 2026 20:14:26 -0800 Subject: [PATCH 07/20] /{go,integration-tests}: fixes --- go/cmd/dolt/commands/remote.go | 2 -- go/libraries/doltcore/dbfactory/git_remote.go | 34 +++++++++++++++---- go/libraries/doltcore/env/remotes.go | 9 +++++ .../doltcore/sqle/dprocedures/dolt_backup.go | 2 -- .../doltcore/sqle/dprocedures/dolt_clone.go | 2 -- .../doltcore/sqle/dprocedures/dolt_remote.go | 2 -- integration-tests/bats/remotes-git.bats | 19 ++--------- 7 files changed, 39 insertions(+), 31 deletions(-) diff --git a/go/cmd/dolt/commands/remote.go b/go/cmd/dolt/commands/remote.go index fcfee3c9326..21366cdb263 100644 --- a/go/cmd/dolt/commands/remote.go +++ b/go/cmd/dolt/commands/remote.go @@ -221,8 +221,6 @@ func parseRemoteArgs(apr *argparser.ArgParseResults, scheme, remoteUrl string) ( return nil, errhand.BuildDError("error: --%s cannot be empty", gitCacheDirFlag).Build() } params[dbfactory.GitCacheDirParam] = v - } else { - return nil, errhand.BuildDError("error: --%s is required for git remotes", gitCacheDirFlag).Build() } if v, ok := apr.GetValue(gitRefFlag); ok { v = strings.TrimSpace(v) diff --git a/go/libraries/doltcore/dbfactory/git_remote.go b/go/libraries/doltcore/dbfactory/git_remote.go index 4c630116914..b3371cabe56 100644 --- a/go/libraries/doltcore/dbfactory/git_remote.go +++ b/go/libraries/doltcore/dbfactory/git_remote.go @@ -34,9 +34,11 @@ import ( ) const ( - GitCacheDirParam = "git_cache_dir" - GitRefParam = "git_ref" - defaultGitRef = "refs/dolt/data" + GitCacheDirParam = "git_cache_dir" + GitRefParam = "git_ref" + GitRemoteNameParam = "git_remote_name" + defaultGitRef = "refs/dolt/data" + defaultGitRemoteName = "origin" ) // GitRemoteFactory opens a Dolt database backed by a Git remote, using a local bare @@ -95,13 +97,15 @@ func (fact GitRemoteFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFor return nil, nil, nil, err } + remoteName := resolveGitRemoteName(params) + // Ensure remote "origin" exists and points to the underlying git remote URL. - if err := ensureGitRemoteURL(ctx, cacheRepo, "origin", remoteURL.String()); err != nil { + if err := ensureGitRemoteURL(ctx, cacheRepo, remoteName, remoteURL.String()); err != nil { return nil, nil, nil, err } q := nbs.NewUnlimitedMemQuotaProvider() - cs, err := nbs.NewGitStore(ctx, nbf.VersionString(), cacheRepo, ref, blobstore.GitBlobstoreOptions{RemoteName: "origin"}, defaultMemTableSize, q) + cs, err := nbs.NewGitStore(ctx, nbf.VersionString(), cacheRepo, ref, blobstore.GitBlobstoreOptions{RemoteName: remoteName}, defaultMemTableSize, q) if err != nil { return nil, nil, nil, err } @@ -153,6 +157,20 @@ func resolveGitRemoteRef(urlObj *url.URL, params map[string]interface{}) string return defaultGitRef } +func resolveGitRemoteName(params map[string]interface{}) string { + if params != nil { + if v, ok := params[GitRemoteNameParam]; ok && v != nil { + s, ok := v.(string) + if ok { + if s = strings.TrimSpace(s); s != "" { + return s + } + } + } + } + return defaultGitRemoteName +} + func resolveGitCacheBase(params map[string]interface{}) (string, error) { if params != nil { if v, ok := params[GitCacheDirParam]; ok && v != nil { @@ -166,7 +184,11 @@ func resolveGitCacheBase(params map[string]interface{}) (string, error) { return s, nil } } - return "", fmt.Errorf("%s is required for git remotes", GitCacheDirParam) + base, err := os.UserCacheDir() + if err != nil { + return "", err + } + return filepath.Join(base, "dolt", "git-remote-cache"), nil } func cacheRepoPath(cacheBase, remoteURL, ref string) (string, error) { diff --git a/go/libraries/doltcore/env/remotes.go b/go/libraries/doltcore/env/remotes.go index bd2b52d12a0..7e819fa9f6c 100644 --- a/go/libraries/doltcore/env/remotes.go +++ b/go/libraries/doltcore/env/remotes.go @@ -104,6 +104,9 @@ func (r *Remote) GetRemoteDB(ctx context.Context, nbf *types.NomsBinFormat, dial } params[dbfactory.GRPCDialProviderParam] = dialer + if u, err := earl.Parse(r.Url); err == nil && u != nil && strings.HasPrefix(strings.ToLower(u.Scheme), "git+") { + params[dbfactory.GitRemoteNameParam] = r.Name + } return doltdb.LoadDoltDBWithParams(ctx, nbf, r.Url, filesys2.LocalFS, params) } @@ -117,6 +120,9 @@ func (r *Remote) Prepare(ctx context.Context, nbf *types.NomsBinFormat, dialer d } params[dbfactory.GRPCDialProviderParam] = dialer + if u, err := earl.Parse(r.Url); err == nil && u != nil && strings.HasPrefix(strings.ToLower(u.Scheme), "git+") { + params[dbfactory.GitRemoteNameParam] = r.Name + } return dbfactory.PrepareDB(ctx, nbf, r.Url, params) } @@ -128,6 +134,9 @@ func (r *Remote) GetRemoteDBWithoutCaching(ctx context.Context, nbf *types.NomsB } params[dbfactory.NoCachingParameter] = "true" params[dbfactory.GRPCDialProviderParam] = dialer + if u, err := earl.Parse(r.Url); err == nil && u != nil && strings.HasPrefix(strings.ToLower(u.Scheme), "git+") { + params[dbfactory.GitRemoteNameParam] = r.Name + } return doltdb.LoadDoltDBWithParams(ctx, nbf, r.Url, filesys2.LocalFS, params) } diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go b/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go index 21f1f2435a6..240327cb576 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go @@ -323,8 +323,6 @@ func newParams(apr *argparser.ArgParseResults, url string, urlScheme string) (ma return nil, fmt.Errorf("error: --git-cache-dir cannot be empty") } params[dbfactory.GitCacheDirParam] = dir - } else { - return nil, fmt.Errorf("error: --git-cache-dir is required for git remotes") } if ref, ok := apr.GetValue("ref"); ok { ref = strings.TrimSpace(ref) diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go b/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go index 7cf08f65201..88d953fddd3 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go @@ -64,8 +64,6 @@ func doltClone(ctx *sql.Context, args ...string) (sql.RowIter, error) { return nil, errhand.BuildDError("error: --git-cache-dir cannot be empty").Build() } remoteParms[dbfactory.GitCacheDirParam] = dir - } else { - return nil, errhand.BuildDError("error: --git-cache-dir is required for git remotes").Build() } } else { if _, ok := apr.GetValue("git-cache-dir"); ok { diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go b/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go index 4312e35f2e4..1e2202d8623 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go @@ -112,8 +112,6 @@ func addRemote(_ *sql.Context, dbName string, dbd env.DbData[*sql.Context], apr return fmt.Errorf("error: --git-cache-dir cannot be empty") } params[dbfactory.GitCacheDirParam] = dir - } else { - return fmt.Errorf("error: --git-cache-dir is required for git remotes") } if ref, ok := apr.GetValue("ref"); ok { ref = strings.TrimSpace(ref) diff --git a/integration-tests/bats/remotes-git.bats b/integration-tests/bats/remotes-git.bats index 85fc5a99e2c..2551c57186b 100644 --- a/integration-tests/bats/remotes-git.bats +++ b/integration-tests/bats/remotes-git.bats @@ -226,7 +226,7 @@ teardown() { rm -rf "$cache1" "$cache2" } -@test "remotes-git: --git-cache-dir is required and creates cache base content" { +@test "remotes-git: default git cache dir works when --git-cache-dir omitted" { if ! command -v git >/dev/null 2>&1; then skip "git not installed" fi @@ -239,22 +239,7 @@ teardown() { dolt init dolt commit --allow-empty -m "init" - run dolt remote add origin ../remote.git - [ "$status" -ne 0 ] - [[ "$output" =~ "git-cache-dir" ]] || false - [[ "$output" =~ "required" ]] || false - - cache1=$(mktemp -d) - dolt remote add --git-cache-dir "$cache1" origin ../remote.git + dolt remote add origin ../remote.git run dolt push --set-upstream origin main [ "$status" -eq 0 ] - - run ls -A "$cache1" - [ "$status" -eq 0 ] - [ "$output" != "" ] - - run bash -c 'ls -d "$1"/*/repo.git >/dev/null 2>&1' _ "$cache1" - [ "$status" -eq 0 ] - - rm -rf "$cache1" } From 6432b224db5c9ffd62bed63fbe7d7e31845645f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?coffeegoddd=E2=98=95=EF=B8=8F=E2=9C=A8?= Date: Tue, 10 Feb 2026 20:19:32 -0800 Subject: [PATCH 08/20] /go/libraries/doltcore/dbfactory: cleanup --- go/libraries/doltcore/dbfactory/git_remote.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/go/libraries/doltcore/dbfactory/git_remote.go b/go/libraries/doltcore/dbfactory/git_remote.go index b3371cabe56..565ee188287 100644 --- a/go/libraries/doltcore/dbfactory/git_remote.go +++ b/go/libraries/doltcore/dbfactory/git_remote.go @@ -99,7 +99,7 @@ func (fact GitRemoteFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFor remoteName := resolveGitRemoteName(params) - // Ensure remote "origin" exists and points to the underlying git remote URL. + // Ensure the configured git remote exists and points to the underlying git remote URL. if err := ensureGitRemoteURL(ctx, cacheRepo, remoteName, remoteURL.String()); err != nil { return nil, nil, nil, err } @@ -133,7 +133,7 @@ func parseGitRemoteFactoryURL(urlObj *url.URL, params map[string]interface{}) (r return nil, "", fmt.Errorf("git remote ref must be provided via git remote param %q (not URL query ref=)", GitRefParam) } - ref = resolveGitRemoteRef(urlObj, params) + ref = resolveGitRemoteRef(params) cp := *urlObj cp.Scheme = underlyingScheme @@ -142,8 +142,8 @@ func parseGitRemoteFactoryURL(urlObj *url.URL, params map[string]interface{}) (r return &cp, ref, nil } -func resolveGitRemoteRef(urlObj *url.URL, params map[string]interface{}) string { - // Prefer an explicit remote parameter (e.g. from `--ref`) over any URL query. +func resolveGitRemoteRef(params map[string]interface{}) string { + // Prefer an explicit remote parameter (e.g. from `--ref`). if params != nil { if v, ok := params[GitRefParam]; ok && v != nil { s, ok := v.(string) From 815b9501976c5cd28e7cd1e1cfa4ff2993825771 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?coffeegoddd=E2=98=95=EF=B8=8F=E2=9C=A8?= Date: Tue, 10 Feb 2026 20:30:32 -0800 Subject: [PATCH 09/20] /go/{cmd, libraries}: some cleanup --- go/cmd/dolt/commands/remote.go | 37 ++++++++++++------- go/libraries/doltcore/dbfactory/git_remote.go | 4 -- .../doltcore/dbfactory/git_remote_test.go | 16 -------- go/libraries/doltcore/env/git_remote_url.go | 20 +--------- .../doltcore/env/git_remote_url_test.go | 21 ----------- 5 files changed, 25 insertions(+), 73 deletions(-) diff --git a/go/cmd/dolt/commands/remote.go b/go/cmd/dolt/commands/remote.go index 21366cdb263..47592b97563 100644 --- a/go/cmd/dolt/commands/remote.go +++ b/go/cmd/dolt/commands/remote.go @@ -215,19 +215,9 @@ func parseRemoteArgs(apr *argparser.ArgParseResults, scheme, remoteUrl string) ( case dbfactory.OSSScheme: err = cli.AddOSSParams(remoteUrl, apr, params) case dbfactory.GitFileScheme, dbfactory.GitHTTPScheme, dbfactory.GitHTTPSScheme, dbfactory.GitSSHScheme: - if v, ok := apr.GetValue(gitCacheDirFlag); ok { - v = strings.TrimSpace(v) - if v == "" { - return nil, errhand.BuildDError("error: --%s cannot be empty", gitCacheDirFlag).Build() - } - params[dbfactory.GitCacheDirParam] = v - } - if v, ok := apr.GetValue(gitRefFlag); ok { - v = strings.TrimSpace(v) - if v == "" { - return nil, errhand.BuildDError("error: --%s cannot be empty", gitRefFlag).Build() - } - params[dbfactory.GitRefParam] = v + verr := addGitRemoteParams(apr, params) + if verr != nil { + return nil, verr } default: err = cli.VerifyNoAwsParams(apr) @@ -240,6 +230,9 @@ func parseRemoteArgs(apr *argparser.ArgParseResults, scheme, remoteUrl string) ( switch scheme { case dbfactory.GitFileScheme, dbfactory.GitHTTPScheme, dbfactory.GitHTTPSScheme, dbfactory.GitSSHScheme: default: + if _, ok := apr.GetValue(gitCacheDirFlag); ok { + return nil, errhand.BuildDError("error: --%s is only supported for git remotes", gitCacheDirFlag).Build() + } if _, ok := apr.GetValue(gitRefFlag); ok { return nil, errhand.BuildDError("error: --%s is only supported for git remotes", gitRefFlag).Build() } @@ -248,6 +241,24 @@ func parseRemoteArgs(apr *argparser.ArgParseResults, scheme, remoteUrl string) ( return params, nil } +func addGitRemoteParams(apr *argparser.ArgParseResults, params map[string]string) errhand.VerboseError { + if v, ok := apr.GetValue(gitCacheDirFlag); ok { + v = strings.TrimSpace(v) + if v == "" { + return errhand.BuildDError("error: --%s cannot be empty", gitCacheDirFlag).Build() + } + params[dbfactory.GitCacheDirParam] = v + } + if v, ok := apr.GetValue(gitRefFlag); ok { + v = strings.TrimSpace(v) + if v == "" { + return errhand.BuildDError("error: --%s cannot be empty", gitRefFlag).Build() + } + params[dbfactory.GitRefParam] = v + } + return nil +} + // callSQLRemoteAdd calls the SQL function `call `dolt_remote('add', remoteName, remoteUrl)` func callSQLRemoteAdd(sqlCtx *sql.Context, queryist cli.Queryist, remoteName, remoteUrl string) error { qry, err := dbr.InterpolateForDialect("call dolt_remote('add', ?, ?)", []interface{}{remoteName, remoteUrl}, dialect.MySQL) diff --git a/go/libraries/doltcore/dbfactory/git_remote.go b/go/libraries/doltcore/dbfactory/git_remote.go index 565ee188287..7e6bf6f0aca 100644 --- a/go/libraries/doltcore/dbfactory/git_remote.go +++ b/go/libraries/doltcore/dbfactory/git_remote.go @@ -128,10 +128,6 @@ func parseGitRemoteFactoryURL(urlObj *url.URL, params map[string]interface{}) (r if underlyingScheme == "" { return nil, "", fmt.Errorf("invalid git+ scheme %q", urlObj.Scheme) } - // Ref selection is configured via dolt remote parameters (e.g. `--ref`), not via URL query. - if qref := strings.TrimSpace(urlObj.Query().Get("ref")); qref != "" { - return nil, "", fmt.Errorf("git remote ref must be provided via git remote param %q (not URL query ref=)", GitRefParam) - } ref = resolveGitRemoteRef(params) diff --git a/go/libraries/doltcore/dbfactory/git_remote_test.go b/go/libraries/doltcore/dbfactory/git_remote_test.go index a0ba277a11c..44b7163cd2f 100644 --- a/go/libraries/doltcore/dbfactory/git_remote_test.go +++ b/go/libraries/doltcore/dbfactory/git_remote_test.go @@ -89,22 +89,6 @@ func TestGitRemoteFactory_GitFile_UsesConfiguredCacheDirAndCanWrite(t *testing.T require.NoError(t, err, "git rev-parse failed: %s", strings.TrimSpace(string(out))) } -func TestGitRemoteFactory_RejectsRefQueryParam(t *testing.T) { - if _, err := exec.LookPath("git"); err != nil { - t.Skip("git not found on PATH") - } - - ctx := context.Background() - remoteRepo, err := gitrepo.InitBare(ctx, t.TempDir()+"/remote.git") - require.NoError(t, err) - - remotePath := filepath.ToSlash(remoteRepo.GitDir) - urlStr := "git+file://" + remotePath + "?ref=refs/dolt/data" - - _, _, _, err = CreateDB(ctx, types.Format_Default, urlStr, nil) - require.Error(t, err) -} - func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) { if _, err := exec.LookPath("git"); err != nil { t.Skip("git not found on PATH") diff --git a/go/libraries/doltcore/env/git_remote_url.go b/go/libraries/doltcore/env/git_remote_url.go index 1e7a3974c13..b18ed62fda1 100644 --- a/go/libraries/doltcore/env/git_remote_url.go +++ b/go/libraries/doltcore/env/git_remote_url.go @@ -62,9 +62,6 @@ func NormalizeGitRemoteUrl(urlArg string) (normalized string, ok bool, err error if _, ok := supportedGitPlusSchemes[strings.ToLower(u.Scheme)]; !ok { return "", false, fmt.Errorf("unsupported git dbfactory scheme %q", u.Scheme) } - if err := rejectGitRefQuery(u); err != nil { - return "", false, err - } return u.String(), true, nil } @@ -95,9 +92,6 @@ func NormalizeGitRemoteUrl(urlArg string) (normalized string, ok bool, err error if _, ok := supportedUnderlyingGitSchemes[s]; !ok { return "", false, nil } - if err := rejectGitRefQuery(u); err != nil { - return "", false, err - } u.Scheme = "git+" + s return u.String(), true, nil } @@ -121,9 +115,6 @@ func NormalizeGitRemoteUrl(urlArg string) (normalized string, ok bool, err error if err != nil { return "", false, err } - if err := rejectGitRefQuery(u); err != nil { - return "", false, err - } return u.String(), true, nil } @@ -178,13 +169,4 @@ func splitScpLike(s string) (host string, path string) { return s[:i], s[i+1:] } -func rejectGitRefQuery(u *url.URL) error { - // Ref selection is configured via dolt remote parameters (e.g. `--ref`), not via URL query. - if u == nil { - return nil - } - if qref := strings.TrimSpace(u.Query().Get("ref")); qref != "" { - return fmt.Errorf("git remote ref must be provided via --ref (not URL query ref=)") - } - return nil -} +// NOTE: we intentionally do not reject URL query parameters (including `ref=`) here. diff --git a/go/libraries/doltcore/env/git_remote_url_test.go b/go/libraries/doltcore/env/git_remote_url_test.go index a4a8093f4bd..122451291ab 100644 --- a/go/libraries/doltcore/env/git_remote_url_test.go +++ b/go/libraries/doltcore/env/git_remote_url_test.go @@ -36,27 +36,6 @@ func TestNormalizeGitRemoteUrl(t *testing.T) { require.Equal(t, "git+https://example.com/org/repo.git", got) }) - t.Run("explicit git+https with ref query is rejected", func(t *testing.T) { - got, ok, err := NormalizeGitRemoteUrl("git+https://example.com/org/repo.git?ref=refs/dolt/data") - require.Error(t, err) - require.False(t, ok) - require.Empty(t, got) - }) - - t.Run("https .git with ref query is rejected", func(t *testing.T) { - got, ok, err := NormalizeGitRemoteUrl("https://example.com/org/repo.git?ref=refs/dolt/data") - require.Error(t, err) - require.False(t, ok) - require.Empty(t, got) - }) - - t.Run("schemeless host/path with ref query is rejected", func(t *testing.T) { - got, ok, err := NormalizeGitRemoteUrl("github.com/org/repo.git?ref=refs/dolt/data") - require.Error(t, err) - require.False(t, ok) - require.Empty(t, got) - }) - t.Run("https .git becomes git+https", func(t *testing.T) { got, ok, err := NormalizeGitRemoteUrl("https://example.com/org/repo.git") require.NoError(t, err) From b909f50f94702cda764a5ca2c822816ff95e93cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?coffeegoddd=E2=98=95=EF=B8=8F=E2=9C=A8?= Date: Tue, 10 Feb 2026 20:41:00 -0800 Subject: [PATCH 10/20] /go/libraries/doltcore/sqle/dprocedures: refactor --- .../doltcore/sqle/dprocedures/dolt_clone.go | 26 +++++++++------- .../doltcore/sqle/dprocedures/dolt_remote.go | 31 ++++++++++++------- 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go b/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go index 88d953fddd3..70e4bd6020e 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go @@ -57,25 +57,27 @@ func doltClone(ctx *sql.Context, args ...string) (sql.RowIter, error) { if user, hasUser := apr.GetValue(cli.UserFlag); hasUser { remoteParms[dbfactory.GRPCUsernameAuthParam] = user } - if scheme == dbfactory.GitFileScheme || scheme == dbfactory.GitHTTPScheme || scheme == dbfactory.GitHTTPSScheme || scheme == dbfactory.GitSSHScheme { - if dir, ok := apr.GetValue("git-cache-dir"); ok { - dir = strings.TrimSpace(dir) - if dir == "" { - return nil, errhand.BuildDError("error: --git-cache-dir cannot be empty").Build() - } - remoteParms[dbfactory.GitCacheDirParam] = dir + + isGitRemote := scheme == dbfactory.GitFileScheme || scheme == dbfactory.GitHTTPScheme || scheme == dbfactory.GitHTTPSScheme || scheme == dbfactory.GitSSHScheme + + if dir, ok := apr.GetValue("git-cache-dir"); ok { + dir = strings.TrimSpace(dir) + if dir == "" { + return nil, errhand.BuildDError("error: --git-cache-dir cannot be empty").Build() } - } else { - if _, ok := apr.GetValue("git-cache-dir"); ok { + if !isGitRemote { return nil, errhand.BuildDError("error: --git-cache-dir is only supported for git remotes").Build() } + remoteParms[dbfactory.GitCacheDirParam] = dir } + if ref, ok := apr.GetValue("ref"); ok { ref = strings.TrimSpace(ref) - if ref != "" && (scheme == dbfactory.GitFileScheme || scheme == dbfactory.GitHTTPScheme || scheme == dbfactory.GitHTTPSScheme || scheme == dbfactory.GitSSHScheme) { + if ref != "" { + if !isGitRemote { + return nil, errhand.BuildDError("error: --ref is only supported for git remotes").Build() + } remoteParms[dbfactory.GitRefParam] = ref - } else if ref != "" { - return nil, errhand.BuildDError("error: --ref is only supported for git remotes").Build() } } diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go b/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go index 1e2202d8623..764553d09fc 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go @@ -104,20 +104,27 @@ func addRemote(_ *sql.Context, dbName string, dbd env.DbData[*sql.Context], apr } params := map[string]string{} - switch scheme { - case dbfactory.GitFileScheme, dbfactory.GitHTTPScheme, dbfactory.GitHTTPSScheme, dbfactory.GitSSHScheme: - if dir, ok := apr.GetValue("git-cache-dir"); ok { - dir = strings.TrimSpace(dir) - if dir == "" { - return fmt.Errorf("error: --git-cache-dir cannot be empty") - } - params[dbfactory.GitCacheDirParam] = dir + + isGitRemote := scheme == dbfactory.GitFileScheme || scheme == dbfactory.GitHTTPScheme || scheme == dbfactory.GitHTTPSScheme || scheme == dbfactory.GitSSHScheme + + if dir, ok := apr.GetValue("git-cache-dir"); ok { + dir = strings.TrimSpace(dir) + if dir == "" { + return fmt.Errorf("error: --git-cache-dir cannot be empty") } - if ref, ok := apr.GetValue("ref"); ok { - ref = strings.TrimSpace(ref) - if ref != "" { - params[dbfactory.GitRefParam] = ref + if !isGitRemote { + return fmt.Errorf("error: --git-cache-dir is only supported for git remotes") + } + params[dbfactory.GitCacheDirParam] = dir + } + + if ref, ok := apr.GetValue("ref"); ok { + ref = strings.TrimSpace(ref) + if ref != "" { + if !isGitRemote { + return fmt.Errorf("error: --ref is only supported for git remotes") } + params[dbfactory.GitRefParam] = ref } } From 07cad4bea2ede78757727721926ce740d703e933 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?coffeegoddd=E2=98=95=EF=B8=8F=E2=9C=A8?= Date: Tue, 10 Feb 2026 20:55:57 -0800 Subject: [PATCH 11/20] /go/libraries/doltcore/dbfactory: fix windows --- .../doltcore/dbfactory/git_remote_test.go | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/go/libraries/doltcore/dbfactory/git_remote_test.go b/go/libraries/doltcore/dbfactory/git_remote_test.go index 44b7163cd2f..8b9645635de 100644 --- a/go/libraries/doltcore/dbfactory/git_remote_test.go +++ b/go/libraries/doltcore/dbfactory/git_remote_test.go @@ -21,6 +21,7 @@ import ( "os" "os/exec" "path/filepath" + "runtime" "strings" "testing" @@ -33,16 +34,31 @@ import ( "github.com/dolthub/dolt/go/store/types" ) +// t.TempDir() includes the test name on disk, which can create very long paths on Windows. +// These tests create deep `refs/...` paths inside bare git repos and can hit MAX_PATH without +// long path support enabled. Use a short temp prefix on Windows to keep paths under the limit. +func shortTempDir(t *testing.T) string { + t.Helper() + if runtime.GOOS != "windows" { + return t.TempDir() + } + + dir, err := os.MkdirTemp("", "dolt") + require.NoError(t, err) + t.Cleanup(func() { _ = os.RemoveAll(dir) }) + return dir +} + func TestGitRemoteFactory_GitFile_UsesConfiguredCacheDirAndCanWrite(t *testing.T) { if _, err := exec.LookPath("git"); err != nil { t.Skip("git not found on PATH") } ctx := context.Background() - remoteRepo, err := gitrepo.InitBare(ctx, t.TempDir()+"/remote.git") + remoteRepo, err := gitrepo.InitBare(ctx, filepath.Join(shortTempDir(t), "remote.git")) require.NoError(t, err) - cacheDir := t.TempDir() + cacheDir := shortTempDir(t) remotePath := filepath.ToSlash(remoteRepo.GitDir) remoteURL := "file://" + remotePath @@ -95,7 +111,7 @@ func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) { } ctx := context.Background() - remoteRepo, err := gitrepo.InitBare(ctx, t.TempDir()+"/remote.git") + remoteRepo, err := gitrepo.InitBare(ctx, filepath.Join(shortTempDir(t), "remote.git")) require.NoError(t, err) remotePath := filepath.ToSlash(remoteRepo.GitDir) @@ -119,8 +135,8 @@ func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) { return d, vs.ChunkStore() } - cacheA := t.TempDir() - cacheB := t.TempDir() + cacheA := shortTempDir(t) + cacheB := shortTempDir(t) // Client A writes a root pointing at chunk A. dbA, csA := open(cacheA) From 56684d48c2ed87f4d51ad5a4b364193e461218b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?coffeegoddd=E2=98=95=EF=B8=8F=E2=9C=A8?= Date: Tue, 10 Feb 2026 21:09:13 -0800 Subject: [PATCH 12/20] /go/libraries/doltcore/env: fix remote url --- go/libraries/doltcore/env/git_remote_url.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/go/libraries/doltcore/env/git_remote_url.go b/go/libraries/doltcore/env/git_remote_url.go index b18ed62fda1..0cb588855e0 100644 --- a/go/libraries/doltcore/env/git_remote_url.go +++ b/go/libraries/doltcore/env/git_remote_url.go @@ -130,7 +130,11 @@ func stripQueryAndFragment(s string) string { } func looksLikeLocalPath(s string) bool { - return strings.HasPrefix(s, "/") || strings.HasPrefix(s, "./") || strings.HasPrefix(s, "../") + // Treat absolute filesystem paths as local paths, including Windows drive-letter and UNC paths. + if filepath.IsAbs(s) { + return true + } + return strings.HasPrefix(s, "./") || strings.HasPrefix(s, "../") } func isScpLikeGitRemote(s string) bool { From 554ef837056553e2f483a17c645fa35bf0fec66c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?coffeegoddd=E2=98=95=EF=B8=8F=E2=9C=A8?= Date: Tue, 10 Feb 2026 21:16:55 -0800 Subject: [PATCH 13/20] /go/libraries/doltcore: pr feedback --- go/libraries/doltcore/dbfactory/git_remote.go | 7 ++++--- .../doltcore/sqle/dprocedures/dolt_backup.go | 16 ++++++++++++++-- .../doltcore/sqle/dprocedures/dolt_clone.go | 11 ++++++----- .../doltcore/sqle/dprocedures/dolt_remote.go | 11 ++++++----- 4 files changed, 30 insertions(+), 15 deletions(-) diff --git a/go/libraries/doltcore/dbfactory/git_remote.go b/go/libraries/doltcore/dbfactory/git_remote.go index 7e6bf6f0aca..f9f195a138f 100644 --- a/go/libraries/doltcore/dbfactory/git_remote.go +++ b/go/libraries/doltcore/dbfactory/git_remote.go @@ -221,16 +221,17 @@ func ensureGitRemoteURL(ctx context.Context, gitDir string, remoteName string, r if strings.TrimSpace(remoteURL) == "" { return fmt.Errorf("empty remote url") } - got, err := runGitInDir(ctx, gitDir, "remote", "get-url", remoteName) + // Insert `--` so remoteName can't be interpreted as a flag. + got, err := runGitInDir(ctx, gitDir, "remote", "get-url", "--", remoteName) if err != nil { // Remote likely doesn't exist; attempt to add. - return runGitInDirNoOutput(ctx, gitDir, "remote", "add", remoteName, remoteURL) + return runGitInDirNoOutput(ctx, gitDir, "remote", "add", "--", remoteName, remoteURL) } got = strings.TrimSpace(got) if got == remoteURL { return nil } - return runGitInDirNoOutput(ctx, gitDir, "remote", "set-url", remoteName, remoteURL) + return runGitInDirNoOutput(ctx, gitDir, "remote", "set-url", "--", remoteName, remoteURL) } func runGitInitBare(ctx context.Context, dir string) error { diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go b/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go index 240327cb576..fa8fe92f003 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go @@ -307,6 +307,17 @@ func syncRemote(ctx *sql.Context, dbData env.DbData[*sql.Context], dsess *dsess. // not AWS, it verifies that no AWS parameters are present in |apr|. func newParams(apr *argparser.ArgParseResults, url string, urlScheme string) (map[string]string, error) { params := map[string]string{} + + isGitRemote := urlScheme == dbfactory.GitFileScheme || urlScheme == dbfactory.GitHTTPScheme || urlScheme == dbfactory.GitHTTPSScheme || urlScheme == dbfactory.GitSSHScheme + if !isGitRemote { + if _, ok := apr.GetValue("git-cache-dir"); ok { + return nil, fmt.Errorf("error: --git-cache-dir is only supported for git remotes") + } + if _, ok := apr.GetValue("ref"); ok { + return nil, fmt.Errorf("error: --ref is only supported for git remotes") + } + } + var err error switch urlScheme { case dbfactory.AWSScheme: @@ -326,9 +337,10 @@ func newParams(apr *argparser.ArgParseResults, url string, urlScheme string) (ma } if ref, ok := apr.GetValue("ref"); ok { ref = strings.TrimSpace(ref) - if ref != "" { - params[dbfactory.GitRefParam] = ref + if ref == "" { + return nil, fmt.Errorf("error: --ref cannot be empty") } + params[dbfactory.GitRefParam] = ref } default: err = cli.VerifyNoAwsParams(apr) diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go b/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go index 70e4bd6020e..d21c1ad1ddf 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go @@ -73,12 +73,13 @@ func doltClone(ctx *sql.Context, args ...string) (sql.RowIter, error) { if ref, ok := apr.GetValue("ref"); ok { ref = strings.TrimSpace(ref) - if ref != "" { - if !isGitRemote { - return nil, errhand.BuildDError("error: --ref is only supported for git remotes").Build() - } - remoteParms[dbfactory.GitRefParam] = ref + if ref == "" { + return nil, errhand.BuildDError("error: --ref cannot be empty").Build() } + if !isGitRemote { + return nil, errhand.BuildDError("error: --ref is only supported for git remotes").Build() + } + remoteParms[dbfactory.GitRefParam] = ref } depth, ok := apr.GetInt(cli.DepthFlag) diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go b/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go index 764553d09fc..bc4a5a47f92 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go @@ -120,12 +120,13 @@ func addRemote(_ *sql.Context, dbName string, dbd env.DbData[*sql.Context], apr if ref, ok := apr.GetValue("ref"); ok { ref = strings.TrimSpace(ref) - if ref != "" { - if !isGitRemote { - return fmt.Errorf("error: --ref is only supported for git remotes") - } - params[dbfactory.GitRefParam] = ref + if ref == "" { + return fmt.Errorf("error: --ref cannot be empty") + } + if !isGitRemote { + return fmt.Errorf("error: --ref is only supported for git remotes") } + params[dbfactory.GitRefParam] = ref } r := env.NewRemote(remoteName, absRemoteUrl, params) From 36b1853a8e48f4727a0bfe0f434b05e4bb63d21d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?coffeegoddd=E2=98=95=EF=B8=8F=E2=9C=A8?= Date: Wed, 11 Feb 2026 10:11:05 -0800 Subject: [PATCH 14/20] /go/{cmd,libraries}: remote git dir arg --- go/cmd/dolt/cli/arg_parser_helpers.go | 3 -- go/cmd/dolt/commands/read_tables.go | 1 - go/cmd/dolt/commands/read_tables_test.go | 2 - go/cmd/dolt/commands/remote.go | 11 ----- go/cmd/dolt/commands/remote_test.go | 12 +----- go/libraries/doltcore/dbfactory/git_remote.go | 17 +------- .../doltcore/dbfactory/git_remote_test.go | 34 ++++++--------- .../doltcore/sqle/dprocedures/dolt_backup.go | 10 ----- .../doltcore/sqle/dprocedures/dolt_clone.go | 11 ----- .../doltcore/sqle/dprocedures/dolt_remote.go | 11 ----- integration-tests/bats/remotes-git.bats | 43 ++++++------------- integration-tests/bats/sql-remotes-git.bats | 25 ++++------- 12 files changed, 39 insertions(+), 141 deletions(-) diff --git a/go/cmd/dolt/cli/arg_parser_helpers.go b/go/cmd/dolt/cli/arg_parser_helpers.go index 8c6e8e89d2d..fc071e44946 100644 --- a/go/cmd/dolt/cli/arg_parser_helpers.go +++ b/go/cmd/dolt/cli/arg_parser_helpers.go @@ -146,7 +146,6 @@ func CreateCloneArgParser() *argparser.ArgParser { ap.SupportsString(RemoteParam, "", "name", "Name of the remote to be added to the cloned database. The default is 'origin'.") ap.SupportsString(BranchParam, "b", "branch", "The branch to be cloned. If not specified all branches will be cloned.") ap.SupportsString(DepthFlag, "", "depth", "Clone a single branch and limit history to the given commit depth.") - ap.SupportsString("git-cache-dir", "", "dir", "Local directory to use for git remote cache (sets remote param git_cache_dir).") ap.SupportsString("ref", "", "ref", "Git ref to use as the Dolt data ref for git remotes (sets remote param git_ref).") ap.SupportsString(dbfactory.AWSRegionParam, "", "region", "") ap.SupportsValidatedString(dbfactory.AWSCredsTypeParam, "", "creds-type", "", argparser.ValidatorFromStrList(dbfactory.AWSCredsTypeParam, dbfactory.AWSCredTypes)) @@ -168,7 +167,6 @@ func CreateResetArgParser() *argparser.ArgParser { func CreateRemoteArgParser() *argparser.ArgParser { ap := argparser.NewArgParserWithVariableArgs("remote") - ap.SupportsString("git-cache-dir", "", "dir", "Local directory to use for git remote cache (sets remote param git_cache_dir).") ap.SupportsString("ref", "", "ref", "Git ref to use as the Dolt data ref for git remotes (sets remote param git_ref).") return ap } @@ -270,7 +268,6 @@ func CreateBackupArgParser() *argparser.ArgParser { ap.ArgListHelp = append(ap.ArgListHelp, [2]string{"profile", "AWS profile to use."}) ap.SupportsFlag(VerboseFlag, "v", "When printing the list of backups adds additional details.") ap.SupportsFlag(ForceFlag, "f", "When restoring a backup, overwrite the contents of the existing database with the same name.") - ap.SupportsString("git-cache-dir", "", "dir", "Local directory to use for git remote cache (sets remote param git_cache_dir).") ap.SupportsString("ref", "", "ref", "Git ref to use as the Dolt data ref for git remotes (sets remote param git_ref).") ap.SupportsString(dbfactory.AWSRegionParam, "", "region", "") ap.SupportsValidatedString(dbfactory.AWSCredsTypeParam, "", "creds-type", "", argparser.ValidatorFromStrList(dbfactory.AWSCredsTypeParam, dbfactory.AWSCredTypes)) diff --git a/go/cmd/dolt/commands/read_tables.go b/go/cmd/dolt/commands/read_tables.go index e4c169a635b..4b08969a300 100644 --- a/go/cmd/dolt/commands/read_tables.go +++ b/go/cmd/dolt/commands/read_tables.go @@ -78,7 +78,6 @@ func (cmd ReadTablesCmd) ArgParser() *argparser.ArgParser { {"table", " Optional tables to retrieve. If omitted, all tables are retrieved."}, } ap.SupportsString(dirParamName, "d", "directory", "directory to create and put retrieved table data.") - ap.SupportsString(gitCacheDirFlag, "", "dir", "Local directory to use for git remote cache (sets remote param git_cache_dir).") ap.SupportsString(gitRefFlag, "", "ref", "Git ref to use as the Dolt data ref for git remotes (sets remote param git_ref).") return ap } diff --git a/go/cmd/dolt/commands/read_tables_test.go b/go/cmd/dolt/commands/read_tables_test.go index 47a1a6563a6..b9adbd41c45 100644 --- a/go/cmd/dolt/commands/read_tables_test.go +++ b/go/cmd/dolt/commands/read_tables_test.go @@ -23,11 +23,9 @@ import ( func TestReadTablesArgParser_AcceptsGitFlags(t *testing.T) { ap := ReadTablesCmd{}.ArgParser() apr, err := ap.Parse([]string{ - "--" + gitCacheDirFlag, "/tmp/cache", "--" + gitRefFlag, "refs/dolt/custom", "git+file:///tmp/remote.git", "main", }) require.NoError(t, err) - require.Equal(t, "/tmp/cache", apr.GetValueOrDefault(gitCacheDirFlag, "")) require.Equal(t, "refs/dolt/custom", apr.GetValueOrDefault(gitRefFlag, "")) } diff --git a/go/cmd/dolt/commands/remote.go b/go/cmd/dolt/commands/remote.go index 47592b97563..73f522b2e1e 100644 --- a/go/cmd/dolt/commands/remote.go +++ b/go/cmd/dolt/commands/remote.go @@ -71,7 +71,6 @@ const ( addRemoteId = "add" removeRemoteId = "remove" removeRemoteShortId = "rm" - gitCacheDirFlag = "git-cache-dir" gitRefFlag = "ref" ) @@ -230,9 +229,6 @@ func parseRemoteArgs(apr *argparser.ArgParseResults, scheme, remoteUrl string) ( switch scheme { case dbfactory.GitFileScheme, dbfactory.GitHTTPScheme, dbfactory.GitHTTPSScheme, dbfactory.GitSSHScheme: default: - if _, ok := apr.GetValue(gitCacheDirFlag); ok { - return nil, errhand.BuildDError("error: --%s is only supported for git remotes", gitCacheDirFlag).Build() - } if _, ok := apr.GetValue(gitRefFlag); ok { return nil, errhand.BuildDError("error: --%s is only supported for git remotes", gitRefFlag).Build() } @@ -242,13 +238,6 @@ func parseRemoteArgs(apr *argparser.ArgParseResults, scheme, remoteUrl string) ( } func addGitRemoteParams(apr *argparser.ArgParseResults, params map[string]string) errhand.VerboseError { - if v, ok := apr.GetValue(gitCacheDirFlag); ok { - v = strings.TrimSpace(v) - if v == "" { - return errhand.BuildDError("error: --%s cannot be empty", gitCacheDirFlag).Build() - } - params[dbfactory.GitCacheDirParam] = v - } if v, ok := apr.GetValue(gitRefFlag); ok { v = strings.TrimSpace(v) if v == "" { diff --git a/go/cmd/dolt/commands/remote_test.go b/go/cmd/dolt/commands/remote_test.go index 566a591beec..a0984251c0c 100644 --- a/go/cmd/dolt/commands/remote_test.go +++ b/go/cmd/dolt/commands/remote_test.go @@ -137,19 +137,9 @@ func TestGetAbsRemoteUrl(t *testing.T) { } } -func TestParseRemoteArgs_GitCacheDir(t *testing.T) { - ap := RemoteCmd{}.ArgParser() - apr, err := ap.Parse([]string{"add", "origin", "git+file:///tmp/remote.git", "--" + gitCacheDirFlag, "/tmp/cache"}) - assert.NoError(t, err) - - params, verr := parseRemoteArgs(apr, dbfactory.GitFileScheme, "git+file:///tmp/remote.git") - assert.Nil(t, verr) - assert.Equal(t, "/tmp/cache", params[dbfactory.GitCacheDirParam]) -} - func TestParseRemoteArgs_GitRef(t *testing.T) { ap := RemoteCmd{}.ArgParser() - apr, err := ap.Parse([]string{"add", "origin", "git+file:///tmp/remote.git", "--" + gitCacheDirFlag, "/tmp/cache", "--" + gitRefFlag, "refs/dolt/custom"}) + apr, err := ap.Parse([]string{"add", "origin", "git+file:///tmp/remote.git", "--" + gitRefFlag, "refs/dolt/custom"}) assert.NoError(t, err) params, verr := parseRemoteArgs(apr, dbfactory.GitFileScheme, "git+file:///tmp/remote.git") diff --git a/go/libraries/doltcore/dbfactory/git_remote.go b/go/libraries/doltcore/dbfactory/git_remote.go index f9f195a138f..75a45a37871 100644 --- a/go/libraries/doltcore/dbfactory/git_remote.go +++ b/go/libraries/doltcore/dbfactory/git_remote.go @@ -34,7 +34,6 @@ import ( ) const ( - GitCacheDirParam = "git_cache_dir" GitRefParam = "git_ref" GitRemoteNameParam = "git_remote_name" defaultGitRef = "refs/dolt/data" @@ -84,7 +83,7 @@ func (fact GitRemoteFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFor return nil, nil, nil, err } - cacheBase, err := resolveGitCacheBase(params) + cacheBase, err := defaultGitCacheBase() if err != nil { return nil, nil, nil, err } @@ -167,19 +166,7 @@ func resolveGitRemoteName(params map[string]interface{}) string { return defaultGitRemoteName } -func resolveGitCacheBase(params map[string]interface{}) (string, error) { - if params != nil { - if v, ok := params[GitCacheDirParam]; ok && v != nil { - s, ok := v.(string) - if !ok { - return "", fmt.Errorf("%s must be a string", GitCacheDirParam) - } - if strings.TrimSpace(s) == "" { - return "", fmt.Errorf("%s cannot be empty", GitCacheDirParam) - } - return s, nil - } - } +func defaultGitCacheBase() (string, error) { base, err := os.UserCacheDir() if err != nil { return "", err diff --git a/go/libraries/doltcore/dbfactory/git_remote_test.go b/go/libraries/doltcore/dbfactory/git_remote_test.go index 8b9645635de..e0dbf93ed2f 100644 --- a/go/libraries/doltcore/dbfactory/git_remote_test.go +++ b/go/libraries/doltcore/dbfactory/git_remote_test.go @@ -49,7 +49,7 @@ func shortTempDir(t *testing.T) string { return dir } -func TestGitRemoteFactory_GitFile_UsesConfiguredCacheDirAndCanWrite(t *testing.T) { +func TestGitRemoteFactory_GitFile_UsesDefaultCacheDirAndCanWrite(t *testing.T) { if _, err := exec.LookPath("git"); err != nil { t.Skip("git not found on PATH") } @@ -58,25 +58,24 @@ func TestGitRemoteFactory_GitFile_UsesConfiguredCacheDirAndCanWrite(t *testing.T remoteRepo, err := gitrepo.InitBare(ctx, filepath.Join(shortTempDir(t), "remote.git")) require.NoError(t, err) - cacheDir := shortTempDir(t) - remotePath := filepath.ToSlash(remoteRepo.GitDir) remoteURL := "file://" + remotePath urlStr := "git+file://" + remotePath - - params := map[string]interface{}{ - GitCacheDirParam: cacheDir, - } + params := map[string]interface{}{} db, vrw, _, err := CreateDB(ctx, types.Format_Default, urlStr, params) require.NoError(t, err) require.NotNil(t, db) require.NotNil(t, vrw) - // Ensure cache repo created under configured cache dir. + // Ensure cache repo created under default cache dir. + base, err := os.UserCacheDir() + require.NoError(t, err) + cacheBase := filepath.Join(base, "dolt", "git-remote-cache") + sum := sha256.Sum256([]byte(remoteURL + "|" + "refs/dolt/data")) h := hex.EncodeToString(sum[:]) - cacheRepo := filepath.Join(cacheDir, h, "repo.git") + cacheRepo := filepath.Join(cacheBase, h, "repo.git") _, err = os.Stat(filepath.Join(cacheRepo, "HEAD")) require.NoError(t, err) @@ -121,10 +120,8 @@ func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) { return func(context.Context, hash.HashSet, chunks.PendingRefExists) error { return nil } } - open := func(cacheDir string) (db datas.Database, cs chunks.ChunkStore) { - params := map[string]interface{}{ - GitCacheDirParam: cacheDir, - } + open := func() (db datas.Database, cs chunks.ChunkStore) { + params := map[string]interface{}{} d, vrw, _, err := CreateDB(ctx, types.Format_Default, urlStr, params) require.NoError(t, err) require.NotNil(t, d) @@ -135,11 +132,8 @@ func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) { return d, vs.ChunkStore() } - cacheA := shortTempDir(t) - cacheB := shortTempDir(t) - // Client A writes a root pointing at chunk A. - dbA, csA := open(cacheA) + dbA, csA := open() cA := chunks.NewChunk([]byte("clientA\n")) require.NoError(t, csA.Put(ctx, cA, noopGetAddrs)) lastA, err := csA.Root(ctx) @@ -150,7 +144,7 @@ func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) { require.NoError(t, dbA.Close()) // Client B reads chunk A, then writes chunk B and updates the root. - dbB, csB := open(cacheB) + dbB, csB := open() require.NoError(t, csB.Rebase(ctx)) rootB, err := csB.Root(ctx) require.NoError(t, err) @@ -166,8 +160,8 @@ func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) { require.True(t, okCommitB) require.NoError(t, dbB.Close()) - // Client A re-opens (with the same cache dir as before) and should see B's update. - dbA2, csA2 := open(cacheA) + // Client A re-opens and should see B's update. + dbA2, csA2 := open() require.NoError(t, csA2.Rebase(ctx)) rootA2, err := csA2.Root(ctx) require.NoError(t, err) diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go b/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go index fa8fe92f003..25fd7f7c23c 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go @@ -310,9 +310,6 @@ func newParams(apr *argparser.ArgParseResults, url string, urlScheme string) (ma isGitRemote := urlScheme == dbfactory.GitFileScheme || urlScheme == dbfactory.GitHTTPScheme || urlScheme == dbfactory.GitHTTPSScheme || urlScheme == dbfactory.GitSSHScheme if !isGitRemote { - if _, ok := apr.GetValue("git-cache-dir"); ok { - return nil, fmt.Errorf("error: --git-cache-dir is only supported for git remotes") - } if _, ok := apr.GetValue("ref"); ok { return nil, fmt.Errorf("error: --ref is only supported for git remotes") } @@ -328,13 +325,6 @@ func newParams(apr *argparser.ArgParseResults, url string, urlScheme string) (ma err = cli.AddOSSParams(url, apr, params) case dbfactory.GitFileScheme, dbfactory.GitHTTPScheme, dbfactory.GitHTTPSScheme, dbfactory.GitSSHScheme: err = cli.VerifyNoAwsParams(apr) - if dir, ok := apr.GetValue("git-cache-dir"); ok { - dir = strings.TrimSpace(dir) - if dir == "" { - return nil, fmt.Errorf("error: --git-cache-dir cannot be empty") - } - params[dbfactory.GitCacheDirParam] = dir - } if ref, ok := apr.GetValue("ref"); ok { ref = strings.TrimSpace(ref) if ref == "" { diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go b/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go index d21c1ad1ddf..43f90c12a77 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go @@ -60,17 +60,6 @@ func doltClone(ctx *sql.Context, args ...string) (sql.RowIter, error) { isGitRemote := scheme == dbfactory.GitFileScheme || scheme == dbfactory.GitHTTPScheme || scheme == dbfactory.GitHTTPSScheme || scheme == dbfactory.GitSSHScheme - if dir, ok := apr.GetValue("git-cache-dir"); ok { - dir = strings.TrimSpace(dir) - if dir == "" { - return nil, errhand.BuildDError("error: --git-cache-dir cannot be empty").Build() - } - if !isGitRemote { - return nil, errhand.BuildDError("error: --git-cache-dir is only supported for git remotes").Build() - } - remoteParms[dbfactory.GitCacheDirParam] = dir - } - if ref, ok := apr.GetValue("ref"); ok { ref = strings.TrimSpace(ref) if ref == "" { diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go b/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go index bc4a5a47f92..8e1eed0097f 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go @@ -107,17 +107,6 @@ func addRemote(_ *sql.Context, dbName string, dbd env.DbData[*sql.Context], apr isGitRemote := scheme == dbfactory.GitFileScheme || scheme == dbfactory.GitHTTPScheme || scheme == dbfactory.GitHTTPSScheme || scheme == dbfactory.GitSSHScheme - if dir, ok := apr.GetValue("git-cache-dir"); ok { - dir = strings.TrimSpace(dir) - if dir == "" { - return fmt.Errorf("error: --git-cache-dir cannot be empty") - } - if !isGitRemote { - return fmt.Errorf("error: --git-cache-dir is only supported for git remotes") - } - params[dbfactory.GitCacheDirParam] = dir - } - if ref, ok := apr.GetValue("ref"); ok { ref = strings.TrimSpace(ref) if ref == "" { diff --git a/integration-tests/bats/remotes-git.bats b/integration-tests/bats/remotes-git.bats index 2551c57186b..af38f9e4196 100644 --- a/integration-tests/bats/remotes-git.bats +++ b/integration-tests/bats/remotes-git.bats @@ -8,11 +8,15 @@ setup() { cd $BATS_TMPDIR cd dolt-repo-$$ mkdir "dolt-repo-clones" + + # Keep auto-selected git cache dir inside this test's sandbox. + export XDG_CACHE_HOME="$(mktemp -d)" } teardown() { assert_feature_version teardown_common + rm -rf "$XDG_CACHE_HOME" } @test "remotes-git: smoke push/clone/push-back/pull" { @@ -23,9 +27,6 @@ teardown() { mkdir remote.git git init --bare remote.git - cache1=$(mktemp -d) - cache2=$(mktemp -d) - mkdir repo1 cd repo1 dolt init @@ -33,14 +34,14 @@ teardown() { dolt add . dolt commit -m "create table" - dolt remote add --git-cache-dir "$cache1" origin ../remote.git + dolt remote add origin ../remote.git run dolt push --set-upstream origin main [ "$status" -eq 0 ] [[ ! "$output" =~ "panic:" ]] || false cd .. cd dolt-repo-clones - run dolt clone --git-cache-dir "$cache2" ../remote.git repo2 + run dolt clone ../remote.git repo2 [ "$status" -eq 0 ] cd repo2 @@ -58,7 +59,6 @@ teardown() { [ "$status" -eq 0 ] [[ "$output" =~ "10" ]] || false - rm -rf "$cache1" "$cache2" } @test "remotes-git: empty remote bootstrap creates refs/dolt/data" { @@ -73,8 +73,6 @@ teardown() { run git --git-dir remote.git show-ref refs/dolt/data [ "$status" -eq 1 ] - cache1=$(mktemp -d) - mkdir repo1 cd repo1 dolt init @@ -82,14 +80,13 @@ teardown() { dolt add . dolt commit -m "create table" - dolt remote add --git-cache-dir "$cache1" origin ../remote.git + dolt remote add origin ../remote.git run dolt push --set-upstream origin main [ "$status" -eq 0 ] run git --git-dir ../remote.git show-ref refs/dolt/data [ "$status" -eq 0 ] - rm -rf "$cache1" } @test "remotes-git: pull also fetches branches from git remote" { @@ -100,18 +97,15 @@ teardown() { mkdir remote.git git init --bare remote.git - cache1=$(mktemp -d) - cache2=$(mktemp -d) - mkdir repo1 cd repo1 dolt init - dolt remote add --git-cache-dir "$cache1" origin ../remote.git + dolt remote add origin ../remote.git dolt push origin main cd .. cd dolt-repo-clones - run dolt clone --git-cache-dir "$cache2" ../remote.git repo2 + run dolt clone ../remote.git repo2 [ "$status" -eq 0 ] cd repo2 @@ -130,7 +124,6 @@ teardown() { [[ "$output" =~ "main" ]] || false [[ "$output" =~ "other" ]] || false - rm -rf "$cache1" "$cache2" } @test "remotes-git: pull fetches but does not merge other branches" { @@ -141,13 +134,10 @@ teardown() { mkdir remote.git git init --bare remote.git - cache1=$(mktemp -d) - cache2=$(mktemp -d) - mkdir repo1 cd repo1 dolt init - dolt remote add --git-cache-dir "$cache1" origin ../remote.git + dolt remote add origin ../remote.git dolt push --set-upstream origin main dolt checkout -b other dolt commit --allow-empty -m "first commit on other" @@ -155,7 +145,7 @@ teardown() { cd .. cd dolt-repo-clones - run dolt clone --git-cache-dir "$cache2" ../remote.git repo2 + run dolt clone ../remote.git repo2 [ "$status" -eq 0 ] cd repo2 @@ -179,7 +169,6 @@ teardown() { [ "$status" -eq 0 ] [[ "$output" =~ "first commit on other" ]] || false - rm -rf "$cache1" "$cache2" } @test "remotes-git: custom --ref writes to configured dolt data ref" { @@ -190,9 +179,6 @@ teardown() { mkdir remote.git git init --bare remote.git - cache1=$(mktemp -d) - cache2=$(mktemp -d) - mkdir repo1 cd repo1 dolt init @@ -201,7 +187,7 @@ teardown() { dolt add . dolt commit -m "seed" - dolt remote add --git-cache-dir "$cache1" --ref refs/dolt/custom origin ../remote.git + dolt remote add --ref refs/dolt/custom origin ../remote.git run dolt push --set-upstream origin main [ "$status" -eq 0 ] @@ -212,7 +198,7 @@ teardown() { cd .. cd dolt-repo-clones - run dolt clone --git-cache-dir "$cache2" --ref refs/dolt/custom ../remote.git repo2 + run dolt clone --ref refs/dolt/custom ../remote.git repo2 [ "$status" -eq 0 ] cd repo2 @@ -223,10 +209,9 @@ teardown() { run git --git-dir ../../remote.git show-ref refs/dolt/data [ "$status" -ne 0 ] - rm -rf "$cache1" "$cache2" } -@test "remotes-git: default git cache dir works when --git-cache-dir omitted" { +@test "remotes-git: push works with auto-selected git cache dir" { if ! command -v git >/dev/null 2>&1; then skip "git not installed" fi diff --git a/integration-tests/bats/sql-remotes-git.bats b/integration-tests/bats/sql-remotes-git.bats index e4abe147204..a1ebe2ab8ad 100644 --- a/integration-tests/bats/sql-remotes-git.bats +++ b/integration-tests/bats/sql-remotes-git.bats @@ -7,11 +7,15 @@ setup() { setup_common cd $BATS_TMPDIR cd dolt-repo-$$ + + # Keep auto-selected git cache dir inside this test's sandbox. + export XDG_CACHE_HOME="$(mktemp -d)" } teardown() { assert_feature_version teardown_common + rm -rf "$XDG_CACHE_HOME" } @test "sql-remotes-git: dolt_remote add supports --ref for git remotes" { @@ -22,8 +26,6 @@ teardown() { mkdir remote.git git init --bare remote.git - cache1=$(mktemp -d) - mkdir repo1 cd repo1 dolt init @@ -33,7 +35,7 @@ teardown() { dolt commit -m "seed" run dolt sql < Date: Wed, 11 Feb 2026 10:20:17 -0800 Subject: [PATCH 15/20] /go/cmd/dolt/{cli, commands}: fix usage line --- go/cmd/dolt/cli/arg_parser_helpers.go | 6 +++--- go/cmd/dolt/commands/read_tables.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/go/cmd/dolt/cli/arg_parser_helpers.go b/go/cmd/dolt/cli/arg_parser_helpers.go index fc071e44946..4551cfa2548 100644 --- a/go/cmd/dolt/cli/arg_parser_helpers.go +++ b/go/cmd/dolt/cli/arg_parser_helpers.go @@ -146,7 +146,7 @@ func CreateCloneArgParser() *argparser.ArgParser { ap.SupportsString(RemoteParam, "", "name", "Name of the remote to be added to the cloned database. The default is 'origin'.") ap.SupportsString(BranchParam, "b", "branch", "The branch to be cloned. If not specified all branches will be cloned.") ap.SupportsString(DepthFlag, "", "depth", "Clone a single branch and limit history to the given commit depth.") - ap.SupportsString("ref", "", "ref", "Git ref to use as the Dolt data ref for git remotes (sets remote param git_ref).") + ap.SupportsString("ref", "", "ref", "Git ref to use as the Dolt data ref for git remotes (default: refs/dolt/data).") ap.SupportsString(dbfactory.AWSRegionParam, "", "region", "") ap.SupportsValidatedString(dbfactory.AWSCredsTypeParam, "", "creds-type", "", argparser.ValidatorFromStrList(dbfactory.AWSCredsTypeParam, dbfactory.AWSCredTypes)) ap.SupportsString(dbfactory.AWSCredsFileParam, "", "file", "AWS credentials file.") @@ -167,7 +167,7 @@ func CreateResetArgParser() *argparser.ArgParser { func CreateRemoteArgParser() *argparser.ArgParser { ap := argparser.NewArgParserWithVariableArgs("remote") - ap.SupportsString("ref", "", "ref", "Git ref to use as the Dolt data ref for git remotes (sets remote param git_ref).") + ap.SupportsString("ref", "", "ref", "Git ref to use as the Dolt data ref for git remotes (default: refs/dolt/data).") return ap } @@ -268,7 +268,7 @@ func CreateBackupArgParser() *argparser.ArgParser { ap.ArgListHelp = append(ap.ArgListHelp, [2]string{"profile", "AWS profile to use."}) ap.SupportsFlag(VerboseFlag, "v", "When printing the list of backups adds additional details.") ap.SupportsFlag(ForceFlag, "f", "When restoring a backup, overwrite the contents of the existing database with the same name.") - ap.SupportsString("ref", "", "ref", "Git ref to use as the Dolt data ref for git remotes (sets remote param git_ref).") + ap.SupportsString("ref", "", "ref", "Git ref to use as the Dolt data ref for git remotes (default: refs/dolt/data).") ap.SupportsString(dbfactory.AWSRegionParam, "", "region", "") ap.SupportsValidatedString(dbfactory.AWSCredsTypeParam, "", "creds-type", "", argparser.ValidatorFromStrList(dbfactory.AWSCredsTypeParam, dbfactory.AWSCredTypes)) ap.SupportsString(dbfactory.AWSCredsFileParam, "", "file", "AWS credentials file") diff --git a/go/cmd/dolt/commands/read_tables.go b/go/cmd/dolt/commands/read_tables.go index 4b08969a300..68ae9121309 100644 --- a/go/cmd/dolt/commands/read_tables.go +++ b/go/cmd/dolt/commands/read_tables.go @@ -78,7 +78,7 @@ func (cmd ReadTablesCmd) ArgParser() *argparser.ArgParser { {"table", " Optional tables to retrieve. If omitted, all tables are retrieved."}, } ap.SupportsString(dirParamName, "d", "directory", "directory to create and put retrieved table data.") - ap.SupportsString(gitRefFlag, "", "ref", "Git ref to use as the Dolt data ref for git remotes (sets remote param git_ref).") + ap.SupportsString(gitRefFlag, "", "ref", "Git ref to use as the Dolt data ref for git remotes (default: refs/dolt/data).") return ap } From e64f8d82e4dcd4f3e6cadbd0a466240a77a4d532 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?coffeegoddd=E2=98=95=EF=B8=8F=E2=9C=A8?= Date: Wed, 11 Feb 2026 11:45:44 -0800 Subject: [PATCH 16/20] /{go, integration-tests}: fix repo cache --- go/cmd/dolt/commands/clone.go | 26 +++++++++++-- go/libraries/doltcore/dbfactory/git_remote.go | 37 +++++++++++++++--- .../doltcore/dbfactory/git_remote_test.go | 36 ++++++++++++------ go/libraries/doltcore/env/environment.go | 10 +++++ go/libraries/doltcore/env/remotes.go | 15 ++++++++ .../doltcore/sqle/database_provider.go | 38 +++++++++++++++++-- integration-tests/bats/remotes-git.bats | 4 -- integration-tests/bats/sql-remotes-git.bats | 4 -- 8 files changed, 139 insertions(+), 31 deletions(-) diff --git a/go/cmd/dolt/commands/clone.go b/go/cmd/dolt/commands/clone.go index 686748e16ec..5009ca63895 100644 --- a/go/cmd/dolt/commands/clone.go +++ b/go/cmd/dolt/commands/clone.go @@ -49,6 +49,18 @@ This default configuration is achieved by creating references to the remote bran }, } +type remoteDialerWithGitCacheRoot struct { + dbfactory.GRPCDialProvider + root string +} + +func (d remoteDialerWithGitCacheRoot) GitCacheRoot() (string, bool) { + if strings.TrimSpace(d.root) == "" { + return "", false + } + return d.root, true +} + type CloneCmd struct{} // Name is returns the name of the Dolt cli command. This is what is used on the command line to invoke the command @@ -130,7 +142,11 @@ func clone(ctx context.Context, apr *argparser.ArgParseResults, dEnv *env.DoltEn var r env.Remote var srcDB *doltdb.DoltDB - r, srcDB, verr = createRemote(ctx, remoteName, remoteUrl, params, dEnv) + cloneRoot, err := dEnv.FS.Abs(dir) + if err != nil { + return errhand.VerboseErrorFromError(err) + } + r, srcDB, verr = createRemote(ctx, remoteName, remoteUrl, params, dEnv, cloneRoot) if verr != nil { return verr } @@ -211,11 +227,15 @@ func parseArgs(apr *argparser.ArgParseResults) (string, string, errhand.VerboseE return dir, urlStr, nil } -func createRemote(ctx context.Context, remoteName, remoteUrl string, params map[string]string, dEnv *env.DoltEnv) (env.Remote, *doltdb.DoltDB, errhand.VerboseError) { +func createRemote(ctx context.Context, remoteName, remoteUrl string, params map[string]string, dEnv *env.DoltEnv, cloneRoot string) (env.Remote, *doltdb.DoltDB, errhand.VerboseError) { cli.Printf("cloning %s\n", remoteUrl) r := env.NewRemote(remoteName, remoteUrl, params) - ddb, err := r.GetRemoteDB(ctx, types.Format_Default, dEnv) + dialer := dbfactory.GRPCDialProvider(dEnv) + if strings.TrimSpace(cloneRoot) != "" { + dialer = remoteDialerWithGitCacheRoot{GRPCDialProvider: dEnv, root: cloneRoot} + } + ddb, err := r.GetRemoteDB(ctx, types.Format_Default, dialer) if err != nil { bdr := errhand.BuildDError("error: failed to get remote db").AddCause(err) return env.NoRemote, nil, bdr.Build() diff --git a/go/libraries/doltcore/dbfactory/git_remote.go b/go/libraries/doltcore/dbfactory/git_remote.go index 75a45a37871..b4ef58c4d6c 100644 --- a/go/libraries/doltcore/dbfactory/git_remote.go +++ b/go/libraries/doltcore/dbfactory/git_remote.go @@ -34,12 +34,21 @@ import ( ) const ( + // GitCacheRootParam is the absolute path to the local Dolt repository root (the directory that contains `.dolt/`). + // When set for git remotes, callers can choose a per-repo cache location under `.dolt/`. + GitCacheRootParam = "git_cache_root" GitRefParam = "git_ref" GitRemoteNameParam = "git_remote_name" defaultGitRef = "refs/dolt/data" defaultGitRemoteName = "origin" ) +// GitCacheRootProvider provides the local Dolt repo root for per-repo git remote caches. +// Implementations should return ok=false when no repo root is available. +type GitCacheRootProvider interface { + GitCacheRoot() (string, bool) +} + // GitRemoteFactory opens a Dolt database backed by a Git remote, using a local bare // repository as an object cache and remote configuration store. // @@ -83,10 +92,14 @@ func (fact GitRemoteFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFor return nil, nil, nil, err } - cacheBase, err := defaultGitCacheBase() + cacheRoot, ok, err := resolveGitCacheRoot(params) if err != nil { return nil, nil, nil, err } + if !ok { + return nil, nil, nil, fmt.Errorf("%s is required for git remotes", GitCacheRootParam) + } + cacheBase := filepath.Join(cacheRoot, DoltDir, "git-remote-cache") cacheRepo, err := cacheRepoPath(cacheBase, remoteURL.String(), ref) if err != nil { @@ -166,12 +179,24 @@ func resolveGitRemoteName(params map[string]interface{}) string { return defaultGitRemoteName } -func defaultGitCacheBase() (string, error) { - base, err := os.UserCacheDir() - if err != nil { - return "", err +// resolveGitCacheRoot parses and validates the optional GitCacheRootParam. +// It returns ok=false when the param is not present. +func resolveGitCacheRoot(params map[string]interface{}) (root string, ok bool, err error) { + if params == nil { + return "", false, nil + } + v, ok := params[GitCacheRootParam] + if !ok || v == nil { + return "", false, nil + } + s, ok := v.(string) + if !ok { + return "", false, fmt.Errorf("%s must be a string", GitCacheRootParam) + } + if strings.TrimSpace(s) == "" { + return "", false, fmt.Errorf("%s cannot be empty", GitCacheRootParam) } - return filepath.Join(base, "dolt", "git-remote-cache"), nil + return s, true, nil } func cacheRepoPath(cacheBase, remoteURL, ref string) (string, error) { diff --git a/go/libraries/doltcore/dbfactory/git_remote_test.go b/go/libraries/doltcore/dbfactory/git_remote_test.go index e0dbf93ed2f..a46c5387102 100644 --- a/go/libraries/doltcore/dbfactory/git_remote_test.go +++ b/go/libraries/doltcore/dbfactory/git_remote_test.go @@ -49,7 +49,14 @@ func shortTempDir(t *testing.T) string { return dir } -func TestGitRemoteFactory_GitFile_UsesDefaultCacheDirAndCanWrite(t *testing.T) { +func TestGitRemoteFactory_GitFile_RequiresGitCacheRootParam(t *testing.T) { + ctx := context.Background() + _, _, _, err := CreateDB(ctx, types.Format_Default, "git+file:///tmp/remote.git", map[string]interface{}{}) + require.Error(t, err) + require.Contains(t, err.Error(), GitCacheRootParam) +} + +func TestGitRemoteFactory_GitFile_CachesUnderRepoDoltDirAndCanWrite(t *testing.T) { if _, err := exec.LookPath("git"); err != nil { t.Skip("git not found on PATH") } @@ -58,20 +65,22 @@ func TestGitRemoteFactory_GitFile_UsesDefaultCacheDirAndCanWrite(t *testing.T) { remoteRepo, err := gitrepo.InitBare(ctx, filepath.Join(shortTempDir(t), "remote.git")) require.NoError(t, err) + localRepoRoot := shortTempDir(t) + remotePath := filepath.ToSlash(remoteRepo.GitDir) remoteURL := "file://" + remotePath urlStr := "git+file://" + remotePath - params := map[string]interface{}{} + params := map[string]interface{}{ + GitCacheRootParam: localRepoRoot, + } db, vrw, _, err := CreateDB(ctx, types.Format_Default, urlStr, params) require.NoError(t, err) require.NotNil(t, db) require.NotNil(t, vrw) - // Ensure cache repo created under default cache dir. - base, err := os.UserCacheDir() - require.NoError(t, err) - cacheBase := filepath.Join(base, "dolt", "git-remote-cache") + // Ensure cache repo created under /.dolt/git-remote-cache. + cacheBase := filepath.Join(localRepoRoot, DoltDir, "git-remote-cache") sum := sha256.Sum256([]byte(remoteURL + "|" + "refs/dolt/data")) h := hex.EncodeToString(sum[:]) @@ -120,8 +129,10 @@ func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) { return func(context.Context, hash.HashSet, chunks.PendingRefExists) error { return nil } } - open := func() (db datas.Database, cs chunks.ChunkStore) { - params := map[string]interface{}{} + open := func(cacheRoot string) (db datas.Database, cs chunks.ChunkStore) { + params := map[string]interface{}{ + GitCacheRootParam: cacheRoot, + } d, vrw, _, err := CreateDB(ctx, types.Format_Default, urlStr, params) require.NoError(t, err) require.NotNil(t, d) @@ -132,8 +143,11 @@ func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) { return d, vs.ChunkStore() } + cacheA := shortTempDir(t) + cacheB := shortTempDir(t) + // Client A writes a root pointing at chunk A. - dbA, csA := open() + dbA, csA := open(cacheA) cA := chunks.NewChunk([]byte("clientA\n")) require.NoError(t, csA.Put(ctx, cA, noopGetAddrs)) lastA, err := csA.Root(ctx) @@ -144,7 +158,7 @@ func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) { require.NoError(t, dbA.Close()) // Client B reads chunk A, then writes chunk B and updates the root. - dbB, csB := open() + dbB, csB := open(cacheB) require.NoError(t, csB.Rebase(ctx)) rootB, err := csB.Root(ctx) require.NoError(t, err) @@ -161,7 +175,7 @@ func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) { require.NoError(t, dbB.Close()) // Client A re-opens and should see B's update. - dbA2, csA2 := open() + dbA2, csA2 := open(cacheA) require.NoError(t, csA2.Rebase(ctx)) rootA2, err := csA2.Root(ctx) require.NoError(t, err) diff --git a/go/libraries/doltcore/env/environment.go b/go/libraries/doltcore/env/environment.go index 618f21b22c1..06e59d8e4c6 100644 --- a/go/libraries/doltcore/env/environment.go +++ b/go/libraries/doltcore/env/environment.go @@ -184,6 +184,16 @@ func (dEnv *DoltEnv) UrlStr() string { return dEnv.urlStr } +// GitCacheRoot returns the absolute path to the local Dolt repository root (the directory that contains `.dolt/`). +// It is used to place git-remote caches under `/.dolt/...`. +func (dEnv *DoltEnv) GitCacheRoot() (string, bool) { + doltDir := dEnv.GetDoltDir() + if doltDir == "" { + return "", false + } + return filepath.Dir(doltDir), true +} + func createRepoState(fs filesys.Filesys) (*RepoState, error) { repoState, rsErr := LoadRepoState(fs) diff --git a/go/libraries/doltcore/env/remotes.go b/go/libraries/doltcore/env/remotes.go index 7e819fa9f6c..a4c3d64c76b 100644 --- a/go/libraries/doltcore/env/remotes.go +++ b/go/libraries/doltcore/env/remotes.go @@ -106,6 +106,11 @@ func (r *Remote) GetRemoteDB(ctx context.Context, nbf *types.NomsBinFormat, dial params[dbfactory.GRPCDialProviderParam] = dialer if u, err := earl.Parse(r.Url); err == nil && u != nil && strings.HasPrefix(strings.ToLower(u.Scheme), "git+") { params[dbfactory.GitRemoteNameParam] = r.Name + if p, ok := dialer.(dbfactory.GitCacheRootProvider); ok { + if root, ok := p.GitCacheRoot(); ok { + params[dbfactory.GitCacheRootParam] = root + } + } } return doltdb.LoadDoltDBWithParams(ctx, nbf, r.Url, filesys2.LocalFS, params) @@ -122,6 +127,11 @@ func (r *Remote) Prepare(ctx context.Context, nbf *types.NomsBinFormat, dialer d params[dbfactory.GRPCDialProviderParam] = dialer if u, err := earl.Parse(r.Url); err == nil && u != nil && strings.HasPrefix(strings.ToLower(u.Scheme), "git+") { params[dbfactory.GitRemoteNameParam] = r.Name + if p, ok := dialer.(dbfactory.GitCacheRootProvider); ok { + if root, ok := p.GitCacheRoot(); ok { + params[dbfactory.GitCacheRootParam] = root + } + } } return dbfactory.PrepareDB(ctx, nbf, r.Url, params) @@ -136,6 +146,11 @@ func (r *Remote) GetRemoteDBWithoutCaching(ctx context.Context, nbf *types.NomsB params[dbfactory.GRPCDialProviderParam] = dialer if u, err := earl.Parse(r.Url); err == nil && u != nil && strings.HasPrefix(strings.ToLower(u.Scheme), "git+") { params[dbfactory.GitRemoteNameParam] = r.Name + if p, ok := dialer.(dbfactory.GitCacheRootProvider); ok { + if root, ok := p.GitCacheRoot(); ok { + params[dbfactory.GitCacheRootParam] = root + } + } } return doltdb.LoadDoltDBWithParams(ctx, nbf, r.Url, filesys2.LocalFS, params) diff --git a/go/libraries/doltcore/sqle/database_provider.go b/go/libraries/doltcore/sqle/database_provider.go index 5d7cca5d6db..69a4401be71 100644 --- a/go/libraries/doltcore/sqle/database_provider.go +++ b/go/libraries/doltcore/sqle/database_provider.go @@ -77,6 +77,18 @@ type DoltDatabaseProvider struct { InitDatabaseHooks []InitDatabaseHook } +type remoteDialerWithGitCacheRoot struct { + dbfactory.GRPCDialProvider + root string +} + +func (d remoteDialerWithGitCacheRoot) GitCacheRoot() (string, bool) { + if strings.TrimSpace(d.root) == "" { + return "", false + } + return d.root, true +} + var _ sql.DatabaseProvider = (*DoltDatabaseProvider)(nil) var _ sql.FunctionProvider = (*DoltDatabaseProvider)(nil) var _ sql.MutableDatabaseProvider = (*DoltDatabaseProvider)(nil) @@ -502,10 +514,26 @@ func (p *DoltDatabaseProvider) allRevisionDbs(ctx *sql.Context, db dsess.SqlData } func (p *DoltDatabaseProvider) GetRemoteDB(ctx context.Context, format *types.NomsBinFormat, r env.Remote, withCaching bool) (*doltdb.DoltDB, error) { + // For git remotes, thread through the initiating database's repo root so git caches can be located under + // `/.dolt/...` instead of a user-global cache dir. + dialer := p.remoteDialer + if sqlCtx, ok := ctx.(*sql.Context); ok { + baseName, _ := doltdb.SplitRevisionDbName(sqlCtx.GetCurrentDatabase()) + dbKey := strings.ToLower(baseName) + p.mu.RLock() + dbLoc, ok := p.dbLocations[dbKey] + p.mu.RUnlock() + if ok && dbLoc != nil { + if root, err := dbLoc.Abs("."); err == nil && strings.TrimSpace(root) != "" { + dialer = remoteDialerWithGitCacheRoot{GRPCDialProvider: p.remoteDialer, root: root} + } + } + } + if withCaching { - return r.GetRemoteDB(ctx, format, p.remoteDialer) + return r.GetRemoteDB(ctx, format, dialer) } - return r.GetRemoteDBWithoutCaching(ctx, format, p.remoteDialer) + return r.GetRemoteDBWithoutCaching(ctx, format, dialer) } func (p *DoltDatabaseProvider) CreateDatabase(ctx *sql.Context, name string) error { @@ -814,7 +842,11 @@ func (p *DoltDatabaseProvider) cloneDatabaseFromRemote( } r := env.NewRemote(remoteName, remoteUrl, remoteParams) - srcDB, err := r.GetRemoteDB(ctx, types.Format_Default, p.remoteDialer) + destRoot, err := p.fs.Abs(dbName) + if err != nil { + return err + } + srcDB, err := r.GetRemoteDB(ctx, types.Format_Default, remoteDialerWithGitCacheRoot{GRPCDialProvider: p.remoteDialer, root: destRoot}) if err != nil { return err } diff --git a/integration-tests/bats/remotes-git.bats b/integration-tests/bats/remotes-git.bats index af38f9e4196..58d0449b415 100644 --- a/integration-tests/bats/remotes-git.bats +++ b/integration-tests/bats/remotes-git.bats @@ -8,15 +8,11 @@ setup() { cd $BATS_TMPDIR cd dolt-repo-$$ mkdir "dolt-repo-clones" - - # Keep auto-selected git cache dir inside this test's sandbox. - export XDG_CACHE_HOME="$(mktemp -d)" } teardown() { assert_feature_version teardown_common - rm -rf "$XDG_CACHE_HOME" } @test "remotes-git: smoke push/clone/push-back/pull" { diff --git a/integration-tests/bats/sql-remotes-git.bats b/integration-tests/bats/sql-remotes-git.bats index a1ebe2ab8ad..05fbe71fcfe 100644 --- a/integration-tests/bats/sql-remotes-git.bats +++ b/integration-tests/bats/sql-remotes-git.bats @@ -7,15 +7,11 @@ setup() { setup_common cd $BATS_TMPDIR cd dolt-repo-$$ - - # Keep auto-selected git cache dir inside this test's sandbox. - export XDG_CACHE_HOME="$(mktemp -d)" } teardown() { assert_feature_version teardown_common - rm -rf "$XDG_CACHE_HOME" } @test "sql-remotes-git: dolt_remote add supports --ref for git remotes" { From f2e26ce3fccbb69e57cb53aaff4ff2e0376e82ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?coffeegoddd=E2=98=95=EF=B8=8F=E2=9C=A8?= Date: Wed, 11 Feb 2026 11:49:15 -0800 Subject: [PATCH 17/20] /integration-tests: fix tests --- integration-tests/bats/remotes-git.bats | 28 +++---------------------- 1 file changed, 3 insertions(+), 25 deletions(-) diff --git a/integration-tests/bats/remotes-git.bats b/integration-tests/bats/remotes-git.bats index 58d0449b415..1a64bb087b8 100644 --- a/integration-tests/bats/remotes-git.bats +++ b/integration-tests/bats/remotes-git.bats @@ -5,6 +5,9 @@ setup() { skiponwindows "tests are flaky on Windows" skip_if_remote setup_common + if ! command -v git >/dev/null 2>&1; then + skip "git not installed" + fi cd $BATS_TMPDIR cd dolt-repo-$$ mkdir "dolt-repo-clones" @@ -16,10 +19,6 @@ teardown() { } @test "remotes-git: smoke push/clone/push-back/pull" { - if ! command -v git >/dev/null 2>&1; then - skip "git not installed" - fi - mkdir remote.git git init --bare remote.git @@ -33,7 +32,6 @@ teardown() { dolt remote add origin ../remote.git run dolt push --set-upstream origin main [ "$status" -eq 0 ] - [[ ! "$output" =~ "panic:" ]] || false cd .. cd dolt-repo-clones @@ -58,10 +56,6 @@ teardown() { } @test "remotes-git: empty remote bootstrap creates refs/dolt/data" { - if ! command -v git >/dev/null 2>&1; then - skip "git not installed" - fi - mkdir remote.git git init --bare remote.git @@ -86,10 +80,6 @@ teardown() { } @test "remotes-git: pull also fetches branches from git remote" { - if ! command -v git >/dev/null 2>&1; then - skip "git not installed" - fi - mkdir remote.git git init --bare remote.git @@ -123,10 +113,6 @@ teardown() { } @test "remotes-git: pull fetches but does not merge other branches" { - if ! command -v git >/dev/null 2>&1; then - skip "git not installed" - fi - mkdir remote.git git init --bare remote.git @@ -168,10 +154,6 @@ teardown() { } @test "remotes-git: custom --ref writes to configured dolt data ref" { - if ! command -v git >/dev/null 2>&1; then - skip "git not installed" - fi - mkdir remote.git git init --bare remote.git @@ -208,10 +190,6 @@ teardown() { } @test "remotes-git: push works with auto-selected git cache dir" { - if ! command -v git >/dev/null 2>&1; then - skip "git not installed" - fi - mkdir remote.git git init --bare remote.git From 027fd3e982a42aee5c50dd277cd730f6fd91370d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?coffeegoddd=E2=98=95=EF=B8=8F=E2=9C=A8?= Date: Wed, 11 Feb 2026 11:55:10 -0800 Subject: [PATCH 18/20] /{go,integration-tests}: some fixes and cleanup --- go/cmd/dolt/commands/read_tables.go | 3 ++- go/libraries/doltcore/dbfactory/git_remote.go | 5 +++-- integration-tests/bats/remotes-git.bats | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/go/cmd/dolt/commands/read_tables.go b/go/cmd/dolt/commands/read_tables.go index 68ae9121309..b868d81dd60 100644 --- a/go/cmd/dolt/commands/read_tables.go +++ b/go/cmd/dolt/commands/read_tables.go @@ -208,7 +208,8 @@ func pullTableValue(ctx context.Context, dEnv *env.DoltEnv, srcDB *doltdb.DoltDB } func getRemoteDBAtCommit(ctx context.Context, remoteUrl string, remoteUrlParams map[string]string, commitStr string, dEnv *env.DoltEnv) (*doltdb.DoltDB, doltdb.RootValue, errhand.VerboseError) { - _, srcDB, verr := createRemote(ctx, "temp", remoteUrl, remoteUrlParams, dEnv) + cacheRoot, _ := dEnv.GitCacheRoot() + _, srcDB, verr := createRemote(ctx, "temp", remoteUrl, remoteUrlParams, dEnv, cacheRoot) if verr != nil { return nil, nil, verr diff --git a/go/libraries/doltcore/dbfactory/git_remote.go b/go/libraries/doltcore/dbfactory/git_remote.go index b4ef58c4d6c..e69d553b3e3 100644 --- a/go/libraries/doltcore/dbfactory/git_remote.go +++ b/go/libraries/doltcore/dbfactory/git_remote.go @@ -35,7 +35,8 @@ import ( const ( // GitCacheRootParam is the absolute path to the local Dolt repository root (the directory that contains `.dolt/`). - // When set for git remotes, callers can choose a per-repo cache location under `.dolt/`. + // Required for git remotes. GitRemoteFactory stores its local cache repo under: + // `/.dolt/git-remote-cache//repo.git`. GitCacheRootParam = "git_cache_root" GitRefParam = "git_ref" GitRemoteNameParam = "git_remote_name" @@ -179,7 +180,7 @@ func resolveGitRemoteName(params map[string]interface{}) string { return defaultGitRemoteName } -// resolveGitCacheRoot parses and validates the optional GitCacheRootParam. +// resolveGitCacheRoot parses and validates GitCacheRootParam. // It returns ok=false when the param is not present. func resolveGitCacheRoot(params map[string]interface{}) (root string, ok bool, err error) { if params == nil { diff --git a/integration-tests/bats/remotes-git.bats b/integration-tests/bats/remotes-git.bats index 1a64bb087b8..64d897bc610 100644 --- a/integration-tests/bats/remotes-git.bats +++ b/integration-tests/bats/remotes-git.bats @@ -189,7 +189,7 @@ teardown() { } -@test "remotes-git: push works with auto-selected git cache dir" { +@test "remotes-git: push works with per-repo git cache under .dolt/" { mkdir remote.git git init --bare remote.git From ce10f624e6debc696d0046f74beca8b661ea2461 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?coffeegoddd=E2=98=95=EF=B8=8F=E2=9C=A8?= Date: Wed, 11 Feb 2026 11:59:51 -0800 Subject: [PATCH 19/20] /integration-tests: more cleanup --- integration-tests/bats/sql-remotes-git.bats | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/integration-tests/bats/sql-remotes-git.bats b/integration-tests/bats/sql-remotes-git.bats index 05fbe71fcfe..44a289423ef 100644 --- a/integration-tests/bats/sql-remotes-git.bats +++ b/integration-tests/bats/sql-remotes-git.bats @@ -5,6 +5,9 @@ setup() { skiponwindows "tests are flaky on Windows" skip_if_remote setup_common + if ! command -v git >/dev/null 2>&1; then + skip "git not installed" + fi cd $BATS_TMPDIR cd dolt-repo-$$ } @@ -15,10 +18,6 @@ teardown() { } @test "sql-remotes-git: dolt_remote add supports --ref for git remotes" { - if ! command -v git >/dev/null 2>&1; then - skip "git not installed" - fi - mkdir remote.git git init --bare remote.git @@ -43,10 +42,6 @@ SQL } @test "sql-remotes-git: dolt_clone supports --ref for git remotes" { - if ! command -v git >/dev/null 2>&1; then - skip "git not installed" - fi - mkdir remote.git git init --bare remote.git @@ -81,10 +76,6 @@ SQL } @test "sql-remotes-git: dolt_backup sync-url supports --ref for git remotes" { - if ! command -v git >/dev/null 2>&1; then - skip "git not installed" - fi - mkdir remote.git git init --bare remote.git From 3b2ff8c0d61c1a812ebbf7dfea9954d9bc0d8a38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?coffeegoddd=E2=98=95=EF=B8=8F=E2=9C=A8?= Date: Wed, 11 Feb 2026 12:48:20 -0800 Subject: [PATCH 20/20] /go/libraries/doltcore/env: fix bats maybe --- go/libraries/doltcore/env/environment.go | 10 +++++++- go/libraries/doltcore/env/environment_test.go | 23 +++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/go/libraries/doltcore/env/environment.go b/go/libraries/doltcore/env/environment.go index 06e59d8e4c6..441581f4611 100644 --- a/go/libraries/doltcore/env/environment.go +++ b/go/libraries/doltcore/env/environment.go @@ -540,7 +540,8 @@ var ErrCannotCreateDoltDirAlreadyExists = errors.New(".dolt dir already exists") // * |dir|/.dolt exists and is a directory and is empty, or // * |dir|/.dolt exists and is a directory and has only one other entry in it, a directory with name "tmp", or // * |dir|/.dolt exists and is a directory and has only one other entry in it, a file with name "config.json", or -// * |dir|/.dolt exists and is a directory and contains both a |tmp| directory and a |config.json| file and nothing else. +// * |dir|/.dolt exists and is a directory and contains both a |tmp| directory and a |config.json| file and nothing else, or +// * |dir|/.dolt exists and is a directory and contains a |git-remote-cache| directory (and any contents under it) plus any of the above. func CanCreateDatabaseAtPath(fs filesys.Filesys, dir string) (bool, error) { absPath, err := fs.Abs(dir) if err != nil { @@ -560,6 +561,7 @@ func CanCreateDatabaseAtPath(fs filesys.Filesys, dir string) (bool, error) { } tmpPath := filepath.Join(doltDirPath, TmpDirName) configPath := filepath.Join(doltDirPath, configFile) + gitRemoteCachePath := filepath.Join(doltDirPath, "git-remote-cache") isOK := true err := fs.Iter(doltDirPath, true, func(path string, sz int64, isDir bool) (stop bool) { if path == doltDirPath { @@ -568,6 +570,12 @@ func CanCreateDatabaseAtPath(fs filesys.Filesys, dir string) (bool, error) { return false } else if path == configPath && !isDir { return false + } else if path == gitRemoteCachePath && isDir { + // Allow git remote cache contents to exist under .dolt/ when cloning / creating a DB. + return false + } else if strings.HasPrefix(path, gitRemoteCachePath+string(filepath.Separator)) { + // Allow any children of .dolt/git-remote-cache. + return false } else { isOK = false return true diff --git a/go/libraries/doltcore/env/environment_test.go b/go/libraries/doltcore/env/environment_test.go index b03a53fdbad..790b98da41e 100644 --- a/go/libraries/doltcore/env/environment_test.go +++ b/go/libraries/doltcore/env/environment_test.go @@ -38,6 +38,29 @@ const ( workingDir = "/user/bheni/datasets/addresses" ) +func TestCanCreateDatabaseAtPathAllowsGitRemoteCache(t *testing.T) { + dir := "/user/bheni/datasets/allow_git_remote_cache" + doltDir := filepath.Join(dir, dbfactory.DoltDir) + cacheDir := filepath.Join(doltDir, "git-remote-cache") + + // Any contents under .dolt/git-remote-cache should be ignored by CanCreateDatabaseAtPath. + fs := filesys.NewInMemFS( + []string{ + testHomeDir, + dir, + doltDir, + cacheDir, + filepath.Join(cacheDir, "somecache"), + }, + map[string][]byte{}, + dir, + ) + + ok, err := CanCreateDatabaseAtPath(fs, dir) + require.NoError(t, err) + require.True(t, ok) +} + func testHomeDirFunc() (string, error) { return testHomeDir, nil }