diff --git a/go/cmd/dolt/cli/arg_parser_helpers.go b/go/cmd/dolt/cli/arg_parser_helpers.go index 5e0a3e7c588..4551cfa2548 100644 --- a/go/cmd/dolt/cli/arg_parser_helpers.go +++ b/go/cmd/dolt/cli/arg_parser_helpers.go @@ -146,6 +146,7 @@ func CreateCloneArgParser() *argparser.ArgParser { ap.SupportsString(RemoteParam, "", "name", "Name of the remote to be added to the cloned database. The default is 'origin'.") ap.SupportsString(BranchParam, "b", "branch", "The branch to be cloned. If not specified all branches will be cloned.") ap.SupportsString(DepthFlag, "", "depth", "Clone a single branch and limit history to the given commit depth.") + ap.SupportsString("ref", "", "ref", "Git ref to use as the Dolt data ref for git remotes (default: refs/dolt/data).") ap.SupportsString(dbfactory.AWSRegionParam, "", "region", "") ap.SupportsValidatedString(dbfactory.AWSCredsTypeParam, "", "creds-type", "", argparser.ValidatorFromStrList(dbfactory.AWSCredsTypeParam, dbfactory.AWSCredTypes)) ap.SupportsString(dbfactory.AWSCredsFileParam, "", "file", "AWS credentials file.") @@ -166,6 +167,7 @@ func CreateResetArgParser() *argparser.ArgParser { func CreateRemoteArgParser() *argparser.ArgParser { ap := argparser.NewArgParserWithVariableArgs("remote") + ap.SupportsString("ref", "", "ref", "Git ref to use as the Dolt data ref for git remotes (default: refs/dolt/data).") return ap } @@ -266,6 +268,7 @@ func CreateBackupArgParser() *argparser.ArgParser { ap.ArgListHelp = append(ap.ArgListHelp, [2]string{"profile", "AWS profile to use."}) ap.SupportsFlag(VerboseFlag, "v", "When printing the list of backups adds additional details.") ap.SupportsFlag(ForceFlag, "f", "When restoring a backup, overwrite the contents of the existing database with the same name.") + ap.SupportsString("ref", "", "ref", "Git ref to use as the Dolt data ref for git remotes (default: refs/dolt/data).") ap.SupportsString(dbfactory.AWSRegionParam, "", "region", "") 
ap.SupportsValidatedString(dbfactory.AWSCredsTypeParam, "", "creds-type", "", argparser.ValidatorFromStrList(dbfactory.AWSCredsTypeParam, dbfactory.AWSCredTypes)) ap.SupportsString(dbfactory.AWSCredsFileParam, "", "file", "AWS credentials file") diff --git a/go/cmd/dolt/commands/clone.go b/go/cmd/dolt/commands/clone.go index 3381382d608..5009ca63895 100644 --- a/go/cmd/dolt/commands/clone.go +++ b/go/cmd/dolt/commands/clone.go @@ -49,6 +49,18 @@ This default configuration is achieved by creating references to the remote bran }, } +type remoteDialerWithGitCacheRoot struct { + dbfactory.GRPCDialProvider + root string +} + +func (d remoteDialerWithGitCacheRoot) GitCacheRoot() (string, bool) { + if strings.TrimSpace(d.root) == "" { + return "", false + } + return d.root, true +} + type CloneCmd struct{} // Name is returns the name of the Dolt cli command. This is what is used on the command line to invoke the command @@ -130,7 +142,11 @@ func clone(ctx context.Context, apr *argparser.ArgParseResults, dEnv *env.DoltEn var r env.Remote var srcDB *doltdb.DoltDB - r, srcDB, verr = createRemote(ctx, remoteName, remoteUrl, params, dEnv) + cloneRoot, err := dEnv.FS.Abs(dir) + if err != nil { + return errhand.VerboseErrorFromError(err) + } + r, srcDB, verr = createRemote(ctx, remoteName, remoteUrl, params, dEnv, cloneRoot) if verr != nil { return verr } @@ -187,15 +203,19 @@ func parseArgs(apr *argparser.ArgParseResults) (string, string, errhand.VerboseE urlStr := apr.Arg(0) _, err := earl.Parse(urlStr) - if err != nil { - return "", "", errhand.BuildDError("error: invalid remote url: %s", urlStr).Build() + if normalized, ok, nerr := env.NormalizeGitRemoteUrl(urlStr); nerr == nil && ok { + urlStr = normalized + } else { + return "", "", errhand.BuildDError("error: invalid remote url: %s", urlStr).Build() + } } var dir string if apr.NArg() == 2 { dir = apr.Arg(1) } else { + // Infer directory name from the URL. dir = path.Base(urlStr) if dir == "." 
{ dir = path.Dir(urlStr) @@ -207,11 +227,15 @@ func parseArgs(apr *argparser.ArgParseResults) (string, string, errhand.VerboseE return dir, urlStr, nil } -func createRemote(ctx context.Context, remoteName, remoteUrl string, params map[string]string, dEnv *env.DoltEnv) (env.Remote, *doltdb.DoltDB, errhand.VerboseError) { +func createRemote(ctx context.Context, remoteName, remoteUrl string, params map[string]string, dEnv *env.DoltEnv, cloneRoot string) (env.Remote, *doltdb.DoltDB, errhand.VerboseError) { cli.Printf("cloning %s\n", remoteUrl) r := env.NewRemote(remoteName, remoteUrl, params) - ddb, err := r.GetRemoteDB(ctx, types.Format_Default, dEnv) + dialer := dbfactory.GRPCDialProvider(dEnv) + if strings.TrimSpace(cloneRoot) != "" { + dialer = remoteDialerWithGitCacheRoot{GRPCDialProvider: dEnv, root: cloneRoot} + } + ddb, err := r.GetRemoteDB(ctx, types.Format_Default, dialer) if err != nil { bdr := errhand.BuildDError("error: failed to get remote db").AddCause(err) return env.NoRemote, nil, bdr.Build() diff --git a/go/cmd/dolt/commands/clone_test.go b/go/cmd/dolt/commands/clone_test.go index d67e1d52fef..a0947612063 100644 --- a/go/cmd/dolt/commands/clone_test.go +++ b/go/cmd/dolt/commands/clone_test.go @@ -65,3 +65,14 @@ func TestParseDolthubRepos(t *testing.T) { } } + +func TestCloneParseArgs_InferDir(t *testing.T) { + ap := CloneCmd{}.ArgParser() + apr, err := ap.Parse([]string{"https://example.com/org/repo.git"}) + require.NoError(t, err) + + dir, urlStr, verr := parseArgs(apr) + require.Nil(t, verr) + require.Equal(t, "repo.git", dir) + require.Equal(t, "https://example.com/org/repo.git", urlStr) +} diff --git a/go/cmd/dolt/commands/read_tables.go b/go/cmd/dolt/commands/read_tables.go index 80faa375995..b868d81dd60 100644 --- a/go/cmd/dolt/commands/read_tables.go +++ b/go/cmd/dolt/commands/read_tables.go @@ -78,6 +78,7 @@ func (cmd ReadTablesCmd) ArgParser() *argparser.ArgParser { {"table", " Optional tables to retrieve. 
If omitted, all tables are retrieved."}, } ap.SupportsString(dirParamName, "d", "directory", "directory to create and put retrieved table data.") + ap.SupportsString(gitRefFlag, "", "ref", "Git ref to use as the Dolt data ref for git remotes (default: refs/dolt/data).") return ap } @@ -99,7 +100,11 @@ func (cmd ReadTablesCmd) Exec(ctx context.Context, commandStr string, args []str _, err := earl.Parse(urlStr) if err != nil { - return HandleVErrAndExitCode(errhand.BuildDError("Invalid remote url").AddCause(err).Build(), usage) + if normalized, ok, nerr := env.NormalizeGitRemoteUrl(urlStr); nerr == nil && ok { + urlStr = normalized + } else { + return HandleVErrAndExitCode(errhand.BuildDError("Invalid remote url").AddCause(err).Build(), usage) + } } dir := apr.GetValueOrDefault(dirParamName, path.Base(urlStr)) @@ -203,7 +208,8 @@ func pullTableValue(ctx context.Context, dEnv *env.DoltEnv, srcDB *doltdb.DoltDB } func getRemoteDBAtCommit(ctx context.Context, remoteUrl string, remoteUrlParams map[string]string, commitStr string, dEnv *env.DoltEnv) (*doltdb.DoltDB, doltdb.RootValue, errhand.VerboseError) { - _, srcDB, verr := createRemote(ctx, "temp", remoteUrl, remoteUrlParams, dEnv) + cacheRoot, _ := dEnv.GitCacheRoot() + _, srcDB, verr := createRemote(ctx, "temp", remoteUrl, remoteUrlParams, dEnv, cacheRoot) if verr != nil { return nil, nil, verr diff --git a/go/cmd/dolt/commands/read_tables_test.go b/go/cmd/dolt/commands/read_tables_test.go new file mode 100644 index 00000000000..b9adbd41c45 --- /dev/null +++ b/go/cmd/dolt/commands/read_tables_test.go @@ -0,0 +1,31 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package commands + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestReadTablesArgParser_AcceptsGitFlags(t *testing.T) { + ap := ReadTablesCmd{}.ArgParser() + apr, err := ap.Parse([]string{ + "--" + gitRefFlag, "refs/dolt/custom", + "git+file:///tmp/remote.git", "main", + }) + require.NoError(t, err) + require.Equal(t, "refs/dolt/custom", apr.GetValueOrDefault(gitRefFlag, "")) +} diff --git a/go/cmd/dolt/commands/remote.go b/go/cmd/dolt/commands/remote.go index 60dcca8a3a8..73f522b2e1e 100644 --- a/go/cmd/dolt/commands/remote.go +++ b/go/cmd/dolt/commands/remote.go @@ -71,6 +71,7 @@ const ( addRemoteId = "add" removeRemoteId = "remove" removeRemoteShortId = "rm" + gitRefFlag = "ref" ) type RemoteCmd struct{} @@ -212,6 +213,11 @@ func parseRemoteArgs(apr *argparser.ArgParseResults, scheme, remoteUrl string) ( err = cli.AddAWSParams(remoteUrl, apr, params) case dbfactory.OSSScheme: err = cli.AddOSSParams(remoteUrl, apr, params) + case dbfactory.GitFileScheme, dbfactory.GitHTTPScheme, dbfactory.GitHTTPSScheme, dbfactory.GitSSHScheme: + verr := addGitRemoteParams(apr, params) + if verr != nil { + return nil, verr + } default: err = cli.VerifyNoAwsParams(apr) } @@ -219,9 +225,29 @@ func parseRemoteArgs(apr *argparser.ArgParseResults, scheme, remoteUrl string) ( return nil, errhand.VerboseErrorFromError(err) } + // Flags that are only meaningful for git remotes should not be accepted for other schemes. 
+ switch scheme { + case dbfactory.GitFileScheme, dbfactory.GitHTTPScheme, dbfactory.GitHTTPSScheme, dbfactory.GitSSHScheme: + default: + if _, ok := apr.GetValue(gitRefFlag); ok { + return nil, errhand.BuildDError("error: --%s is only supported for git remotes", gitRefFlag).Build() + } + } + return params, nil } +func addGitRemoteParams(apr *argparser.ArgParseResults, params map[string]string) errhand.VerboseError { + if v, ok := apr.GetValue(gitRefFlag); ok { + v = strings.TrimSpace(v) + if v == "" { + return errhand.BuildDError("error: --%s cannot be empty", gitRefFlag).Build() + } + params[dbfactory.GitRefParam] = v + } + return nil +} + // callSQLRemoteAdd calls the SQL function `call `dolt_remote('add', remoteName, remoteUrl)` func callSQLRemoteAdd(sqlCtx *sql.Context, queryist cli.Queryist, remoteName, remoteUrl string) error { qry, err := dbr.InterpolateForDialect("call dolt_remote('add', ?, ?)", []interface{}{remoteName, remoteUrl}, dialect.MySQL) diff --git a/go/cmd/dolt/commands/remote_test.go b/go/cmd/dolt/commands/remote_test.go index ef5c6d3ae72..a0984251c0c 100644 --- a/go/cmd/dolt/commands/remote_test.go +++ b/go/cmd/dolt/commands/remote_test.go @@ -21,6 +21,7 @@ import ( "github.com/stretchr/testify/assert" + "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory" "github.com/dolthub/dolt/go/libraries/doltcore/env" "github.com/dolthub/dolt/go/libraries/utils/config" "github.com/dolthub/dolt/go/libraries/utils/filesys" @@ -135,3 +136,13 @@ func TestGetAbsRemoteUrl(t *testing.T) { }) } } + +func TestParseRemoteArgs_GitRef(t *testing.T) { + ap := RemoteCmd{}.ArgParser() + apr, err := ap.Parse([]string{"add", "origin", "git+file:///tmp/remote.git", "--" + gitRefFlag, "refs/dolt/custom"}) + assert.NoError(t, err) + + params, verr := parseRemoteArgs(apr, dbfactory.GitFileScheme, "git+file:///tmp/remote.git") + assert.Nil(t, verr) + assert.Equal(t, "refs/dolt/custom", params[dbfactory.GitRefParam]) +} diff --git 
a/go/libraries/doltcore/dbfactory/factory.go b/go/libraries/doltcore/dbfactory/factory.go index dcb19f14386..5cf26b65be6 100644 --- a/go/libraries/doltcore/dbfactory/factory.go +++ b/go/libraries/doltcore/dbfactory/factory.go @@ -53,6 +53,12 @@ const ( OSSScheme = "oss" + // Git remote dbfactory schemes (Git remotes as Dolt remotes) + GitFileScheme = "git+file" + GitHTTPScheme = "git+http" + GitHTTPSScheme = "git+https" + GitSSHScheme = "git+ssh" + defaultScheme = HTTPSScheme defaultMemTableSize = 256 * 1024 * 1024 ) @@ -69,15 +75,19 @@ type DBFactory interface { // DBFactories is a map from url scheme name to DBFactory. Additional factories can be added to the DBFactories map // from external packages. var DBFactories = map[string]DBFactory{ - AWSScheme: AWSFactory{}, - OSSScheme: OSSFactory{}, - GSScheme: GSFactory{}, - OCIScheme: OCIFactory{}, - FileScheme: FileFactory{}, - MemScheme: MemFactory{}, - LocalBSScheme: LocalBSFactory{}, - HTTPScheme: NewDoltRemoteFactory(true), - HTTPSScheme: NewDoltRemoteFactory(false), + AWSScheme: AWSFactory{}, + OSSScheme: OSSFactory{}, + GSScheme: GSFactory{}, + OCIScheme: OCIFactory{}, + FileScheme: FileFactory{}, + MemScheme: MemFactory{}, + LocalBSScheme: LocalBSFactory{}, + HTTPScheme: NewDoltRemoteFactory(true), + HTTPSScheme: NewDoltRemoteFactory(false), + GitFileScheme: GitRemoteFactory{}, + GitHTTPScheme: GitRemoteFactory{}, + GitHTTPSScheme: GitRemoteFactory{}, + GitSSHScheme: GitRemoteFactory{}, } // CreateDB creates a database based on the supplied urlStr, and creation params. The DBFactory used for creation is diff --git a/go/libraries/doltcore/dbfactory/git_remote.go b/go/libraries/doltcore/dbfactory/git_remote.go new file mode 100644 index 00000000000..e69d553b3e3 --- /dev/null +++ b/go/libraries/doltcore/dbfactory/git_remote.go @@ -0,0 +1,280 @@ +// Copyright 2026 Dolthub, Inc. 
// Parameter keys read by GitRemoteFactory from the params map.
const (
	// GitCacheRootParam is the absolute path to the local Dolt repository root
	// (the directory that contains `.dolt/`). Required for git remotes.
	// GitRemoteFactory keeps its local cache repo under
	// `<git_cache_root>/.dolt/git-remote-cache/<hex sha256 of remoteURL|ref>/repo.git`.
	GitCacheRootParam = "git_cache_root"
	// GitRefParam selects the git ref used for Dolt data; defaultGitRef
	// applies when it is absent or blank.
	GitRefParam = "git_ref"
	// GitRemoteNameParam selects the git remote name configured in the cache
	// repo; defaultGitRemoteName applies when it is absent or blank.
	GitRemoteNameParam = "git_remote_name"
)

// Fallbacks used when the corresponding parameter is not supplied.
const (
	defaultGitRef        = "refs/dolt/data"
	defaultGitRemoteName = "origin"
)

// GitCacheRootProvider provides the local Dolt repo root for per-repo git remote caches.
// Implementations should return ok=false when no repo root is available.
type GitCacheRootProvider interface {
	GitCacheRoot() (root string, ok bool)
}
+// +// Supported schemes (registered in factory.go): +// - git+file +// - git+http +// - git+https +// - git+ssh +type GitRemoteFactory struct{} + +var _ DBFactory = GitRemoteFactory{} + +func (fact GitRemoteFactory) PrepareDB(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) error { + switch strings.ToLower(urlObj.Scheme) { + case GitFileScheme: + remoteURL, _, err := parseGitRemoteFactoryURL(urlObj, params) + if err != nil { + return err + } + if remoteURL.Scheme != "file" { + return fmt.Errorf("git+file: expected underlying file URL, got %q", remoteURL.Scheme) + } + p := filepath.Join(remoteURL.Host, filepath.FromSlash(remoteURL.Path)) + if p == "" { + return fmt.Errorf("git+file: empty remote path") + } + if _, err := os.Stat(p); err == nil { + return nil + } else if !errors.Is(err, os.ErrNotExist) { + return err + } + return runGitInitBare(ctx, p) + default: + return fmt.Errorf("prepare not supported for scheme %q", urlObj.Scheme) + } +} + +func (fact GitRemoteFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFormat, urlObj *url.URL, params map[string]interface{}) (datas.Database, types.ValueReadWriter, tree.NodeStore, error) { + remoteURL, ref, err := parseGitRemoteFactoryURL(urlObj, params) + if err != nil { + return nil, nil, nil, err + } + + cacheRoot, ok, err := resolveGitCacheRoot(params) + if err != nil { + return nil, nil, nil, err + } + if !ok { + return nil, nil, nil, fmt.Errorf("%s is required for git remotes", GitCacheRootParam) + } + cacheBase := filepath.Join(cacheRoot, DoltDir, "git-remote-cache") + + cacheRepo, err := cacheRepoPath(cacheBase, remoteURL.String(), ref) + if err != nil { + return nil, nil, nil, err + } + if err := ensureBareRepo(ctx, cacheRepo); err != nil { + return nil, nil, nil, err + } + + remoteName := resolveGitRemoteName(params) + + // Ensure the configured git remote exists and points to the underlying git remote URL. 
+ if err := ensureGitRemoteURL(ctx, cacheRepo, remoteName, remoteURL.String()); err != nil { + return nil, nil, nil, err + } + + q := nbs.NewUnlimitedMemQuotaProvider() + cs, err := nbs.NewGitStore(ctx, nbf.VersionString(), cacheRepo, ref, blobstore.GitBlobstoreOptions{RemoteName: remoteName}, defaultMemTableSize, q) + if err != nil { + return nil, nil, nil, err + } + + vrw := types.NewValueStore(cs) + ns := tree.NewNodeStore(cs) + db := datas.NewTypesDatabase(vrw, ns) + return db, vrw, ns, nil +} + +func parseGitRemoteFactoryURL(urlObj *url.URL, params map[string]interface{}) (remoteURL *url.URL, ref string, err error) { + if urlObj == nil { + return nil, "", fmt.Errorf("nil url") + } + scheme := strings.ToLower(urlObj.Scheme) + if !strings.HasPrefix(scheme, "git+") { + return nil, "", fmt.Errorf("expected git+ scheme, got %q", urlObj.Scheme) + } + underlyingScheme := strings.TrimPrefix(scheme, "git+") + if underlyingScheme == "" { + return nil, "", fmt.Errorf("invalid git+ scheme %q", urlObj.Scheme) + } + + ref = resolveGitRemoteRef(params) + + cp := *urlObj + cp.Scheme = underlyingScheme + cp.RawQuery = "" + cp.Fragment = "" + return &cp, ref, nil +} + +func resolveGitRemoteRef(params map[string]interface{}) string { + // Prefer an explicit remote parameter (e.g. from `--ref`). + if params != nil { + if v, ok := params[GitRefParam]; ok && v != nil { + s, ok := v.(string) + if ok { + if s = strings.TrimSpace(s); s != "" { + return s + } + } + } + } + return defaultGitRef +} + +func resolveGitRemoteName(params map[string]interface{}) string { + if params != nil { + if v, ok := params[GitRemoteNameParam]; ok && v != nil { + s, ok := v.(string) + if ok { + if s = strings.TrimSpace(s); s != "" { + return s + } + } + } + } + return defaultGitRemoteName +} + +// resolveGitCacheRoot parses and validates GitCacheRootParam. +// It returns ok=false when the param is not present. 
+func resolveGitCacheRoot(params map[string]interface{}) (root string, ok bool, err error) { + if params == nil { + return "", false, nil + } + v, ok := params[GitCacheRootParam] + if !ok || v == nil { + return "", false, nil + } + s, ok := v.(string) + if !ok { + return "", false, fmt.Errorf("%s must be a string", GitCacheRootParam) + } + if strings.TrimSpace(s) == "" { + return "", false, fmt.Errorf("%s cannot be empty", GitCacheRootParam) + } + return s, true, nil +} + +func cacheRepoPath(cacheBase, remoteURL, ref string) (string, error) { + if strings.TrimSpace(cacheBase) == "" { + return "", fmt.Errorf("empty git cache base") + } + sum := sha256.Sum256([]byte(remoteURL + "|" + ref)) + h := hex.EncodeToString(sum[:]) + return filepath.Join(cacheBase, h, "repo.git"), nil +} + +func ensureBareRepo(ctx context.Context, gitDir string) error { + if gitDir == "" { + return fmt.Errorf("empty gitDir") + } + if st, err := os.Stat(gitDir); err == nil { + if !st.IsDir() { + return fmt.Errorf("git cache repo path is not a directory: %s", gitDir) + } + return nil + } else if !errors.Is(err, os.ErrNotExist) { + return err + } + if err := os.MkdirAll(filepath.Dir(gitDir), 0o755); err != nil { + return err + } + return runGitInitBare(ctx, gitDir) +} + +func ensureGitRemoteURL(ctx context.Context, gitDir string, remoteName string, remoteURL string) error { + if strings.TrimSpace(remoteName) == "" { + return fmt.Errorf("empty remote name") + } + if strings.TrimSpace(remoteURL) == "" { + return fmt.Errorf("empty remote url") + } + // Insert `--` so remoteName can't be interpreted as a flag. + got, err := runGitInDir(ctx, gitDir, "remote", "get-url", "--", remoteName) + if err != nil { + // Remote likely doesn't exist; attempt to add. 
+ return runGitInDirNoOutput(ctx, gitDir, "remote", "add", "--", remoteName, remoteURL) + } + got = strings.TrimSpace(got) + if got == remoteURL { + return nil + } + return runGitInDirNoOutput(ctx, gitDir, "remote", "set-url", "--", remoteName, remoteURL) +} + +func runGitInitBare(ctx context.Context, dir string) error { + _, err := exec.LookPath("git") + if err != nil { + return fmt.Errorf("git not found on PATH: %w", err) + } + cmd := exec.CommandContext(ctx, "git", "init", "--bare", dir) //nolint:gosec // controlled args + out, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("git init --bare failed: %w\noutput:\n%s", err, strings.TrimSpace(string(out))) + } + return nil +} + +func runGitInDir(ctx context.Context, gitDir string, args ...string) (string, error) { + _, err := exec.LookPath("git") + if err != nil { + return "", fmt.Errorf("git not found on PATH: %w", err) + } + all := append([]string{"--git-dir", gitDir}, args...) + cmd := exec.CommandContext(ctx, "git", all...) //nolint:gosec // controlled args + out, err := cmd.CombinedOutput() + if err != nil { + return "", fmt.Errorf("git %s failed: %w\noutput:\n%s", strings.Join(args, " "), err, strings.TrimSpace(string(out))) + } + return string(out), nil +} + +func runGitInDirNoOutput(ctx context.Context, gitDir string, args ...string) error { + _, err := runGitInDir(ctx, gitDir, args...) + return err +} diff --git a/go/libraries/doltcore/dbfactory/git_remote_test.go b/go/libraries/doltcore/dbfactory/git_remote_test.go new file mode 100644 index 00000000000..a46c5387102 --- /dev/null +++ b/go/libraries/doltcore/dbfactory/git_remote_test.go @@ -0,0 +1,187 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package dbfactory + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/dolthub/dolt/go/store/chunks" + "github.com/dolthub/dolt/go/store/datas" + "github.com/dolthub/dolt/go/store/hash" + "github.com/dolthub/dolt/go/store/testutils/gitrepo" + "github.com/dolthub/dolt/go/store/types" +) + +// t.TempDir() includes the test name on disk, which can create very long paths on Windows. +// These tests create deep `refs/...` paths inside bare git repos and can hit MAX_PATH without +// long path support enabled. Use a short temp prefix on Windows to keep paths under the limit. 
+func shortTempDir(t *testing.T) string { + t.Helper() + if runtime.GOOS != "windows" { + return t.TempDir() + } + + dir, err := os.MkdirTemp("", "dolt") + require.NoError(t, err) + t.Cleanup(func() { _ = os.RemoveAll(dir) }) + return dir +} + +func TestGitRemoteFactory_GitFile_RequiresGitCacheRootParam(t *testing.T) { + ctx := context.Background() + _, _, _, err := CreateDB(ctx, types.Format_Default, "git+file:///tmp/remote.git", map[string]interface{}{}) + require.Error(t, err) + require.Contains(t, err.Error(), GitCacheRootParam) +} + +func TestGitRemoteFactory_GitFile_CachesUnderRepoDoltDirAndCanWrite(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not found on PATH") + } + + ctx := context.Background() + remoteRepo, err := gitrepo.InitBare(ctx, filepath.Join(shortTempDir(t), "remote.git")) + require.NoError(t, err) + + localRepoRoot := shortTempDir(t) + + remotePath := filepath.ToSlash(remoteRepo.GitDir) + remoteURL := "file://" + remotePath + urlStr := "git+file://" + remotePath + params := map[string]interface{}{ + GitCacheRootParam: localRepoRoot, + } + + db, vrw, _, err := CreateDB(ctx, types.Format_Default, urlStr, params) + require.NoError(t, err) + require.NotNil(t, db) + require.NotNil(t, vrw) + + // Ensure cache repo created under /.dolt/git-remote-cache. + cacheBase := filepath.Join(localRepoRoot, DoltDir, "git-remote-cache") + + sum := sha256.Sum256([]byte(remoteURL + "|" + "refs/dolt/data")) + h := hex.EncodeToString(sum[:]) + cacheRepo := filepath.Join(cacheBase, h, "repo.git") + _, err = os.Stat(filepath.Join(cacheRepo, "HEAD")) + require.NoError(t, err) + + vs, ok := vrw.(*types.ValueStore) + require.True(t, ok, "expected ValueReadWriter to be *types.ValueStore, got %T", vrw) + cs := vs.ChunkStore() + + // Minimal write: put one chunk and commit its hash as the root. 
+ c := chunks.NewChunk([]byte("hello\n")) + err = cs.Put(ctx, c, func(chunks.Chunk) chunks.GetAddrsCb { + return func(context.Context, hash.HashSet, chunks.PendingRefExists) error { return nil } + }) + require.NoError(t, err) + + last, err := cs.Root(ctx) + require.NoError(t, err) + okCommit, err := cs.Commit(ctx, c.Hash(), last) + require.NoError(t, err) + require.True(t, okCommit) + + require.NoError(t, db.Close()) + + // Remote should now have refs/dolt/data. + cmd := exec.CommandContext(ctx, "git", "--git-dir", remoteRepo.GitDir, "rev-parse", "--verify", "--quiet", "refs/dolt/data^{commit}") + out, err := cmd.CombinedOutput() + require.NoError(t, err, "git rev-parse failed: %s", strings.TrimSpace(string(out))) +} + +func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not found on PATH") + } + + ctx := context.Background() + remoteRepo, err := gitrepo.InitBare(ctx, filepath.Join(shortTempDir(t), "remote.git")) + require.NoError(t, err) + + remotePath := filepath.ToSlash(remoteRepo.GitDir) + urlStr := "git+file://" + remotePath + + noopGetAddrs := func(chunks.Chunk) chunks.GetAddrsCb { + return func(context.Context, hash.HashSet, chunks.PendingRefExists) error { return nil } + } + + open := func(cacheRoot string) (db datas.Database, cs chunks.ChunkStore) { + params := map[string]interface{}{ + GitCacheRootParam: cacheRoot, + } + d, vrw, _, err := CreateDB(ctx, types.Format_Default, urlStr, params) + require.NoError(t, err) + require.NotNil(t, d) + require.NotNil(t, vrw) + + vs, ok := vrw.(*types.ValueStore) + require.True(t, ok, "expected ValueReadWriter to be *types.ValueStore, got %T", vrw) + return d, vs.ChunkStore() + } + + cacheA := shortTempDir(t) + cacheB := shortTempDir(t) + + // Client A writes a root pointing at chunk A. 
+ dbA, csA := open(cacheA) + cA := chunks.NewChunk([]byte("clientA\n")) + require.NoError(t, csA.Put(ctx, cA, noopGetAddrs)) + lastA, err := csA.Root(ctx) + require.NoError(t, err) + okCommitA, err := csA.Commit(ctx, cA.Hash(), lastA) + require.NoError(t, err) + require.True(t, okCommitA) + require.NoError(t, dbA.Close()) + + // Client B reads chunk A, then writes chunk B and updates the root. + dbB, csB := open(cacheB) + require.NoError(t, csB.Rebase(ctx)) + rootB, err := csB.Root(ctx) + require.NoError(t, err) + require.Equal(t, cA.Hash(), rootB) + gotA, err := csB.Get(ctx, cA.Hash()) + require.NoError(t, err) + require.Equal(t, "clientA\n", string(gotA.Data())) + + cB := chunks.NewChunk([]byte("clientB\n")) + require.NoError(t, csB.Put(ctx, cB, noopGetAddrs)) + okCommitB, err := csB.Commit(ctx, cB.Hash(), rootB) + require.NoError(t, err) + require.True(t, okCommitB) + require.NoError(t, dbB.Close()) + + // Client A re-opens and should see B's update. + dbA2, csA2 := open(cacheA) + require.NoError(t, csA2.Rebase(ctx)) + rootA2, err := csA2.Root(ctx) + require.NoError(t, err) + require.Equal(t, cB.Hash(), rootA2) + gotB, err := csA2.Get(ctx, cB.Hash()) + require.NoError(t, err) + require.Equal(t, "clientB\n", string(gotB.Data())) + require.NoError(t, dbA2.Close()) +} diff --git a/go/libraries/doltcore/env/environment.go b/go/libraries/doltcore/env/environment.go index 618f21b22c1..441581f4611 100644 --- a/go/libraries/doltcore/env/environment.go +++ b/go/libraries/doltcore/env/environment.go @@ -184,6 +184,16 @@ func (dEnv *DoltEnv) UrlStr() string { return dEnv.urlStr } +// GitCacheRoot returns the absolute path to the local Dolt repository root (the directory that contains `.dolt/`). +// It is used to place git-remote caches under `/.dolt/...`. 
+func (dEnv *DoltEnv) GitCacheRoot() (string, bool) { + doltDir := dEnv.GetDoltDir() + if doltDir == "" { + return "", false + } + return filepath.Dir(doltDir), true +} + func createRepoState(fs filesys.Filesys) (*RepoState, error) { repoState, rsErr := LoadRepoState(fs) @@ -530,7 +540,8 @@ var ErrCannotCreateDoltDirAlreadyExists = errors.New(".dolt dir already exists") // * |dir|/.dolt exists and is a directory and is empty, or // * |dir|/.dolt exists and is a directory and has only one other entry in it, a directory with name "tmp", or // * |dir|/.dolt exists and is a directory and has only one other entry in it, a file with name "config.json", or -// * |dir|/.dolt exists and is a directory and contains both a |tmp| directory and a |config.json| file and nothing else. +// * |dir|/.dolt exists and is a directory and contains both a |tmp| directory and a |config.json| file and nothing else, or +// * |dir|/.dolt exists and is a directory and contains a |git-remote-cache| directory (and any contents under it) plus any of the above. func CanCreateDatabaseAtPath(fs filesys.Filesys, dir string) (bool, error) { absPath, err := fs.Abs(dir) if err != nil { @@ -550,6 +561,7 @@ func CanCreateDatabaseAtPath(fs filesys.Filesys, dir string) (bool, error) { } tmpPath := filepath.Join(doltDirPath, TmpDirName) configPath := filepath.Join(doltDirPath, configFile) + gitRemoteCachePath := filepath.Join(doltDirPath, "git-remote-cache") isOK := true err := fs.Iter(doltDirPath, true, func(path string, sz int64, isDir bool) (stop bool) { if path == doltDirPath { @@ -558,6 +570,12 @@ func CanCreateDatabaseAtPath(fs filesys.Filesys, dir string) (bool, error) { return false } else if path == configPath && !isDir { return false + } else if path == gitRemoteCachePath && isDir { + // Allow git remote cache contents to exist under .dolt/ when cloning / creating a DB. 
+ return false + } else if strings.HasPrefix(path, gitRemoteCachePath+string(filepath.Separator)) { + // Allow any children of .dolt/git-remote-cache. + return false } else { isOK = false return true diff --git a/go/libraries/doltcore/env/environment_test.go b/go/libraries/doltcore/env/environment_test.go index b03a53fdbad..790b98da41e 100644 --- a/go/libraries/doltcore/env/environment_test.go +++ b/go/libraries/doltcore/env/environment_test.go @@ -38,6 +38,29 @@ const ( workingDir = "/user/bheni/datasets/addresses" ) +func TestCanCreateDatabaseAtPathAllowsGitRemoteCache(t *testing.T) { + dir := "/user/bheni/datasets/allow_git_remote_cache" + doltDir := filepath.Join(dir, dbfactory.DoltDir) + cacheDir := filepath.Join(doltDir, "git-remote-cache") + + // Any contents under .dolt/git-remote-cache should be ignored by CanCreateDatabaseAtPath. + fs := filesys.NewInMemFS( + []string{ + testHomeDir, + dir, + doltDir, + cacheDir, + filepath.Join(cacheDir, "somecache"), + }, + map[string][]byte{}, + dir, + ) + + ok, err := CanCreateDatabaseAtPath(fs, dir) + require.NoError(t, err) + require.True(t, ok) +} + func testHomeDirFunc() (string, error) { return testHomeDir, nil } diff --git a/go/libraries/doltcore/env/git_remote_url.go b/go/libraries/doltcore/env/git_remote_url.go new file mode 100644 index 00000000000..0cb588855e0 --- /dev/null +++ b/go/libraries/doltcore/env/git_remote_url.go @@ -0,0 +1,176 @@ +// Copyright 2026 Dolthub, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// supportedGitPlusSchemes is the set of explicit dbfactory schemes that NormalizeGitRemoteUrl accepts
// verbatim (after URL round-tripping).
var supportedGitPlusSchemes = map[string]struct{}{
	"git+file":  {},
	"git+http":  {},
	"git+https": {},
	"git+ssh":   {},
}

// supportedUnderlyingGitSchemes is the set of plain URL schemes that NormalizeGitRemoteUrl rewrites to
// the matching git+* scheme when the URL (ignoring query and fragment) ends in ".git".
var supportedUnderlyingGitSchemes = map[string]struct{}{
	"file":  {},
	"http":  {},
	"https": {},
	"ssh":   {},
}

// NormalizeGitRemoteUrl translates user-provided git remote strings into a canonical dbfactory URL
// using git+* schemes.
//
// It accepts:
//   - Explicit dbfactory URLs: git+file/http/https/ssh://...
//   - URLs ending in .git: file/http/https/ssh URLs
//   - scp-style ssh: [user@]host:path/repo.git
//   - schemeless host/path: host/org/repo.git (defaults to git+https)
//   - local paths ending in .git (absolute, or relative with a leading "./" or "../") (translated to git+file)
//
// A trailing query string and/or fragment is carried over to the result unchanged. All classification
// (the ".git" suffix, scp-style detection, scheme detection, local-path detection) looks only at the
// part of the input before the first '?' or '#', so the contents of a query can never change how the
// remote is interpreted.
//
// Note that a bare relative name without a "./" prefix (for example "repo.git") is treated as a host
// name and becomes a git+https URL.
//
// It returns ok=false when the input is not recognized as a git remote URL (so callers can fall back
// to existing remote handling). A non-nil error is returned for an explicit git+* URL with an
// unsupported scheme, when the resulting URL cannot be parsed, or when a local path cannot be made
// absolute.
func NormalizeGitRemoteUrl(urlArg string) (normalized string, ok bool, err error) {
	urlArg = strings.TrimSpace(urlArg)
	if urlArg == "" {
		return "", false, nil
	}

	// Fast-path: explicit git+* dbfactory URL.
	if strings.HasPrefix(strings.ToLower(urlArg), "git+") {
		u, perr := url.Parse(urlArg)
		if perr != nil {
			return "", false, perr
		}
		if _, supported := supportedGitPlusSchemes[strings.ToLower(u.Scheme)]; !supported {
			return "", false, fmt.Errorf("unsupported git dbfactory scheme %q", u.Scheme)
		}
		return u.String(), true, nil
	}

	// Only translate obvious git remote strings (must end in .git).
	base := stripQueryAndFragment(urlArg)
	if !strings.HasSuffix(base, ".git") {
		return "", false, nil
	}
	// suffix is the "?query#fragment" tail (possibly empty); it is re-attached verbatim below.
	suffix := urlArg[len(base):]

	switch {
	case isScpLikeGitRemote(base):
		// scp-like ssh: [user@]host:path/repo.git (no scheme, no ://)
		host, p := splitScpLike(base)
		return parseGitRemoteURL("git+ssh://" + host + "/" + strings.TrimPrefix(p, "/") + suffix)

	case strings.Contains(base, "://"):
		// file/http/https/ssh url with a scheme.
		u, perr := url.Parse(urlArg)
		if perr != nil {
			return "", false, perr
		}
		scheme := strings.ToLower(u.Scheme)
		if _, supported := supportedUnderlyingGitSchemes[scheme]; !supported {
			return "", false, nil
		}
		u.Scheme = "git+" + scheme
		return u.String(), true, nil

	case looksLikeLocalPath(base):
		// Local filesystem path (absolute or relative). Only the path portion is made absolute:
		// filepath.Abs cleans its argument, which would otherwise rewrite "//" or ".." sequences
		// that happen to appear inside the query string.
		abs, aerr := filepath.Abs(base)
		if aerr != nil {
			return "", false, aerr
		}
		return parseGitRemoteURL("git+file://" + filepath.ToSlash(abs) + suffix)

	default:
		// Schemeless host/path.git defaults to https.
		return parseGitRemoteURL("git+https://" + urlArg)
	}
}

// parseGitRemoteURL parses an already-assembled git+* URL and returns its canonical string form in the
// (normalized, ok, err) shape used by NormalizeGitRemoteUrl.
func parseGitRemoteURL(raw string) (string, bool, error) {
	u, err := url.Parse(raw)
	if err != nil {
		return "", false, err
	}
	return u.String(), true, nil
}

// stripQueryAndFragment returns s truncated at the first '?' or '#', whichever comes first. If s
// contains neither, it is returned unchanged.
func stripQueryAndFragment(s string) string {
	if i := strings.IndexAny(s, "?#"); i >= 0 {
		return s[:i]
	}
	return s
}

// looksLikeLocalPath reports whether s should be treated as a filesystem path rather than a host name.
func looksLikeLocalPath(s string) bool {
	// Absolute paths, as defined by the host operating system (filepath.IsAbs is platform specific).
	if filepath.IsAbs(s) {
		return true
	}
	// Explicitly relative paths written with forward slashes.
	if strings.HasPrefix(s, "./") || strings.HasPrefix(s, "../") {
		return true
	}
	// Explicitly relative paths written with the platform's native separator. This is redundant
	// where the separator is '/', and additionally accepts `.\` and `..\` where it is '\'.
	sep := string(filepath.Separator)
	return strings.HasPrefix(s, "."+sep) || strings.HasPrefix(s, ".."+sep)
}

// isScpLikeGitRemote reports whether s has the scp-style shape [user@]host:path.
func isScpLikeGitRemote(s string) bool {
	// This intentionally keeps the matcher simple:
	// - no scheme (no "://")
	// - the first ':' separates host from path, and both sides are non-empty
	// - host part contains no '/'
	// - path ends in .git (already checked by caller)
	if strings.Contains(s, "://") {
		return false
	}
	host, path, found := strings.Cut(s, ":")
	if !found || host == "" || path == "" {
		return false
	}
	if strings.Contains(host, "/") {
		return false
	}
	// Avoid misclassifying Windows paths; host must contain a dot or an '@' (git@host:...).
	return strings.ContainsAny(host, ".@")
}

// splitScpLike splits s at its first ':' into host and path. When s contains no ':', host is empty and
// path is s.
func splitScpLike(s string) (host string, path string) {
	h, p, found := strings.Cut(s, ":")
	if !found {
		return "", s
	}
	return h, p
}

// NOTE: we intentionally do not reject URL query parameters (including `ref=`) here.
+ +package env + +import ( + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestNormalizeGitRemoteUrl(t *testing.T) { + t.Run("empty not recognized", func(t *testing.T) { + got, ok, err := NormalizeGitRemoteUrl("") + require.NoError(t, err) + require.False(t, ok) + require.Empty(t, got) + }) + + t.Run("explicit git+https keeps scheme", func(t *testing.T) { + got, ok, err := NormalizeGitRemoteUrl("git+https://example.com/org/repo.git") + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, "git+https://example.com/org/repo.git", got) + }) + + t.Run("https .git becomes git+https", func(t *testing.T) { + got, ok, err := NormalizeGitRemoteUrl("https://example.com/org/repo.git") + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, "git+https://example.com/org/repo.git", got) + }) + + t.Run("scp-style becomes git+ssh", func(t *testing.T) { + got, ok, err := NormalizeGitRemoteUrl("git@github.com:org/repo.git") + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, "git+ssh://git@github.com/org/repo.git", got) + }) + + t.Run("schemeless host/path defaults to git+https", func(t *testing.T) { + got, ok, err := NormalizeGitRemoteUrl("github.com/org/repo.git") + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, "git+https://github.com/org/repo.git", got) + }) + + t.Run("local absolute path becomes git+file", func(t *testing.T) { + p := filepath.ToSlash(filepath.Join(t.TempDir(), "remote.git")) + got, ok, err := NormalizeGitRemoteUrl(p) + require.NoError(t, err) + require.True(t, ok) + require.Equal(t, "git+file://"+p, got) + }) + + t.Run("non .git url not recognized", func(t *testing.T) { + got, ok, err := NormalizeGitRemoteUrl("https://example.com/not-git") + require.NoError(t, err) + require.False(t, ok) + require.Empty(t, got) + }) +} diff --git a/go/libraries/doltcore/env/remotes.go b/go/libraries/doltcore/env/remotes.go index 3ea65724421..a4c3d64c76b 100644 --- 
a/go/libraries/doltcore/env/remotes.go +++ b/go/libraries/doltcore/env/remotes.go @@ -104,6 +104,14 @@ func (r *Remote) GetRemoteDB(ctx context.Context, nbf *types.NomsBinFormat, dial } params[dbfactory.GRPCDialProviderParam] = dialer + if u, err := earl.Parse(r.Url); err == nil && u != nil && strings.HasPrefix(strings.ToLower(u.Scheme), "git+") { + params[dbfactory.GitRemoteNameParam] = r.Name + if p, ok := dialer.(dbfactory.GitCacheRootProvider); ok { + if root, ok := p.GitCacheRoot(); ok { + params[dbfactory.GitCacheRootParam] = root + } + } + } return doltdb.LoadDoltDBWithParams(ctx, nbf, r.Url, filesys2.LocalFS, params) } @@ -117,6 +125,14 @@ func (r *Remote) Prepare(ctx context.Context, nbf *types.NomsBinFormat, dialer d } params[dbfactory.GRPCDialProviderParam] = dialer + if u, err := earl.Parse(r.Url); err == nil && u != nil && strings.HasPrefix(strings.ToLower(u.Scheme), "git+") { + params[dbfactory.GitRemoteNameParam] = r.Name + if p, ok := dialer.(dbfactory.GitCacheRootProvider); ok { + if root, ok := p.GitCacheRoot(); ok { + params[dbfactory.GitCacheRootParam] = root + } + } + } return dbfactory.PrepareDB(ctx, nbf, r.Url, params) } @@ -128,6 +144,14 @@ func (r *Remote) GetRemoteDBWithoutCaching(ctx context.Context, nbf *types.NomsB } params[dbfactory.NoCachingParameter] = "true" params[dbfactory.GRPCDialProviderParam] = dialer + if u, err := earl.Parse(r.Url); err == nil && u != nil && strings.HasPrefix(strings.ToLower(u.Scheme), "git+") { + params[dbfactory.GitRemoteNameParam] = r.Name + if p, ok := dialer.(dbfactory.GitCacheRootProvider); ok { + if root, ok := p.GitCacheRoot(); ok { + params[dbfactory.GitCacheRootParam] = root + } + } + } return doltdb.LoadDoltDBWithParams(ctx, nbf, r.Url, filesys2.LocalFS, params) } @@ -643,6 +667,12 @@ func NewPullSpec[C doltdb.Context]( } func GetAbsRemoteUrl(fs filesys2.Filesys, cfg config.ReadableConfig, urlArg string) (string, string, error) { + if normalized, ok, nerr := NormalizeGitRemoteUrl(urlArg); nerr != 
nil { + return "", "", nerr + } else if ok { + urlArg = normalized + } + u, err := earl.Parse(urlArg) if err != nil { return "", "", err diff --git a/go/libraries/doltcore/sqle/database_provider.go b/go/libraries/doltcore/sqle/database_provider.go index 5d7cca5d6db..69a4401be71 100644 --- a/go/libraries/doltcore/sqle/database_provider.go +++ b/go/libraries/doltcore/sqle/database_provider.go @@ -77,6 +77,18 @@ type DoltDatabaseProvider struct { InitDatabaseHooks []InitDatabaseHook } +type remoteDialerWithGitCacheRoot struct { + dbfactory.GRPCDialProvider + root string +} + +func (d remoteDialerWithGitCacheRoot) GitCacheRoot() (string, bool) { + if strings.TrimSpace(d.root) == "" { + return "", false + } + return d.root, true +} + var _ sql.DatabaseProvider = (*DoltDatabaseProvider)(nil) var _ sql.FunctionProvider = (*DoltDatabaseProvider)(nil) var _ sql.MutableDatabaseProvider = (*DoltDatabaseProvider)(nil) @@ -502,10 +514,26 @@ func (p *DoltDatabaseProvider) allRevisionDbs(ctx *sql.Context, db dsess.SqlData } func (p *DoltDatabaseProvider) GetRemoteDB(ctx context.Context, format *types.NomsBinFormat, r env.Remote, withCaching bool) (*doltdb.DoltDB, error) { + // For git remotes, thread through the initiating database's repo root so git caches can be located under + // `/.dolt/...` instead of a user-global cache dir. 
+ dialer := p.remoteDialer + if sqlCtx, ok := ctx.(*sql.Context); ok { + baseName, _ := doltdb.SplitRevisionDbName(sqlCtx.GetCurrentDatabase()) + dbKey := strings.ToLower(baseName) + p.mu.RLock() + dbLoc, ok := p.dbLocations[dbKey] + p.mu.RUnlock() + if ok && dbLoc != nil { + if root, err := dbLoc.Abs("."); err == nil && strings.TrimSpace(root) != "" { + dialer = remoteDialerWithGitCacheRoot{GRPCDialProvider: p.remoteDialer, root: root} + } + } + } + if withCaching { - return r.GetRemoteDB(ctx, format, p.remoteDialer) + return r.GetRemoteDB(ctx, format, dialer) } - return r.GetRemoteDBWithoutCaching(ctx, format, p.remoteDialer) + return r.GetRemoteDBWithoutCaching(ctx, format, dialer) } func (p *DoltDatabaseProvider) CreateDatabase(ctx *sql.Context, name string) error { @@ -814,7 +842,11 @@ func (p *DoltDatabaseProvider) cloneDatabaseFromRemote( } r := env.NewRemote(remoteName, remoteUrl, remoteParams) - srcDB, err := r.GetRemoteDB(ctx, types.Format_Default, p.remoteDialer) + destRoot, err := p.fs.Abs(dbName) + if err != nil { + return err + } + srcDB, err := r.GetRemoteDB(ctx, types.Format_Default, remoteDialerWithGitCacheRoot{GRPCDialProvider: p.remoteDialer, root: destRoot}) if err != nil { return err } diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go b/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go index 4e0f4f8a9db..25fd7f7c23c 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_backup.go @@ -307,6 +307,14 @@ func syncRemote(ctx *sql.Context, dbData env.DbData[*sql.Context], dsess *dsess. // not AWS, it verifies that no AWS parameters are present in |apr|. 
func newParams(apr *argparser.ArgParseResults, url string, urlScheme string) (map[string]string, error) { params := map[string]string{} + + isGitRemote := urlScheme == dbfactory.GitFileScheme || urlScheme == dbfactory.GitHTTPScheme || urlScheme == dbfactory.GitHTTPSScheme || urlScheme == dbfactory.GitSSHScheme + if !isGitRemote { + if _, ok := apr.GetValue("ref"); ok { + return nil, fmt.Errorf("error: --ref is only supported for git remotes") + } + } + var err error switch urlScheme { case dbfactory.AWSScheme: @@ -315,6 +323,15 @@ func newParams(apr *argparser.ArgParseResults, url string, urlScheme string) (ma // TODO(elianddb): This func mainly interfaces with apr to set the OSS key-vals in params, but the backup arg // parser does not include any OSS-related flags? I'm guessing they must be processed elsewhere? err = cli.AddOSSParams(url, apr, params) + case dbfactory.GitFileScheme, dbfactory.GitHTTPScheme, dbfactory.GitHTTPSScheme, dbfactory.GitSSHScheme: + err = cli.VerifyNoAwsParams(apr) + if ref, ok := apr.GetValue("ref"); ok { + ref = strings.TrimSpace(ref) + if ref == "" { + return nil, fmt.Errorf("error: --ref cannot be empty") + } + params[dbfactory.GitRefParam] = ref + } default: err = cli.VerifyNoAwsParams(apr) } diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go b/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go index 043a980e6c1..43f90c12a77 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_clone.go @@ -16,6 +16,7 @@ package dprocedures import ( "path" + "strings" "github.com/dolthub/go-mysql-server/sql" @@ -45,7 +46,7 @@ func doltClone(ctx *sql.Context, args ...string) (sql.RowIter, error) { } sess := dsess.DSessFromSess(ctx.Session) - _, remoteUrl, err := env.GetAbsRemoteUrl(sess.Provider().FileSystem(), emptyConfig(), urlStr) + scheme, remoteUrl, err := env.GetAbsRemoteUrl(sess.Provider().FileSystem(), emptyConfig(), urlStr) if err != nil { return nil, 
errhand.BuildDError("error: '%s' is not valid.", urlStr).Build() } @@ -57,6 +58,19 @@ func doltClone(ctx *sql.Context, args ...string) (sql.RowIter, error) { remoteParms[dbfactory.GRPCUsernameAuthParam] = user } + isGitRemote := scheme == dbfactory.GitFileScheme || scheme == dbfactory.GitHTTPScheme || scheme == dbfactory.GitHTTPSScheme || scheme == dbfactory.GitSSHScheme + + if ref, ok := apr.GetValue("ref"); ok { + ref = strings.TrimSpace(ref) + if ref == "" { + return nil, errhand.BuildDError("error: --ref cannot be empty").Build() + } + if !isGitRemote { + return nil, errhand.BuildDError("error: --ref is only supported for git remotes").Build() + } + remoteParms[dbfactory.GitRefParam] = ref + } + depth, ok := apr.GetInt(cli.DepthFlag) if !ok { depth = -1 @@ -82,7 +96,11 @@ func getDirectoryAndUrlString(apr *argparser.ArgParseResults) (string, string, e urlStr := apr.Arg(0) _, err := earl.Parse(urlStr) if err != nil { - return "", "", errhand.BuildDError("error: invalid remote url: %s", urlStr).Build() + if normalized, ok, nerr := env.NormalizeGitRemoteUrl(urlStr); nerr == nil && ok { + urlStr = normalized + } else { + return "", "", errhand.BuildDError("error: invalid remote url: %s", urlStr).Build() + } } var dir string diff --git a/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go b/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go index b9c8dc1d530..8e1eed0097f 100644 --- a/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go +++ b/go/libraries/doltcore/sqle/dprocedures/dolt_remote.go @@ -22,6 +22,7 @@ import ( "github.com/dolthub/dolt/go/cmd/dolt/cli" "github.com/dolthub/dolt/go/libraries/doltcore/branch_control" + "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory" "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" "github.com/dolthub/dolt/go/libraries/doltcore/env" "github.com/dolthub/dolt/go/libraries/doltcore/ref" @@ -97,12 +98,27 @@ func addRemote(_ *sql.Context, dbName string, dbd env.DbData[*sql.Context], apr return err } - _, 
absRemoteUrl, err := env.GetAbsRemoteUrl(dbFs, &config.MapConfig{}, remoteUrl) + scheme, absRemoteUrl, err := env.GetAbsRemoteUrl(dbFs, &config.MapConfig{}, remoteUrl) if err != nil { return err } - r := env.NewRemote(remoteName, absRemoteUrl, map[string]string{}) + params := map[string]string{} + + isGitRemote := scheme == dbfactory.GitFileScheme || scheme == dbfactory.GitHTTPScheme || scheme == dbfactory.GitHTTPSScheme || scheme == dbfactory.GitSSHScheme + + if ref, ok := apr.GetValue("ref"); ok { + ref = strings.TrimSpace(ref) + if ref == "" { + return fmt.Errorf("error: --ref cannot be empty") + } + if !isGitRemote { + return fmt.Errorf("error: --ref is only supported for git remotes") + } + params[dbfactory.GitRefParam] = ref + } + + r := env.NewRemote(remoteName, absRemoteUrl, params) return dbd.Rsw.AddRemote(r) } diff --git a/integration-tests/bats/remotes-git.bats b/integration-tests/bats/remotes-git.bats new file mode 100644 index 00000000000..64d897bc610 --- /dev/null +++ b/integration-tests/bats/remotes-git.bats @@ -0,0 +1,204 @@ +#!/usr/bin/env bats +load $BATS_TEST_DIRNAME/helper/common.bash + +setup() { + skiponwindows "tests are flaky on Windows" + skip_if_remote + setup_common + if ! command -v git >/dev/null 2>&1; then + skip "git not installed" + fi + cd $BATS_TMPDIR + cd dolt-repo-$$ + mkdir "dolt-repo-clones" +} + +teardown() { + assert_feature_version + teardown_common +} + +@test "remotes-git: smoke push/clone/push-back/pull" { + mkdir remote.git + git init --bare remote.git + + mkdir repo1 + cd repo1 + dolt init + dolt sql -q "create table test(pk int primary key, v int);" + dolt add . + dolt commit -m "create table" + + dolt remote add origin ../remote.git + run dolt push --set-upstream origin main + [ "$status" -eq 0 ] + + cd .. + cd dolt-repo-clones + run dolt clone ../remote.git repo2 + [ "$status" -eq 0 ] + + cd repo2 + dolt sql -q "insert into test values (1, 10);" + dolt add . 
+ dolt commit -m "add row" + run dolt push origin main + [ "$status" -eq 0 ] + + cd ../../repo1 + run dolt pull + [ "$status" -eq 0 ] + + run dolt sql -q "select v from test where pk = 1;" -r csv + [ "$status" -eq 0 ] + [[ "$output" =~ "10" ]] || false + +} + +@test "remotes-git: empty remote bootstrap creates refs/dolt/data" { + mkdir remote.git + git init --bare remote.git + + # Assert the dolt data ref doesn't exist yet. + run git --git-dir remote.git show-ref refs/dolt/data + [ "$status" -eq 1 ] + + mkdir repo1 + cd repo1 + dolt init + dolt sql -q "create table test(pk int primary key);" + dolt add . + dolt commit -m "create table" + + dolt remote add origin ../remote.git + run dolt push --set-upstream origin main + [ "$status" -eq 0 ] + + run git --git-dir ../remote.git show-ref refs/dolt/data + [ "$status" -eq 0 ] + +} + +@test "remotes-git: pull also fetches branches from git remote" { + mkdir remote.git + git init --bare remote.git + + mkdir repo1 + cd repo1 + dolt init + dolt remote add origin ../remote.git + dolt push origin main + + cd .. + cd dolt-repo-clones + run dolt clone ../remote.git repo2 + [ "$status" -eq 0 ] + + cd repo2 + run dolt branch -va + [[ "$output" =~ "main" ]] || false + [[ ! "$output" =~ "other" ]] || false + + cd ../../repo1 + dolt checkout -b other + dolt commit --allow-empty -m "first commit on other" + dolt push origin other + + cd ../dolt-repo-clones/repo2 + dolt pull + run dolt branch -va + [[ "$output" =~ "main" ]] || false + [[ "$output" =~ "other" ]] || false + +} + +@test "remotes-git: pull fetches but does not merge other branches" { + mkdir remote.git + git init --bare remote.git + + mkdir repo1 + cd repo1 + dolt init + dolt remote add origin ../remote.git + dolt push --set-upstream origin main + dolt checkout -b other + dolt commit --allow-empty -m "first commit on other" + dolt push --set-upstream origin other + + cd .. 
+ cd dolt-repo-clones + run dolt clone ../remote.git repo2 + [ "$status" -eq 0 ] + + cd repo2 + main_state1=$(get_head_commit) + + run dolt pull + [ "$status" -eq 0 ] + + main_state2=$(get_head_commit) + [[ "$main_state1" = "$main_state2" ]] || false + + run dolt branch -va + [[ "$output" =~ "main" ]] || false + [[ "$output" =~ "other" ]] || false + + run dolt checkout other + [ "$status" -eq 0 ] + [[ "$output" =~ "branch 'other' set up to track 'origin/other'." ]] || false + + run dolt log --oneline -n 1 + [ "$status" -eq 0 ] + [[ "$output" =~ "first commit on other" ]] || false + +} + +@test "remotes-git: custom --ref writes to configured dolt data ref" { + mkdir remote.git + git init --bare remote.git + + mkdir repo1 + cd repo1 + dolt init + dolt sql -q "create table test(pk int primary key, v int);" + dolt sql -q "insert into test values (1, 111);" + dolt add . + dolt commit -m "seed" + + dolt remote add --ref refs/dolt/custom origin ../remote.git + run dolt push --set-upstream origin main + [ "$status" -eq 0 ] + + run git --git-dir ../remote.git show-ref refs/dolt/custom + [ "$status" -eq 0 ] + run git --git-dir ../remote.git show-ref refs/dolt/data + [ "$status" -ne 0 ] + + cd .. 
+ cd dolt-repo-clones + run dolt clone --ref refs/dolt/custom ../remote.git repo2 + [ "$status" -eq 0 ] + + cd repo2 + run dolt sql -q "select v from test where pk = 1;" -r csv + [ "$status" -eq 0 ] + [[ "$output" =~ "111" ]] || false + + run git --git-dir ../../remote.git show-ref refs/dolt/data + [ "$status" -ne 0 ] + +} + +@test "remotes-git: push works with per-repo git cache under .dolt/" { + mkdir remote.git + git init --bare remote.git + + mkdir repo1 + cd repo1 + dolt init + dolt commit --allow-empty -m "init" + + dolt remote add origin ../remote.git + run dolt push --set-upstream origin main + [ "$status" -eq 0 ] +} diff --git a/integration-tests/bats/sql-remotes-git.bats b/integration-tests/bats/sql-remotes-git.bats new file mode 100644 index 00000000000..44a289423ef --- /dev/null +++ b/integration-tests/bats/sql-remotes-git.bats @@ -0,0 +1,98 @@ +#!/usr/bin/env bats +load $BATS_TEST_DIRNAME/helper/common.bash + +setup() { + skiponwindows "tests are flaky on Windows" + skip_if_remote + setup_common + if ! command -v git >/dev/null 2>&1; then + skip "git not installed" + fi + cd $BATS_TMPDIR + cd dolt-repo-$$ +} + +teardown() { + assert_feature_version + teardown_common +} + +@test "sql-remotes-git: dolt_remote add supports --ref for git remotes" { + mkdir remote.git + git init --bare remote.git + + mkdir repo1 + cd repo1 + dolt init + dolt sql -q "create table test(pk int primary key, v int);" + dolt sql -q "insert into test values (1, 111);" + dolt add . + dolt commit -m "seed" + + run dolt sql <