Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions go/cmd/dolt/commands/clone.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,12 @@ func parseArgs(apr *argparser.ArgParseResults) (string, string, errhand.VerboseE
} else if dir == "/" {
return "", "", errhand.BuildDError("Could not infer repo name. Please explicitly define a directory for this url").Build()
}
if strings.HasSuffix(dir, ".git") {
dir = strings.TrimSuffix(dir, ".git")
if dir == "" {
return "", "", errhand.BuildDError("Could not infer repo name. Please explicitly define a directory for this url").Build()
}
}
}

return dir, urlStr, nil
Expand Down
13 changes: 12 additions & 1 deletion go/cmd/dolt/commands/clone_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,17 @@ func TestCloneParseArgs_InferDir(t *testing.T) {

dir, urlStr, verr := parseArgs(apr)
require.Nil(t, verr)
require.Equal(t, "repo.git", dir)
require.Equal(t, "repo", dir)
require.Equal(t, "https://example.com/org/repo.git", urlStr)
}

func TestCloneParseArgs_InferDir_GitFile(t *testing.T) {
ap := CloneCmd{}.ArgParser()
apr, err := ap.Parse([]string{"git+file:///tmp/remote.git"})
require.NoError(t, err)

dir, urlStr, verr := parseArgs(apr)
require.Nil(t, verr)
require.Equal(t, "remote", dir)
require.Equal(t, "git+file:///tmp/remote.git", urlStr)
}
8 changes: 7 additions & 1 deletion go/cmd/dolt/commands/remote.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,13 @@ func addRemote(sqlCtx *sql.Context, queryist cli.Queryist, dEnv *env.DoltEnv, ap
}

if len(params) == 0 {
err := callSQLRemoteAdd(sqlCtx, queryist, remoteName, remoteUrl)
// For git remotes, ensure the stored URL is the normalized git+* dbfactory URL (e.g. scp-style ssh becomes
// git+ssh://..., and local paths become git+file://...).
urlToStore := remoteUrl
if strings.HasPrefix(strings.ToLower(scheme), "git+") {
urlToStore = absRemoteUrl
}
err := callSQLRemoteAdd(sqlCtx, queryist, remoteName, urlToStore)
if err != nil {
return errhand.BuildDError("error: Unable to add remote.").AddCause(err).Build()
}
Expand Down
18 changes: 18 additions & 0 deletions go/cmd/dolt/commands/remote_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package commands
import (
"fmt"
"path/filepath"
"strings"
"testing"

"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -146,3 +147,20 @@ func TestParseRemoteArgs_GitRef(t *testing.T) {
assert.Nil(t, verr)
assert.Equal(t, "refs/dolt/custom", params[dbfactory.GitRefParam])
}

func TestRemoteAdd_StoresNormalizedGitUrl(t *testing.T) {
// This mirrors the behavior in addRemote(): for git remotes, we must store the normalized git+* URL
// (in particular, scp-style ssh should never be stored as a schemeless string).
scheme := dbfactory.GitSSHScheme
original := "git@github.com:timsehn/dolt-in-git.git"
normalized, ok, err := env.NormalizeGitRemoteUrl(original)
assert.NoError(t, err)
assert.True(t, ok)
assert.Equal(t, "git+ssh://git@github.com/timsehn/dolt-in-git.git", normalized)

urlToStore := original
if strings.HasPrefix(strings.ToLower(scheme), "git+") {
urlToStore = normalized
}
assert.Equal(t, normalized, urlToStore)
}
16 changes: 16 additions & 0 deletions go/libraries/doltcore/dbfactory/git_remote.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ const (
defaultGitRemoteName = "origin"
)

var ErrGitRemoteHasNoBranches = errors.New("git remote has no branches")

// GitCacheRootProvider provides the local Dolt repo root for per-repo git remote caches.
// Implementations should return ok=false when no repo root is available.
type GitCacheRootProvider interface {
Expand Down Expand Up @@ -116,6 +118,9 @@ func (fact GitRemoteFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFor
if err := ensureGitRemoteURL(ctx, cacheRepo, remoteName, remoteURL.String()); err != nil {
return nil, nil, nil, err
}
if err := ensureRemoteHasBranches(ctx, cacheRepo, remoteName, remoteURL.String()); err != nil {
return nil, nil, nil, err
}

q := nbs.NewUnlimitedMemQuotaProvider()
cs, err := nbs.NewGitStore(ctx, nbf.VersionString(), cacheRepo, ref, blobstore.GitBlobstoreOptions{RemoteName: remoteName}, defaultMemTableSize, q)
Expand All @@ -129,6 +134,17 @@ func (fact GitRemoteFactory) CreateDB(ctx context.Context, nbf *types.NomsBinFor
return db, vrw, ns, nil
}

func ensureRemoteHasBranches(ctx context.Context, gitDir string, remoteName string, remoteURL string) error {
out, err := runGitInDir(ctx, gitDir, "ls-remote", "--heads", "--", remoteName)
if err != nil {
return err
}
if strings.TrimSpace(out) == "" {
return fmt.Errorf("%w: cannot push to %q; initialize the repository with an initial branch/commit first", ErrGitRemoteHasNoBranches, remoteURL)
}
return nil
}

func parseGitRemoteFactoryURL(urlObj *url.URL, params map[string]interface{}) (remoteURL *url.URL, ref string, err error) {
if urlObj == nil {
return nil, "", fmt.Errorf("nil url")
Expand Down
25 changes: 25 additions & 0 deletions go/libraries/doltcore/dbfactory/git_remote_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ func TestGitRemoteFactory_GitFile_CachesUnderRepoDoltDirAndCanWrite(t *testing.T
ctx := context.Background()
remoteRepo, err := gitrepo.InitBare(ctx, filepath.Join(shortTempDir(t), "remote.git"))
require.NoError(t, err)
_, err = remoteRepo.SetRefToTree(ctx, "refs/heads/main", map[string][]byte{"README": []byte("seed\n")}, "seed")
require.NoError(t, err)

localRepoRoot := shortTempDir(t)

Expand Down Expand Up @@ -121,6 +123,8 @@ func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) {
ctx := context.Background()
remoteRepo, err := gitrepo.InitBare(ctx, filepath.Join(shortTempDir(t), "remote.git"))
require.NoError(t, err)
_, err = remoteRepo.SetRefToTree(ctx, "refs/heads/main", map[string][]byte{"README": []byte("seed\n")}, "seed")
require.NoError(t, err)

remotePath := filepath.ToSlash(remoteRepo.GitDir)
urlStr := "git+file://" + remotePath
Expand Down Expand Up @@ -185,3 +189,24 @@ func TestGitRemoteFactory_TwoClientsDistinctCacheDirsRoundtrip(t *testing.T) {
require.Equal(t, "clientB\n", string(gotB.Data()))
require.NoError(t, dbA2.Close())
}

func TestGitRemoteFactory_GitFile_RemoteWithNoBranchesFails(t *testing.T) {
if _, err := exec.LookPath("git"); err != nil {
t.Skip("git not found on PATH")
}

ctx := context.Background()
remoteRepo, err := gitrepo.InitBare(ctx, filepath.Join(shortTempDir(t), "remote.git"))
require.NoError(t, err)

localRepoRoot := shortTempDir(t)
remotePath := filepath.ToSlash(remoteRepo.GitDir)
urlStr := "git+file://" + remotePath
params := map[string]interface{}{
GitCacheRootParam: localRepoRoot,
}

_, _, _, err = CreateDB(ctx, types.Format_Default, urlStr, params)
require.Error(t, err)
require.ErrorIs(t, err, ErrGitRemoteHasNoBranches)
}
52 changes: 52 additions & 0 deletions go/store/nbs/git_blobstore_empty_remote_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,55 @@ func TestNBS_GitBlobstore_EmptyRemote_FirstManifestUpdateBootstrapsRef(t *testin
require.NoError(t, writeManifest(&buf, contents))
require.NotEmpty(t, buf.Bytes())
}

func TestNBS_NewGitStore_DefaultsMaxPartSizeTo50MB(t *testing.T) {
if _, err := exec.LookPath("git"); err != nil {
t.Skip("git not found on PATH")
}

ctx := context.Background()
remoteRepo, err := gitrepo.InitBare(ctx, t.TempDir()+"/remote.git")
require.NoError(t, err)
_, err = remoteRepo.SetRefToTree(ctx, blobstore.DoltDataRef, nil, "seed empty")
require.NoError(t, err)

localRepo, err := gitrepo.InitBare(ctx, t.TempDir()+"/local.git")
require.NoError(t, err)
cmd := exec.CommandContext(ctx, "git", "--git-dir", localRepo.GitDir, "remote", "add", "origin", remoteRepo.GitDir)
out, err := cmd.CombinedOutput()
require.NoError(t, err, "git remote add failed: %s", string(out))

store, err := NewGitStore(ctx, types.Format_DOLT.VersionString(), localRepo.GitDir, blobstore.DoltDataRef, blobstore.GitBlobstoreOptions{}, 0, NewUnlimitedMemQuotaProvider())
require.NoError(t, err)
defer store.Close()

// Assert the underlying blobstore is a GitBlobstore and that chunked writes are enabled by default.
bsp, ok := store.persister.(*blobstorePersister)
require.True(t, ok, "expected persister to be *blobstorePersister, got %T", store.persister)

gbs, ok := bsp.bs.(*blobstore.GitBlobstore)
require.True(t, ok, "expected blobstore to be *blobstore.GitBlobstore, got %T", bsp.bs)

// Use a totalSize larger than the default 50MB threshold, but provide a tiny reader.
// planPutWrites decides chunked-vs-inline based on totalSize, and should pick chunked mode
// when the default max part size is enabled.
_, err = gbs.Put(ctx, "k", int64(50*1024*1024+1), bytes.NewReader([]byte("hi")))
require.NoError(t, err)

// On the remote, key "k" should be represented as a tree containing part "0001".
cmd = exec.CommandContext(ctx, "git", "--git-dir", remoteRepo.GitDir, "rev-parse", "--verify", "--quiet", blobstore.DoltDataRef+"^{commit}")
headBytes, err := cmd.CombinedOutput()
require.NoError(t, err, "git rev-parse failed: %s", string(headBytes))

commit := string(bytes.TrimSpace(headBytes))
cmd = exec.CommandContext(ctx, "git", "--git-dir", remoteRepo.GitDir, "ls-tree", commit, "k")
lsOut, err := cmd.CombinedOutput()
require.NoError(t, err, "git ls-tree failed: %s", string(lsOut))
require.Contains(t, string(lsOut), "\tk\n")
require.Contains(t, string(lsOut), "tree")

cmd = exec.CommandContext(ctx, "git", "--git-dir", remoteRepo.GitDir, "ls-tree", commit, "k/0001")
partOut, err := cmd.CombinedOutput()
require.NoError(t, err, "git ls-tree part failed: %s", string(partOut))
require.Contains(t, string(partOut), "0001")
}
14 changes: 14 additions & 0 deletions go/store/nbs/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ const (
defaultMaxTables = 256

defaultManifestCacheSize = 1 << 23 // 8MB

// defaultGitBlobstoreMaxPartSize is the default maximum size (in bytes) for a single part blob written to a
// git-backed blobstore. Large values may be rejected by some Git remotes.
defaultGitBlobstoreMaxPartSize uint64 = 50 * 1024 * 1024
)

var (
Expand Down Expand Up @@ -628,6 +632,12 @@ func NewOCISStore(ctx context.Context, nbfVerStr string, bucketName, path string
func NewGitStore(ctx context.Context, nbfVerStr string, gitDir string, ref string, opts blobstore.GitBlobstoreOptions, memTableSize uint64, q MemoryQuotaProvider) (*NomsBlockStore, error) {
cacheOnce.Do(makeGlobalCaches)

// A Git remote may reject large blobs. To keep git-backed remotes broadly usable by default, enable
// chunked-object writes with a conservative max part size unless the caller explicitly overrides it.
if opts.MaxPartSize == 0 {
opts.MaxPartSize = defaultGitBlobstoreMaxPartSize
}

bs, err := blobstore.NewGitBlobstoreWithOptions(gitDir, ref, opts)
if err != nil {
return nil, err
Expand All @@ -640,6 +650,10 @@ func NewGitStore(ctx context.Context, nbfVerStr string, gitDir string, ref strin
func NewNoConjoinGitStore(ctx context.Context, nbfVerStr string, gitDir string, ref string, opts blobstore.GitBlobstoreOptions, memTableSize uint64, q MemoryQuotaProvider) (*NomsBlockStore, error) {
cacheOnce.Do(makeGlobalCaches)

if opts.MaxPartSize == 0 {
opts.MaxPartSize = defaultGitBlobstoreMaxPartSize
}

bs, err := blobstore.NewGitBlobstoreWithOptions(gitDir, ref, opts)
if err != nil {
return nil, err
Expand Down
35 changes: 35 additions & 0 deletions integration-tests/bats/remotes-git.bats
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,39 @@ teardown() {
teardown_common
}

seed_git_remote_branch() {
# Create an initial branch on an otherwise-empty bare git remote.
# Dolt git remotes require at least one git branch to exist on the remote.
local remote_git_dir="$1"
local branch="${2:-main}"

local remote_abs
remote_abs="$(cd "$remote_git_dir" && pwd)"

local seed_dir
seed_dir="$(mktemp -d "${BATS_TMPDIR:-/tmp}/seed-repo.XXXXXX")"

(
set -euo pipefail
trap 'rm -rf "$seed_dir"' EXIT
cd "$seed_dir"

git init >/dev/null
git config user.email "bats@email.fake"
git config user.name "Bats Tests"
echo "seed" > README
git add README
git commit -m "seed" >/dev/null
git branch -M "$branch"
git remote add origin "$remote_abs"
git push origin "$branch" >/dev/null
)
}

@test "remotes-git: smoke push/clone/push-back/pull" {
mkdir remote.git
git init --bare remote.git
seed_git_remote_branch remote.git main

mkdir repo1
cd repo1
Expand Down Expand Up @@ -58,6 +88,7 @@ teardown() {
@test "remotes-git: empty remote bootstrap creates refs/dolt/data" {
mkdir remote.git
git init --bare remote.git
seed_git_remote_branch remote.git main

# Assert the dolt data ref doesn't exist yet.
run git --git-dir remote.git show-ref refs/dolt/data
Expand All @@ -82,6 +113,7 @@ teardown() {
@test "remotes-git: pull also fetches branches from git remote" {
mkdir remote.git
git init --bare remote.git
seed_git_remote_branch remote.git main

mkdir repo1
cd repo1
Expand Down Expand Up @@ -115,6 +147,7 @@ teardown() {
@test "remotes-git: pull fetches but does not merge other branches" {
mkdir remote.git
git init --bare remote.git
seed_git_remote_branch remote.git main

mkdir repo1
cd repo1
Expand Down Expand Up @@ -156,6 +189,7 @@ teardown() {
@test "remotes-git: custom --ref writes to configured dolt data ref" {
mkdir remote.git
git init --bare remote.git
seed_git_remote_branch remote.git main

mkdir repo1
cd repo1
Expand Down Expand Up @@ -192,6 +226,7 @@ teardown() {
@test "remotes-git: push works with per-repo git cache under .dolt/" {
mkdir remote.git
git init --bare remote.git
seed_git_remote_branch remote.git main

mkdir repo1
cd repo1
Expand Down
32 changes: 32 additions & 0 deletions integration-tests/bats/sql-remotes-git.bats
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,39 @@ teardown() {
teardown_common
}

seed_git_remote_branch() {
# Create an initial branch on an otherwise-empty bare git remote.
# Dolt git remotes require at least one git branch to exist on the remote.
local remote_git_dir="$1"
local branch="${2:-main}"

local remote_abs
remote_abs="$(cd "$remote_git_dir" && pwd)"

local seed_dir
seed_dir="$(mktemp -d "${BATS_TMPDIR:-/tmp}/seed-repo.XXXXXX")"

(
set -euo pipefail
trap 'rm -rf "$seed_dir"' EXIT
cd "$seed_dir"

git init >/dev/null
git config user.email "bats@email.fake"
git config user.name "Bats Tests"
echo "seed" > README
git add README
git commit -m "seed" >/dev/null
git branch -M "$branch"
git remote add origin "$remote_abs"
git push origin "$branch" >/dev/null
)
}

@test "sql-remotes-git: dolt_remote add supports --ref for git remotes" {
mkdir remote.git
git init --bare remote.git
seed_git_remote_branch remote.git main

mkdir repo1
cd repo1
Expand All @@ -44,6 +74,7 @@ SQL
@test "sql-remotes-git: dolt_clone supports --ref for git remotes" {
mkdir remote.git
git init --bare remote.git
seed_git_remote_branch remote.git main

mkdir repo1
cd repo1
Expand Down Expand Up @@ -78,6 +109,7 @@ SQL
@test "sql-remotes-git: dolt_backup sync-url supports --ref for git remotes" {
mkdir remote.git
git init --bare remote.git
seed_git_remote_branch remote.git main

mkdir repo1
cd repo1
Expand Down
Loading