From 412849de273ac280e711eca3143bf166371e8daf Mon Sep 17 00:00:00 2001 From: Oleksandr Redko Date: Tue, 12 Nov 2024 12:49:41 +0200 Subject: [PATCH] refactor: use giturl pkg instead of forked chainguard-dev/git-urls --- go.mod | 1 - go.sum | 2 - internal/giturl/giturl.go | 115 ++++++++ internal/giturl/giturl_test.go | 472 +++++++++++++++++++++++++++++++++ taskfile/node.go | 5 +- taskfile/node_git.go | 4 +- 6 files changed, 591 insertions(+), 8 deletions(-) create mode 100644 internal/giturl/giturl.go create mode 100644 internal/giturl/giturl_test.go diff --git a/go.mod b/go.mod index 6cec425a2f..bab44dc495 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,6 @@ require ( github.com/Ladicle/tabwriter v1.0.0 github.com/Masterminds/semver/v3 v3.3.0 github.com/alecthomas/chroma/v2 v2.14.0 - github.com/chainguard-dev/git-urls v1.0.2 github.com/davecgh/go-spew v1.1.1 github.com/dominikbraun/graph v0.23.0 github.com/fatih/color v1.18.0 diff --git a/go.sum b/go.sum index d0b6fac4c8..68e302c87f 100644 --- a/go.sum +++ b/go.sum @@ -20,8 +20,6 @@ github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuW github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/bwesterb/go-ristretto v1.2.3/go.mod h1:fUIoIZaG73pV5biE2Blr2xEzDoMj7NFEuV9ekS419A0= -github.com/chainguard-dev/git-urls v1.0.2 h1:pSpT7ifrpc5X55n4aTTm7FFUE+ZQHKiqpiwNkJrVcKQ= -github.com/chainguard-dev/git-urls v1.0.2/go.mod h1:rbGgj10OS7UgZlbzdUQIQpT0k/D4+An04HJY7Ol+Y/o= github.com/cloudflare/circl v1.3.3/go.mod h1:5XYMA4rFBvNIrhs50XuiBJ15vF2pZn4nnUKZrLbUZFA= github.com/cloudflare/circl v1.3.7 h1:qlCDlTPz2n9fu58M0Nh1J/JzcFpfgkFHHX3O35r5vcU= github.com/cloudflare/circl v1.3.7/go.mod h1:sRTcRWXGLrKw6yIGJ+l7amYJFfAXbZG0kBSc8r4zxgA= diff --git a/internal/giturl/giturl.go b/internal/giturl/giturl.go new file mode 100644 index 
0000000000..92e1d29d66 --- /dev/null +++ b/internal/giturl/giturl.go @@ -0,0 +1,115 @@ +// Package giturl parses Git URLs. +// +// These URLs include standard RFC 3986 URLs as well as special formats that +// are specific to Git. Examples are provided in the Git documentation at +// https://mirrors.edge.kernel.org/pub/software/scm/git/docs/git-clone.html +package giturl + +import ( + "fmt" + "net/url" + "regexp" + "strings" +) + +// scpURLMaxLen is max length of the SCP URL to prevent reDOS attacks. +const scpURLMaxLen = 2048 + +var ( + // scpSyntax matches the SCP-like addresses used by Git to access repositories by SSH. + scpSyntax = regexp.MustCompile(`^([a-zA-Z0-9-._~]+@)?([a-zA-Z0-9._-]+):([a-zA-Z0-9./._-]+)(?:\?||$)(.*)$`) + + // transports is a set of known Git URL schemes. + transports = map[string]struct{}{ + "ssh": {}, + "git": {}, + "git+ssh": {}, + "http": {}, + "https": {}, + "ftp": {}, + "ftps": {}, + "rsync": {}, + "file": {}, + } +) + +// parser converts a string into a URL. +type parser func(string) (*url.URL, error) + +// Parse parses rawURL into a URL structure. Parse first attempts to find a standard URL +// with a valid Git transport as its scheme. If that cannot be found, it then attempts +// to find a SCP-like URL. And if that cannot be found, it assumes rawURL is a local path. +// If none of these rules apply, Parse returns an error. +func Parse(rawURL string) (*url.URL, error) { + parsers := []parser{ + parseTransport, + parseSCP, + parseLocal, + } + + // Apply each parser in turn; if the parser succeeds, accept its result and return. + var err error + for _, p := range parsers { + var u *url.URL + u, err = p(rawURL) + if err == nil { + return u, nil + } + } + + // It's unlikely that none of the parsers will succeed, since + // parseLocal is very forgiving. + return nil, fmt.Errorf("failed to parse %q: %w", rawURL, err) +} + +// parseTransport parses rawURL into a URL object. 
Unless the URL's scheme is a known Git transport, +// parseTransport returns an error. +func parseTransport(rawURL string) (*url.URL, error) { + u, err := url.Parse(rawURL) + if err != nil { + return nil, err + } + if _, ok := transports[u.Scheme]; !ok { + return nil, fmt.Errorf("scheme %q is not a valid transport", u.Scheme) + } + return u, nil +} + +// parseSCP parses rawURL into a URL object. The rawURL must be +// an SCP-like URL, otherwise parseSCP returns an error. +func parseSCP(rawURL string) (*url.URL, error) { + if len(rawURL) > scpURLMaxLen { + return nil, fmt.Errorf("URL too long: %q", rawURL) + } + match := scpSyntax.FindAllStringSubmatch(rawURL, -1) + if len(match) == 0 { + return nil, fmt.Errorf("no scp URL found in %q", rawURL) + } + m := match[0] + user := strings.TrimRight(m[1], "@") + var userinfo *url.Userinfo + if user != "" { + userinfo = url.User(user) + } + rawQuery := "" + if len(m) > 3 { + rawQuery = m[4] + } + return &url.URL{ + Scheme: "ssh", + User: userinfo, + Host: m[2], + Path: m[3], + RawQuery: rawQuery, + }, nil +} + +// parseLocal parses rawURL into a URL object with a "file" scheme. +// This will effectively never return an error. 
+func parseLocal(rawURL string) (*url.URL, error) { + return &url.URL{ + Scheme: "file", + Host: "", + Path: rawURL, + }, nil +} diff --git a/internal/giturl/giturl_test.go b/internal/giturl/giturl_test.go new file mode 100644 index 0000000000..b7e100822b --- /dev/null +++ b/internal/giturl/giturl_test.go @@ -0,0 +1,472 @@ +package giturl + +import ( + "net/url" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestParse(t *testing.T) { + tests := []struct { + in string + wantURL *url.URL + wantStr string + wantErr bool + }{ + { + "user@host.xz:path/to/repo.git/", + &url.URL{ + Scheme: "ssh", + User: url.User("user"), + Host: "host.xz", + Path: "path/to/repo.git/", + }, + "ssh://user@host.xz/path/to/repo.git/", + false, + }, + { + "host.xz:path/to/repo.git/", + &url.URL{ + Scheme: "ssh", + Host: "host.xz", + Path: "path/to/repo.git/", + }, + "ssh://host.xz/path/to/repo.git/", + false, + }, + { + "host.xz:/path/to/repo.git/", + &url.URL{ + Scheme: "ssh", + Host: "host.xz", + Path: "/path/to/repo.git/", + }, + "ssh://host.xz/path/to/repo.git/", + false, + }, + { + "host.xz:path/to/repo-with_specials.git/", + &url.URL{ + Scheme: "ssh", + Host: "host.xz", + Path: "path/to/repo-with_specials.git/", + }, + "ssh://host.xz/path/to/repo-with_specials.git/", + false, + }, + { + "git://host.xz/path/to/repo.git/", + &url.URL{ + Scheme: "git", + Host: "host.xz", + Path: "/path/to/repo.git/", + }, + "git://host.xz/path/to/repo.git/", + false, + }, + { + "git://host.xz:1234/path/to/repo.git/", + &url.URL{ + Scheme: "git", + Host: "host.xz:1234", + Path: "/path/to/repo.git/", + }, + "git://host.xz:1234/path/to/repo.git/", + false, + }, + { + "http://host.xz/path/to/repo.git/", + &url.URL{ + Scheme: "http", + Host: "host.xz", + Path: "/path/to/repo.git/", + }, + "http://host.xz/path/to/repo.git/", + false, + }, + { + "http://host.xz:1234/path/to/repo.git/", + &url.URL{ + Scheme: "http", + Host: "host.xz:1234", + 
Path: "/path/to/repo.git/", + }, + "http://host.xz:1234/path/to/repo.git/", + false, + }, + { + "https://host.xz/path/to/repo.git/", + &url.URL{ + Scheme: "https", + Host: "host.xz", + Path: "/path/to/repo.git/", + }, + "https://host.xz/path/to/repo.git/", + false, + }, + { + "https://host.xz:1234/path/to/repo.git/", + &url.URL{ + Scheme: "https", + Host: "host.xz:1234", + Path: "/path/to/repo.git/", + }, + "https://host.xz:1234/path/to/repo.git/", + false, + }, + { + "ftp://host.xz/path/to/repo.git/", + &url.URL{ + Scheme: "ftp", + Host: "host.xz", + Path: "/path/to/repo.git/", + }, + "ftp://host.xz/path/to/repo.git/", + false, + }, + { + "ftp://host.xz:1234/path/to/repo.git/", + &url.URL{ + Scheme: "ftp", + Host: "host.xz:1234", + Path: "/path/to/repo.git/", + }, + "ftp://host.xz:1234/path/to/repo.git/", + false, + }, + { + "ftps://host.xz/path/to/repo.git/", + &url.URL{ + Scheme: "ftps", + Host: "host.xz", + Path: "/path/to/repo.git/", + }, + "ftps://host.xz/path/to/repo.git/", + false, + }, + { + "ftps://host.xz:1234/path/to/repo.git/", + &url.URL{ + Scheme: "ftps", + Host: "host.xz:1234", + Path: "/path/to/repo.git/", + }, + "ftps://host.xz:1234/path/to/repo.git/", + false, + }, + { + "rsync://host.xz/path/to/repo.git/", + &url.URL{ + Scheme: "rsync", + Host: "host.xz", + Path: "/path/to/repo.git/", + }, + "rsync://host.xz/path/to/repo.git/", + false, + }, + { + "ssh://user@host.xz:1234/path/to/repo.git/", + &url.URL{ + Scheme: "ssh", + User: url.User("user"), + Host: "host.xz:1234", + Path: "/path/to/repo.git/", + }, + "ssh://user@host.xz:1234/path/to/repo.git/", + false, + }, + { + "ssh://host.xz:1234/path/to/repo.git/", + &url.URL{ + Scheme: "ssh", + Host: "host.xz:1234", + Path: "/path/to/repo.git/", + }, + "ssh://host.xz:1234/path/to/repo.git/", + false, + }, + { + "ssh://host.xz/path/to/repo.git/", + &url.URL{ + Scheme: "ssh", + Host: "host.xz", + Path: "/path/to/repo.git/", + }, + "ssh://host.xz/path/to/repo.git/", + false, + }, + { + 
"git+ssh://host.xz/path/to/repo.git/", + &url.URL{ + Scheme: "git+ssh", + Host: "host.xz", + Path: "/path/to/repo.git/", + }, + "git+ssh://host.xz/path/to/repo.git/", + false, + }, + { + "/path/to/repo.git/", + &url.URL{ + Scheme: "file", + Path: "/path/to/repo.git/", + }, + "file:///path/to/repo.git/", + false, + }, + { + "file:///path/to/repo.git/", + &url.URL{ + Scheme: "file", + Path: "/path/to/repo.git/", + }, + "file:///path/to/repo.git/", + false, + }, + { + "https://host.xz/organization/repo.git?ref=", + &url.URL{ + Scheme: "https", + Host: "host.xz", + Path: "/organization/repo.git", + RawQuery: "ref=", + }, + "https://host.xz/organization/repo.git?ref=", + false, + }, + { + "https://host.xz/organization/repo.git?ref=test", + &url.URL{ + Scheme: "https", + Host: "host.xz", + Path: "/organization/repo.git", + RawQuery: "ref=test", + }, + "https://host.xz/organization/repo.git?ref=test", + false, + }, + { + "https://host.xz/organization/repo.git?ref=feature/test", + &url.URL{ + Scheme: "https", + Host: "host.xz", + Path: "/organization/repo.git", + RawQuery: "ref=feature/test", + }, + "https://host.xz/organization/repo.git?ref=feature/test", + false, + }, + { + "git@host.xz:organization/repo.git?ref=test", + &url.URL{ + Scheme: "ssh", + User: url.User("git"), + Host: "host.xz", + Path: "organization/repo.git", + RawQuery: "ref=test", + }, + "ssh://git@host.xz/organization/repo.git?ref=test", + false, + }, + { + "git@host.xz:organization/repo.git?ref=feature/test", + &url.URL{ + Scheme: "ssh", + User: url.User("git"), + Host: "host.xz", + Path: "organization/repo.git", + RawQuery: "ref=feature/test", + }, + "ssh://git@host.xz/organization/repo.git?ref=feature/test", + false, + }, + { + "https://user:password@host.xz/organization/repo.git/", + &url.URL{ + Scheme: "https", + User: url.UserPassword("user", "password"), + Host: "host.xz", + Path: "/organization/repo.git/", + }, + "https://user:password@host.xz/organization/repo.git/", + false, + }, + { + 
"https://user:password@host.xz/organization/repo.git?ref=test", + &url.URL{ + Scheme: "https", + User: url.UserPassword("user", "password"), + Host: "host.xz", + Path: "/organization/repo.git", + RawQuery: "ref=test", + }, + "https://user:password@host.xz/organization/repo.git?ref=test", + false, + }, + { + "https://user:password@host.xz/organization/repo.git?ref=feature/test", + &url.URL{ + Scheme: "https", + User: url.UserPassword("user", "password"), + Host: "host.xz", + Path: "/organization/repo.git", + RawQuery: "ref=feature/test", + }, + "https://user:password@host.xz/organization/repo.git?ref=feature/test", + false, + }, + { + "user-1234@host.xz:path/to/repo.git/", + &url.URL{ + Scheme: "ssh", + User: url.User("user-1234"), + Host: "host.xz", + Path: "path/to/repo.git/", + }, + "ssh://user-1234@host.xz/path/to/repo.git/", + false, + }, + } + + for _, tt := range tests { + + got, err := Parse(tt.in) + if tt.wantErr { + require.Error(t, err) + } else { + require.NoError(t, err) + assert.Equal(t, tt.wantURL, got) + assert.Equal(t, tt.wantStr, got.String()) + } + } +} + +func TestParseSCP(t *testing.T) { + tests := []struct { + in string + wantURL *url.URL + wantErr bool + }{ + { + "user@host.xz:path/to/repo.git/", + &url.URL{ + Scheme: "ssh", + User: url.User("user"), + Host: "host.xz", + Path: "path/to/repo.git/", + }, + false, + }, + { + "host.xz:path/to/repo.git/", + &url.URL{ + Scheme: "ssh", + Host: "host.xz", + Path: "path/to/repo.git/", + }, + false, + }, + { + "host.xz:/path/to/repo.git/", + &url.URL{ + Scheme: "ssh", + Host: "host.xz", + Path: "/path/to/repo.git/", + }, false, + }, + { + "invalid-scp-url", + nil, + true, + }, + { + "https://example.com/" + strings.Repeat("a", 4049), + nil, + true, + }, + } + + for _, tt := range tests { + got, err := parseSCP(tt.in) + if tt.wantErr { + require.Error(t, err) + } else { + require.NoError(t, err) + assert.Equal(t, tt.wantURL, got) + } + } +} + +func TestParseTransport(t *testing.T) { + tests := []struct { 
+ in string + wantURL *url.URL + wantErr bool + }{ + { + "git://host.xz/path/to/repo.git/", + &url.URL{ + Scheme: "git", + Host: "host.xz", + Path: "/path/to/repo.git/", + }, + false, + }, + { + "http://host.xz/path/to/repo.git/", + &url.URL{ + Scheme: "http", + Host: "host.xz", + Path: "/path/to/repo.git/", + }, + false, + }, + { + "https://host.xz/path/to/repo.git/", + &url.URL{ + Scheme: "https", + Host: "host.xz", + Path: "/path/to/repo.git/", + }, + false, + }, + { + "ftp://host.xz/path/to/repo.git/", + &url.URL{ + Scheme: "ftp", + Host: "host.xz", + Path: "/path/to/repo.git/", + }, + false, + }, + { + "invalid://host.xz/path/to/repo.git/", + nil, + true, + }, + } + + for _, tt := range tests { + got, err := parseTransport(tt.in) + if tt.wantErr { + require.Error(t, err) + } else { + require.NoError(t, err) + assert.Equal(t, tt.wantURL, got) + } + } +} + +func TestParseLocal(t *testing.T) { + rawURL := "/path/to/repo.git/" + + u, err := parseLocal(rawURL) + + require.NoError(t, err) + assert.Equal(t, "file", u.Scheme) + assert.Empty(t, u.Host) + assert.Equal(t, "/path/to/repo.git/", u.Path) +} diff --git a/taskfile/node.go b/taskfile/node.go index 74b3245eaf..87a7d977a2 100644 --- a/taskfile/node.go +++ b/taskfile/node.go @@ -7,10 +7,9 @@ import ( "strings" "time" - giturls "github.com/chainguard-dev/git-urls" - "github.com/go-task/task/v3/errors" "github.com/go-task/task/v3/internal/experiments" + "github.com/go-task/task/v3/internal/giturl" "github.com/go-task/task/v3/internal/logger" ) @@ -71,7 +70,7 @@ func NewNode( } func getScheme(uri string) (string, error) { - u, err := giturls.Parse(uri) + u, err := giturl.Parse(uri) if u == nil { return "", err } diff --git a/taskfile/node_git.go b/taskfile/node_git.go index 557986d541..90a40f70fd 100644 --- a/taskfile/node_git.go +++ b/taskfile/node_git.go @@ -8,7 +8,6 @@ import ( "path/filepath" "strings" - giturls "github.com/chainguard-dev/git-urls" "github.com/go-git/go-billy/v5/memfs" 
"github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/plumbing" @@ -17,6 +16,7 @@ import ( "github.com/go-task/task/v3/errors" "github.com/go-task/task/v3/internal/execext" "github.com/go-task/task/v3/internal/filepathext" + "github.com/go-task/task/v3/internal/giturl" ) // An GitNode is a node that reads a Taskfile from a remote location via Git. @@ -35,7 +35,7 @@ func NewGitNode( opts ...NodeOption, ) (*GitNode, error) { base := NewBaseNode(dir, opts...) - u, err := giturls.Parse(entrypoint) + u, err := giturl.Parse(entrypoint) if err != nil { return nil, err }