Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions modules/git/gitcmd/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,12 @@ type Command struct {
}

func logArgSanitize(arg string) string {
if strings.Contains(arg, "://") && strings.Contains(arg, "@") {
return util.SanitizeCredentialURLs(arg)
} else if filepath.IsAbs(arg) {
if filepath.IsAbs(arg) {
base := filepath.Base(arg)
dir := filepath.Dir(arg)
return ".../" + filepath.Join(filepath.Base(dir), base)
}
return arg
return util.SanitizeCredentialURLs(arg)
}

func (c *Command) LogString() string {
Expand Down
5 changes: 4 additions & 1 deletion modules/git/gitcmd/command_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,10 @@ func TestCommandString(t *testing.T) {
assert.Equal(t, cmd.prog+` a "-m msg" "it's a test" "say \"hello\""`, cmd.LogString())

cmd = NewCommand("url: https://a:b@c/", "/root/dir-a/dir-b")
assert.Equal(t, cmd.prog+` "url: https://sanitized-credential@c/" .../dir-a/dir-b`, cmd.LogString())
assert.Equal(t, cmd.prog+` "url: https://(masked)@c/" .../dir-a/dir-b`, cmd.LogString())

cmd = NewCommand("url: a:b@c/", "/root/dir-a/dir-b")
assert.Equal(t, cmd.prog+` "url: (masked)@c/" .../dir-a/dir-b`, cmd.LogString())
}

func TestRunStdError(t *testing.T) {
Expand Down
122 changes: 89 additions & 33 deletions modules/util/sanitize.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ package util

import (
"bytes"
"unicode"
"net"
"strings"
)

type sanitizedError struct {
Expand All @@ -25,48 +26,103 @@ func SanitizeErrorCredentialURLs(err error) error {
return sanitizedError{err: err}
}

const userPlaceholder = "sanitized-credential"

var schemeSep = []byte("://")

// SanitizeCredentialURLs remove all credentials in URLs (starting with "scheme://") for the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com"
const userInfoPlaceholder = "(masked)"

// SanitizeCredentialURLs remove all credentials in URLs for the input string:
// * "https://userinfo@domain.com" => "https://***@domain.com"
// * "user:pass@domain.com" => "***@domain.com"
// "***" is a magic string internally used, doesn't guarantee to be anything.
func SanitizeCredentialURLs(s string) string {
bs := UnsafeStringToBytes(s)
schemeSepPos := bytes.Index(bs, schemeSep)
if schemeSepPos == -1 || bytes.IndexByte(bs[schemeSepPos:], '@') == -1 {
return s // fast return if there is no URL scheme or no userinfo
sepColPos := strings.Index(s, ":")
if sepColPos == -1 {
return s // fast path: no colon, unlikely contain any URL credential
}
out := make([]byte, 0, len(bs)+len(userPlaceholder))
for schemeSepPos != -1 {
schemeSepPos += 3 // skip the "://"
sepAtPos := -1 // the possible '@' position: "https://foo@[^here]host"
sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test"
sepLoop:
for ; sepEndPos < len(bs); sepEndPos++ {
c := bs[sepEndPos]
if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') {
continue
}
sepAtPos := strings.Index(s[sepColPos+1:], "@")
for sepAtPos == -1 {
return s // fast path: no "@" after colon, unlikely contain any URL credential
}
sepAtPos += sepColPos + 1

res := make([]byte, 0, len(s)+len(userInfoPlaceholder)) // a best guess to avoid too many re-allocations
bs := UnsafeStringToBytes(s)
for {
// left part (before "@") is likely to be the "userinfo" (single username, or "username:password")
leftPos := sepAtPos - 1
leftLoop:
for leftPos >= 0 {
c := bs[leftPos]
switch c {
case '@':
sepAtPos = sepEndPos
case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%':
continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
// RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
default:
valid := 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9'
if !valid {
break leftLoop
}
}
leftPos--
}
// left pos should point to the beginning of the left part, this pos is always valid in the buffer
leftPos++

// right part is likely to be the host (domain name, ip address)
rightPos := sepAtPos + 1
rightLoop:
for rightPos < len(bs) {
c := bs[rightPos]
switch c {
case '.', '-':
// valid host char
case '[':
// ipv6 begin
if rightPos != sepAtPos+1 {
break rightLoop
}
case ']':
// ipv6 end
rightPos++
break rightLoop
default:
break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop
valid := 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9'
if bs[sepAtPos+1] == '[' {
// ipv6 host
valid = 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' || '0' <= c && c <= '9' || c == ':'
}
if !valid {
break rightLoop
}
}
rightPos++
}
// if there is '@', and the string is like "s://u@h", then hide the "u" part
if sepAtPos != -1 && (schemeSepPos >= 4 && unicode.IsLetter(rune(bs[schemeSepPos-4]))) && sepAtPos-schemeSepPos > 0 && sepEndPos-sepAtPos > 0 {
out = append(out, bs[:schemeSepPos]...)
out = append(out, userPlaceholder...)
out = append(out, bs[sepAtPos:sepEndPos]...)

leading, leftPart, rightPart := bs[:leftPos], bs[leftPos:sepAtPos], bs[sepAtPos+1:rightPos]

// Either:
// * git log message: "user:pass@host" (it contains a colon in userinfo), ignore "git@host" pattern
// * http like URL: "https://userinfo@host.com" (it has "://" before the userinfo)
needSanitize := bytes.IndexByte(leftPart, ':') >= 0 || bytes.HasSuffix(leading, schemeSep)
needSanitize = needSanitize && len(leftPart) > 0 && len(rightPart) > 0
// TODO: can also do more checks for right part
// for example: ipv6 quick check
if needSanitize && rightPart[0] == '[' {
needSanitize = rightPart[len(rightPart)-1] == ']' && net.ParseIP(UnsafeBytesToString(rightPart[1:len(rightPart)-1])) != nil
}
if needSanitize {
res = append(res, leading...)
res = append(res, userInfoPlaceholder...)
res = append(res, '@')
res = append(res, rightPart...)
} else {
out = append(out, bs[:sepEndPos]...)
res = append(res, bs[:rightPos]...)
}
bs = bs[rightPos:]
Comment thread
wxiaoguang marked this conversation as resolved.
sepAtPos = bytes.IndexByte(bs, '@')
if sepAtPos == -1 {
break
}
bs = bs[sepEndPos:]
schemeSepPos = bytes.Index(bs, schemeSep)
}
out = append(out, bs...)
return UnsafeBytesToString(out)
res = append(res, bs...)
return UnsafeBytesToString(res)
}
53 changes: 43 additions & 10 deletions modules/util/sanitize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (
func TestSanitizeErrorCredentialURLs(t *testing.T) {
err := errors.New("error with https://a@b.com")
se := SanitizeErrorCredentialURLs(err)
assert.Equal(t, "error with https://"+userPlaceholder+"@b.com", se.Error())
assert.Equal(t, "error with https://"+userInfoPlaceholder+"@b.com", se.Error())
}

func TestSanitizeCredentialURLs(t *testing.T) {
Expand All @@ -27,43 +27,76 @@ func TestSanitizeCredentialURLs(t *testing.T) {
},
{
"https://mytoken@github.com/go-gitea/test_repo.git",
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
"https://" + userInfoPlaceholder + "@github.com/go-gitea/test_repo.git",
},
{
"https://user:password@github.com/go-gitea/test_repo.git",
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
"https://" + userInfoPlaceholder + "@github.com/go-gitea/test_repo.git",
},
{
"https://user:password@[::]/go-gitea/test_repo.git",
"https://" + userInfoPlaceholder + "@[::]/go-gitea/test_repo.git",
},
{
"https://user:password@[2001:db8::1]:8080/go-gitea/test_repo.git",
"https://" + userInfoPlaceholder + "@[2001:db8::1]:8080/go-gitea/test_repo.git",
},
{
"see https://u:p@[::1]/x and https://u2:p2@h2",
"see https://" + userInfoPlaceholder + "@[::1]/x and https://" + userInfoPlaceholder + "@h2",
},
{
"https://user:secret@[unclosed-ipv6",
"https://user:secret@[unclosed-ipv6",
},
{
"https://user:secret@[invalid-ipv6]",
"https://user:secret@[invalid-ipv6]",
},
{
"ftp://x@",
"ftp://x@",
"ftp://" + userPlaceholder + "@",
},
{
"ftp://x/@",
"ftp://x/@",
},
{
"ftp://u@x/@", // test multiple @ chars
"ftp://" + userPlaceholder + "@x/@",
"ftp://" + userInfoPlaceholder + "@x/@",
},
{
"😊ftp://u@x😊", // test unicode
"😊ftp://" + userPlaceholder + "@x😊",
"😊ftp://" + userInfoPlaceholder + "@x😊",
},
{
"://@",
"://@",
},
{
"//u:p@h", // do not process URLs without explicit scheme, they are not treated as "valid" URLs because there is no scheme context in string
"//u:p@h",
"//" + userInfoPlaceholder + "@h",
},
{
"s://u@h", // the minimal pattern to be sanitized
"s://" + userPlaceholder + "@h",
"s://u@h",
"s://" + userInfoPlaceholder + "@h",
},
{
"URLs in log https://u:b@h and https://u:b@h:80/, with https://h.com and u@h.com",
"URLs in log https://" + userPlaceholder + "@h and https://" + userPlaceholder + "@h:80/, with https://h.com and u@h.com",
"URLs in log https://" + userInfoPlaceholder + "@h and https://" + userInfoPlaceholder + "@h:80/, with https://h.com and u@h.com",
},
{
"fatal: unable to look up username:token@github.com (port 9418)",
"fatal: unable to look up " + userInfoPlaceholder + "@github.com (port 9418)",
},
{
"git failed for user:token@github.com/go-gitea/test_repo.git",
"git failed for " + userInfoPlaceholder + "@github.com/go-gitea/test_repo.git",
},
{
// SSH-form git URL ("git@host:path") must not let a later credential URL through
"failed remote git@github.com:foo, retried via https://user:tok@github.com/foo",
"failed remote git@github.com:foo, retried via https://" + userInfoPlaceholder + "@github.com/foo",
},
}

Expand Down
2 changes: 1 addition & 1 deletion services/migrations/migrate.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ func migrateRepository(ctx context.Context, doer *user_model.User, downloader ba
// We don't actually need to check the OriginalURL as it isn't used anywhere
}

log.Trace("migrating git data from %s", repo.CloneURL)
log.Trace("migrating git data from %s", util.SanitizeCredentialURLs(repo.CloneURL))
Comment thread
wxiaoguang marked this conversation as resolved.
messenger("repo.migrate.migrating_git")
if err = uploader.CreateRepo(ctx, repo, opts); err != nil {
return err
Expand Down