diff --git a/modules/git/gitcmd/command.go b/modules/git/gitcmd/command.go index e9b51802febf3..ee447dfd032c7 100644 --- a/modules/git/gitcmd/command.go +++ b/modules/git/gitcmd/command.go @@ -57,14 +57,12 @@ type Command struct { } func logArgSanitize(arg string) string { - if strings.Contains(arg, "://") && strings.Contains(arg, "@") { - return util.SanitizeCredentialURLs(arg) - } else if filepath.IsAbs(arg) { + if filepath.IsAbs(arg) { base := filepath.Base(arg) dir := filepath.Dir(arg) return ".../" + filepath.Join(filepath.Base(dir), base) } - return arg + return util.SanitizeCredentialURLs(arg) } func (c *Command) LogString() string { diff --git a/modules/git/gitcmd/command_test.go b/modules/git/gitcmd/command_test.go index 19ec02b8088dd..1a9bfe7d75276 100644 --- a/modules/git/gitcmd/command_test.go +++ b/modules/git/gitcmd/command_test.go @@ -109,7 +109,10 @@ func TestCommandString(t *testing.T) { assert.Equal(t, cmd.prog+` a "-m msg" "it's a test" "say \"hello\""`, cmd.LogString()) cmd = NewCommand("url: https://a:b@c/", "/root/dir-a/dir-b") - assert.Equal(t, cmd.prog+` "url: https://sanitized-credential@c/" .../dir-a/dir-b`, cmd.LogString()) + assert.Equal(t, cmd.prog+` "url: https://(masked)@c/" .../dir-a/dir-b`, cmd.LogString()) + + cmd = NewCommand("url: a:b@c/", "/root/dir-a/dir-b") + assert.Equal(t, cmd.prog+` "url: (masked)@c/" .../dir-a/dir-b`, cmd.LogString()) } func TestRunStdError(t *testing.T) { diff --git a/modules/util/sanitize.go b/modules/util/sanitize.go index 0dd8b342a2a4f..88ca34788fc61 100644 --- a/modules/util/sanitize.go +++ b/modules/util/sanitize.go @@ -5,7 +5,8 @@ package util import ( "bytes" - "unicode" + "net" + "strings" ) type sanitizedError struct { @@ -25,48 +26,103 @@ func SanitizeErrorCredentialURLs(err error) error { return sanitizedError{err: err} } -const userPlaceholder = "sanitized-credential" - var schemeSep = []byte("://") -// SanitizeCredentialURLs remove all credentials in URLs (starting with "scheme://") for the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com" +const userInfoPlaceholder = "(masked)" + +// SanitizeCredentialURLs remove all credentials in URLs for the input string: +// * "https://userinfo@domain.com" => "https://***@domain.com" +// * "user:pass@domain.com" => "***@domain.com" +// "***" is a magic string internally used, doesn't guarantee to be anything. func SanitizeCredentialURLs(s string) string { - bs := UnsafeStringToBytes(s) - schemeSepPos := bytes.Index(bs, schemeSep) - if schemeSepPos == -1 || bytes.IndexByte(bs[schemeSepPos:], '@') == -1 { - return s // fast return if there is no URL scheme or no userinfo + sepColPos := strings.Index(s, ":") + if sepColPos == -1 { + return s // fast path: no colon, unlikely contain any URL credential } - out := make([]byte, 0, len(bs)+len(userPlaceholder)) - for schemeSepPos != -1 { - schemeSepPos += 3 // skip the "://" - sepAtPos := -1 // the possible '@' position: "https://foo@[^here]host" - sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test" - sepLoop: - for ; sepEndPos < len(bs); sepEndPos++ { - c := bs[sepEndPos] - if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') { - continue - } + sepAtPos := strings.Index(s[sepColPos+1:], "@") + for sepAtPos == -1 { + return s // fast path: no "@" after colon, unlikely contain any URL credential + } + sepAtPos += sepColPos + 1 + + res := make([]byte, 0, len(s)+len(userInfoPlaceholder)) // a best guess to avoid too many re-allocations + bs := UnsafeStringToBytes(s) + for { + // left part (before "@") is likely to be the "userinfo" (single username, or "username:password") + leftPos := sepAtPos - 1 + leftLoop: + for leftPos >= 0 { + c := bs[leftPos] switch c { - case '@': - sepAtPos = sepEndPos case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%': - continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars + // RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars + default: + valid := 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' + if !valid { + break leftLoop + } + } + leftPos-- + } + // left pos should point to the beginning of the left part, this pos is always valid in the buffer + leftPos++ + + // right part is likely to be the host (domain name, ip address) + rightPos := sepAtPos + 1 + rightLoop: + for rightPos < len(bs) { + c := bs[rightPos] + switch c { + case '.', '-': + // valid host char + case '[': + // ipv6 begin + if rightPos != sepAtPos+1 { + break rightLoop + } + case ']': + // ipv6 end + rightPos++ + break rightLoop default: - break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop + valid := 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' + if bs[sepAtPos+1] == '[' { + // ipv6 host + valid = 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' || '0' <= c && c <= '9' || c == ':' + } + if !valid { + break rightLoop + } } + rightPos++ } - // if there is '@', and the string is like "s://u@h", then hide the "u" part - if sepAtPos != -1 && (schemeSepPos >= 4 && unicode.IsLetter(rune(bs[schemeSepPos-4]))) && sepAtPos-schemeSepPos > 0 && sepEndPos-sepAtPos > 0 { - out = append(out, bs[:schemeSepPos]...) - out = append(out, userPlaceholder...) - out = append(out, bs[sepAtPos:sepEndPos]...) + + leading, leftPart, rightPart := bs[:leftPos], bs[leftPos:sepAtPos], bs[sepAtPos+1:rightPos] + + // Either: + // * git log message: "user:pass@host" (it contains a colon in userinfo), ignore "git@host" pattern + // * http like URL: "https://userinfo@host.com" (it has "://" before the userinfo) + needSanitize := bytes.IndexByte(leftPart, ':') >= 0 || bytes.HasSuffix(leading, schemeSep) + needSanitize = needSanitize && len(leftPart) > 0 && len(rightPart) > 0 + // TODO: can also do more checks for right part + // for example: ipv6 quick check + if needSanitize && rightPart[0] == '[' { + needSanitize = rightPart[len(rightPart)-1] == ']' && net.ParseIP(UnsafeBytesToString(rightPart[1:len(rightPart)-1])) != nil + } + if needSanitize { + res = append(res, leading...) + res = append(res, userInfoPlaceholder...) + res = append(res, '@') + res = append(res, rightPart...) } else { - out = append(out, bs[:sepEndPos]...) + res = append(res, bs[:rightPos]...) + } + bs = bs[rightPos:] + sepAtPos = bytes.IndexByte(bs, '@') + if sepAtPos == -1 { + break } - bs = bs[sepEndPos:] - schemeSepPos = bytes.Index(bs, schemeSep) } - out = append(out, bs...) - return UnsafeBytesToString(out) + res = append(res, bs...) + return UnsafeBytesToString(res) } diff --git a/modules/util/sanitize_test.go b/modules/util/sanitize_test.go index 0bcfd45ca4100..c1a80016f8d15 100644 --- a/modules/util/sanitize_test.go +++ b/modules/util/sanitize_test.go @@ -13,7 +13,7 @@ import ( func TestSanitizeErrorCredentialURLs(t *testing.T) { err := errors.New("error with https://a@b.com") se := SanitizeErrorCredentialURLs(err) - assert.Equal(t, "error with https://"+userPlaceholder+"@b.com", se.Error()) + assert.Equal(t, "error with https://"+userInfoPlaceholder+"@b.com", se.Error()) } func TestSanitizeCredentialURLs(t *testing.T) { @@ -27,15 +27,35 @@ func TestSanitizeCredentialURLs(t *testing.T) { }, { "https://mytoken@github.com/go-gitea/test_repo.git", - "https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git", + "https://" + userInfoPlaceholder + "@github.com/go-gitea/test_repo.git", }, { "https://user:password@github.com/go-gitea/test_repo.git", - "https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git", + "https://" + userInfoPlaceholder + "@github.com/go-gitea/test_repo.git", }, { + "https://user:password@[::]/go-gitea/test_repo.git", + "https://" + userInfoPlaceholder + "@[::]/go-gitea/test_repo.git", + }, + { + "https://user:password@[2001:db8::1]:8080/go-gitea/test_repo.git", + "https://" + userInfoPlaceholder + "@[2001:db8::1]:8080/go-gitea/test_repo.git", + }, + { + "see https://u:p@[::1]/x and https://u2:p2@h2", + "see https://" + userInfoPlaceholder + "@[::1]/x and https://" + userInfoPlaceholder + "@h2", + }, + { + "https://user:secret@[unclosed-ipv6", + "https://user:secret@[unclosed-ipv6", + }, + { + "https://user:secret@[invalid-ipv6]", + "https://user:secret@[invalid-ipv6]", + }, + { + "ftp://x@", "ftp://x@", - "ftp://" + userPlaceholder + "@", }, { "ftp://x/@", @@ -43,27 +63,40 @@ func TestSanitizeCredentialURLs(t *testing.T) { }, { "ftp://u@x/@", // test multiple @ chars - "ftp://" + userPlaceholder + "@x/@", + "ftp://" + userInfoPlaceholder + "@x/@", }, { "😊ftp://u@x😊", // test unicode - "😊ftp://" + userPlaceholder + "@x😊", + "😊ftp://" + userInfoPlaceholder + "@x😊", }, { "://@", "://@", }, { - "//u:p@h", // do not process URLs without explicit scheme, they are not treated as "valid" URLs because there is no scheme context in string "//u:p@h", + "//" + userInfoPlaceholder + "@h", }, { - "s://u@h", // the minimal pattern to be sanitized - "s://" + userPlaceholder + "@h", + "s://u@h", + "s://" + userInfoPlaceholder + "@h", }, { "URLs in log https://u:b@h and https://u:b@h:80/, with https://h.com and u@h.com", - "URLs in log https://" + userPlaceholder + "@h and https://" + userPlaceholder + "@h:80/, with https://h.com and u@h.com", + "URLs in log https://" + userInfoPlaceholder + "@h and https://" + userInfoPlaceholder + "@h:80/, with https://h.com and u@h.com", + }, + { + "fatal: unable to look up username:token@github.com (port 9418)", + "fatal: unable to look up " + userInfoPlaceholder + "@github.com (port 9418)", + }, + { + "git failed for user:token@github.com/go-gitea/test_repo.git", + "git failed for " + userInfoPlaceholder + "@github.com/go-gitea/test_repo.git", + }, + { + // SSH-form git URL ("git@host:path") must not let a later credential URL through + "failed remote git@github.com:foo, retried via https://user:tok@github.com/foo", + "failed remote git@github.com:foo, retried via https://" + userInfoPlaceholder + "@github.com/foo", }, } diff --git a/services/migrations/migrate.go b/services/migrations/migrate.go index 99f8dba92f470..0cdd96496ddbc 100644 --- a/services/migrations/migrate.go +++ b/services/migrations/migrate.go @@ -218,7 +218,7 @@ func migrateRepository(ctx context.Context, doer *user_model.User, downloader ba // We don't actually need to check the OriginalURL as it isn't used anywhere } - log.Trace("migrating git data from %s", repo.CloneURL) + log.Trace("migrating git data from %s", util.SanitizeCredentialURLs(repo.CloneURL)) messenger("repo.migrate.migrating_git") if err = uploader.CreateRepo(ctx, repo, opts); err != nil { return err