Skip to content

Commit

Permalink
fix(azure): detect more scenarios
Browse files Browse the repository at this point in the history
  • Loading branch information
rgmz committed Nov 3, 2023
1 parent 50a3a82 commit d5d8489
Show file tree
Hide file tree
Showing 4 changed files with 573 additions and 41 deletions.
224 changes: 183 additions & 41 deletions pkg/detectors/azure/azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ package azure

import (
"context"
"fmt"
"github.com/Azure/go-autorest/autorest/adal"
"github.com/go-errors/errors"
"regexp"
"slices"
"strings"

"github.com/Azure/go-autorest/autorest/azure/auth"
Expand All @@ -17,78 +19,218 @@ type Scanner struct{}
// Ensure the Scanner satisfies the interface at compile time.
var _ detectors.Detector = (*Scanner)(nil)

func mustFmtPat(id, pat string) *regexp.Regexp {
combinedID := strings.ReplaceAll(id, "_", "") + "|" + id
return regexp.MustCompile(fmt.Sprintf(pat, combinedID))
// uuidPattern compiles a case-insensitive expression that looks for any of the
// given identifier sub-patterns, followed by at most 80 arbitrary (non-newline)
// characters, followed by a UUID. The UUID is the only capture group, so it is
// always available as submatch index 1.
//
// The identifiers are inserted verbatim as regular-expression alternatives;
// callers are responsible for escaping literal metacharacters. An invalid
// resulting expression makes regexp.MustCompile panic.
func uuidPattern(identifiers ...string) *regexp.Regexp {
	const (
		prefix = `(?i)(?:`
		suffix = `).{0,80}([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})`
	)
	alternatives := strings.Join(identifiers, "|")
	return regexp.MustCompile(prefix + alternatives + suffix)
}

// https://learn.microsoft.com/en-us/cli/azure/authenticate-azure-cli-service-principal
var (
// TODO: Azure storage access keys and investigate other types of creds.

// Azure App Oauth
idPatFmt = `(?i)(%s).{0,20}([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})`
clientIDPat = mustFmtPat("client_id", idPatFmt)
tenantIDPat = mustFmtPat("tenant_id", idPatFmt)

// TODO: Support `.onmicrosoft.com` tenant IDs.
tenantIDPat = uuidPattern("t[ae]n[ae]nt(?:[._-]?id)?", `login\.microsoftonline\.com/`)
// TODO: support URL-based client IDs
clientIDPat = regexp.MustCompile(uuidPattern("(?:app(?:lication)?|client)[._-]?id", "username", "-u"))
// TODO: support old patterns
secretPatFmt = `(?i)(%s).{0,20}([a-z0-9_\.\-~]{34})`
clientSecretPat = mustFmtPat("client_secret", secretPatFmt)
// TODO: Azure storage access keys and investigate other types of creds.
clientSecretPat = regexp.MustCompile(`([a-zA-Z0-9_~.-]{3}\dQ~[a-zA-Z0-9_~.-]{31,34})`)
)

// Keywords are used for efficiently pre-filtering chunks.
// Use identifiers in the secret preferably, or the provider name.
//
// "Q~" is the literal marker that clientSecretPat requires in every client
// secret, so it is used instead of the provider name: a chunk can contain a
// secret without ever mentioning "azure".
func (s Scanner) Keywords() []string {
	return []string{"Q~"}
}

// FromData will find and optionally verify Azure secrets in a given set of bytes.
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
dataStr := string(data)

clientSecretMatches := clientSecretPat.FindAllStringSubmatch(dataStr, -1)
for _, clientSecret := range clientSecretMatches {
tenantIDMatches := tenantIDPat.FindAllStringSubmatch(dataStr, -1)
for _, tenantID := range tenantIDMatches {
clientIDMatches := clientIDPat.FindAllStringSubmatch(dataStr, -1)
for _, clientID := range clientIDMatches {
s := detectors.Result{
DetectorType: detectorspb.DetectorType_Azure,
Raw: []byte(clientSecret[2]),
RawV2: []byte(clientID[2] + clientSecret[2] + tenantID[2]),
Redacted: clientID[2],
}
// Set the RotationGuideURL in the ExtraData
s.ExtraData = map[string]string{
"rotation_guide": "https://howtorotate.com/docs/tutorials/azure/",
// Create a deduplicated set of results.
// This helps performance in large chunks with lots of duplicates.
clientSecrets := make(map[string]bool)
for _, match := range clientSecretPat.FindAllStringSubmatch(dataStr, -1) {
clientSecrets[match[1]] = true
}
if len(clientSecrets) == 0 {
return results, nil
}
clientIds := make(map[string]bool)
for _, match := range clientIDPat.FindAllStringSubmatch(dataStr, -1) {
if detectors.IsKnownFalsePositiveUuid(match[1]) {
continue
}
clientIds[match[1]] = true
}
// A client secret without a client id is useless.
//if len(clientIds) == 0 {
// return results, nil
//}
tenantIds := make(map[string]bool)
for _, match := range tenantIDPat.FindAllStringSubmatch(dataStr, -1) {
if detectors.IsKnownFalsePositiveUuid(match[1]) {
continue
}
tenantIds[match[1]] = true
}

processedResults := processData(clientSecrets, clientIds, tenantIds, verify)
for _, result := range processedResults {
results = append(results, result)
}
return results, nil
}

func processData(clientSecrets, clientIds, tenantIds map[string]bool, verify bool) (results []detectors.Result) {
invalidTenantsForClientId := make(map[string][]string)

SecretLoop:
for clientSecret, _ := range clientSecrets {
clientSecret := clientSecret
secretReported := false

var s detectors.Result
var clientId string
var tenantId string

IdLoop:
for cId, _ := range clientIds {
clientId = cId

for tId, _ := range tenantIds {
tenantId = tId

// Skip known invalid tenants.
if slices.Contains(invalidTenantsForClientId[clientId], tenantId) {
continue
}

if verify {
cred := auth.NewClientCredentialsConfig(clientID[2], clientSecret[2], tenantID[2])
s = createResult(clientSecret, clientId, tenantId)

// https://learn.microsoft.com/en-us/entra/identity-platform/v2-oauth2-auth-code-flow#request-an-access-token-with-a-client_secret
cred := auth.NewClientCredentialsConfig(clientId, clientSecret, tenantId)
token, err := cred.ServicePrincipalToken()
if err != nil {
// This can only fail if a value is empty, which shouldn't be possible.
continue
}

err = token.Refresh()
if err == nil {
if err != nil {
var refreshError adal.TokenRefreshError
if ok := errors.As(err, &refreshError); ok {
resp := refreshError.Response()
defer resp.Body.Close()

status := resp.StatusCode
errStr := refreshError.Error()
if status == 400 {
if strings.Contains(errStr, `"error_description":"AADSTS90002:`) {
// Tenant doesn't exist
delete(tenantIds, tenantId)
continue
} else if strings.Contains(errStr, `"error_description":"AADSTS700016:`) {
// Tenant is valid but the ClientID doesn't exist.
invalidTenantsForClientId[clientId] = append(invalidTenantsForClientId[clientId], tenantId)
continue
} else {
// Unexpected error.
s.VerificationError = refreshError
break
}
} else if status == 401 {
// Tenant exists and the clientID is valid, but something is wrong.
if strings.Contains(errStr, `"error_description":"AADSTS7000215:`) {
// Secret is not valid.
setValidTenantIdForClientId(clientId, tenantId, tenantIds, invalidTenantsForClientId)
continue IdLoop
} else if strings.Contains(errStr, `"error_description":"AADSTS7000222:`) {
// The secret is expired.
setValidTenantIdForClientId(clientId, tenantId, tenantIds, invalidTenantsForClientId)
continue SecretLoop
} else {
// TODO: Investigate if it's possible to get a 401 with a valid id/secret.
s.VerificationError = refreshError
break
}
} else {
// Unexpected status code.
s.VerificationError = refreshError
break
}
} else {
// Unexpected error.
s.VerificationError = err
break
}
} else {
s.Verified = true
setValidTenantIdForClientId(clientId, tenantId, tenantIds, invalidTenantsForClientId)
break
}
}
}

if !s.Verified {
if detectors.IsKnownFalsePositive(s.Redacted, detectors.DefaultFalsePositives, true) {
continue
}
if detectors.IsKnownFalsePositive(string(s.Raw), detectors.DefaultFalsePositives, true) {
continue
}
}

if s.Verified {
results = append(results, s)
continue SecretLoop
} else if s.VerificationError != nil {
secretReported = true
results = append(results, s)
}
}

// The secret pattern is unique enough that we should still report it
// if it hasn't already been added.
if !secretReported {
// Only include the clientId and tenantId if we're confident which one it is.
if len(clientIds) != 1 {
clientId = ""
}
if len(tenantIds) != 1 {
tenantId = ""
}
s = createResult(clientSecret, clientId, tenantId)
results = append(results, s)
}
}
return results
}

return results, nil
// setValidTenantIdForClientId records that validTenantId is the tenant that
// clientId belongs to by marking every other key of tenantIds as invalid for
// that client in invalidTenantsForClientId.
//
// Tenant IDs already recorded for clientId are not appended again, so repeated
// calls (one per secret tried) do not grow the list with duplicates. tenantIds
// itself is only read. No entry is created for clientId when there is nothing
// to record.
func setValidTenantIdForClientId(clientId, validTenantId string, tenantIds map[string]bool, invalidTenantsForClientId map[string][]string) {
	invalid := invalidTenantsForClientId[clientId]

	// Index what is already recorded so membership checks stay O(1).
	known := make(map[string]struct{}, len(invalid))
	for _, id := range invalid {
		known[id] = struct{}{}
	}

	added := false
	for id := range tenantIds {
		if id == validTenantId {
			continue
		}
		if _, ok := known[id]; ok {
			continue
		}
		invalid = append(invalid, id)
		added = true
	}

	if added {
		invalidTenantsForClientId[clientId] = invalid
	}
}

// createResult assembles an Azure detector result for clientSecret.
//
// Raw always holds the secret and ExtraData always carries the rotation guide
// URL. When clientId is non-empty it becomes the Redacted value and RawV2 is set
// to "clientId:clientSecret", with " (tenantId)" appended when tenantId is
// non-empty. Without a clientId, Redacted and RawV2 are left unset and tenantId
// is ignored.
func createResult(clientSecret, clientId, tenantId string) detectors.Result {
	result := detectors.Result{
		DetectorType: detectorspb.DetectorType_Azure,
		Raw:          []byte(clientSecret),
		// Set the RotationGuideURL in the ExtraData
		ExtraData: map[string]string{
			"rotation_guide": "https://howtorotate.com/docs/tutorials/azure/",
		},
	}

	if clientId == "" {
		return result
	}
	result.Redacted = clientId

	// Tenant ID is required for verification, but it may not always be present.
	// e.g., ACR or Azure SQL use client id+secret without tenant.
	rawV2 := clientId + ":" + clientSecret
	if tenantId != "" {
		rawV2 += " (" + tenantId + ")"
	}
	result.RawV2 = []byte(rawV2)

	return result
}

func (s Scanner) Type() detectorspb.DetectorType {
Expand Down
Loading

0 comments on commit d5d8489

Please sign in to comment.