|
| 1 | +package azure_openai |
| 2 | + |
| 3 | +import ( |
| 4 | + "context" |
| 5 | + "encoding/json" |
| 6 | + "fmt" |
| 7 | + "io" |
| 8 | + "net/http" |
| 9 | + |
| 10 | + regexp "github.com/wasilibs/go-re2" |
| 11 | + |
| 12 | + logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context" |
| 13 | + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" |
| 14 | + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" |
| 15 | + |
| 16 | + "github.com/trufflesecurity/trufflehog/v3/pkg/common" |
| 17 | +) |
| 18 | + |
// Scanner detects API keys for Azure's OpenAI service.
//
// API reference: https://learn.microsoft.com/en-us/azure/ai-services/openai/reference
type Scanner struct {
	// client is the HTTP client used when verifying candidate keys.
	// When it is nil, FromData falls back to common.SaneHttpClient().
	client *http.Client
}
| 24 | + |
| 25 | +// Ensure the Scanner satisfies the interface at compile time. |
| 26 | +var _ detectors.Detector = (*Scanner)(nil) |
| 27 | + |
| 28 | +var ( |
| 29 | + // TODO: Investigate custom `azure-api.net` endpoints. |
| 30 | + // https://github.com/openai/openai-python#microsoft-azure-openai |
| 31 | + azureUrlPat = regexp.MustCompile(`(?i)([a-z0-9-]+\.openai\.azure\.com)`) |
| 32 | + azureKeyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"api[_.-]?key"}) + `\b(?-i:([a-f0-9]{32}))\b`) |
| 33 | +) |
| 34 | + |
| 35 | +// Keywords are used for efficiently pre-filtering chunks. |
| 36 | +// Use identifiers in the secret preferably, or the provider name. |
| 37 | +func (s Scanner) Keywords() []string { |
| 38 | + return []string{".openai.azure.com"} |
| 39 | +} |
| 40 | + |
| 41 | +// FromData will find and optionally verify OpenAI secrets in a given set of bytes. |
| 42 | +func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { |
| 43 | + dataStr := string(data) |
| 44 | + |
| 45 | + // De-duplicate results. |
| 46 | + tokens := make(map[string]struct{}) |
| 47 | + for _, match := range azureKeyPat.FindAllStringSubmatch(dataStr, -1) { |
| 48 | + tokens[match[1]] = struct{}{} |
| 49 | + } |
| 50 | + if len(tokens) == 0 { |
| 51 | + return |
| 52 | + } |
| 53 | + urls := make(map[string]struct{}) |
| 54 | + for _, match := range azureUrlPat.FindAllStringSubmatch(dataStr, -1) { |
| 55 | + urls[match[1]] = struct{}{} |
| 56 | + } |
| 57 | + |
| 58 | + // Process results. |
| 59 | + logCtx := logContext.AddLogger(ctx) |
| 60 | + for token := range tokens { |
| 61 | + s1 := detectors.Result{ |
| 62 | + DetectorType: s.Type(), |
| 63 | + Redacted: token[:3] + "..." + token[25:], |
| 64 | + Raw: []byte(token), |
| 65 | + } |
| 66 | + |
| 67 | + for url := range urls { |
| 68 | + if verify { |
| 69 | + client := s.client |
| 70 | + if client == nil { |
| 71 | + client = common.SaneHttpClient() |
| 72 | + } |
| 73 | + |
| 74 | + isVerified, extraData, verificationErr := verifyAzureToken(logCtx, client, url, token) |
| 75 | + if isVerified || len(urls) == 1 { |
| 76 | + s1.RawV2 = []byte(token + ":" + url) |
| 77 | + s1.Verified = isVerified |
| 78 | + s1.ExtraData = extraData |
| 79 | + s1.SetVerificationError(verificationErr, token) |
| 80 | + break |
| 81 | + } |
| 82 | + } |
| 83 | + } |
| 84 | + |
| 85 | + if s1.Verified { |
| 86 | + results = append(results, s1) |
| 87 | + } else { |
| 88 | + if detectors.IsKnownFalsePositive(string(s1.Raw), detectors.DefaultFalsePositives, true) { |
| 89 | + continue |
| 90 | + } |
| 91 | + results = append(results, s1) |
| 92 | + } |
| 93 | + } |
| 94 | + return |
| 95 | +} |
| 96 | + |
| 97 | +func verifyAzureToken(ctx logContext.Context, client *http.Client, baseUrl, token string) (bool, map[string]string, error) { |
| 98 | + // TODO: Replace this with a more suitable long-term endpoint. |
| 99 | + // Most endpoints require additional info, e.g., deployment name, which complicates verification. |
| 100 | + // This may be retired in the future, so we should look for another candidate. |
| 101 | + // https://learn.microsoft.com/en-us/answers/questions/1371786/get-azure-openai-deployments-in-api |
| 102 | + req, err := http.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("https://%s/openai/deployments?api-version=2023-03-15-preview", baseUrl), nil) |
| 103 | + if err != nil { |
| 104 | + return false, nil, nil |
| 105 | + } |
| 106 | + |
| 107 | + req.Header.Add("Content-Type", "application/json") |
| 108 | + req.Header.Add("api-key", token) |
| 109 | + res, err := client.Do(req) |
| 110 | + if err != nil { |
| 111 | + return false, nil, err |
| 112 | + } |
| 113 | + defer func() { |
| 114 | + _, _ = io.Copy(io.Discard, res.Body) |
| 115 | + _ = res.Body.Close() |
| 116 | + }() |
| 117 | + |
| 118 | + switch res.StatusCode { |
| 119 | + case http.StatusOK: |
| 120 | + body, err := io.ReadAll(res.Body) |
| 121 | + if err != nil { |
| 122 | + return false, nil, err |
| 123 | + } |
| 124 | + |
| 125 | + var deployments deploymentsResponse |
| 126 | + if err := json.Unmarshal(body, &deployments); err != nil { |
| 127 | + if json.Valid(body) { |
| 128 | + return false, nil, fmt.Errorf("failed to decode response %s: %w", req.URL, err) |
| 129 | + } else { |
| 130 | + // If the response isn't JSON it's highly unlikely to be valid. |
| 131 | + return false, nil, nil |
| 132 | + } |
| 133 | + } |
| 134 | + |
| 135 | + // JSON unmarshal doesn't check whether the structure actually matches. |
| 136 | + if deployments.Object == "" { |
| 137 | + return false, nil, nil |
| 138 | + } |
| 139 | + |
| 140 | + // No extra data available at the moment. |
| 141 | + return true, nil, nil |
| 142 | + case http.StatusUnauthorized: |
| 143 | + return false, nil, nil |
| 144 | + default: |
| 145 | + return false, nil, fmt.Errorf("unexpected response status %d for %s", res.StatusCode, req.URL) |
| 146 | + } |
| 147 | +} |
| 148 | + |
type (
	// deploymentsResponse is the subset of the deployments-list response body
	// that is decoded during verification.
	deploymentsResponse struct {
		// Data holds the entries of the JSON "data" array.
		Data []deployment `json:"data"`
		// Object is the JSON "object" field; an empty value is treated as a
		// response that does not match the expected structure.
		Object string `json:"object"`
	}

	// deployment is a single entry of deploymentsResponse.Data.
	deployment struct {
		// ID is the JSON "id" field of the entry.
		ID string `json:"id"`
	}
)
| 157 | + |
| 158 | +func (s Scanner) Type() detectorspb.DetectorType { |
| 159 | + return detectorspb.DetectorType_AzureOpenAI |
| 160 | +} |
0 commit comments