Skip to content

Commit 86c6d63

Browse files
rgmzRichard Gomez
authored and
Richard Gomez
committed
feat(azure): create openai detector
1 parent 980d783 commit 86c6d63

File tree

5 files changed

+430
-6
lines changed

5 files changed

+430
-6
lines changed
+153
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
package azure_openai
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"fmt"
7+
"io"
8+
"net/http"
9+
10+
regexp "github.com/wasilibs/go-re2"
11+
12+
logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
13+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
14+
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
15+
16+
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
17+
)
18+
19+
// Scanner detects API keys for Azure's OpenAI service.
20+
// https://learn.microsoft.com/en-us/azure/ai-services/openai/reference
21+
type Scanner struct {
22+
client *http.Client
23+
}
24+
25+
// Ensure the Scanner satisfies the interface at compile time.
26+
var _ detectors.Detector = (*Scanner)(nil)
27+
28+
var (
29+
// TODO: Investigate custom `azure-api.net` endpoints.
30+
// https://github.com/openai/openai-python#microsoft-azure-openai
31+
azureUrlPat = regexp.MustCompile(`(?i)([a-z0-9-]+\.openai\.azure\.com)`)
32+
azureKeyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"api[_.-]?key"}) + `\b(?-i:([a-f0-9]{32}))\b`)
33+
)
34+
35+
// Keywords are used for efficiently pre-filtering chunks.
36+
// Use identifiers in the secret preferably, or the provider name.
37+
func (s Scanner) Keywords() []string {
38+
return []string{".openai.azure.com"}
39+
}
40+
41+
// FromData will find and optionally verify OpenAI secrets in a given set of bytes.
42+
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
43+
dataStr := string(data)
44+
45+
// De-duplicate results.
46+
tokens := make(map[string]struct{})
47+
for _, match := range azureKeyPat.FindAllStringSubmatch(dataStr, -1) {
48+
tokens[match[1]] = struct{}{}
49+
}
50+
if len(tokens) == 0 {
51+
return
52+
}
53+
urls := make(map[string]struct{})
54+
for _, match := range azureUrlPat.FindAllStringSubmatch(dataStr, -1) {
55+
urls[match[1]] = struct{}{}
56+
}
57+
58+
// Process results.
59+
logCtx := logContext.AddLogger(ctx)
60+
for token := range tokens {
61+
s1 := detectors.Result{
62+
DetectorType: s.Type(),
63+
Redacted: token[:3] + "..." + token[25:],
64+
Raw: []byte(token),
65+
}
66+
67+
for url := range urls {
68+
if verify {
69+
client := s.client
70+
if client == nil {
71+
client = common.SaneHttpClient()
72+
}
73+
74+
isVerified, extraData, verificationErr := verifyAzureToken(logCtx, client, url, token)
75+
if isVerified || len(urls) == 1 {
76+
s1.RawV2 = []byte(token + ":" + url)
77+
s1.Verified = isVerified
78+
s1.ExtraData = extraData
79+
s1.SetVerificationError(verificationErr, token)
80+
break
81+
}
82+
}
83+
}
84+
85+
results = append(results, s1)
86+
}
87+
return
88+
}
89+
90+
func verifyAzureToken(ctx logContext.Context, client *http.Client, baseUrl, token string) (bool, map[string]string, error) {
91+
// TODO: Replace this with a more suitable long-term endpoint.
92+
// Most endpoints require additional info, e.g., deployment name, which complicates verification.
93+
// This may be retired in the future, so we should look for another candidate.
94+
// https://learn.microsoft.com/en-us/answers/questions/1371786/get-azure-openai-deployments-in-api
95+
req, err := http.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("https://%s/openai/deployments?api-version=2023-03-15-preview", baseUrl), nil)
96+
if err != nil {
97+
return false, nil, nil
98+
}
99+
100+
req.Header.Add("Content-Type", "application/json")
101+
req.Header.Add("api-key", token)
102+
res, err := client.Do(req)
103+
if err != nil {
104+
return false, nil, err
105+
}
106+
defer func() {
107+
_, _ = io.Copy(io.Discard, res.Body)
108+
_ = res.Body.Close()
109+
}()
110+
111+
switch res.StatusCode {
112+
case http.StatusOK:
113+
body, err := io.ReadAll(res.Body)
114+
if err != nil {
115+
return false, nil, err
116+
}
117+
118+
var deployments deploymentsResponse
119+
if err := json.Unmarshal(body, &deployments); err != nil {
120+
if json.Valid(body) {
121+
return false, nil, fmt.Errorf("failed to decode response %s: %w", req.URL, err)
122+
} else {
123+
// If the response isn't JSON it's highly unlikely to be valid.
124+
return false, nil, nil
125+
}
126+
}
127+
128+
// JSON unmarshal doesn't check whether the structure actually matches.
129+
if deployments.Object == "" {
130+
return false, nil, nil
131+
}
132+
133+
// No extra data available at the moment.
134+
return true, nil, nil
135+
case http.StatusUnauthorized:
136+
return false, nil, nil
137+
default:
138+
return false, nil, fmt.Errorf("unexpected response status %d for %s", res.StatusCode, req.URL)
139+
}
140+
}
141+
142+
type deploymentsResponse struct {
143+
Data []deployment `json:"data"`
144+
Object string `json:"object"`
145+
}
146+
147+
// deployment is a single entry of deploymentsResponse.Data; only its
// identifier is decoded.
type deployment struct {
	// ID is read from the "id" JSON field.
	ID string `json:"id"`
}
150+
151+
func (s Scanner) Type() detectorspb.DetectorType {
152+
return detectorspb.DetectorType_AzureOpenAI
153+
}

0 commit comments

Comments
 (0)