Skip to content

Commit 59200a6

Browse files
rgmz (Richard Gomez)
authored and
Richard Gomez
committed
feat(azure): create openai detector
1 parent a22874f commit 59200a6

File tree

5 files changed

+439
-7
lines changed

5 files changed

+439
-7
lines changed
+160
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
package azure_openai
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"fmt"
7+
"io"
8+
"net/http"
9+
10+
regexp "github.com/wasilibs/go-re2"
11+
12+
logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
13+
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
14+
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
15+
16+
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
17+
)
18+
19+
// Scanner detects API keys for Azure's OpenAI service.
20+
// https://learn.microsoft.com/en-us/azure/ai-services/openai/reference
21+
type Scanner struct {
22+
client *http.Client
23+
}
24+
25+
// Ensure the Scanner satisfies the interface at compile time.
26+
var _ detectors.Detector = (*Scanner)(nil)
27+
28+
var (
29+
// TODO: Investigate custom `azure-api.net` endpoints.
30+
// https://github.com/openai/openai-python#microsoft-azure-openai
31+
azureUrlPat = regexp.MustCompile(`(?i)([a-z0-9-]+\.openai\.azure\.com)`)
32+
azureKeyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"api[_.-]?key"}) + `\b(?-i:([a-f0-9]{32}))\b`)
33+
)
34+
35+
// Keywords are used for efficiently pre-filtering chunks.
36+
// Use identifiers in the secret preferably, or the provider name.
37+
func (s Scanner) Keywords() []string {
38+
return []string{".openai.azure.com"}
39+
}
40+
41+
// FromData will find and optionally verify OpenAI secrets in a given set of bytes.
42+
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
43+
dataStr := string(data)
44+
45+
// De-duplicate results.
46+
tokens := make(map[string]struct{})
47+
for _, match := range azureKeyPat.FindAllStringSubmatch(dataStr, -1) {
48+
tokens[match[1]] = struct{}{}
49+
}
50+
if len(tokens) == 0 {
51+
return
52+
}
53+
urls := make(map[string]struct{})
54+
for _, match := range azureUrlPat.FindAllStringSubmatch(dataStr, -1) {
55+
urls[match[1]] = struct{}{}
56+
}
57+
58+
// Process results.
59+
logCtx := logContext.AddLogger(ctx)
60+
for token := range tokens {
61+
s1 := detectors.Result{
62+
DetectorType: s.Type(),
63+
Redacted: token[:3] + "..." + token[25:],
64+
Raw: []byte(token),
65+
}
66+
67+
for url := range urls {
68+
if verify {
69+
client := s.client
70+
if client == nil {
71+
client = common.SaneHttpClient()
72+
}
73+
74+
isVerified, extraData, verificationErr := verifyAzureToken(logCtx, client, url, token)
75+
if isVerified || len(urls) == 1 {
76+
s1.RawV2 = []byte(token + ":" + url)
77+
s1.Verified = isVerified
78+
s1.ExtraData = extraData
79+
s1.SetVerificationError(verificationErr, token)
80+
break
81+
}
82+
}
83+
}
84+
85+
if s1.Verified {
86+
results = append(results, s1)
87+
} else {
88+
if detectors.IsKnownFalsePositive(string(s1.Raw), detectors.DefaultFalsePositives, true) {
89+
continue
90+
}
91+
results = append(results, s1)
92+
}
93+
}
94+
return
95+
}
96+
97+
func verifyAzureToken(ctx logContext.Context, client *http.Client, baseUrl, token string) (bool, map[string]string, error) {
98+
// TODO: Replace this with a more suitable long-term endpoint.
99+
// Most endpoints require additional info, e.g., deployment name, which complicates verification.
100+
// This may be retired in the future, so we should look for another candidate.
101+
// https://learn.microsoft.com/en-us/answers/questions/1371786/get-azure-openai-deployments-in-api
102+
req, err := http.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("https://%s/openai/deployments?api-version=2023-03-15-preview", baseUrl), nil)
103+
if err != nil {
104+
return false, nil, nil
105+
}
106+
107+
req.Header.Add("Content-Type", "application/json")
108+
req.Header.Add("api-key", token)
109+
res, err := client.Do(req)
110+
if err != nil {
111+
return false, nil, err
112+
}
113+
defer func() {
114+
_, _ = io.Copy(io.Discard, res.Body)
115+
_ = res.Body.Close()
116+
}()
117+
118+
switch res.StatusCode {
119+
case http.StatusOK:
120+
body, err := io.ReadAll(res.Body)
121+
if err != nil {
122+
return false, nil, err
123+
}
124+
125+
var deployments deploymentsResponse
126+
if err := json.Unmarshal(body, &deployments); err != nil {
127+
if json.Valid(body) {
128+
return false, nil, fmt.Errorf("failed to decode response %s: %w", req.URL, err)
129+
} else {
130+
// If the response isn't JSON it's highly unlikely to be valid.
131+
return false, nil, nil
132+
}
133+
}
134+
135+
// JSON unmarshal doesn't check whether the structure actually matches.
136+
if deployments.Object == "" {
137+
return false, nil, nil
138+
}
139+
140+
// No extra data available at the moment.
141+
return true, nil, nil
142+
case http.StatusUnauthorized:
143+
return false, nil, nil
144+
default:
145+
return false, nil, fmt.Errorf("unexpected response status %d for %s", res.StatusCode, req.URL)
146+
}
147+
}
148+
149+
type (
	// deploymentsResponse is the subset of the deployments-list response body
	// that verification decodes.
	deploymentsResponse struct {
		// Data holds the entries of the JSON "data" array.
		Data []deployment `json:"data"`
		// Object is the JSON "object" field; verifyAzureToken treats an empty
		// value as a sign that the body is not a deployments payload.
		Object string `json:"object"`
	}

	// deployment is a single entry of deploymentsResponse.Data.
	deployment struct {
		// ID is the JSON "id" field of the entry.
		ID string `json:"id"`
	}
)
157+
158+
func (s Scanner) Type() detectorspb.DetectorType {
159+
return detectorspb.DetectorType_AzureOpenAI
160+
}

0 commit comments

Comments
 (0)