-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add trace anonymizer prototype (#2328)
* Add trace anonymizer prototype Signed-off-by: Yuri Shkuro <[email protected]> * Delint Signed-off-by: Yuri Shkuro <[email protected]> * Fix gosec Signed-off-by: Yuri Shkuro <[email protected]>
- Loading branch information
1 parent
00b6e96
commit 5001225
Showing
5 changed files
with
329 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
non-critical test utility |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
// Copyright (c) 2020 The Jaeger Authors. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package anonymizer | ||
|
||
import ( | ||
"encoding/json" | ||
"fmt" | ||
"hash/fnv" | ||
"io/ioutil" | ||
"os" | ||
"path/filepath" | ||
"sync" | ||
"time" | ||
|
||
"go.uber.org/zap" | ||
|
||
"github.com/jaegertracing/jaeger/model" | ||
uiconv "github.com/jaegertracing/jaeger/model/converter/json" | ||
uimodel "github.com/jaegertracing/jaeger/model/json" | ||
) | ||
|
||
// allowedTags is the allow-list of span tag keys that survive anonymization.
// filterTags drops every tag whose key is not listed here.
var allowedTags = map[string]bool{
	"sampler.param":    true,
	"sampler.type":     true,
	"http.status_code": true,
	"http.method":      true,
	"span.kind":        true,
	"error":            true,
}
|
||
// mapping records, for every original service and operation name seen so far,
// the one-way hash it was replaced with. It is kept so that a reverse lookup is
// possible should the researchers have questions about an anonymized trace.
type mapping struct {
	// Services maps an original service name to its hash.
	Services map[string]string
	// Operations maps a "[service]:operation" key to its hash.
	Operations map[string]string
}
|
||
// Anonymizer transforms Jaeger span in the domain model by obfuscating site-specific strings, | ||
// like service and operation names, and removes custom tags. It returns obfuscated span in the | ||
// Jaeger UI format, to make it easy to visualize traces. | ||
// | ||
// The mapping from original to obfuscated strings is stored in a file and can be reused between runs. | ||
type Anonymizer struct { | ||
mappingFile string | ||
logger *zap.Logger | ||
|
||
lock sync.Mutex | ||
mapping mapping | ||
} | ||
|
||
// New creates new Anonymizer. The mappingFile stores the mapping from original to | ||
// obfuscated strings, in case later investigations require looking at the original traces. | ||
func New(mappingFile string, logger *zap.Logger) *Anonymizer { | ||
a := &Anonymizer{ | ||
mappingFile: mappingFile, | ||
logger: logger, | ||
mapping: mapping{ | ||
Services: make(map[string]string), | ||
Operations: make(map[string]string), | ||
}, | ||
} | ||
if _, err := os.Stat(filepath.Clean(mappingFile)); err == nil { | ||
dat, err := ioutil.ReadFile(filepath.Clean(mappingFile)) | ||
if err != nil { | ||
logger.Fatal("Cannot load previous mapping", zap.Error(err)) | ||
} | ||
if err := json.Unmarshal(dat, &a.mapping); err != nil { | ||
logger.Fatal("Cannot unmarshal previous mapping", zap.Error(err)) | ||
} | ||
} | ||
go func() { | ||
for range time.NewTicker(10 * time.Second).C { | ||
a.SaveMapping() | ||
} | ||
}() | ||
return a | ||
} | ||
|
||
// SaveMapping writes the mapping from original to obfuscated strings to a file. | ||
// It is called by the anonymizer itself periodically, and should be called at | ||
// the end of the extraction run. | ||
func (a *Anonymizer) SaveMapping() { | ||
a.lock.Lock() | ||
defer a.lock.Unlock() | ||
dat, err := json.Marshal(a.mapping) | ||
if err != nil { | ||
a.logger.Error("Failed to marshal mapping file", zap.Error(err)) | ||
return | ||
} | ||
if err := ioutil.WriteFile(filepath.Clean(a.mappingFile), dat, os.ModePerm); err != nil { | ||
a.logger.Error("Failed to write mapping file", zap.Error(err)) | ||
return | ||
} | ||
a.logger.Sugar().Infof("Saved mapping file %s: %s", a.mappingFile, string(dat)) | ||
} | ||
|
||
func (a *Anonymizer) mapServiceName(service string) string { | ||
return a.mapString(service, a.mapping.Services) | ||
} | ||
|
||
func (a *Anonymizer) mapOperationName(service, operation string) string { | ||
v := fmt.Sprintf("[%s]:%s", service, operation) | ||
return a.mapString(v, a.mapping.Operations) | ||
} | ||
|
||
func (a *Anonymizer) mapString(v string, m map[string]string) string { | ||
a.lock.Lock() | ||
defer a.lock.Unlock() | ||
if s, ok := m[v]; ok { | ||
return s | ||
} | ||
s := hash(v) | ||
m[v] = s | ||
return s | ||
} | ||
|
||
// hash returns the 64-bit FNV-1 hash of value, formatted as 16 zero-padded
// lowercase hexadecimal digits.
func hash(value string) string {
	hasher := fnv.New64()
	// The error result is discarded, as in the original code.
	_, _ = hasher.Write([]byte(value))
	sum := hasher.Sum64()
	return fmt.Sprintf("%016x", sum)
}
|
||
// AnonymizeSpan obfuscates and converts the span. | ||
func (a *Anonymizer) AnonymizeSpan(span *model.Span) *uimodel.Span { | ||
service := span.Process.ServiceName | ||
span.OperationName = a.mapOperationName(service, span.OperationName) | ||
span.Tags = filterTags(span.Tags) | ||
span.Logs = nil | ||
span.Process.ServiceName = a.mapServiceName(service) | ||
span.Process.Tags = nil | ||
span.Warnings = nil | ||
return uiconv.FromDomainEmbedProcess(span) | ||
} | ||
|
||
func filterTags(tags []model.KeyValue) []model.KeyValue { | ||
out := make([]model.KeyValue, 0, len(tags)) | ||
for _, tag := range tags { | ||
if !allowedTags[tag.Key] { | ||
continue | ||
} | ||
if tag.Key == "error" { | ||
switch tag.VType { | ||
case model.BoolType: | ||
// allowed | ||
case model.StringType: | ||
if tag.VStr != "true" && tag.VStr != "false" { | ||
tag = model.Bool("error", true) | ||
} | ||
default: | ||
tag = model.Bool("error", true) | ||
} | ||
} | ||
out = append(out, tag) | ||
} | ||
return out | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
non-critical test utility |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
// Copyright (c) 2020 The Jaeger Authors. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package writer | ||
|
||
import ( | ||
"bytes" | ||
"encoding/json" | ||
"fmt" | ||
"os" | ||
"sync" | ||
|
||
"github.com/gogo/protobuf/jsonpb" | ||
"go.uber.org/zap" | ||
|
||
"github.com/jaegertracing/jaeger/cmd/anonymizer/app/anonymizer" | ||
"github.com/jaegertracing/jaeger/model" | ||
) | ||
|
||
// Config holds the parameters accepted by New.
type Config struct {
	// MaxSpansCount is the number of written spans after which WriteSpan
	// finalizes the output files and exits the process.
	MaxSpansCount int `yaml:"max_spans_count" name:"max_spans_count"`
	// CapturedFile is the path of the file receiving the original spans.
	CapturedFile string `yaml:"captured_file" name:"captured_file"`
	// AnonymizedFile is the path of the file receiving the anonymized spans.
	AnonymizedFile string `yaml:"anonymized_file" name:"anonymized_file"`
	// MappingFile is the path handed to the anonymizer for its name mapping.
	MappingFile string `yaml:"mapping_file" name:"mapping_file"`
}
|
||
// Writer is a span Writer that obfuscates the span and writes it to a JSON file. | ||
type Writer struct { | ||
config Config | ||
lock sync.Mutex | ||
logger *zap.Logger | ||
capturedFile *os.File | ||
anonymizedFile *os.File | ||
anonymizer *anonymizer.Anonymizer | ||
spanCount int | ||
} | ||
|
||
// New creates an Writer | ||
func New(config Config, logger *zap.Logger) (*Writer, error) { | ||
wd, err := os.Getwd() | ||
if err != nil { | ||
return nil, err | ||
} | ||
logger.Sugar().Infof("Current working dir is %s", wd) | ||
|
||
cf, err := os.OpenFile(config.CapturedFile, os.O_CREATE|os.O_WRONLY, os.ModePerm) | ||
if err != nil { | ||
return nil, fmt.Errorf("cannot create output file: %w", err) | ||
} | ||
logger.Sugar().Infof("Writing captured spans to file %s", config.CapturedFile) | ||
|
||
af, err := os.OpenFile(config.AnonymizedFile, os.O_CREATE|os.O_WRONLY, os.ModePerm) | ||
if err != nil { | ||
return nil, fmt.Errorf("cannot create output file: %w", err) | ||
} | ||
logger.Sugar().Infof("Writing anonymized spans to file %s", config.AnonymizedFile) | ||
|
||
_, err = cf.WriteString("[") | ||
if err != nil { | ||
return nil, fmt.Errorf("cannot write tp output file: %w", err) | ||
} | ||
_, err = af.WriteString("[") | ||
if err != nil { | ||
return nil, fmt.Errorf("cannot write tp output file: %w", err) | ||
} | ||
return &Writer{ | ||
config: config, | ||
logger: logger, | ||
capturedFile: cf, | ||
anonymizedFile: af, | ||
anonymizer: anonymizer.New(config.MappingFile, logger), | ||
}, nil | ||
} | ||
|
||
// WriteSpan anonymized the span and appends it as JSON to w.file. | ||
func (w *Writer) WriteSpan(msg *model.Span) error { | ||
w.lock.Lock() | ||
defer w.lock.Unlock() | ||
|
||
out := new(bytes.Buffer) | ||
if err := new(jsonpb.Marshaler).Marshal(out, msg); err != nil { | ||
return err | ||
} | ||
if w.spanCount > 0 { | ||
w.capturedFile.WriteString(",\n") | ||
} | ||
w.capturedFile.Write(out.Bytes()) | ||
w.capturedFile.Sync() | ||
|
||
span := w.anonymizer.AnonymizeSpan(msg) | ||
|
||
dat, err := json.Marshal(span) | ||
if err != nil { | ||
return err | ||
} | ||
if w.spanCount > 0 { | ||
w.anonymizedFile.WriteString(",\n") | ||
} | ||
if _, err := w.anonymizedFile.Write(dat); err != nil { | ||
return err | ||
} | ||
w.anonymizedFile.Sync() | ||
|
||
w.spanCount++ | ||
if w.spanCount%100 == 0 { | ||
w.logger.Info("progress", zap.Int("numSpans", w.spanCount)) | ||
} | ||
|
||
if w.spanCount >= w.config.MaxSpansCount { | ||
w.logger.Info("Saved enough spans, exiting...") | ||
w.capturedFile.WriteString("\n]\n") | ||
w.capturedFile.Close() | ||
w.anonymizedFile.WriteString("\n]\n") | ||
w.anonymizedFile.Close() | ||
w.anonymizer.SaveMapping() | ||
os.Exit(0) | ||
} | ||
|
||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
// Copyright (c) 2020 The Jaeger Authors. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package main | ||
|
||
import ( | ||
"go.uber.org/zap" | ||
|
||
"github.com/jaegertracing/jaeger/cmd/anonymizer/app/writer" | ||
) | ||
|
||
func main() { | ||
// TODO | ||
_, _ = writer.New(writer.Config{}, zap.NewNop()) | ||
println("not implemented") | ||
} |