diff --git a/.chloggen/datadogreceiver-span-obfuscate.yaml b/.chloggen/datadogreceiver-span-obfuscate.yaml new file mode 100644 index 0000000000000..7348b5732eb20 --- /dev/null +++ b/.chloggen/datadogreceiver-span-obfuscate.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: datadogreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Implement span obfuscation for Datadog receiver. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [35593] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] diff --git a/receiver/datadogreceiver/README.md b/receiver/datadogreceiver/README.md index 36d504fb38256..2c834efdd61fe 100644 --- a/receiver/datadogreceiver/README.md +++ b/receiver/datadogreceiver/README.md @@ -29,6 +29,9 @@ receivers: datadog: endpoint: localhost:8126 read_timeout: 60s + traces: + obfuscation: + enabled: true exporters: debug: @@ -48,6 +51,10 @@ The read timeout of the HTTP Server Default: 60s +### Traces (Optional) +#### Obfuscation +The obfuscation configuration allows you to obfuscate and cleanup sensitive information in the trace data. The complete list of obfuscation rules and configuration can be found [here](config.go). + ### HTTP Service Config All config params here are valid as well diff --git a/receiver/datadogreceiver/config.go b/receiver/datadogreceiver/config.go index 646309c997630..4ad60a968e6f6 100644 --- a/receiver/datadogreceiver/config.go +++ b/receiver/datadogreceiver/config.go @@ -7,11 +7,182 @@ package datadogreceiver // import "github.com/open-telemetry/opentelemetry-colle import ( "time" + "github.com/DataDog/datadog-agent/pkg/obfuscate" "go.opentelemetry.io/collector/config/confighttp" ) +func NewConfig() *Config { + return &Config{ + ServerConfig: confighttp.ServerConfig{ + Endpoint: "localhost:8126", + }, + ReadTimeout: 60 * time.Second, + Traces: TracesConfig{ + Obfuscation: ObfuscationConfig{}, + }, + } +} + type Config struct { confighttp.ServerConfig `mapstructure:",squash"` // ReadTimeout of the http server ReadTimeout time.Duration `mapstructure:"read_timeout"` + + // Traces holds tracing-related configurations + Traces TracesConfig `mapstructure:"traces"` +} + +// TracesConfig holds the configuration for the Datadog receiver's trace processor. +type TracesConfig struct { + // Obfuscation holds sensitive data obufscator's configuration. + Obfuscation ObfuscationConfig `mapstructure:"obfuscation"` +} + +// ObfuscationConfig holds the configuration for obfuscating sensitive data +// for various span types. +type ObfuscationConfig struct { + Enabled bool `mapstructure:"enabled"` + + // SQL holds the obfuscation configuration for SQL queries. + SQL SQLConfig `mapstructure:"sql"` + + // ES holds the obfuscation configuration for ElasticSearch bodies. + ES obfuscate.JSONConfig `mapstructure:"elasticsearch"` + + // OpenSearch holds the obfuscation configuration for OpenSearch bodies. + OpenSearch obfuscate.JSONConfig `mapstructure:"opensearch"` + + // Mongo holds the obfuscation configuration for MongoDB queries. + Mongo obfuscate.JSONConfig `mapstructure:"mongodb"` + + // SQLExecPlan holds the obfuscation configuration for SQL Exec Plans. This is strictly for safety related obfuscation, + // not normalization. Normalization of exec plans is configured in SQLExecPlanNormalize. + SQLExecPlan obfuscate.JSONConfig `mapstructure:"sql_exec_plan"` + + // SQLExecPlanNormalize holds the normalization configuration for SQL Exec Plans. + SQLExecPlanNormalize obfuscate.JSONConfig `mapstructure:"sql_exec_plan_normalize"` + + // HTTP holds the obfuscation settings for HTTP URLs. + HTTP obfuscate.HTTPConfig `mapstructure:"http"` + + // RemoveStackTraces specifies whether stack traces should be removed. + // More specifically "error.stack" tag values will be cleared. + RemoveStackTraces bool `mapstructure:"remove_stack_traces"` + + // Redis holds the configuration for obfuscating the "redis.raw_command" tag + // for spans of type "redis". + Redis obfuscate.RedisConfig `mapstructure:"redis"` + + // Memcached holds the configuration for obfuscating the "memcached.command" tag + // for spans of type "memcached". + Memcached obfuscate.MemcachedConfig `mapstructure:"memcached"` + + // CreditCards holds the configuration for obfuscating credit cards. + CreditCards obfuscate.CreditCardsConfig `mapstructure:"credit_cards"` +} + +// SQLConfig holds the config for obfuscating SQL. +type SQLConfig struct { + // DBMS identifies the type of database management system (e.g. MySQL, Postgres, and SQL Server). + // Valid values for this can be found at https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/semantic_conventions/database.md#connection-level-attributes + DBMS string `mapstructure:"dbms"` + + // TableNames specifies whether the obfuscator should also extract the table names that a query addresses, + // in addition to obfuscating. + TableNames bool `mapstructure:"table_names"` + + // CollectCommands specifies whether the obfuscator should extract and return commands as SQL metadata when obfuscating. + CollectCommands bool `mapstructure:"collect_commands"` + + // CollectComments specifies whether the obfuscator should extract and return comments as SQL metadata when obfuscating. + CollectComments bool `mapstructure:"collect_comments"` + + // CollectProcedures specifies whether the obfuscator should extract and return procedure names as SQL metadata when obfuscating. + CollectProcedures bool `mapstructure:"collect_procedures"` + + // ReplaceDigits specifies whether digits in table names and identifiers should be obfuscated. + ReplaceDigits bool `mapstructure:"replace_digits"` + + // KeepSQLAlias reports whether SQL aliases ("AS") should be truncated. + KeepSQLAlias bool `mapstructure:"keep_sql_alias"` + + // DollarQuotedFunc reports whether to treat "$func$" delimited dollar-quoted strings + // differently and not obfuscate them as a string. To read more about dollar quoted + // strings see: + // + // https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING + DollarQuotedFunc bool `mapstructure:"dollar_quoted_func"` + + // ObfuscationMode specifies the obfuscation mode to use for go-sqllexer pkg. + // When specified, obfuscator will attempt to use go-sqllexer pkg to obfuscate (and normalize) SQL queries. + // Valid values are "normalize_only", "obfuscate_only", "obfuscate_and_normalize" + ObfuscationMode obfuscate.ObfuscationMode `mapstructure:"obfuscation_mode"` + + // RemoveSpaceBetweenParentheses specifies whether to remove spaces between parentheses. + // By default, spaces are inserted between parentheses during normalization. + // This option is only valid when ObfuscationMode is "normalize_only" or "obfuscate_and_normalize". + RemoveSpaceBetweenParentheses bool `mapstructure:"remove_space_between_parentheses"` + + // KeepNull specifies whether to disable obfuscate NULL value with ?. + // This option is only valid when ObfuscationMode is "obfuscate_only" or "obfuscate_and_normalize". + KeepNull bool `mapstructure:"keep_null"` + + // KeepBoolean specifies whether to disable obfuscate boolean value with ?. + // This option is only valid when ObfuscationMode is "obfuscate_only" or "obfuscate_and_normalize". + KeepBoolean bool `mapstructure:"keep_boolean"` + + // KeepPositionalParameter specifies whether to disable obfuscate positional parameter with ?. + // This option is only valid when ObfuscationMode is "obfuscate_only" or "obfuscate_and_normalize". + KeepPositionalParameter bool `mapstructure:"keep_positional_parameter"` + + // KeepTrailingSemicolon specifies whether to keep trailing semicolon. + // By default, trailing semicolon is removed during normalization. + // This option is only valid when ObfuscationMode is "normalize_only" or "obfuscate_and_normalize". + KeepTrailingSemicolon bool `mapstructure:"keep_trailing_semicolon"` + + // KeepIdentifierQuotation specifies whether to keep identifier quotation, e.g. "my_table" or [my_table]. + // By default, identifier quotation is removed during normalization. + // This option is only valid when ObfuscationMode is "normalize_only" or "obfuscate_and_normalize". + KeepIdentifierQuotation bool `mapstructure:"keep_identifier_quotation"` +} + +// Export returns an obfuscate.Config matching o. +func (o *ObfuscationConfig) Export() obfuscate.Config { + if !o.Enabled { + return obfuscate.Config{} + } + + return obfuscate.Config{ + SQL: o.SQL.Export(), + ES: o.ES, + OpenSearch: o.OpenSearch, + Mongo: o.Mongo, + SQLExecPlan: o.SQLExecPlan, + SQLExecPlanNormalize: o.SQLExecPlanNormalize, + HTTP: o.HTTP, + Redis: o.Redis, + Memcached: o.Memcached, + CreditCard: o.CreditCards, + } +} + +// Export returns an obfuscate.Config matching o. +func (o *SQLConfig) Export() obfuscate.SQLConfig { + return obfuscate.SQLConfig{ + DBMS: o.DBMS, + TableNames: o.TableNames, + CollectCommands: o.CollectCommands, + CollectComments: o.CollectComments, + CollectProcedures: o.CollectProcedures, + ReplaceDigits: o.ReplaceDigits, + KeepSQLAlias: o.KeepSQLAlias, + DollarQuotedFunc: o.DollarQuotedFunc, + ObfuscationMode: o.ObfuscationMode, + KeepNull: o.KeepNull, + KeepBoolean: o.KeepBoolean, + KeepPositionalParameter: o.KeepPositionalParameter, + KeepTrailingSemicolon: o.KeepTrailingSemicolon, + KeepIdentifierQuotation: o.KeepIdentifierQuotation, + RemoveSpaceBetweenParentheses: o.RemoveSpaceBetweenParentheses, + } } diff --git a/receiver/datadogreceiver/factory.go b/receiver/datadogreceiver/factory.go index 6403be658d3b7..d78a46abfa54a 100644 --- a/receiver/datadogreceiver/factory.go +++ b/receiver/datadogreceiver/factory.go @@ -5,10 +5,8 @@ package datadogreceiver // import "github.com/open-telemetry/opentelemetry-colle import ( "context" - "time" "go.opentelemetry.io/collector/component" - "go.opentelemetry.io/collector/config/confighttp" "go.opentelemetry.io/collector/consumer" "go.opentelemetry.io/collector/receiver" @@ -27,12 +25,7 @@ func NewFactory() receiver.Factory { } func createDefaultConfig() component.Config { - return &Config{ - ServerConfig: confighttp.ServerConfig{ - Endpoint: "localhost:8126", - }, - ReadTimeout: 60 * time.Second, - } + return NewConfig() } func createTracesReceiver(_ context.Context, params receiver.Settings, cfg component.Config, consumer consumer.Traces) (receiver.Traces, error) { diff --git a/receiver/datadogreceiver/internal/translator/traces_translator.go b/receiver/datadogreceiver/internal/translator/traces_translator.go index f07c3aaf7d100..2ef582f69ecb9 100644 --- a/receiver/datadogreceiver/internal/translator/traces_translator.go +++ b/receiver/datadogreceiver/internal/translator/traces_translator.go @@ -16,12 +16,15 @@ import ( "strings" "sync" + "github.com/DataDog/datadog-agent/pkg/obfuscate" pb "github.com/DataDog/datadog-agent/pkg/proto/pbgo/trace" + "github.com/DataDog/datadog-agent/pkg/trace/traceutil" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/ptrace" semconv "go.opentelemetry.io/collector/semconv/v1.16.0" "google.golang.org/protobuf/proto" + "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/datadogreceiver" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/zipkin/zipkinv2" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/datadogreceiver/internal/translator/header" ) @@ -40,8 +43,29 @@ const ( // Requirement Level: Optional // Examples: '228114450199004348' attributeDatadogSpanID = "datadog.span.id" + + tagRedisRawCommand = "redis.raw_command" + tagMemcachedCommand = "memcached.command" + tagMongoDBQuery = "mongodb.query" + tagElasticBody = "elasticsearch.body" + tagOpenSearchBody = "opensearch.body" + tagSQLQuery = "sql.query" + tagHTTPURL = "http.url" ) +type TracesTranslator struct { + obfuscator *obfuscate.Obfuscator + + conf *datadogreceiver.TracesConfig +} + +func NewTracesTranslator(config *datadogreceiver.Config) *TracesTranslator { + return &TracesTranslator{ + obfuscator: obfuscate.NewObfuscator(config.Traces.Obfuscation.Export()), + conf: &config.Traces, + } +} + func upsertHeadersAttributes(req *http.Request, attrs pcommon.Map) { if ddTracerVersion := req.Header.Get(header.TracerVersion); ddTracerVersion != "" { attrs.PutStr(semconv.AttributeTelemetrySDKVersion, "Datadog-"+ddTracerVersion) @@ -55,7 +79,7 @@ func upsertHeadersAttributes(req *http.Request, attrs pcommon.Map) { } } -func ToTraces(payload *pb.TracerPayload, req *http.Request) ptrace.Traces { +func (tt *TracesTranslator) ToTraces(payload *pb.TracerPayload, req *http.Request) ptrace.Traces { var traces pb.Traces for _, p := range payload.GetChunks() { traces = append(traces, p.GetSpans()) @@ -92,6 +116,8 @@ func ToTraces(payload *pb.TracerPayload, req *http.Request) ptrace.Traces { for _, trace := range traces { for _, span := range trace { + tt.obfuscateSpan(span) + slice, exist := groupByService[span.Service] if !exist { slice = ptrace.NewSpanSlice() @@ -370,3 +396,86 @@ func uInt64ToSpanID(id uint64) pcommon.SpanID { binary.BigEndian.PutUint64(spanID[:], id) return spanID } + +func (tt *TracesTranslator) obfuscateSpan(span *pb.Span) { + if !tt.conf.Obfuscation.Enabled { + return + } + + o := tt.obfuscator + if tt.conf.Obfuscation.CreditCards.Enabled { + for k, v := range span.Meta { + newV := o.ObfuscateCreditCardNumber(k, v) + if v != newV { + span.Meta[k] = newV + } + } + } + + switch span.Type { + case "sql", "cassandra": + if span.Resource == "" { + return + } + oq, err := o.ObfuscateSQLString(span.Resource) + if err != nil { + // we have an error, discard the SQL to avoid polluting user resources. + span.Resource = textNonParsable + traceutil.SetMeta(span, tagSQLQuery, textNonParsable) + return + } + + span.Resource = oq.Query + if len(oq.Metadata.TablesCSV) > 0 { + traceutil.SetMeta(span, "sql.tables", oq.Metadata.TablesCSV) + } + traceutil.SetMeta(span, tagSQLQuery, oq.Query) + case "redis": + span.Resource = o.QuantizeRedisString(span.Resource) + if tt.conf.Obfuscation.Redis.Enabled { + if span.Meta == nil || span.Meta[tagRedisRawCommand] == "" { + return + } + if tt.conf.Obfuscation.Redis.RemoveAllArgs { + span.Meta[tagRedisRawCommand] = o.RemoveAllRedisArgs(span.Meta[tagRedisRawCommand]) + return + } + span.Meta[tagRedisRawCommand] = o.ObfuscateRedisString(span.Meta[tagRedisRawCommand]) + } + case "memcached": + if !tt.conf.Obfuscation.Memcached.Enabled { + return + } + if span.Meta == nil || span.Meta[tagMemcachedCommand] == "" { + return + } + span.Meta[tagMemcachedCommand] = o.ObfuscateMemcachedString(span.Meta[tagMemcachedCommand]) + case "web", "http": + if span.Meta == nil || span.Meta[tagHTTPURL] == "" { + return + } + span.Meta[tagHTTPURL] = o.ObfuscateURLString(span.Meta[tagHTTPURL]) + case "mongodb": + if !tt.conf.Obfuscation.Mongo.Enabled { + return + } + if span.Meta == nil || span.Meta[tagMongoDBQuery] == "" { + return + } + span.Meta[tagMongoDBQuery] = o.ObfuscateMongoDBString(span.Meta[tagMongoDBQuery]) + case "elasticsearch", "opensearch": + if span.Meta == nil { + return + } + if tt.conf.Obfuscation.ES.Enabled { + if span.Meta[tagElasticBody] != "" { + span.Meta[tagElasticBody] = o.ObfuscateElasticSearchString(span.Meta[tagElasticBody]) + } + } + if tt.conf.Obfuscation.OpenSearch.Enabled { + if span.Meta[tagOpenSearchBody] != "" { + span.Meta[tagOpenSearchBody] = o.ObfuscateOpenSearchString(span.Meta[tagOpenSearchBody]) + } + } + } +} diff --git a/receiver/datadogreceiver/internal/translator/traces_translator_test.go b/receiver/datadogreceiver/internal/translator/traces_translator_test.go index 595242f25bdc0..1d143a6ba6949 100644 --- a/receiver/datadogreceiver/internal/translator/traces_translator_test.go +++ b/receiver/datadogreceiver/internal/translator/traces_translator_test.go @@ -7,6 +7,7 @@ import ( "bytes" "fmt" "io" + "log" "net/http" "strconv" "testing" @@ -19,6 +20,7 @@ import ( semconv "go.opentelemetry.io/collector/semconv/v1.16.0" "google.golang.org/protobuf/proto" + "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/datadogreceiver" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/datadogreceiver/internal/translator/header" ) @@ -90,7 +92,9 @@ func TestTracePayloadV05Unmarshalling(t *testing.T) { tracePayloads, _ := HandleTracesPayload(req) assert.Len(t, tracePayloads, 1, "Expected one translated payload") tracePayload := tracePayloads[0] - translated := ToTraces(tracePayload, req) + + tt := NewTracesTranslator(datadogreceiver.NewConfig()) + translated := tt.ToTraces(tracePayload, req) assert.Equal(t, 1, translated.SpanCount(), "Span Count wrong") span := translated.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0) assert.NotNil(t, span) @@ -110,6 +114,50 @@ func TestTracePayloadV05Unmarshalling(t *testing.T) { assert.Equal(t, 1.2, numericAttributeFloat) } +func TestTracePayloadV05UnmarshallingObfuscateSQL(t *testing.T) { + stringSlice, ok := data[0].([]string) + if !ok { + log.Fatal("Type assertion failed for data[0]") + } + + // Resource + stringSlice[7] = "/* query.digest=46f6d7bdae8dd8e8c907aed02b0e6525 tx=vjxn46eyxp */ \nselect /*+ MAX_EXECUTION_TIME(?) */ table.id as id1_6_0_, table.created_at as created_2_6_0_, table.updated_at as updated_3_6_0_, table.description as descript4_6_0_, table.is_internal as is_inter5_6_0_, table.name as name6_6_0_ \nfrom \ntable \nwhere table.id in (?, ?, ?, ?)" + + payload, err := vmsgp.Marshal(&data) + assert.NoError(t, err) + + var traces pb.Traces + require.NoError(t, traces.UnmarshalMsgDictionary(payload), "Must not error when marshaling content") + req, _ := http.NewRequest(http.MethodPost, "/v0.5/traces", io.NopCloser(bytes.NewReader(payload))) + + tracePayloads, _ := HandleTracesPayload(req) + assert.Len(t, tracePayloads, 1, "Expected one translated payload") + tracePayload := tracePayloads[0] + + cfg := datadogreceiver.NewConfig() + cfg.Traces.Obfuscation.Enabled = true + + tt := NewTracesTranslator(cfg) + translated := tt.ToTraces(tracePayload, req) + assert.Equal(t, 1, translated.SpanCount(), "Span Count wrong") + span := translated.ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0) + assert.NotNil(t, span) + assert.Equal(t, "my-name", span.Name()) + assert.Equal(t, 11, span.Attributes().Len(), "missing attributes") + value, exists := span.Attributes().Get("service.name") + assert.True(t, exists, "service.name missing") + assert.Equal(t, "my-service", value.AsString(), "service.name attribute value incorrect") + serviceVersionValue, _ := span.Attributes().Get("service.version") + assert.Equal(t, "1.0.1", serviceVersionValue.AsString()) + spanResource, _ := span.Attributes().Get("dd.span.Resource") + assert.Equal(t, "select table.id, table.created_at, table.updated_at, table.description, table.is_internal, table.name from table where table.id in ( ? )", spanResource.Str()) + spanResource1, _ := span.Attributes().Get("sampling.priority") + assert.Equal(t, fmt.Sprintf("%f", 1.0), spanResource1.Str()) + numericAttributeValue, _ := span.Attributes().Get("numeric_attribute") + numericAttributeFloat, _ := strconv.ParseFloat(numericAttributeValue.AsString(), 64) + assert.Equal(t, 1.2, numericAttributeFloat) +} + func TestTracePayloadV07Unmarshalling(t *testing.T) { traces := getTraces(t) apiPayload := pb.TracerPayload{ diff --git a/receiver/datadogreceiver/receiver.go b/receiver/datadogreceiver/receiver.go index 17b2814ce007e..dd598589ab40e 100644 --- a/receiver/datadogreceiver/receiver.go +++ b/receiver/datadogreceiver/receiver.go @@ -37,6 +37,7 @@ type datadogReceiver struct { metricsTranslator *translator.MetricsTranslator statsTranslator *translator.StatsTranslator + tracesTranslator *translator.TracesTranslator server *http.Server tReceiver *receiverhelper.ObsReport @@ -149,6 +150,7 @@ func newDataDogReceiver(config *Config, params receiver.Settings) (component.Com tReceiver: instance, metricsTranslator: translator.NewMetricsTranslator(params.BuildInfo), statsTranslator: translator.NewStatsTranslator(), + tracesTranslator: translator.NewTracesTranslator(config), }, nil } @@ -235,7 +237,7 @@ func (ddr *datadogReceiver) handleTraces(w http.ResponseWriter, req *http.Reques return } for _, ddTrace := range ddTraces { - otelTraces := translator.ToTraces(ddTrace, req) + otelTraces := ddr.tracesTranslator.ToTraces(ddTrace, req) spanCount = otelTraces.SpanCount() err = ddr.nextTracesConsumer.ConsumeTraces(obsCtx, otelTraces) if err != nil {