-
Notifications
You must be signed in to change notification settings - Fork 5k
Fingerprint processor #14205
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fingerprint processor #14205
Changes from all commits
cc31da1
ecc4772
0d2af8c
00a3575
a6afbc9
7521e11
d8b5958
a6b6156
85ef943
8cda4be
3c75d3b
da29e8d
52e5110
92af70c
b1981e8
3a2825c
9a0be57
b2ecab6
217e318
6479e84
661f891
ce27088
2d17110
ba390ad
de73d0d
a6be2ab
c65da9a
5f577f4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| // Licensed to Elasticsearch B.V. under one or more contributor | ||
| // license agreements. See the NOTICE file distributed with | ||
| // this work for additional information regarding copyright | ||
| // ownership. Elasticsearch B.V. licenses this file to you under | ||
| // the Apache License, Version 2.0 (the "License"); you may | ||
| // not use this file except in compliance with the License. | ||
| // You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| package fingerprint | ||
|
|
||
| // Config for fingerprint processor. | ||
| type Config struct { | ||
| Method hashMethod `config:"method"` // Hash function to use for fingerprinting | ||
| Fields []string `config:"fields" validate:"required"` // Source fields to compute fingerprint from | ||
| TargetField string `config:"target_field"` // Target field for the fingerprint | ||
| Encoding encodingMethod `config:"encoding"` // Encoding to use for target field value | ||
| IgnoreMissing bool `config:"ignore_missing"` // Ignore missing fields? | ||
| } | ||
|
|
||
| func defaultConfig() Config { | ||
| return Config{ | ||
| Method: hashes["sha256"], | ||
| TargetField: "fingerprint", | ||
| Encoding: encodings["hex"], | ||
| IgnoreMissing: false, | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,46 @@ | ||
| // Licensed to Elasticsearch B.V. under one or more contributor | ||
| // license agreements. See the NOTICE file distributed with | ||
| // this work for additional information regarding copyright | ||
| // ownership. Elasticsearch B.V. licenses this file to you under | ||
| // the Apache License, Version 2.0 (the "License"); you may | ||
| // not use this file except in compliance with the License. | ||
| // You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| package fingerprint | ||
|
|
||
| import ( | ||
| "encoding/base32" | ||
| "encoding/base64" | ||
| "encoding/hex" | ||
| "strings" | ||
| ) | ||
|
|
||
| type encodingMethod func([]byte) string | ||
|
|
||
| var encodings = map[string]encodingMethod{ | ||
| "hex": hex.EncodeToString, | ||
| "base32": base32.StdEncoding.EncodeToString, | ||
| "base64": base64.StdEncoding.EncodeToString, | ||
| } | ||
|
|
||
| // Unpack creates the encodingMethod from the given string | ||
| func (e *encodingMethod) Unpack(str string) error { | ||
| str = strings.ToLower(str) | ||
|
|
||
| m, found := encodings[str] | ||
| if !found { | ||
| return makeErrUnknownEncoding(str) | ||
| } | ||
|
|
||
| *e = m | ||
| return nil | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
| // Licensed to Elasticsearch B.V. under one or more contributor | ||
| // license agreements. See the NOTICE file distributed with | ||
| // this work for additional information regarding copyright | ||
| // ownership. Elasticsearch B.V. licenses this file to you under | ||
| // the Apache License, Version 2.0 (the "License"); you may | ||
| // not use this file except in compliance with the License. | ||
| // You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| package fingerprint | ||
|
|
||
| import ( | ||
| "errors" | ||
| "fmt" | ||
| ) | ||
|
|
||
| var errNoFields = errors.New("must specify at least one field") | ||
|
|
||
| type ( | ||
| errUnknownEncoding struct{ encoding string } | ||
| errUnknownMethod struct{ method string } | ||
| errConfigUnpack struct{ cause error } | ||
| errComputeFingerprint struct{ cause error } | ||
| errMissingField struct { | ||
| field string | ||
| cause error | ||
| } | ||
| errNonScalarField struct{ field string } | ||
| ) | ||
|
|
||
| func makeErrUnknownEncoding(encoding string) errUnknownEncoding { | ||
| return errUnknownEncoding{encoding} | ||
| } | ||
| func (e errUnknownEncoding) Error() string { | ||
| return fmt.Sprintf("invalid encoding [%s]", e.encoding) | ||
| } | ||
|
|
||
| func makeErrUnknownMethod(method string) errUnknownMethod { | ||
| return errUnknownMethod{method} | ||
| } | ||
| func (e errUnknownMethod) Error() string { | ||
| return fmt.Sprintf("invalid fingerprinting method [%s]", e.method) | ||
| } | ||
|
|
||
| func makeErrConfigUnpack(cause error) errConfigUnpack { | ||
| return errConfigUnpack{cause} | ||
| } | ||
| func (e errConfigUnpack) Error() string { | ||
| return fmt.Sprintf("failed to unpack %v processor configuration: %v", processorName, e.cause) | ||
| } | ||
|
|
||
| func makeErrComputeFingerprint(cause error) errComputeFingerprint { | ||
| return errComputeFingerprint{cause} | ||
| } | ||
| func (e errComputeFingerprint) Error() string { | ||
| return fmt.Sprintf("failed to compute fingerprint: %v", e.cause) | ||
| } | ||
|
|
||
| func makeErrMissingField(field string, cause error) errMissingField { | ||
| return errMissingField{field, cause} | ||
| } | ||
| func (e errMissingField) Error() string { | ||
| return fmt.Sprintf("failed to find field [%v] in event: %v", e.field, e.cause) | ||
| } | ||
|
|
||
| func makeErrNonScalarField(field string) errNonScalarField { | ||
| return errNonScalarField{field} | ||
| } | ||
| func (e errNonScalarField) Error() string { | ||
| return fmt.Sprintf("cannot compute fingerprint using non-scalar field [%v]", e.field) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,111 @@ | ||
| // Licensed to Elasticsearch B.V. under one or more contributor | ||
| // license agreements. See the NOTICE file distributed with | ||
| // this work for additional information regarding copyright | ||
| // ownership. Elasticsearch B.V. licenses this file to you under | ||
| // the Apache License, Version 2.0 (the "License"); you may | ||
| // not use this file except in compliance with the License. | ||
| // You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| package fingerprint | ||
|
|
||
| import ( | ||
| "fmt" | ||
| "hash" | ||
| "io" | ||
| "time" | ||
|
|
||
| "github.com/elastic/beats/libbeat/beat" | ||
| "github.com/elastic/beats/libbeat/common" | ||
| "github.com/elastic/beats/libbeat/processors" | ||
| jsprocessor "github.com/elastic/beats/libbeat/processors/script/javascript/module/processor" | ||
| ) | ||
|
|
||
| func init() { | ||
| processors.RegisterPlugin("fingerprint", New) | ||
| jsprocessor.RegisterPlugin("Fingerprint", New) | ||
| } | ||
|
|
||
| const processorName = "fingerprint" | ||
|
|
||
| type fingerprint struct { | ||
| config Config | ||
| fields []string | ||
| hash hash.Hash | ||
| } | ||
|
|
||
| // New constructs a new fingerprint processor. | ||
| func New(cfg *common.Config) (processors.Processor, error) { | ||
| config := defaultConfig() | ||
| if err := cfg.Unpack(&config); err != nil { | ||
| return nil, makeErrConfigUnpack(err) | ||
| } | ||
|
|
||
| fields := common.MakeStringSet(config.Fields...) | ||
|
|
||
| p := &fingerprint{ | ||
| config: config, | ||
| hash: config.Method(), | ||
| fields: fields.ToSlice(), | ||
| } | ||
|
|
||
| return p, nil | ||
| } | ||
|
|
||
| // Run enriches the given event with fingerprint information | ||
| func (p *fingerprint) Run(event *beat.Event) (*beat.Event, error) { | ||
| hashFn := p.hash | ||
| hashFn.Reset() | ||
|
|
||
| err := p.writeFields(hashFn, event.Fields) | ||
| if err != nil { | ||
| return nil, makeErrComputeFingerprint(err) | ||
| } | ||
|
|
||
| hash := hashFn.Sum(nil) | ||
| encodedHash := p.config.Encoding(hash) | ||
|
|
||
| if _, err = event.PutValue(p.config.TargetField, encodedHash); err != nil { | ||
| return nil, makeErrComputeFingerprint(err) | ||
| } | ||
|
|
||
| return event, nil | ||
| } | ||
|
|
||
| func (p *fingerprint) String() string { | ||
| return fmt.Sprintf("%v=[method=[%v]]", processorName, p.config.Method) | ||
| } | ||
|
|
||
| func (p *fingerprint) writeFields(to io.Writer, eventFields common.MapStr) error { | ||
| for _, k := range p.fields { | ||
| v, err := eventFields.GetValue(k) | ||
| if err != nil { | ||
| if p.config.IgnoreMissing { | ||
| continue | ||
| } | ||
| return makeErrMissingField(k, err) | ||
| } | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we want to have an option to ignore missing fields in case we have at least one field present?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, but is this the same as your suggestion in #14205 (comment) or something different? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. almost. My suggestion originally was to add support to ignore missing fields. But apparently we can have other error types as well. Would it make sense to treat those other types as 'missing' as well?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see. So if we can't "get" a field for whatever reason, we treat it as missing and then, if the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The only other error that can be returned here is if we try to get a value for a nested field (e.g. |
||
|
|
||
| i := v | ||
| switch vv := v.(type) { | ||
| case map[string]interface{}, []interface{}, common.MapStr: | ||
| return makeErrNonScalarField(k) | ||
| case time.Time: | ||
| // Ensure we consistently hash times in UTC. | ||
| i = vv.UTC() | ||
| } | ||
|
|
||
| fmt.Fprintf(to, "|%v|%v", k, i) | ||
| } | ||
|
|
||
| io.WriteString(to, "|") | ||
| return nil | ||
| } | ||
|
urso marked this conversation as resolved.
Outdated
|
||
Uh oh!
There was an error while loading. Please reload this page.