diff --git a/code/go/internal/validator/semantic/validate_unique_fields.go b/code/go/internal/validator/semantic/validate_unique_fields.go new file mode 100644 index 000000000..551aa895e --- /dev/null +++ b/code/go/internal/validator/semantic/validate_unique_fields.go @@ -0,0 +1,59 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package semantic + +import ( + "sort" + "strings" + + ve "github.com/elastic/package-spec/code/go/internal/errors" + "github.com/elastic/package-spec/code/go/internal/fspath" + "github.com/pkg/errors" +) + +// ValidateUniqueFields verifies that any field is defined only once on each data stream. +func ValidateUniqueFields(fsys fspath.FS) ve.ValidationErrors { + // data_stream -> field -> files + fields := make(map[string]map[string][]string) + + countField := func(fieldsFile string, f field) ve.ValidationErrors { + if len(f.Fields) > 0 { + // Don't count groups + return nil + } + + dataStream, err := dataStreamFromFieldsPath(fsys.Path(), fieldsFile) + if err != nil { + return ve.ValidationErrors{err} + } + + dsMap, found := fields[dataStream] + if !found { + dsMap = make(map[string][]string) + fields[dataStream] = dsMap + } + dsMap[f.Name] = append(dsMap[f.Name], fieldsFile) + return nil + } + + err := validateFields(fsys, countField) + if err != nil { + return err + } + + var errs ve.ValidationErrors + for dataStream, dataStreamFields := range fields { + for field, files := range dataStreamFields { + if len(files) > 1 { + sort.Strings(files) + errs = append(errs, + errors.Errorf("field %q is defined multiple times for data stream %q, found in: %s", + field, dataStream, strings.Join(files, ", "))) + } + } + + } + return errs +} diff --git a/code/go/internal/validator/spec.go b/code/go/internal/validator/spec.go index e341b76c6..ce3f6f98b 100644 --- 
a/code/go/internal/validator/spec.go +++ b/code/go/internal/validator/spec.go @@ -71,6 +71,7 @@ func (s Spec) ValidatePackage(pkg Package) ve.ValidationErrors { semantic.ValidatePrerelease, semantic.ValidateFieldGroups, semantic.ValidateFieldsLimits(rootSpec.Limits.FieldsPerDataStreamLimit), + semantic.ValidateUniqueFields, semantic.ValidateDimensionFields, semantic.ValidateRequiredFields, } diff --git a/code/go/pkg/validator/validator_test.go b/code/go/pkg/validator/validator_test.go index 71028a19b..476736cf3 100644 --- a/code/go/pkg/validator/validator_test.go +++ b/code/go/pkg/validator/validator_test.go @@ -264,6 +264,30 @@ func TestValidateVersionIntegrity(t *testing.T) { } } +func TestValidateDuplicatedFields(t *testing.T) { + tests := map[string]string{ + "bad_duplicated_fields": "field \"event.dataset\" is defined multiple times for data stream \"wrong\", found in: ../../../../test/packages/bad_duplicated_fields/data_stream/wrong/fields/base-fields.yml, ../../../../test/packages/bad_duplicated_fields/data_stream/wrong/fields/ecs.yml", + } + + for pkgName, expectedErrorMessage := range tests { + t.Run(pkgName, func(t *testing.T) { + errs := ValidateFromPath(filepath.Join("..", "..", "..", "..", "test", "packages", pkgName)) + require.Error(t, errs) + vErrs, ok := errs.(errors.ValidationErrors) + require.True(t, ok) + + assert.Len(t, vErrs, 1) + + var errMessages []string + for _, vErr := range vErrs { + errMessages = append(errMessages, vErr.Error()) + } + require.Contains(t, errMessages, expectedErrorMessage) + }) + } + +} + func requireErrorMessage(t *testing.T, pkgName string, invalidItemsPerFolder map[string][]string, expectedErrorMessage string) { pkgRootPath := filepath.Join("..", "..", "..", "..", "test", "packages", pkgName) diff --git a/test/packages/bad_duplicated_fields/changelog.yml b/test/packages/bad_duplicated_fields/changelog.yml new file mode 100644 index 000000000..75dfcddc2 --- /dev/null +++ 
b/test/packages/bad_duplicated_fields/changelog.yml @@ -0,0 +1,6 @@ +# newer versions go on top +- version: "0.0.1" + changes: + - description: Initial draft of the package + type: enhancement + link: https://github.com/elastic/integrations/pull/309 diff --git a/test/packages/bad_duplicated_fields/data_stream/good/agent/stream/stream.yml.hbs b/test/packages/bad_duplicated_fields/data_stream/good/agent/stream/stream.yml.hbs new file mode 100644 index 000000000..5845510de --- /dev/null +++ b/test/packages/bad_duplicated_fields/data_stream/good/agent/stream/stream.yml.hbs @@ -0,0 +1,7 @@ +paths: +{{#each paths as |path i|}} + - {{path}} +{{/each}} +exclude_files: [".gz$"] +processors: + - add_locale: ~ diff --git a/test/packages/bad_duplicated_fields/data_stream/good/elasticsearch/ingest_pipeline/default.yml b/test/packages/bad_duplicated_fields/data_stream/good/elasticsearch/ingest_pipeline/default.yml new file mode 100644 index 000000000..81221adf3 --- /dev/null +++ b/test/packages/bad_duplicated_fields/data_stream/good/elasticsearch/ingest_pipeline/default.yml @@ -0,0 +1,10 @@ +--- +description: Pipeline for processing sample logs +processors: +- set: + field: sample_field + value: "1" +on_failure: +- set: + field: error.message + value: '{{ _ingest.on_failure_message }}' \ No newline at end of file diff --git a/test/packages/bad_duplicated_fields/data_stream/good/fields/base-fields.yml b/test/packages/bad_duplicated_fields/data_stream/good/fields/base-fields.yml new file mode 100644 index 000000000..7c798f453 --- /dev/null +++ b/test/packages/bad_duplicated_fields/data_stream/good/fields/base-fields.yml @@ -0,0 +1,12 @@ +- name: data_stream.type + type: constant_keyword + description: Data stream type. +- name: data_stream.dataset + type: constant_keyword + description: Data stream dataset. +- name: data_stream.namespace + type: constant_keyword + description: Data stream namespace. +- name: '@timestamp' + type: date + description: Event timestamp. 
diff --git a/test/packages/bad_duplicated_fields/data_stream/good/manifest.yml b/test/packages/bad_duplicated_fields/data_stream/good/manifest.yml new file mode 100644 index 000000000..a3791222e --- /dev/null +++ b/test/packages/bad_duplicated_fields/data_stream/good/manifest.yml @@ -0,0 +1,13 @@ +title: "Valid data stream." +type: logs +streams: + - input: logfile + title: Sample logs + description: Collect sample logs + vars: + - name: paths + type: text + title: Paths + multi: true + default: + - /var/log/*.log diff --git a/test/packages/bad_duplicated_fields/data_stream/wrong/agent/stream/stream.yml.hbs b/test/packages/bad_duplicated_fields/data_stream/wrong/agent/stream/stream.yml.hbs new file mode 100644 index 000000000..5845510de --- /dev/null +++ b/test/packages/bad_duplicated_fields/data_stream/wrong/agent/stream/stream.yml.hbs @@ -0,0 +1,7 @@ +paths: +{{#each paths as |path i|}} + - {{path}} +{{/each}} +exclude_files: [".gz$"] +processors: + - add_locale: ~ diff --git a/test/packages/bad_duplicated_fields/data_stream/wrong/elasticsearch/ingest_pipeline/default.yml b/test/packages/bad_duplicated_fields/data_stream/wrong/elasticsearch/ingest_pipeline/default.yml new file mode 100644 index 000000000..81221adf3 --- /dev/null +++ b/test/packages/bad_duplicated_fields/data_stream/wrong/elasticsearch/ingest_pipeline/default.yml @@ -0,0 +1,10 @@ +--- +description: Pipeline for processing sample logs +processors: +- set: + field: sample_field + value: "1" +on_failure: +- set: + field: error.message + value: '{{ _ingest.on_failure_message }}' \ No newline at end of file diff --git a/test/packages/bad_duplicated_fields/data_stream/wrong/fields/base-fields.yml b/test/packages/bad_duplicated_fields/data_stream/wrong/fields/base-fields.yml new file mode 100644 index 000000000..e905fc8e7 --- /dev/null +++ b/test/packages/bad_duplicated_fields/data_stream/wrong/fields/base-fields.yml @@ -0,0 +1,14 @@ +- name: data_stream.type + type: constant_keyword + description: Data 
stream type. +- name: data_stream.dataset + type: constant_keyword + description: Data stream dataset. +- name: data_stream.namespace + type: constant_keyword + description: Data stream namespace. +- name: '@timestamp' + type: date + description: Event timestamp. +- name: event.dataset + type: constant_keyword diff --git a/test/packages/bad_duplicated_fields/data_stream/wrong/fields/ecs.yml b/test/packages/bad_duplicated_fields/data_stream/wrong/fields/ecs.yml new file mode 100644 index 000000000..eb760b3bd --- /dev/null +++ b/test/packages/bad_duplicated_fields/data_stream/wrong/fields/ecs.yml @@ -0,0 +1,2 @@ +- name: event.dataset + external: ecs diff --git a/test/packages/bad_duplicated_fields/data_stream/wrong/manifest.yml b/test/packages/bad_duplicated_fields/data_stream/wrong/manifest.yml new file mode 100644 index 000000000..39da35576 --- /dev/null +++ b/test/packages/bad_duplicated_fields/data_stream/wrong/manifest.yml @@ -0,0 +1,13 @@ +title: "Data stream with duplicated fields." +type: logs +streams: + - input: logfile + title: Sample logs + description: Collect sample logs + vars: + - name: paths + type: text + title: Paths + multi: true + default: + - /var/log/*.log diff --git a/test/packages/bad_duplicated_fields/docs/README.md b/test/packages/bad_duplicated_fields/docs/README.md new file mode 100644 index 000000000..bef806d2f --- /dev/null +++ b/test/packages/bad_duplicated_fields/docs/README.md @@ -0,0 +1,5 @@ +# Package with duplicated fields + +This is a new integration created using the [elastic-package](https://github.com/elastic/elastic-package) tool. + +Consider using the README template file `_dev/build/docs/README.md` to generate a list of exported fields or include a sample event. 
\ No newline at end of file diff --git a/test/packages/bad_duplicated_fields/img/sample-logo.svg b/test/packages/bad_duplicated_fields/img/sample-logo.svg new file mode 100644 index 000000000..6268dd88f --- /dev/null +++ b/test/packages/bad_duplicated_fields/img/sample-logo.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/test/packages/bad_duplicated_fields/img/sample-screenshot.png b/test/packages/bad_duplicated_fields/img/sample-screenshot.png new file mode 100644 index 000000000..d7a56a3ec Binary files /dev/null and b/test/packages/bad_duplicated_fields/img/sample-screenshot.png differ diff --git a/test/packages/bad_duplicated_fields/manifest.yml b/test/packages/bad_duplicated_fields/manifest.yml new file mode 100644 index 000000000..50e976985 --- /dev/null +++ b/test/packages/bad_duplicated_fields/manifest.yml @@ -0,0 +1,31 @@ +format_version: 1.0.0 +name: bad_duplicated_fields +title: "Package with duplicated fields" +version: 0.0.1 +license: basic +description: "This is an invalid package because it contains duplicated fields." +type: integration +categories: + - custom +conditions: + kibana.version: "^8.1.0" +screenshots: + - src: /img/sample-screenshot.png + title: Sample screenshot + size: 600x600 + type: image/png +icons: + - src: /img/sample-logo.svg + title: Sample logo + size: 32x32 + type: image/svg+xml +policy_templates: + - name: sample + title: Sample logs + description: Collect sample logs + inputs: + - type: logfile + title: Collect sample logs from instances + description: Collecting sample logs +owner: + github: elastic/ecosystem diff --git a/versions/1/changelog.yml b/versions/1/changelog.yml index 8e0aa44bb..a88688d50 100644 --- a/versions/1/changelog.yml +++ b/versions/1/changelog.yml @@ -4,9 +4,9 @@ ## - version: 1.7.1-next changes: - - description: Prepare next release + - description: Validate that fields are only defined once per data stream. 
type: enhancement - link: https://github.com/elastic/package-spec/pull/301 + link: https://github.com/elastic/package-spec/pull/309 - version: 1.7.0 changes: - description: Add kibana/osquery-pack-asset