diff --git a/docs/howto/ingest_geoip.md b/docs/howto/ingest_geoip.md index 51c956fc32..3dd9cd4afb 100644 --- a/docs/howto/ingest_geoip.md +++ b/docs/howto/ingest_geoip.md @@ -2,26 +2,7 @@ Elasticsearch provides default GeoIP databases that can be downloaded in runtime and which weights ~70 MB. This can be a root cause of flakiness of package tests, so elastic-package embeds small samples of GeoIP databases, that can identify -accurately only few ranges of IP addresses: - -``` -1.128.3.4 -175.16.199.1 -216.160.83.57 -216.160.83.61 -67.43.156.12 -81.2.69.143 -81.2.69.144 -81.2.69.145 -81.2.69.193 -89.160.20.112 -89.160.20.156 -67.43.156.12 -67.43.156.13 -67.43.156.14 -67.43.156.15 -2a02:cf40:add:4002:91f2:a9b2:e09a:6fc6 -``` +accurately only a few ranges of IP addresses, listed [here](../../internal/fields/_static/allowed_geo_ips.txt). If you want the ingest pipeline to include a "geo" section in the event, feel free to use one of above IP addresses. Embedded databases contain information about: cities, countries and ASNs. 
\ No newline at end of file diff --git a/internal/fields/_static/allowed_geo_ips.txt b/internal/fields/_static/allowed_geo_ips.txt new file mode 100644 index 0000000000..500b8b6bb9 --- /dev/null +++ b/internal/fields/_static/allowed_geo_ips.txt @@ -0,0 +1,15 @@ +1.128.3.4 +175.16.199.1 +216.160.83.57 +216.160.83.61 +81.2.69.143 +81.2.69.144 +81.2.69.145 +81.2.69.193 +89.160.20.112 +89.160.20.156 +67.43.156.12 +67.43.156.13 +67.43.156.14 +67.43.156.15 +2a02:cf40:add:4002:91f2:a9b2:e09a:6fc6 \ No newline at end of file diff --git a/internal/fields/validate.go b/internal/fields/validate.go index 6ceee110cb..77cd03e77d 100644 --- a/internal/fields/validate.go +++ b/internal/fields/validate.go @@ -5,8 +5,10 @@ package fields import ( + _ "embed" "encoding/json" "fmt" + "net" "os" "path/filepath" "regexp" @@ -31,6 +33,9 @@ type Validator struct { numericKeywordFields map[string]struct{} disabledDependencyManagement bool + + enabledAllowedIPCheck bool + allowedIPs map[string]struct{} } // ValidatorOption represents an optional flag that can be passed to CreateValidatorForDataStream. @@ -64,6 +69,14 @@ func WithDisabledDependencyManagement() ValidatorOption { } } +// WithEnabledAllowedIPCheck configures the validator to perform check on the IP values against an allowed list. +func WithEnabledAllowedIPCheck() ValidatorOption { + return func(v *Validator) error { + v.enabledAllowedIPCheck = true + return nil + } +} + // CreateValidatorForDataStream function creates a validator for the data stream. 
func CreateValidatorForDataStream(dataStreamRootPath string, opts ...ValidatorOption) (v *Validator, err error) { v = new(Validator) @@ -72,6 +85,9 @@ func CreateValidatorForDataStream(dataStreamRootPath string, opts ...ValidatorOp return nil, err } } + + v.allowedIPs = initializeAllowedIPsList() + v.Schema, err = loadFieldsForDataStream(dataStreamRootPath) if err != nil { return nil, errors.Wrapf(err, "can't load fields for data stream (path: %s)", dataStreamRootPath) @@ -99,6 +115,25 @@ func CreateValidatorForDataStream(dataStreamRootPath string, opts ...ValidatorOp return v, nil } +//go:embed _static/allowed_geo_ips.txt +var allowedGeoIPs string + +func initializeAllowedIPsList() map[string]struct{} { + m := map[string]struct{}{ + "0.0.0.0": {}, "255.255.255.255": {}, + "0:0:0:0:0:0:0:0": {}, "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff": {}, + } + for _, ip := range strings.Split(allowedGeoIPs, "\n") { + ip = strings.Trim(ip, " \n\t") + if ip == "" { + continue + } + m[ip] = struct{}{} + } + + return m +} + func loadFieldsForDataStream(dataStreamRootPath string) ([]FieldDefinition, error) { fieldsDir := filepath.Join(dataStreamRootPath, "fields") files, err := filepath.Glob(filepath.Join(fieldsDir, "*.yml")) @@ -306,7 +341,7 @@ func (v *Validator) parseElementValue(key string, definition FieldDefinition, va if err := ensurePatternMatches(key, valStr, definition.Pattern); err != nil { return err } - case "date", "ip", "keyword", "text": + case "date", "keyword", "text": var valStr string valStr, valid = val.(string) if !valid { @@ -316,6 +351,20 @@ func (v *Validator) parseElementValue(key string, definition FieldDefinition, va if err := ensurePatternMatches(key, valStr, definition.Pattern); err != nil { return err } + case "ip": + var valStr string + valStr, valid = val.(string) + if !valid { + break + } + + if err := ensurePatternMatches(key, valStr, definition.Pattern); err != nil { + return err + } + + if v.enabledAllowedIPCheck && !v.isAllowedIPValue(valStr) { 
+ return fmt.Errorf("the IP %q is not one of the allowed test IPs", valStr) + } case "float", "long", "double": _, valid = val.(float64) default: @@ -328,6 +377,30 @@ func (v *Validator) parseElementValue(key string, definition FieldDefinition, va return nil } +// isAllowedIPValue checks if the provided IP is allowed for testing. +// The set of allowed IPs is: +// - private IPs as described in RFC 1918 & RFC 4193, plus loopback and link-local (unicast and multicast) addresses +// - public IPs allowed by MaxMind for testing +// - 0.0.0.0 and 255.255.255.255 for IPv4 +// - 0:0:0:0:0:0:0:0 and ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff for IPv6 +func (v *Validator) isAllowedIPValue(s string) bool { + if _, found := v.allowedIPs[s]; found { + return true + } + + ip := net.ParseIP(s) + if ip == nil { + return false + } + + if ip.IsPrivate() || ip.IsLoopback() || + ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() { + return true + } + + return false +} + // ensureSingleElementValue extracts single entity from a potential array, which is a valid field representation // in Elasticsearch. For type assertion we need a single value. 
func ensureSingleElementValue(val interface{}) (interface{}, bool) { diff --git a/internal/testrunner/runners/pipeline/runner.go b/internal/testrunner/runners/pipeline/runner.go index fdd5046026..9861aef4fa 100644 --- a/internal/testrunner/runners/pipeline/runner.go +++ b/internal/testrunner/runners/pipeline/runner.go @@ -131,7 +131,11 @@ func (r *runner) run() ([]testrunner.TestResult, error) { tr.TimeElapsed = time.Since(startTime) fieldsValidator, err := fields.CreateValidatorForDataStream(dataStreamPath, - fields.WithNumericKeywordFields(tc.config.NumericKeywordFields)) + fields.WithNumericKeywordFields(tc.config.NumericKeywordFields), + // explicitly enabled for pipeline tests only + // since system tests can have dynamic public IPs + fields.WithEnabledAllowedIPCheck(), + ) if err != nil { return nil, errors.Wrapf(err, "creating fields validator for data stream failed (path: %s, test case file: %s)", dataStreamPath, testCaseFile) }