diff --git a/.github/actions/spelling/allow.txt b/.github/actions/spelling/allow.txt index 85ca0f9f2981f..9b14c6dbd7086 100644 --- a/.github/actions/spelling/allow.txt +++ b/.github/actions/spelling/allow.txt @@ -355,6 +355,7 @@ netcat netdata Netflix netlify +netlink Neue neuronull Nextbook diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 3e88b0ed77069..7a362b2e66f4f 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -115,6 +115,36 @@ updates: docker-images: patterns: - "*" + - package-ecosystem: "docker" + directory: "/distribution/docker/distroless-static" + schedule: + interval: "monthly" + time: "04:00" # UTC + labels: + - "domain: releasing" + - "no-changelog" + commit-message: + prefix: "chore(deps)" + open-pull-requests-limit: 100 + groups: + docker-images: + patterns: + - "*" + - package-ecosystem: "docker" + directory: "/distribution/docker/distroless-libc" + schedule: + interval: "monthly" + time: "04:00" # UTC + labels: + - "domain: releasing" + - "no-changelog" + commit-message: + prefix: "chore(deps)" + open-pull-requests-limit: 100 + groups: + docker-images: + patterns: + - "*" - package-ecosystem: "github-actions" directory: "/" schedule: diff --git a/.github/workflows/cla.yml b/.github/workflows/cla.yml index 179a849e92a0e..c2693a2d33a18 100644 --- a/.github/workflows/cla.yml +++ b/.github/workflows/cla.yml @@ -40,9 +40,9 @@ jobs: branch: 'vector' remote-repository-name: cla-signatures remote-organization-name: DataDog + allowlist: step-security-bot # the followings are the optional inputs - If the optional inputs are not given, then default values will be taken - #allowlist: user1,bot* #create-file-commit-message: 'For example: Creating file for storing CLA Signatures' #signed-commit-message: 'For example: $contributorName has signed the CLA in $owner/$repo#$pullRequestNo' #custom-notsigned-prcomment: 'pull request comment with Introductory message to ask new contributors to sign' diff --git 
a/.github/workflows/preview_site_trigger.yml b/.github/workflows/preview_site_trigger.yml index 761c010524185..b1e1838b5c49f 100644 --- a/.github/workflows/preview_site_trigger.yml +++ b/.github/workflows/preview_site_trigger.yml @@ -12,8 +12,9 @@ jobs: # Validate branch name - name: Validate branch name and set output id: validate + env: + BRANCH: ${{ github.head_ref }} run: | - BRANCH="${{ github.head_ref }}" if [[ ! "$BRANCH" =~ ^[a-zA-Z0-9_-]+$ ]]; then echo "valid=false" >> $GITHUB_OUTPUT else diff --git a/Cargo.lock b/Cargo.lock index 6a0d4636bf0ee..5ee83ae078c4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1854,7 +1854,7 @@ version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.10.0", "cexpr", "clang-sys", "itertools 0.13.0", @@ -1889,11 +1889,11 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.9.0" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" dependencies = [ - "serde", + "serde_core", ] [[package]] @@ -2227,12 +2227,6 @@ dependencies = [ "url", ] -[[package]] -name = "cassowary" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53" - [[package]] name = "cast" version = "0.3.0" @@ -2454,9 +2448,9 @@ dependencies = [ [[package]] name = "clap_complete" -version = "4.5.64" +version = "4.5.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c0da80818b2d95eca9aa614a30783e42f62bf5fdfee24e68cfb960b071ba8d1" +checksum = "430b4dc2b5e3861848de79627b2bedc9f3342c7da5173a14eaa5d0f8dc18ae5d" dependencies = [ 
"clap", ] @@ -2526,6 +2520,7 @@ dependencies = [ "indoc", "influxdb-line-protocol", "memchr", + "metrics", "opentelemetry-proto", "ordered-float 4.6.0", "prost 0.12.6", @@ -2550,6 +2545,7 @@ dependencies = [ "tracing-test", "uuid", "vector-common", + "vector-common-macros", "vector-config", "vector-config-macros", "vector-core", @@ -2627,9 +2623,9 @@ dependencies = [ [[package]] name = "compact_str" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6050c3a16ddab2e412160b31f2c871015704239bca62f72f6e5f0be631d3f644" +checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" dependencies = [ "castaway", "cfg-if", @@ -2758,6 +2754,12 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "const-str" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93e19f68b180ebff43d6d42005c4b5f046c65fcac28369ba8b3beaad633f9ec0" + [[package]] name = "convert_case" version = "0.4.0" @@ -2992,30 +2994,15 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" -[[package]] -name = "crossterm" -version = "0.28.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" -dependencies = [ - "bitflags 2.9.0", - "crossterm_winapi", - "mio", - "parking_lot 0.12.4", - "rustix 0.38.40", - "signal-hook", - "signal-hook-mio", - "winapi", -] - [[package]] name = "crossterm" version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.10.0", "crossterm_winapi", + "derive_more 2.0.1", "document-features", "futures-core", "mio", @@ -3429,6 +3416,7 @@ version = "2.0.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" dependencies = [ + "convert_case 0.7.1", "proc-macro2 1.0.101", "quote 1.0.40", "syn 2.0.106", @@ -3756,7 +3744,9 @@ version = "0.1.0" dependencies = [ "arc-swap", "chrono", + "const-str", "dyn-clone", + "indoc", "vrl", ] @@ -3998,21 +3988,9 @@ checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" [[package]] name = "fancy-regex" -version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6215aee357f8c7c989ebb4b8466ca4d7dc93b3957039f2fc3ea2ade8ea5f279" -dependencies = [ - "bit-set", - "derivative", - "regex-automata 0.4.8", - "regex-syntax", -] - -[[package]] -name = "fancy-regex" -version = "0.16.1" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf04c5ec15464ace8355a7b440a33aece288993475556d461154d7a62ad9947c" +checksum = "72cf461f865c862bb7dc573f643dd6a2b6842f7c30b07882b56bd148cc2761b8" dependencies = [ "bit-set", "regex-automata 0.4.8", @@ -4113,7 +4091,7 @@ version = "25.9.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09b6620799e7340ebd9968d2e0708eb82cf1971e9a16821e2091b6d6e475eed5" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.10.0", "rustc_version", ] @@ -4440,7 +4418,7 @@ version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2deb07a133b1520dc1a5690e9bd08950108873d7ed5de38dcc74d3b5ebffa110" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.10.0", "libc", "libgit2-sys", "log", @@ -4970,7 +4948,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8a6fe56c0038198998a6f217ca4e7ef3a5e51f46163bd6dd60b5c71ca6c6502" dependencies = [ "async-trait", - "bitflags 2.9.0", + "bitflags 2.10.0", "cfg-if", "data-encoding", "enum-as-inner", @@ -5326,6 +5304,19 @@ dependencies = [ "tower-service", ] +[[package]] +name = 
"hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes 1.10.1", + "hyper 0.14.32", + "native-tls", + "tokio", + "tokio-native-tls", +] + [[package]] name = "hyper-tls" version = "0.6.0" @@ -5613,7 +5604,7 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f37dccff2791ab604f9babef0ba14fbe0be30bd368dc541e2b08d07c8aa908f3" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.10.0", "inotify-sys", "libc", ] @@ -5685,7 +5676,7 @@ version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.10.0", "cfg-if", "libc", ] @@ -5894,15 +5885,15 @@ dependencies = [ [[package]] name = "jsonschema" -version = "0.37.4" +version = "0.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73c9ffb2b5c56d58030e1b532d8e8389da94590515f118cf35b5cb68e4764a7e" +checksum = "89f50532ce4a0ba3ae930212908d8ec50e7806065c059fe9c75da2ece6132294" dependencies = [ "ahash 0.8.11", "bytecount", "data-encoding", "email_address", - "fancy-regex 0.16.1", + "fancy-regex", "fraction", "getrandom 0.3.4", "idna", @@ -5930,7 +5921,7 @@ dependencies = [ "k8s-test-framework", "rand 0.9.2", "regex", - "reqwest 0.12.28", + "reqwest 0.11.26", "serde_json", "tokio", "tracing 0.1.41", @@ -5974,6 +5965,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "kasuari" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fe90c1150662e858c7d5f945089b7517b0a80d8bf7ba4b1b5ffc984e7230a5b" +dependencies = [ + "hashbrown 0.16.0", + "portable-atomic", + "thiserror 2.0.17", +] + [[package]] name = "keccak" version = "0.1.4" @@ -6289,7 +6291,7 @@ version = "0.1.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.10.0", "libc", ] @@ -6338,6 +6340,15 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "line-clipping" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f4de44e98ddbf09375cbf4d17714d18f39195f4f4894e8524501726fd9a8a4a" +dependencies = [ + "bitflags 2.10.0", +] + [[package]] name = "linked-hash-map" version = "0.5.6" @@ -6437,9 +6448,12 @@ dependencies = [ [[package]] name = "lru" -version = "0.16.0" +version = "0.16.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86ea4e65087ff52f3862caff188d489f1fab49a0cb09e01b2e3f1a617b10aaed" +checksum = "a1dc47f592c06f33f8e3aea9591776ec7c9f9e4124778ff8a3c3b87159f7e593" +dependencies = [ + "hashbrown 0.16.0", +] [[package]] name = "lru-cache" @@ -7092,7 +7106,7 @@ version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.10.0", "cfg-if", "cfg_aliases", "libc", @@ -7178,7 +7192,7 @@ version = "8.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4d3d07927151ff8575b7087f245456e549fea62edf0ec4e565a5ee50c8402bc3" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.10.0", "fsevent-sys", "inotify", "kqueue", @@ -7453,7 +7467,7 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c10c2894a6fed806ade6027bcd50662746363a9589d3ec9d9bef30a4e4bc166" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.10.0", ] [[package]] @@ -7511,7 +7525,7 @@ version = "6.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0" dependencies = [ - "bitflags 2.9.0", + "bitflags 
2.10.0", "libc", "once_cell", "onig_sys", @@ -7603,7 +7617,7 @@ version = "0.10.73" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8505734d46c8ab1e19a1dce3aef597ad87dcb4c37e7188231769bd6bd51cebf8" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.10.0", "cfg-if", "foreign-types", "libc", @@ -8369,7 +8383,7 @@ checksum = "2bb0be07becd10686a0bb407298fb425360a5c44a663774406340c59a22de4ce" dependencies = [ "bit-set", "bit-vec", - "bitflags 2.9.0", + "bitflags 2.10.0", "lazy_static", "num-traits", "rand 0.9.2", @@ -8910,32 +8924,74 @@ dependencies = [ [[package]] name = "ratatui" -version = "0.29.0" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1ce67fb8ba4446454d1c8dbaeda0557ff5e94d39d5e5ed7f10a65eb4c8266bc" +dependencies = [ + "instability", + "ratatui-core", + "ratatui-crossterm", + "ratatui-widgets", +] + +[[package]] +name = "ratatui-core" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabd94c2f37801c20583fc49dd5cd6b0ba68c716787c2dd6ed18571e1e63117b" +checksum = "5ef8dea09a92caaf73bff7adb70b76162e5937524058a7e5bff37869cbbec293" dependencies = [ - "bitflags 2.9.0", - "cassowary", + "bitflags 2.10.0", "compact_str", - "crossterm 0.28.1", + "hashbrown 0.16.0", "indoc", - "instability", - "itertools 0.13.0", - "lru 0.12.5", - "paste", - "strum 0.26.3", + "itertools 0.14.0", + "kasuari", + "lru 0.16.3", + "strum 0.27.2", + "thiserror 2.0.17", "unicode-segmentation", "unicode-truncate", "unicode-width 0.2.0", ] +[[package]] +name = "ratatui-crossterm" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "577c9b9f652b4c121fb25c6a391dd06406d3b092ba68827e6d2f09550edc54b3" +dependencies = [ + "cfg-if", + "crossterm", + "instability", + "ratatui-core", +] + +[[package]] +name = "ratatui-widgets" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d7dbfa023cd4e604c2553483820c5fe8aa9d71a42eea5aa77c6e7f35756612db" +dependencies = [ + "bitflags 2.10.0", + "hashbrown 0.16.0", + "indoc", + "instability", + "itertools 0.14.0", + "line-clipping", + "ratatui-core", + "strum 0.27.2", + "time", + "unicode-segmentation", + "unicode-width 0.2.0", +] + [[package]] name = "raw-cpuid" version = "11.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d86a7c4638d42c44551f4791a20e687dbb4c3de1f33c43dd71e355cd429def1" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.10.0", ] [[package]] @@ -9067,7 +9123,7 @@ version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "928fca9cf2aa042393a8325b9ead81d2f0df4cb12e1e24cef072922ccd99c5af" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.10.0", ] [[package]] @@ -9114,9 +9170,9 @@ dependencies = [ [[package]] name = "referencing" -version = "0.37.4" +version = "0.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4283168a506f0dcbdce31c9f9cce3129c924da4c6bca46e46707fcb746d2d70c" +checksum = "15a8af0c6bb8eaf8b07cb06fc31ff30ca6fe19fb99afa476c276d8b24f365b0b" dependencies = [ "ahash 0.8.11", "fluent-uri 0.4.1", @@ -9213,10 +9269,12 @@ dependencies = [ "http-body 0.4.6", "hyper 0.14.32", "hyper-rustls 0.24.2", + "hyper-tls 0.5.0", "ipnet", "js-sys", "log", "mime", + "native-tls", "once_cell", "percent-encoding", "pin-project-lite", @@ -9228,6 +9286,7 @@ dependencies = [ "sync_wrapper 0.1.2", "system-configuration 0.5.1", "tokio", + "tokio-native-tls", "tokio-rustls 0.24.1", "tower-service", "url", @@ -9258,7 +9317,7 @@ dependencies = [ "http-body-util", "hyper 1.7.0", "hyper-rustls 0.27.5", - "hyper-tls", + "hyper-tls 0.6.0", "hyper-util", "js-sys", "log", @@ -9446,9 +9505,12 @@ dependencies = [ [[package]] name = "roxmltree" -version = "0.20.0" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6c20b6793b5c2fa6553b250154b78d6d0db37e72700ae35fad9387a46f487c97" +checksum = "f1964b10c76125c36f8afe190065a4bf9a87bf324842c05701330bba9f1cacbb" +dependencies = [ + "memchr", +] [[package]] name = "rsa" @@ -9584,7 +9646,7 @@ version = "0.38.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99e4ea3e1cdc4b559b8e5650f9c8e5998e3e5c1343b4eaf034565f32318d63c0" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.10.0", "errno", "libc", "linux-raw-sys 0.4.14", @@ -9597,7 +9659,7 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dade4812df5c384711475be5fcd8c162555352945401aed22a35bffeab61f657" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.10.0", "errno", "libc", "linux-raw-sys 0.9.2", @@ -9751,11 +9813,11 @@ dependencies = [ [[package]] name = "rustyline" -version = "16.0.0" +version = "17.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62fd9ca5ebc709e8535e8ef7c658eb51457987e48c98ead2be482172accc408d" +checksum = "e902948a25149d50edc1a8e0141aad50f54e22ba83ff988cf8f7c9ef07f50564" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.10.0", "cfg-if", "clipboard-win", "libc", @@ -9765,7 +9827,7 @@ dependencies = [ "unicode-segmentation", "unicode-width 0.2.0", "utf8parse", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -9924,7 +9986,7 @@ version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.10.0", "core-foundation 0.10.1", "core-foundation-sys", "libc", @@ -10558,6 +10620,7 @@ dependencies = [ "memchr", "once_cell", "percent-encoding", + "rustls 0.23.23", "serde", "serde_json", "sha2", @@ -10567,6 +10630,7 @@ dependencies = [ "tokio-stream", "tracing 0.1.41", "url", + "webpki-roots 0.26.1", ] [[package]] @@ -10615,7 +10679,7 @@ checksum = 
"aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" dependencies = [ "atoi", "base64 0.22.1", - "bitflags 2.9.0", + "bitflags 2.10.0", "byteorder", "bytes 1.10.1", "chrono", @@ -10658,7 +10722,7 @@ checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" dependencies = [ "atoi", "base64 0.22.1", - "bitflags 2.9.0", + "bitflags 2.10.0", "byteorder", "chrono", "crc", @@ -10801,6 +10865,9 @@ name = "strum" version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" +dependencies = [ + "strum_macros 0.27.2", +] [[package]] name = "strum_macros" @@ -10988,7 +11055,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.10.0", "core-foundation 0.9.3", "system-configuration-sys 0.6.0", ] @@ -11719,7 +11786,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61c5bb1d698276a2443e5ecfabc1008bf15a36c12e6a7176e7bf089ea9131140" dependencies = [ "async-compression", - "bitflags 2.9.0", + "bitflags 2.10.0", "bytes 1.10.1", "futures-core", "futures-util", @@ -11741,7 +11808,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5" dependencies = [ "base64 0.21.7", - "bitflags 2.9.0", + "bitflags 2.10.0", "bytes 1.10.1", "http 1.3.1", "http-body 1.0.0", @@ -11760,7 +11827,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ "async-compression", - "bitflags 2.9.0", + "bitflags 2.10.0", "bytes 1.10.1", "futures-core", "futures-util", @@ -12227,12 +12294,13 @@ checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = 
"unicode-truncate" -version = "1.0.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5fbabedabe362c618c714dbefda9927b5afc8e2a8102f47f081089a9019226" +checksum = "8fbf03860ff438702f3910ca5f28f8dac63c1c11e7efb5012b8b175493606330" dependencies = [ - "itertools 0.12.1", - "unicode-width 0.1.13", + "itertools 0.13.0", + "unicode-segmentation", + "unicode-width 0.2.0", ] [[package]] @@ -12412,7 +12480,7 @@ dependencies = [ "owo-colors", "paste", "regex", - "reqwest 0.12.28", + "reqwest 0.11.26", "semver", "serde", "serde_json", @@ -12474,10 +12542,12 @@ dependencies = [ "chrono", "chrono-tz", "clap", + "clap_complete", "colored", "console-subscriber", "criterion", "csv", + "dashmap", "databend-client", "deadpool", "derivative", @@ -12525,7 +12595,7 @@ dependencies = [ "libc", "listenfd", "loki-logproto", - "lru 0.16.0", + "lru 0.16.3", "maxminddb", "md-5", "metrics", @@ -12564,6 +12634,7 @@ dependencies = [ "rdkafka", "redis", "regex", + "reqwest 0.11.26", "reqwest 0.12.28", "rmp-serde", "rmpv", @@ -12942,7 +13013,7 @@ version = "0.1.0" dependencies = [ "chrono", "clap", - "crossterm 0.29.0", + "crossterm", "exitcode", "futures 0.3.31", "futures-util", @@ -12974,6 +13045,7 @@ version = "0.1.0" dependencies = [ "dnstap-parser", "enrichment", + "indoc", "vector-vrl-metrics", "vrl", ] @@ -12983,6 +13055,7 @@ name = "vector-vrl-metrics" version = "0.1.0" dependencies = [ "arc-swap", + "const-str", "tokio", "tokio-stream", "vector-common", @@ -13038,7 +13111,7 @@ checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" [[package]] name = "vrl" version = "0.29.0" -source = "git+https://github.com/vectordotdev/vrl.git?branch=main#53f01dfa6226fb3f4093d1bf838319c522f2dcad" +source = "git+https://github.com/vectordotdev/vrl.git?branch=main#7ab2c5516a0c46e25d6f6efbd16977af4a3441a0" dependencies = [ "aes", "aes-siv", @@ -13071,7 +13144,7 @@ dependencies = [ "dyn-clone", "encoding_rs", "exitcode", - "fancy-regex 
0.15.0", + "fancy-regex", "flate2", "grok", "hex", @@ -13616,7 +13689,7 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "193cae8e647981c35bc947fdd57ba7928b1fa0d4a79305f6dd2dc55221ac35ac" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.10.0", "widestring 1.0.2", "windows-sys 0.59.0", ] diff --git a/Cargo.toml b/Cargo.toml index 7a296222c1736..1e2a51d35a825 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -147,7 +147,9 @@ cfg-if = { version = "1.0.3", default-features = false } chrono = { version = "0.4.41", default-features = false, features = ["clock", "serde"] } chrono-tz = { version = "0.10.4", default-features = false, features = ["serde"] } clap = { version = "4.5.53", default-features = false, features = ["derive", "error-context", "env", "help", "std", "string", "usage", "wrap_help"] } +clap_complete = "4.5.65" colored = { version = "3.0.0", default-features = false } +const-str = { version = "1.0.0", default-features = false } crossbeam-utils = { version = "0.8.21", default-features = false } darling = { version = "0.20.11", default-features = false, features = ["suggestions"] } dashmap = { version = "6.1.0", default-features = false } @@ -180,7 +182,7 @@ prost-types = { version = "0.12", default-features = false } rand = { version = "0.9.2", default-features = false, features = ["small_rng", "thread_rng"] } rand_distr = { version = "0.5.1", default-features = false } regex = { version = "1.11.2", default-features = false, features = ["std", "perf"] } -reqwest = { version = "0.12", features = ["json"] } +reqwest = { version = "0.11", features = ["json"] } rust_decimal = { version = "1.37.0", default-features = false, features = ["std"] } semver = { version = "1.0.26", default-features = false, features = ["serde", "std"] } serde = { version = "1.0.219", default-features = false, features = ["alloc", "derive", "rc"] } @@ -210,7 +212,10 @@ serial_test = { version = "3.2" } [dependencies] cfg-if.workspace = true 
reqwest.workspace = true +reqwest_12 = { package = "reqwest", version = "0.12", features = ["json"] } clap.workspace = true +clap_complete.workspace = true +dashmap.workspace = true indoc.workspace = true paste.workspace = true pin-project.workspace = true @@ -384,7 +389,7 @@ itertools.workspace = true k8s-openapi = { version = "0.22.0", default-features = false, features = ["v1_26"], optional = true } kube = { version = "0.93.0", default-features = false, features = ["client", "openssl-tls", "runtime"], optional = true } listenfd = { version = "1.0.2", default-features = false, optional = true } -lru = { version = "0.16.0", default-features = false } +lru = { version = "0.16.3", default-features = false } maxminddb = { version = "0.27.0", default-features = false, optional = true, features = ["simdutf8"] } md-5 = { version = "0.10", default-features = false, optional = true } mongodb = { version = "3.3.0", default-features = false, optional = true, features = ["compat-3-0-0", "dns-resolver", "rustls-tls"] } @@ -411,7 +416,7 @@ seahash = { version = "4.1.0", default-features = false } smallvec = { version = "1", default-features = false, features = ["union", "serde"] } snap = { version = "1.1.1", default-features = false } socket2.workspace = true -sqlx = { version = "0.8.6", default-features = false, features = ["derive", "postgres", "chrono", "runtime-tokio"], optional = true } +sqlx = { version = "0.8.6", default-features = false, features = ["derive", "postgres", "chrono", "runtime-tokio", "tls-rustls-ring"], optional = true } stream-cancel = { version = "0.8.2", default-features = false } strip-ansi-escapes = { version = "0.2.1", default-features = false } syslog = { version = "6.1.1", default-features = false, optional = true } diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index 9e723f89a93e8..7e2d08bbfc106 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -146,7 +146,6 @@ bytes,https://github.com/carllerche/bytes,MIT,Carl Lerche 
bytes,https://github.com/tokio-rs/bytes,MIT,"Carl Lerche , Sean McArthur " bytes-utils,https://github.com/vorner/bytes-utils,Apache-2.0 OR MIT,Michal 'vorner' Vaner bytesize,https://github.com/bytesize-rs/bytesize,Apache-2.0,"Hyunsik Choi , MrCroxx , Rob Ede " -cassowary,https://github.com/dylanede/cassowary-rs,MIT OR Apache-2.0,Dylan Ede castaway,https://github.com/sagebind/castaway,MIT,Stephen M. Coakley cbc,https://github.com/RustCrypto/block-modes,MIT OR Apache-2.0,RustCrypto Developers cesu8,https://github.com/emk/cesu8-rs,Apache-2.0 OR MIT,Eric Kidd @@ -164,6 +163,7 @@ cidr,https://github.com/stbuehler/rust-cidr,MIT,Stefan Bühler @@ -180,6 +180,7 @@ concurrent-queue,https://github.com/smol-rs/concurrent-queue,Apache-2.0 OR MIT," const-oid,https://github.com/RustCrypto/formats/tree/master/const-oid,Apache-2.0 OR MIT,RustCrypto Developers const-random,https://github.com/tkaitchuck/constrandom,MIT OR Apache-2.0,Tom Kaitchuck const-random-macro,https://github.com/tkaitchuck/constrandom,MIT OR Apache-2.0,Tom Kaitchuck +const-str,https://github.com/Nugine/const-str,MIT,Nugine convert_case,https://github.com/rutrum/convert-case,MIT,David Purdum convert_case,https://github.com/rutrum/convert-case,MIT,rutrum cookie,https://github.com/SergioBenitez/cookie-rs,MIT OR Apache-2.0,"Sergio Benitez , Alex Crichton " @@ -274,7 +275,6 @@ executor-trait,https://github.com/amqp-rs/executor-trait,Apache-2.0 OR MIT,Marc- exitcode,https://github.com/benwilber/exitcode,Apache-2.0,Ben Wilber fakedata_generator,https://github.com/kevingimbel/fakedata_generator,MIT,Kevin Gimbel fallible-iterator,https://github.com/sfackler/rust-fallible-iterator,MIT OR Apache-2.0,Steven Fackler -fancy-regex,https://github.com/fancy-regex/fancy-regex,MIT,"Raph Levien , Robin Stocker " fancy-regex,https://github.com/fancy-regex/fancy-regex,MIT,"Raph Levien , Robin Stocker , Keith Hall " fastrand,https://github.com/smol-rs/fastrand,Apache-2.0 OR MIT,Stjepan Glavina ff,https://github.com/zkcrypto/ff,MIT OR 
Apache-2.0,"Sean Bowe , Jack Grigg " @@ -411,6 +411,7 @@ jsonpath-rust,https://github.com/besok/jsonpath-rust,MIT,BorisZhguchev k8s-openapi,https://github.com/Arnavion/k8s-openapi,Apache-2.0,Arnav Singh +kasuari,https://github.com/ratatui/kasuari,MIT OR Apache-2.0,"Dylan Ede , The Ratatui Developers" keccak,https://github.com/RustCrypto/sponges/tree/master/keccak,Apache-2.0 OR MIT,RustCrypto Developers kqueue,https://gitlab.com/rust-kqueue/rust-kqueue,MIT,William Orr kqueue-sys,https://gitlab.com/rust-kqueue/rust-kqueue-sys,MIT,"William Orr , Daniel (dmilith) Dettlaff " @@ -435,6 +436,7 @@ libm,https://github.com/rust-lang/libm,MIT OR Apache-2.0,Jorge Aparicio , Josh Triplett , Sebastian Thiel " +line-clipping,https://github.com/joshka/line-clipping,MIT OR Apache-2.0,Josh McKinney linked-hash-map,https://github.com/contain-rs/linked-hash-map,MIT OR Apache-2.0,"Stepan Koltsov , Andrew Paseltiner " linked_hash_set,https://github.com/alexheretic/linked-hash-set,Apache-2.0,Alex Butler linux-raw-sys,https://github.com/sunfishcode/linux-raw-sys,Apache-2.0 WITH LLVM-exception OR Apache-2.0 OR MIT,Dan Gohman @@ -618,6 +620,9 @@ rand_core,https://github.com/rust-random/rand,MIT OR Apache-2.0,"The Rand Projec rand_distr,https://github.com/rust-random/rand_distr,MIT OR Apache-2.0,The Rand Project Developers rand_xorshift,https://github.com/rust-random/rngs,MIT OR Apache-2.0,"The Rand Project Developers, The Rust Project Developers" ratatui,https://github.com/ratatui/ratatui,MIT,"Florian Dehau , The Ratatui Developers" +ratatui-core,https://github.com/ratatui/ratatui,MIT,"Florian Dehau , The Ratatui Developers" +ratatui-crossterm,https://github.com/ratatui/ratatui,MIT,"Florian Dehau , The Ratatui Developers" +ratatui-widgets,https://github.com/ratatui/ratatui,MIT,"Florian Dehau , The Ratatui Developers" raw-cpuid,https://github.com/gz/rust-cpuid,MIT,Gerd Zellweger raw-window-handle,https://github.com/rust-windowing/raw-window-handle,MIT OR Apache-2.0 OR Zlib,Osspial 
rdkafka,https://github.com/fede1024/rust-rdkafka,MIT,Federico Giraud diff --git a/changelog.d/22487_tcp_netlink_parsing.fix.md b/changelog.d/22487_tcp_netlink_parsing.fix.md new file mode 100644 index 0000000000000..f98c5861f95f8 --- /dev/null +++ b/changelog.d/22487_tcp_netlink_parsing.fix.md @@ -0,0 +1,3 @@ +Fixed a `host_metrics` source issue that caused tcp metrics collection to fail with "Could not parse netlink response: invalid netlink buffer" errors on Linux systems. + +authors: mushrowan diff --git a/changelog.d/22942_tcp_decoder_bytes_framing_error.fix.md b/changelog.d/22942_tcp_decoder_bytes_framing_error.fix.md new file mode 100644 index 0000000000000..b2435f94cabe0 --- /dev/null +++ b/changelog.d/22942_tcp_decoder_bytes_framing_error.fix.md @@ -0,0 +1,4 @@ +Fixed recurrent "Failed framing bytes" produced by TCP sources such as fluent and logstash by ignoring connection +resets that occur after complete frames. Connection resets with partial frame data are still reported as errors. + +authors: gwenaskell diff --git a/changelog.d/23536_postgres_sink_enable_tls.fix.md b/changelog.d/23536_postgres_sink_enable_tls.fix.md new file mode 100644 index 0000000000000..c87a6e2177cd6 --- /dev/null +++ b/changelog.d/23536_postgres_sink_enable_tls.fix.md @@ -0,0 +1,3 @@ +Fixed an issue in the `postgres` sink which made a TLS connection impossible due to a missing `sqlx` feature flag. + +authors: thomasqueirozb diff --git a/changelog.d/24316_opentelemetry_use_otlp_decoding_metrics.fix.md b/changelog.d/24316_opentelemetry_use_otlp_decoding_metrics.fix.md new file mode 100644 index 0000000000000..6a67eeb2c6330 --- /dev/null +++ b/changelog.d/24316_opentelemetry_use_otlp_decoding_metrics.fix.md @@ -0,0 +1,3 @@ +The `opentelemetry` source now correctly emits the `component_received_events_total` metric when `use_otlp_decoding` is enabled for HTTP requests. Previously, this metric would show 0 despite events being received and processed. 
+ +authors: thomasqueirozb diff --git a/changelog.d/24414_shell_autocompletion.enhancement.md b/changelog.d/24414_shell_autocompletion.enhancement.md new file mode 100644 index 0000000000000..dd02ff49bab56 --- /dev/null +++ b/changelog.d/24414_shell_autocompletion.enhancement.md @@ -0,0 +1,3 @@ +Autocompletion scripts for the vector cli can now be generated via `vector completion `. + +authors: weriomat diff --git a/changelog.d/24415_histogram_incremental_conversion.fix.md b/changelog.d/24415_histogram_incremental_conversion.fix.md new file mode 100644 index 0000000000000..3aa1c23388f83 --- /dev/null +++ b/changelog.d/24415_histogram_incremental_conversion.fix.md @@ -0,0 +1,3 @@ +Fixed histogram incremental conversion by ensuring all individual buckets increase or reinitializing the entire metric. + +authors: dd-sebastien-lb diff --git a/changelog.d/buffer_size_metrics.deprecation.md b/changelog.d/buffer_size_metrics.deprecation.md new file mode 100644 index 0000000000000..dd58c8f53bd27 --- /dev/null +++ b/changelog.d/buffer_size_metrics.deprecation.md @@ -0,0 +1,10 @@ +Buffers now emit metric names for sizes that better follow the metric naming standard specification +while keeping the old related gauges available for a transition period. Operators should update +dashboards/alerts to the new variants as the legacy names are now deprecated. 
+ +* `buffer_max_size_bytes` deprecates `buffer_max_byte_size` +* `buffer_max_size_events` deprecates `buffer_max_event_size` +* `buffer_size_bytes` deprecates `buffer_byte_size` +* `buffer_size_events` deprecates `buffer_events` + +authors: bruceg diff --git a/changelog.d/buffer_utilization_mean_metrics.enhancement.md b/changelog.d/buffer_utilization_mean_metrics.enhancement.md new file mode 100644 index 0000000000000..3a88daa924c48 --- /dev/null +++ b/changelog.d/buffer_utilization_mean_metrics.enhancement.md @@ -0,0 +1,3 @@ +Added moving-mean gauges for source and transform buffers (`source_buffer_utilization_mean` and `transform_buffer_utilization_mean`), so observers can track an EWMA of buffer utilization in addition to the instant level. + +authors: bruceg diff --git a/changelog.d/event_processing_time_metrics.enhancement.md b/changelog.d/event_processing_time_metrics.enhancement.md new file mode 100644 index 0000000000000..2041b920da91f --- /dev/null +++ b/changelog.d/event_processing_time_metrics.enhancement.md @@ -0,0 +1,4 @@ +Added the `event_processing_time_seconds` histogram and `event_processing_time_mean_seconds` gauge internal_metrics, +exposing the total time events spend between the originating source and final sink in a topology. + +authors: bruceg diff --git a/changelog.d/expand-internal-histogram-precision.breaking.md b/changelog.d/expand-internal-histogram-precision.breaking.md new file mode 100644 index 0000000000000..47bfb31245e3a --- /dev/null +++ b/changelog.d/expand-internal-histogram-precision.breaking.md @@ -0,0 +1,5 @@ +Increased the number of buckets in internal histograms to reduce the smallest +bucket down to approximately 0.000244 (2.0^-12). Since this shifts all the +bucket values out, it may break VRL scripts that rely on the previous values. 
+ +authors: bruceg diff --git a/changelog.d/gcp_cloud_storage_content_type.enhancement.md b/changelog.d/gcp_cloud_storage_content_type.enhancement.md new file mode 100644 index 0000000000000..71cd179b9f555 --- /dev/null +++ b/changelog.d/gcp_cloud_storage_content_type.enhancement.md @@ -0,0 +1,3 @@ +Add `content_type` option to the `gcp_cloud_storage` sink to override the `Content-Type` of created objects. If unset, defaults to the encoder's content type. + +authors: AnuragEkkati diff --git a/changelog.d/utilization_ewma_alpha_configuration.enhancement.md b/changelog.d/utilization_ewma_alpha_configuration.enhancement.md new file mode 100644 index 0000000000000..1f5c84bb9f515 --- /dev/null +++ b/changelog.d/utilization_ewma_alpha_configuration.enhancement.md @@ -0,0 +1,6 @@ +Added `buffer_utilization_ewma_alpha` configuration option to the global +options, allowing users to control the alpha value for the exponentially +weighted moving average (EWMA) used in source and transform buffer utilization +metrics. + +authors: bruceg diff --git a/deny.toml b/deny.toml index e571de12a7f71..32a8862fd30ec 100644 --- a/deny.toml +++ b/deny.toml @@ -6,6 +6,7 @@ allow = [ "BSD-3-Clause", "BSL-1.0", "CC0-1.0", + "CDLA-Permissive-2.0", "ISC", "MIT", "MIT-0", @@ -50,4 +51,5 @@ ignore = [ { id = "RUSTSEC-2025-0012", reason = "backoff is unmaintained" }, # rustls-pemfile is unmaintained. Blocked by both async-nats and http 1.0.0 upgrade. 
{ id = "RUSTSEC-2025-0134", reason = "rustls-pemfile is unmaintained" }, + { id = "RUSTSEC-2026-0002", reason = "latest aws-sdk-s3 (v1.119.0) is still using lru v0.12.5" }, ] diff --git a/distribution/docker/alpine/Dockerfile b/distribution/docker/alpine/Dockerfile index 0358bd9bdc882..9d40daff5ce8e 100644 --- a/distribution/docker/alpine/Dockerfile +++ b/distribution/docker/alpine/Dockerfile @@ -1,4 +1,4 @@ -FROM docker.io/alpine:3.22 AS builder +FROM docker.io/alpine:3.23 AS builder WORKDIR /vector @@ -12,7 +12,7 @@ RUN ARCH=$(if [ "$TARGETPLATFORM" = "linux/arm/v6" ]; then echo "arm"; else cat RUN mkdir -p /var/lib/vector -FROM docker.io/alpine:3.22 +FROM docker.io/alpine:3.23 # https://github.com/opencontainers/image-spec/blob/main/annotations.md LABEL org.opencontainers.image.url="https://vector.dev" diff --git a/distribution/docker/distroless-static/Dockerfile b/distribution/docker/distroless-static/Dockerfile index c369cff031f94..d692f48321614 100644 --- a/distribution/docker/distroless-static/Dockerfile +++ b/distribution/docker/distroless-static/Dockerfile @@ -1,4 +1,4 @@ -FROM docker.io/alpine:3.22 AS builder +FROM docker.io/alpine:3.23 AS builder WORKDIR /vector diff --git a/docs/DEPRECATION.md b/docs/DEPRECATION.md index 3b4a5d8c576c5..23ef97ec2c475 100644 --- a/docs/DEPRECATION.md +++ b/docs/DEPRECATION.md @@ -68,8 +68,8 @@ When possible, Vector will error at start-up when a removed configuration option When introducing a deprecation into Vector, the pull request introducing the deprecation should: -- Add a note to the Deprecations section of the upgrade guide for the next release with a description and - directions for transitioning if applicable. +- Add a note to the Deprecations section of the upgrade guide in `website/content/en/highlights` for + the next release with a description and directions for transitioning if applicable. - Copy the same note from the previous step, to a changelog fragment, with type="deprecation". 
See the changelog fragment [README.md](../changelog.d/README.md) for details. - Add a deprecation note to the docs. Typically, this means adding `deprecation: "description of the deprecation"` @@ -80,7 +80,7 @@ When introducing a deprecation into Vector, the pull request introducing the dep the new name will be appended with the text `(formerly OldName)`. - Add a log message to Vector that is logged at the `WARN` level starting with the word `DEPRECATION` if Vector detects the deprecated configuration or feature being used (when possible). -- Add the deprecation to [DEPRECATIONS.md](DEPRECATIONS.md) to track migration (if applicable) and removal +- Add the deprecation to [docs/DEPRECATIONS.md](../docs/DEPRECATIONS.md) to track migration (if applicable) and removal When removing a deprecation in a subsequent release, the pull request should: @@ -90,4 +90,4 @@ When removing a deprecation in a subsequent release, the pull request should: for transitioning if applicable. - Copy the same note from the previous step, to a changelog fragment, with type="breaking". See the changelog fragment [README.md](../changelog.d/README.md) for details. -- Remove the deprecation from [DEPRECATIONS.md](DEPRECATIONS.md) +- Remove the deprecation from [docs/DEPRECATIONS.md](../docs/DEPRECATIONS.md) diff --git a/docs/DEPRECATIONS.md b/docs/DEPRECATIONS.md index 7c9042da1a70a..e43d70d493d3d 100644 --- a/docs/DEPRECATIONS.md +++ b/docs/DEPRECATIONS.md @@ -15,6 +15,7 @@ For example: ## To be deprecated - `v0.50.0` | `http-server-encoding` | The `encoding` field will be removed. Use `decoding` and `framing` instead. +- `v0.53.0` | `buffer-bytes-events-metrics` | The `buffer_byte_size` and `buffer_events` gauges are deprecated in favor of the `buffer_size_bytes`/`buffer_size_events` metrics described in `docs/specs/buffer.md`. 
## To be migrated diff --git a/docs/specs/buffer.md b/docs/specs/buffer.md index 152f14ad56215..05d343a4bc0bc 100644 --- a/docs/specs/buffer.md +++ b/docs/specs/buffer.md @@ -42,8 +42,8 @@ _All buffers_ MUST emit a `BufferCreated` event upon creation. To avoid stale me - `max_size_bytes` - the max size of the buffer in bytes if relevant - `max_size_events` - the max size of the buffer in number of events if relevant - Metric - - MUST emit the `buffer_max_event_size` gauge (in-memory buffers) if the defined `max_size_events` value is present - - MUST emit the `buffer_max_byte_size` gauge (disk buffers) if the defined `max_size_bytes` value is present + - MUST emit the `buffer_max_size_events` gauge (in-memory buffers) if the defined `max_size_events` value is present, and emit `buffer_max_event_size` for backward compatibility + - MUST emit the `buffer_max_size_bytes` gauge (disk buffers) if the defined `max_size_bytes` value is present, and emit `buffer_max_byte_size` for backward compatibility #### BufferEventsReceived @@ -58,8 +58,8 @@ _All buffers_ MUST emit a `BufferEventsReceived` event: - Metric - MUST increment the `buffer_received_events_total` counter by the defined `count` - MUST increment the `buffer_received_bytes_total` counter by the defined `byte_size` - - MUST increment the `buffer_events` gauge by the defined `count` - - MUST increment the `buffer_byte_size` gauge by the defined `byte_size` + - MUST increment the `buffer_size_events` gauge by the defined `count`, and emit `buffer_events` for backward compatibility + - MUST increment the `buffer_size_bytes` gauge by the defined `byte_size`, and emit `buffer_byte_size` for backward compatibility #### BufferEventsSent @@ -71,8 +71,8 @@ _All buffers_ MUST emit a `BufferEventsSent` event after sending one or more Vec - Metric - MUST increment the `buffer_sent_events_total` counter by the defined `count` - MUST increment the `buffer_sent_bytes_total` counter by the defined `byte_size` - - MUST decrement 
the `buffer_events` gauge by the defined `count` - - MUST decrement the `buffer_byte_size` gauge by the defined `byte_size` + - MUST decrement the `buffer_size_events` gauge by the defined `count`, and emit `buffer_events` for backward compatibility + - MUST decrement the `buffer_size_bytes` gauge by the defined `byte_size`, and emit `buffer_byte_size` for backward compatibility #### BufferError diff --git a/docs/tutorials/lognamespacing.md b/docs/tutorials/lognamespacing.md index de23447f796b7..ed4d0cd3a0993 100644 --- a/docs/tutorials/lognamespacing.md +++ b/docs/tutorials/lognamespacing.md @@ -112,7 +112,9 @@ separate namespace to the event data. The actual value to be placed into the field. -For the ingest timestamp this will be `chrono::Utc::now()`. Source type will be +The ingest timestamp should be recorded once per batch (e.g. `let now = +chrono::Utc::now();`) and passed into `insert_standard_vector_source_metadata`, +which takes care of updating the event metadata. The source type will be the `NAME` property of the `Config` struct. `NAME` is provided by the `configurable_component` macro. You may need to include `use vector_config::NamedComponent;`. 
diff --git a/lib/codecs/Cargo.toml b/lib/codecs/Cargo.toml index 01d9836f666b3..d8ef9e1057361 100644 --- a/lib/codecs/Cargo.toml +++ b/lib/codecs/Cargo.toml @@ -24,9 +24,11 @@ derivative.workspace = true derive_more = { version = "2.0.1", optional = true, features = ["from", "display"] } dyn-clone = { version = "1", default-features = false } flate2.workspace = true +futures.workspace = true influxdb-line-protocol = { version = "2", default-features = false } lookup = { package = "vector-lookup", path = "../vector-lookup", default-features = false, features = ["test"] } memchr = { version = "2", default-features = false } +metrics.workspace = true opentelemetry-proto = { path = "../opentelemetry-proto", optional = true } ordered-float.workspace = true prost.workspace = true @@ -46,11 +48,13 @@ tokio = { workspace = true, features = ["full"] } tracing.workspace = true vrl.workspace = true vector-common = { path = "../vector-common", default-features = false } +vector-common-macros.workspace = true vector-config = { path = "../vector-config", default-features = false } vector-config-macros = { path = "../vector-config-macros", default-features = false } vector-core = { path = "../vector-core", default-features = false, features = ["vrl"] } vector-vrl-functions.workspace = true toml = { version = "0.9.8", optional = true } + [dev-dependencies] futures.workspace = true indoc.workspace = true @@ -66,3 +70,4 @@ vrl.workspace = true arrow = [] opentelemetry = ["dep:opentelemetry-proto"] syslog = ["dep:syslog_loose", "dep:strum", "dep:derive_more", "dep:serde-aux", "dep:toml"] +test = [] diff --git a/src/codecs/decoding/config.rs b/lib/codecs/src/decoding/config.rs similarity index 87% rename from src/codecs/decoding/config.rs rename to lib/codecs/src/decoding/config.rs index 2670b76c977dc..6ed15f8243867 100644 --- a/src/codecs/decoding/config.rs +++ b/lib/codecs/src/decoding/config.rs @@ -1,10 +1,7 @@ use serde::{Deserialize, Serialize}; -use vector_lib::{ - 
codecs::decoding::{DeserializerConfig, FramingConfig}, - config::LogNamespace, -}; +use vector_core::config::LogNamespace; -use crate::codecs::Decoder; +use crate::decoding::{Decoder, DeserializerConfig, FramingConfig}; /// Config used to build a `Decoder`. #[derive(Debug, Clone, Deserialize, Serialize)] @@ -43,7 +40,7 @@ impl DecodingConfig { } /// Builds a `Decoder` from the provided configuration. - pub fn build(&self) -> vector_lib::Result { + pub fn build(&self) -> vector_common::Result { // Build the framer. let framer = self.framing.build(); diff --git a/src/codecs/decoding/decoder.rs b/lib/codecs/src/decoding/decoder.rs similarity index 89% rename from src/codecs/decoding/decoder.rs rename to lib/codecs/src/decoding/decoder.rs index 499dedb10e9e6..0796e61ff82f3 100644 --- a/src/codecs/decoding/decoder.rs +++ b/lib/codecs/src/decoding/decoder.rs @@ -1,18 +1,18 @@ use bytes::{Bytes, BytesMut}; use smallvec::SmallVec; -use vector_lib::{ - codecs::decoding::{ - BoxedFramingError, BytesDeserializer, Deserializer, Error, Framer, NewlineDelimitedDecoder, - format::Deserializer as _, - }, - config::LogNamespace, -}; +use vector_common::internal_event::emit; +use vector_core::{config::LogNamespace, event::Event}; use crate::{ - event::Event, + decoding::format::Deserializer as _, + decoding::{ + BoxedFramingError, BytesDeserializer, Deserializer, Error, Framer, NewlineDelimitedDecoder, + }, internal_events::{DecoderDeserializeError, DecoderFramingError}, }; +type DecodedFrame = (SmallVec<[Event; 1]>, usize); + /// A decoder that can decode structured events from a byte stream / byte /// messages. 
#[derive(Clone)] @@ -60,9 +60,9 @@ impl Decoder { fn handle_framing_result( &mut self, frame: Result, BoxedFramingError>, - ) -> Result, usize)>, Error> { + ) -> Result, Error> { let frame = frame.map_err(|error| { - emit!(DecoderFramingError { error: &error }); + emit(DecoderFramingError { error: &error }); Error::FramingError(error) })?; @@ -72,7 +72,7 @@ impl Decoder { } /// Parses a frame using the included deserializer, and handles any errors by logging. - pub fn deserializer_parse(&self, frame: Bytes) -> Result<(SmallVec<[Event; 1]>, usize), Error> { + pub fn deserializer_parse(&self, frame: Bytes) -> Result { let byte_size = frame.len(); // Parse structured events from the byte frame. @@ -80,14 +80,14 @@ impl Decoder { .parse(frame, self.log_namespace) .map(|events| (events, byte_size)) .map_err(|error| { - emit!(DecoderDeserializeError { error: &error }); + emit(DecoderDeserializeError { error: &error }); Error::ParsingError(error) }) } } impl tokio_util::codec::Decoder for Decoder { - type Item = (SmallVec<[Event; 1]>, usize); + type Item = DecodedFrame; type Error = Error; fn decode(&mut self, buf: &mut BytesMut) -> Result, Self::Error> { @@ -106,13 +106,13 @@ mod tests { use bytes::Bytes; use futures::{StreamExt, stream}; use tokio_util::{codec::FramedRead, io::StreamReader}; - use vector_lib::codecs::{ - JsonDeserializer, NewlineDelimitedDecoder, StreamDecodingError, - decoding::{Deserializer, Framer}, - }; use vrl::value::Value; use super::Decoder; + use crate::{ + JsonDeserializer, NewlineDelimitedDecoder, StreamDecodingError, + decoding::{Deserializer, Framer}, + }; #[tokio::test] async fn framed_read_recover_from_error() { diff --git a/lib/codecs/src/decoding/mod.rs b/lib/codecs/src/decoding/mod.rs index f27c30d93d501..c87337856454a 100644 --- a/lib/codecs/src/decoding/mod.rs +++ b/lib/codecs/src/decoding/mod.rs @@ -1,6 +1,8 @@ //! A collection of support structures that are used in the process of decoding //! bytes into events. 
+mod config; +mod decoder; mod error; pub mod format; pub mod framing; @@ -8,6 +10,8 @@ pub mod framing; use std::fmt::Debug; use bytes::{Bytes, BytesMut}; +pub use config::DecodingConfig; +pub use decoder::Decoder; pub use error::StreamDecodingError; pub use format::{ BoxedDeserializer, BytesDeserializer, BytesDeserializerConfig, GelfDeserializer, diff --git a/src/codecs/encoding/config.rs b/lib/codecs/src/encoding/config.rs similarity index 90% rename from src/codecs/encoding/config.rs rename to lib/codecs/src/encoding/config.rs index 47bec858ffb08..32d18c9e83d00 100644 --- a/src/codecs/encoding/config.rs +++ b/lib/codecs/src/encoding/config.rs @@ -1,14 +1,13 @@ -use crate::codecs::{Encoder, EncoderKind, Transformer}; -use vector_lib::{ - codecs::{ - CharacterDelimitedEncoder, LengthDelimitedEncoder, NewlineDelimitedEncoder, - encoding::{Framer, FramingConfig, Serializer, SerializerConfig}, - }, - configurable::configurable_component, +use vector_config::configurable_component; + +use super::{Encoder, EncoderKind, Transformer}; +use crate::encoding::{ + CharacterDelimitedEncoder, Framer, FramingConfig, LengthDelimitedEncoder, + NewlineDelimitedEncoder, Serializer, SerializerConfig, }; -#[cfg(feature = "codecs-opentelemetry")] -use vector_lib::codecs::BytesEncoder; +#[cfg(feature = "opentelemetry")] +use crate::encoding::BytesEncoder; /// Encoding configuration. #[configurable_component] @@ -43,7 +42,7 @@ impl EncodingConfig { } /// Build the `Serializer` for this config. - pub fn build(&self) -> crate::Result { + pub fn build(&self) -> vector_common::Result { self.encoding.build() } } @@ -100,7 +99,7 @@ impl EncodingConfigWithFraming { } /// Build the `Framer` and `Serializer` for this config. 
- pub fn build(&self, sink_type: SinkType) -> crate::Result<(Framer, Serializer)> { + pub fn build(&self, sink_type: SinkType) -> vector_common::Result<(Framer, Serializer)> { let framer = self.framing.as_ref().map(|framing| framing.build()); let serializer = self.encoding.build()?; @@ -132,9 +131,9 @@ impl EncodingConfigWithFraming { | Serializer::RawMessage(_) | Serializer::Text(_), ) => NewlineDelimitedEncoder::default().into(), - #[cfg(feature = "codecs-syslog")] + #[cfg(feature = "syslog")] (None, Serializer::Syslog(_)) => NewlineDelimitedEncoder::default().into(), - #[cfg(feature = "codecs-opentelemetry")] + #[cfg(feature = "opentelemetry")] (None, Serializer::Otlp(_)) => BytesEncoder.into(), }; @@ -142,7 +141,10 @@ impl EncodingConfigWithFraming { } /// Build the `Transformer` and `EncoderKind` for this config. - pub fn build_encoder(&self, sink_type: SinkType) -> crate::Result<(Transformer, EncoderKind)> { + pub fn build_encoder( + &self, + sink_type: SinkType, + ) -> vector_common::Result<(Transformer, EncoderKind)> { let (framer, serializer) = self.build(sink_type)?; let encoder = EncoderKind::Framed(Box::new(Encoder::::new(framer, serializer))); Ok((self.transformer(), encoder)) @@ -172,10 +174,10 @@ where #[cfg(test)] mod test { - use vector_lib::lookup::lookup_v2::{ConfigValuePath, parse_value_path}; + use lookup::lookup_v2::{ConfigValuePath, parse_value_path}; use super::*; - use crate::codecs::encoding::TimestampFormat; + use crate::encoding::TimestampFormat; #[test] fn deserialize_encoding_config() { diff --git a/src/codecs/encoding/encoder.rs b/lib/codecs/src/encoding/encoder.rs similarity index 86% rename from src/codecs/encoding/encoder.rs rename to lib/codecs/src/encoding/encoder.rs index a36b98a6496cb..4924dd05447b1 100644 --- a/src/codecs/encoding/encoder.rs +++ b/lib/codecs/src/encoding/encoder.rs @@ -1,14 +1,12 @@ use bytes::BytesMut; use tokio_util::codec::Encoder as _; -#[cfg(feature = "codecs-arrow")] -use 
vector_lib::codecs::encoding::ArrowStreamSerializer; -use vector_lib::codecs::{ - CharacterDelimitedEncoder, NewlineDelimitedEncoder, TextSerializerConfig, - encoding::{Error, Framer, Serializer}, -}; +use vector_common::internal_event::emit; +use vector_core::event::Event; +#[cfg(feature = "arrow")] +use crate::encoding::ArrowStreamSerializer; use crate::{ - event::Event, + encoding::{Error, Framer, Serializer}, internal_events::{EncoderFramingError, EncoderSerializeError}, }; @@ -16,7 +14,7 @@ use crate::{ #[derive(Debug, Clone)] pub enum BatchSerializer { /// Arrow IPC stream format serializer. - #[cfg(feature = "codecs-arrow")] + #[cfg(feature = "arrow")] Arrow(ArrowStreamSerializer), } @@ -38,7 +36,7 @@ impl BatchEncoder { } /// Get the HTTP content type. - #[cfg(feature = "codecs-arrow")] + #[cfg(feature = "arrow")] pub const fn content_type(&self) -> &'static str { match &self.serializer { BatchSerializer::Arrow(_) => "application/vnd.apache.arrow.stream", @@ -53,10 +51,10 @@ impl tokio_util::codec::Encoder> for BatchEncoder { fn encode(&mut self, events: Vec, buffer: &mut BytesMut) -> Result<(), Self::Error> { #[allow(unreachable_patterns)] match &mut self.serializer { - #[cfg(feature = "codecs-arrow")] + #[cfg(feature = "arrow")] BatchSerializer::Arrow(serializer) => { serializer.encode(events, buffer).map_err(|err| { - use vector_lib::codecs::encoding::ArrowEncodingError; + use crate::encoding::ArrowEncodingError; match err { ArrowEncodingError::NullConstraint { .. 
} => { Error::SchemaConstraintViolation(Box::new(err)) @@ -76,7 +74,7 @@ pub enum EncoderKind { /// Uses framing to encode individual events Framed(Box>), /// Encodes events in batches without framing - #[cfg(feature = "codecs-arrow")] + #[cfg(feature = "arrow")] Batch(BatchEncoder), } @@ -92,6 +90,8 @@ where impl Default for Encoder { fn default() -> Self { + use crate::encoding::{NewlineDelimitedEncoder, TextSerializerConfig}; + Self { framer: NewlineDelimitedEncoder::default().into(), serializer: TextSerializerConfig::default().build().into(), @@ -101,6 +101,8 @@ impl Default for Encoder { impl Default for Encoder<()> { fn default() -> Self { + use crate::encoding::TextSerializerConfig; + Self { framer: (), serializer: TextSerializerConfig::default().build().into(), @@ -127,7 +129,7 @@ where /// Serialize the event without applying framing, at the start of the provided buffer. fn serialize_at_start(&mut self, event: Event, buffer: &mut BytesMut) -> Result<(), Error> { self.serializer.encode(event, buffer).map_err(|error| { - emit!(EncoderSerializeError { error: &error }); + emit(EncoderSerializeError { error: &error }); Error::SerializingError(error) }) } @@ -155,7 +157,9 @@ impl Encoder { pub const fn batch_prefix(&self) -> &[u8] { match (&self.framer, &self.serializer) { ( - Framer::CharacterDelimited(CharacterDelimitedEncoder { delimiter: b',' }), + Framer::CharacterDelimited(crate::encoding::CharacterDelimitedEncoder { + delimiter: b',', + }), Serializer::Json(_) | Serializer::NativeJson(_), ) => b"[", _ => &[], @@ -166,7 +170,9 @@ impl Encoder { pub const fn batch_suffix(&self, empty: bool) -> &[u8] { match (&self.framer, &self.serializer, empty) { ( - Framer::CharacterDelimited(CharacterDelimitedEncoder { delimiter: b',' }), + Framer::CharacterDelimited(crate::encoding::CharacterDelimitedEncoder { + delimiter: b',', + }), Serializer::Json(_) | Serializer::NativeJson(_), _, ) => b"]", @@ -183,7 +189,9 @@ impl Encoder { } ( Serializer::Gelf(_) | 
Serializer::Json(_) | Serializer::NativeJson(_), - Framer::CharacterDelimited(CharacterDelimitedEncoder { delimiter: b',' }), + Framer::CharacterDelimited(crate::encoding::CharacterDelimitedEncoder { + delimiter: b',', + }), ) => "application/json", (Serializer::Native(_), _) | (Serializer::Protobuf(_), _) => "application/octet-stream", ( @@ -198,9 +206,9 @@ impl Encoder { | Serializer::Text(_), _, ) => "text/plain", - #[cfg(feature = "codecs-syslog")] + #[cfg(feature = "syslog")] (Serializer::Syslog(_), _) => "text/plain", - #[cfg(feature = "codecs-opentelemetry")] + #[cfg(feature = "opentelemetry")] (Serializer::Otlp(_), _) => "application/x-protobuf", } } @@ -233,7 +241,7 @@ impl tokio_util::codec::Encoder for Encoder { // Frame the serialized event. self.framer.encode((), &mut payload).map_err(|error| { - emit!(EncoderFramingError { error: &error }); + emit(EncoderFramingError { error: &error }); Error::FramingError(error) })?; @@ -261,11 +269,12 @@ impl tokio_util::codec::Encoder for Encoder<()> { #[cfg(test)] mod tests { use bytes::BufMut; - use futures_util::{SinkExt, StreamExt}; + use futures::{SinkExt, StreamExt}; use tokio_util::codec::FramedWrite; - use vector_lib::{codecs::encoding::BoxedFramingError, event::LogEvent}; + use vector_core::event::LogEvent; use super::*; + use crate::encoding::BoxedFramingError; #[derive(Debug, Clone)] struct ParenEncoder; @@ -325,7 +334,9 @@ mod tests { async fn test_encode_events_sink_empty() { let encoder = Encoder::::new( Framer::Boxed(Box::new(ParenEncoder::new())), - TextSerializerConfig::default().build().into(), + crate::encoding::TextSerializerConfig::default() + .build() + .into(), ); let source = futures::stream::iter(vec![ Event::Log(LogEvent::from("foo")), @@ -344,7 +355,9 @@ mod tests { async fn test_encode_events_sink_non_empty() { let encoder = Encoder::::new( Framer::Boxed(Box::new(ParenEncoder::new())), - TextSerializerConfig::default().build().into(), + crate::encoding::TextSerializerConfig::default() + 
.build() + .into(), ); let source = futures::stream::iter(vec![ Event::Log(LogEvent::from("bar")), @@ -363,7 +376,9 @@ mod tests { async fn test_encode_events_sink_empty_handle_framing_error() { let encoder = Encoder::::new( Framer::Boxed(Box::new(ErrorNthEncoder::new(ParenEncoder::new(), 1))), - TextSerializerConfig::default().build().into(), + crate::encoding::TextSerializerConfig::default() + .build() + .into(), ); let source = futures::stream::iter(vec![ Event::Log(LogEvent::from("foo")), @@ -383,7 +398,9 @@ mod tests { async fn test_encode_events_sink_non_empty_handle_framing_error() { let encoder = Encoder::::new( Framer::Boxed(Box::new(ErrorNthEncoder::new(ParenEncoder::new(), 1))), - TextSerializerConfig::default().build().into(), + crate::encoding::TextSerializerConfig::default() + .build() + .into(), ); let source = futures::stream::iter(vec![ Event::Log(LogEvent::from("bar")), @@ -402,8 +419,10 @@ mod tests { #[tokio::test] async fn test_encode_batch_newline() { let encoder = Encoder::::new( - Framer::NewlineDelimited(NewlineDelimitedEncoder::default()), - TextSerializerConfig::default().build().into(), + Framer::NewlineDelimited(crate::encoding::NewlineDelimitedEncoder::default()), + crate::encoding::TextSerializerConfig::default() + .build() + .into(), ); let source = futures::stream::iter(vec![ Event::Log(LogEvent::from("bar")), diff --git a/lib/codecs/src/encoding/mod.rs b/lib/codecs/src/encoding/mod.rs index 7d611790cb613..5e3885d2752e4 100644 --- a/lib/codecs/src/encoding/mod.rs +++ b/lib/codecs/src/encoding/mod.rs @@ -2,10 +2,15 @@ //! events into bytes. 
pub mod chunking; +mod config; +mod encoder; pub mod format; pub mod framing; pub mod serializer; +mod transformer; pub use chunking::{Chunker, Chunking, GelfChunker}; +pub use config::{EncodingConfig, EncodingConfigWithFraming, SinkType}; +pub use encoder::{BatchEncoder, BatchSerializer, Encoder, EncoderKind}; #[cfg(feature = "arrow")] pub use format::{ ArrowEncodingError, ArrowStreamSerializer, ArrowStreamSerializerConfig, SchemaProvider, @@ -33,6 +38,7 @@ pub use framing::{ #[cfg(feature = "arrow")] pub use serializer::BatchSerializerConfig; pub use serializer::{Serializer, SerializerConfig}; +pub use transformer::{TimestampFormat, Transformer}; /// An error that occurred while building an encoder. pub type BuildError = Box; diff --git a/src/codecs/encoding/transformer.rs b/lib/codecs/src/encoding/transformer.rs similarity index 95% rename from src/codecs/encoding/transformer.rs rename to lib/codecs/src/encoding/transformer.rs index 65304989af291..2fb84f00e6fd1 100644 --- a/src/codecs/encoding/transformer.rs +++ b/lib/codecs/src/encoding/transformer.rs @@ -1,21 +1,19 @@ #![deny(missing_docs)] -use core::fmt::Debug; use std::collections::BTreeMap; use chrono::{DateTime, Utc}; +use lookup::{PathPrefix, event_path, lookup_v2::ConfigValuePath}; use ordered_float::NotNan; use serde::{Deserialize, Deserializer}; -use vector_lib::{ - configurable::configurable_component, - event::{LogEvent, MaybeAsLogMut}, - lookup::{PathPrefix, event_path, lookup_v2::ConfigValuePath}, +use vector_config::configurable_component; +use vector_core::{ + event::{Event, LogEvent, MaybeAsLogMut}, schema::meaning, + serde::is_default, }; use vrl::{path::OwnedValuePath, value::Value}; -use crate::{event::Event, serde::is_default}; - /// Transformations to prepare an event for serialization. 
#[configurable_component(no_deser)] #[derive(Clone, Debug, Default, PartialEq, Eq)] @@ -72,7 +70,7 @@ impl Transformer { only_fields: Option>, except_fields: Option>, timestamp_format: Option, - ) -> Result { + ) -> vector_common::Result { Self::validate_fields(only_fields.as_ref(), except_fields.as_ref())?; Ok(Self { @@ -83,7 +81,7 @@ impl Transformer { } /// Get the `Transformer`'s `only_fields`. - #[cfg(test)] + #[cfg(any(test, feature = "test"))] pub const fn only_fields(&self) -> &Option> { &self.only_fields } @@ -104,7 +102,7 @@ impl Transformer { fn validate_fields( only_fields: Option<&Vec>, except_fields: Option<&Vec>, - ) -> crate::Result<()> { + ) -> vector_common::Result<()> { if let (Some(only_fields), Some(except_fields)) = (only_fields, except_fields) && except_fields .iter() @@ -188,7 +186,8 @@ impl Transformer { } } for (k, v) in unix_timestamps { - log.parse_path_and_insert(k, v).unwrap(); + log.parse_path_and_insert(k, v) + .expect("timestamp fields must allow insertion"); } } else { // root is not an object @@ -213,7 +212,8 @@ impl Transformer { ts.timestamp_nanos_opt().expect("Timestamp out of range") }), TimestampFormat::UnixFloat => self.format_timestamps(log, |ts| { - NotNan::new(ts.timestamp_micros() as f64 / 1e6).unwrap() + NotNan::new(ts.timestamp_micros() as f64 / 1e6) + .expect("this division will never produce a NaN") }), // RFC3339 is the default serialization of a timestamp. TimestampFormat::Rfc3339 => (), @@ -225,11 +225,11 @@ impl Transformer { /// /// Returns `Err` if the new `except_fields` fail validation, i.e. are not mutually exclusive /// with `only_fields`. 
- #[cfg(test)] + #[cfg(any(test, feature = "test"))] pub fn set_except_fields( &mut self, except_fields: Option>, - ) -> crate::Result<()> { + ) -> vector_common::Result<()> { Self::validate_fields(self.only_fields.as_ref(), except_fields.as_ref())?; self.except_fields = except_fields; Ok(()) @@ -265,15 +265,14 @@ mod tests { use std::{collections::BTreeMap, sync::Arc}; use indoc::indoc; - use vector_lib::{ - btreemap, + use lookup::path::parse_target_path; + use vector_core::{ config::{LogNamespace, log_schema}, - lookup::path::parse_target_path, + schema, }; - use vrl::value::Kind; + use vrl::{btreemap, value::Kind}; use super::*; - use crate::config::schema; #[test] fn serialize() { @@ -450,7 +449,7 @@ mod tests { Kind::object(btreemap! { "thing" => Kind::object(btreemap! { "service" => Kind::bytes(), - }) + }), }), [LogNamespace::Vector], ); @@ -490,7 +489,7 @@ mod tests { Kind::object(btreemap! { "thing" => Kind::object(btreemap! { "service" => Kind::bytes(), - }) + }), }), [LogNamespace::Vector], ); diff --git a/src/internal_events/codecs.rs b/lib/codecs/src/internal_events.rs similarity index 75% rename from src/internal_events/codecs.rs rename to lib/codecs/src/internal_events.rs index 27980af51b799..134fee16ecf8b 100644 --- a/src/internal_events/codecs.rs +++ b/lib/codecs/src/internal_events.rs @@ -1,11 +1,16 @@ +//! Internal events for codecs. + use metrics::counter; -use vector_lib::NamedInternalEvent; -use vector_lib::internal_event::{ - ComponentEventsDropped, InternalEvent, UNINTENTIONAL, error_stage, error_type, +use tracing::error; +use vector_common::internal_event::{ + ComponentEventsDropped, InternalEvent, UNINTENTIONAL, emit, error_stage, error_type, }; +use vector_common_macros::NamedInternalEvent; #[derive(Debug, NamedInternalEvent)] +/// Emitted when a decoder framing error occurs. pub struct DecoderFramingError { + /// The framing error that occurred. 
pub error: E, } @@ -29,8 +34,10 @@ impl InternalEvent for DecoderFramingError { } #[derive(Debug, NamedInternalEvent)] +/// Emitted when a decoder fails to deserialize a frame. pub struct DecoderDeserializeError<'a> { - pub error: &'a crate::Error, + /// The deserialize error that occurred. + pub error: &'a vector_common::Error, } impl InternalEvent for DecoderDeserializeError<'_> { @@ -53,8 +60,10 @@ impl InternalEvent for DecoderDeserializeError<'_> { } #[derive(Debug, NamedInternalEvent)] +/// Emitted when an encoder framing error occurs. pub struct EncoderFramingError<'a> { - pub error: &'a vector_lib::codecs::encoding::BoxedFramingError, + /// The framing error that occurred. + pub error: &'a crate::encoding::BoxedFramingError, } impl InternalEvent for EncoderFramingError<'_> { @@ -74,13 +83,15 @@ impl InternalEvent for EncoderFramingError<'_> { "stage" => error_stage::SENDING, ) .increment(1); - emit!(ComponentEventsDropped:: { count: 1, reason }); + emit(ComponentEventsDropped:: { count: 1, reason }); } } #[derive(Debug, NamedInternalEvent)] +/// Emitted when an encoder fails to serialize a frame. pub struct EncoderSerializeError<'a> { - pub error: &'a crate::Error, + /// The serialization error that occurred. + pub error: &'a vector_common::Error, } impl InternalEvent for EncoderSerializeError<'_> { @@ -100,16 +111,19 @@ impl InternalEvent for EncoderSerializeError<'_> { "stage" => error_stage::SENDING, ) .increment(1); - emit!(ComponentEventsDropped:: { + emit(ComponentEventsDropped:: { count: 1, - reason: SERIALIZE_REASON + reason: SERIALIZE_REASON, }); } } #[derive(Debug, NamedInternalEvent)] +/// Emitted when writing encoded bytes fails. pub struct EncoderWriteError<'a, E> { + /// The write error that occurred. pub error: &'a E, + /// The number of events dropped by the failed write. 
pub count: usize, } @@ -129,7 +143,7 @@ impl InternalEvent for EncoderWriteError<'_, E> { ) .increment(1); if self.count > 0 { - emit!(ComponentEventsDropped:: { + emit(ComponentEventsDropped:: { count: self.count, reason, }); @@ -137,13 +151,15 @@ impl InternalEvent for EncoderWriteError<'_, E> { } } -#[cfg(feature = "codecs-arrow")] +#[cfg(feature = "arrow")] #[derive(Debug, NamedInternalEvent)] +/// Emitted when encoding violates a schema constraint. pub struct EncoderNullConstraintError<'a> { - pub error: &'a crate::Error, + /// The schema constraint error that occurred. + pub error: &'a vector_common::Error, } -#[cfg(feature = "codecs-arrow")] +#[cfg(feature = "arrow")] impl InternalEvent for EncoderNullConstraintError<'_> { fn emit(self) { const CONSTRAINT_REASON: &str = "Schema constraint violation."; @@ -161,9 +177,9 @@ impl InternalEvent for EncoderNullConstraintError<'_> { "stage" => error_stage::SENDING, ) .increment(1); - emit!(ComponentEventsDropped:: { + emit(ComponentEventsDropped:: { count: 1, - reason: CONSTRAINT_REASON + reason: CONSTRAINT_REASON, }); } } diff --git a/lib/codecs/src/lib.rs b/lib/codecs/src/lib.rs index e386bb30f378a..c88eb531dc1a7 100644 --- a/lib/codecs/src/lib.rs +++ b/lib/codecs/src/lib.rs @@ -8,27 +8,33 @@ mod common; pub mod decoding; pub mod encoding; pub mod gelf; +pub mod internal_events; +mod ready_frames; pub use decoding::{ BytesDecoder, BytesDecoderConfig, BytesDeserializer, BytesDeserializerConfig, - CharacterDelimitedDecoder, CharacterDelimitedDecoderConfig, GelfDeserializer, - GelfDeserializerConfig, JsonDeserializer, JsonDeserializerConfig, LengthDelimitedDecoder, - LengthDelimitedDecoderConfig, NativeDeserializer, NativeDeserializerConfig, - NativeJsonDeserializer, NativeJsonDeserializerConfig, NewlineDelimitedDecoder, - NewlineDelimitedDecoderConfig, OctetCountingDecoder, OctetCountingDecoderConfig, - StreamDecodingError, VarintLengthDelimitedDecoder, VarintLengthDelimitedDecoderConfig, + 
CharacterDelimitedDecoder, CharacterDelimitedDecoderConfig, Decoder, DecodingConfig, + GelfDeserializer, GelfDeserializerConfig, JsonDeserializer, JsonDeserializerConfig, + LengthDelimitedDecoder, LengthDelimitedDecoderConfig, NativeDeserializer, + NativeDeserializerConfig, NativeJsonDeserializer, NativeJsonDeserializerConfig, + NewlineDelimitedDecoder, NewlineDelimitedDecoderConfig, OctetCountingDecoder, + OctetCountingDecoderConfig, StreamDecodingError, VarintLengthDelimitedDecoder, + VarintLengthDelimitedDecoderConfig, }; #[cfg(feature = "syslog")] pub use decoding::{SyslogDeserializer, SyslogDeserializerConfig}; pub use encoding::{ - BytesEncoder, BytesEncoderConfig, CharacterDelimitedEncoder, CharacterDelimitedEncoderConfig, - CsvSerializer, CsvSerializerConfig, GelfSerializer, GelfSerializerConfig, JsonSerializer, - JsonSerializerConfig, LengthDelimitedEncoder, LengthDelimitedEncoderConfig, LogfmtSerializer, - LogfmtSerializerConfig, NativeJsonSerializer, NativeJsonSerializerConfig, NativeSerializer, - NativeSerializerConfig, NewlineDelimitedEncoder, NewlineDelimitedEncoderConfig, - RawMessageSerializer, RawMessageSerializerConfig, TextSerializer, TextSerializerConfig, + BatchEncoder, BatchSerializer, BytesEncoder, BytesEncoderConfig, CharacterDelimitedEncoder, + CharacterDelimitedEncoderConfig, CsvSerializer, CsvSerializerConfig, Encoder, EncoderKind, + EncodingConfig, EncodingConfigWithFraming, GelfSerializer, GelfSerializerConfig, + JsonSerializer, JsonSerializerConfig, LengthDelimitedEncoder, LengthDelimitedEncoderConfig, + LogfmtSerializer, LogfmtSerializerConfig, NativeJsonSerializer, NativeJsonSerializerConfig, + NativeSerializer, NativeSerializerConfig, NewlineDelimitedEncoder, + NewlineDelimitedEncoderConfig, RawMessageSerializer, RawMessageSerializerConfig, SinkType, + TextSerializer, TextSerializerConfig, TimestampFormat, Transformer, }; pub use gelf::{VALID_FIELD_REGEX, gelf_fields}; +pub use ready_frames::ReadyFrames; use 
vector_config_macros::configurable_component; /// The user configuration to choose the metric tag strategy. diff --git a/src/codecs/ready_frames.rs b/lib/codecs/src/ready_frames.rs similarity index 100% rename from src/codecs/ready_frames.rs rename to lib/codecs/src/ready_frames.rs diff --git a/lib/dnstap-parser/src/vrl_functions/parse_dnstap.rs b/lib/dnstap-parser/src/vrl_functions/parse_dnstap.rs index b32fdcfeb3106..f61ef30c8f28a 100644 --- a/lib/dnstap-parser/src/vrl_functions/parse_dnstap.rs +++ b/lib/dnstap-parser/src/vrl_functions/parse_dnstap.rs @@ -13,6 +13,10 @@ impl Function for ParseDnstap { "parse_dnstap" } + fn usage(&self) -> &'static str { + "Parses the `value` as base64 encoded DNSTAP data." + } + fn parameters(&self) -> &'static [Parameter] { &[ Parameter { diff --git a/lib/enrichment/Cargo.toml b/lib/enrichment/Cargo.toml index 087ab7f60bfc0..1d2a67a2c26b6 100644 --- a/lib/enrichment/Cargo.toml +++ b/lib/enrichment/Cargo.toml @@ -8,5 +8,7 @@ publish = false [dependencies] arc-swap.workspace = true chrono.workspace = true +const-str.workspace = true dyn-clone = { version = "1.0.20", default-features = false } +indoc.workspace = true vrl.workspace = true diff --git a/lib/enrichment/src/find_enrichment_table_records.rs b/lib/enrichment/src/find_enrichment_table_records.rs index 369eb5d21140b..1b016aa901a14 100644 --- a/lib/enrichment/src/find_enrichment_table_records.rs +++ b/lib/enrichment/src/find_enrichment_table_records.rs @@ -51,6 +51,13 @@ impl Function for FindEnrichmentTableRecords { "find_enrichment_table_records" } + fn usage(&self) -> &'static str { + const_str::concat!( + "Searches an [enrichment table](/docs/reference/glossary/#enrichment-tables) for rows that match the provided condition.\n\n", + super::ENRICHMENT_TABLE_EXPLAINER + ) + } + fn parameters(&self) -> &'static [Parameter] { &[ Parameter { diff --git a/lib/enrichment/src/get_enrichment_table_record.rs b/lib/enrichment/src/get_enrichment_table_record.rs index 
efdbc92542a19..ef2103702f8ca 100644 --- a/lib/enrichment/src/get_enrichment_table_record.rs +++ b/lib/enrichment/src/get_enrichment_table_record.rs @@ -48,6 +48,14 @@ impl Function for GetEnrichmentTableRecord { "get_enrichment_table_record" } + fn usage(&self) -> &'static str { + const USAGE: &str = const_str::concat!( + "Searches an [enrichment table](/docs/reference/glossary/#enrichment-tables) for a row that matches the provided condition. A single row must be matched. If no rows are found or more than one row is found, an error is returned.\n\n", + super::ENRICHMENT_TABLE_EXPLAINER + ); + USAGE + } + fn parameters(&self) -> &'static [Parameter] { &[ Parameter { diff --git a/lib/enrichment/src/lib.rs b/lib/enrichment/src/lib.rs index e69cf979a1dc0..a73f18f163181 100644 --- a/lib/enrichment/src/lib.rs +++ b/lib/enrichment/src/lib.rs @@ -9,6 +9,7 @@ mod test_util; mod vrl_util; use dyn_clone::DynClone; +use indoc::indoc; pub use tables::{TableRegistry, TableSearch}; use vrl::{ compiler::Function, @@ -97,3 +98,59 @@ pub fn vrl_functions() -> Vec> { Box::new(find_enrichment_table_records::FindEnrichmentTableRecords) as _, ] } + +pub(crate) const ENRICHMENT_TABLE_EXPLAINER: &str = indoc! {r#" + For `file` enrichment tables, this condition needs to be a VRL object in which + the key-value pairs indicate a field to search mapped to a value to search in that field. + This function returns the rows that match the provided condition(s). _All_ fields need to + match for rows to be returned; if any fields do not match, then no rows are returned. + + There are currently three forms of search criteria: + + 1. **Exact match search**. The given field must match the value exactly. Case sensitivity + can be specified using the `case_sensitive` argument. An exact match search can use an + index directly into the dataset, which should make this search fairly "cheap" from a + performance perspective. + + 2. **Wildcard match search**. 
The given fields specified by the exact match search may also + be matched exactly to the value provided to the `wildcard` parameter. + A wildcard match search can also use an index directly into the dataset. + + 3. **Date range search**. The given field must be greater than or equal to the `from` date + and/or less than or equal to the `to` date. A date range search involves + sequentially scanning through the rows that have been located using any exact match + criteria. This can be an expensive operation if there are many rows returned by any exact + match criteria. Therefore, use date ranges as the _only_ criteria when the enrichment + data set is very small. + + For `geoip` and `mmdb` enrichment tables, this condition needs to be a VRL object with a single key-value pair + whose value needs to be a valid IP address. Example: `{"ip": .ip }`. If a return field is expected + but has no value, `null` is used. This table can return the following fields: + + * ISP databases: + * `autonomous_system_number` + * `autonomous_system_organization` + * `isp` + * `organization` + + * City databases: + * `city_name` + * `continent_code` + * `country_code` + * `country_name` + * `region_code` + * `region_name` + * `metro_code` + * `latitude` + * `longitude` + * `postal_code` + * `timezone` + + * Connection-Type databases: + * `connection_type` + + To use this function, you need to update your configuration to + include an + [`enrichment_tables`](/docs/reference/configuration/global-options/#enrichment_tables) + parameter. 
+"#}; diff --git a/lib/vector-buffers/src/internal_events.rs b/lib/vector-buffers/src/internal_events.rs index e207b8e0fab74..6d2735c7bb75f 100644 --- a/lib/vector-buffers/src/internal_events.rs +++ b/lib/vector-buffers/src/internal_events.rs @@ -18,19 +18,34 @@ pub struct BufferCreated { impl InternalEvent for BufferCreated { #[expect(clippy::cast_precision_loss)] fn emit(self) { + let stage = self.idx.to_string(); if self.max_size_events != 0 { + gauge!( + "buffer_max_size_events", + "buffer_id" => self.buffer_id.clone(), + "stage" => stage.clone(), + ) + .set(self.max_size_events as f64); + // DEPRECATED: buffer-bytes-events-metrics gauge!( "buffer_max_event_size", "buffer_id" => self.buffer_id.clone(), - "stage" => self.idx.to_string(), + "stage" => stage.clone(), ) .set(self.max_size_events as f64); } if self.max_size_bytes != 0 { + gauge!( + "buffer_max_size_bytes", + "buffer_id" => self.buffer_id.clone(), + "stage" => stage.clone(), + ) + .set(self.max_size_bytes as f64); + // DEPRECATED: buffer-bytes-events-metrics gauge!( "buffer_max_byte_size", "buffer_id" => self.buffer_id, - "stage" => self.idx.to_string(), + "stage" => stage, ) .set(self.max_size_bytes as f64); } @@ -63,12 +78,26 @@ impl InternalEvent for BufferEventsReceived { "stage" => self.idx.to_string() ) .increment(self.byte_size); + // DEPRECATED: buffer-bytes-events-metrics gauge!( "buffer_events", "buffer_id" => self.buffer_id.clone(), "stage" => self.idx.to_string() ) .set(self.total_count as f64); + gauge!( + "buffer_size_events", + "buffer_id" => self.buffer_id.clone(), + "stage" => self.idx.to_string() + ) + .set(self.total_count as f64); + gauge!( + "buffer_size_bytes", + "buffer_id" => self.buffer_id.clone(), + "stage" => self.idx.to_string() + ) + .set(self.total_byte_size as f64); + // DEPRECATED: buffer-bytes-events-metrics gauge!( "buffer_byte_size", "buffer_id" => self.buffer_id, @@ -103,12 +132,26 @@ impl InternalEvent for BufferEventsSent { "stage" => self.idx.to_string() ) 
.increment(self.byte_size); + // DEPRECATED: buffer-bytes-events-metrics gauge!( "buffer_events", "buffer_id" => self.buffer_id.clone(), "stage" => self.idx.to_string() ) .set(self.total_count as f64); + gauge!( + "buffer_size_events", + "buffer_id" => self.buffer_id.clone(), + "stage" => self.idx.to_string() + ) + .set(self.total_count as f64); + gauge!( + "buffer_size_bytes", + "buffer_id" => self.buffer_id.clone(), + "stage" => self.idx.to_string() + ) + .set(self.total_byte_size as f64); + // DEPRECATED: buffer-bytes-events-metrics gauge!( "buffer_byte_size", "buffer_id" => self.buffer_id, @@ -170,12 +213,26 @@ impl InternalEvent for BufferEventsDropped { "intentional" => intentional_str, ) .increment(self.byte_size); + // DEPRECATED: buffer-bytes-events-metrics gauge!( "buffer_events", "buffer_id" => self.buffer_id.clone(), "stage" => self.idx.to_string() ) .set(self.total_count as f64); + gauge!( + "buffer_size_events", + "buffer_id" => self.buffer_id.clone(), + "stage" => self.idx.to_string() + ) + .set(self.total_count as f64); + gauge!( + "buffer_size_bytes", + "buffer_id" => self.buffer_id.clone(), + "stage" => self.idx.to_string() + ) + .set(self.total_byte_size as f64); + // DEPRECATED: buffer-bytes-events-metrics gauge!( "buffer_byte_size", "buffer_id" => self.buffer_id, diff --git a/lib/vector-buffers/src/lib.rs b/lib/vector-buffers/src/lib.rs index d9c6373831c50..50daa4f19de1c 100644 --- a/lib/vector-buffers/src/lib.rs +++ b/lib/vector-buffers/src/lib.rs @@ -108,6 +108,13 @@ pub trait Bufferable: InMemoryBufferable + Encodable {} // Blanket implementation for anything that is already bufferable. impl Bufferable for T where T: InMemoryBufferable + Encodable {} +/// Hook for observing items as they are sent into a `BufferSender`. +pub trait BufferInstrumentation: Send + Sync + 'static { + /// Called immediately before the item is emitted to the underlying buffer. + /// The underlying type is stored in an `Arc`, so we cannot have `&mut self`. 
+ fn on_send(&self, item: &T); +} + pub trait EventCount { fn event_count(&self) -> usize; } diff --git a/lib/vector-buffers/src/topology/builder.rs b/lib/vector-buffers/src/topology/builder.rs index c4a42c30fab1f..0f54759c3bc65 100644 --- a/lib/vector-buffers/src/topology/builder.rs +++ b/lib/vector-buffers/src/topology/builder.rs @@ -191,12 +191,13 @@ impl TopologyBuilder { when_full: WhenFull, receiver_span: &Span, metadata: Option, + ewma_alpha: Option, ) -> (BufferSender, BufferReceiver) { let usage_handle = BufferUsageHandle::noop(); usage_handle.set_buffer_limits(None, Some(max_events.get())); let limit = MemoryBufferSize::MaxEvents(max_events); - let (sender, receiver) = limited(limit, metadata); + let (sender, receiver) = limited(limit, metadata, ewma_alpha); let mode = match when_full { WhenFull::Overflow => WhenFull::Block, @@ -232,7 +233,7 @@ impl TopologyBuilder { usage_handle.set_buffer_limits(None, Some(max_events.get())); let limit = MemoryBufferSize::MaxEvents(max_events); - let (sender, receiver) = limited(limit, metadata); + let (sender, receiver) = limited(limit, metadata, None); let mode = match when_full { WhenFull::Overflow => WhenFull::Block, diff --git a/lib/vector-buffers/src/topology/channel/limited_queue.rs b/lib/vector-buffers/src/topology/channel/limited_queue.rs index 54264e06afe60..739f7cca9cdaa 100644 --- a/lib/vector-buffers/src/topology/channel/limited_queue.rs +++ b/lib/vector-buffers/src/topology/channel/limited_queue.rs @@ -16,6 +16,7 @@ use crossbeam_queue::{ArrayQueue, SegQueue}; use futures::Stream; use metrics::{Gauge, Histogram, gauge, histogram}; use tokio::sync::{Notify, OwnedSemaphorePermit, Semaphore, TryAcquireError}; +use vector_common::stats::EwmaGauge; use crate::{InMemoryBufferable, config::MemoryBufferSize}; @@ -108,45 +109,72 @@ impl ChannelMetricMetadata { struct Metrics { histogram: Histogram, gauge: Gauge, + mean_gauge: EwmaGauge, // We hold a handle to the max gauge to avoid it being dropped by the metrics 
collector, but // since the value is static, we never need to update it. The compiler detects this as an unused // field, so we need to suppress the warning here. #[expect(dead_code)] max_gauge: Gauge, + #[expect(dead_code)] + legacy_max_gauge: Gauge, #[cfg(test)] recorded_values: Arc>>, } impl Metrics { #[expect(clippy::cast_precision_loss)] // We have to convert buffer sizes for a gauge, it's okay to lose precision here. - fn new(limit: MemoryBufferSize, metadata: ChannelMetricMetadata) -> Self { + fn new( + limit: MemoryBufferSize, + metadata: ChannelMetricMetadata, + ewma_alpha: Option, + ) -> Self { let ChannelMetricMetadata { prefix, output } = metadata; - let (gauge_suffix, max_value) = match limit { - MemoryBufferSize::MaxEvents(max_events) => ("_max_event_size", max_events.get() as f64), - MemoryBufferSize::MaxSize(max_bytes) => ("_max_byte_size", max_bytes.get() as f64), + let (legacy_suffix, gauge_suffix, max_value) = match limit { + MemoryBufferSize::MaxEvents(max_events) => ( + "_max_event_size", + "_max_size_events", + max_events.get() as f64, + ), + MemoryBufferSize::MaxSize(max_bytes) => { + ("_max_byte_size", "_max_size_bytes", max_bytes.get() as f64) + } }; let max_gauge_name = format!("{prefix}{gauge_suffix}"); + let legacy_max_gauge_name = format!("{prefix}{legacy_suffix}"); let histogram_name = format!("{prefix}_utilization"); let gauge_name = format!("{prefix}_utilization_level"); + let mean_name = format!("{prefix}_utilization_mean"); #[cfg(test)] let recorded_values = Arc::new(Mutex::new(Vec::new())); if let Some(label_value) = output { let max_gauge = gauge!(max_gauge_name, "output" => label_value.clone()); max_gauge.set(max_value); + let mean_gauge_handle = gauge!(mean_name, "output" => label_value.clone()); + // DEPRECATED: buffer-bytes-events-metrics + let legacy_max_gauge = gauge!(legacy_max_gauge_name, "output" => label_value.clone()); + legacy_max_gauge.set(max_value); Self { histogram: histogram!(histogram_name, "output" => 
label_value.clone()), gauge: gauge!(gauge_name, "output" => label_value.clone()), + mean_gauge: EwmaGauge::new(mean_gauge_handle, ewma_alpha), max_gauge, + legacy_max_gauge, #[cfg(test)] recorded_values, } } else { let max_gauge = gauge!(max_gauge_name); max_gauge.set(max_value); + let mean_gauge_handle = gauge!(mean_name); + // DEPRECATED: buffer-bytes-events-metrics + let legacy_max_gauge = gauge!(legacy_max_gauge_name); + legacy_max_gauge.set(max_value); Self { histogram: histogram!(histogram_name), gauge: gauge!(gauge_name), + mean_gauge: EwmaGauge::new(mean_gauge_handle, ewma_alpha), max_gauge, + legacy_max_gauge, #[cfg(test)] recorded_values, } @@ -157,6 +185,7 @@ impl Metrics { fn record(&self, value: usize) { self.histogram.record(value as f64); self.gauge.set(value as f64); + self.mean_gauge.record(value as f64); #[cfg(test)] if let Ok(mut recorded) = self.recorded_values.lock() { recorded.push(value); @@ -186,9 +215,13 @@ impl Clone for Inner { } impl Inner { - fn new(limit: MemoryBufferSize, metric_metadata: Option) -> Self { + fn new( + limit: MemoryBufferSize, + metric_metadata: Option, + ewma_alpha: Option, + ) -> Self { let read_waker = Arc::new(Notify::new()); - let metrics = metric_metadata.map(|metadata| Metrics::new(limit, metadata)); + let metrics = metric_metadata.map(|metadata| Metrics::new(limit, metadata, ewma_alpha)); match limit { MemoryBufferSize::MaxEvents(max_events) => Inner { data: Arc::new(ArrayQueue::new(max_events.get())), @@ -381,8 +414,9 @@ impl Drop for LimitedReceiver { pub fn limited( limit: MemoryBufferSize, metric_metadata: Option, + ewma_alpha: Option, ) -> (LimitedSender, LimitedReceiver) { - let inner = Inner::new(limit, metric_metadata); + let inner = Inner::new(limit, metric_metadata, ewma_alpha); let sender = LimitedSender { inner: inner.clone(), @@ -410,7 +444,7 @@ mod tests { #[tokio::test] async fn send_receive() { let limit = MemoryBufferSize::MaxEvents(NonZeroUsize::new(2).unwrap()); - let (mut tx, mut rx) = 
limited(limit, None); + let (mut tx, mut rx) = limited(limit, None, None); assert_eq!(2, tx.available_capacity()); @@ -442,6 +476,7 @@ mod tests { let (mut tx, mut rx) = limited( limit, Some(ChannelMetricMetadata::new("test_channel", None)), + None, ); let metrics = tx.inner.metrics.as_ref().unwrap().recorded_values.clone(); @@ -461,7 +496,7 @@ mod tests { // With this configuration a maximum of exactly 10 messages can fit in the channel let limit = MemoryBufferSize::MaxSize(NonZeroUsize::new(max_allowed_bytes).unwrap()); - let (mut tx, mut rx) = limited(limit, None); + let (mut tx, mut rx) = limited(limit, None, None); assert_eq!(max_allowed_bytes, tx.available_capacity()); @@ -495,7 +530,7 @@ mod tests { #[test] fn sender_waits_for_more_capacity_when_none_available() { let limit = MemoryBufferSize::MaxEvents(NonZeroUsize::new(1).unwrap()); - let (mut tx, mut rx) = limited(limit, None); + let (mut tx, mut rx) = limited(limit, None, None); assert_eq!(1, tx.available_capacity()); @@ -557,7 +592,7 @@ mod tests { #[test] fn sender_waits_for_more_capacity_when_partial_available() { let limit = MemoryBufferSize::MaxEvents(NonZeroUsize::new(7).unwrap()); - let (mut tx, mut rx) = limited(limit, None); + let (mut tx, mut rx) = limited(limit, None, None); assert_eq!(7, tx.available_capacity()); @@ -646,7 +681,7 @@ mod tests { #[test] fn empty_receiver_returns_none_when_last_sender_drops() { let limit = MemoryBufferSize::MaxEvents(NonZeroUsize::new(1).unwrap()); - let (mut tx, mut rx) = limited(limit, None); + let (mut tx, mut rx) = limited(limit, None, None); assert_eq!(1, tx.available_capacity()); @@ -689,7 +724,7 @@ mod tests { #[test] fn receiver_returns_none_once_empty_when_last_sender_drops() { let limit = MemoryBufferSize::MaxEvents(NonZeroUsize::new(1).unwrap()); - let (tx, mut rx) = limited::(limit, None); + let (tx, mut rx) = limited::(limit, None, None); assert_eq!(1, tx.available_capacity()); @@ -719,7 +754,7 @@ mod tests { #[test] fn 
oversized_send_allowed_when_empty() { let limit = MemoryBufferSize::MaxEvents(NonZeroUsize::new(1).unwrap()); - let (mut tx, mut rx) = limited(limit, None); + let (mut tx, mut rx) = limited(limit, None, None); assert_eq!(1, tx.available_capacity()); @@ -752,7 +787,7 @@ mod tests { #[test] fn oversized_send_allowed_when_partial_capacity() { let limit = MemoryBufferSize::MaxEvents(NonZeroUsize::new(2).unwrap()); - let (mut tx, mut rx) = limited(limit, None); + let (mut tx, mut rx) = limited(limit, None, None); assert_eq!(2, tx.available_capacity()); diff --git a/lib/vector-buffers/src/topology/channel/mod.rs b/lib/vector-buffers/src/topology/channel/mod.rs index 303abee288cc4..300e79d88c872 100644 --- a/lib/vector-buffers/src/topology/channel/mod.rs +++ b/lib/vector-buffers/src/topology/channel/mod.rs @@ -7,6 +7,7 @@ pub use limited_queue::{ }; pub use receiver::*; pub use sender::*; +pub use vector_common::stats::DEFAULT_EWMA_ALPHA; #[cfg(test)] mod tests; diff --git a/lib/vector-buffers/src/topology/channel/sender.rs b/lib/vector-buffers/src/topology/channel/sender.rs index e68b868e042e7..0a7116f8ceea9 100644 --- a/lib/vector-buffers/src/topology/channel/sender.rs +++ b/lib/vector-buffers/src/topology/channel/sender.rs @@ -8,7 +8,7 @@ use vector_common::internal_event::{InternalEventHandle, Registered, register}; use super::limited_queue::LimitedSender; use crate::{ - Bufferable, WhenFull, + BufferInstrumentation, Bufferable, WhenFull, buffer_usage_data::BufferUsageHandle, internal_events::BufferSendDuration, variants::disk_v2::{self, ProductionFilesystem}, @@ -134,9 +134,11 @@ pub struct BufferSender { base: SenderAdapter, overflow: Option>>, when_full: WhenFull, - instrumentation: Option, + usage_instrumentation: Option, #[derivative(Debug = "ignore")] send_duration: Option>, + #[derivative(Debug = "ignore")] + custom_instrumentation: Option>>, } impl BufferSender { @@ -146,8 +148,9 @@ impl BufferSender { base, overflow: None, when_full, - instrumentation: None, 
+ usage_instrumentation: None, send_duration: None, + custom_instrumentation: None, } } @@ -157,8 +160,9 @@ impl BufferSender { base, overflow: Some(Box::new(overflow)), when_full: WhenFull::Overflow, - instrumentation: None, + usage_instrumentation: None, send_duration: None, + custom_instrumentation: None, } } @@ -174,7 +178,7 @@ impl BufferSender { /// Configures this sender to instrument the items passing through it. pub fn with_usage_instrumentation(&mut self, handle: BufferUsageHandle) { - self.instrumentation = Some(handle); + self.usage_instrumentation = Some(handle); } /// Configures this sender to instrument the send duration. @@ -182,6 +186,11 @@ impl BufferSender { let _enter = span.enter(); self.send_duration = Some(register(BufferSendDuration { stage })); } + + /// Configures this sender to invoke a custom instrumentation hook. + pub fn with_custom_instrumentation(&mut self, instrumentation: impl BufferInstrumentation) { + self.custom_instrumentation = Some(Arc::new(instrumentation)); + } } impl BufferSender { @@ -197,14 +206,17 @@ impl BufferSender { #[async_recursion] pub async fn send(&mut self, item: T, send_reference: Option) -> crate::Result<()> { + if let Some(instrumentation) = self.custom_instrumentation.as_ref() { + instrumentation.on_send(&item); + } let item_sizing = self - .instrumentation + .usage_instrumentation .as_ref() .map(|_| (item.event_count(), item.size_of())); let mut was_dropped = false; - if let Some(instrumentation) = self.instrumentation.as_ref() + if let Some(instrumentation) = self.usage_instrumentation.as_ref() && let Some((item_count, item_size)) = item_sizing { instrumentation @@ -229,7 +241,7 @@ impl BufferSender { } } - if let Some(instrumentation) = self.instrumentation.as_ref() + if let Some(instrumentation) = self.usage_instrumentation.as_ref() && let Some((item_count, item_size)) = item_sizing && was_dropped { diff --git a/lib/vector-buffers/src/variants/in_memory.rs b/lib/vector-buffers/src/variants/in_memory.rs 
index f6bbe87c15b29..93937a591b133 100644 --- a/lib/vector-buffers/src/variants/in_memory.rs +++ b/lib/vector-buffers/src/variants/in_memory.rs @@ -45,7 +45,7 @@ where usage_handle.set_buffer_limits(max_bytes, max_size); - let (tx, rx) = limited(self.capacity, None); + let (tx, rx) = limited(self.capacity, None, None); Ok((tx.into(), rx.into())) } } diff --git a/lib/vector-common-macros/src/internal_event.rs b/lib/vector-common-macros/src/internal_event.rs index aed9e71bfa9a0..58972625d64f2 100644 --- a/lib/vector-common-macros/src/internal_event.rs +++ b/lib/vector-common-macros/src/internal_event.rs @@ -27,7 +27,8 @@ pub fn derive_impl_named_internal_event(item: TokenStream) -> TokenStream { let pkg_name = std::env::var("CARGO_PKG_NAME").unwrap_or_default(); let internal_event_path = if pkg_name == "vector-common" { quote! { crate::internal_event } - } else if pkg_name.starts_with("vector-") || pkg_name == "dnstap-parser" { + } else if pkg_name.starts_with("vector-") || pkg_name == "codecs" || pkg_name == "dnstap-parser" + { // Most vector-* crates depend on vector-common but not vector-lib quote! { ::vector_common::internal_event } } else { diff --git a/lib/vector-common/src/atomic.rs b/lib/vector-common/src/atomic.rs new file mode 100644 index 0000000000000..a2d342771819a --- /dev/null +++ b/lib/vector-common/src/atomic.rs @@ -0,0 +1,49 @@ +use std::sync::atomic::{AtomicU64, Ordering}; + +use metrics::GaugeFn; + +/// Simple atomic wrapper for `f64` values. +#[derive(Debug)] +pub struct AtomicF64(AtomicU64); + +impl AtomicF64 { + /// Creates a new `AtomicF64` with the given initial value. 
+ #[must_use] + pub fn new(init: f64) -> Self { + Self(AtomicU64::new(init.to_bits())) + } + + pub fn load(&self, order: Ordering) -> f64 { + f64::from_bits(self.0.load(order)) + } + + #[expect(clippy::missing_panics_doc, reason = "fetch_update always succeeds")] + pub fn fetch_update( + &self, + set_order: Ordering, + fetch_order: Ordering, + mut f: impl FnMut(f64) -> f64, + ) -> f64 { + f64::from_bits( + self.0 + .fetch_update(set_order, fetch_order, |x| { + Some(f(f64::from_bits(x)).to_bits()) + }) + .expect("fetch_update always succeeds"), + ) + } +} + +impl GaugeFn for AtomicF64 { + fn increment(&self, amount: f64) { + self.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |value| value + amount); + } + + fn decrement(&self, amount: f64) { + self.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |value| value - amount); + } + + fn set(&self, value: f64) { + self.0.store(f64::to_bits(value), Ordering::Relaxed); + } +} diff --git a/lib/vector-common/src/lib.rs b/lib/vector-common/src/lib.rs index 8216f67f7a2a9..099a068578090 100644 --- a/lib/vector-common/src/lib.rs +++ b/lib/vector-common/src/lib.rs @@ -59,6 +59,8 @@ pub mod shutdown; #[cfg(feature = "sensitive_string")] pub mod sensitive_string; +pub mod atomic; +pub mod stats; pub mod trigger; #[macro_use] diff --git a/lib/vector-common/src/stats/ewma_gauge.rs b/lib/vector-common/src/stats/ewma_gauge.rs new file mode 100644 index 0000000000000..ebd635cad1f78 --- /dev/null +++ b/lib/vector-common/src/stats/ewma_gauge.rs @@ -0,0 +1,32 @@ +use std::sync::Arc; + +use metrics::Gauge; + +use super::AtomicEwma; + +/// The default alpha parameter used when constructing EWMA-backed gauges. +pub const DEFAULT_EWMA_ALPHA: f64 = 0.9; + +/// Couples a [`Gauge`] with an [`AtomicEwma`] so gauge readings reflect the EWMA. +#[derive(Clone, Debug)] +pub struct EwmaGauge { + gauge: Gauge, + // Note that the `Gauge` internally is equivalent to an `Arc` so we need to use the + // same semantics for the EWMA calculation as well. 
+ ewma: Arc, +} + +impl EwmaGauge { + #[must_use] + pub fn new(gauge: Gauge, alpha: Option) -> Self { + let alpha = alpha.unwrap_or(DEFAULT_EWMA_ALPHA); + let ewma = Arc::new(AtomicEwma::new(alpha)); + Self { gauge, ewma } + } + + /// Records a new value, updates the EWMA, and sets the gauge accordingly. + pub fn record(&self, value: f64) { + let average = self.ewma.update(value); + self.gauge.set(average); + } +} diff --git a/src/stats.rs b/lib/vector-common/src/stats/mod.rs similarity index 69% rename from src/stats.rs rename to lib/vector-common/src/stats/mod.rs index 9bcd3253515a9..5c6c5bccc0240 100644 --- a/src/stats.rs +++ b/lib/vector-common/src/stats/mod.rs @@ -1,4 +1,13 @@ #![allow(missing_docs)] + +pub mod ewma_gauge; + +pub use ewma_gauge::{DEFAULT_EWMA_ALPHA, EwmaGauge}; + +use std::sync::atomic::Ordering; + +use crate::atomic::AtomicF64; + /// Exponentially Weighted Moving Average #[derive(Clone, Copy, Debug)] pub struct Ewma { @@ -7,11 +16,13 @@ pub struct Ewma { } impl Ewma { + #[must_use] pub const fn new(alpha: f64) -> Self { let average = None; Self { average, alpha } } + #[must_use] pub const fn average(&self) -> Option { self.average } @@ -35,6 +46,7 @@ pub struct EwmaDefault { } impl EwmaDefault { + #[must_use] pub const fn new(alpha: f64, initial_value: f64) -> Self { Self { average: initial_value, @@ -42,6 +54,7 @@ impl EwmaDefault { } } + #[must_use] pub const fn average(&self) -> f64 { self.average } @@ -67,21 +80,25 @@ pub struct MeanVariance { } impl EwmaVar { + #[must_use] pub const fn new(alpha: f64) -> Self { let state = None; Self { state, alpha } } + #[must_use] pub const fn state(&self) -> Option { self.state } #[cfg(test)] + #[must_use] pub fn average(&self) -> Option { self.state.map(|state| state.mean) } #[cfg(test)] + #[must_use] pub fn variance(&self) -> Option { self.state.map(|state| state.variance) } @@ -114,11 +131,16 @@ pub struct Mean { impl Mean { /// Update the and return the current average + #[expect( + 
clippy::cast_precision_loss, + reason = "We have to convert count to f64 for the calculation, it's okay to lose precision for very large counts." + )] pub fn update(&mut self, point: f64) { self.count += 1; self.mean += (point - self.mean) / self.count as f64; } + #[must_use] pub const fn average(&self) -> Option { match self.count { 0 => None, @@ -127,6 +149,43 @@ impl Mean { } } +/// Atomic EWMA that uses an `AtomicF64` to store the current average. +#[derive(Debug)] +pub struct AtomicEwma { + average: AtomicF64, + alpha: f64, +} + +impl AtomicEwma { + #[must_use] + pub fn new(alpha: f64) -> Self { + Self { + average: AtomicF64::new(f64::NAN), + alpha, + } + } + + pub fn update(&self, point: f64) -> f64 { + let mut result = f64::NAN; + self.average + .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |current| { + let average = if current.is_nan() { + point + } else { + point.mul_add(self.alpha, current * (1.0 - self.alpha)) + }; + result = average; + average + }); + result + } + + pub fn average(&self) -> Option { + let value = self.average.load(Ordering::Relaxed); + if value.is_nan() { None } else { Some(value) } + } +} + #[cfg(test)] mod tests { use super::*; @@ -144,16 +203,17 @@ mod tests { } #[test] + #[expect(clippy::float_cmp, reason = "none of the values will be rounded")] fn ewma_update_works() { let mut mean = Ewma::new(0.5); assert_eq!(mean.average(), None); - mean.update(2.0); + assert_eq!(mean.update(2.0), 2.0); assert_eq!(mean.average(), Some(2.0)); - mean.update(2.0); + assert_eq!(mean.update(2.0), 2.0); assert_eq!(mean.average(), Some(2.0)); - mean.update(1.0); + assert_eq!(mean.update(1.0), 1.5); assert_eq!(mean.average(), Some(1.5)); - mean.update(2.0); + assert_eq!(mean.update(2.0), 1.75); assert_eq!(mean.average(), Some(1.75)); assert_eq!(mean.average, Some(1.75)); @@ -185,4 +245,15 @@ mod tests { }) ); } + + #[test] + #[expect(clippy::float_cmp, reason = "none of the values will be rounded")] + fn atomic_ewma_update_works() { + let ewma = 
AtomicEwma::new(0.5); + assert_eq!(ewma.average(), None); + assert_eq!(ewma.update(2.0), 2.0); + assert_eq!(ewma.average(), Some(2.0)); + assert_eq!(ewma.update(1.0), 1.5); + assert_eq!(ewma.average(), Some(1.5)); + } } diff --git a/lib/vector-core/src/config/global_options.rs b/lib/vector-core/src/config/global_options.rs index e789e8ae973ab..0c034c473e5e9 100644 --- a/lib/vector-core/src/config/global_options.rs +++ b/lib/vector-core/src/config/global_options.rs @@ -140,6 +140,32 @@ pub struct GlobalOptions { #[serde(skip_serializing_if = "crate::serde::is_default")] pub expire_metrics_per_metric_set: Option>, + /// The alpha value for the exponential weighted moving average (EWMA) of source and transform + /// buffer utilization metrics. + /// + /// This controls how quickly the `*_buffer_utilization_mean` gauges respond to new + /// observations. Values closer to 1.0 retain more of the previous value, leading to slower + /// adjustments. The default value of 0.9 is equivalent to a "half life" of 6-7 measurements. + /// + /// Must be between 0 and 1 exclusively (0 < alpha < 1). + #[serde(default, skip_serializing_if = "crate::serde::is_default")] + #[configurable(validation(range(min = 0.0, max = 1.0)))] + #[configurable(metadata(docs::advanced))] + pub buffer_utilization_ewma_alpha: Option, + + /// The alpha value for the exponential weighted moving average (EWMA) of transform processing + /// time metrics. + /// + /// This controls how quickly the `event_processing_time_mean_seconds` gauge responds to new + /// observations. Values closer to 1.0 retain more of the previous value, leading to slower + /// adjustments. The default value of 0.9 is equivalent to a "half life" of 6-7 measurements. + /// + /// Must be between 0 and 1 exclusively (0 < alpha < 1). 
+ #[serde(default, skip_serializing_if = "crate::serde::is_default")] + #[configurable(validation(range(min = 0.0, max = 1.0)))] + #[configurable(metadata(docs::advanced))] + pub processing_time_ewma_alpha: Option, + /// The interval, in seconds, at which the internal metrics cache for VRL is refreshed. /// This must be set to be able to access metrics in VRL functions. /// @@ -295,6 +321,12 @@ impl GlobalOptions { expire_metrics: self.expire_metrics.or(with.expire_metrics), expire_metrics_secs: self.expire_metrics_secs.or(with.expire_metrics_secs), expire_metrics_per_metric_set: merged_expire_metrics_per_metric_set, + buffer_utilization_ewma_alpha: self + .buffer_utilization_ewma_alpha + .or(with.buffer_utilization_ewma_alpha), + processing_time_ewma_alpha: self + .processing_time_ewma_alpha + .or(with.processing_time_ewma_alpha), metrics_storage_refresh_period: self .metrics_storage_refresh_period .or(with.metrics_storage_refresh_period), diff --git a/lib/vector-core/src/config/mod.rs b/lib/vector-core/src/config/mod.rs index c86848d7b0be5..9969fb670eebf 100644 --- a/lib/vector-core/src/config/mod.rs +++ b/lib/vector-core/src/config/mod.rs @@ -503,6 +503,7 @@ impl LogNamespace { path!("source_type"), Bytes::from_static(source_name.as_bytes()), ); + log.metadata_mut().set_ingest_timestamp(now); self.insert_vector_metadata( log, log_schema().timestamp_key(), diff --git a/lib/vector-core/src/event/metadata.rs b/lib/vector-core/src/event/metadata.rs index f860b03bb207b..8123cf59ecfc4 100644 --- a/lib/vector-core/src/event/metadata.rs +++ b/lib/vector-core/src/event/metadata.rs @@ -2,6 +2,7 @@ use std::{borrow::Cow, collections::BTreeMap, fmt, sync::Arc}; +use chrono::{DateTime, Utc}; use derivative::Derivative; use lookup::OwnedTargetPath; use serde::{Deserialize, Serialize}; @@ -78,6 +79,11 @@ pub(super) struct Inner { /// An internal vector id that can be used to identify this event across all components. 
#[derivative(PartialEq = "ignore")] pub(crate) source_event_id: Option, + + /// The timestamp when the event was ingested into Vector. + #[derivative(PartialEq = "ignore")] + #[serde(default, skip)] + pub(crate) ingest_timestamp: Option>, } /// Metric Origin metadata for submission to Datadog. @@ -239,6 +245,17 @@ impl EventMetadata { pub fn source_event_id(&self) -> Option { self.0.source_event_id } + + /// Returns the ingest timestamp, if it exists. + #[must_use] + pub fn ingest_timestamp(&self) -> Option> { + self.0.ingest_timestamp + } + + /// Sets the ingest timestamp to the provided value. + pub fn set_ingest_timestamp(&mut self, timestamp: DateTime) { + self.get_mut().ingest_timestamp = Some(timestamp); + } } impl Default for Inner { @@ -254,6 +271,7 @@ impl Default for Inner { dropped_fields: ObjectMap::new(), datadog_origin_metadata: None, source_event_id: Some(Uuid::new_v4()), + ingest_timestamp: None, } } } diff --git a/lib/vector-core/src/event/metric/mod.rs b/lib/vector-core/src/event/metric/mod.rs index 884fbbdc1d81e..5e956f8042c02 100644 --- a/lib/vector-core/src/event/metric/mod.rs +++ b/lib/vector-core/src/event/metric/mod.rs @@ -896,6 +896,33 @@ mod test { assert!(!new_reset_histogram.subtract(&old_histogram)); } + #[test] + fn subtract_aggregated_histograms_bucket_redistribution() { + // Test for issue #24415: when total count is higher but individual bucket counts is sometimes lower + let old_histogram = Metric::new( + "histogram", + MetricKind::Absolute, + MetricValue::AggregatedHistogram { + count: 15, + sum: 15.0, + buckets: buckets!(1.0 => 10, 2.0 => 5), + }, + ); + + let mut new_histogram_with_redistribution = Metric::new( + "histogram", + MetricKind::Absolute, + MetricValue::AggregatedHistogram { + count: 20, + sum: 20.0, + // Total count is higher (20 > 15), but bucket1 count is lower (8 < 10) + buckets: buckets!(1.0 => 8, 2.0 => 12), + }, + ); + + assert!(!new_histogram_with_redistribution.subtract(&old_histogram)); + } + #[test] // 
`too_many_lines` is mostly just useful for production code but we're not // able to flag the lint on only for non-test. diff --git a/lib/vector-core/src/event/metric/value.rs b/lib/vector-core/src/event/metric/value.rs index ce21e80bbec24..7e301ae7943ce 100644 --- a/lib/vector-core/src/event/metric/value.rs +++ b/lib/vector-core/src/event/metric/value.rs @@ -327,6 +327,11 @@ impl MetricValue { // fewer values -- would not make sense, since buckets should never be able to have negative counts... and // it's not clear that a saturating subtraction is technically correct either. Instead, we avoid having to // make that decision, and simply force the metric to be reinitialized. + // + // We also check that each individual bucket count is >= the corresponding count in the + // other histogram, since bucket value redistribution (e.g., after a source restart or + // cache eviction) can cause individual buckets to have lower counts even when the total + // count is higher. Failing here leads to the metric being reinitialized. 
( Self::AggregatedHistogram { buckets, @@ -343,7 +348,7 @@ impl MetricValue { && buckets .iter() .zip(buckets2.iter()) - .all(|(b1, b2)| b1.upper_limit == b2.upper_limit) => + .all(|(b1, b2)| b1.upper_limit == b2.upper_limit && b1.count >= b2.count) => { for (b1, b2) in buckets.iter_mut().zip(buckets2) { b1.count -= b2.count; diff --git a/lib/vector-core/src/event/proto.rs b/lib/vector-core/src/event/proto.rs index 6a4796906d287..8ba73a9ae7540 100644 --- a/lib/vector-core/src/event/proto.rs +++ b/lib/vector-core/src/event/proto.rs @@ -688,6 +688,7 @@ impl From for EventMetadata { dropped_fields: ObjectMap::new(), datadog_origin_metadata, source_event_id, + ingest_timestamp: None, })) } } diff --git a/lib/vector-core/src/event/ref.rs b/lib/vector-core/src/event/ref.rs index 3c1b6d1953055..345a9ccaef880 100644 --- a/lib/vector-core/src/event/ref.rs +++ b/lib/vector-core/src/event/ref.rs @@ -64,6 +64,15 @@ impl<'a> EventRef<'a> { _ => panic!("Failed type coercion, {self:?} is not a metric reference"), } } + + /// Access the metadata for the event under this reference. 
+ pub fn metadata(&self) -> &EventMetadata { + match self { + Self::Log(log) => log.metadata(), + Self::Metric(metric) => metric.metadata(), + Self::Trace(trace) => trace.metadata(), + } + } } impl<'a> From<&'a Event> for EventRef<'a> { diff --git a/lib/vector-core/src/fanout.rs b/lib/vector-core/src/fanout.rs index d8ec7e57a11ad..0fd5cb034b703 100644 --- a/lib/vector-core/src/fanout.rs +++ b/lib/vector-core/src/fanout.rs @@ -490,6 +490,7 @@ mod tests { WhenFull::Block, &Span::current(), None, + None, ) } diff --git a/lib/vector-core/src/metrics/recency.rs b/lib/vector-core/src/metrics/recency.rs index dcb237d6206c5..e63de81c9b1e3 100644 --- a/lib/vector-core/src/metrics/recency.rs +++ b/lib/vector-core/src/metrics/recency.rs @@ -63,8 +63,9 @@ use metrics_util::{ }; use parking_lot::Mutex; use quanta::{Clock, Instant}; +use vector_common::atomic::AtomicF64; -use super::storage::{AtomicF64, Histogram}; +use super::storage::Histogram; /// The generation of a metric. /// diff --git a/lib/vector-core/src/metrics/storage.rs b/lib/vector-core/src/metrics/storage.rs index b102849a6f2a9..86cf82c1ef7ea 100644 --- a/lib/vector-core/src/metrics/storage.rs +++ b/lib/vector-core/src/metrics/storage.rs @@ -3,8 +3,9 @@ use std::sync::{ atomic::{AtomicU32, Ordering}, }; -use metrics::{GaugeFn, HistogramFn, atomics::AtomicU64}; +use metrics::{HistogramFn, atomics::AtomicU64}; use metrics_util::registry::Storage; +use vector_common::atomic::AtomicF64; use crate::event::{MetricValue, metric::Bucket}; @@ -28,61 +29,17 @@ impl Storage for VectorStorage { } } -#[derive(Debug)] -pub(super) struct AtomicF64 { - inner: AtomicU64, -} - -impl AtomicF64 { - fn new(init: f64) -> Self { - Self { - inner: AtomicU64::new(init.to_bits()), - } - } - - fn fetch_update( - &self, - set_order: Ordering, - fetch_order: Ordering, - mut f: impl FnMut(f64) -> f64, - ) { - self.inner - .fetch_update(set_order, fetch_order, |x| { - Some(f(f64::from_bits(x)).to_bits()) - }) - .expect("Cannot fail"); - } - - 
pub(super) fn load(&self, order: Ordering) -> f64 { - f64::from_bits(self.inner.load(order)) - } -} - -impl GaugeFn for AtomicF64 { - fn increment(&self, amount: f64) { - self.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |value| value + amount); - } - - fn decrement(&self, amount: f64) { - self.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |value| value - amount); - } - - fn set(&self, value: f64) { - self.inner.store(f64::to_bits(value), Ordering::Relaxed); - } -} - #[derive(Debug)] pub(super) struct Histogram { - buckets: Box<[(f64, AtomicU32); 20]>, + buckets: Box<[(f64, AtomicU32); 26]>, count: AtomicU64, sum: AtomicF64, } impl Histogram { - const MIN_BUCKET: f64 = 0.015_625; // (-6_f64).exp2() is not const yet - const MIN_BUCKET_EXP: f64 = -6.0; - const BUCKETS: usize = 20; + const MIN_BUCKET: f64 = 1.0 / (1 << 12) as f64; // f64::powi() is not const yet + const MIN_BUCKET_EXP: f64 = -12.0; + const BUCKETS: usize = 26; pub(crate) fn new() -> Self { // Box to avoid having this large array inline to the structure, blowing @@ -95,25 +52,31 @@ impl Histogram { // long-tail. This also lets us find the right bucket to record into using simple // constant-time math operations instead of a loop-and-compare construct. 
let buckets = Box::new([ - ((-6_f64).exp2(), AtomicU32::new(0)), - ((-5_f64).exp2(), AtomicU32::new(0)), - ((-4_f64).exp2(), AtomicU32::new(0)), - ((-3_f64).exp2(), AtomicU32::new(0)), - ((-2_f64).exp2(), AtomicU32::new(0)), - ((-1_f64).exp2(), AtomicU32::new(0)), - (0_f64.exp2(), AtomicU32::new(0)), - (1_f64.exp2(), AtomicU32::new(0)), - (2_f64.exp2(), AtomicU32::new(0)), - (3_f64.exp2(), AtomicU32::new(0)), - (4_f64.exp2(), AtomicU32::new(0)), - (5_f64.exp2(), AtomicU32::new(0)), - (6_f64.exp2(), AtomicU32::new(0)), - (7_f64.exp2(), AtomicU32::new(0)), - (8_f64.exp2(), AtomicU32::new(0)), - (9_f64.exp2(), AtomicU32::new(0)), - (10_f64.exp2(), AtomicU32::new(0)), - (11_f64.exp2(), AtomicU32::new(0)), - (12_f64.exp2(), AtomicU32::new(0)), + (2.0f64.powi(-12), AtomicU32::new(0)), + (2.0f64.powi(-11), AtomicU32::new(0)), + (2.0f64.powi(-10), AtomicU32::new(0)), + (2.0f64.powi(-9), AtomicU32::new(0)), + (2.0f64.powi(-8), AtomicU32::new(0)), + (2.0f64.powi(-7), AtomicU32::new(0)), + (2.0f64.powi(-6), AtomicU32::new(0)), + (2.0f64.powi(-5), AtomicU32::new(0)), + (2.0f64.powi(-4), AtomicU32::new(0)), + (2.0f64.powi(-3), AtomicU32::new(0)), + (2.0f64.powi(-2), AtomicU32::new(0)), + (2.0f64.powi(-1), AtomicU32::new(0)), + (2.0f64.powi(0), AtomicU32::new(0)), + (2.0f64.powi(1), AtomicU32::new(0)), + (2.0f64.powi(2), AtomicU32::new(0)), + (2.0f64.powi(3), AtomicU32::new(0)), + (2.0f64.powi(4), AtomicU32::new(0)), + (2.0f64.powi(5), AtomicU32::new(0)), + (2.0f64.powi(6), AtomicU32::new(0)), + (2.0f64.powi(7), AtomicU32::new(0)), + (2.0f64.powi(8), AtomicU32::new(0)), + (2.0f64.powi(9), AtomicU32::new(0)), + (2.0f64.powi(10), AtomicU32::new(0)), + (2.0f64.powi(11), AtomicU32::new(0)), + (2.0f64.powi(12), AtomicU32::new(0)), (f64::INFINITY, AtomicU32::new(0)), ]); Self { diff --git a/lib/vector-core/src/source_sender/builder.rs b/lib/vector-core/src/source_sender/builder.rs index 452d890f0d863..1992dc531ee9a 100644 --- a/lib/vector-core/src/source_sender/builder.rs +++ 
b/lib/vector-core/src/source_sender/builder.rs @@ -13,6 +13,7 @@ pub struct Builder { named_outputs: HashMap, lag_time: Option, timeout: Option, + ewma_alpha: Option, } impl Default for Builder { @@ -23,6 +24,7 @@ impl Default for Builder { named_outputs: Default::default(), lag_time: Some(histogram!(LAG_TIME_NAME)), timeout: None, + ewma_alpha: None, } } } @@ -40,6 +42,12 @@ impl Builder { self } + #[must_use] + pub fn with_ewma_alpha(mut self, alpha: Option) -> Self { + self.ewma_alpha = alpha; + self + } + pub fn add_source_output( &mut self, output: SourceOutput, @@ -60,6 +68,7 @@ impl Builder { log_definition, output_id, self.timeout, + self.ewma_alpha, ); self.default_output = Some(output); rx @@ -72,6 +81,7 @@ impl Builder { log_definition, output_id, self.timeout, + self.ewma_alpha, ); self.named_outputs.insert(name, output); rx diff --git a/lib/vector-core/src/source_sender/output.rs b/lib/vector-core/src/source_sender/output.rs index 5e470c0231d94..e06c80776af7d 100644 --- a/lib/vector-core/src/source_sender/output.rs +++ b/lib/vector-core/src/source_sender/output.rs @@ -5,7 +5,7 @@ use std::{ time::{Duration, Instant}, }; -use chrono::Utc; +use chrono::{DateTime, Utc}; use futures::{Stream, StreamExt as _}; use metrics::Histogram; use tracing::Span; @@ -115,10 +115,11 @@ impl Output { log_definition: Option>, output_id: OutputId, timeout: Option, + ewma_alpha: Option, ) -> (Self, LimitedReceiver) { let limit = MemoryBufferSize::MaxEvents(NonZeroUsize::new(n).unwrap()); let metrics = ChannelMetricMetadata::new(UTILIZATION_METRIC_PREFIX, Some(output.clone())); - let (tx, rx) = channel::limited(limit, Some(metrics)); + let (tx, rx) = channel::limited(limit, Some(metrics), ewma_alpha); ( Self { sender: tx, @@ -134,13 +135,25 @@ impl Output { ) } + /// Set the ingest timestamp for any events that don't already have one. 
+ fn ensure_ingest_timestamp(events: &mut EventArray, now: DateTime) { + events.iter_events_mut().for_each(|mut event| { + if event.metadata().ingest_timestamp().is_none() { + event.metadata_mut().set_ingest_timestamp(now); + } + }); + } + pub(super) async fn send( &mut self, mut events: EventArray, unsent_event_count: &mut UnsentEventCount, ) -> Result<(), SendError> { + let now = Utc::now(); + Self::ensure_ingest_timestamp(&mut events, now); + let send_reference = Instant::now(); - let reference = Utc::now().timestamp_millis(); + let reference = now.timestamp_millis(); events .iter_events() .for_each(|event| self.emit_lag_time(event, reference)); diff --git a/lib/vector-core/src/source_sender/sender.rs b/lib/vector-core/src/source_sender/sender.rs index 8bbf09404e755..88cb50172c70a 100644 --- a/lib/vector-core/src/source_sender/sender.rs +++ b/lib/vector-core/src/source_sender/sender.rs @@ -119,6 +119,7 @@ impl SourceSender { None, output_id, timeout, + None, ); ( Self { @@ -192,7 +193,7 @@ impl SourceSender { port: Some(name.clone()), }; let (output, recv) = - Output::new_with_buffer(100, name.clone(), None, None, output_id, None); + Output::new_with_buffer(100, name.clone(), None, None, output_id, None, None); let recv = recv.into_stream().map(move |mut item| { item.events.iter_events_mut().for_each(|mut event| { let metadata = event.metadata_mut(); diff --git a/lib/vector-core/src/source_sender/tests.rs b/lib/vector-core/src/source_sender/tests.rs index 66e4169af7077..3c3ffac73bb4a 100644 --- a/lib/vector-core/src/source_sender/tests.rs +++ b/lib/vector-core/src/source_sender/tests.rs @@ -266,7 +266,7 @@ async fn emits_buffer_utilization_histogram_on_send_and_receive() { .into_iter() .filter(|metric| metric.name().starts_with("source_buffer_")) .collect(); - assert_eq!(metrics.len(), 3, "expected 3 utilization metrics"); + assert_eq!(metrics.len(), 5, "expected 5 utilization metrics"); let find_metric = |name: &str| { metrics @@ -290,4 +290,10 @@ async fn 
emits_buffer_utilization_histogram_on_send_and_receive() { panic!("source_buffer_max_event_size should be a gauge"); }; assert_eq!(*value, buffer_size as f64); + + let metric = find_metric("source_buffer_max_size_events"); + let MetricValue::Gauge { value } = metric.value() else { + panic!("source_buffer_max_size_events should be a gauge"); + }; + assert_eq!(*value, buffer_size as f64); } diff --git a/lib/vector-lib/Cargo.toml b/lib/vector-lib/Cargo.toml index c72af97fdaa62..6c46f70a019bd 100644 --- a/lib/vector-lib/Cargo.toml +++ b/lib/vector-lib/Cargo.toml @@ -34,5 +34,5 @@ opentelemetry = ["dep:opentelemetry-proto", "codecs/opentelemetry"] prometheus = ["dep:prometheus-parser"] proptest = ["vector-lookup/proptest", "vrl/proptest"] syslog = ["codecs/syslog"] -test = ["vector-core/test"] +test = ["codecs/test", "vector-core/test"] vrl = ["vector-core/vrl", "dep:vrl"] diff --git a/lib/vector-lib/src/lib.rs b/lib/vector-lib/src/lib.rs index 4672148bacdf1..3400a03c2e4cf 100644 --- a/lib/vector-lib/src/lib.rs +++ b/lib/vector-lib/src/lib.rs @@ -10,10 +10,10 @@ pub use vector_buffers as buffers; #[cfg(feature = "test")] pub use vector_common::event_test_util; pub use vector_common::{ - Error, NamedInternalEvent, Result, TimeZone, assert_event_data_eq, btreemap, byte_size_of, - byte_size_of::ByteSizeOf, conversion, encode_logfmt, finalization, finalizer, id, + Error, NamedInternalEvent, Result, TimeZone, assert_event_data_eq, atomic, btreemap, + byte_size_of, byte_size_of::ByteSizeOf, conversion, encode_logfmt, finalization, finalizer, id, impl_event_data_eq, internal_event, json_size, registered_event, request_metadata, - sensitive_string, shutdown, trigger, + sensitive_string, shutdown, stats, trigger, }; pub use vector_config as configurable; pub use vector_config::impl_generate_config_from_default; diff --git a/lib/vector-tap/src/controller.rs b/lib/vector-tap/src/controller.rs index f6e53b07691b3..dba9a7ec7b885 100644 --- a/lib/vector-tap/src/controller.rs +++ 
b/lib/vector-tap/src/controller.rs @@ -361,6 +361,7 @@ async fn tap_handler( WhenFull::DropNewest, &Span::current(), None, + None, ); let mut tap_transformer = TapTransformer::new(tx.clone(), output.clone()); diff --git a/lib/vector-top/Cargo.toml b/lib/vector-top/Cargo.toml index 815c0a4f858a1..4a746f1d48b5a 100644 --- a/lib/vector-top/Cargo.toml +++ b/lib/vector-top/Cargo.toml @@ -20,7 +20,7 @@ humantime = { version = "2.2.0", default-features = false } crossterm = { version = "0.29.0", default-features = false, features = ["event-stream", "windows"] } unit-prefix = { version = "0.5.2", default-features = false, features = ["std"] } num-format = { version = "0.4.4", default-features = false, features = ["with-num-bigint"] } -ratatui = { version = "0.29.0", default-features = false, features = ["crossterm"] } +ratatui = { version = "0.30.0", default-features = false, features = ["crossterm", "layout-cache"] } vector-common = { path = "../vector-common" } vector-api-client = { path = "../vector-api-client" } diff --git a/lib/vector-vrl-metrics/Cargo.toml b/lib/vector-vrl-metrics/Cargo.toml index f0a17f8b76e32..6f3bbb7eaa84b 100644 --- a/lib/vector-vrl-metrics/Cargo.toml +++ b/lib/vector-vrl-metrics/Cargo.toml @@ -8,6 +8,7 @@ license = "MPL-2.0" [dependencies] arc-swap.workspace = true +const-str.workspace = true vrl.workspace = true vector-core = { path = "../vector-core", default-features = false, features = ["vrl"] } vector-common = { path = "../vector-common", default-features = false } diff --git a/lib/vector-vrl-metrics/src/aggregate_vector_metrics.rs b/lib/vector-vrl-metrics/src/aggregate_vector_metrics.rs index 529e1df2b7824..d51c132a9f22a 100644 --- a/lib/vector-vrl-metrics/src/aggregate_vector_metrics.rs +++ b/lib/vector-vrl-metrics/src/aggregate_vector_metrics.rs @@ -47,6 +47,13 @@ impl Function for AggregateVectorMetrics { "aggregate_vector_metrics" } + fn usage(&self) -> &'static str { + const_str::concat!( + "Aggregates internal Vector metrics, using 
one of 4 aggregation functions, filtering by name and optionally by tags. Returns the aggregated value. Only includes counter and gauge metrics.\n\n", + crate::VECTOR_METRICS_EXPLAINER + ) + } + fn parameters(&self) -> &'static [Parameter] { &[ Parameter { diff --git a/lib/vector-vrl-metrics/src/find_vector_metrics.rs b/lib/vector-vrl-metrics/src/find_vector_metrics.rs index 98017a0fcc905..5ca5535a0179a 100644 --- a/lib/vector-vrl-metrics/src/find_vector_metrics.rs +++ b/lib/vector-vrl-metrics/src/find_vector_metrics.rs @@ -30,6 +30,13 @@ impl Function for FindVectorMetrics { "find_vector_metrics" } + fn usage(&self) -> &'static str { + const_str::concat!( + "Searches internal Vector metrics by name and optionally by tags. Returns all matching metrics.\n\n", + crate::VECTOR_METRICS_EXPLAINER + ) + } + fn parameters(&self) -> &'static [Parameter] { &[ Parameter { diff --git a/lib/vector-vrl-metrics/src/get_vector_metric.rs b/lib/vector-vrl-metrics/src/get_vector_metric.rs index eb669a4ac537c..5f24705a9c65c 100644 --- a/lib/vector-vrl-metrics/src/get_vector_metric.rs +++ b/lib/vector-vrl-metrics/src/get_vector_metric.rs @@ -27,6 +27,13 @@ impl Function for GetVectorMetric { "get_vector_metric" } + fn usage(&self) -> &'static str { + const_str::concat!( + "Searches internal Vector metrics by name and optionally by tags. Returns the first matching metric.\n\n", + crate::VECTOR_METRICS_EXPLAINER + ) + } + fn parameters(&self) -> &'static [Parameter] { &[ Parameter { diff --git a/lib/vector-vrl-metrics/src/lib.rs b/lib/vector-vrl-metrics/src/lib.rs index f3246159f5fbb..6df6e762cf110 100644 --- a/lib/vector-vrl-metrics/src/lib.rs +++ b/lib/vector-vrl-metrics/src/lib.rs @@ -8,6 +8,13 @@ mod find_vector_metrics; mod get_vector_metric; pub use common::MetricsStorage; +pub(crate) const VECTOR_METRICS_EXPLAINER: &str = "\ +Internal Vector metrics functions work with a snapshot of the metrics. 
The interval at which \ +the snapshot is updated is controlled through the \ +`metrics_storage_refresh_period` (/docs/reference/configuration/global-options/#metrics_storage_refresh_period) \ +global option. Higher values can reduce performance impact of that process, but may cause \ +stale metrics data in the snapshot."; + pub fn all() -> Vec> { vec![ Box::new(get_vector_metric::GetVectorMetric) as _, diff --git a/lib/vector-vrl/functions/Cargo.toml b/lib/vector-vrl/functions/Cargo.toml index 5b08d1bf5f40e..4e630b9515e92 100644 --- a/lib/vector-vrl/functions/Cargo.toml +++ b/lib/vector-vrl/functions/Cargo.toml @@ -7,6 +7,7 @@ publish = false license = "MPL-2.0" [dependencies] +indoc.workspace = true vrl.workspace = true enrichment = { path = "../../enrichment" } dnstap-parser = { path = "../../dnstap-parser", optional = true } diff --git a/lib/vector-vrl/functions/src/get_secret.rs b/lib/vector-vrl/functions/src/get_secret.rs index a1fbfc593ba70..b643d34152e3c 100644 --- a/lib/vector-vrl/functions/src/get_secret.rs +++ b/lib/vector-vrl/functions/src/get_secret.rs @@ -17,6 +17,10 @@ impl Function for GetSecret { "get_secret" } + fn usage(&self) -> &'static str { + "Returns the value of the given secret from an event." + } + fn parameters(&self) -> &'static [Parameter] { &[Parameter { keyword: "key", diff --git a/lib/vector-vrl/functions/src/remove_secret.rs b/lib/vector-vrl/functions/src/remove_secret.rs index 4788c7cedfb48..5bdc75153b551 100644 --- a/lib/vector-vrl/functions/src/remove_secret.rs +++ b/lib/vector-vrl/functions/src/remove_secret.rs @@ -14,6 +14,10 @@ impl Function for RemoveSecret { "remove_secret" } + fn usage(&self) -> &'static str { + "Removes a secret from an event." 
+ } + fn parameters(&self) -> &'static [Parameter] { &[Parameter { keyword: "key", diff --git a/lib/vector-vrl/functions/src/set_secret.rs b/lib/vector-vrl/functions/src/set_secret.rs index e6ba1e310c31a..5a128b448c825 100644 --- a/lib/vector-vrl/functions/src/set_secret.rs +++ b/lib/vector-vrl/functions/src/set_secret.rs @@ -21,6 +21,10 @@ impl Function for SetSecret { "set_secret" } + fn usage(&self) -> &'static str { + "Sets the given secret in the event." + } + fn parameters(&self) -> &'static [Parameter] { &[ Parameter { diff --git a/lib/vector-vrl/functions/src/set_semantic_meaning.rs b/lib/vector-vrl/functions/src/set_semantic_meaning.rs index 14d45acbdd519..19dc0be342b06 100644 --- a/lib/vector-vrl/functions/src/set_semantic_meaning.rs +++ b/lib/vector-vrl/functions/src/set_semantic_meaning.rs @@ -9,6 +9,8 @@ use vrl::{ prelude::*, }; +use indoc::indoc; + #[derive(Debug, Default, Clone)] pub struct MeaningList(pub BTreeMap); @@ -34,6 +36,16 @@ impl Function for SetSemanticMeaning { "set_semantic_meaning" } + fn usage(&self) -> &'static str { + indoc! {" + Sets a semantic meaning for an event. **Note**: This function assigns + meaning at startup, and has _no_ runtime behavior. It is suggested + to put all calls to this function at the beginning of a VRL function. The function + cannot be conditionally called. For example, using an if statement cannot stop the meaning + from being assigned. + "} + } + fn parameters(&self) -> &'static [Parameter] { &[ Parameter { diff --git a/licenses/CDLA-Permissive-2.0 b/licenses/CDLA-Permissive-2.0 new file mode 100644 index 0000000000000..cc0f954b59ba2 --- /dev/null +++ b/licenses/CDLA-Permissive-2.0 @@ -0,0 +1,35 @@ +Community Data License Agreement - Permissive - Version 2.0 + +This is the Community Data License Agreement - Permissive, Version 2.0 (the "agreement"). Data Provider(s) and Data Recipient(s) agree as follows: + +1. Provision of the Data + +1.1. 
A Data Recipient may use, modify, and share the Data made available by Data Provider(s) under this agreement if that Data Recipient follows the terms of this agreement. + +1.2. This agreement does not impose any restriction on a Data Recipient's use, modification, or sharing of any portions of the Data that are in the public domain or that may be used, modified, or shared under any other legal exception or limitation. + +2. Conditions for Sharing Data + +2.1. A Data Recipient may share Data, with or without modifications, so long as the Data Recipient makes available the text of this agreement with the shared Data. + +3. No Restrictions on Results + +3.1. This agreement does not impose any restriction or obligations with respect to the use, modification, or sharing of Results. + +4. No Warranty; Limitation of Liability + +4.1. All Data Recipients receive the Data subject to the following terms: + +THE DATA IS PROVIDED ON AN "AS IS" BASIS, WITHOUT REPRESENTATIONS, WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +NO DATA PROVIDER SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE DATA OR RESULTS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +5. Definitions + +5.1. "Data" means the material received by a Data Recipient under this agreement. + +5.2. "Data Provider" means any person who is the source of Data provided under this agreement and in reliance on a Data Recipient's agreement to its terms. + +5.3. "Data Recipient" means any person who receives Data directly or indirectly from a Data Provider and agrees to the terms of this agreement. 
+ +5.4. "Results" means any outcome obtained by computational analysis of Data, including for example machine learning models and models' insights. diff --git a/src/cli.rs b/src/cli.rs index 7df8ca67215be..2282d22f3689c 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -12,8 +12,8 @@ use crate::tap; use crate::top; use crate::{ - config, convert_config, generate, generate_schema, get_version, graph, list, signal, unit_test, - validate, + completion, config, convert_config, generate, generate_schema, get_version, graph, list, + signal, unit_test, validate, }; #[derive(Parser, Debug)] @@ -318,6 +318,10 @@ pub enum SubCommand { /// By default all output is writen to stdout. The `output_path` option can be used to redirect to a file. GenerateSchema(generate_schema::Opts), + /// Generate shell completion, then exit. + #[command(hide = true)] + Completion(completion::Opts), + /// Output a provided Vector configuration file/dir as a single JSON object, useful for checking in to version control. #[command(hide = true)] Config(config::Opts), @@ -355,6 +359,7 @@ impl SubCommand { color: bool, ) -> exitcode::ExitCode { match self { + Self::Completion(s) => completion::cmd(s), Self::Config(c) => config::cmd(c), Self::ConvertConfig(opts) => convert_config::cmd(opts), Self::Generate(g) => generate::cmd(g), diff --git a/src/codecs/decoding/mod.rs b/src/codecs/decoding/mod.rs deleted file mode 100644 index ad4cfdb444fd6..0000000000000 --- a/src/codecs/decoding/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -mod config; -mod decoder; - -pub use config::DecodingConfig; -pub use decoder::Decoder; diff --git a/src/codecs/encoding/mod.rs b/src/codecs/encoding/mod.rs deleted file mode 100644 index 36d637bd75090..0000000000000 --- a/src/codecs/encoding/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -mod config; -mod encoder; -mod transformer; - -pub use config::{EncodingConfig, EncodingConfigWithFraming, SinkType}; -pub use encoder::{BatchEncoder, BatchSerializer, Encoder, EncoderKind}; -pub use 
transformer::{TimestampFormat, Transformer}; diff --git a/src/codecs/mod.rs b/src/codecs/mod.rs deleted file mode 100644 index 32b0e9efb7f8b..0000000000000 --- a/src/codecs/mod.rs +++ /dev/null @@ -1,15 +0,0 @@ -//! A collection of codecs that can be used to transform between bytes streams / -//! byte messages, byte frames and structured events. - -#![deny(missing_docs)] - -mod decoding; -mod encoding; -mod ready_frames; - -pub use decoding::{Decoder, DecodingConfig}; -pub use encoding::{ - BatchEncoder, BatchSerializer, Encoder, EncoderKind, EncodingConfig, EncodingConfigWithFraming, - SinkType, TimestampFormat, Transformer, -}; -pub use ready_frames::ReadyFrames; diff --git a/src/completion.rs b/src/completion.rs new file mode 100644 index 0000000000000..debb2b175043c --- /dev/null +++ b/src/completion.rs @@ -0,0 +1,23 @@ +#![allow(missing_docs)] +use clap::{CommandFactory, Parser}; +use clap_complete::{Shell, generate}; +use std::io; + +use crate::cli::Opts as RootCli; + +#[derive(Parser, Debug, Clone)] +#[command(rename_all = "kebab-case")] +pub struct Opts { + /// Shell to generate completion for + #[clap(value_enum)] + shell: Shell, +} + +pub fn cmd(opts: &Opts) -> exitcode::ExitCode { + let mut cmd = RootCli::command(); + let bin_name = cmd.get_name().to_string(); + + generate(opts.shell, &mut cmd, bin_name, &mut io::stdout()); + + exitcode::OK +} diff --git a/src/components/validation/resources/event.rs b/src/components/validation/resources/event.rs index f97ff4b5c4adf..b34493837248a 100644 --- a/src/components/validation/resources/event.rs +++ b/src/components/validation/resources/event.rs @@ -12,7 +12,7 @@ use vector_lib::{ event::{Event, LogEvent}, }; -use crate::codecs::Encoder; +use vector_lib::codecs::Encoder; /// A test case event for deserialization from yaml file. /// This is an intermediary step to TestEvent. 
diff --git a/src/components/validation/resources/mod.rs b/src/components/validation/resources/mod.rs index 85f72f2aaf35f..6b3fa43a1b6ba 100644 --- a/src/components/validation/resources/mod.rs +++ b/src/components/validation/resources/mod.rs @@ -6,7 +6,7 @@ use std::sync::Arc; use tokio::sync::{Mutex, mpsc}; use vector_lib::{ codecs::{ - BytesEncoder, + BytesEncoder, Decoder, DecodingConfig, Encoder, EncodingConfig, EncodingConfigWithFraming, decoding::{self, DeserializerConfig}, encoding::{ self, Framer, FramingConfig, JsonSerializerConfig, SerializerConfig, @@ -26,7 +26,6 @@ use super::{ RunnerMetrics, sync::{Configuring, TaskCoordinator}, }; -use crate::codecs::{Decoder, DecodingConfig, Encoder, EncodingConfig, EncodingConfigWithFraming}; /// The codec used by the external resource. /// diff --git a/src/config/compiler.rs b/src/config/compiler.rs index 734fd18c98205..f493f8c8479b9 100644 --- a/src/config/compiler.rs +++ b/src/config/compiler.rs @@ -36,6 +36,10 @@ pub fn compile(mut builder: ConfigBuilder) -> Result<(Config, Vec), Vec< errors.extend(output_errors); } + if let Err(alpha_errors) = validation::check_values(&builder) { + errors.extend(alpha_errors); + } + let ConfigBuilder { global, #[cfg(feature = "api")] diff --git a/src/config/validation.rs b/src/config/validation.rs index 0878eedd5948a..56964e0a4a9c7 100644 --- a/src/config/validation.rs +++ b/src/config/validation.rs @@ -11,6 +11,14 @@ use super::{ }; use crate::config::schema; +/// Minimum value (exclusive) for EWMA alpha options. +/// The alpha value must be strictly greater than this value. +const EWMA_ALPHA_MIN: f64 = 0.0; + +/// Maximum value (exclusive) for EWMA alpha options. +/// The alpha value must be strictly less than this value. +const EWMA_ALPHA_MAX: f64 = 1.0; + /// Check that provide + topology config aren't present in the same builder, which is an error. 
pub fn check_provider(config: &ConfigBuilder) -> Result<(), Vec> { if config.provider.is_some() @@ -147,6 +155,32 @@ pub fn check_resources(config: &ConfigBuilder) -> Result<(), Vec> { } } +/// Validates that `*_ewma_alpha` values are within the valid range (0 < alpha < 1). +pub fn check_values(config: &ConfigBuilder) -> Result<(), Vec> { + let mut errors = Vec::new(); + + if let Some(alpha) = config.global.buffer_utilization_ewma_alpha + && (alpha <= EWMA_ALPHA_MIN || alpha >= EWMA_ALPHA_MAX) + { + errors.push(format!( + "Global `buffer_utilization_ewma_alpha` must be between 0 and 1 exclusive (0 < alpha < 1), got {alpha}" + )); + } + if let Some(alpha) = config.global.processing_time_ewma_alpha + && (alpha <= EWMA_ALPHA_MIN || alpha >= EWMA_ALPHA_MAX) + { + errors.push(format!( + "Global `processing_time_ewma_alpha` must be between 0 and 1 exclusive (0 < alpha < 1), got {alpha}" + )); + } + + if errors.is_empty() { + Ok(()) + } else { + Err(errors) + } +} + /// To avoid collisions between `output` metric tags, check that a component /// does not have a named output with the name [`DEFAULT_OUTPUT`] pub fn check_outputs(config: &ConfigBuilder) -> Result<(), Vec> { diff --git a/src/internal_events/mod.rs b/src/internal_events/mod.rs index f22f896336070..30f282686357a 100644 --- a/src/internal_events/mod.rs +++ b/src/internal_events/mod.rs @@ -28,7 +28,6 @@ mod aws_kinesis_firehose; #[cfg(any(feature = "sources-aws_s3", feature = "sources-aws_sqs",))] mod aws_sqs; mod batch; -mod codecs; mod common; mod conditions; #[cfg(feature = "sources-datadog_agent")] @@ -184,7 +183,6 @@ pub(crate) use self::aws_kinesis::*; pub(crate) use self::aws_kinesis_firehose::*; #[cfg(any(feature = "sources-aws_s3", feature = "sources-aws_sqs",))] pub(crate) use self::aws_sqs::*; -pub(crate) use self::codecs::*; #[cfg(feature = "sources-datadog_agent")] pub(crate) use self::datadog_agent::*; #[cfg(feature = "sinks-datadog_metrics")] diff --git a/src/lib.rs b/src/lib.rs index 
0f2dbfb5fa728..d4d4ab1eeb38a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,6 +32,8 @@ extern crate tracing; extern crate vector_lib; pub use indoc::indoc; +// re-export codecs for convenience +pub use vector_lib::codecs; #[cfg(all(feature = "tikv-jemallocator", not(feature = "allocation-tracing")))] #[global_allocator] @@ -72,9 +74,8 @@ pub mod app; pub mod async_read; #[cfg(feature = "aws-config")] pub mod aws; -#[allow(unreachable_pub)] -pub mod codecs; pub mod common; +pub mod completion; mod convert_config; pub mod encoding_transcode; pub mod enrichment_tables; @@ -107,7 +108,6 @@ pub(crate) mod sink_ext; pub mod sinks; #[allow(unreachable_pub)] pub mod sources; -pub mod stats; #[cfg(feature = "api-client")] #[allow(unreachable_pub)] pub mod tap; diff --git a/src/sinks/amqp/encoder.rs b/src/sinks/amqp/encoder.rs index 07ab035c5efec..ec403b39cfd54 100644 --- a/src/sinks/amqp/encoder.rs +++ b/src/sinks/amqp/encoder.rs @@ -9,8 +9,8 @@ use crate::sinks::prelude::*; #[derive(Clone, Debug)] pub(super) struct AmqpEncoder { - pub(super) encoder: crate::codecs::Encoder<()>, - pub(super) transformer: crate::codecs::Transformer, + pub(super) encoder: vector_lib::codecs::Encoder<()>, + pub(super) transformer: vector_lib::codecs::Transformer, } impl encoding::Encoder for AmqpEncoder { diff --git a/src/sinks/amqp/sink.rs b/src/sinks/amqp/sink.rs index 058eaf22bc10f..ba00cbcee98cc 100644 --- a/src/sinks/amqp/sink.rs +++ b/src/sinks/amqp/sink.rs @@ -32,7 +32,7 @@ pub(super) struct AmqpSink { routing_key: Option