diff --git a/Cargo.lock b/Cargo.lock index b97b30d26cec3..3d2d9ef2e2617 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2833,6 +2833,12 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "const-str" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93e19f68b180ebff43d6d42005c4b5f046c65fcac28369ba8b3beaad633f9ec0" + [[package]] name = "convert_case" version = "0.4.0" @@ -3817,7 +3823,9 @@ version = "0.1.0" dependencies = [ "arc-swap", "chrono", + "const-str", "dyn-clone", + "indoc", "vrl", ] @@ -4059,21 +4067,9 @@ checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" [[package]] name = "fancy-regex" -version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6215aee357f8c7c989ebb4b8466ca4d7dc93b3957039f2fc3ea2ade8ea5f279" -dependencies = [ - "bit-set", - "derivative", - "regex-automata 0.4.8", - "regex-syntax", -] - -[[package]] -name = "fancy-regex" -version = "0.16.1" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf04c5ec15464ace8355a7b440a33aece288993475556d461154d7a62ad9947c" +checksum = "72cf461f865c862bb7dc573f643dd6a2b6842f7c30b07882b56bd148cc2761b8" dependencies = [ "bit-set", "regex-automata 0.4.8", @@ -5988,15 +5984,15 @@ dependencies = [ [[package]] name = "jsonschema" -version = "0.37.4" +version = "0.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73c9ffb2b5c56d58030e1b532d8e8389da94590515f118cf35b5cb68e4764a7e" +checksum = "89f50532ce4a0ba3ae930212908d8ec50e7806065c059fe9c75da2ece6132294" dependencies = [ "ahash 0.8.11", "bytecount", "data-encoding", "email_address", - "fancy-regex 0.16.1", + "fancy-regex", "fraction", "getrandom 0.3.4", "idna", @@ -9314,9 +9310,9 @@ dependencies = [ [[package]] name = "referencing" -version = "0.37.4" +version = "0.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4283168a506f0dcbdce31c9f9cce3129c924da4c6bca46e46707fcb746d2d70c" +checksum = "15a8af0c6bb8eaf8b07cb06fc31ff30ca6fe19fb99afa476c276d8b24f365b0b" dependencies = [ "ahash 0.8.11", "fluent-uri 0.4.1", @@ -9651,9 +9647,12 @@ dependencies = [ [[package]] name = "roxmltree" -version = "0.20.0" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c20b6793b5c2fa6553b250154b78d6d0db37e72700ae35fad9387a46f487c97" +checksum = "f1964b10c76125c36f8afe190065a4bf9a87bf324842c05701330bba9f1cacbb" +dependencies = [ + "memchr", +] [[package]] name = "rsa" @@ -9956,9 +9955,9 @@ dependencies = [ [[package]] name = "rustyline" -version = "16.0.0" +version = "17.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62fd9ca5ebc709e8535e8ef7c658eb51457987e48c98ead2be482172accc408d" +checksum = "e902948a25149d50edc1a8e0141aad50f54e22ba83ff988cf8f7c9ef07f50564" dependencies = [ "bitflags 2.10.0", "cfg-if", @@ -9970,7 +9969,7 @@ dependencies = [ "unicode-segmentation", "unicode-width 0.2.0", "utf8parse", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -13154,6 +13153,7 @@ version = "0.1.0" dependencies = [ "dnstap-parser", "enrichment", + "indoc", "vector-vrl-metrics", "vrl", ] @@ -13163,6 +13163,7 @@ name = "vector-vrl-metrics" version = "0.1.0" dependencies = [ "arc-swap", + "const-str", "tokio", "tokio-stream", "vector-common", @@ -13218,7 +13219,7 @@ checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" [[package]] name = "vrl" version = "0.29.0" -source = "git+https://github.com/vectordotdev/vrl.git?branch=main#53f01dfa6226fb3f4093d1bf838319c522f2dcad" +source = "git+https://github.com/vectordotdev/vrl.git?branch=main#7ab2c5516a0c46e25d6f6efbd16977af4a3441a0" dependencies = [ "aes", "aes-siv", @@ -13251,7 +13252,7 @@ dependencies = [ "dyn-clone", "encoding_rs", "exitcode", - "fancy-regex 0.15.0", + "fancy-regex", "flate2", "grok", "hex", diff --git a/Cargo.toml 
b/Cargo.toml index be3e047bdf894..03be659ba9864 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -149,6 +149,7 @@ chrono-tz = { version = "0.10.4", default-features = false, features = ["serde"] clap = { version = "4.5.53", default-features = false, features = ["derive", "error-context", "env", "help", "std", "string", "usage", "wrap_help"] } clap_complete = "4.5.65" colored = { version = "3.0.0", default-features = false } +const-str = { version = "1.0.0", default-features = false } crossbeam-utils = { version = "0.8.21", default-features = false } darling = { version = "0.20.11", default-features = false, features = ["suggestions"] } dashmap = { version = "6.1.0", default-features = false } diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index 6ce8b1ef3d827..603be009102ba 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -184,6 +184,7 @@ concurrent-queue,https://github.com/smol-rs/concurrent-queue,Apache-2.0 OR MIT," const-oid,https://github.com/RustCrypto/formats/tree/master/const-oid,Apache-2.0 OR MIT,RustCrypto Developers const-random,https://github.com/tkaitchuck/constrandom,MIT OR Apache-2.0,Tom Kaitchuck const-random-macro,https://github.com/tkaitchuck/constrandom,MIT OR Apache-2.0,Tom Kaitchuck +const-str,https://github.com/Nugine/const-str,MIT,Nugine convert_case,https://github.com/rutrum/convert-case,MIT,David Purdum convert_case,https://github.com/rutrum/convert-case,MIT,rutrum cookie,https://github.com/SergioBenitez/cookie-rs,MIT OR Apache-2.0,"Sergio Benitez , Alex Crichton " @@ -278,7 +279,6 @@ executor-trait,https://github.com/amqp-rs/executor-trait,Apache-2.0 OR MIT,Marc- exitcode,https://github.com/benwilber/exitcode,Apache-2.0,Ben Wilber fakedata_generator,https://github.com/kevingimbel/fakedata_generator,MIT,Kevin Gimbel fallible-iterator,https://github.com/sfackler/rust-fallible-iterator,MIT OR Apache-2.0,Steven Fackler -fancy-regex,https://github.com/fancy-regex/fancy-regex,MIT,"Raph Levien , Robin Stocker " 
fancy-regex,https://github.com/fancy-regex/fancy-regex,MIT,"Raph Levien , Robin Stocker , Keith Hall " fastrand,https://github.com/smol-rs/fastrand,Apache-2.0 OR MIT,Stjepan Glavina ff,https://github.com/zkcrypto/ff,MIT OR Apache-2.0,"Sean Bowe , Jack Grigg " diff --git a/lib/dnstap-parser/src/vrl_functions/parse_dnstap.rs b/lib/dnstap-parser/src/vrl_functions/parse_dnstap.rs index b32fdcfeb3106..f61ef30c8f28a 100644 --- a/lib/dnstap-parser/src/vrl_functions/parse_dnstap.rs +++ b/lib/dnstap-parser/src/vrl_functions/parse_dnstap.rs @@ -13,6 +13,10 @@ impl Function for ParseDnstap { "parse_dnstap" } + fn usage(&self) -> &'static str { + "Parses the `value` as base64 encoded DNSTAP data." + } + fn parameters(&self) -> &'static [Parameter] { &[ Parameter { diff --git a/lib/enrichment/Cargo.toml b/lib/enrichment/Cargo.toml index 087ab7f60bfc0..1d2a67a2c26b6 100644 --- a/lib/enrichment/Cargo.toml +++ b/lib/enrichment/Cargo.toml @@ -8,5 +8,7 @@ publish = false [dependencies] arc-swap.workspace = true chrono.workspace = true +const-str.workspace = true dyn-clone = { version = "1.0.20", default-features = false } +indoc.workspace = true vrl.workspace = true diff --git a/lib/enrichment/src/find_enrichment_table_records.rs b/lib/enrichment/src/find_enrichment_table_records.rs index 369eb5d21140b..1b016aa901a14 100644 --- a/lib/enrichment/src/find_enrichment_table_records.rs +++ b/lib/enrichment/src/find_enrichment_table_records.rs @@ -51,6 +51,13 @@ impl Function for FindEnrichmentTableRecords { "find_enrichment_table_records" } + fn usage(&self) -> &'static str { + const_str::concat!( + "Searches an [enrichment table](/docs/reference/glossary/#enrichment-tables) for rows that match the provided condition.\n\n", + super::ENRICHMENT_TABLE_EXPLAINER + ) + } + fn parameters(&self) -> &'static [Parameter] { &[ Parameter { diff --git a/lib/enrichment/src/get_enrichment_table_record.rs b/lib/enrichment/src/get_enrichment_table_record.rs index efdbc92542a19..ef2103702f8ca 100644 --- 
a/lib/enrichment/src/get_enrichment_table_record.rs +++ b/lib/enrichment/src/get_enrichment_table_record.rs @@ -48,6 +48,14 @@ impl Function for GetEnrichmentTableRecord { "get_enrichment_table_record" } + fn usage(&self) -> &'static str { + const USAGE: &str = const_str::concat!( + "Searches an [enrichment table](/docs/reference/glossary/#enrichment-tables) for a row that matches the provided condition. A single row must be matched. If no rows are found or more than one row is found, an error is returned.\n\n", + super::ENRICHMENT_TABLE_EXPLAINER + ); + USAGE + } + fn parameters(&self) -> &'static [Parameter] { &[ Parameter { diff --git a/lib/enrichment/src/lib.rs b/lib/enrichment/src/lib.rs index e69cf979a1dc0..a73f18f163181 100644 --- a/lib/enrichment/src/lib.rs +++ b/lib/enrichment/src/lib.rs @@ -9,6 +9,7 @@ mod test_util; mod vrl_util; use dyn_clone::DynClone; +use indoc::indoc; pub use tables::{TableRegistry, TableSearch}; use vrl::{ compiler::Function, @@ -97,3 +98,59 @@ pub fn vrl_functions() -> Vec> { Box::new(find_enrichment_table_records::FindEnrichmentTableRecords) as _, ] } + +pub(crate) const ENRICHMENT_TABLE_EXPLAINER: &str = indoc! {r#" + For `file` enrichment tables, this condition needs to be a VRL object in which + the key-value pairs indicate a field to search mapped to a value to search in that field. + This function returns the rows that match the provided condition(s). _All_ fields need to + match for rows to be returned; if any fields do not match, then no rows are returned. + + There are currently three forms of search criteria: + + 1. **Exact match search**. The given field must match the value exactly. Case sensitivity + can be specified using the `case_sensitive` argument. An exact match search can use an + index directly into the dataset, which should make this search fairly "cheap" from a + performance perspective. + + 2. **Wildcard match search**. 
The given fields specified by the exact match search may also + be matched exactly to the value provided to the `wildcard` parameter. + A wildcard match search can also use an index directly into the dataset. + + 3. **Date range search**. The given field must be greater than or equal to the `from` date + and/or less than or equal to the `to` date. A date range search involves + sequentially scanning through the rows that have been located using any exact match + criteria. This can be an expensive operation if there are many rows returned by any exact + match criteria. Therefore, use date ranges as the _only_ criteria when the enrichment + data set is very small. + + For `geoip` and `mmdb` enrichment tables, this condition needs to be a VRL object with a single key-value pair + whose value needs to be a valid IP address. Example: `{"ip": .ip }`. If a return field is expected + and without a value, `null` is used. This table can return the following fields: + + * ISP databases: + * `autonomous_system_number` + * `autonomous_system_organization` + * `isp` + * `organization` + + * City databases: + * `city_name` + * `continent_code` + * `country_code` + * `country_name` + * `region_code` + * `region_name` + * `metro_code` + * `latitude` + * `longitude` + * `postal_code` + * `timezone` + + * Connection-Type databases: + * `connection_type` + + To use this function, you need to update your configuration to + include an + [`enrichment_tables`](/docs/reference/configuration/global-options/#enrichment_tables) + parameter. 
+"#}; diff --git a/lib/vector-vrl-metrics/Cargo.toml b/lib/vector-vrl-metrics/Cargo.toml index f0a17f8b76e32..6f3bbb7eaa84b 100644 --- a/lib/vector-vrl-metrics/Cargo.toml +++ b/lib/vector-vrl-metrics/Cargo.toml @@ -8,6 +8,7 @@ license = "MPL-2.0" [dependencies] arc-swap.workspace = true +const-str.workspace = true vrl.workspace = true vector-core = { path = "../vector-core", default-features = false, features = ["vrl"] } vector-common = { path = "../vector-common", default-features = false } diff --git a/lib/vector-vrl-metrics/src/aggregate_vector_metrics.rs b/lib/vector-vrl-metrics/src/aggregate_vector_metrics.rs index 529e1df2b7824..d51c132a9f22a 100644 --- a/lib/vector-vrl-metrics/src/aggregate_vector_metrics.rs +++ b/lib/vector-vrl-metrics/src/aggregate_vector_metrics.rs @@ -47,6 +47,13 @@ impl Function for AggregateVectorMetrics { "aggregate_vector_metrics" } + fn usage(&self) -> &'static str { + const_str::concat!( + "Aggregates internal Vector metrics, using one of 4 aggregation functions, filtering by name and optionally by tags. Returns the aggregated value. Only includes counter and gauge metrics.\n\n", + crate::VECTOR_METRICS_EXPLAINER + ) + } + fn parameters(&self) -> &'static [Parameter] { &[ Parameter { diff --git a/lib/vector-vrl-metrics/src/find_vector_metrics.rs b/lib/vector-vrl-metrics/src/find_vector_metrics.rs index 98017a0fcc905..5ca5535a0179a 100644 --- a/lib/vector-vrl-metrics/src/find_vector_metrics.rs +++ b/lib/vector-vrl-metrics/src/find_vector_metrics.rs @@ -30,6 +30,13 @@ impl Function for FindVectorMetrics { "find_vector_metrics" } + fn usage(&self) -> &'static str { + const_str::concat!( + "Searches internal Vector metrics by name and optionally by tags. 
Returns all matching metrics.\n\n", + crate::VECTOR_METRICS_EXPLAINER + ) + } + fn parameters(&self) -> &'static [Parameter] { &[ Parameter { diff --git a/lib/vector-vrl-metrics/src/get_vector_metric.rs b/lib/vector-vrl-metrics/src/get_vector_metric.rs index eb669a4ac537c..5f24705a9c65c 100644 --- a/lib/vector-vrl-metrics/src/get_vector_metric.rs +++ b/lib/vector-vrl-metrics/src/get_vector_metric.rs @@ -27,6 +27,13 @@ impl Function for GetVectorMetric { "get_vector_metric" } + fn usage(&self) -> &'static str { + const_str::concat!( + "Searches internal Vector metrics by name and optionally by tags. Returns the first matching metric.\n\n", + crate::VECTOR_METRICS_EXPLAINER + ) + } + fn parameters(&self) -> &'static [Parameter] { &[ Parameter { diff --git a/lib/vector-vrl-metrics/src/lib.rs b/lib/vector-vrl-metrics/src/lib.rs index f3246159f5fbb..6df6e762cf110 100644 --- a/lib/vector-vrl-metrics/src/lib.rs +++ b/lib/vector-vrl-metrics/src/lib.rs @@ -8,6 +8,13 @@ mod find_vector_metrics; mod get_vector_metric; pub use common::MetricsStorage; +pub(crate) const VECTOR_METRICS_EXPLAINER: &str = "\ +Internal Vector metrics functions work with a snapshot of the metrics. The interval at which \ +the snapshot is updated is controlled through the \ +[`metrics_storage_refresh_period`](/docs/reference/configuration/global-options/#metrics_storage_refresh_period) \ +global option.
Higher values can reduce performance impact of that process, but may cause \ +stale metrics data in the snapshot."; + pub fn all() -> Vec> { vec![ Box::new(get_vector_metric::GetVectorMetric) as _, diff --git a/lib/vector-vrl/functions/Cargo.toml b/lib/vector-vrl/functions/Cargo.toml index 5b08d1bf5f40e..4e630b9515e92 100644 --- a/lib/vector-vrl/functions/Cargo.toml +++ b/lib/vector-vrl/functions/Cargo.toml @@ -7,6 +7,7 @@ publish = false license = "MPL-2.0" [dependencies] +indoc.workspace = true vrl.workspace = true enrichment = { path = "../../enrichment" } dnstap-parser = { path = "../../dnstap-parser", optional = true } diff --git a/lib/vector-vrl/functions/src/get_secret.rs b/lib/vector-vrl/functions/src/get_secret.rs index a1fbfc593ba70..b643d34152e3c 100644 --- a/lib/vector-vrl/functions/src/get_secret.rs +++ b/lib/vector-vrl/functions/src/get_secret.rs @@ -17,6 +17,10 @@ impl Function for GetSecret { "get_secret" } + fn usage(&self) -> &'static str { + "Returns the value of the given secret from an event." + } + fn parameters(&self) -> &'static [Parameter] { &[Parameter { keyword: "key", diff --git a/lib/vector-vrl/functions/src/remove_secret.rs b/lib/vector-vrl/functions/src/remove_secret.rs index 4788c7cedfb48..5bdc75153b551 100644 --- a/lib/vector-vrl/functions/src/remove_secret.rs +++ b/lib/vector-vrl/functions/src/remove_secret.rs @@ -14,6 +14,10 @@ impl Function for RemoveSecret { "remove_secret" } + fn usage(&self) -> &'static str { + "Removes a secret from an event." + } + fn parameters(&self) -> &'static [Parameter] { &[Parameter { keyword: "key", diff --git a/lib/vector-vrl/functions/src/set_secret.rs b/lib/vector-vrl/functions/src/set_secret.rs index e6ba1e310c31a..5a128b448c825 100644 --- a/lib/vector-vrl/functions/src/set_secret.rs +++ b/lib/vector-vrl/functions/src/set_secret.rs @@ -21,6 +21,10 @@ impl Function for SetSecret { "set_secret" } + fn usage(&self) -> &'static str { + "Sets the given secret in the event." 
+ } + fn parameters(&self) -> &'static [Parameter] { &[ Parameter { diff --git a/lib/vector-vrl/functions/src/set_semantic_meaning.rs b/lib/vector-vrl/functions/src/set_semantic_meaning.rs index 14d45acbdd519..19dc0be342b06 100644 --- a/lib/vector-vrl/functions/src/set_semantic_meaning.rs +++ b/lib/vector-vrl/functions/src/set_semantic_meaning.rs @@ -9,6 +9,8 @@ use vrl::{ prelude::*, }; +use indoc::indoc; + #[derive(Debug, Default, Clone)] pub struct MeaningList(pub BTreeMap); @@ -34,6 +36,16 @@ impl Function for SetSemanticMeaning { "set_semantic_meaning" } + fn usage(&self) -> &'static str { + indoc! {" + Sets a semantic meaning for an event. **Note**: This function assigns + meaning at startup, and has _no_ runtime behavior. It is suggested + to put all calls to this function at the beginning of a VRL function. The function + cannot be conditionally called. For example, using an if statement cannot stop the meaning + from being assigned. + "} + } + fn parameters(&self) -> &'static [Parameter] { &[ Parameter {