From 531009e11d1d022f470bc0a8c1883443a6944434 Mon Sep 17 00:00:00 2001 From: Stephen Wakely Date: Fri, 14 Apr 2023 10:01:05 +0100 Subject: [PATCH 1/3] Revert "Revert "enhancement(topology): Update transforms to handle multiple definitions (#16793)"" This reverts commit 5dc20f3bb7630ed558c6dd939198f0574e791dbe. --- benches/remap.rs | 20 +- benches/transform/route.rs | 6 +- lib/vector-core/src/config/mod.rs | 220 ++++++- lib/vector-core/src/schema/definition.rs | 100 ++- lib/vector-core/src/transform/mod.rs | 6 +- src/api/schema/components/mod.rs | 4 +- src/config/compiler.rs | 5 +- src/config/graph.rs | 78 ++- src/config/id.rs | 29 + src/config/mod.rs | 4 +- src/config/source.rs | 9 +- src/config/transform.rs | 17 +- src/config/unit_test/mod.rs | 14 +- src/config/unit_test/unit_test_components.rs | 21 +- src/config/validation.rs | 10 +- src/source_sender/mod.rs | 4 +- src/sources/amqp.rs | 29 +- src/sources/apache_metrics/mod.rs | 6 +- src/sources/aws_ecs_metrics/mod.rs | 6 +- src/sources/aws_kinesis_firehose/mod.rs | 10 +- src/sources/aws_s3/mod.rs | 8 +- src/sources/aws_sqs/config.rs | 9 +- src/sources/aws_sqs/source.rs | 20 +- src/sources/datadog_agent/metrics.rs | 32 +- src/sources/datadog_agent/mod.rs | 27 +- src/sources/datadog_agent/tests.rs | 330 +++++----- src/sources/demo_logs.rs | 9 +- src/sources/dnstap/mod.rs | 6 +- src/sources/docker_logs/mod.rs | 6 +- src/sources/docker_logs/tests.rs | 148 +++-- src/sources/eventstoredb_metrics/mod.rs | 6 +- src/sources/exec/mod.rs | 9 +- src/sources/file.rs | 107 ++-- .../file_descriptors/file_descriptor.rs | 4 +- src/sources/file_descriptors/mod.rs | 11 +- src/sources/file_descriptors/stdin.rs | 4 +- src/sources/fluent/mod.rs | 28 +- src/sources/gcp_pubsub.rs | 26 +- src/sources/heroku_logs.rs | 31 +- src/sources/host_metrics/mod.rs | 6 +- src/sources/http_client/client.rs | 9 +- src/sources/http_server.rs | 33 +- src/sources/internal_logs.rs | 26 +- src/sources/internal_metrics.rs | 6 +- src/sources/journald.rs | 
34 +- src/sources/kafka.rs | 148 ++--- src/sources/kubernetes_logs/mod.rs | 324 +++++----- src/sources/logstash.rs | 29 +- src/sources/mongodb_metrics/mod.rs | 6 +- src/sources/nats.rs | 29 +- src/sources/nginx_metrics/mod.rs | 6 +- src/sources/opentelemetry/mod.rs | 10 +- src/sources/opentelemetry/tests.rs | 24 +- src/sources/postgresql_metrics.rs | 6 +- src/sources/prometheus/remote_write.rs | 6 +- src/sources/prometheus/scrape.rs | 6 +- src/sources/redis/mod.rs | 9 +- src/sources/socket/mod.rs | 10 +- src/sources/splunk_hec/mod.rs | 26 +- src/sources/statsd/mod.rs | 6 +- src/sources/syslog.rs | 28 +- src/sources/vector/mod.rs | 28 +- src/test_util/mock/sources/backpressure.rs | 10 +- src/test_util/mock/sources/basic.rs | 11 +- src/test_util/mock/sources/error.rs | 10 +- src/test_util/mock/sources/panic.rs | 10 +- src/test_util/mock/sources/tripwire.rs | 10 +- src/test_util/mock/transforms/basic.rs | 18 +- src/test_util/mock/transforms/noop.rs | 18 +- src/topology/builder.rs | 49 +- src/topology/schema.rs | 601 ++++++++---------- src/transforms/aggregate.rs | 10 +- src/transforms/aws_ec2_metadata.rs | 32 +- src/transforms/dedupe.rs | 17 +- src/transforms/filter.rs | 19 +- src/transforms/log_to_metric.rs | 14 +- src/transforms/lua/mod.rs | 12 +- src/transforms/lua/v1/mod.rs | 17 +- src/transforms/lua/v2/mod.rs | 20 +- src/transforms/metric_to_log.rs | 12 +- src/transforms/reduce/mod.rs | 179 +++--- src/transforms/remap.rs | 236 ++++--- src/transforms/route.rs | 41 +- src/transforms/sample.rs | 20 +- .../tag_cardinality_limit/config.rs | 12 +- src/transforms/throttle.rs | 16 +- 86 files changed, 2066 insertions(+), 1562 deletions(-) diff --git a/benches/remap.rs b/benches/remap.rs index 7d666d4041c11..d3c2c6930a144 100644 --- a/benches/remap.rs +++ b/benches/remap.rs @@ -4,7 +4,7 @@ use chrono::{DateTime, Utc}; use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; use indexmap::IndexMap; use vector::{ - config::{DataType, Output}, + 
config::{DataType, TransformOutput}, event::{Event, LogEvent, Value}, transforms::{ remap::{Remap, RemapConfig}, @@ -27,8 +27,10 @@ fn benchmark_remap(c: &mut Criterion) { let mut group = c.benchmark_group("remap"); let add_fields_runner = |tform: &mut Box, event: Event| { - let mut outputs = - TransformOutputsBuf::new_with_capacity(vec![Output::default(DataType::all())], 1); + let mut outputs = TransformOutputsBuf::new_with_capacity( + vec![TransformOutput::new(DataType::all(), vec![])], + 1, + ); tform.transform(event, &mut outputs); let result = outputs.take_primary(); let output_1 = result.first().unwrap().as_log(); @@ -77,8 +79,10 @@ fn benchmark_remap(c: &mut Criterion) { }); let json_parser_runner = |tform: &mut Box, event: Event| { - let mut outputs = - TransformOutputsBuf::new_with_capacity(vec![Output::default(DataType::all())], 1); + let mut outputs = TransformOutputsBuf::new_with_capacity( + vec![TransformOutput::new(DataType::all(), vec![])], + 1, + ); tform.transform(event, &mut outputs); let result = outputs.take_primary(); let output_1 = result.first().unwrap().as_log(); @@ -129,8 +133,10 @@ fn benchmark_remap(c: &mut Criterion) { let coerce_runner = |tform: &mut Box, event: Event, timestamp: DateTime| { - let mut outputs = - TransformOutputsBuf::new_with_capacity(vec![Output::default(DataType::all())], 1); + let mut outputs = TransformOutputsBuf::new_with_capacity( + vec![TransformOutput::new(DataType::all(), vec![])], + 1, + ); tform.transform(event, &mut outputs); let result = outputs.take_primary(); let output_1 = result.first().unwrap().as_log(); diff --git a/benches/transform/route.rs b/benches/transform/route.rs index 0e23695b8f58a..f1ed85f163634 100644 --- a/benches/transform/route.rs +++ b/benches/transform/route.rs @@ -12,7 +12,7 @@ use vector::transforms::{ TransformOutputsBuf, }; use vector_core::{ - config::{DataType, Output}, + config::{DataType, TransformOutput}, event::{Event, EventContainer, EventMetadata, LogEvent}, 
transform::{SyncTransform, TransformContext}, }; @@ -54,10 +54,10 @@ fn route(c: &mut Criterion) { "bba", "bbca", "dba", "bea", "fba", "gba", "hba", "iba", "jba", "bka", "bal", "bma", "bna", "boa", "bpa", "bqa", "bra", "bsa", "bta", "bua", "bva", "bwa", "xba", "aby", "zba", ] { - outputs.push(Output { + outputs.push(TransformOutput { port: Some(String::from(name)), ty: DataType::Log, - log_schema_definition: None, + log_schema_definitions: Vec::new(), }); } let output_buffer: TransformOutputsBuf = TransformOutputsBuf::new_with_capacity(outputs, 10); diff --git a/lib/vector-core/src/config/mod.rs b/lib/vector-core/src/config/mod.rs index 7350ff80d916b..07774b73f40e3 100644 --- a/lib/vector-core/src/config/mod.rs +++ b/lib/vector-core/src/config/mod.rs @@ -100,42 +100,119 @@ impl Input { } #[derive(Debug, Clone, PartialEq)] -pub struct Output { +pub struct SourceOutput { pub port: Option, pub ty: DataType, // NOTE: schema definitions are only implemented/supported for log-type events. There is no // inherent blocker to support other types as well, but it'll require additional work to add // the relevant schemas, and store them separately in this type. + pub schema_definition: Option, +} + +impl SourceOutput { + /// Create a `SourceOutput` of the given data type that contains a single output `Definition`. + /// Designed for use in log sources. /// - /// The `None` variant of a schema definition has two distinct meanings for a source component - /// versus a transform component: /// - /// For *sources*, a `None` schema is identical to a `Some(Definition::source_default())`. + /// # Panics /// - /// For a *transform*, a schema [`schema::Definition`] is required if `Datatype` is Log. - pub log_schema_definition: Option, -} + /// Panics if `ty` does not contain [`DataType::Log`]. 
+ #[must_use] + pub fn new_logs(ty: DataType, schema_definition: schema::Definition) -> Self { + assert!(ty.contains(DataType::Log)); -impl Output { - /// Create a default `Output` of the given data type. - /// - /// A default output is one without a port identifier (i.e. not a named output) and the default - /// output consumers will receive if they declare the component itself as an input. - pub fn default(ty: DataType) -> Self { Self { port: None, ty, - log_schema_definition: None, + schema_definition: Some(schema_definition), + } + } + + /// Create a `SourceOutput` for metrics that contains no output `Definition`. + /// Designed for use in metrics sources. + /// + /// Sets the datatype to be [`DataType::Metric`]. + #[must_use] + pub fn new_metrics() -> Self { + Self { + port: None, + ty: DataType::Metric, + schema_definition: None, } } - /// Set the schema definition for this `Output`. + /// Create a `SourceOutput` for traces that contains no output `Definition`. + /// Designed for use in trace sources. + /// + /// Sets the datatype to be [`DataType::Trace`]. #[must_use] - pub fn with_schema_definition(mut self, schema_definition: schema::Definition) -> Self { - self.log_schema_definition = Some(schema_definition); + pub fn new_traces() -> Self { + Self { + port: None, + ty: DataType::Trace, + schema_definition: None, + } + } + + /// Return the schema [`schema::Definition`] from this output. + /// + /// Takes a `schema_enabled` flag to determine if the full definition including the fields + /// and associated types should be returned, or if a simple definition should be returned. + /// A simple definition is just the default for the namespace. For the Vector namespace the + /// meanings are included. + /// Schema enabled is set in the user's configuration. 
+ #[must_use] + pub fn schema_definition(&self, schema_enabled: bool) -> Option { + self.schema_definition.as_ref().map(|definition| { + if schema_enabled { + definition.clone() + } else { + let mut new_definition = + schema::Definition::default_for_namespace(definition.log_namespaces()); + + if definition.log_namespaces().contains(&LogNamespace::Vector) { + new_definition.add_meanings(definition.meanings()); + } + + new_definition + } + }) + } +} + +impl SourceOutput { + /// Set the port name for this `SourceOutput`. + #[must_use] + pub fn with_port(mut self, name: impl Into) -> Self { + self.port = Some(name.into()); self } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct TransformOutput { + pub port: Option, + pub ty: DataType, + + /// For *transforms* if `Datatype` is [`DataType::Log`], if schema is + /// enabled, at least one definition should be output. If the transform + /// has multiple connected sources, it is possible to have multiple output + /// definitions - one for each input. + pub log_schema_definitions: Vec, +} + +impl TransformOutput { + /// Create a `TransformOutput` of the given data type that contains multiple [`schema::Definition`]s. + /// Designed for use in transforms. + #[must_use] + pub fn new(ty: DataType, schema_definitions: Vec) -> Self { + Self { + port: None, + ty, + log_schema_definitions: schema_definitions, + } + } /// Set the port name for this `Output`. 
#[must_use] @@ -427,10 +504,12 @@ impl LogNamespace { #[cfg(test)] mod test { - use crate::config::{init_log_schema, LogNamespace, LogSchema}; + use super::*; use crate::event::LogEvent; use chrono::Utc; - use lookup::event_path; + use lookup::{event_path, owned_value_path, OwnedTargetPath}; + use value::Kind; + use vector_common::btreemap; #[test] fn test_insert_standard_vector_source_metadata() { @@ -446,4 +525,107 @@ mod test { assert!(event.get(event_path!("a", "b", "c", "d")).is_some()); } + + #[test] + fn test_source_definitions_legacy() { + let definition = schema::Definition::empty_legacy_namespace() + .with_event_field(&owned_value_path!("zork"), Kind::bytes(), Some("zork")) + .with_event_field(&owned_value_path!("nork"), Kind::integer(), None); + let output = SourceOutput::new_logs(DataType::Log, definition); + + let valid_event = LogEvent::from(Value::from(btreemap! { + "zork" => "norknoog", + "nork" => 32 + })) + .into(); + + let invalid_event = LogEvent::from(Value::from(btreemap! { + "nork" => 32 + })) + .into(); + + // Get a definition with schema enabled. + let new_definition = output.schema_definition(true).unwrap(); + + // Meanings should still exist. + assert_eq!( + Some(&OwnedTargetPath::event(owned_value_path!("zork"))), + new_definition.meaning_path("zork") + ); + + // Events should have the schema validated. + new_definition.assert_valid_for_event(&valid_event); + new_definition.assert_invalid_for_event(&invalid_event); + + // There should be the default legacy definition without schemas enabled. 
+ assert_eq!( + Some(schema::Definition::default_legacy_namespace()), + output.schema_definition(false) + ); + } + + #[test] + fn test_source_definitons_vector() { + let definition = schema::Definition::default_for_namespace(&[LogNamespace::Vector].into()) + .with_metadata_field( + &owned_value_path!("vector", "zork"), + Kind::integer(), + Some("zork"), + ) + .with_event_field(&owned_value_path!("nork"), Kind::integer(), None); + + let output = SourceOutput::new_logs(DataType::Log, definition); + + let mut valid_event = LogEvent::from(Value::from(btreemap! { + "nork" => 32 + })); + + valid_event + .metadata_mut() + .value_mut() + .insert(path!("vector").concat("zork"), 32); + + let valid_event = valid_event.into(); + + let mut invalid_event = LogEvent::from(Value::from(btreemap! { + "nork" => 32 + })); + + invalid_event + .metadata_mut() + .value_mut() + .insert(path!("vector").concat("zork"), "noog"); + + let invalid_event = invalid_event.into(); + + // Get a definition with schema enabled. + let new_definition = output.schema_definition(true).unwrap(); + + // Meanings should still exist. + assert_eq!( + Some(&OwnedTargetPath::metadata(owned_value_path!( + "vector", "zork" + ))), + new_definition.meaning_path("zork") + ); + + // Events should have the schema validated. + new_definition.assert_valid_for_event(&valid_event); + new_definition.assert_invalid_for_event(&invalid_event); + + // Get a definition without schema enabled. + let new_definition = output.schema_definition(false).unwrap(); + + // Meanings should still exist. + assert_eq!( + Some(&OwnedTargetPath::metadata(owned_value_path!( + "vector", "zork" + ))), + new_definition.meaning_path("zork") + ); + + // Events should not have the schema validated. 
+ new_definition.assert_valid_for_event(&valid_event); + new_definition.assert_valid_for_event(&invalid_event); + } } diff --git a/lib/vector-core/src/schema/definition.rs b/lib/vector-core/src/schema/definition.rs index fd6ea4565d9af..0442337bba094 100644 --- a/lib/vector-core/src/schema/definition.rs +++ b/lib/vector-core/src/schema/definition.rs @@ -87,7 +87,7 @@ impl Definition { ) -> Self { Self { event_kind, - metadata_kind: Kind::object(Collection::empty()), + metadata_kind: Kind::object(Collection::any()), meaning: BTreeMap::default(), log_namespaces: log_namespaces.into(), } @@ -373,25 +373,55 @@ impl Definition { /// This method panics if the provided path points to an unknown location in the collection. #[must_use] pub fn with_meaning(mut self, target_path: OwnedTargetPath, meaning: &str) -> Self { - // Ensure the path exists in the collection. + self.add_meaning(target_path, meaning); + self + } + + /// Adds the meaning pointing to the given path to our list of meanings. + /// + /// # Panics + /// + /// This method panics if the provided path points to an unknown location in the collection. + fn add_meaning(&mut self, target_path: OwnedTargetPath, meaning: &str) { + self.try_with_meaning(target_path, meaning) + .unwrap_or_else(|err| panic!("{}", err)); + } + + /// Register a semantic meaning for the definition. + /// + /// # Errors + /// + /// Returns an error if the provided path points to an unknown location in the collection. 
+ pub fn try_with_meaning( + &mut self, + target_path: OwnedTargetPath, + meaning: &str, + ) -> Result<(), &'static str> { match target_path.prefix { - PathPrefix::Event => assert!( - self.event_kind + PathPrefix::Event + if !self + .event_kind .at_path(&target_path.path) - .contains_any_defined(), - "meaning must point to a valid path" - ), - PathPrefix::Metadata => assert!( - self.metadata_kind + .contains_any_defined() => + { + Err("meaning must point to a valid path") + } + + PathPrefix::Metadata + if !self + .metadata_kind .at_path(&target_path.path) - .contains_any_defined(), - "meaning must point to a valid path" - ), - }; + .contains_any_defined() => + { + Err("meaning must point to a valid path") + } - self.meaning - .insert(meaning.to_owned(), MeaningPointer::Valid(target_path)); - self + _ => { + self.meaning + .insert(meaning.to_owned(), MeaningPointer::Valid(target_path)); + Ok(()) + } + } } /// Set the kind for all unknown fields. @@ -451,6 +481,21 @@ impl Definition { }) } + /// Adds the meanings provided by an iterator over the given meanings. + /// + /// # Panics + /// + /// This method panics if the provided path from any of the incoming meanings points to + /// an unknown location in the collection. + pub fn add_meanings<'a>( + &'a mut self, + meanings: impl Iterator, + ) { + for (meaning, path) in meanings { + self.add_meaning(path.clone(), meaning); + } + } + pub fn event_kind(&self) -> &Kind { &self.event_kind } @@ -481,6 +526,7 @@ mod test_utils { /// Checks that the schema definition is _valid_ for the given event. /// /// # Errors + /// /// If the definition is not valid, debug info will be returned. pub fn is_valid_for_event(&self, event: &Event) -> Result<(), String> { if let Some(log) = event.maybe_as_log() { @@ -522,13 +568,27 @@ mod test_utils { } /// Asserts that the schema definition is _valid_ for the given event. + /// /// # Panics + /// /// If the definition is not valid for the event. 
pub fn assert_valid_for_event(&self, event: &Event) { if let Err(err) = self.is_valid_for_event(event) { panic!("Schema definition assertion failed: {err}"); } } + + /// Asserts that the schema definition is _invalid_ for the given event. + /// + /// # Panics + /// + /// If the definition is valid for the event. + pub fn assert_invalid_for_event(&self, event: &Event) { + assert!( + self.is_valid_for_event(event).is_err(), + "Schema definition assertion should not be valid" + ); + } } } @@ -732,7 +792,7 @@ mod tests { "foo".into(), Kind::boolean().or_undefined(), )])), - metadata_kind: Kind::object(Collection::empty()), + metadata_kind: Kind::object(Collection::any()), meaning: [( "foo_meaning".to_owned(), MeaningPointer::Valid(parse_target_path("foo").unwrap()), @@ -756,7 +816,7 @@ mod tests { Kind::regex().or_null().or_undefined(), )])), )])), - metadata_kind: Kind::object(Collection::empty()), + metadata_kind: Kind::object(Collection::any()), meaning: [( "foobar".to_owned(), MeaningPointer::Valid(parse_target_path(".foo.bar").unwrap()), @@ -777,7 +837,7 @@ mod tests { "foo".into(), Kind::boolean().or_undefined(), )])), - metadata_kind: Kind::object(Collection::empty()), + metadata_kind: Kind::object(Collection::any()), meaning: BTreeMap::default(), log_namespaces: BTreeSet::new(), }, @@ -795,7 +855,7 @@ mod tests { fn test_unknown_fields() { let want = Definition { event_kind: Kind::object(Collection::from_unknown(Kind::bytes().or_integer())), - metadata_kind: Kind::object(Collection::empty()), + metadata_kind: Kind::object(Collection::any()), meaning: BTreeMap::default(), log_namespaces: BTreeSet::new(), }; diff --git a/lib/vector-core/src/transform/mod.rs b/lib/vector-core/src/transform/mod.rs index cc44eba2bdec5..372724c42a912 100644 --- a/lib/vector-core/src/transform/mod.rs +++ b/lib/vector-core/src/transform/mod.rs @@ -222,14 +222,14 @@ struct TransformOutput { } pub struct TransformOutputs { - outputs_spec: Vec, + outputs_spec: Vec, primary_output: Option, 
named_outputs: HashMap, } impl TransformOutputs { pub fn new( - outputs_in: Vec, + outputs_in: Vec, ) -> (Self, HashMap, fanout::ControlChannel>) { let outputs_spec = outputs_in.clone(); let mut primary_output = None; @@ -319,7 +319,7 @@ pub struct TransformOutputsBuf { } impl TransformOutputsBuf { - pub fn new_with_capacity(outputs_in: Vec, capacity: usize) -> Self { + pub fn new_with_capacity(outputs_in: Vec, capacity: usize) -> Self { let mut primary_buffer = None; let mut named_buffers = HashMap::new(); diff --git a/src/api/schema/components/mod.rs b/src/api/schema/components/mod.rs index d6ba5ade90874..171bdecc0bfe3 100644 --- a/src/api/schema/components/mod.rs +++ b/src/api/schema/components/mod.rs @@ -14,7 +14,7 @@ use tokio_stream::{wrappers::BroadcastStream, Stream, StreamExt}; use vector_config::NamedComponent; use vector_core::internal_event::DEFAULT_OUTPUT; -use crate::topology::schema::merged_definition; +use crate::topology::schema::possible_definitions; use crate::{ api::schema::{ components::state::component_by_component_key, @@ -294,7 +294,7 @@ pub fn update_config(config: &Config) { outputs: transform .inner .outputs( - &merged_definition(&transform.inputs, config, &mut cache), + &possible_definitions(&transform.inputs, config, &mut cache), config.schema.log_namespace(), ) .into_iter() diff --git a/src/config/compiler.rs b/src/config/compiler.rs index 8ec372c2187c4..f4170505bf557 100644 --- a/src/config/compiler.rs +++ b/src/config/compiler.rs @@ -138,7 +138,10 @@ pub(crate) fn expand_globs(config: &mut ConfigBuilder) { }) .chain(config.transforms.iter().flat_map(|(key, t)| { t.inner - .outputs(&schema::Definition::any(), config.schema.log_namespace()) + .outputs( + &[(key.into(), schema::Definition::any())], + config.schema.log_namespace(), + ) .into_iter() .map(|output| OutputId { component: key.clone(), diff --git a/src/config/graph.rs b/src/config/graph.rs index 2beab867fbdd8..3c1e27439a78d 100644 --- a/src/config/graph.rs +++ 
b/src/config/graph.rs @@ -2,18 +2,18 @@ use indexmap::{set::IndexSet, IndexMap}; use std::collections::{HashMap, HashSet, VecDeque}; use super::{ - schema, ComponentKey, DataType, Output, OutputId, SinkConfig, SinkOuter, SourceOuter, - TransformOuter, + schema, ComponentKey, DataType, OutputId, SinkConfig, SinkOuter, SourceOuter, SourceOutput, + TransformOuter, TransformOutput, }; #[derive(Debug, Clone)] pub enum Node { Source { - outputs: Vec, + outputs: Vec, }, Transform { in_ty: DataType, - outputs: Vec, + outputs: Vec, }, Sink { ty: DataType, @@ -76,9 +76,10 @@ impl Graph { id.clone(), Node::Transform { in_ty: transform.inner.input().data_type(), - outputs: transform - .inner - .outputs(&schema::Definition::any(), schema.log_namespace()), + outputs: transform.inner.outputs( + &[(id.into(), schema::Definition::any())], + schema.log_namespace(), + ), }, ); } @@ -166,7 +167,12 @@ impl Graph { /// have inputs. fn get_output_type(&self, id: &OutputId) -> DataType { match &self.nodes[&id.component] { - Node::Source { outputs } | Node::Transform { outputs, .. } => outputs + Node::Source { outputs } => outputs + .iter() + .find(|output| output.port == id.port) + .map(|output| output.ty) + .expect("output didn't exist"), + Node::Transform { outputs, .. } => outputs .iter() .find(|output| output.port == id.port) .map(|output| output.ty) @@ -257,7 +263,14 @@ impl Graph { .iter() .flat_map(|(key, node)| match node { Node::Sink { .. } => vec![], - Node::Source { outputs } | Node::Transform { outputs, .. } => outputs + Node::Source { outputs } => outputs + .iter() + .map(|output| OutputId { + component: key.clone(), + port: output.port.clone(), + }) + .collect(), + Node::Transform { outputs, .. 
} => outputs .iter() .map(|output| OutputId { component: key.clone(), @@ -353,6 +366,7 @@ impl Graph { #[cfg(test)] mod test { use similar_asserts::assert_eq; + use vector_core::schema::Definition; use super::*; @@ -361,7 +375,11 @@ mod test { self.nodes.insert( id.into(), Node::Source { - outputs: vec![Output::default(ty)], + outputs: vec![match ty { + DataType::Metric => SourceOutput::new_metrics(), + DataType::Trace => SourceOutput::new_traces(), + _ => SourceOutput::new_logs(ty, Definition::any()), + }], }, ); } @@ -379,7 +397,10 @@ mod test { id.clone(), Node::Transform { in_ty, - outputs: vec![Output::default(out_ty)], + outputs: vec![TransformOutput::new( + out_ty, + vec![Definition::default_legacy_namespace()], + )], }, ); for from in inputs { @@ -393,9 +414,10 @@ mod test { fn add_transform_output(&mut self, id: &str, name: &str, ty: DataType) { let id = id.into(); match self.nodes.get_mut(&id) { - Some(Node::Transform { outputs, .. }) => { - outputs.push(Output::default(ty).with_port(name)) - } + Some(Node::Transform { outputs, .. 
}) => outputs.push( + TransformOutput::new(ty, vec![Definition::default_legacy_namespace()]) + .with_port(name), + ), _ => panic!("invalid transform"), } } @@ -613,13 +635,13 @@ mod test { graph.nodes.insert( ComponentKey::from("foo.bar"), Node::Source { - outputs: vec![Output::default(DataType::all())], + outputs: vec![SourceOutput::new_logs(DataType::all(), Definition::any())], }, ); graph.nodes.insert( ComponentKey::from("foo.bar"), Node::Source { - outputs: vec![Output::default(DataType::all())], + outputs: vec![SourceOutput::new_logs(DataType::all(), Definition::any())], }, ); graph.nodes.insert( @@ -627,8 +649,15 @@ mod test { Node::Transform { in_ty: DataType::all(), outputs: vec![ - Output::default(DataType::all()), - Output::default(DataType::all()).with_port("bar"), + TransformOutput::new( + DataType::all(), + vec![Definition::default_legacy_namespace()], + ), + TransformOutput::new( + DataType::all(), + vec![Definition::default_legacy_namespace()], + ) + .with_port("bar"), ], }, ); @@ -637,7 +666,7 @@ mod test { graph.nodes.insert( ComponentKey::from("baz.errors"), Node::Source { - outputs: vec![Output::default(DataType::all())], + outputs: vec![SourceOutput::new_logs(DataType::all(), Definition::any())], }, ); graph.nodes.insert( @@ -645,8 +674,15 @@ mod test { Node::Transform { in_ty: DataType::all(), outputs: vec![ - Output::default(DataType::all()), - Output::default(DataType::all()).with_port("errors"), + TransformOutput::new( + DataType::all(), + vec![Definition::default_legacy_namespace()], + ), + TransformOutput::new( + DataType::all(), + vec![Definition::default_legacy_namespace()], + ) + .with_port("errors"), ], }, ); diff --git a/src/config/id.rs b/src/config/id.rs index 57c9cd33178bc..caa561b68352d 100644 --- a/src/config/id.rs +++ b/src/config/id.rs @@ -3,6 +3,8 @@ use std::{fmt, ops::Deref}; use vector_config::configurable_component; pub use vector_core::config::ComponentKey; +use super::schema; + /// A list of upstream [source][sources] or 
[transform][transforms] IDs. /// /// Wildcards (`*`) are supported. @@ -106,6 +108,33 @@ pub struct OutputId { pub port: Option, } +impl OutputId { + /// Some situations, for example validating a config file, require running the + /// `TransformConfig::outputs` function to retrieve the outputs, but we don't have an + /// `OutputId` from a source. This gives us an `OutputId` that we can use. + /// + /// TODO: This is not a pleasant solution, but would require some significant refactoring + /// to the topology code to avoid. + pub fn dummy() -> Self { + Self { + component: "dummy".into(), + port: None, + } + } + + /// Given a list of [`schema::Definition`]s, returns a [`Vec`] of tuples of + /// this `OutputId` with each `Definition`. + pub fn with_definitions( + &self, + definitions: impl IntoIterator, + ) -> Vec<(OutputId, schema::Definition)> { + definitions + .into_iter() + .map(|definition| (self.clone(), definition)) + .collect() + } +} + impl fmt::Display for OutputId { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match &self.port { diff --git a/src/config/mod.rs b/src/config/mod.rs index bd9f6c283eaf1..1cd6b5aff96af 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -11,8 +11,8 @@ use indexmap::IndexMap; pub use vector_config::component::{GenerateConfig, SinkDescription, TransformDescription}; use vector_config::configurable_component; pub use vector_core::config::{ - AcknowledgementsConfig, DataType, GlobalOptions, Input, LogNamespace, Output, - SourceAcknowledgementsConfig, + AcknowledgementsConfig, DataType, GlobalOptions, Input, LogNamespace, + SourceAcknowledgementsConfig, SourceOutput, TransformOutput, }; use crate::{conditions, event::Metric, secrets::SecretBackends, serde::OneOrMany}; diff --git a/src/config/source.rs b/src/config/source.rs index 366fb8553c66b..1353c18c05dc4 100644 --- a/src/config/source.rs +++ b/src/config/source.rs @@ -10,7 +10,8 @@ use vector_config_common::attributes::CustomAttribute; use 
vector_config_common::schema::{SchemaGenerator, SchemaObject}; use vector_core::{ config::{ - AcknowledgementsConfig, GlobalOptions, LogNamespace, Output, SourceAcknowledgementsConfig, + AcknowledgementsConfig, GlobalOptions, LogNamespace, SourceAcknowledgementsConfig, + SourceOutput, }, source::Source, }; @@ -89,7 +90,7 @@ pub trait SourceConfig: DynClone + NamedComponent + core::fmt::Debug + Send + Sy async fn build(&self, cx: SourceContext) -> crate::Result; /// Gets the list of outputs exposed by this source. - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec; + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec; /// Gets the list of resources, if any, used by this source. /// @@ -130,8 +131,8 @@ pub struct SourceContext { /// Tracks the schema IDs assigned to schemas exposed by the source. /// - /// Given a source can expose multiple [`Output`] channels, the ID is tied to the identifier of - /// that `Output`. + /// Given a source can expose multiple [`SourceOutput`] channels, the ID is tied to the identifier of + /// that `SourceOutput`. pub schema_definitions: HashMap, schema::Definition>, } diff --git a/src/config/transform.rs b/src/config/transform.rs index 54a5996d144a9..f3271c3422e7b 100644 --- a/src/config/transform.rs +++ b/src/config/transform.rs @@ -11,12 +11,13 @@ use vector_config::{ }; use vector_config_common::attributes::CustomAttribute; use vector_core::{ - config::{GlobalOptions, Input, LogNamespace, Output}, + config::{GlobalOptions, Input, LogNamespace, TransformOutput}, schema, transform::Transform, }; use super::schema::Options as SchemaOptions; +use super::OutputId; use super::{id::Inputs, ComponentKey}; pub type BoxedTransform = Box; @@ -108,9 +109,9 @@ pub struct TransformContext { /// Tracks the schema IDs assigned to schemas exposed by the transform. /// - /// Given a transform can expose multiple [`Output`] channels, the ID is tied to the identifier of - /// that `Output`. 
- pub schema_definitions: HashMap, schema::Definition>, + /// Given a transform can expose multiple [`TransformOutput`] channels, the ID is tied to the identifier of + /// that `TransformOutput`. + pub schema_definitions: HashMap, Vec>, /// The schema definition created by merging all inputs of the transform. /// @@ -128,7 +129,7 @@ impl Default for TransformContext { key: Default::default(), globals: Default::default(), enrichment_tables: Default::default(), - schema_definitions: HashMap::from([(None, schema::Definition::any())]), + schema_definitions: HashMap::from([(None, vec![schema::Definition::any()])]), merged_schema_definition: schema::Definition::any(), schema: SchemaOptions::default(), } @@ -147,7 +148,7 @@ impl TransformContext { } #[cfg(any(test, feature = "test"))] - pub fn new_test(schema_definitions: HashMap, schema::Definition>) -> Self { + pub fn new_test(schema_definitions: HashMap, Vec>) -> Self { Self { schema_definitions, ..Default::default() @@ -190,9 +191,9 @@ pub trait TransformConfig: DynClone + NamedComponent + core::fmt::Debug + Send + /// of events flowing through the transform. fn outputs( &self, - merged_definition: &schema::Definition, + input_definitions: &[(OutputId, schema::Definition)], global_log_namespace: LogNamespace, - ) -> Vec; + ) -> Vec; /// Validates that the configuration of the transform is valid. 
/// diff --git a/src/config/unit_test/mod.rs b/src/config/unit_test/mod.rs index 7eac5cbc8b1ed..273b5fc70b849 100644 --- a/src/config/unit_test/mod.rs +++ b/src/config/unit_test/mod.rs @@ -15,8 +15,6 @@ use tokio::sync::{ Mutex, }; use uuid::Uuid; -use value::Kind; -use vector_core::config::LogNamespace; pub use self::unit_test_components::{ UnitTestSinkCheck, UnitTestSinkConfig, UnitTestSinkResult, UnitTestSourceConfig, @@ -30,7 +28,7 @@ use crate::{ TestDefinition, TestInput, TestInputValue, TestOutput, }, event::{Event, LogEvent, Value}, - schema, signal, + signal, topology::{ self, builder::{self, Pieces}, @@ -190,7 +188,7 @@ impl UnitTestBuildMetadata { .flat_map(|(key, transform)| { transform .inner - .outputs(&schema::Definition::any(), builder.schema.log_namespace()) + .outputs(&[], builder.schema.log_namespace()) .into_iter() .map(|output| OutputId { component: key.clone(), @@ -461,13 +459,7 @@ fn get_loose_end_outputs_sink(config: &ConfigBuilder) -> Option Vec { - vec![Output::default(DataType::all())] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![SourceOutput::new_logs( + DataType::all(), + schema::Definition::default_legacy_namespace(), + )] } fn can_acknowledge(&self) -> bool { @@ -97,8 +103,11 @@ impl SourceConfig for UnitTestStreamSourceConfig { })) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![Output::default(DataType::all())] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![SourceOutput::new_logs( + DataType::all(), + schema::Definition::default_legacy_namespace(), + )] } fn can_acknowledge(&self) -> bool { diff --git a/src/config/validation.rs b/src/config/validation.rs index dbb25377d9654..2c971f602fed3 100644 --- a/src/config/validation.rs +++ b/src/config/validation.rs @@ -1,5 +1,4 @@ -use crate::config::schema; -use crate::topology::schema::merged_definition; +use crate::{config::schema, topology::schema::possible_definitions}; use futures_util::{stream, FutureExt, 
StreamExt, TryFutureExt, TryStreamExt}; use heim::{disk::Partition, units::information::byte}; use indexmap::IndexMap; @@ -172,7 +171,10 @@ pub fn check_outputs(config: &ConfigBuilder) -> Result<(), Vec> { if transform .inner - .outputs(&definition, config.schema.log_namespace()) + .outputs( + &[(OutputId::dummy(), definition)], + config.schema.log_namespace(), + ) .iter() .map(|output| output.port.as_deref().unwrap_or("")) .any(|name| name == DEFAULT_OUTPUT) @@ -343,7 +345,7 @@ pub fn warnings(config: &Config) -> Vec { transform .inner .outputs( - &merged_definition(&transform.inputs, config, &mut cache), + &possible_definitions(&transform.inputs, config, &mut cache), config.schema.log_namespace(), ) .iter() diff --git a/src/source_sender/mod.rs b/src/source_sender/mod.rs index 5cf013e06e886..30ee2cf2b781c 100644 --- a/src/source_sender/mod.rs +++ b/src/source_sender/mod.rs @@ -9,7 +9,7 @@ use vector_buffers::topology::channel::{self, LimitedReceiver, LimitedSender}; #[cfg(test)] use vector_core::event::{into_event_stream, EventStatus}; use vector_core::{ - config::{log_schema, Output}, + config::{log_schema, SourceOutput}, event::{array, Event, EventArray, EventContainer, EventRef}, internal_event::{ self, CountByteSize, EventsSent, InternalEventHandle as _, Registered, DEFAULT_OUTPUT, @@ -48,7 +48,7 @@ impl Builder { } } - pub fn add_output(&mut self, output: Output) -> LimitedReceiver { + pub fn add_source_output(&mut self, output: SourceOutput) -> LimitedReceiver { let lag_time = self.lag_time.clone(); match output.port { None => { diff --git a/src/sources/amqp.rs b/src/sources/amqp.rs index cda43a446ade3..157742b8733e2 100644 --- a/src/sources/amqp.rs +++ b/src/sources/amqp.rs @@ -3,7 +3,7 @@ use crate::{ amqp::AmqpConfig, codecs::{Decoder, DecodingConfig}, - config::{Output, SourceConfig, SourceContext}, + config::{SourceConfig, SourceContext, SourceOutput}, event::{BatchNotifier, BatchStatus}, internal_events::{ source::{AmqpAckError, AmqpBytesReceived, 
AmqpEventError, AmqpRejectError}, @@ -142,7 +142,7 @@ impl SourceConfig for AmqpSourceConfig { amqp_source(self, cx.shutdown, cx.out, log_namespace, acknowledgements).await } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let schema_definition = self .decoding @@ -180,7 +180,10 @@ impl SourceConfig for AmqpSourceConfig { None, ); - vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs( + self.decoding.output_type(), + schema_definition, + )] } fn can_acknowledge(&self) -> bool { @@ -522,10 +525,10 @@ pub mod test { ..Default::default() }; - let definition = config.outputs(LogNamespace::Vector)[0] - .clone() - .log_schema_definition - .unwrap(); + let definition = config + .outputs(LogNamespace::Vector) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) @@ -549,17 +552,17 @@ pub mod test { .with_metadata_field(&owned_value_path!("amqp", "exchange"), Kind::bytes(), None) .with_metadata_field(&owned_value_path!("amqp", "offset"), Kind::integer(), None); - assert_eq!(definition, expected_definition); + assert_eq!(definition, Some(expected_definition)); } #[test] fn output_schema_definition_legacy_namespace() { let config = AmqpSourceConfig::default(); - let definition = config.outputs(LogNamespace::Legacy)[0] - .clone() - .log_schema_definition - .unwrap(); + let definition = config + .outputs(LogNamespace::Legacy) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -576,7 +579,7 @@ pub mod test { .with_event_field(&owned_value_path!("exchange"), Kind::bytes(), None) .with_event_field(&owned_value_path!("offset"), Kind::integer(), None); - assert_eq!(definition, 
expected_definition); + assert_eq!(definition, Some(expected_definition)); } } diff --git a/src/sources/apache_metrics/mod.rs b/src/sources/apache_metrics/mod.rs index dd58f7bc8a223..1a207624c8cb5 100644 --- a/src/sources/apache_metrics/mod.rs +++ b/src/sources/apache_metrics/mod.rs @@ -14,7 +14,7 @@ use vector_config::configurable_component; use vector_core::{metric_tags, EstimatedJsonEncodedSizeOf}; use crate::{ - config::{self, GenerateConfig, Output, ProxyConfig, SourceConfig, SourceContext}, + config::{GenerateConfig, ProxyConfig, SourceConfig, SourceContext, SourceOutput}, event::metric::{Metric, MetricKind, MetricValue}, http::HttpClient, internal_events::{ @@ -93,8 +93,8 @@ impl SourceConfig for ApacheMetricsConfig { )) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![Output::default(config::DataType::Metric)] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![SourceOutput::new_metrics()] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/aws_ecs_metrics/mod.rs b/src/sources/aws_ecs_metrics/mod.rs index ab474f64663ca..e3db0a38c7619 100644 --- a/src/sources/aws_ecs_metrics/mod.rs +++ b/src/sources/aws_ecs_metrics/mod.rs @@ -10,7 +10,7 @@ use vector_config::configurable_component; use vector_core::{config::LogNamespace, EstimatedJsonEncodedSizeOf}; use crate::{ - config::{self, GenerateConfig, Output, SourceConfig, SourceContext}, + config::{GenerateConfig, SourceConfig, SourceContext, SourceOutput}, internal_events::{ AwsEcsMetricsEventsReceived, AwsEcsMetricsHttpError, AwsEcsMetricsParseError, AwsEcsMetricsResponseError, RequestCompleted, StreamClosedError, @@ -159,8 +159,8 @@ impl SourceConfig for AwsEcsMetricsSourceConfig { ))) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![Output::default(config::DataType::Metric)] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![SourceOutput::new_metrics()] } fn can_acknowledge(&self) -> bool { diff --git 
a/src/sources/aws_kinesis_firehose/mod.rs b/src/sources/aws_kinesis_firehose/mod.rs index 20dd3d14eb3b6..198f14104d308 100644 --- a/src/sources/aws_kinesis_firehose/mod.rs +++ b/src/sources/aws_kinesis_firehose/mod.rs @@ -13,7 +13,8 @@ use warp::Filter; use crate::{ codecs::DecodingConfig, config::{ - GenerateConfig, Output, Resource, SourceAcknowledgementsConfig, SourceConfig, SourceContext, + GenerateConfig, Resource, SourceAcknowledgementsConfig, SourceConfig, SourceContext, + SourceOutput, }, serde::{bool_or_struct, default_decoding, default_framing_message_based}, tls::{MaybeTlsSettings, TlsEnableableConfig}, @@ -183,7 +184,7 @@ impl SourceConfig for AwsKinesisFirehoseConfig { })) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let schema_definition = self .decoding .schema_definition(global_log_namespace.merge(self.log_namespace)) @@ -203,7 +204,10 @@ impl SourceConfig for AwsKinesisFirehoseConfig { None, ); - vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs( + self.decoding.output_type(), + schema_definition, + )] } fn resources(&self) -> Vec { diff --git a/src/sources/aws_s3/mod.rs b/src/sources/aws_s3/mod.rs index ecbc83f582787..9085a5b26ce94 100644 --- a/src/sources/aws_s3/mod.rs +++ b/src/sources/aws_s3/mod.rs @@ -15,7 +15,9 @@ use super::util::MultilineConfig; use crate::{ aws::{auth::AwsAuthentication, create_client, RegionOrEndpoint}, common::{s3::S3ClientBuilder, sqs::SqsClientBuilder}, - config::{Output, ProxyConfig, SourceAcknowledgementsConfig, SourceConfig, SourceContext}, + config::{ + ProxyConfig, SourceAcknowledgementsConfig, SourceConfig, SourceContext, SourceOutput, + }, line_agg, serde::bool_or_struct, tls::TlsConfig, @@ -138,7 +140,7 @@ impl SourceConfig for AwsS3Config { } } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: 
LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let mut schema_definition = BytesDeserializerConfig .schema_definition(log_namespace) @@ -185,7 +187,7 @@ impl SourceConfig for AwsS3Config { schema_definition = schema_definition.unknown_fields(Kind::bytes()); } - vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs(DataType::Log, schema_definition)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/aws_sqs/config.rs b/src/sources/aws_sqs/config.rs index 8d746033c062a..b165bc16bdee2 100644 --- a/src/sources/aws_sqs/config.rs +++ b/src/sources/aws_sqs/config.rs @@ -12,7 +12,7 @@ use crate::common::sqs::SqsClientBuilder; use crate::tls::TlsConfig; use crate::{ aws::{auth::AwsAuthentication, region::RegionOrEndpoint}, - config::{Output, SourceAcknowledgementsConfig, SourceConfig, SourceContext}, + config::{SourceAcknowledgementsConfig, SourceConfig, SourceContext, SourceOutput}, serde::{bool_or_struct, default_decoding, default_framing_message_based}, sources::aws_sqs::source::SqsSource, }; @@ -131,7 +131,7 @@ impl SourceConfig for AwsSqsConfig { )) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let schema_definition = self .decoding .schema_definition(global_log_namespace.merge(self.log_namespace)) @@ -144,7 +144,10 @@ impl SourceConfig for AwsSqsConfig { Some("timestamp"), ); - vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs( + self.decoding.output_type(), + schema_definition, + )] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/aws_sqs/source.rs b/src/sources/aws_sqs/source.rs index 8f02fd3028e0f..3a17e8c801fcb 100644 --- a/src/sources/aws_sqs/source.rs +++ b/src/sources/aws_sqs/source.rs @@ -233,10 +233,10 @@ mod tests { log_namespace: Some(true), ..Default::default() }; - let 
definition = config.outputs(LogNamespace::Vector)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Vector) + .remove(0) + .schema_definition(true); let message = "test"; let now = Utc::now(); @@ -276,7 +276,7 @@ mod tests { .to_string_lossy(), now.to_rfc3339_opts(SecondsFormat::AutoSi, true) ); - definition.assert_valid_for_event(&events[0]); + definitions.unwrap().assert_valid_for_event(&events[0]); } #[tokio::test] @@ -285,10 +285,10 @@ mod tests { log_namespace: None, ..Default::default() }; - let definition = config.outputs(LogNamespace::Legacy)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Legacy) + .remove(0) + .schema_definition(true); let message = "test"; let now = Utc::now(); @@ -329,7 +329,7 @@ mod tests { .to_string_lossy(), now.to_rfc3339_opts(SecondsFormat::AutoSi, true) ); - definition.assert_valid_for_event(&events[0]); + definitions.unwrap().assert_valid_for_event(&events[0]); } #[test] diff --git a/src/sources/datadog_agent/metrics.rs b/src/sources/datadog_agent/metrics.rs index ea6d7538319d0..0a02e2a11dc95 100644 --- a/src/sources/datadog_agent/metrics.rs +++ b/src/sources/datadog_agent/metrics.rs @@ -81,7 +81,6 @@ fn sketches_service( api_token, query_params.dd_api_key, ), - &source.metrics_schema_definition, &source.events_received, ) }); @@ -120,7 +119,9 @@ fn series_v1_service( api_token, query_params.dd_api_key, ), - &source.metrics_schema_definition, + // Currently metrics do not have schemas defined, so for now we just pass a + // default one. 
+ &Arc::new(schema::Definition::default_legacy_namespace()), &source.events_received, ) }); @@ -159,7 +160,6 @@ fn series_v2_service( api_token, query_params.dd_api_key, ), - &source.metrics_schema_definition, &source.events_received, ) }); @@ -172,7 +172,6 @@ fn series_v2_service( fn decode_datadog_sketches( body: Bytes, api_key: Option>, - schema_definition: &Arc, events_received: &Registered, ) -> Result, ErrorMessage> { if body.is_empty() { @@ -184,7 +183,7 @@ fn decode_datadog_sketches( return Ok(Vec::new()); } - let metrics = decode_ddsketch(body, &api_key, schema_definition).map_err(|error| { + let metrics = decode_ddsketch(body, &api_key).map_err(|error| { ErrorMessage::new( StatusCode::UNPROCESSABLE_ENTITY, format!("Error decoding Datadog sketch: {:?}", error), @@ -202,7 +201,6 @@ fn decode_datadog_sketches( fn decode_datadog_series_v2( body: Bytes, api_key: Option>, - schema_definition: &Arc, events_received: &Registered, ) -> Result, ErrorMessage> { if body.is_empty() { @@ -214,14 +212,12 @@ fn decode_datadog_series_v2( return Ok(Vec::new()); } - let metrics = decode_ddseries_v2(body, &api_key, schema_definition, events_received).map_err( - |error| { - ErrorMessage::new( - StatusCode::UNPROCESSABLE_ENTITY, - format!("Error decoding Datadog sketch: {:?}", error), - ) - }, - )?; + let metrics = decode_ddseries_v2(body, &api_key, events_received).map_err(|error| { + ErrorMessage::new( + StatusCode::UNPROCESSABLE_ENTITY, + format!("Error decoding Datadog sketch: {:?}", error), + ) + })?; events_received.emit(CountByteSize( metrics.len(), @@ -234,7 +230,6 @@ fn decode_datadog_series_v2( pub(crate) fn decode_ddseries_v2( frame: Bytes, api_key: &Option>, - schema_definition: &Arc, events_received: &Registered, ) -> crate::Result> { let payload = MetricPayload::decode(frame)?; @@ -336,9 +331,6 @@ pub(crate) fn decode_ddseries_v2( if let Some(k) = &api_key { metric.metadata_mut().set_datadog_api_key(Arc::clone(k)); } - metric - .metadata_mut() - 
.set_schema_definition(schema_definition); metric.into() }) @@ -502,7 +494,6 @@ fn namespace_name_from_dd_metric(dd_metric_name: &str) -> (Option<&str>, &str) { pub(crate) fn decode_ddsketch( frame: Bytes, api_key: &Option>, - schema_definition: &Arc, ) -> crate::Result> { let payload = SketchPayload::decode(frame)?; // payload.metadata is always empty for payload coming from dd agents @@ -544,9 +535,6 @@ pub(crate) fn decode_ddsketch( metric.metadata_mut().set_datadog_api_key(Arc::clone(k)); } - metric - .metadata_mut() - .set_schema_definition(schema_definition); metric.into() }) }) diff --git a/src/sources/datadog_agent/mod.rs b/src/sources/datadog_agent/mod.rs index 5132e12c50918..c92d8856e87b1 100644 --- a/src/sources/datadog_agent/mod.rs +++ b/src/sources/datadog_agent/mod.rs @@ -40,8 +40,8 @@ use warp::{filters::BoxedFilter, reject::Rejection, reply::Response, Filter, Rep use crate::{ codecs::{Decoder, DecodingConfig}, config::{ - log_schema, DataType, GenerateConfig, Output, Resource, SourceAcknowledgementsConfig, - SourceConfig, SourceContext, + log_schema, DataType, GenerateConfig, Resource, SourceAcknowledgementsConfig, SourceConfig, + SourceContext, SourceOutput, }, event::Event, internal_events::{HttpBytesReceived, HttpDecompressError, StreamClosedError}, @@ -154,13 +154,6 @@ impl SourceConfig for DatadogAgentConfig { .expect("registered log schema required") .clone(); - let metrics_schema_definition = cx - .schema_definitions - .get(&Some(METRICS.to_owned())) - .or_else(|| cx.schema_definitions.get(&None)) - .expect("registered metrics schema required") - .clone(); - let decoder = DecodingConfig::new(self.framing.clone(), self.decoding.clone(), log_namespace).build(); @@ -170,7 +163,6 @@ impl SourceConfig for DatadogAgentConfig { decoder, tls.http_protocol_name(), logs_schema_definition, - metrics_schema_definition, log_namespace, ); let listener = tls.bind(&self.address).await?; @@ -205,7 +197,7 @@ impl SourceConfig for DatadogAgentConfig { })) } - 
fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let definition = self .decoding .schema_definition(global_log_namespace.merge(self.log_namespace)) @@ -255,14 +247,12 @@ impl SourceConfig for DatadogAgentConfig { if self.multiple_outputs { vec![ - Output::default(DataType::Metric).with_port(METRICS), - Output::default(DataType::Log) - .with_schema_definition(definition) - .with_port(LOGS), - Output::default(DataType::Trace).with_port(TRACES), + SourceOutput::new_logs(DataType::Log, definition).with_port(LOGS), + SourceOutput::new_metrics().with_port(METRICS), + SourceOutput::new_traces().with_port(TRACES), ] } else { - vec![Output::default(DataType::all()).with_schema_definition(definition)] + vec![SourceOutput::new_logs(DataType::all(), definition)] } } @@ -299,7 +289,6 @@ pub(crate) struct DatadogAgentSource { pub(crate) decoder: Decoder, protocol: &'static str, logs_schema_definition: Arc, - metrics_schema_definition: Arc, events_received: Registered, } @@ -336,7 +325,6 @@ impl DatadogAgentSource { decoder: Decoder, protocol: &'static str, logs_schema_definition: schema::Definition, - metrics_schema_definition: schema::Definition, log_namespace: LogNamespace, ) -> Self { Self { @@ -350,7 +338,6 @@ impl DatadogAgentSource { decoder, protocol, logs_schema_definition: Arc::new(logs_schema_definition), - metrics_schema_definition: Arc::new(metrics_schema_definition), log_namespace, events_received: register!(EventsReceived), } diff --git a/src/sources/datadog_agent/tests.rs b/src/sources/datadog_agent/tests.rs index e71cbde6c0fc4..d1d629c119e01 100644 --- a/src/sources/datadog_agent/tests.rs +++ b/src/sources/datadog_agent/tests.rs @@ -57,14 +57,6 @@ fn test_logs_schema_definition() -> schema::Definition { ) } -fn test_metrics_schema_definition() -> schema::Definition { - schema::Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("a schema tag"), - 
Kind::boolean().or_null(), - Some("tag"), - ) -} - impl Arbitrary for LogMsg { fn arbitrary(g: &mut Gen) -> Self { LogMsg { @@ -101,7 +93,6 @@ fn test_decode_log_body() { decoder, "http", test_logs_schema_definition(), - test_metrics_schema_definition(), LogNamespace::Legacy, ); @@ -173,10 +164,8 @@ async fn source( address, store_api_key, acknowledgements, multiple_outputs )) .unwrap(); - let schema_definitions = HashMap::from([ - (Some(LOGS.to_owned()), test_logs_schema_definition()), - (Some(METRICS.to_owned()), test_metrics_schema_definition()), - ]); + let schema_definitions = + HashMap::from([(Some(LOGS.to_owned()), test_logs_schema_definition())]); let context = SourceContext::new_test(sender, Some(schema_definitions)); tokio::spawn(async move { config.build(context).await.unwrap().await.unwrap(); @@ -935,13 +924,6 @@ async fn decode_series_endpoint_v1() { &events[3].metadata().datadog_api_key().as_ref().unwrap()[..], "12345678abcdefgh12345678abcdefgh" ); - - for event in events { - assert_eq!( - event.metadata().schema_definition(), - &test_metrics_schema_definition() - ); - } } }) .await; @@ -1044,13 +1026,6 @@ async fn decode_sketches() { &events[0].metadata().datadog_api_key().as_ref().unwrap()[..], "12345678abcdefgh12345678abcdefgh" ); - - for event in events { - assert_eq!( - event.metadata().schema_definition(), - &test_metrics_schema_definition() - ); - } } }) .await; @@ -1405,10 +1380,6 @@ async fn split_outputs() { &event.metadata().datadog_api_key().as_ref().unwrap()[..], "abcdefgh12345678abcdefgh12345678" ); - assert_eq!( - event.metadata().schema_definition(), - &test_metrics_schema_definition() - ); } { @@ -1860,7 +1831,7 @@ fn test_config_outputs() { let mut outputs = config .outputs(LogNamespace::Legacy) .into_iter() - .map(|output| (output.port, output.log_schema_definition)) + .map(|output| (output.port.clone(), output.schema_definition(true))) .collect::>(); for (name, want) in want { @@ -2079,13 +2050,6 @@ async fn 
decode_series_endpoint_v2() { &events[3].metadata().datadog_api_key().as_ref().unwrap()[..], "12345678abcdefgh12345678abcdefgh" ); - - for event in events { - assert_eq!( - event.metadata().schema_definition(), - &test_metrics_schema_definition() - ); - } } }) .await; @@ -2098,54 +2062,55 @@ fn test_output_schema_definition_json_vector_namespace() { decoding.codec = "json" "#}) .unwrap() - .outputs(LogNamespace::Vector)[0] - .clone() - .log_schema_definition - .unwrap(); + .outputs(LogNamespace::Vector) + .remove(0) + .schema_definition(true); assert_eq!( definition, - Definition::new_with_default_metadata(Kind::json(), [LogNamespace::Vector]) - .with_metadata_field( - &owned_value_path!("datadog_agent", "ddsource"), - Kind::bytes(), - Some("source") - ) - .with_metadata_field( - &owned_value_path!("datadog_agent", "ddtags"), - Kind::bytes(), - Some("tags") - ) - .with_metadata_field( - &owned_value_path!("datadog_agent", "hostname"), - Kind::bytes(), - Some("host") - ) - .with_metadata_field( - &owned_value_path!("datadog_agent", "service"), - Kind::bytes(), - Some("service") - ) - .with_metadata_field( - &owned_value_path!("datadog_agent", "status"), - Kind::bytes(), - Some("severity") - ) - .with_metadata_field( - &owned_value_path!("datadog_agent", "timestamp"), - Kind::timestamp(), - Some("timestamp") - ) - .with_metadata_field( - &owned_value_path!("vector", "ingest_timestamp"), - Kind::timestamp(), - None - ) - .with_metadata_field( - &owned_value_path!("vector", "source_type"), - Kind::bytes(), - None - ) + Some( + Definition::new_with_default_metadata(Kind::json(), [LogNamespace::Vector]) + .with_metadata_field( + &owned_value_path!("datadog_agent", "ddsource"), + Kind::bytes(), + Some("source") + ) + .with_metadata_field( + &owned_value_path!("datadog_agent", "ddtags"), + Kind::bytes(), + Some("tags") + ) + .with_metadata_field( + &owned_value_path!("datadog_agent", "hostname"), + Kind::bytes(), + Some("host") + ) + .with_metadata_field( + 
&owned_value_path!("datadog_agent", "service"), + Kind::bytes(), + Some("service") + ) + .with_metadata_field( + &owned_value_path!("datadog_agent", "status"), + Kind::bytes(), + Some("severity") + ) + .with_metadata_field( + &owned_value_path!("datadog_agent", "timestamp"), + Kind::timestamp(), + Some("timestamp") + ) + .with_metadata_field( + &owned_value_path!("vector", "ingest_timestamp"), + Kind::timestamp(), + None + ) + .with_metadata_field( + &owned_value_path!("vector", "source_type"), + Kind::bytes(), + None + ) + ) ) } @@ -2156,55 +2121,56 @@ fn test_output_schema_definition_bytes_vector_namespace() { decoding.codec = "bytes" "#}) .unwrap() - .outputs(LogNamespace::Vector)[0] - .clone() - .log_schema_definition - .unwrap(); + .outputs(LogNamespace::Vector) + .remove(0) + .schema_definition(true); assert_eq!( definition, - Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) - .with_metadata_field( - &owned_value_path!("datadog_agent", "ddsource"), - Kind::bytes(), - Some("source") - ) - .with_metadata_field( - &owned_value_path!("datadog_agent", "ddtags"), - Kind::bytes(), - Some("tags") - ) - .with_metadata_field( - &owned_value_path!("datadog_agent", "hostname"), - Kind::bytes(), - Some("host") - ) - .with_metadata_field( - &owned_value_path!("datadog_agent", "service"), - Kind::bytes(), - Some("service") - ) - .with_metadata_field( - &owned_value_path!("datadog_agent", "status"), - Kind::bytes(), - Some("severity") - ) - .with_metadata_field( - &owned_value_path!("datadog_agent", "timestamp"), - Kind::timestamp(), - Some("timestamp") - ) - .with_metadata_field( - &owned_value_path!("vector", "ingest_timestamp"), - Kind::timestamp(), - None - ) - .with_metadata_field( - &owned_value_path!("vector", "source_type"), - Kind::bytes(), - None - ) - .with_meaning(OwnedTargetPath::event_root(), "message") + Some( + Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) + .with_metadata_field( + 
&owned_value_path!("datadog_agent", "ddsource"), + Kind::bytes(), + Some("source") + ) + .with_metadata_field( + &owned_value_path!("datadog_agent", "ddtags"), + Kind::bytes(), + Some("tags") + ) + .with_metadata_field( + &owned_value_path!("datadog_agent", "hostname"), + Kind::bytes(), + Some("host") + ) + .with_metadata_field( + &owned_value_path!("datadog_agent", "service"), + Kind::bytes(), + Some("service") + ) + .with_metadata_field( + &owned_value_path!("datadog_agent", "status"), + Kind::bytes(), + Some("severity") + ) + .with_metadata_field( + &owned_value_path!("datadog_agent", "timestamp"), + Kind::timestamp(), + Some("timestamp") + ) + .with_metadata_field( + &owned_value_path!("vector", "ingest_timestamp"), + Kind::timestamp(), + None + ) + .with_metadata_field( + &owned_value_path!("vector", "source_type"), + Kind::bytes(), + None + ) + .with_meaning(OwnedTargetPath::event_root(), "message") + ) ) } @@ -2215,25 +2181,26 @@ fn test_output_schema_definition_json_legacy_namespace() { decoding.codec = "json" "#}) .unwrap() - .outputs(LogNamespace::Legacy)[0] - .clone() - .log_schema_definition - .unwrap(); + .outputs(LogNamespace::Legacy) + .remove(0) + .schema_definition(true); assert_eq!( definition, - Definition::new_with_default_metadata(Kind::json(), [LogNamespace::Legacy]) - .with_event_field( - &owned_value_path!("timestamp"), - Kind::json().or_timestamp(), - None - ) - .with_event_field(&owned_value_path!("ddsource"), Kind::json(), None) - .with_event_field(&owned_value_path!("ddtags"), Kind::json(), None) - .with_event_field(&owned_value_path!("hostname"), Kind::json(), None) - .with_event_field(&owned_value_path!("service"), Kind::json(), None) - .with_event_field(&owned_value_path!("source_type"), Kind::json(), None) - .with_event_field(&owned_value_path!("status"), Kind::json(), None) + Some( + Definition::new_with_default_metadata(Kind::json(), [LogNamespace::Legacy]) + .with_event_field( + &owned_value_path!("timestamp"), + 
Kind::json().or_timestamp(), + None + ) + .with_event_field(&owned_value_path!("ddsource"), Kind::json(), None) + .with_event_field(&owned_value_path!("ddtags"), Kind::json(), None) + .with_event_field(&owned_value_path!("hostname"), Kind::json(), None) + .with_event_field(&owned_value_path!("service"), Kind::json(), None) + .with_event_field(&owned_value_path!("source_type"), Kind::json(), None) + .with_event_field(&owned_value_path!("status"), Kind::json(), None) + ) ) } @@ -2244,44 +2211,45 @@ fn test_output_schema_definition_bytes_legacy_namespace() { decoding.codec = "bytes" "#}) .unwrap() - .outputs(LogNamespace::Legacy)[0] - .clone() - .log_schema_definition - .unwrap(); + .outputs(LogNamespace::Legacy) + .remove(0) + .schema_definition(true); assert_eq!( definition, - Definition::new_with_default_metadata( - Kind::object(Collection::empty()), - [LogNamespace::Legacy] - ) - .with_event_field( - &owned_value_path!("ddsource"), - Kind::bytes(), - Some("source") - ) - .with_event_field(&owned_value_path!("ddtags"), Kind::bytes(), Some("tags")) - .with_event_field(&owned_value_path!("hostname"), Kind::bytes(), Some("host")) - .with_event_field( - &owned_value_path!("message"), - Kind::bytes(), - Some("message") - ) - .with_event_field( - &owned_value_path!("service"), - Kind::bytes(), - Some("service") - ) - .with_event_field(&owned_value_path!("source_type"), Kind::bytes(), None) - .with_event_field( - &owned_value_path!("status"), - Kind::bytes(), - Some("severity") - ) - .with_event_field( - &owned_value_path!("timestamp"), - Kind::timestamp(), - Some("timestamp") + Some( + Definition::new_with_default_metadata( + Kind::object(Collection::empty()), + [LogNamespace::Legacy] + ) + .with_event_field( + &owned_value_path!("ddsource"), + Kind::bytes(), + Some("source") + ) + .with_event_field(&owned_value_path!("ddtags"), Kind::bytes(), Some("tags")) + .with_event_field(&owned_value_path!("hostname"), Kind::bytes(), Some("host")) + .with_event_field( + 
&owned_value_path!("message"), + Kind::bytes(), + Some("message") + ) + .with_event_field( + &owned_value_path!("service"), + Kind::bytes(), + Some("service") + ) + .with_event_field(&owned_value_path!("source_type"), Kind::bytes(), None) + .with_event_field( + &owned_value_path!("status"), + Kind::bytes(), + Some("severity") + ) + .with_event_field( + &owned_value_path!("timestamp"), + Kind::timestamp(), + Some("timestamp") + ) ) ) } diff --git a/src/sources/demo_logs.rs b/src/sources/demo_logs.rs index c6aa841e6f751..9f0b6a0a098f7 100644 --- a/src/sources/demo_logs.rs +++ b/src/sources/demo_logs.rs @@ -19,7 +19,7 @@ use vector_core::{config::LogNamespace, EstimatedJsonEncodedSizeOf}; use crate::{ codecs::{Decoder, DecodingConfig}, - config::{Output, SourceConfig, SourceContext}, + config::{SourceConfig, SourceContext, SourceOutput}, internal_events::{DemoLogsEventProcessed, EventsReceived, StreamClosedError}, serde::{default_decoding, default_framing_message_based}, shutdown::ShutdownSignal, @@ -292,7 +292,7 @@ impl SourceConfig for DemoLogsConfig { ))) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { // There is a global and per-source `log_namespace` config. The source config overrides the global setting, // and is merged here. 
let log_namespace = global_log_namespace.merge(self.log_namespace); @@ -302,7 +302,10 @@ impl SourceConfig for DemoLogsConfig { .schema_definition(log_namespace) .with_standard_vector_source_metadata(); - vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs( + self.decoding.output_type(), + schema_definition, + )] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/dnstap/mod.rs b/src/sources/dnstap/mod.rs index 3c105f9473fe7..cb61a2876d76b 100644 --- a/src/sources/dnstap/mod.rs +++ b/src/sources/dnstap/mod.rs @@ -11,7 +11,7 @@ use vector_config::configurable_component; use super::util::framestream::{build_framestream_unix_source, FrameHandler}; use crate::{ - config::{log_schema, DataType, Output, SourceConfig, SourceContext}, + config::{log_schema, DataType, SourceConfig, SourceContext, SourceOutput}, event::{Event, LogEvent}, internal_events::DnstapParseError, Result, @@ -182,12 +182,12 @@ impl SourceConfig for DnstapConfig { build_framestream_unix_source(frame_handler, cx.shutdown, cx.out) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let schema_definition = self .schema_definition(log_namespace) .with_standard_vector_source_metadata(); - vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs(DataType::Log, schema_definition)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/docker_logs/mod.rs b/src/sources/docker_logs/mod.rs index 6544589845738..2f7bca9ec7ed7 100644 --- a/src/sources/docker_logs/mod.rs +++ b/src/sources/docker_logs/mod.rs @@ -30,7 +30,7 @@ use vector_core::config::{LegacyKey, LogNamespace}; use super::util::MultilineConfig; use crate::{ - config::{log_schema, DataType, Output, SourceConfig, SourceContext}, + config::{log_schema, DataType, 
SourceConfig, SourceContext, SourceOutput}, docker::{docker, DockerTlsConfig}, event::{self, merge_state::LogEventMergeState, EstimatedJsonEncodedSizeOf, LogEvent, Value}, internal_events::{ @@ -272,7 +272,7 @@ impl SourceConfig for DockerLogsConfig { })) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let host_key = self.host_key.clone().path.map(LegacyKey::Overwrite); let schema_definition = BytesDeserializerConfig @@ -351,7 +351,7 @@ impl SourceConfig for DockerLogsConfig { None, ); - vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs(DataType::Log, schema_definition)] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/docker_logs/tests.rs b/src/sources/docker_logs/tests.rs index 0f8cd0981c069..04c9a05c37bb0 100644 --- a/src/sources/docker_logs/tests.rs +++ b/src/sources/docker_logs/tests.rs @@ -293,13 +293,12 @@ mod integration_tests { #[tokio::test] async fn container_with_tty_vector_namespace() { trace_init(); - let schema_definition = DockerLogsConfig::default() + let schema_definitions = DockerLogsConfig::default() .outputs(LogNamespace::Vector) .first() .unwrap() - .log_schema_definition - .clone() - .unwrap(); + .schema_definition + .clone(); assert_source_compliance(&SOURCE_TAGS, async { let message = "log container_with_tty"; @@ -313,7 +312,9 @@ mod integration_tests { let events = collect_n(out, 1).await; container_remove(&id, &docker).await; - schema_definition.assert_valid_for_event(&events[0]); + schema_definitions + .unwrap() + .assert_valid_for_event(&events[0]); assert_eq!(events[0].as_log().get(".").unwrap(), &vrl::value!(message)); }) .await; @@ -322,13 +323,12 @@ mod integration_tests { #[tokio::test] async fn container_with_tty_legacy_namespace() { trace_init(); - let schema_definition = DockerLogsConfig::default() + let schema_definitions = DockerLogsConfig::default() 
.outputs(LogNamespace::Legacy) .first() .unwrap() - .log_schema_definition - .clone() - .unwrap(); + .schema_definition + .clone(); assert_source_compliance(&SOURCE_TAGS, async { let message = "log container_with_tty"; @@ -342,7 +342,9 @@ mod integration_tests { let events = collect_n(out, 1).await; container_remove(&id, &docker).await; - schema_definition.assert_valid_for_event(&events[0]); + schema_definitions + .unwrap() + .assert_valid_for_event(&events[0]); assert_eq!( events[0].as_log()[log_schema().message_key()], message.into() @@ -354,13 +356,12 @@ mod integration_tests { #[tokio::test] async fn newly_started_vector_namespace() { trace_init(); - let schema_definition = DockerLogsConfig::default() + let schema_definitions = DockerLogsConfig::default() .outputs(LogNamespace::Vector) .first() .unwrap() - .log_schema_definition - .clone() - .unwrap(); + .schema_definition + .clone(); assert_source_compliance(&SOURCE_TAGS, async { let message = "9"; @@ -375,7 +376,9 @@ mod integration_tests { let events = collect_n(out, 1).await; container_remove(&id, &docker).await; - schema_definition.assert_valid_for_event(&events[0]); + schema_definitions + .unwrap() + .assert_valid_for_event(&events[0]); let log = events[0].as_log(); let meta = log.metadata().value(); @@ -413,13 +416,12 @@ mod integration_tests { #[tokio::test] async fn newly_started_legacy_namespace() { trace_init(); - let schema_definition = DockerLogsConfig::default() + let schema_definitions = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .log_schema_definition - .clone() - .unwrap(); + .schema_definition + .clone(); assert_source_compliance(&SOURCE_TAGS, async { let message = "9"; @@ -434,7 +436,9 @@ mod integration_tests { let events = collect_n(out, 1).await; container_remove(&id, &docker).await; - schema_definition.assert_valid_for_event(&events[0]); + schema_definitions + .unwrap() + .assert_valid_for_event(&events[0]); let log = events[0].as_log(); 
assert_eq!(log[log_schema().message_key()], message.into()); assert_eq!(log[CONTAINER], id.into()); @@ -453,13 +457,12 @@ mod integration_tests { #[tokio::test] async fn restart_legacy_namespace() { trace_init(); - let schema_definition = DockerLogsConfig::default() + let schema_definitions = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .log_schema_definition - .clone() - .unwrap(); + .schema_definition + .clone(); assert_source_compliance(&SOURCE_TAGS, async { let message = "10"; @@ -473,12 +476,14 @@ mod integration_tests { let events = collect_n(out, 2).await; container_remove(&id, &docker).await; - schema_definition.assert_valid_for_event(&events[0]); + let definition = schema_definitions.unwrap(); + + definition.assert_valid_for_event(&events[0]); assert_eq!( events[0].as_log()[log_schema().message_key()], message.into() ); - schema_definition.assert_valid_for_event(&events[1]); + definition.assert_valid_for_event(&events[1]); assert_eq!( events[1].as_log()[log_schema().message_key()], message.into() @@ -490,13 +495,12 @@ mod integration_tests { #[tokio::test] async fn include_containers_legacy_namespace() { trace_init(); - let schema_definition = DockerLogsConfig::default() + let schema_definitions = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .log_schema_definition - .clone() - .unwrap(); + .schema_definition + .clone(); assert_source_compliance(&SOURCE_TAGS, async { let message = "11"; @@ -513,7 +517,9 @@ mod integration_tests { container_remove(&id0, &docker).await; container_remove(&id1, &docker).await; - schema_definition.assert_valid_for_event(&events[0]); + schema_definitions + .unwrap() + .assert_valid_for_event(&events[0]); assert_eq!( events[0].as_log()[log_schema().message_key()], message.into() @@ -525,13 +531,12 @@ mod integration_tests { #[tokio::test] async fn exclude_containers_legacy_namespace() { trace_init(); - let schema_definition = DockerLogsConfig::default() + let 
schema_definitions = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .log_schema_definition - .clone() - .unwrap(); + .schema_definition + .clone(); assert_source_compliance(&SOURCE_TAGS, async { let will_be_read = "12"; @@ -561,13 +566,15 @@ mod integration_tests { assert_eq!(events.len(), 2); - schema_definition.assert_valid_for_event(&events[0]); + let definition = schema_definitions.unwrap(); + + definition.assert_valid_for_event(&events[0]); assert_eq!( events[0].as_log()[log_schema().message_key()], will_be_read.into() ); - schema_definition.assert_valid_for_event(&events[1]); + definition.assert_valid_for_event(&events[1]); assert_eq!( events[1].as_log()[log_schema().message_key()], will_be_read.into() @@ -579,13 +586,12 @@ mod integration_tests { #[tokio::test] async fn include_labels_legacy_namespace() { trace_init(); - let schema_definition = DockerLogsConfig::default() + let schema_definitions = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .log_schema_definition - .clone() - .unwrap(); + .schema_definition + .clone(); assert_source_compliance(&SOURCE_TAGS, async { let message = "13"; @@ -603,7 +609,9 @@ mod integration_tests { container_remove(&id0, &docker).await; container_remove(&id1, &docker).await; - schema_definition.assert_valid_for_event(&events[0]); + schema_definitions + .unwrap() + .assert_valid_for_event(&events[0]); assert_eq!( events[0].as_log()[log_schema().message_key()], message.into() @@ -615,13 +623,12 @@ mod integration_tests { #[tokio::test] async fn currently_running_legacy_namespace() { trace_init(); - let schema_definition = DockerLogsConfig::default() + let schema_definitions = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .log_schema_definition - .clone() - .unwrap(); + .schema_definition + .clone(); assert_source_compliance(&SOURCE_TAGS, async { let message = "14"; @@ -636,7 +643,9 @@ mod integration_tests { let _ = 
container_kill(&id, &docker).await; container_remove(&id, &docker).await; - schema_definition.assert_valid_for_event(&events[0]); + schema_definitions + .unwrap() + .assert_valid_for_event(&events[0]); let log = events[0].as_log(); assert_eq!(log[log_schema().message_key()], message.into()); assert_eq!(log[CONTAINER], id.into()); @@ -655,13 +664,12 @@ mod integration_tests { #[tokio::test] async fn include_image_legacy_namespace() { trace_init(); - let schema_definition = DockerLogsConfig::default() + let schema_definitions = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .log_schema_definition - .clone() - .unwrap(); + .schema_definition + .clone(); assert_source_compliance(&SOURCE_TAGS, async { let message = "15"; @@ -680,7 +688,9 @@ mod integration_tests { let events = collect_n(out, 1).await; container_remove(&id, &docker).await; - schema_definition.assert_valid_for_event(&events[0]); + schema_definitions + .unwrap() + .assert_valid_for_event(&events[0]); assert_eq!( events[0].as_log()[log_schema().message_key()], message.into() @@ -748,13 +758,12 @@ mod integration_tests { #[tokio::test] async fn flat_labels_legacy_namespace() { trace_init(); - let schema_definition = DockerLogsConfig::default() + let schema_definitions = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .log_schema_definition - .clone() - .unwrap(); + .schema_definition + .clone(); assert_source_compliance(&SOURCE_TAGS, async { let message = "18"; @@ -769,7 +778,9 @@ mod integration_tests { let _ = container_kill(&id, &docker).await; container_remove(&id, &docker).await; - schema_definition.assert_valid_for_event(&events[0]); + schema_definitions + .unwrap() + .assert_valid_for_event(&events[0]); let log = events[0].as_log(); assert_eq!(log[log_schema().message_key()], message.into()); assert_eq!(log[CONTAINER], id.into()); @@ -794,13 +805,12 @@ mod integration_tests { #[tokio::test] async fn 
log_longer_than_16kb_legacy_namespace() { trace_init(); - let schema_definition = DockerLogsConfig::default() + let schema_definitions = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .log_schema_definition - .clone() - .unwrap(); + .schema_definition + .clone(); assert_source_compliance(&SOURCE_TAGS, async { let mut message = String::with_capacity(20 * 1024); @@ -817,7 +827,9 @@ mod integration_tests { let events = collect_n(out, 1).await; container_remove(&id, &docker).await; - schema_definition.assert_valid_for_event(&events[0]); + schema_definitions + .unwrap() + .assert_valid_for_event(&events[0]); let log = events[0].as_log(); assert_eq!(log[log_schema().message_key()], message.into()); }) @@ -828,11 +840,11 @@ mod integration_tests { async fn merge_multiline_vector_namespace() { assert_source_compliance(&SOURCE_TAGS, async { trace_init(); - let schema_definition = DockerLogsConfig::default() + let schema_definitions = DockerLogsConfig::default() .outputs(LogNamespace::Vector) .first() .unwrap() - .log_schema_definition + .schema_definition .clone() .unwrap(); @@ -881,7 +893,7 @@ mod integration_tests { let actual_messages = events .into_iter() .map(|event| { - schema_definition.assert_valid_for_event(&event); + schema_definitions.assert_valid_for_event(&event); event .into_log() @@ -900,11 +912,11 @@ mod integration_tests { async fn merge_multiline_legacy_namespace() { assert_source_compliance(&SOURCE_TAGS, async { trace_init(); - let schema_definition = DockerLogsConfig::default() + let schema_definitions = DockerLogsConfig::default() .outputs(LogNamespace::Legacy) .first() .unwrap() - .log_schema_definition + .schema_definition .clone() .unwrap(); @@ -952,7 +964,7 @@ mod integration_tests { let actual_messages = events .into_iter() .map(|event| { - schema_definition.assert_valid_for_event(&event); + schema_definitions.assert_valid_for_event(&event); event .into_log() diff --git a/src/sources/eventstoredb_metrics/mod.rs 
b/src/sources/eventstoredb_metrics/mod.rs index c46cb9cecb048..57017950f56be 100644 --- a/src/sources/eventstoredb_metrics/mod.rs +++ b/src/sources/eventstoredb_metrics/mod.rs @@ -14,7 +14,7 @@ use vector_core::EstimatedJsonEncodedSizeOf; use self::types::Stats; use crate::{ - config::{self, Output, SourceConfig, SourceContext}, + config::{SourceConfig, SourceContext, SourceOutput}, http::HttpClient, internal_events::{ EventStoreDbMetricsHttpError, EventStoreDbStatsParsingError, EventsReceived, @@ -72,8 +72,8 @@ impl SourceConfig for EventStoreDbConfig { ) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![Output::default(config::DataType::Metric)] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![SourceOutput::new_metrics()] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/exec/mod.rs b/src/sources/exec/mod.rs index 3facbb16a7dad..0bb58985c73f7 100644 --- a/src/sources/exec/mod.rs +++ b/src/sources/exec/mod.rs @@ -27,7 +27,7 @@ use vector_core::{config::LegacyKey, EstimatedJsonEncodedSizeOf}; use crate::{ codecs::{Decoder, DecodingConfig}, - config::{Output, SourceConfig, SourceContext}, + config::{SourceConfig, SourceContext, SourceOutput}, event::Event, internal_events::{ ExecChannelClosedError, ExecCommandExecuted, ExecEventsReceived, ExecFailedError, @@ -266,7 +266,7 @@ impl SourceConfig for ExecConfig { } } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(Some(self.log_namespace.unwrap_or(false))); let schema_definition = self @@ -304,7 +304,10 @@ impl SourceConfig for ExecConfig { None, ); - vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs( + self.decoding.output_type(), + schema_definition, + )] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/file.rs b/src/sources/file.rs index 
a70bb8a52f8cc..b3644a6aec5d1 100644 --- a/src/sources/file.rs +++ b/src/sources/file.rs @@ -24,7 +24,8 @@ use vector_core::config::{LegacyKey, LogNamespace}; use super::util::{EncodingConfig, MultilineConfig}; use crate::{ config::{ - log_schema, DataType, Output, SourceAcknowledgementsConfig, SourceConfig, SourceContext, + log_schema, DataType, SourceAcknowledgementsConfig, SourceConfig, SourceContext, + SourceOutput, }, encoding_transcode::{Decoder, Encoder}, event::{BatchNotifier, BatchStatus, LogEvent}, @@ -431,7 +432,7 @@ impl SourceConfig for FileConfig { )) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let file_key = self.file_key.clone().path.map(LegacyKey::Overwrite); let host_key = self.host_key.clone().path.map(LegacyKey::Overwrite); @@ -466,7 +467,7 @@ impl SourceConfig for FileConfig { None, ); - vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs(DataType::Log, schema_definition)] } fn can_acknowledge(&self) -> bool { @@ -943,62 +944,70 @@ mod tests { #[test] fn output_schema_definition_vector_namespace() { - let definition = FileConfig::default().outputs(LogNamespace::Vector)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = FileConfig::default() + .outputs(LogNamespace::Vector) + .remove(0) + .schema_definition(true); assert_eq!( - definition, - Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) - .with_meaning(OwnedTargetPath::event_root(), "message") - .with_metadata_field( - &owned_value_path!("vector", "source_type"), - Kind::bytes(), - None - ) - .with_metadata_field( - &owned_value_path!("vector", "ingest_timestamp"), - Kind::timestamp(), - None - ) - .with_metadata_field( - &owned_value_path!("file", "host"), - Kind::bytes().or_undefined(), - Some("host") - ) - .with_metadata_field(&owned_value_path!("file", "offset"), Kind::integer(), None) - 
.with_metadata_field(&owned_value_path!("file", "path"), Kind::bytes(), None) + definitions, + Some( + Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) + .with_meaning(OwnedTargetPath::event_root(), "message") + .with_metadata_field( + &owned_value_path!("vector", "source_type"), + Kind::bytes(), + None + ) + .with_metadata_field( + &owned_value_path!("vector", "ingest_timestamp"), + Kind::timestamp(), + None + ) + .with_metadata_field( + &owned_value_path!("file", "host"), + Kind::bytes().or_undefined(), + Some("host") + ) + .with_metadata_field( + &owned_value_path!("file", "offset"), + Kind::integer(), + None + ) + .with_metadata_field(&owned_value_path!("file", "path"), Kind::bytes(), None) + ) ) } #[test] fn output_schema_definition_legacy_namespace() { - let definition = FileConfig::default().outputs(LogNamespace::Legacy)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = FileConfig::default() + .outputs(LogNamespace::Legacy) + .remove(0) + .schema_definition(true); assert_eq!( - definition, - Definition::new_with_default_metadata( - Kind::object(Collection::empty()), - [LogNamespace::Legacy] - ) - .with_event_field( - &owned_value_path!("message"), - Kind::bytes(), - Some("message") - ) - .with_event_field(&owned_value_path!("source_type"), Kind::bytes(), None) - .with_event_field(&owned_value_path!("timestamp"), Kind::timestamp(), None) - .with_event_field( - &owned_value_path!("host"), - Kind::bytes().or_undefined(), - Some("host") + definitions, + Some( + Definition::new_with_default_metadata( + Kind::object(Collection::empty()), + [LogNamespace::Legacy] + ) + .with_event_field( + &owned_value_path!("message"), + Kind::bytes(), + Some("message") + ) + .with_event_field(&owned_value_path!("source_type"), Kind::bytes(), None) + .with_event_field(&owned_value_path!("timestamp"), Kind::timestamp(), None) + .with_event_field( + &owned_value_path!("host"), + Kind::bytes().or_undefined(), + Some("host") + ) + 
.with_event_field(&owned_value_path!("offset"), Kind::undefined(), None) + .with_event_field(&owned_value_path!("file"), Kind::bytes(), None) ) - .with_event_field(&owned_value_path!("offset"), Kind::undefined(), None) - .with_event_field(&owned_value_path!("file"), Kind::bytes(), None) ) } diff --git a/src/sources/file_descriptors/file_descriptor.rs b/src/sources/file_descriptors/file_descriptor.rs index 95b8d2be97a68..008ac2b43773a 100644 --- a/src/sources/file_descriptors/file_descriptor.rs +++ b/src/sources/file_descriptors/file_descriptor.rs @@ -8,7 +8,7 @@ use vector_config::configurable_component; use vector_core::config::LogNamespace; use crate::{ - config::{GenerateConfig, Output, Resource, SourceConfig, SourceContext}, + config::{GenerateConfig, Resource, SourceConfig, SourceContext, SourceOutput}, serde::default_decoding, }; /// Configuration for the `file_descriptor` source. @@ -83,7 +83,7 @@ impl SourceConfig for FileDescriptorSourceConfig { self.source(pipe, cx.shutdown, cx.out, log_namespace) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); outputs(log_namespace, &self.host_key, &self.decoding, Self::NAME) diff --git a/src/sources/file_descriptors/mod.rs b/src/sources/file_descriptors/mod.rs index 0eb514d756c9e..68c8df41f60db 100644 --- a/src/sources/file_descriptors/mod.rs +++ b/src/sources/file_descriptors/mod.rs @@ -19,14 +19,14 @@ use vector_common::internal_event::{ }; use vector_config::NamedComponent; use vector_core::{ - config::{LegacyKey, LogNamespace, Output}, + config::{LegacyKey, LogNamespace}, event::Event, EstimatedJsonEncodedSizeOf, }; use crate::{ codecs::{Decoder, DecodingConfig}, - config::log_schema, + config::{log_schema, SourceOutput}, internal_events::{EventsReceived, FileDescriptorReadError, StreamClosedError}, shutdown::ShutdownSignal, SourceSender, @@ -210,7 +210,7 @@ fn 
outputs( host_key: &Option, decoding: &DeserializerConfig, source_name: &'static str, -) -> Vec { +) -> Vec { let legacy_host_key = Some(LegacyKey::InsertIfEmpty( host_key.clone().and_then(|k| k.path).unwrap_or_else(|| { parse_value_path(log_schema().host_key()).expect("log_schema.host_key to be valid path") @@ -228,5 +228,8 @@ fn outputs( ) .with_standard_vector_source_metadata(); - vec![Output::default(decoding.output_type()).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs( + decoding.output_type(), + schema_definition, + )] } diff --git a/src/sources/file_descriptors/stdin.rs b/src/sources/file_descriptors/stdin.rs index f63cc4a6ac857..c3ed61d901e63 100644 --- a/src/sources/file_descriptors/stdin.rs +++ b/src/sources/file_descriptors/stdin.rs @@ -6,7 +6,7 @@ use vector_config::configurable_component; use vector_core::config::LogNamespace; use crate::{ - config::{Output, Resource, SourceConfig, SourceContext}, + config::{Resource, SourceConfig, SourceContext, SourceOutput}, serde::default_decoding, }; @@ -90,7 +90,7 @@ impl SourceConfig for StdinConfig { ) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); outputs(log_namespace, &self.host_key, &self.decoding, Self::NAME) diff --git a/src/sources/fluent/mod.rs b/src/sources/fluent/mod.rs index f7662f076070c..eadf8abd556e3 100644 --- a/src/sources/fluent/mod.rs +++ b/src/sources/fluent/mod.rs @@ -22,8 +22,8 @@ use vector_core::schema::Definition; use super::util::net::{SocketListenAddr, TcpSource, TcpSourceAck, TcpSourceAcker}; use crate::{ config::{ - log_schema, DataType, GenerateConfig, Output, Resource, SourceAcknowledgementsConfig, - SourceConfig, SourceContext, + log_schema, DataType, GenerateConfig, Resource, SourceAcknowledgementsConfig, SourceConfig, + SourceContext, SourceOutput, }, event::{Event, LogEvent}, 
internal_events::{FluentMessageDecodeError, FluentMessageReceived}, @@ -114,11 +114,11 @@ impl SourceConfig for FluentConfig { ) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let schema_definition = self.schema_definition(log_namespace); - vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs(DataType::Log, schema_definition)] } fn resources(&self) -> Vec { @@ -960,10 +960,10 @@ mod tests { log_namespace: Some(true), }; - let definition = config.outputs(LogNamespace::Vector)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Vector) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) @@ -1000,7 +1000,7 @@ mod tests { None, ); - assert_eq!(definition, expected_definition) + assert_eq!(definitions, Some(expected_definition)) } #[test] @@ -1015,10 +1015,10 @@ mod tests { log_namespace: None, }; - let definition = config.outputs(LogNamespace::Legacy)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Legacy) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -1035,7 +1035,7 @@ mod tests { .with_event_field(&owned_value_path!("host"), Kind::bytes(), Some("host")) .unknown_fields(Kind::bytes()); - assert_eq!(definition, expected_definition) + assert_eq!(definitions, Some(expected_definition)) } } diff --git a/src/sources/gcp_pubsub.rs b/src/sources/gcp_pubsub.rs index 423dfbfe0a972..3cd5836089fbe 100644 --- a/src/sources/gcp_pubsub.rs +++ b/src/sources/gcp_pubsub.rs @@ -30,7 +30,7 @@ use vector_core::config::{LegacyKey, LogNamespace}; use crate::{ codecs::{Decoder, 
DecodingConfig}, - config::{DataType, Output, SourceAcknowledgementsConfig, SourceConfig, SourceContext}, + config::{DataType, SourceAcknowledgementsConfig, SourceConfig, SourceContext, SourceOutput}, event::{BatchNotifier, BatchStatus, Event, MaybeAsLogMut, Value}, gcp::{GcpAuthConfig, GcpAuthenticator, Scope, PUBSUB_URL}, internal_events::{ @@ -326,7 +326,7 @@ impl SourceConfig for PubsubConfig { Ok(Box::pin(source)) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let schema_definition = self .decoding @@ -354,7 +354,7 @@ impl SourceConfig for PubsubConfig { None, ); - vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs(DataType::Log, schema_definition)] } fn can_acknowledge(&self) -> bool { @@ -756,10 +756,10 @@ mod tests { ..Default::default() }; - let definition = config.outputs(LogNamespace::Vector)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Vector) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) @@ -790,17 +790,17 @@ mod tests { None, ); - assert_eq!(definition, expected_definition); + assert_eq!(definitions, Some(expected_definition)); } #[test] fn output_schema_definition_legacy_namespace() { let config = PubsubConfig::default(); - let definition = config.outputs(LogNamespace::Legacy)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Legacy) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -824,7 +824,7 @@ mod tests { ) .with_event_field(&owned_value_path!("message_id"), Kind::bytes(), None); - assert_eq!(definition, expected_definition); + 
assert_eq!(definitions, Some(expected_definition)); } } diff --git a/src/sources/heroku_logs.rs b/src/sources/heroku_logs.rs index be9671b803026..787b3db9db9c0 100644 --- a/src/sources/heroku_logs.rs +++ b/src/sources/heroku_logs.rs @@ -26,8 +26,8 @@ use vector_core::{ use crate::{ codecs::{Decoder, DecodingConfig}, config::{ - log_schema, GenerateConfig, Output, Resource, SourceAcknowledgementsConfig, SourceConfig, - SourceContext, + log_schema, GenerateConfig, Resource, SourceAcknowledgementsConfig, SourceConfig, + SourceContext, SourceOutput, }, event::{Event, LogEvent}, internal_events::{HerokuLogplexRequestReadError, HerokuLogplexRequestReceived}, @@ -182,11 +182,14 @@ impl SourceConfig for LogplexConfig { ) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { // There is a global and per-source `log_namespace` config. // The source config overrides the global setting and is merged here. let schema_def = self.schema_definition(global_log_namespace.merge(self.log_namespace)); - vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_def)] + vec![SourceOutput::new_logs( + self.decoding.output_type(), + schema_def, + )] } fn resources(&self) -> Vec { @@ -660,10 +663,10 @@ mod tests { ..Default::default() }; - let definition = config.outputs(LogNamespace::Vector)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Vector) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) @@ -704,17 +707,17 @@ mod tests { None, ); - assert_eq!(definition, expected_definition) + assert_eq!(definitions, Some(expected_definition)) } #[test] fn output_schema_definition_legacy_namespace() { let config = LogplexConfig::default(); - let definition = config.outputs(LogNamespace::Legacy)[0] - .clone() - .log_schema_definition - .unwrap(); + let 
definitions = config + .outputs(LogNamespace::Legacy) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -732,6 +735,6 @@ mod tests { .with_event_field(&owned_value_path!("proc_id"), Kind::bytes(), None) .unknown_fields(Kind::bytes()); - assert_eq!(definition, expected_definition) + assert_eq!(definitions, Some(expected_definition)) } } diff --git a/src/sources/host_metrics/mod.rs b/src/sources/host_metrics/mod.rs index 332a5b5a80e2b..d75ab4d0420f9 100644 --- a/src/sources/host_metrics/mod.rs +++ b/src/sources/host_metrics/mod.rs @@ -20,7 +20,7 @@ use vector_core::config::LogNamespace; use vector_core::EstimatedJsonEncodedSizeOf; use crate::{ - config::{DataType, Output, SourceConfig, SourceContext}, + config::{SourceConfig, SourceContext, SourceOutput}, event::metric::{Metric, MetricKind, MetricTags, MetricValue}, internal_events::{EventsReceived, HostMetricsScrapeDetailError, StreamClosedError}, shutdown::ShutdownSignal, @@ -267,8 +267,8 @@ impl SourceConfig for HostMetricsConfig { Ok(Box::pin(config.run(cx.out, cx.shutdown))) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![Output::default(DataType::Metric)] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![SourceOutput::new_metrics()] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/http_client/client.rs b/src/sources/http_client/client.rs index 2eea222563a68..61c7a6c572687 100644 --- a/src/sources/http_client/client.rs +++ b/src/sources/http_client/client.rs @@ -33,7 +33,7 @@ use codecs::{ }; use vector_config::configurable_component; use vector_core::{ - config::{log_schema, LogNamespace, Output}, + config::{log_schema, LogNamespace, SourceOutput}, event::Event, }; @@ -206,7 +206,7 @@ impl SourceConfig for HttpClientConfig { Ok(call(inputs, context, cx.out, self.method).boxed()) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn 
outputs(&self, global_log_namespace: LogNamespace) -> Vec { // There is a global and per-source `log_namespace` config. The source config overrides the global setting, // and is merged here. let log_namespace = global_log_namespace.merge(self.log_namespace); @@ -216,7 +216,10 @@ impl SourceConfig for HttpClientConfig { .schema_definition(log_namespace) .with_standard_vector_source_metadata(); - vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs( + self.decoding.output_type(), + schema_definition, + )] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/http_server.rs b/src/sources/http_server.rs index 0d416392e9876..eb0b7a33a41c6 100644 --- a/src/sources/http_server.rs +++ b/src/sources/http_server.rs @@ -23,7 +23,8 @@ use crate::{ codecs::{Decoder, DecodingConfig}, components::validation::*, config::{ - GenerateConfig, Output, Resource, SourceAcknowledgementsConfig, SourceConfig, SourceContext, + GenerateConfig, Resource, SourceAcknowledgementsConfig, SourceConfig, SourceContext, + SourceOutput, }, event::{Event, Value}, register_validatable_component, @@ -54,7 +55,7 @@ impl SourceConfig for HttpConfig { self.0.build(cx).await } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { self.0.outputs(global_log_namespace) } @@ -335,20 +336,20 @@ impl SourceConfig for SimpleHttpConfig { ) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { // There is a global and per-source `log_namespace` config. // The source config overrides the global setting and is merged here. 
let log_namespace = global_log_namespace.merge(self.log_namespace); let schema_definition = self.schema_definition(log_namespace); - vec![Output::default( + vec![SourceOutput::new_logs( self.decoding .as_ref() .map(|d| d.output_type()) .unwrap_or(DataType::Log), - ) - .with_schema_definition(schema_definition)] + schema_definition, + )] } fn resources(&self) -> Vec { @@ -1315,10 +1316,10 @@ mod tests { ..Default::default() }; - let definition = config.outputs(LogNamespace::Vector)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Vector) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) @@ -1349,17 +1350,17 @@ mod tests { None, ); - assert_eq!(definition, expected_definition) + assert_eq!(definitions, Some(expected_definition)) } #[test] fn output_schema_definition_legacy_namespace() { let config = SimpleHttpConfig::default(); - let definition = config.outputs(LogNamespace::Legacy)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Legacy) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -1375,7 +1376,7 @@ mod tests { .with_event_field(&owned_value_path!("path"), Kind::bytes(), None) .unknown_fields(Kind::bytes()); - assert_eq!(definition, expected_definition) + assert_eq!(definitions, Some(expected_definition)) } #[test] diff --git a/src/sources/internal_logs.rs b/src/sources/internal_logs.rs index 524b16237d683..bc69a34079328 100644 --- a/src/sources/internal_logs.rs +++ b/src/sources/internal_logs.rs @@ -12,7 +12,7 @@ use vector_core::{ }; use crate::{ - config::{DataType, Output, SourceConfig, SourceContext}, + config::{DataType, SourceConfig, SourceContext, SourceOutput}, event::{EstimatedJsonEncodedSizeOf, Event}, internal_events::{InternalLogsBytesReceived, 
InternalLogsEventsReceived, StreamClosedError}, shutdown::ShutdownSignal, @@ -121,11 +121,11 @@ impl SourceConfig for InternalLogsConfig { ))) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let schema_definition = self.schema_definition(global_log_namespace.merge(self.log_namespace)); - vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs(DataType::Log, schema_definition)] } fn can_acknowledge(&self) -> bool { @@ -339,10 +339,10 @@ mod tests { fn output_schema_definition_vector_namespace() { let config = InternalLogsConfig::default(); - let definition = config.outputs(LogNamespace::Vector)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Vector) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) @@ -368,7 +368,7 @@ mod tests { Some("host"), ); - assert_eq!(definition, expected_definition) + assert_eq!(definitions, Some(expected_definition)) } #[test] @@ -379,10 +379,10 @@ mod tests { config.pid_key = OptionalValuePath::from(owned_value_path!(pid_key)); - let definition = config.outputs(LogNamespace::Legacy)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Legacy) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -402,6 +402,6 @@ mod tests { Some("host"), ); - assert_eq!(definition, expected_definition) + assert_eq!(definitions, Some(expected_definition)) } } diff --git a/src/sources/internal_metrics.rs b/src/sources/internal_metrics.rs index 5b4822e7cdc02..bddf04240673d 100644 --- a/src/sources/internal_metrics.rs +++ b/src/sources/internal_metrics.rs @@ -10,7 +10,7 @@ use vector_core::config::LogNamespace; use 
vector_core::EstimatedJsonEncodedSizeOf; use crate::{ - config::{log_schema, DataType, Output, SourceConfig, SourceContext}, + config::{log_schema, SourceConfig, SourceContext, SourceOutput}, internal_events::{EventsReceived, InternalMetricsBytesReceived, StreamClosedError}, metrics::Controller, shutdown::ShutdownSignal, @@ -136,8 +136,8 @@ impl SourceConfig for InternalMetricsConfig { )) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![Output::default(DataType::Metric)] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![SourceOutput::new_metrics()] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/journald.rs b/src/sources/journald.rs index 61288dbf33584..558bb19a62356 100644 --- a/src/sources/journald.rs +++ b/src/sources/journald.rs @@ -44,7 +44,8 @@ use vector_core::{ use crate::{ config::{ - log_schema, DataType, Output, SourceAcknowledgementsConfig, SourceConfig, SourceContext, + log_schema, DataType, SourceAcknowledgementsConfig, SourceConfig, SourceContext, + SourceOutput, }, event::{BatchNotifier, BatchStatus, BatchStatusReceiver, LogEvent}, internal_events::{ @@ -363,11 +364,11 @@ impl SourceConfig for JournaldConfig { )) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let schema_definition = self.schema_definition(global_log_namespace.merge(self.log_namespace)); - vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs(DataType::Log, schema_definition)] } fn can_acknowledge(&self) -> bool { @@ -1465,10 +1466,10 @@ mod tests { ..Default::default() }; - let definition = config.outputs(LogNamespace::Vector)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Vector) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata(Kind::bytes().or_null(), 
[LogNamespace::Vector]) @@ -1498,17 +1499,17 @@ mod tests { Some("host"), ); - assert_eq!(definition, expected_definition) + assert_eq!(definitions, Some(expected_definition)) } #[test] fn output_schema_definition_legacy_namespace() { let config = JournaldConfig::default(); - let definition = config.outputs(LogNamespace::Legacy)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Legacy) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -1523,7 +1524,7 @@ mod tests { ) .unknown_fields(Kind::bytes()); - assert_eq!(definition, expected_definition) + assert_eq!(definitions, Some(expected_definition)) } fn matches_schema(config: &JournaldConfig, namespace: LogNamespace) { @@ -1558,12 +1559,9 @@ mod tests { event.as_mut_log().insert("timestamp", chrono::Utc::now()); - let definition = config.outputs(namespace)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config.outputs(namespace).remove(0).schema_definition(true); - definition.assert_valid_for_event(&event) + definitions.unwrap().assert_valid_for_event(&event); } #[test] diff --git a/src/sources/kafka.rs b/src/sources/kafka.rs index e8d4f1e4e56d1..fa3d551527d27 100644 --- a/src/sources/kafka.rs +++ b/src/sources/kafka.rs @@ -35,7 +35,8 @@ use vector_core::{ use crate::{ codecs::{Decoder, DecodingConfig}, config::{ - log_schema, LogSchema, Output, SourceAcknowledgementsConfig, SourceConfig, SourceContext, + log_schema, LogSchema, SourceAcknowledgementsConfig, SourceConfig, SourceContext, + SourceOutput, }, event::{BatchNotifier, BatchStatus, Event, Value}, internal_events::{ @@ -304,7 +305,7 @@ impl SourceConfig for KafkaSourceConfig { ))) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let keys = 
self.keys(); @@ -355,7 +356,10 @@ impl SourceConfig for KafkaSourceConfig { None, ); - vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs( + self.decoding.output_type(), + schema_definition, + )] } fn can_acknowledge(&self) -> bool { @@ -789,83 +793,85 @@ mod test { #[test] fn test_output_schema_definition_vector_namespace() { - let definition = make_config("topic", "group", LogNamespace::Vector) - .outputs(LogNamespace::Vector)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = make_config("topic", "group", LogNamespace::Vector) + .outputs(LogNamespace::Vector) + .remove(0) + .schema_definition(true); assert_eq!( - definition, - Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) - .with_meaning(OwnedTargetPath::event_root(), "message") - .with_metadata_field( - &owned_value_path!("kafka", "timestamp"), - Kind::timestamp(), - Some("timestamp") - ) - .with_metadata_field( - &owned_value_path!("kafka", "message_key"), - Kind::bytes(), - None - ) - .with_metadata_field(&owned_value_path!("kafka", "topic"), Kind::bytes(), None) - .with_metadata_field( - &owned_value_path!("kafka", "partition"), - Kind::bytes(), - None - ) - .with_metadata_field(&owned_value_path!("kafka", "offset"), Kind::bytes(), None) - .with_metadata_field( - &owned_value_path!("kafka", "headers"), - Kind::object(Collection::empty().with_unknown(Kind::bytes())), - None - ) - .with_metadata_field( - &owned_value_path!("vector", "ingest_timestamp"), - Kind::timestamp(), - None - ) - .with_metadata_field( - &owned_value_path!("vector", "source_type"), - Kind::bytes(), - None - ) + definitions, + Some( + Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) + .with_meaning(OwnedTargetPath::event_root(), "message") + .with_metadata_field( + &owned_value_path!("kafka", "timestamp"), + Kind::timestamp(), + Some("timestamp") + ) + .with_metadata_field( + 
&owned_value_path!("kafka", "message_key"), + Kind::bytes(), + None + ) + .with_metadata_field(&owned_value_path!("kafka", "topic"), Kind::bytes(), None) + .with_metadata_field( + &owned_value_path!("kafka", "partition"), + Kind::bytes(), + None + ) + .with_metadata_field(&owned_value_path!("kafka", "offset"), Kind::bytes(), None) + .with_metadata_field( + &owned_value_path!("kafka", "headers"), + Kind::object(Collection::empty().with_unknown(Kind::bytes())), + None + ) + .with_metadata_field( + &owned_value_path!("vector", "ingest_timestamp"), + Kind::timestamp(), + None + ) + .with_metadata_field( + &owned_value_path!("vector", "source_type"), + Kind::bytes(), + None + ) + ) ) } #[test] fn test_output_schema_definition_legacy_namespace() { - let definition = make_config("topic", "group", LogNamespace::Legacy) - .outputs(LogNamespace::Legacy)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = make_config("topic", "group", LogNamespace::Legacy) + .outputs(LogNamespace::Legacy) + .remove(0) + .schema_definition(true); assert_eq!( - definition, - Definition::new_with_default_metadata(Kind::json(), [LogNamespace::Legacy]) - .unknown_fields(Kind::undefined()) - .with_event_field( - &owned_value_path!("message"), - Kind::bytes(), - Some("message") - ) - .with_event_field( - &owned_value_path!("timestamp"), - Kind::timestamp(), - Some("timestamp") - ) - .with_event_field(&owned_value_path!("message_key"), Kind::bytes(), None) - .with_event_field(&owned_value_path!("topic"), Kind::bytes(), None) - .with_event_field(&owned_value_path!("partition"), Kind::bytes(), None) - .with_event_field(&owned_value_path!("offset"), Kind::bytes(), None) - .with_event_field( - &owned_value_path!("headers"), - Kind::object(Collection::empty().with_unknown(Kind::bytes())), - None - ) - .with_event_field(&owned_value_path!("source_type"), Kind::bytes(), None) + definitions, + Some( + Definition::new_with_default_metadata(Kind::json(), [LogNamespace::Legacy]) + 
.unknown_fields(Kind::undefined()) + .with_event_field( + &owned_value_path!("message"), + Kind::bytes(), + Some("message") + ) + .with_event_field( + &owned_value_path!("timestamp"), + Kind::timestamp(), + Some("timestamp") + ) + .with_event_field(&owned_value_path!("message_key"), Kind::bytes(), None) + .with_event_field(&owned_value_path!("topic"), Kind::bytes(), None) + .with_event_field(&owned_value_path!("partition"), Kind::bytes(), None) + .with_event_field(&owned_value_path!("offset"), Kind::bytes(), None) + .with_event_field( + &owned_value_path!("headers"), + Kind::object(Collection::empty().with_unknown(Kind::bytes())), + None + ) + .with_event_field(&owned_value_path!("source_type"), Kind::bytes(), None) + ) ) } diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index a5d5efa6359bd..6ca4f6e7fcd52 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -42,8 +42,8 @@ use vector_core::{ use crate::{ config::{ - log_schema, ComponentKey, DataType, GenerateConfig, GlobalOptions, Output, SourceConfig, - SourceContext, + log_schema, ComponentKey, DataType, GenerateConfig, GlobalOptions, SourceConfig, + SourceContext, SourceOutput, }, event::Event, internal_events::{ @@ -299,7 +299,7 @@ impl SourceConfig for Config { )) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let schema_definition = BytesDeserializerConfig .schema_definition(log_namespace) @@ -495,7 +495,7 @@ impl SourceConfig for Config { ) .with_standard_vector_source_metadata(); - vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs(DataType::Log, schema_definition)] } fn can_acknowledge(&self) -> bool { @@ -1180,213 +1180,223 @@ mod tests { #[test] fn test_output_schema_definition_vector_namespace() { - let definition = 
toml::from_str::("") + let definitions = toml::from_str::("") .unwrap() - .outputs(LogNamespace::Vector)[0] - .clone() - .log_schema_definition - .unwrap(); + .outputs(LogNamespace::Vector) + .remove(0) + .schema_definition(true); assert_eq!( - definition, - Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "file"), + definitions, + Some( + Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "file"), + Kind::bytes(), + None + ) + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "container_id"), + Kind::bytes().or_undefined(), + None + ) + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "container_image"), + Kind::bytes().or_undefined(), + None + ) + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "container_name"), + Kind::bytes().or_undefined(), + None + ) + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "namespace_labels"), + Kind::object(Collection::empty().with_unknown(Kind::bytes())) + .or_undefined(), + None + ) + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "node_labels"), + Kind::object(Collection::empty().with_unknown(Kind::bytes())) + .or_undefined(), + None + ) + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "pod_annotations"), + Kind::object(Collection::empty().with_unknown(Kind::bytes())) + .or_undefined(), + None + ) + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "pod_ip"), + Kind::bytes().or_undefined(), + None + ) + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "pod_ips"), + Kind::array(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), + None + ) + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "pod_labels"), + Kind::object(Collection::empty().with_unknown(Kind::bytes())) + .or_undefined(), + None + ) + 
.with_metadata_field( + &owned_value_path!("kubernetes_logs", "pod_name"), + Kind::bytes().or_undefined(), + None + ) + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "pod_namespace"), + Kind::bytes().or_undefined(), + None + ) + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "pod_node_name"), + Kind::bytes().or_undefined(), + None + ) + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "pod_owner"), + Kind::bytes().or_undefined(), + None + ) + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "pod_uid"), + Kind::bytes().or_undefined(), + None + ) + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "stream"), + Kind::bytes(), + None + ) + .with_metadata_field( + &owned_value_path!("kubernetes_logs", "timestamp"), + Kind::timestamp(), + Some("timestamp") + ) + .with_metadata_field( + &owned_value_path!("vector", "source_type"), + Kind::bytes(), + None + ) + .with_metadata_field( + &owned_value_path!("vector", "ingest_timestamp"), + Kind::timestamp(), + None + ) + .with_meaning(OwnedTargetPath::event_root(), "message") + ) + ) + } + + #[test] + fn test_output_schema_definition_legacy_namespace() { + let definitions = toml::from_str::("") + .unwrap() + .outputs(LogNamespace::Legacy) + .remove(0) + .schema_definition(true); + + assert_eq!( + definitions, + Some( + Definition::new_with_default_metadata( + Kind::object(Collection::empty()), + [LogNamespace::Legacy] + ) + .with_event_field(&owned_value_path!("file"), Kind::bytes(), None) + .with_event_field( + &owned_value_path!("message"), Kind::bytes(), - None + Some("message") ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "container_id"), + .with_event_field( + &owned_value_path!("kubernetes", "container_id"), Kind::bytes().or_undefined(), None ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "container_image"), + .with_event_field( + &owned_value_path!("kubernetes", "container_image"), Kind::bytes().or_undefined(), 
None ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "container_name"), + .with_event_field( + &owned_value_path!("kubernetes", "container_name"), Kind::bytes().or_undefined(), None ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "namespace_labels"), + .with_event_field( + &owned_value_path!("kubernetes", "namespace_labels"), Kind::object(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), None ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "node_labels"), + .with_event_field( + &owned_value_path!("kubernetes", "node_labels"), Kind::object(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), None ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "pod_annotations"), + .with_event_field( + &owned_value_path!("kubernetes", "pod_annotations"), Kind::object(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), None ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "pod_ip"), + .with_event_field( + &owned_value_path!("kubernetes", "pod_ip"), Kind::bytes().or_undefined(), None ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "pod_ips"), + .with_event_field( + &owned_value_path!("kubernetes", "pod_ips"), Kind::array(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), None ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "pod_labels"), + .with_event_field( + &owned_value_path!("kubernetes", "pod_labels"), Kind::object(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), None ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "pod_name"), + .with_event_field( + &owned_value_path!("kubernetes", "pod_name"), Kind::bytes().or_undefined(), None ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "pod_namespace"), + .with_event_field( + &owned_value_path!("kubernetes", "pod_namespace"), Kind::bytes().or_undefined(), None ) - .with_metadata_field( - 
&owned_value_path!("kubernetes_logs", "pod_node_name"), + .with_event_field( + &owned_value_path!("kubernetes", "pod_node_name"), Kind::bytes().or_undefined(), None ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "pod_owner"), + .with_event_field( + &owned_value_path!("kubernetes", "pod_owner"), Kind::bytes().or_undefined(), None ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "pod_uid"), + .with_event_field( + &owned_value_path!("kubernetes", "pod_uid"), Kind::bytes().or_undefined(), None ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "stream"), - Kind::bytes(), - None - ) - .with_metadata_field( - &owned_value_path!("kubernetes_logs", "timestamp"), + .with_event_field(&owned_value_path!("stream"), Kind::bytes(), None) + .with_event_field( + &owned_value_path!("timestamp"), Kind::timestamp(), Some("timestamp") ) - .with_metadata_field( - &owned_value_path!("vector", "source_type"), + .with_event_field( + &owned_value_path!("source_type"), Kind::bytes(), None ) - .with_metadata_field( - &owned_value_path!("vector", "ingest_timestamp"), - Kind::timestamp(), - None - ) - .with_meaning(OwnedTargetPath::event_root(), "message") - ) - } - - #[test] - fn test_output_schema_definition_legacy_namespace() { - let definition = toml::from_str::("") - .unwrap() - .outputs(LogNamespace::Legacy)[0] - .clone() - .log_schema_definition - .unwrap(); - - assert_eq!( - definition, - Definition::new_with_default_metadata( - Kind::object(Collection::empty()), - [LogNamespace::Legacy] - ) - .with_event_field(&owned_value_path!("file"), Kind::bytes(), None) - .with_event_field( - &owned_value_path!("message"), - Kind::bytes(), - Some("message") - ) - .with_event_field( - &owned_value_path!("kubernetes", "container_id"), - Kind::bytes().or_undefined(), - None - ) - .with_event_field( - &owned_value_path!("kubernetes", "container_image"), - Kind::bytes().or_undefined(), - None - ) - .with_event_field( - &owned_value_path!("kubernetes", 
"container_name"), - Kind::bytes().or_undefined(), - None - ) - .with_event_field( - &owned_value_path!("kubernetes", "namespace_labels"), - Kind::object(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), - None - ) - .with_event_field( - &owned_value_path!("kubernetes", "node_labels"), - Kind::object(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), - None - ) - .with_event_field( - &owned_value_path!("kubernetes", "pod_annotations"), - Kind::object(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), - None - ) - .with_event_field( - &owned_value_path!("kubernetes", "pod_ip"), - Kind::bytes().or_undefined(), - None - ) - .with_event_field( - &owned_value_path!("kubernetes", "pod_ips"), - Kind::array(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), - None - ) - .with_event_field( - &owned_value_path!("kubernetes", "pod_labels"), - Kind::object(Collection::empty().with_unknown(Kind::bytes())).or_undefined(), - None - ) - .with_event_field( - &owned_value_path!("kubernetes", "pod_name"), - Kind::bytes().or_undefined(), - None - ) - .with_event_field( - &owned_value_path!("kubernetes", "pod_namespace"), - Kind::bytes().or_undefined(), - None - ) - .with_event_field( - &owned_value_path!("kubernetes", "pod_node_name"), - Kind::bytes().or_undefined(), - None - ) - .with_event_field( - &owned_value_path!("kubernetes", "pod_owner"), - Kind::bytes().or_undefined(), - None - ) - .with_event_field( - &owned_value_path!("kubernetes", "pod_uid"), - Kind::bytes().or_undefined(), - None - ) - .with_event_field(&owned_value_path!("stream"), Kind::bytes(), None) - .with_event_field( - &owned_value_path!("timestamp"), - Kind::timestamp(), - Some("timestamp") ) - .with_event_field(&owned_value_path!("source_type"), Kind::bytes(), None) ) } } diff --git a/src/sources/logstash.rs b/src/sources/logstash.rs index f1f090206956a..c8287a0d01a84 100644 --- a/src/sources/logstash.rs +++ b/src/sources/logstash.rs @@ -25,8 +25,8 @@ use 
vector_core::{ use super::util::net::{SocketListenAddr, TcpSource, TcpSourceAck, TcpSourceAcker}; use crate::{ config::{ - log_schema, DataType, GenerateConfig, Output, Resource, SourceAcknowledgementsConfig, - SourceConfig, SourceContext, + log_schema, DataType, GenerateConfig, Resource, SourceAcknowledgementsConfig, SourceConfig, + SourceContext, SourceOutput, }, event::{Event, LogEvent, Value}, serde::bool_or_struct, @@ -167,10 +167,11 @@ impl SourceConfig for LogstashConfig { ) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { // There is a global and per-source `log_namespace` config. // The source config overrides the global setting and is merged here. - vec![Output::default(DataType::Log).with_schema_definition( + vec![SourceOutput::new_logs( + DataType::Log, self.schema_definition(global_log_namespace.merge(self.log_namespace)), )] } @@ -790,10 +791,10 @@ mod test { ..Default::default() }; - let definition = config.outputs(LogNamespace::Vector)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Vector) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) @@ -824,17 +825,17 @@ mod test { None, ); - assert_eq!(definition, expected_definition) + assert_eq!(definitions, Some(expected_definition)) } #[test] fn output_schema_definition_legacy_namespace() { let config = LogstashConfig::default(); - let definition = config.outputs(LogNamespace::Legacy)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Legacy) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -849,7 +850,7 @@ mod test { .with_event_field(&owned_value_path!("timestamp"), Kind::timestamp(), None) 
.with_event_field(&owned_value_path!("host"), Kind::bytes(), Some("host")); - assert_eq!(definition, expected_definition) + assert_eq!(definitions, Some(expected_definition)) } } diff --git a/src/sources/mongodb_metrics/mod.rs b/src/sources/mongodb_metrics/mod.rs index 30f3f84ec4fb9..b68356444d5dd 100644 --- a/src/sources/mongodb_metrics/mod.rs +++ b/src/sources/mongodb_metrics/mod.rs @@ -19,7 +19,7 @@ use vector_config::configurable_component; use vector_core::{metric_tags, ByteSizeOf, EstimatedJsonEncodedSizeOf}; use crate::{ - config::{self, Output, SourceConfig, SourceContext}, + config::{SourceConfig, SourceContext, SourceOutput}, event::metric::{Metric, MetricKind, MetricTags, MetricValue}, internal_events::{ CollectionCompleted, EndpointBytesReceived, MongoDbMetricsBsonParseError, @@ -156,8 +156,8 @@ impl SourceConfig for MongoDbMetricsConfig { })) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![Output::default(config::DataType::Metric)] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![SourceOutput::new_metrics()] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/nats.rs b/src/sources/nats.rs index be96449831199..faef941a4c7f5 100644 --- a/src/sources/nats.rs +++ b/src/sources/nats.rs @@ -16,7 +16,7 @@ use vector_core::{ use crate::{ codecs::{Decoder, DecodingConfig}, - config::{GenerateConfig, Output, SourceConfig, SourceContext}, + config::{GenerateConfig, SourceConfig, SourceContext, SourceOutput}, event::Event, internal_events::StreamClosedError, nats::{from_tls_auth_config, NatsAuthConfig, NatsConfigError}, @@ -135,7 +135,7 @@ impl SourceConfig for NatsSourceConfig { ))) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let legacy_subject_key_field = self .subject_key_field @@ -154,7 +154,10 @@ impl SourceConfig for NatsSourceConfig { None, ); 
- vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs( + self.decoding.output_type(), + schema_definition, + )] } fn can_acknowledge(&self) -> bool { @@ -290,10 +293,10 @@ mod tests { ..Default::default() }; - let definition = config.outputs(LogNamespace::Vector)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Vector) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) @@ -310,7 +313,7 @@ mod tests { ) .with_metadata_field(&owned_value_path!("nats", "subject"), Kind::bytes(), None); - assert_eq!(definition, expected_definition); + assert_eq!(definitions, Some(expected_definition)); } #[test] @@ -319,10 +322,10 @@ mod tests { subject_key_field: default_subject_key_field(), ..Default::default() }; - let definition = config.outputs(LogNamespace::Legacy)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Legacy) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -337,7 +340,7 @@ mod tests { .with_event_field(&owned_value_path!("source_type"), Kind::bytes(), None) .with_event_field(&owned_value_path!("subject"), Kind::bytes(), None); - assert_eq!(definition, expected_definition); + assert_eq!(definitions, Some(expected_definition)); } } diff --git a/src/sources/nginx_metrics/mod.rs b/src/sources/nginx_metrics/mod.rs index 8d9cdb8b6e3de..0ea02fffd83c1 100644 --- a/src/sources/nginx_metrics/mod.rs +++ b/src/sources/nginx_metrics/mod.rs @@ -16,7 +16,7 @@ use vector_config::configurable_component; use vector_core::{metric_tags, EstimatedJsonEncodedSizeOf}; use crate::{ - config::{DataType, Output, SourceConfig, SourceContext}, + config::{SourceConfig, SourceContext, SourceOutput}, event::metric::{Metric, MetricKind, 
MetricTags, MetricValue}, http::{Auth, HttpClient}, internal_events::{ @@ -144,8 +144,8 @@ impl SourceConfig for NginxMetricsConfig { })) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![Output::default(DataType::Metric)] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![SourceOutput::new_metrics()] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/opentelemetry/mod.rs b/src/sources/opentelemetry/mod.rs index dd2f2cc6add96..275721872bf1d 100644 --- a/src/sources/opentelemetry/mod.rs +++ b/src/sources/opentelemetry/mod.rs @@ -32,8 +32,8 @@ use self::{ }; use crate::{ config::{ - DataType, GenerateConfig, Output, Resource, SourceAcknowledgementsConfig, SourceConfig, - SourceContext, + DataType, GenerateConfig, Resource, SourceAcknowledgementsConfig, SourceConfig, + SourceContext, SourceOutput, }, serde::bool_or_struct, sources::{util::grpc::run_grpc_server, Source}, @@ -167,7 +167,7 @@ impl SourceConfig for OpentelemetryConfig { // TODO: appropriately handle "severity" meaning across both "severity_text" and "severity_number", // as both are optional and can be converted to/from. 
- fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let schema_definition = Definition::new_with_default_metadata(Kind::any(), [log_namespace]) .with_source_metadata( @@ -255,9 +255,7 @@ impl SourceConfig for OpentelemetryConfig { } }; - vec![Output::default(DataType::Log) - .with_port(LOGS) - .with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs(DataType::Log, schema_definition).with_port(LOGS)] } fn resources(&self) -> Vec { diff --git a/src/sources/opentelemetry/tests.rs b/src/sources/opentelemetry/tests.rs index 23419aaed5beb..c2eef23095339 100644 --- a/src/sources/opentelemetry/tests.rs +++ b/src/sources/opentelemetry/tests.rs @@ -48,13 +48,10 @@ async fn receive_grpc_logs_vector_namespace() { acknowledgements: Default::default(), log_namespace: Some(true), }; - let schema_definition = source + let schema_definitions = source .outputs(LogNamespace::Vector) - .first() - .unwrap() - .log_schema_definition - .clone() - .unwrap(); + .remove(0) + .schema_definition(true); let (sender, logs_output, _) = new_source(EventStatus::Delivered); let server = source @@ -111,7 +108,7 @@ async fn receive_grpc_logs_vector_namespace() { // we just send one, so only one output assert_eq!(output.len(), 1); let event = output.pop().unwrap(); - schema_definition.assert_valid_for_event(&event); + schema_definitions.unwrap().assert_valid_for_event(&event); assert_eq!(event.as_log().get(".").unwrap(), &vrl::value!("log body")); @@ -188,13 +185,10 @@ async fn receive_grpc_logs_legacy_namespace() { acknowledgements: Default::default(), log_namespace: Default::default(), }; - let schema_definition = source + let schema_definitions = source .outputs(LogNamespace::Legacy) - .first() - .unwrap() - .log_schema_definition - .clone() - .unwrap(); + .remove(0) + .schema_definition(true); let (sender, logs_output, _) = 
new_source(EventStatus::Delivered); let server = source @@ -251,7 +245,9 @@ async fn receive_grpc_logs_legacy_namespace() { // we just send one, so only one output assert_eq!(output.len(), 1); let actual_event = output.pop().unwrap(); - schema_definition.assert_valid_for_event(&actual_event); + schema_definitions + .unwrap() + .assert_valid_for_event(&actual_event); let expect_vec = vec_into_btmap(vec![ ( "attributes", diff --git a/src/sources/postgresql_metrics.rs b/src/sources/postgresql_metrics.rs index f7de5ff1fecbb..b7415c4217831 100644 --- a/src/sources/postgresql_metrics.rs +++ b/src/sources/postgresql_metrics.rs @@ -31,7 +31,7 @@ use vector_core::config::LogNamespace; use vector_core::{metric_tags, ByteSizeOf, EstimatedJsonEncodedSizeOf}; use crate::{ - config::{DataType, Output, SourceConfig, SourceContext}, + config::{SourceConfig, SourceContext, SourceOutput}, event::metric::{Metric, MetricKind, MetricTags, MetricValue}, internal_events::{ CollectionCompleted, EndpointBytesReceived, EventsReceived, PostgresqlMetricsCollectError, @@ -233,8 +233,8 @@ impl SourceConfig for PostgresqlMetricsConfig { })) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![Output::default(DataType::Metric)] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![SourceOutput::new_metrics()] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/prometheus/remote_write.rs b/src/sources/prometheus/remote_write.rs index fce4e781848b5..3b27770ba995b 100644 --- a/src/sources/prometheus/remote_write.rs +++ b/src/sources/prometheus/remote_write.rs @@ -10,7 +10,7 @@ use warp::http::{HeaderMap, StatusCode}; use super::parser; use crate::{ config::{ - self, GenerateConfig, Output, SourceAcknowledgementsConfig, SourceConfig, SourceContext, + GenerateConfig, SourceAcknowledgementsConfig, SourceConfig, SourceContext, SourceOutput, }, event::Event, internal_events::PrometheusRemoteWriteParseError, @@ -88,8 +88,8 @@ impl SourceConfig for 
PrometheusRemoteWriteConfig { ) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![Output::default(config::DataType::Metric)] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![SourceOutput::new_metrics()] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index 67a4cfd042055..7deb49eb7085b 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -12,7 +12,7 @@ use vector_core::{config::LogNamespace, event::Event}; use super::parser; use crate::sources::util::http::HttpMethod; use crate::{ - config::{self, GenerateConfig, Output, SourceConfig, SourceContext}, + config::{GenerateConfig, SourceConfig, SourceContext, SourceOutput}, http::Auth, internal_events::PrometheusParseError, sources::{ @@ -156,8 +156,8 @@ impl SourceConfig for PrometheusScrapeConfig { Ok(call(inputs, builder, cx.out, HttpMethod::Get).boxed()) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![Output::default(config::DataType::Metric)] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![SourceOutput::new_metrics()] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/redis/mod.rs b/src/sources/redis/mod.rs index e45b042927f61..e2134e78ba9ee 100644 --- a/src/sources/redis/mod.rs +++ b/src/sources/redis/mod.rs @@ -20,7 +20,7 @@ use vector_core::{ use crate::{ codecs::{Decoder, DecodingConfig}, - config::{log_schema, GenerateConfig, Output, SourceConfig, SourceContext}, + config::{log_schema, GenerateConfig, SourceConfig, SourceContext, SourceOutput}, event::Event, internal_events::{EventsReceived, StreamClosedError}, serde::{default_decoding, default_framing_message_based}, @@ -195,7 +195,7 @@ impl SourceConfig for RedisSourceConfig { } } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = 
global_log_namespace.merge(self.log_namespace); let redis_key_path = self @@ -216,7 +216,10 @@ impl SourceConfig for RedisSourceConfig { ) .with_standard_vector_source_metadata(); - vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs( + self.decoding.output_type(), + schema_definition, + )] } fn can_acknowledge(&self) -> bool { diff --git a/src/sources/socket/mod.rs b/src/sources/socket/mod.rs index ebfcec8b000cc..888df782f08bd 100644 --- a/src/sources/socket/mod.rs +++ b/src/sources/socket/mod.rs @@ -13,7 +13,7 @@ use vector_core::config::{log_schema, LegacyKey, LogNamespace}; use crate::serde::default_framing_message_based; use crate::{ codecs::DecodingConfig, - config::{GenerateConfig, Output, Resource, SourceConfig, SourceContext}, + config::{GenerateConfig, Resource, SourceConfig, SourceContext, SourceOutput}, sources::util::net::TcpSource, tls::MaybeTlsSettings, }; @@ -200,7 +200,7 @@ impl SourceConfig for SocketConfig { } } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(Some(self.log_namespace())); let schema_definition = self @@ -292,8 +292,10 @@ impl SourceConfig for SocketConfig { } }; - vec![Output::default(self.decoding().output_type()) - .with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs( + self.decoding().output_type(), + schema_definition, + )] } fn resources(&self) -> Vec { diff --git a/src/sources/splunk_hec/mod.rs b/src/sources/splunk_hec/mod.rs index b239921ce5b9e..f8d7d0e208429 100644 --- a/src/sources/splunk_hec/mod.rs +++ b/src/sources/splunk_hec/mod.rs @@ -34,7 +34,7 @@ use self::{ splunk_response::{HecResponse, HecResponseMetadata, HecStatusCode}, }; use crate::{ - config::{log_schema, DataType, Output, Resource, SourceConfig, SourceContext}, + config::{log_schema, DataType, Resource, SourceConfig, SourceContext, 
SourceOutput}, event::{Event, LogEvent, Value}, internal_events::{ EventsReceived, HttpBytesReceived, SplunkHecRequestBodyInvalidError, SplunkHecRequestError, @@ -175,7 +175,7 @@ impl SourceConfig for SplunkConfig { })) } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let schema_definition = match log_namespace { @@ -237,7 +237,7 @@ impl SourceConfig for SplunkConfig { None, ); - vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs(DataType::Log, schema_definition)] } fn resources(&self) -> Vec { @@ -2443,10 +2443,10 @@ mod tests { ..Default::default() }; - let definition = config.outputs(LogNamespace::Vector)[0] - .clone() - .log_schema_definition - .unwrap(); + let definition = config + .outputs(LogNamespace::Vector) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()).or_bytes(), @@ -2488,16 +2488,16 @@ mod tests { None, ); - assert_eq!(definition, expected_definition); + assert_eq!(definition, Some(expected_definition)); } #[test] fn output_schema_definition_legacy_namespace() { let config = SplunkConfig::default(); - let definition = config.outputs(LogNamespace::Legacy)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Legacy) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -2523,6 +2523,6 @@ mod tests { .with_event_field(&owned_value_path!("splunk_sourcetype"), Kind::bytes(), None) .with_event_field(&owned_value_path!("timestamp"), Kind::timestamp(), None); - assert_eq!(definition, expected_definition); + assert_eq!(definitions, Some(expected_definition)); } } diff --git a/src/sources/statsd/mod.rs 
b/src/sources/statsd/mod.rs index a0332adcfeebe..8fcb93e176e4b 100644 --- a/src/sources/statsd/mod.rs +++ b/src/sources/statsd/mod.rs @@ -21,7 +21,7 @@ use self::parser::ParseError; use super::util::net::{try_bind_udp_socket, SocketListenAddr, TcpNullAcker, TcpSource}; use crate::{ codecs::Decoder, - config::{self, GenerateConfig, Output, Resource, SourceConfig, SourceContext}, + config::{GenerateConfig, Resource, SourceConfig, SourceContext, SourceOutput}, event::Event, internal_events::{ EventsReceived, SocketBindError, SocketBytesReceived, SocketMode, SocketReceiveError, @@ -175,8 +175,8 @@ impl SourceConfig for StatsdConfig { } } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![Output::default(config::DataType::Metric)] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![SourceOutput::new_metrics()] } fn resources(&self) -> Vec { diff --git a/src/sources/syslog.rs b/src/sources/syslog.rs index d53626a4650ab..c2c145437d9d6 100644 --- a/src/sources/syslog.rs +++ b/src/sources/syslog.rs @@ -23,7 +23,9 @@ use vector_core::config::{LegacyKey, LogNamespace}; use crate::sources::util::build_unix_stream_source; use crate::{ codecs::Decoder, - config::{log_schema, DataType, GenerateConfig, Output, Resource, SourceConfig, SourceContext}, + config::{ + log_schema, DataType, GenerateConfig, Resource, SourceConfig, SourceContext, SourceOutput, + }, event::Event, internal_events::StreamClosedError, internal_events::{SocketBindError, SocketMode, SocketReceiveError}, @@ -238,13 +240,13 @@ impl SourceConfig for SyslogConfig { } } - fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let schema_definition = SyslogDeserializerConfig::from_source(SyslogConfig::NAME) .schema_definition(log_namespace) .with_standard_vector_source_metadata(); - 
vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs(DataType::Log, schema_definition)] } fn resources(&self) -> Vec { @@ -496,10 +498,10 @@ mod test { ..Default::default() }; - let definition = config.outputs(LogNamespace::Vector)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Vector) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata(Kind::bytes(), [LogNamespace::Vector]) @@ -572,17 +574,17 @@ mod test { None, ); - assert_eq!(definition, expected_definition); + assert_eq!(definitions, Some(expected_definition)); } #[test] fn output_schema_definition_legacy_namespace() { let config = SyslogConfig::default(); - let definition = config.outputs(LogNamespace::Legacy)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Legacy) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -641,7 +643,7 @@ mod test { .unknown_fields(Kind::object(Collection::from_unknown(Kind::bytes()))) .with_standard_vector_source_metadata(); - assert_eq!(definition, expected_definition); + assert_eq!(definitions, Some(expected_definition)); } #[test] diff --git a/src/sources/vector/mod.rs b/src/sources/vector/mod.rs index e2ab2807a6263..7f148fff81c47 100644 --- a/src/sources/vector/mod.rs +++ b/src/sources/vector/mod.rs @@ -15,8 +15,8 @@ use vector_core::{ use crate::{ config::{ - DataType, GenerateConfig, Output, Resource, SourceAcknowledgementsConfig, SourceConfig, - SourceContext, + DataType, GenerateConfig, Resource, SourceAcknowledgementsConfig, SourceConfig, + SourceContext, SourceOutput, }, internal_events::{EventsReceived, StreamClosedError}, proto::vector as proto, @@ -191,14 +191,14 @@ impl SourceConfig for VectorConfig { Ok(Box::pin(source)) } - fn outputs(&self, 
global_log_namespace: LogNamespace) -> Vec { + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let schema_definition = NativeDeserializerConfig .schema_definition(log_namespace) .with_standard_vector_source_metadata(); - vec![Output::default(DataType::all()).with_schema_definition(schema_definition)] + vec![SourceOutput::new_logs(DataType::all(), schema_definition)] } fn resources(&self) -> Vec { @@ -229,10 +229,10 @@ mod test { fn output_schema_definition_vector_namespace() { let config = VectorConfig::default(); - let definition = config.outputs(LogNamespace::Vector)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Vector) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata(Kind::any(), [LogNamespace::Vector]) @@ -247,17 +247,17 @@ mod test { None, ); - assert_eq!(definition, expected_definition) + assert_eq!(definitions, Some(expected_definition)) } #[test] fn output_schema_definition_legacy_namespace() { let config = VectorConfig::default(); - let definition = config.outputs(LogNamespace::Legacy)[0] - .clone() - .log_schema_definition - .unwrap(); + let definitions = config + .outputs(LogNamespace::Legacy) + .remove(0) + .schema_definition(true); let expected_definition = Definition::new_with_default_metadata( Kind::object(Collection::empty()), @@ -266,7 +266,7 @@ mod test { .with_event_field(&owned_value_path!("source_type"), Kind::bytes(), None) .with_event_field(&owned_value_path!("timestamp"), Kind::timestamp(), None); - assert_eq!(definition, expected_definition) + assert_eq!(definitions, Some(expected_definition)) } } diff --git a/src/test_util/mock/sources/backpressure.rs b/src/test_util/mock/sources/backpressure.rs index 59d3b898bfaca..146a3b6d2a828 100644 --- a/src/test_util/mock/sources/backpressure.rs +++ b/src/test_util/mock/sources/backpressure.rs @@ -9,9 +9,10 
@@ use vector_config::configurable_component; use vector_core::{ config::LogNamespace, event::{Event, LogEvent}, + schema::Definition, }; use vector_core::{ - config::{DataType, Output}, + config::{DataType, SourceOutput}, source::Source, }; @@ -62,8 +63,11 @@ impl SourceConfig for BackpressureSourceConfig { .boxed()) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![Output::default(DataType::all())] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![SourceOutput::new_logs( + DataType::all(), + Definition::default_legacy_namespace(), + )] } fn can_acknowledge(&self) -> bool { diff --git a/src/test_util/mock/sources/basic.rs b/src/test_util/mock/sources/basic.rs index e9d7e15ff611f..7ffeb6d3d0da2 100644 --- a/src/test_util/mock/sources/basic.rs +++ b/src/test_util/mock/sources/basic.rs @@ -6,9 +6,9 @@ use std::sync::{ use async_trait::async_trait; use vector_buffers::topology::channel::{limited, LimitedReceiver}; use vector_config::configurable_component; -use vector_core::config::LogNamespace; +use vector_core::{config::LogNamespace, schema::Definition}; use vector_core::{ - config::{DataType, Output}, + config::{DataType, SourceOutput}, event::{EventArray, EventContainer}, source::Source, }; @@ -132,8 +132,11 @@ impl SourceConfig for BasicSourceConfig { })) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![Output::default(self.data_type.unwrap())] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![SourceOutput::new_logs( + self.data_type.unwrap(), + Definition::default_legacy_namespace(), + )] } fn can_acknowledge(&self) -> bool { diff --git a/src/test_util/mock/sources/error.rs b/src/test_util/mock/sources/error.rs index 4a84e6c12ad7b..360f15632c1e4 100644 --- a/src/test_util/mock/sources/error.rs +++ b/src/test_util/mock/sources/error.rs @@ -2,8 +2,9 @@ use async_trait::async_trait; use futures_util::{future::err, FutureExt}; use vector_config::configurable_component; 
use vector_core::config::LogNamespace; +use vector_core::schema::Definition; use vector_core::{ - config::{DataType, Output}, + config::{DataType, SourceOutput}, source::Source, }; @@ -26,8 +27,11 @@ impl SourceConfig for ErrorSourceConfig { Ok(err(()).boxed()) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![Output::default(DataType::Log)] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![SourceOutput::new_logs( + DataType::Log, + Definition::default_legacy_namespace(), + )] } fn can_acknowledge(&self) -> bool { diff --git a/src/test_util/mock/sources/panic.rs b/src/test_util/mock/sources/panic.rs index c65e0ab099b19..65f895f9eaf6d 100644 --- a/src/test_util/mock/sources/panic.rs +++ b/src/test_util/mock/sources/panic.rs @@ -1,8 +1,9 @@ use async_trait::async_trait; use vector_config::configurable_component; use vector_core::config::LogNamespace; +use vector_core::schema::Definition; use vector_core::{ - config::{DataType, Output}, + config::{DataType, SourceOutput}, source::Source, }; @@ -25,8 +26,11 @@ impl SourceConfig for PanicSourceConfig { Ok(Box::pin(async { panic!() })) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![Output::default(DataType::Log)] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![SourceOutput::new_logs( + DataType::Log, + Definition::default_legacy_namespace(), + )] } fn can_acknowledge(&self) -> bool { diff --git a/src/test_util/mock/sources/tripwire.rs b/src/test_util/mock/sources/tripwire.rs index f24e2d18899c3..5a8d91a143a62 100644 --- a/src/test_util/mock/sources/tripwire.rs +++ b/src/test_util/mock/sources/tripwire.rs @@ -5,8 +5,9 @@ use futures_util::{future, FutureExt}; use stream_cancel::{Trigger, Tripwire}; use vector_config::configurable_component; use vector_core::config::LogNamespace; +use vector_core::schema::Definition; use vector_core::{ - config::{DataType, Output}, + config::{DataType, SourceOutput}, source::Source, }; @@ 
-65,8 +66,11 @@ impl SourceConfig for TripwireSourceConfig { )) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![Output::default(DataType::Log)] + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![SourceOutput::new_logs( + DataType::Log, + Definition::default_legacy_namespace(), + )] } fn can_acknowledge(&self) -> bool { diff --git a/src/test_util/mock/transforms/basic.rs b/src/test_util/mock/transforms/basic.rs index e2643bec025b2..ce8673c408994 100644 --- a/src/test_util/mock/transforms/basic.rs +++ b/src/test_util/mock/transforms/basic.rs @@ -5,7 +5,7 @@ use value::Value; use vector_config::configurable_component; use vector_core::config::LogNamespace; use vector_core::{ - config::{DataType, Input, Output}, + config::{DataType, Input, TransformOutput}, event::{ metric::{MetricData, Sample}, Event, MetricValue, @@ -14,7 +14,7 @@ use vector_core::{ transform::{FunctionTransform, OutputBuffer, Transform}, }; -use crate::config::{TransformConfig, TransformContext}; +use crate::config::{OutputId, TransformConfig, TransformContext}; /// Configuration for the `test_basic` transform. 
#[configurable_component(transform("test_basic", "Test (basic)"))] @@ -49,8 +49,18 @@ impl TransformConfig for BasicTransformConfig { Input::all() } - fn outputs(&self, _: &schema::Definition, _: LogNamespace) -> Vec { - vec![Output::default(DataType::all())] + fn outputs( + &self, + definitions: &[(OutputId, schema::Definition)], + _: LogNamespace, + ) -> Vec { + vec![TransformOutput::new( + DataType::all(), + definitions + .iter() + .map(|(_output, definition)| definition.clone()) + .collect(), + )] } } diff --git a/src/test_util/mock/transforms/noop.rs b/src/test_util/mock/transforms/noop.rs index f94bf7a1bceee..b6712e4eec21f 100644 --- a/src/test_util/mock/transforms/noop.rs +++ b/src/test_util/mock/transforms/noop.rs @@ -5,13 +5,13 @@ use futures_util::Stream; use vector_config::configurable_component; use vector_core::config::LogNamespace; use vector_core::{ - config::{DataType, Input, Output}, + config::{DataType, Input, TransformOutput}, event::{Event, EventContainer}, schema::Definition, transform::{FunctionTransform, OutputBuffer, TaskTransform, Transform}, }; -use crate::config::{GenerateConfig, TransformConfig, TransformContext}; +use crate::config::{GenerateConfig, OutputId, TransformConfig, TransformContext}; use super::TransformType; @@ -39,8 +39,18 @@ impl TransformConfig for NoopTransformConfig { Input::all() } - fn outputs(&self, _: &Definition, _: LogNamespace) -> Vec { - vec![Output::default(DataType::all())] + fn outputs( + &self, + definitions: &[(OutputId, Definition)], + _: LogNamespace, + ) -> Vec { + vec![TransformOutput::new( + DataType::all(), + definitions + .iter() + .map(|(_output, definition)| definition.clone()) + .collect(), + )] } async fn build(&self, _: &TransformContext) -> crate::Result { diff --git a/src/topology/builder.rs b/src/topology/builder.rs index 4c2c31531a9ca..b2cdc06a4d8ef 100644 --- a/src/topology/builder.rs +++ b/src/topology/builder.rs @@ -41,8 +41,8 @@ use super::{ }; use crate::{ config::{ - ComponentKey, 
DataType, EnrichmentTableConfig, Input, Inputs, Output, OutputId, - ProxyConfig, SinkConfig, SinkContext, SourceContext, TransformContext, TransformOuter, + ComponentKey, DataType, EnrichmentTableConfig, Input, Inputs, OutputId, ProxyConfig, + SinkConfig, SinkContext, SourceContext, TransformContext, TransformOuter, TransformOutput, }, event::{EventArray, EventContainer}, internal_events::EventsReceived, @@ -187,8 +187,8 @@ pub async fn build_pieces( let mut controls = HashMap::new(); let mut schema_definitions = HashMap::with_capacity(source_outputs.len()); - for output in source_outputs { - let mut rx = builder.add_output(output.clone()); + for output in source_outputs.into_iter() { + let mut rx = builder.add_source_output(output.clone()); let (mut fanout, control) = Fanout::new(); let pump = async move { @@ -214,11 +214,10 @@ pub async fn build_pieces( control, ); - let schema_definition = output - .log_schema_definition - .unwrap_or_else(schema::Definition::default_legacy_namespace); - - schema_definitions.insert(output.port, schema_definition); + let port = output.port.clone(); + if let Some(definition) = output.schema_definition(config.schema.enabled) { + schema_definitions.insert(port, definition); + } } let (pump_error_tx, mut pump_error_rx) = oneshot::channel(); @@ -343,9 +342,15 @@ pub async fn build_pieces( { debug!(component = %key, "Building new transform."); - let mut schema_definitions = HashMap::new(); - let merged_definition = - schema::merged_definition(&transform.inputs, config, &mut definition_cache); + let input_definitions = + schema::input_definitions(&transform.inputs, config, &mut definition_cache); + + let merged_definition: Definition = input_definitions + .iter() + .map(|(_output_id, definition)| definition.clone()) + .reduce(Definition::merge) + // We may not have any definitions if all the inputs are from metrics sources. 
+ .unwrap_or_else(Definition::any); let span = error_span!( "transform", @@ -356,15 +361,13 @@ pub async fn build_pieces( component_name = %key.id(), ); - for output in transform + // Create a map of the outputs to the list of possible definitions from those outputs. + let schema_definitions = transform .inner - .outputs(&merged_definition, config.schema.log_namespace()) - { - let definition = output - .log_schema_definition - .unwrap_or_else(|| merged_definition.clone()); - schema_definitions.insert(output.port, definition); - } + .outputs(&input_definitions, config.schema.log_namespace()) + .into_iter() + .map(|output| (output.port, output.log_schema_definitions)) + .collect::>(); let context = TransformContext { key: Some(key.clone()), @@ -378,7 +381,7 @@ pub async fn build_pieces( let node = TransformNode::from_parts( key.clone(), transform, - &merged_definition, + &input_definitions, config.schema.log_namespace(), ); @@ -609,7 +612,7 @@ struct TransformNode { typetag: &'static str, inputs: Inputs, input_details: Input, - outputs: Vec, + outputs: Vec, enable_concurrency: bool, } @@ -617,7 +620,7 @@ impl TransformNode { pub fn from_parts( key: ComponentKey, transform: &TransformOuter, - schema_definition: &Definition, + schema_definition: &[(OutputId, Definition)], global_log_namespace: LogNamespace, ) -> Self { Self { diff --git a/src/topology/schema.rs b/src/topology/schema.rs index 2b8f733e35c99..088967ec5b211 100644 --- a/src/topology/schema.rs +++ b/src/topology/schema.rs @@ -1,44 +1,25 @@ use std::collections::HashMap; -use value::Kind; -use vector_core::config::Output; + +use vector_core::config::SourceOutput; pub(super) use crate::schema::Definition; use crate::{ - config::{ComponentKey, Config, OutputId, SinkConfig, SinkOuter}, + config::{ComponentKey, Config, OutputId, SinkConfig, SinkOuter, TransformOutput}, topology, }; -/// Create a new [`Definition`] by recursively merging all provided inputs into a given component. 
-/// -/// Recursion happens when one of the components inputs references a transform that has no -/// definition output of its own, in such a case, the definition output becomes the merged output -/// of that transform's inputs. -/// -/// For example: -/// -/// Source 1 [Definition 1] -> -/// Source 2 [Definition 2] -> Transform 1 [] -> [Definition 1 & 2] -/// Source 3 [Definition 3] -> Transform 2 [Definition 4] -> [Definition 4] -> Sink -/// -/// When asking for the merged definition feeding into `Sink`, `Transform 1` returns no definition -/// of its own, when asking for its schema definition. In this case the `merged_definition` method -/// recurses further back towards `Source 1` and `Source 2`, merging the two into a new definition -/// (marked as `[Definition 1 & 2]` above). -/// -/// It then asks for the definition of `Transform 2`, which *does* defines its own definition, -/// named `Definition 4`, which overrides `Definition 3` feeding into `Transform 2`. In this case, -/// the `Sink` is only interested in `Definition 4`, and ignores `Definition 3`. -/// -/// Finally, The merged definition (named `Definition 1 & 2`), and `Definition 4` are merged -/// together to produce the new `Definition` returned by this method. -pub fn merged_definition( +/// The cache is used whilst building up the topology. +/// TODO: Describe more, especially why we have a bool in the key. +type Cache = HashMap<(bool, Vec), Vec<(OutputId, Definition)>>; + +pub fn possible_definitions( inputs: &[OutputId], config: &dyn ComponentContainer, - cache: &mut HashMap<(bool, Vec), Definition>, -) -> Definition { + cache: &mut Cache, +) -> Vec<(OutputId, Definition)> { if inputs.is_empty() { - return Definition::default_legacy_namespace(); + return vec![]; } // Try to get the definition from the cache. 
@@ -46,66 +27,49 @@ pub fn merged_definition( return definition.clone(); } - let mut definition = Definition::new(Kind::never(), Kind::never(), []); + let mut definitions = Vec::new(); for input in inputs { let key = &input.component; // If the input is a source, the output is merged into the top-level schema. - // Not all sources contain a schema yet, in which case they use a default. if let Ok(maybe_output) = config.source_output_for_port(key, &input.port) { - let source_definition = maybe_output - .unwrap_or_else(|| { - unreachable!( - "source output mis-configured - output for port {:?} missing", - &input.port - ) - }) - .log_schema_definition - .clone() - // Schemas must be implemented for components that support the "Vector" namespace, so since - // one doesn't exist here, we can assume it's using the default "legacy" namespace schema definition - .unwrap_or_else(Definition::default_legacy_namespace); - - if config.schema_enabled() { - definition = definition.merge(source_definition); - } else { - definition = definition.merge(Definition::default_for_namespace( - source_definition.log_namespaces(), - )); - } + let mut source_definition = input.with_definitions( + maybe_output + .unwrap_or_else(|| { + unreachable!( + "source output mis-configured - output for port {:?} missing", + &input.port + ) + }) + .schema_definition(config.schema_enabled()), + ); + + definitions.append(&mut source_definition); } + // If the input is a transform, the output is merged into the top-level schema - // Not all transforms contain a schema yet. 
If that's the case, it's assumed - // that the transform doesn't modify the event schema, so it is passed through as-is (recursively) if let Some(inputs) = config.transform_inputs(key) { - let merged_definition = merged_definition(inputs, config, cache); - - let transform_definition = config - .transform_output_for_port(key, &input.port, &merged_definition) - .expect("transform must exist - already found inputs") - .unwrap_or_else(|| { - unreachable!( - "transform output mis-configured - output for port {:?} missing", - &input.port - ) - }) - .log_schema_definition - .clone() - .unwrap_or(merged_definition); - - if config.schema_enabled() { - definition = definition.merge(transform_definition); - } else { - // Schemas must be implemented for components that support the "Vector" namespace, so since - // one doesn't exist here, we can assume it's using the default "legacy" namespace schema definit - definition = definition.merge(Definition::default_for_namespace( - transform_definition.log_namespaces(), - )); - } + let input_definitions = possible_definitions(inputs, config, cache); + + let mut transform_definition = input.with_definitions( + config + .transform_output_for_port(key, &input.port, &input_definitions) + .expect("transform must exist - already found inputs") + .unwrap_or_else(|| { + unreachable!( + "transform output mis-configured - output for port {:?} missing", + &input.port + ) + }) + .log_schema_definitions, + ); + + definitions.append(&mut transform_definition); } } - definition + + definitions } /// Get a list of definitions from individual pipelines feeding into a component. @@ -124,14 +88,14 @@ pub fn merged_definition( pub(super) fn expanded_definitions( inputs: &[OutputId], config: &dyn ComponentContainer, - cache: &mut HashMap<(bool, Vec), Vec>, -) -> Vec { + cache: &mut Cache, +) -> Vec<(OutputId, Definition)> { // Try to get the definition from the cache. 
if let Some(definitions) = cache.get(&(config.schema_enabled(), inputs.to_vec())) { return definitions.clone(); } - let mut definitions = vec![]; + let mut definitions: Vec<(OutputId, Definition)> = vec![]; let mut merged_cache = HashMap::default(); for input in inputs { @@ -146,40 +110,38 @@ pub(super) fn expanded_definitions( // After getting the source matching to the given input, we need to further narrow the // actual output of the source feeding into this input, and then get the definition // belonging to that output. - let source_definition = outputs - .iter() - .find_map(|output| { - if output.port == input.port { - Some( - output - .log_schema_definition - .clone() - .unwrap_or_else(Definition::default_legacy_namespace), - ) - } else { - None - } - }) - .unwrap_or_else(|| { - // If we find no match, it means the topology is misconfigured. This is a fatal - // error, but other parts of the topology builder deal with this state. - unreachable!("source output mis-configured") - }); - - definitions.push(source_definition); + let mut source_definitions = + outputs + .into_iter() + .find_map(|output| { + if output.port == input.port { + Some(input.with_definitions( + output.schema_definition(config.schema_enabled()), + )) + } else { + None + } + }) + .unwrap_or_else(|| { + // If we find no match, it means the topology is misconfigured. This is a fatal + // error, but other parts of the topology builder deal with this state. + unreachable!("source output mis-configured") + }); + + definitions.append(&mut source_definitions); // A transform can receive from multiple inputs, and each input needs to be expanded to // a new pipeline. 
} else if let Some(inputs) = config.transform_inputs(key) { - let merged_definition = merged_definition(inputs, config, &mut merged_cache); + let input_definitions = possible_definitions(inputs, config, &mut merged_cache); - let maybe_transform_definition = config - .transform_outputs(key, &merged_definition) + let mut transform_definition = config + .transform_outputs(key, &input_definitions) .expect("already found inputs") .iter() .find_map(|output| { if output.port == input.port { - Some(output.log_schema_definition.clone()) + Some(input.with_definitions(output.log_schema_definitions.clone())) } else { None } @@ -188,24 +150,9 @@ pub(super) fn expanded_definitions( // error, but other parts of the topology builder deal with this state. .expect("transform output misconfigured"); - // We need to iterate over the individual inputs of a transform, as we are expected to - // expand each input into its own pipeline. - for input in inputs { - let mut expanded_definitions = match &maybe_transform_definition { - // If the transform defines its own schema definition, we no longer care about - // any upstream definitions, and use the transform definition instead. - Some(transform_definition) => vec![transform_definition.clone()], - - // If the transform does not define its own schema definition, we need to - // recursively call this function in case upstream components expand into - // multiple pipelines. - None => expanded_definitions(&[input.clone()], config, cache), - }; - - // Append whatever number of additional pipelines we created to the existing - // pipeline definitions. - definitions.append(&mut expanded_definitions); - } + // Append whatever number of additional pipelines we created to the existing + // pipeline definitions. + definitions.append(&mut transform_definition); } } @@ -217,6 +164,66 @@ pub(super) fn expanded_definitions( definitions } +/// Returns a list of definitions from the given inputs. 
+pub(crate) fn input_definitions( + inputs: &[OutputId], + config: &Config, + cache: &mut Cache, +) -> Vec<(OutputId, Definition)> { + if inputs.is_empty() { + return vec![]; + } + + if let Some(definitions) = cache.get(&(config.schema_enabled(), inputs.to_vec())) { + return definitions.clone(); + } + + let mut definitions = Vec::new(); + + for input in inputs { + let key = &input.component; + + // If the input is a source we retrieve the definitions from the source + // (there should only be one) and add it to the return. + if let Ok(maybe_output) = config.source_output_for_port(key, &input.port) { + let mut source_definitions = input.with_definitions( + maybe_output + .unwrap_or_else(|| { + unreachable!( + "source output mis-configured - output for port {:?} missing", + &input.port + ) + }) + .schema_definition(config.schema_enabled()), + ); + + definitions.append(&mut source_definitions); + } + + // If the input is a transform we recurse to the upstream components to retrieve + // their definitions and pass it through the transform to get the new definitions. + if let Some(inputs) = config.transform_inputs(key) { + let transform_definitions = input_definitions(inputs, config, cache); + let mut transform_definitions = input.with_definitions( + config + .transform_output_for_port(key, &input.port, &transform_definitions) + .expect("transform must exist") + .unwrap_or_else(|| { + unreachable!( + "transform output mis-configured - output for port {:?} missing", + &input.port + ) + }) + .log_schema_definitions, + ); + + definitions.append(&mut transform_definitions); + } + } + + definitions +} + pub(super) fn validate_sink_expectations( key: &ComponentKey, sink: &SinkOuter, @@ -234,7 +241,7 @@ pub(super) fn validate_sink_expectations( let definitions = expanded_definitions(&sink.inputs, config, &mut cache); // Validate each individual definition against the sink requirement. 
- for definition in definitions { + for (_output, definition) in definitions { if let Err(err) = requirement.validate(&definition, config.schema.validation) { errors.append( &mut err @@ -257,15 +264,15 @@ pub(super) fn validate_sink_expectations( pub trait ComponentContainer { fn schema_enabled(&self) -> bool; - fn source_outputs(&self, key: &ComponentKey) -> Option>; + fn source_outputs(&self, key: &ComponentKey) -> Option>; fn transform_inputs(&self, key: &ComponentKey) -> Option<&[OutputId]>; fn transform_outputs( &self, key: &ComponentKey, - merged_definition: &Definition, - ) -> Option>; + input_definitions: &[(OutputId, Definition)], + ) -> Option>; /// Gets the transform output for the given port. /// @@ -276,9 +283,9 @@ pub trait ComponentContainer { &self, key: &ComponentKey, port: &Option, - merged_definition: &Definition, - ) -> Result, ()> { - if let Some(outputs) = self.transform_outputs(key, merged_definition) { + input_definitions: &[(OutputId, Definition)], + ) -> Result, ()> { + if let Some(outputs) = self.transform_outputs(key, input_definitions) { Ok(get_output_for_port(outputs, port)) } else { Err(()) @@ -294,16 +301,26 @@ pub trait ComponentContainer { &self, key: &ComponentKey, port: &Option, - ) -> Result, ()> { + ) -> Result, ()> { if let Some(outputs) = self.source_outputs(key) { - Ok(get_output_for_port(outputs, port)) + Ok(get_source_output_for_port(outputs, port)) } else { Err(()) } } } -fn get_output_for_port(outputs: Vec, port: &Option) -> Option { +fn get_output_for_port( + outputs: Vec, + port: &Option, +) -> Option { + outputs.into_iter().find(|output| &output.port == port) +} + +fn get_source_output_for_port( + outputs: Vec, + port: &Option, +) -> Option { outputs.into_iter().find(|output| &output.port == port) } @@ -312,7 +329,7 @@ impl ComponentContainer for Config { self.schema.enabled } - fn source_outputs(&self, key: &ComponentKey) -> Option> { + fn source_outputs(&self, key: &ComponentKey) -> Option> { self.source(key) 
.map(|source| source.inner.outputs(self.schema.log_namespace())) } @@ -324,12 +341,12 @@ impl ComponentContainer for Config { fn transform_outputs( &self, key: &ComponentKey, - merged_definition: &Definition, - ) -> Option> { + input_definitions: &[(OutputId, Definition)], + ) -> Option> { self.transform(key).map(|source| { source .inner - .outputs(merged_definition, self.schema.log_namespace()) + .outputs(input_definitions, self.schema.log_namespace()) }) } } @@ -339,145 +356,20 @@ mod tests { use std::collections::HashMap; use indexmap::IndexMap; - use lookup::lookup_v2::parse_target_path; use lookup::owned_value_path; use similar_asserts::assert_eq; use value::Kind; - use vector_core::config::{DataType, Output}; + use vector_core::config::{DataType, SourceOutput, TransformOutput}; use super::*; - #[test] - fn test_merged_definition() { - struct TestCase { - inputs: Vec<(&'static str, Option)>, - sources: IndexMap<&'static str, Vec>, - transforms: IndexMap<&'static str, Vec>, - want: Definition, - } - - impl ComponentContainer for TestCase { - fn schema_enabled(&self) -> bool { - true - } - - fn source_outputs(&self, key: &ComponentKey) -> Option> { - self.sources.get(key.id()).cloned() - } - - fn transform_inputs(&self, _key: &ComponentKey) -> Option<&[OutputId]> { - None - } - - fn transform_outputs( - &self, - key: &ComponentKey, - _merged_definition: &Definition, - ) -> Option> { - self.transforms.get(key.id()).cloned() - } - } - - for (title, case) in HashMap::from([ - ( - "no inputs", - TestCase { - inputs: vec![], - sources: IndexMap::default(), - transforms: IndexMap::default(), - want: Definition::default_legacy_namespace(), - }, - ), - ( - "single input, source with empty schema", - TestCase { - inputs: vec![("foo", None)], - sources: IndexMap::from([("foo", vec![Output::default(DataType::all())])]), - transforms: IndexMap::default(), - want: Definition::default_legacy_namespace(), - }, - ), - ( - "single input, source with schema", - TestCase { - 
inputs: vec![("source-foo", None)], - sources: IndexMap::from([( - "source-foo", - vec![Output::default(DataType::all()).with_schema_definition( - Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("foo"), - Kind::integer().or_bytes(), - Some("foo bar"), - ), - )], - )]), - transforms: IndexMap::default(), - want: Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("foo"), - Kind::integer().or_bytes(), - Some("foo bar"), - ), - }, - ), - ( - "multiple inputs, sources with schema", - TestCase { - inputs: vec![("source-foo", None), ("source-bar", None)], - sources: IndexMap::from([ - ( - "source-foo", - vec![Output::default(DataType::all()).with_schema_definition( - Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("foo"), - Kind::integer().or_bytes(), - Some("foo bar"), - ), - )], - ), - ( - "source-bar", - vec![Output::default(DataType::all()).with_schema_definition( - Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("foo"), - Kind::timestamp(), - Some("baz qux"), - ), - )], - ), - ]), - transforms: IndexMap::default(), - want: Definition::empty_legacy_namespace() - .with_event_field( - &owned_value_path!("foo"), - Kind::integer().or_bytes().or_timestamp(), - Some("foo bar"), - ) - .with_meaning(parse_target_path("foo").unwrap(), "baz qux"), - }, - ), - ]) { - let inputs = case - .inputs - .iter() - .cloned() - .map(|(key, port)| OutputId { - component: key.into(), - port, - }) - .collect::>(); - - let got = merged_definition(&inputs, &case, &mut HashMap::default()); - assert_eq!(got, case.want, "{}", title); - } - } - #[test] fn test_expanded_definition() { struct TestCase { inputs: Vec<(&'static str, Option)>, - sources: IndexMap<&'static str, Vec>, - transforms: IndexMap<&'static str, (Vec, Vec)>, - want: Vec, + sources: IndexMap<&'static str, Vec>, + transforms: IndexMap<&'static str, (Vec, Vec)>, + want: Vec<(OutputId, Definition)>, } impl ComponentContainer 
for TestCase { @@ -485,7 +377,7 @@ mod tests { true } - fn source_outputs(&self, key: &ComponentKey) -> Option> { + fn source_outputs(&self, key: &ComponentKey) -> Option> { self.sources.get(key.id()).cloned() } @@ -496,8 +388,8 @@ mod tests { fn transform_outputs( &self, key: &ComponentKey, - _merged_definition: &Definition, - ) -> Option> { + _input_definitions: &[(OutputId, Definition)], + ) -> Option> { self.transforms.get(key.id()).cloned().map(|v| v.1) } } @@ -516,9 +408,15 @@ mod tests { "single input, source with default schema", TestCase { inputs: vec![("foo", None)], - sources: IndexMap::from([("foo", vec![Output::default(DataType::all())])]), + sources: IndexMap::from([( + "foo", + vec![SourceOutput::new_logs( + DataType::all(), + Definition::default_legacy_namespace(), + )], + )]), transforms: IndexMap::default(), - want: vec![Definition::default_legacy_namespace()], + want: vec![("foo".into(), Definition::default_legacy_namespace())], }, ), ( @@ -527,7 +425,8 @@ mod tests { inputs: vec![("source-foo", None)], sources: IndexMap::from([( "source-foo", - vec![Output::default(DataType::all()).with_schema_definition( + vec![SourceOutput::new_logs( + DataType::all(), Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("foo"), Kind::integer().or_bytes(), @@ -536,10 +435,13 @@ mod tests { )], )]), transforms: IndexMap::default(), - want: vec![Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("foo"), - Kind::integer().or_bytes(), - Some("foo bar"), + want: vec![( + "source-foo".into(), + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("foo"), + Kind::integer().or_bytes(), + Some("foo bar"), + ), )], }, ), @@ -550,7 +452,8 @@ mod tests { sources: IndexMap::from([ ( "source-foo", - vec![Output::default(DataType::all()).with_schema_definition( + vec![SourceOutput::new_logs( + DataType::all(), Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("foo"), 
Kind::integer().or_bytes(), @@ -560,7 +463,8 @@ mod tests { ), ( "source-bar", - vec![Output::default(DataType::all()).with_schema_definition( + vec![SourceOutput::new_logs( + DataType::all(), Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("foo"), Kind::timestamp(), @@ -571,15 +475,21 @@ mod tests { ]), transforms: IndexMap::default(), want: vec![ - Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("foo"), - Kind::integer().or_bytes(), - Some("foo bar"), + ( + "source-foo".into(), + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("foo"), + Kind::integer().or_bytes(), + Some("foo bar"), + ), ), - Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("foo"), - Kind::timestamp(), - Some("baz qux"), + ( + "source-bar".into(), + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("foo"), + Kind::timestamp(), + Some("baz qux"), + ), ), ], }, @@ -591,7 +501,8 @@ mod tests { sources: IndexMap::from([ ( "source-foo", - vec![Output::default(DataType::all()).with_schema_definition( + vec![SourceOutput::new_logs( + DataType::all(), Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("foo"), Kind::boolean(), @@ -601,7 +512,8 @@ mod tests { ), ( "source-bar", - vec![Output::default(DataType::all()).with_schema_definition( + vec![SourceOutput::new_logs( + DataType::all(), Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("bar"), Kind::integer(), @@ -614,25 +526,32 @@ mod tests { "transform-baz", ( vec![OutputId::from("source-foo")], - vec![Output::default(DataType::all()).with_schema_definition( - Definition::empty_legacy_namespace().with_event_field( + vec![TransformOutput::new( + DataType::all(), + vec![Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("baz"), Kind::regex(), Some("baz"), - ), + )], )], ), )]), want: vec![ - Definition::empty_legacy_namespace().with_event_field( - 
&owned_value_path!("bar"), - Kind::integer(), - Some("bar"), + ( + "source-bar".into(), + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("bar"), + Kind::integer(), + Some("bar"), + ), ), - Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("baz"), - Kind::regex(), - Some("baz"), + ( + "transform-baz".into(), + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("baz"), + Kind::regex(), + Some("baz"), + ), ), ], }, @@ -652,7 +571,8 @@ mod tests { sources: IndexMap::from([ ( "Source 1", - vec![Output::default(DataType::all()).with_schema_definition( + vec![SourceOutput::new_logs( + DataType::all(), Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("source-1"), Kind::boolean(), @@ -662,7 +582,8 @@ mod tests { ), ( "Source 2", - vec![Output::default(DataType::all()).with_schema_definition( + vec![SourceOutput::new_logs( + DataType::all(), Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("source-2"), Kind::integer(), @@ -676,12 +597,13 @@ mod tests { "Transform 1", ( vec![OutputId::from("Source 1")], - vec![Output::default(DataType::all()).with_schema_definition( - Definition::empty_legacy_namespace().with_event_field( + vec![TransformOutput::new( + DataType::all(), + vec![Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("transform-1"), Kind::regex(), None, - ), + )], )], ), ), @@ -689,12 +611,13 @@ mod tests { "Transform 2", ( vec![OutputId::from("Source 2")], - vec![Output::default(DataType::all()).with_schema_definition( - Definition::empty_legacy_namespace().with_event_field( + vec![TransformOutput::new( + DataType::all(), + vec![Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("transform-2"), Kind::float().or_null(), Some("transform-2"), - ), + )], )], ), ), @@ -702,12 +625,13 @@ mod tests { "Transform 3", ( vec![OutputId::from("Source 2")], - 
vec![Output::default(DataType::all()).with_schema_definition( - Definition::empty_legacy_namespace().with_event_field( + vec![TransformOutput::new( + DataType::all(), + vec![Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("transform-3"), Kind::integer(), Some("transform-3"), - ), + )], )], ), ), @@ -715,12 +639,13 @@ mod tests { "Transform 4", ( vec![OutputId::from("Source 2")], - vec![Output::default(DataType::all()).with_schema_definition( - Definition::empty_legacy_namespace().with_event_field( + vec![TransformOutput::new( + DataType::all(), + vec![Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("transform-4"), Kind::timestamp().or_bytes(), Some("transform-4"), - ), + )], )], ), ), @@ -728,40 +653,44 @@ mod tests { "Transform 5", ( vec![OutputId::from("Transform 3"), OutputId::from("Transform 4")], - vec![Output::default(DataType::all()).with_schema_definition( - Definition::empty_legacy_namespace().with_event_field( + vec![TransformOutput::new( + DataType::all(), + vec![Definition::empty_legacy_namespace().with_event_field( &owned_value_path!("transform-5"), Kind::boolean(), Some("transform-5"), - ), + )], )], ), ), ]), want: vec![ // Pipeline 1 - Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("transform-1"), - Kind::regex(), - None, + ( + "Transform 1".into(), + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("transform-1"), + Kind::regex(), + None, + ), ), // Pipeline 2 - Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("transform-2"), - Kind::float().or_null(), - Some("transform-2"), + ( + "Transform 2".into(), + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("transform-2"), + Kind::float().or_null(), + Some("transform-2"), + ), ), // Pipeline 3 - Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("transform-5"), - Kind::boolean(), - Some("transform-5"), - ), - // 
Pipeline 4 - Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("transform-5"), - Kind::boolean(), - Some("transform-5"), + ( + "Transform 5".into(), + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("transform-5"), + Kind::boolean(), + Some("transform-5"), + ), ), ], }, diff --git a/src/transforms/aggregate.rs b/src/transforms/aggregate.rs index 31673bfd6bd71..fdeb73eaa00a8 100644 --- a/src/transforms/aggregate.rs +++ b/src/transforms/aggregate.rs @@ -10,7 +10,7 @@ use vector_config::configurable_component; use vector_core::config::LogNamespace; use crate::{ - config::{DataType, Input, Output, TransformConfig, TransformContext}, + config::{DataType, Input, OutputId, TransformConfig, TransformContext, TransformOutput}, event::{metric, Event, EventMetadata}, internal_events::{AggregateEventRecorded, AggregateFlushed, AggregateUpdateFailed}, schema, @@ -46,8 +46,12 @@ impl TransformConfig for AggregateConfig { Input::metric() } - fn outputs(&self, _: &schema::Definition, _: LogNamespace) -> Vec { - vec![Output::default(DataType::Metric)] + fn outputs( + &self, + _: &[(OutputId, schema::Definition)], + _: LogNamespace, + ) -> Vec { + vec![TransformOutput::new(DataType::Metric, vec![])] } } diff --git a/src/transforms/aws_ec2_metadata.rs b/src/transforms/aws_ec2_metadata.rs index 698c92c8d5268..560e82ea917be 100644 --- a/src/transforms/aws_ec2_metadata.rs +++ b/src/transforms/aws_ec2_metadata.rs @@ -18,8 +18,9 @@ use value::Kind; use vector_config::configurable_component; use vector_core::config::LogNamespace; +use crate::config::OutputId; use crate::{ - config::{DataType, Input, Output, ProxyConfig, TransformConfig, TransformContext}, + config::{DataType, Input, ProxyConfig, TransformConfig, TransformContext, TransformOutput}, event::Event, http::HttpClient, internal_events::{AwsEc2MetadataRefreshError, AwsEc2MetadataRefreshSuccessful}, @@ -243,7 +244,11 @@ impl TransformConfig for Ec2Metadata { 
Input::new(DataType::Metric | DataType::Log) } - fn outputs(&self, merged_definition: &schema::Definition, _: LogNamespace) -> Vec { + fn outputs( + &self, + input_definitions: &[(OutputId, schema::Definition)], + _: LogNamespace, + ) -> Vec { let added_keys = Keys::new(self.namespace.clone()); let paths = [ @@ -263,15 +268,24 @@ impl TransformConfig for Ec2Metadata { &added_keys.tags_key.log_path, ]; - let mut schema_definition = merged_definition.clone(); + let schema_definition = input_definitions + .iter() + .map(|(_output, definition)| { + let mut schema_definition = definition.clone(); - for path in paths { - schema_definition = - schema_definition.with_field(path, Kind::bytes().or_undefined(), None); - } + for path in paths { + schema_definition = + schema_definition.with_field(path, Kind::bytes().or_undefined(), None); + } + + schema_definition + }) + .collect(); - vec![Output::default(DataType::Metric | DataType::Log) - .with_schema_definition(schema_definition)] + vec![TransformOutput::new( + DataType::Metric | DataType::Log, + schema_definition, + )] } } diff --git a/src/transforms/dedupe.rs b/src/transforms/dedupe.rs index 9a16cb9c6d824..c0fe99f0204cd 100644 --- a/src/transforms/dedupe.rs +++ b/src/transforms/dedupe.rs @@ -8,7 +8,8 @@ use vector_core::config::LogNamespace; use crate::{ config::{ - log_schema, DataType, GenerateConfig, Input, Output, TransformConfig, TransformContext, + log_schema, DataType, GenerateConfig, Input, OutputId, TransformConfig, TransformContext, + TransformOutput, }, event::{Event, Value}, internal_events::DedupeEventsDropped, @@ -152,8 +153,18 @@ impl TransformConfig for DedupeConfig { Input::log() } - fn outputs(&self, merged_definition: &schema::Definition, _: LogNamespace) -> Vec { - vec![Output::default(DataType::Log).with_schema_definition(merged_definition.clone())] + fn outputs( + &self, + input_definitions: &[(OutputId, schema::Definition)], + _: LogNamespace, + ) -> Vec { + vec![TransformOutput::new( + 
DataType::Log, + input_definitions + .iter() + .map(|(_output, definition)| definition.clone()) + .collect(), + )] } } diff --git a/src/transforms/filter.rs b/src/transforms/filter.rs index 212b38622f61c..f0bed3c180bc6 100644 --- a/src/transforms/filter.rs +++ b/src/transforms/filter.rs @@ -4,7 +4,10 @@ use vector_core::config::LogNamespace; use crate::{ conditions::{AnyCondition, Condition}, - config::{DataType, GenerateConfig, Input, Output, TransformConfig, TransformContext}, + config::{ + DataType, GenerateConfig, Input, OutputId, TransformConfig, TransformContext, + TransformOutput, + }, event::Event, internal_events::FilterEventsDropped, schema, @@ -48,8 +51,18 @@ impl TransformConfig for FilterConfig { Input::all() } - fn outputs(&self, merged_definition: &schema::Definition, _: LogNamespace) -> Vec { - vec![Output::default(DataType::all()).with_schema_definition(merged_definition.clone())] + fn outputs( + &self, + input_definitions: &[(OutputId, schema::Definition)], + _: LogNamespace, + ) -> Vec { + vec![TransformOutput::new( + DataType::all(), + input_definitions + .iter() + .map(|(_output, definition)| definition.clone()) + .collect(), + )] } fn enable_concurrency(&self) -> bool { diff --git a/src/transforms/log_to_metric.rs b/src/transforms/log_to_metric.rs index 40f4fb6ba44c0..86e2eb9eb30e6 100644 --- a/src/transforms/log_to_metric.rs +++ b/src/transforms/log_to_metric.rs @@ -6,7 +6,10 @@ use vector_config::configurable_component; use vector_core::config::LogNamespace; use crate::{ - config::{DataType, GenerateConfig, Input, Output, TransformConfig, TransformContext}, + config::{ + DataType, GenerateConfig, Input, OutputId, TransformConfig, TransformContext, + TransformOutput, + }, event::{ metric::{Metric, MetricKind, MetricTags, MetricValue, StatisticKind, TagValue}, Event, Value, @@ -154,8 +157,13 @@ impl TransformConfig for LogToMetricConfig { Input::log() } - fn outputs(&self, _: &schema::Definition, _: LogNamespace) -> Vec { - 
vec![Output::default(DataType::Metric)] + fn outputs( + &self, + _: &[(OutputId, schema::Definition)], + _: LogNamespace, + ) -> Vec { + // Converting the log to a metric means we lose all incoming `Definition`s. + vec![TransformOutput::new(DataType::Metric, Vec::new())] } fn enable_concurrency(&self) -> bool { diff --git a/src/transforms/lua/mod.rs b/src/transforms/lua/mod.rs index 6953e9f7c30db..ab43bc911b948 100644 --- a/src/transforms/lua/mod.rs +++ b/src/transforms/lua/mod.rs @@ -5,7 +5,7 @@ use vector_config::configurable_component; use vector_core::config::LogNamespace; use crate::{ - config::{GenerateConfig, Input, Output, TransformConfig, TransformContext}, + config::{GenerateConfig, Input, OutputId, TransformConfig, TransformContext, TransformOutput}, schema, transforms::Transform, }; @@ -103,10 +103,14 @@ impl TransformConfig for LuaConfig { } } - fn outputs(&self, merged_definition: &schema::Definition, _: LogNamespace) -> Vec { + fn outputs( + &self, + input_definitions: &[(OutputId, schema::Definition)], + _: LogNamespace, + ) -> Vec { match self { - LuaConfig::V1(v1) => v1.config.outputs(merged_definition), - LuaConfig::V2(v2) => v2.config.outputs(merged_definition), + LuaConfig::V1(v1) => v1.config.outputs(input_definitions), + LuaConfig::V2(v2) => v2.config.outputs(input_definitions), } } } diff --git a/src/transforms/lua/v1/mod.rs b/src/transforms/lua/v1/mod.rs index 73a165970c44e..efab62a686dcd 100644 --- a/src/transforms/lua/v1/mod.rs +++ b/src/transforms/lua/v1/mod.rs @@ -5,9 +5,10 @@ use ordered_float::NotNan; use snafu::{ResultExt, Snafu}; use vector_config::configurable_component; +use crate::config::OutputId; use crate::schema::Definition; use crate::{ - config::{DataType, Input, Output}, + config::{DataType, Input, TransformOutput}, event::{Event, Value}, internal_events::{LuaGcTriggered, LuaScriptError}, schema, @@ -47,11 +48,19 @@ impl LuaConfig { Input::log() } - pub fn outputs(&self, merged_definition: &schema::Definition) -> Vec { + 
pub fn outputs( + &self, + input_definitions: &[(OutputId, schema::Definition)], + ) -> Vec { // Lua causes the type definition to be reset - let definition = Definition::default_for_namespace(merged_definition.log_namespaces()); + let namespaces = input_definitions + .iter() + .flat_map(|(_output, definition)| definition.log_namespaces().clone()) + .collect(); + + let definition = Definition::default_for_namespace(&namespaces); - vec![Output::default(DataType::Log).with_schema_definition(definition)] + vec![TransformOutput::new(DataType::Log, vec![definition])] } } diff --git a/src/transforms/lua/v2/mod.rs b/src/transforms/lua/v2/mod.rs index 13f9d76e503ac..88bf5fd2086f4 100644 --- a/src/transforms/lua/v2/mod.rs +++ b/src/transforms/lua/v2/mod.rs @@ -7,10 +7,11 @@ use vector_config::configurable_component; pub use vector_core::event::lua; use vector_core::transform::runtime_transform::{RuntimeTransform, Timer}; +use crate::config::OutputId; use crate::event::lua::event::LuaEvent; use crate::schema::Definition; use crate::{ - config::{self, DataType, Input, Output, CONFIG_PATHS}, + config::{self, DataType, Input, TransformOutput, CONFIG_PATHS}, event::Event, internal_events::{LuaBuildError, LuaGcTriggered}, schema, @@ -177,11 +178,22 @@ impl LuaConfig { Input::new(DataType::Metric | DataType::Log) } - pub fn outputs(&self, merged_definition: &schema::Definition) -> Vec { + pub fn outputs( + &self, + input_definitions: &[(OutputId, schema::Definition)], + ) -> Vec { // Lua causes the type definition to be reset - let definition = Definition::default_for_namespace(merged_definition.log_namespaces()); + let namespaces = input_definitions + .iter() + .flat_map(|(_output, definition)| definition.log_namespaces().clone()) + .collect(); + + let definition = Definition::default_for_namespace(&namespaces); - vec![Output::default(DataType::Metric | DataType::Log).with_schema_definition(definition)] + vec![TransformOutput::new( + DataType::Metric | DataType::Log, + 
vec![definition], + )] } } diff --git a/src/transforms/metric_to_log.rs b/src/transforms/metric_to_log.rs index f3d7e8a0e95e0..155f32ae48c7b 100644 --- a/src/transforms/metric_to_log.rs +++ b/src/transforms/metric_to_log.rs @@ -11,9 +11,11 @@ use vector_config::configurable_component; use vector_core::config::LogNamespace; use vrl::prelude::BTreeMap; +use crate::config::OutputId; use crate::{ config::{ - log_schema, DataType, GenerateConfig, Input, Output, TransformConfig, TransformContext, + log_schema, DataType, GenerateConfig, Input, TransformConfig, TransformContext, + TransformOutput, }, event::{self, Event, LogEvent, Metric}, internal_events::MetricToLogSerializeError, @@ -90,7 +92,11 @@ impl TransformConfig for MetricToLogConfig { Input::metric() } - fn outputs(&self, _: &Definition, global_log_namespace: LogNamespace) -> Vec { + fn outputs( + &self, + _: &[(OutputId, Definition)], + global_log_namespace: LogNamespace, + ) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); let mut schema_definition = Definition::default_for_namespace(&BTreeSet::from([log_namespace])) @@ -223,7 +229,7 @@ impl TransformConfig for MetricToLogConfig { } } - vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] + vec![TransformOutput::new(DataType::Log, vec![schema_definition])] } fn enable_concurrency(&self) -> bool { diff --git a/src/transforms/reduce/mod.rs b/src/transforms/reduce/mod.rs index ce3f41afe6f83..618f44fcbb1f2 100644 --- a/src/transforms/reduce/mod.rs +++ b/src/transforms/reduce/mod.rs @@ -14,9 +14,10 @@ use lookup::PathPrefix; use serde_with::serde_as; use vector_config::configurable_component; +use crate::config::OutputId; use crate::{ conditions::{AnyCondition, Condition}, - config::{DataType, Input, Output, TransformConfig, TransformContext}, + config::{DataType, Input, TransformConfig, TransformContext, TransformOutput}, event::{discriminant::Discriminant, Event, EventMetadata, LogEvent}, 
internal_events::ReduceStaleEventFlushed, schema, @@ -124,91 +125,102 @@ impl TransformConfig for ReduceConfig { Input::log() } - fn outputs(&self, input: &schema::Definition, _: LogNamespace) -> Vec { - let mut schema_definition = input.clone(); - - for (key, merge_strategy) in self.merge_strategies.iter() { - let key = if let Ok(key) = parse_target_path(key) { - key - } else { - continue; - }; - - let input_kind = match key.prefix { - PathPrefix::Event => schema_definition.event_kind().at_path(&key.path), - PathPrefix::Metadata => schema_definition.metadata_kind().at_path(&key.path), - }; - - let new_kind = match merge_strategy { - MergeStrategy::Discard | MergeStrategy::Retain => { - /* does not change the type */ - input_kind.clone() - } - MergeStrategy::Sum | MergeStrategy::Max | MergeStrategy::Min => { - // only keeps integer / float values - match (input_kind.contains_integer(), input_kind.contains_float()) { - (true, true) => Kind::float().or_integer(), - (true, false) => Kind::integer(), - (false, true) => Kind::float(), - (false, false) => Kind::undefined(), - } - } - MergeStrategy::Array => { - let unknown_kind = input_kind.clone(); - Kind::array(Collection::empty().with_unknown(unknown_kind)) - } - MergeStrategy::Concat => { - let mut new_kind = Kind::never(); + fn outputs( + &self, + input_definitions: &[(OutputId, schema::Definition)], + _: LogNamespace, + ) -> Vec { + let mut output_definitions = Vec::new(); + + for (_output, input) in input_definitions { + let mut schema_definition = input.clone(); + + for (key, merge_strategy) in self.merge_strategies.iter() { + let key = if let Ok(key) = parse_target_path(key) { + key + } else { + continue; + }; + + let input_kind = match key.prefix { + PathPrefix::Event => schema_definition.event_kind().at_path(&key.path), + PathPrefix::Metadata => schema_definition.metadata_kind().at_path(&key.path), + }; - if input_kind.contains_bytes() { - new_kind.add_bytes(); + let new_kind = match merge_strategy { + 
MergeStrategy::Discard | MergeStrategy::Retain => { + /* does not change the type */ + input_kind.clone() } - if let Some(array) = input_kind.as_array() { - // array elements can be either any type that the field can be, or any - // element of the array - let array_elements = array.reduced_kind().union(input_kind.without_array()); - new_kind.add_array(Collection::empty().with_unknown(array_elements)); + MergeStrategy::Sum | MergeStrategy::Max | MergeStrategy::Min => { + // only keeps integer / float values + match (input_kind.contains_integer(), input_kind.contains_float()) { + (true, true) => Kind::float().or_integer(), + (true, false) => Kind::integer(), + (false, true) => Kind::float(), + (false, false) => Kind::undefined(), + } } - new_kind - } - MergeStrategy::ConcatNewline | MergeStrategy::ConcatRaw => { - // can only produce bytes (or undefined) - if input_kind.contains_bytes() { - Kind::bytes() - } else { - Kind::undefined() + MergeStrategy::Array => { + let unknown_kind = input_kind.clone(); + Kind::array(Collection::empty().with_unknown(unknown_kind)) } - } - MergeStrategy::ShortestArray | MergeStrategy::LongestArray => { - if let Some(array) = input_kind.as_array() { - Kind::array(array.clone()) - } else { - Kind::undefined() + MergeStrategy::Concat => { + let mut new_kind = Kind::never(); + + if input_kind.contains_bytes() { + new_kind.add_bytes(); + } + if let Some(array) = input_kind.as_array() { + // array elements can be either any type that the field can be, or any + // element of the array + let array_elements = + array.reduced_kind().union(input_kind.without_array()); + new_kind.add_array(Collection::empty().with_unknown(array_elements)); + } + new_kind } - } - MergeStrategy::FlatUnique => { - let mut array_elements = input_kind.without_array().without_object(); - if let Some(array) = input_kind.as_array() { - array_elements = array_elements.union(array.reduced_kind()); + MergeStrategy::ConcatNewline | MergeStrategy::ConcatRaw => { + // can only 
produce bytes (or undefined) + if input_kind.contains_bytes() { + Kind::bytes() + } else { + Kind::undefined() + } } - if let Some(object) = input_kind.as_object() { - array_elements = array_elements.union(object.reduced_kind()); + MergeStrategy::ShortestArray | MergeStrategy::LongestArray => { + if let Some(array) = input_kind.as_array() { + Kind::array(array.clone()) + } else { + Kind::undefined() + } } - Kind::array(Collection::empty().with_unknown(array_elements)) - } - }; + MergeStrategy::FlatUnique => { + let mut array_elements = input_kind.without_array().without_object(); + if let Some(array) = input_kind.as_array() { + array_elements = array_elements.union(array.reduced_kind()); + } + if let Some(object) = input_kind.as_object() { + array_elements = array_elements.union(object.reduced_kind()); + } + Kind::array(Collection::empty().with_unknown(array_elements)) + } + }; - // all of the merge strategies are optional. They won't produce a value unless a value actually exists - let new_kind = if input_kind.contains_undefined() { - new_kind.or_undefined() - } else { - new_kind - }; + // all of the merge strategies are optional. 
They won't produce a value unless a value actually exists + let new_kind = if input_kind.contains_undefined() { + new_kind.or_undefined() + } else { + new_kind + }; + + schema_definition = schema_definition.with_field(&key, new_kind, None); + } - schema_definition = schema_definition.with_field(&key, new_kind, None); + output_definitions.push(schema_definition); } - vec![Output::default(DataType::Log).with_schema_definition(schema_definition)] + vec![TransformOutput::new(DataType::Log, output_definitions)] } } @@ -502,13 +514,12 @@ group_by = [ "request_id" ] Kind::bytes().or_undefined(), None, ); - let schema_definition = reduce_config - .outputs(&input_definition, LogNamespace::Legacy) + let schema_definitions = reduce_config + .outputs(&[("test".into(), input_definition)], LogNamespace::Legacy) .first() .unwrap() - .log_schema_definition - .clone() - .unwrap(); + .log_schema_definitions + .clone(); let (tx, rx) = mpsc::channel(1); let (topology, mut out) = create_topology(ReceiverStream::new(rx), reduce_config).await; @@ -546,14 +557,18 @@ group_by = [ "request_id" ] assert_eq!(output_1["message"], "test message 1".into()); assert_eq!(output_1["counter"], Value::from(8)); assert_eq!(output_1.metadata(), &metadata_1); - schema_definition.assert_valid_for_event(&output_1.into()); + schema_definitions + .iter() + .for_each(|definition| definition.assert_valid_for_event(&output_1.clone().into())); let output_2 = out.recv().await.unwrap().into_log(); assert_eq!(output_2["message"], "test message 2".into()); assert_eq!(output_2["extra_field"], "value1".into()); assert_eq!(output_2["counter"], Value::from(7)); assert_eq!(output_2.metadata(), &metadata_2); - schema_definition.assert_valid_for_event(&output_2.into()); + schema_definitions + .iter() + .for_each(|definition| definition.assert_valid_for_event(&output_2.clone().into())); drop(tx); topology.stop().await; diff --git a/src/transforms/remap.rs b/src/transforms/remap.rs index 1083634beeb6b..b1b35823175dd 100644 
--- a/src/transforms/remap.rs +++ b/src/transforms/remap.rs @@ -24,9 +24,11 @@ use vrl::{ CompileConfig, Program, Runtime, Terminate, VrlRuntime, }; +use crate::config::OutputId; use crate::{ config::{ - log_schema, ComponentKey, DataType, Input, Output, TransformConfig, TransformContext, + log_schema, ComponentKey, DataType, Input, TransformConfig, TransformContext, + TransformOutput, }, event::{Event, TargetEvents, VrlTarget}, internal_events::{RemapMappingAbort, RemapMappingError}, @@ -221,92 +223,131 @@ impl TransformConfig for RemapConfig { Input::all() } - fn outputs(&self, input_definition: &schema::Definition, _: LogNamespace) -> Vec { + fn outputs( + &self, + input_definitions: &[(OutputId, schema::Definition)], + _: LogNamespace, + ) -> Vec { + let merged_definition: Definition = input_definitions + .iter() + .map(|(_output, definition)| definition.clone()) + .reduce(Definition::merge) + .unwrap_or_else(Definition::any); + // We need to compile the VRL program in order to know the schema definition output of this // transform. We ignore any compilation errors, as those are caught by the transform build // step. 
- let default_definition = self - .compile_vrl_program( - enrichment::TableRegistry::default(), - input_definition.clone(), - ) + let compiled = self + .compile_vrl_program(enrichment::TableRegistry::default(), merged_definition) .map(|(program, _, _, external_context)| { - // Apply any semantic meanings set in the VRL program - let meaning = external_context - .get_custom::() - .cloned() - .expect("context exists") - .0; - - let state = program.final_type_state(); - - let mut new_type_def = Definition::new_with_default_metadata( - state.external.target_kind().clone(), - input_definition.log_namespaces().clone(), - ); - for (id, path) in meaning { - // currently only event paths are supported - new_type_def = new_type_def.with_meaning(OwnedTargetPath::event(path), &id); - } - new_type_def - }) - .unwrap_or_else(|_| { - Definition::new_with_default_metadata( - // The program failed to compile, so it can "never" return a value - Kind::never(), - input_definition.log_namespaces().clone(), + ( + program.final_type_state(), + external_context + .get_custom::() + .cloned() + .expect("context exists") + .0, ) - }); - - // When a message is dropped and re-routed, we keep the original event, but also annotate - // it with additional metadata. 
- let mut dropped_definition = Definition::new_with_default_metadata( - Kind::never(), - input_definition.log_namespaces().clone(), - ); + }) + .map_err(|_| ()); + + let mut dropped_definitions = Vec::new(); + let mut default_definitions = Vec::new(); + + for (_output_id, input_definition) in input_definitions { + let default_definition = compiled + .clone() + .map(|(state, meaning)| { + let mut new_type_def = Definition::new( + state.external.target_kind().clone(), + state.external.metadata_kind().clone(), + input_definition.log_namespaces().clone(), + ); - if input_definition - .log_namespaces() - .contains(&LogNamespace::Legacy) - { - dropped_definition = - dropped_definition.merge(input_definition.clone().with_event_field( - &parse_value_path(log_schema().metadata_key()).expect("valid metadata key"), - Kind::object(BTreeMap::from([ - ("reason".into(), Kind::bytes()), - ("message".into(), Kind::bytes()), - ("component_id".into(), Kind::bytes()), - ("component_type".into(), Kind::bytes()), - ("component_kind".into(), Kind::bytes()), - ])), - Some("metadata"), - )); - } + for (id, path) in input_definition.meanings() { + // Attempt to copy over the meanings from the input definition. + // The function will fail if the meaning that now points to a field that no longer exists, + // this is fine since we will no longer want that meaning in the output definition. 
+ let _ = new_type_def.try_with_meaning(path.clone(), id); + } - if input_definition - .log_namespaces() - .contains(&LogNamespace::Vector) - { - dropped_definition = dropped_definition.merge( - input_definition - .clone() - .with_metadata_field(&owned_value_path!("reason"), Kind::bytes(), None) - .with_metadata_field(&owned_value_path!("message"), Kind::bytes(), None) - .with_metadata_field(&owned_value_path!("component_id"), Kind::bytes(), None) - .with_metadata_field(&owned_value_path!("component_type"), Kind::bytes(), None) - .with_metadata_field(&owned_value_path!("component_kind"), Kind::bytes(), None), + // Apply any semantic meanings set in the VRL program + for (id, path) in meaning { + // currently only event paths are supported + new_type_def = new_type_def.with_meaning(OwnedTargetPath::event(path), &id); + } + new_type_def + }) + .unwrap_or_else(|_| { + Definition::new_with_default_metadata( + // The program failed to compile, so it can "never" return a value + Kind::never(), + input_definition.log_namespaces().clone(), + ) + }); + + // When a message is dropped and re-routed, we keep the original event, but also annotate + // it with additional metadata. 
+ let mut dropped_definition = Definition::new_with_default_metadata( + Kind::never(), + input_definition.log_namespaces().clone(), ); + + if input_definition + .log_namespaces() + .contains(&LogNamespace::Legacy) + { + dropped_definition = + dropped_definition.merge(input_definition.clone().with_event_field( + &parse_value_path(log_schema().metadata_key()).expect("valid metadata key"), + Kind::object(BTreeMap::from([ + ("reason".into(), Kind::bytes()), + ("message".into(), Kind::bytes()), + ("component_id".into(), Kind::bytes()), + ("component_type".into(), Kind::bytes()), + ("component_kind".into(), Kind::bytes()), + ])), + Some("metadata"), + )); + } + + if input_definition + .log_namespaces() + .contains(&LogNamespace::Vector) + { + dropped_definition = dropped_definition.merge( + input_definition + .clone() + .with_metadata_field(&owned_value_path!("reason"), Kind::bytes(), None) + .with_metadata_field(&owned_value_path!("message"), Kind::bytes(), None) + .with_metadata_field( + &owned_value_path!("component_id"), + Kind::bytes(), + None, + ) + .with_metadata_field( + &owned_value_path!("component_type"), + Kind::bytes(), + None, + ) + .with_metadata_field( + &owned_value_path!("component_kind"), + Kind::bytes(), + None, + ), + ); + } + + default_definitions.push(default_definition); + dropped_definitions.push(dropped_definition); } - let default_output = - Output::default(DataType::all()).with_schema_definition(default_definition); + let default_output = TransformOutput::new(DataType::all(), default_definitions); if self.reroute_dropped { vec![ default_output, - Output::default(DataType::all()) - .with_schema_definition(dropped_definition) - .with_port(DROPPED), + TransformOutput::new(DataType::all(), dropped_definitions).with_port(DROPPED), ] } else { vec![default_output] @@ -401,14 +442,21 @@ where .schema_definitions .get(&None) .expect("default schema required") - .clone(); + // TODO we can now have multiple possible definitions. 
+ // This is going to need to be updated to store these possible definitions and then + // choose the correct one based on the input the event has come from. + .get(0) + .cloned() + .unwrap_or_else(Definition::any); let dropped_schema_definition = context .schema_definitions .get(&Some(DROPPED.to_owned())) .or_else(|| context.schema_definitions.get(&None)) .expect("dropped schema required") - .clone(); + .get(0) + .cloned() + .unwrap_or_else(Definition::any); Ok(Remap { component_key: context.key.clone(), @@ -654,8 +702,11 @@ mod tests { fn remap(config: RemapConfig) -> Result> { let schema_definitions = HashMap::from([ - (None, test_default_schema_definition()), - (Some(DROPPED.to_owned()), test_dropped_schema_definition()), + (None, vec![test_default_schema_definition()]), + ( + Some(DROPPED.to_owned()), + vec![test_dropped_schema_definition()], + ), ]); Remap::new_ast(config, &TransformContext::new_test(schema_definitions)) @@ -1125,8 +1176,11 @@ mod tests { ..Default::default() }; let schema_definitions = HashMap::from([ - (None, test_default_schema_definition()), - (Some(DROPPED.to_owned()), test_dropped_schema_definition()), + (None, vec![test_default_schema_definition()]), + ( + Some(DROPPED.to_owned()), + vec![test_dropped_schema_definition()], + ), ]); let context = TransformContext { key: Some(ComponentKey::from("remapper")), @@ -1383,13 +1437,19 @@ mod tests { assert_eq!( conf.outputs( - &schema::Definition::new_with_default_metadata( - Kind::any_object(), - [LogNamespace::Legacy] - ), + &[( + "test".into(), + schema::Definition::new_with_default_metadata( + Kind::any_object(), + [LogNamespace::Legacy] + ) + )], LogNamespace::Legacy ), - vec![Output::default(DataType::all()).with_schema_definition(schema_definition)] + vec![TransformOutput::new( + DataType::all(), + vec![schema_definition] + )] ); let context = TransformContext { @@ -1454,8 +1514,8 @@ mod tests { fn collect_outputs(ft: &mut dyn SyncTransform, event: Event) -> CollectedOuput { let mut 
outputs = TransformOutputsBuf::new_with_capacity( vec![ - Output::default(DataType::all()), - Output::default(DataType::all()).with_port(DROPPED), + TransformOutput::new(DataType::all(), vec![]), + TransformOutput::new(DataType::all(), vec![]).with_port(DROPPED), ], 1, ); @@ -1481,8 +1541,8 @@ mod tests { ) -> std::result::Result { let mut outputs = TransformOutputsBuf::new_with_capacity( vec![ - Output::default(DataType::all()), - Output::default(DataType::all()).with_port(DROPPED), + TransformOutput::new(DataType::all(), vec![]), + TransformOutput::new(DataType::all(), vec![]).with_port(DROPPED), ], 1, ); diff --git a/src/transforms/route.rs b/src/transforms/route.rs index b776264f41e2b..1a456daff6b3d 100644 --- a/src/transforms/route.rs +++ b/src/transforms/route.rs @@ -5,7 +5,10 @@ use vector_core::transform::SyncTransform; use crate::{ conditions::{AnyCondition, Condition}, - config::{DataType, GenerateConfig, Input, Output, TransformConfig, TransformContext}, + config::{ + DataType, GenerateConfig, Input, OutputId, TransformConfig, TransformContext, + TransformOutput, + }, event::Event, schema, transforms::Transform, @@ -101,20 +104,34 @@ impl TransformConfig for RouteConfig { } } - fn outputs(&self, merged_definition: &schema::Definition, _: LogNamespace) -> Vec { - let mut result: Vec = self + fn outputs( + &self, + input_definitions: &[(OutputId, schema::Definition)], + _: LogNamespace, + ) -> Vec { + let mut result: Vec = self .route .keys() .map(|output_name| { - Output::default(DataType::all()) - .with_schema_definition(merged_definition.clone()) - .with_port(output_name) + TransformOutput::new( + DataType::all(), + input_definitions + .iter() + .map(|(_output, definition)| definition.clone()) + .collect(), + ) + .with_port(output_name) }) .collect(); result.push( - Output::default(DataType::all()) - .with_schema_definition(merged_definition.clone()) - .with_port(UNMATCHED_ROUTE), + TransformOutput::new( + DataType::all(), + input_definitions + .iter() 
+ .map(|(_output, definition)| definition.clone()) + .collect(), + ) + .with_port(UNMATCHED_ROUTE), ); result } @@ -184,7 +201,7 @@ mod test { output_names .iter() .map(|output_name| { - Output::default(DataType::all()).with_port(output_name.to_owned()) + TransformOutput::new(DataType::all(), vec![]).with_port(output_name.to_owned()) }) .collect(), 1, @@ -225,7 +242,7 @@ mod test { output_names .iter() .map(|output_name| { - Output::default(DataType::all()).with_port(output_name.to_owned()) + TransformOutput::new(DataType::all(), vec![]).with_port(output_name.to_owned()) }) .collect(), 1, @@ -265,7 +282,7 @@ mod test { output_names .iter() .map(|output_name| { - Output::default(DataType::all()).with_port(output_name.to_owned()) + TransformOutput::new(DataType::all(), vec![]).with_port(output_name.to_owned()) }) .collect(), 1, diff --git a/src/transforms/sample.rs b/src/transforms/sample.rs index 45e7ad5a4c5d0..40e16e2e653d9 100644 --- a/src/transforms/sample.rs +++ b/src/transforms/sample.rs @@ -3,7 +3,10 @@ use vector_core::config::LogNamespace; use crate::{ conditions::{AnyCondition, Condition}, - config::{DataType, GenerateConfig, Input, Output, TransformConfig, TransformContext}, + config::{ + DataType, GenerateConfig, Input, OutputId, TransformConfig, TransformContext, + TransformOutput, + }, event::Event, internal_events::SampleEventDiscarded, schema, @@ -66,9 +69,18 @@ impl TransformConfig for SampleConfig { Input::new(DataType::Log | DataType::Trace) } - fn outputs(&self, merged_definition: &schema::Definition, _: LogNamespace) -> Vec { - vec![Output::default(DataType::Log | DataType::Trace) - .with_schema_definition(merged_definition.clone())] + fn outputs( + &self, + input_definitions: &[(OutputId, schema::Definition)], + _: LogNamespace, + ) -> Vec { + vec![TransformOutput::new( + DataType::Log | DataType::Trace, + input_definitions + .iter() + .map(|(_output, definition)| definition.clone()) + .collect(), + )] } } diff --git 
a/src/transforms/tag_cardinality_limit/config.rs b/src/transforms/tag_cardinality_limit/config.rs index ba38fc0e1dc96..6a83d70d98ff3 100644 --- a/src/transforms/tag_cardinality_limit/config.rs +++ b/src/transforms/tag_cardinality_limit/config.rs @@ -1,4 +1,6 @@ -use crate::config::{DataType, GenerateConfig, Input, Output, TransformConfig, TransformContext}; +use crate::config::{ + DataType, GenerateConfig, Input, OutputId, TransformConfig, TransformContext, TransformOutput, +}; use crate::schema; use crate::transforms::tag_cardinality_limit::TagCardinalityLimit; use crate::transforms::Transform; @@ -108,7 +110,11 @@ impl TransformConfig for TagCardinalityLimitConfig { Input::metric() } - fn outputs(&self, _: &schema::Definition, _: LogNamespace) -> Vec { - vec![Output::default(DataType::Metric)] + fn outputs( + &self, + _: &[(OutputId, schema::Definition)], + _: LogNamespace, + ) -> Vec { + vec![TransformOutput::new(DataType::Metric, vec![])] } } diff --git a/src/transforms/throttle.rs b/src/transforms/throttle.rs index 5d27672ef9ea2..bdc61383db18b 100644 --- a/src/transforms/throttle.rs +++ b/src/transforms/throttle.rs @@ -10,7 +10,7 @@ use vector_core::config::LogNamespace; use crate::{ conditions::{AnyCondition, Condition}, - config::{DataType, Input, Output, TransformConfig, TransformContext}, + config::{DataType, Input, OutputId, TransformConfig, TransformContext, TransformOutput}, event::Event, internal_events::{TemplateRenderingError, ThrottleEventDiscarded}, schema, @@ -59,9 +59,19 @@ impl TransformConfig for ThrottleConfig { Input::log() } - fn outputs(&self, merged_definition: &schema::Definition, _: LogNamespace) -> Vec { + fn outputs( + &self, + input_definitions: &[(OutputId, schema::Definition)], + _: LogNamespace, + ) -> Vec { // The event is not modified, so the definition is passed through as-is - vec![Output::default(DataType::Log).with_schema_definition(merged_definition.clone())] + vec![TransformOutput::new( + DataType::Log, + input_definitions 
+ .iter() + .map(|(_output, definition)| definition.clone()) + .collect(), + )] } } From 51fe35d3ec2036ae6a72749b918e3de7a13ae2c8 Mon Sep 17 00:00:00 2001 From: Stephen Wakely Date: Fri, 14 Apr 2023 10:01:26 +0100 Subject: [PATCH 2/3] Revert "Revert "chore(topology): split `build_pieces` into smaller functions (#17037)"" This reverts commit 0e11bc3d0c801ad123ca7b29d4ddcee4a6f22a88. --- src/topology/builder.rs | 985 +++++++++++++++++++++------------------- 1 file changed, 518 insertions(+), 467 deletions(-) diff --git a/src/topology/builder.rs b/src/topology/builder.rs index b2cdc06a4d8ef..75957944d53fa 100644 --- a/src/topology/builder.rs +++ b/src/topology/builder.rs @@ -12,7 +12,7 @@ use once_cell::sync::Lazy; use stream_cancel::{StreamExt as StreamCancelExt, Trigger, Tripwire}; use tokio::{ select, - sync::oneshot, + sync::{mpsc::UnboundedSender, oneshot}, time::{timeout, Duration}, }; use tracing::Instrument; @@ -71,531 +71,582 @@ static TRANSFORM_CONCURRENCY_LIMIT: Lazy = Lazy::new(|| { .unwrap_or_else(crate::num_threads) }); -pub(self) async fn load_enrichment_tables<'a>( +/// Builds only the new pieces, and doesn't check their topology. +pub async fn build_pieces( + config: &super::Config, + diff: &ConfigDiff, + buffers: HashMap, +) -> Result> { + Builder::new(config, diff, buffers).build().await +} + +struct Builder<'a> { config: &'a super::Config, diff: &'a ConfigDiff, -) -> (&'static enrichment::TableRegistry, Vec) { - let mut enrichment_tables = HashMap::new(); - - let mut errors = vec![]; - - // Build enrichment tables - 'tables: for (name, table) in config.enrichment_tables.iter() { - let table_name = name.to_string(); - if ENRICHMENT_TABLES.needs_reload(&table_name) { - let indexes = if !diff.enrichment_tables.is_added(name) { - // If this is an existing enrichment table, we need to store the indexes to reapply - // them again post load. 
- Some(ENRICHMENT_TABLES.index_fields(&table_name)) - } else { - None - }; + shutdown_coordinator: SourceShutdownCoordinator, + errors: Vec, + outputs: HashMap>, + tasks: HashMap, + buffers: HashMap, + inputs: HashMap, Inputs)>, + healthchecks: HashMap, + detach_triggers: HashMap, +} - let mut table = match table.inner.build(&config.global).await { - Ok(table) => table, - Err(error) => { - errors.push(format!("Enrichment Table \"{}\": {}", name, error)); - continue; - } - }; +impl<'a> Builder<'a> { + fn new( + config: &'a super::Config, + diff: &'a ConfigDiff, + buffers: HashMap, + ) -> Self { + Self { + config, + diff, + buffers, + shutdown_coordinator: SourceShutdownCoordinator::default(), + errors: vec![], + outputs: HashMap::new(), + tasks: HashMap::new(), + inputs: HashMap::new(), + healthchecks: HashMap::new(), + detach_triggers: HashMap::new(), + } + } - if let Some(indexes) = indexes { - for (case, index) in indexes { - match table - .add_index(case, &index.iter().map(|s| s.as_ref()).collect::>()) - { - Ok(_) => (), - Err(error) => { - // If there is an error adding an index we do not want to use the reloaded - // data, the previously loaded data will still need to be used. - // Just report the error and continue. - error!(message = "Unable to add index to reloaded enrichment table.", + /// Builds the new pieces of the topology found in `self.diff`. + async fn build(mut self) -> Result> { + let enrichment_tables = self.load_enrichment_tables().await; + let source_tasks = self.build_sources().await; + self.build_transforms(enrichment_tables).await; + self.build_sinks().await; + + // We should have all the data for the enrichment tables loaded now, so switch them over to + // readonly. 
+ enrichment_tables.finish_load(); + + if self.errors.is_empty() { + Ok(Pieces { + inputs: self.inputs, + outputs: Self::finalize_outputs(self.outputs), + tasks: self.tasks, + source_tasks, + healthchecks: self.healthchecks, + shutdown_coordinator: self.shutdown_coordinator, + detach_triggers: self.detach_triggers, + }) + } else { + Err(self.errors) + } + } + + fn finalize_outputs( + outputs: HashMap>, + ) -> HashMap, UnboundedSender>> + { + let mut finalized_outputs = HashMap::new(); + for (id, output) in outputs { + let entry = finalized_outputs + .entry(id.component) + .or_insert_with(HashMap::new); + entry.insert(id.port, output); + } + + finalized_outputs + } + + /// Loads, or reloads the enrichment tables. + /// The tables are stored in the `ENRICHMENT_TABLES` global variable. + async fn load_enrichment_tables(&mut self) -> &'static enrichment::TableRegistry { + let mut enrichment_tables = HashMap::new(); + + // Build enrichment tables + 'tables: for (name, table) in self.config.enrichment_tables.iter() { + let table_name = name.to_string(); + if ENRICHMENT_TABLES.needs_reload(&table_name) { + let indexes = if !self.diff.enrichment_tables.is_added(name) { + // If this is an existing enrichment table, we need to store the indexes to reapply + // them again post load. + Some(ENRICHMENT_TABLES.index_fields(&table_name)) + } else { + None + }; + + let mut table = match table.inner.build(&self.config.global).await { + Ok(table) => table, + Err(error) => { + self.errors + .push(format!("Enrichment Table \"{}\": {}", name, error)); + continue; + } + }; + + if let Some(indexes) = indexes { + for (case, index) in indexes { + match table + .add_index(case, &index.iter().map(|s| s.as_ref()).collect::>()) + { + Ok(_) => (), + Err(error) => { + // If there is an error adding an index we do not want to use the reloaded + // data, the previously loaded data will still need to be used. + // Just report the error and continue. 
+ error!(message = "Unable to add index to reloaded enrichment table.", table = ?name.to_string(), %error); - continue 'tables; + continue 'tables; + } } } } - } - enrichment_tables.insert(table_name, table); + enrichment_tables.insert(table_name, table); + } } - } - - ENRICHMENT_TABLES.load(enrichment_tables); - - (&ENRICHMENT_TABLES, errors) -} - -pub struct Pieces { - pub(super) inputs: HashMap, Inputs)>, - pub(crate) outputs: HashMap, fanout::ControlChannel>>, - pub(super) tasks: HashMap, - pub(crate) source_tasks: HashMap, - pub(super) healthchecks: HashMap, - pub(crate) shutdown_coordinator: SourceShutdownCoordinator, - pub(crate) detach_triggers: HashMap, -} -/// Builds only the new pieces, and doesn't check their topology. -pub async fn build_pieces( - config: &super::Config, - diff: &ConfigDiff, - mut buffers: HashMap, -) -> Result> { - let mut inputs = HashMap::new(); - let mut outputs = HashMap::new(); - let mut tasks = HashMap::new(); - let mut source_tasks = HashMap::new(); - let mut healthchecks = HashMap::new(); - let mut shutdown_coordinator = SourceShutdownCoordinator::default(); - let mut detach_triggers = HashMap::new(); + ENRICHMENT_TABLES.load(enrichment_tables); - let mut errors = vec![]; + &ENRICHMENT_TABLES + } - let (enrichment_tables, enrichment_errors) = load_enrichment_tables(config, diff).await; - errors.extend(enrichment_errors); + async fn build_sources(&mut self) -> HashMap { + let mut source_tasks = HashMap::new(); - // Build sources - for (key, source) in config - .sources() - .filter(|(key, _)| diff.sources.contains_new(key)) - { - debug!(component = %key, "Building new source."); - - let typetag = source.inner.get_component_name(); - let source_outputs = source.inner.outputs(config.schema.log_namespace()); - - let span = error_span!( - "source", - component_kind = "source", - component_id = %key.id(), - component_type = %source.inner.get_component_name(), - // maintained for compatibility - component_name = %key.id(), - ); - let 
_entered_span = span.enter(); - - let task_name = format!( - ">> {} ({}, pump) >>", - source.inner.get_component_name(), - key.id() - ); - - let mut builder = SourceSender::builder().with_buffer(*SOURCE_SENDER_BUFFER_SIZE); - let mut pumps = Vec::new(); - let mut controls = HashMap::new(); - let mut schema_definitions = HashMap::with_capacity(source_outputs.len()); - - for output in source_outputs.into_iter() { - let mut rx = builder.add_source_output(output.clone()); - - let (mut fanout, control) = Fanout::new(); - let pump = async move { - debug!("Source pump starting."); + for (key, source) in self + .config + .sources() + .filter(|(key, _)| self.diff.sources.contains_new(key)) + { + debug!(component = %key, "Building new source."); - while let Some(array) = rx.next().await { - fanout.send(array).await.map_err(|e| { - debug!("Source pump finished with an error."); - TaskError::wrapped(e) - })?; - } + let typetag = source.inner.get_component_name(); + let source_outputs = source.inner.outputs(self.config.schema.log_namespace()); - debug!("Source pump finished normally."); - Ok(TaskOutput::Source) - }; + let span = error_span!( + "source", + component_kind = "source", + component_id = %key.id(), + component_type = %source.inner.get_component_name(), + // maintained for compatibility + component_name = %key.id(), + ); + let _entered_span = span.enter(); - pumps.push(pump.instrument(span.clone())); - controls.insert( - OutputId { - component: key.clone(), - port: output.port.clone(), - }, - control, + let task_name = format!( + ">> {} ({}, pump) >>", + source.inner.get_component_name(), + key.id() ); - let port = output.port.clone(); - if let Some(definition) = output.schema_definition(config.schema.enabled) { - schema_definitions.insert(port, definition); - } - } + let mut builder = SourceSender::builder().with_buffer(*SOURCE_SENDER_BUFFER_SIZE); + let mut pumps = Vec::new(); + let mut controls = HashMap::new(); + let mut schema_definitions = 
HashMap::with_capacity(source_outputs.len()); - let (pump_error_tx, mut pump_error_rx) = oneshot::channel(); - let pump = async move { - debug!("Source pump supervisor starting."); + for output in source_outputs.into_iter() { + let mut rx = builder.add_source_output(output.clone()); - // Spawn all of the per-output pumps and then await their completion. - // - // If any of the pumps complete with an error, or panic/are cancelled, we return - // immediately. - let mut handles = FuturesUnordered::new(); - for pump in pumps { - handles.push(spawn_named(pump, task_name.as_ref())); - } + let (mut fanout, control) = Fanout::new(); + let pump = async move { + debug!("Source pump starting."); - let mut had_pump_error = false; - while let Some(output) = handles.try_next().await? { - if let Err(e) = output { - // Immediately send the error to the source's wrapper future, but ignore any - // errors during the send, since nested errors wouldn't make any sense here. - let _ = pump_error_tx.send(e); - had_pump_error = true; - break; - } - } + while let Some(array) = rx.next().await { + fanout.send(array).await.map_err(|e| { + debug!("Source pump finished with an error."); + TaskError::wrapped(e) + })?; + } - if had_pump_error { - debug!("Source pump supervisor task finished with an error."); - } else { - debug!("Source pump supervisor task finished normally."); - } - Ok(TaskOutput::Source) - }; - let pump = Task::new(key.clone(), typetag, pump); - - let pipeline = builder.build(); - - let (shutdown_signal, force_shutdown_tripwire) = shutdown_coordinator.register_source(key); - - let context = SourceContext { - key: key.clone(), - globals: config.global.clone(), - shutdown: shutdown_signal, - out: pipeline, - proxy: ProxyConfig::merge_with_env(&config.global.proxy, &source.proxy), - acknowledgements: source.sink_acknowledgements, - schema_definitions, - schema: config.schema, - }; - let source = source.inner.build(context).await; - let server = match source { - Err(error) => { - 
errors.push(format!("Source \"{}\": {}", key, error)); - continue; + debug!("Source pump finished normally."); + Ok(TaskOutput::Source) + }; + + pumps.push(pump.instrument(span.clone())); + controls.insert( + OutputId { + component: key.clone(), + port: output.port.clone(), + }, + control, + ); + + let port = output.port.clone(); + if let Some(definition) = output.schema_definition(self.config.schema.enabled) { + schema_definitions.insert(port, definition); + } } - Ok(server) => server, - }; - - // Build a wrapper future that drives the actual source future, but returns early if we've - // been signalled to forcefully shutdown, or if the source pump encounters an error. - // - // The forceful shutdown will only resolve if the source itself doesn't shutdown gracefully - // within the alloted time window. This can occur normally for certain sources, like stdin, - // where the I/O is blocking (in a separate thread) and won't wake up to check if it's time - // to shutdown unless some input is given. - let server = async move { - debug!("Source starting."); - - let mut result = select! { - biased; - // We've been told that we must forcefully shut down. - _ = force_shutdown_tripwire => Ok(()), + let (pump_error_tx, mut pump_error_rx) = oneshot::channel(); + let pump = async move { + debug!("Source pump supervisor starting."); - // The source pump encountered an error, which we're now bubbling up here to stop - // the source as well, since the source running makes no sense without the pump. + // Spawn all of the per-output pumps and then await their completion. // - // We only match receiving a message, not the error of the sender being dropped, - // just to keep things simpler. - Ok(e) = &mut pump_error_rx => Err(e), + // If any of the pumps complete with an error, or panic/are cancelled, we return + // immediately. 
+ let mut handles = FuturesUnordered::new(); + for pump in pumps { + handles.push(spawn_named(pump, task_name.as_ref())); + } - // The source finished normally. - result = server => result.map_err(|_| TaskError::Opaque), + let mut had_pump_error = false; + while let Some(output) = handles.try_next().await? { + if let Err(e) = output { + // Immediately send the error to the source's wrapper future, but ignore any + // errors during the send, since nested errors wouldn't make any sense here. + let _ = pump_error_tx.send(e); + had_pump_error = true; + break; + } + } + + if had_pump_error { + debug!("Source pump supervisor task finished with an error."); + } else { + debug!("Source pump supervisor task finished normally."); + } + Ok(TaskOutput::Source) + }; + let pump = Task::new(key.clone(), typetag, pump); + + let pipeline = builder.build(); + + let (shutdown_signal, force_shutdown_tripwire) = + self.shutdown_coordinator.register_source(key); + + let context = SourceContext { + key: key.clone(), + globals: self.config.global.clone(), + shutdown: shutdown_signal, + out: pipeline, + proxy: ProxyConfig::merge_with_env(&self.config.global.proxy, &source.proxy), + acknowledgements: source.sink_acknowledgements, + schema_definitions, + schema: self.config.schema, + }; + let source = source.inner.build(context).await; + let server = match source { + Err(error) => { + self.errors.push(format!("Source \"{}\": {}", key, error)); + continue; + } + Ok(server) => server, }; - // Even though we already tried to receive any pump task error above, we may have exited - // on the source itself returning an error due to task scheduling, where the pump task - // encountered an error, sent it over the oneshot, but we were polling the source - // already and hit an error trying to send to the now-shutdown pump task. 
+ // Build a wrapper future that drives the actual source future, but returns early if we've + // been signalled to forcefully shutdown, or if the source pump encounters an error. // - // Since the error from the source is opaque at the moment (i.e. `()`), we try a final - // time to see if the pump task encountered an error, using _that_ instead if so, to - // propagate the true error that caused the source to have to stop. - if let Ok(e) = pump_error_rx.try_recv() { - result = Err(e); - } - - match result { - Ok(()) => { - debug!("Source finished normally."); - Ok(TaskOutput::Source) + // The forceful shutdown will only resolve if the source itself doesn't shutdown gracefully + // within the alloted time window. This can occur normally for certain sources, like stdin, + // where the I/O is blocking (in a separate thread) and won't wake up to check if it's time + // to shutdown unless some input is given. + let server = async move { + debug!("Source starting."); + + let mut result = select! { + biased; + + // We've been told that we must forcefully shut down. + _ = force_shutdown_tripwire => Ok(()), + + // The source pump encountered an error, which we're now bubbling up here to stop + // the source as well, since the source running makes no sense without the pump. + // + // We only match receiving a message, not the error of the sender being dropped, + // just to keep things simpler. + Ok(e) = &mut pump_error_rx => Err(e), + + // The source finished normally. + result = server => result.map_err(|_| TaskError::Opaque), + }; + + // Even though we already tried to receive any pump task error above, we may have exited + // on the source itself returning an error due to task scheduling, where the pump task + // encountered an error, sent it over the oneshot, but we were polling the source + // already and hit an error trying to send to the now-shutdown pump task. + // + // Since the error from the source is opaque at the moment (i.e. 
`()`), we try a final + // time to see if the pump task encountered an error, using _that_ instead if so, to + // propagate the true error that caused the source to have to stop. + if let Ok(e) = pump_error_rx.try_recv() { + result = Err(e); } - Err(e) => { - debug!("Source finished with an error."); - Err(e) + + match result { + Ok(()) => { + debug!("Source finished normally."); + Ok(TaskOutput::Source) + } + Err(e) => { + debug!("Source finished with an error."); + Err(e) + } } - } - }; - let server = Task::new(key.clone(), typetag, server); + }; + let server = Task::new(key.clone(), typetag, server); + + self.outputs.extend(controls); + self.tasks.insert(key.clone(), pump); + source_tasks.insert(key.clone(), server); + } - outputs.extend(controls); - tasks.insert(key.clone(), pump); - source_tasks.insert(key.clone(), server); + source_tasks } - let mut definition_cache = HashMap::default(); + async fn build_transforms(&mut self, enrichment_tables: &enrichment::TableRegistry) { + let mut definition_cache = HashMap::default(); - // Build transforms - for (key, transform) in config - .transforms() - .filter(|(key, _)| diff.transforms.contains_new(key)) - { - debug!(component = %key, "Building new transform."); - - let input_definitions = - schema::input_definitions(&transform.inputs, config, &mut definition_cache); - - let merged_definition: Definition = input_definitions - .iter() - .map(|(_output_id, definition)| definition.clone()) - .reduce(Definition::merge) - // We may not have any definitions if all the inputs are from metrics sources. - .unwrap_or_else(Definition::any); - - let span = error_span!( - "transform", - component_kind = "transform", - component_id = %key.id(), - component_type = %transform.inner.get_component_name(), - // maintained for compatibility - component_name = %key.id(), - ); - - // Create a map of the outputs to the list of possible definitions from those outputs. 
- let schema_definitions = transform - .inner - .outputs(&input_definitions, config.schema.log_namespace()) - .into_iter() - .map(|output| (output.port, output.log_schema_definitions)) - .collect::>(); - - let context = TransformContext { - key: Some(key.clone()), - globals: config.global.clone(), - enrichment_tables: enrichment_tables.clone(), - schema_definitions, - merged_schema_definition: merged_definition.clone(), - schema: config.schema, - }; - - let node = TransformNode::from_parts( - key.clone(), - transform, - &input_definitions, - config.schema.log_namespace(), - ); - - let transform = match transform - .inner - .build(&context) - .instrument(span.clone()) - .await + for (key, transform) in self + .config + .transforms() + .filter(|(key, _)| self.diff.transforms.contains_new(key)) { - Err(error) => { - errors.push(format!("Transform \"{}\": {}", key, error)); - continue; - } - Ok(transform) => transform, - }; + debug!(component = %key, "Building new transform."); - let (input_tx, input_rx) = - TopologyBuilder::standalone_memory(TOPOLOGY_BUFFER_SIZE, WhenFull::Block).await; + let input_definitions = + schema::input_definitions(&transform.inputs, self.config, &mut definition_cache); - inputs.insert(key.clone(), (input_tx, node.inputs.clone())); + let merged_definition: Definition = input_definitions + .iter() + .map(|(_output_id, definition)| definition.clone()) + .reduce(Definition::merge) + // We may not have any definitions if all the inputs are from metrics sources. + .unwrap_or_else(Definition::any); - let (transform_task, transform_outputs) = { - let _span = span.enter(); - build_transform(transform, node, input_rx) - }; + let span = error_span!( + "transform", + component_kind = "transform", + component_id = %key.id(), + component_type = %transform.inner.get_component_name(), + // maintained for compatibility + component_name = %key.id(), + ); + + // Create a map of the outputs to the list of possible definitions from those outputs. 
+ let schema_definitions = transform + .inner + .outputs(&input_definitions, self.config.schema.log_namespace()) + .into_iter() + .map(|output| (output.port, output.log_schema_definitions)) + .collect::>(); + + let context = TransformContext { + key: Some(key.clone()), + globals: self.config.global.clone(), + enrichment_tables: enrichment_tables.clone(), + schema_definitions, + merged_schema_definition: merged_definition.clone(), + schema: self.config.schema, + }; + + let node = TransformNode::from_parts( + key.clone(), + transform, + &input_definitions, + self.config.schema.log_namespace(), + ); + + let transform = match transform + .inner + .build(&context) + .instrument(span.clone()) + .await + { + Err(error) => { + self.errors + .push(format!("Transform \"{}\": {}", key, error)); + continue; + } + Ok(transform) => transform, + }; + + let (input_tx, input_rx) = + TopologyBuilder::standalone_memory(TOPOLOGY_BUFFER_SIZE, WhenFull::Block).await; + + self.inputs + .insert(key.clone(), (input_tx, node.inputs.clone())); - outputs.extend(transform_outputs); - tasks.insert(key.clone(), transform_task); + let (transform_task, transform_outputs) = { + let _span = span.enter(); + build_transform(transform, node, input_rx) + }; + + self.outputs.extend(transform_outputs); + self.tasks.insert(key.clone(), transform_task); + } } - // Build sinks - for (key, sink) in config - .sinks() - .filter(|(key, _)| diff.sinks.contains_new(key)) - { - debug!(component = %key, "Building new sink."); + async fn build_sinks(&mut self) { + for (key, sink) in self + .config + .sinks() + .filter(|(key, _)| self.diff.sinks.contains_new(key)) + { + debug!(component = %key, "Building new sink."); - let sink_inputs = &sink.inputs; - let healthcheck = sink.healthcheck(); - let enable_healthcheck = healthcheck.enabled && config.healthchecks.enabled; + let sink_inputs = &sink.inputs; + let healthcheck = sink.healthcheck(); + let enable_healthcheck = healthcheck.enabled && 
self.config.healthchecks.enabled; - let typetag = sink.inner.get_component_name(); - let input_type = sink.inner.input().data_type(); + let typetag = sink.inner.get_component_name(); + let input_type = sink.inner.input().data_type(); - // At this point, we've validated that all transforms are valid, including any - // transform that mutates the schema provided by their sources. We can now validate the - // schema expectations of each individual sink. - if let Err(mut err) = schema::validate_sink_expectations(key, sink, config) { - errors.append(&mut err); - }; + // At this point, we've validated that all transforms are valid, including any + // transform that mutates the schema provided by their sources. We can now validate the + // schema expectations of each individual sink. + if let Err(mut err) = schema::validate_sink_expectations(key, sink, self.config) { + self.errors.append(&mut err); + }; - let (tx, rx) = if let Some(buffer) = buffers.remove(key) { - buffer - } else { - let buffer_type = match sink.buffer.stages().first().expect("cant ever be empty") { - BufferType::Memory { .. } => "memory", - BufferType::DiskV2 { .. } => "disk", + let (tx, rx) = if let Some(buffer) = self.buffers.remove(key) { + buffer + } else { + let buffer_type = match sink.buffer.stages().first().expect("cant ever be empty") { + BufferType::Memory { .. } => "memory", + BufferType::DiskV2 { .. 
} => "disk", + }; + let buffer_span = error_span!( + "sink", + component_kind = "sink", + component_id = %key.id(), + component_type = typetag, + component_name = %key.id(), + buffer_type, + ); + let buffer = sink + .buffer + .build( + self.config.global.data_dir.clone(), + key.to_string(), + buffer_span, + ) + .await; + match buffer { + Err(error) => { + self.errors.push(format!("Sink \"{}\": {}", key, error)); + continue; + } + Ok((tx, rx)) => (tx, Arc::new(Mutex::new(Some(rx.into_stream())))), + } }; - let buffer_span = error_span!( - "sink", - component_kind = "sink", - component_id = %key.id(), - component_type = typetag, - component_name = %key.id(), - buffer_type, - ); - let buffer = sink - .buffer - .build(config.global.data_dir.clone(), key.to_string(), buffer_span) - .await; - match buffer { + + let cx = SinkContext { + healthcheck, + globals: self.config.global.clone(), + proxy: ProxyConfig::merge_with_env(&self.config.global.proxy, sink.proxy()), + schema: self.config.schema, + }; + + let (sink, healthcheck) = match sink.inner.build(cx).await { Err(error) => { - errors.push(format!("Sink \"{}\": {}", key, error)); + self.errors.push(format!("Sink \"{}\": {}", key, error)); continue; } - Ok((tx, rx)) => (tx, Arc::new(Mutex::new(Some(rx.into_stream())))), - } - }; - - let cx = SinkContext { - healthcheck, - globals: config.global.clone(), - proxy: ProxyConfig::merge_with_env(&config.global.proxy, sink.proxy()), - schema: config.schema, - }; - - let (sink, healthcheck) = match sink.inner.build(cx).await { - Err(error) => { - errors.push(format!("Sink \"{}\": {}", key, error)); - continue; - } - Ok(built) => built, - }; - - let (trigger, tripwire) = Tripwire::new(); - - let sink = async move { - debug!("Sink starting."); - - // Why is this Arc>> needed you ask. 
- // In case when this function build_pieces errors - // this future won't be run so this rx won't be taken - // which will enable us to reuse rx to rebuild - // old configuration by passing this Arc>> - // yet again. - let rx = rx - .lock() - .unwrap() - .take() - .expect("Task started but input has been taken."); - - let mut rx = wrap(rx); - - let events_received = register!(EventsReceived); - sink.run( - rx.by_ref() - .filter(|events: &EventArray| ready(filter_events_type(events, input_type))) - .inspect(|events| { - events_received.emit(CountByteSize( - events.len(), - events.estimated_json_encoded_size_of(), - )) - }) - .take_until_if(tripwire), - ) - .await - .map(|_| { - debug!("Sink finished normally."); - TaskOutput::Sink(rx) - }) - .map_err(|_| { - debug!("Sink finished with an error."); - TaskError::Opaque - }) - }; - - let task = Task::new(key.clone(), typetag, sink); - - let component_key = key.clone(); - let healthcheck_task = async move { - if enable_healthcheck { - let duration = Duration::from_secs(10); - timeout(duration, healthcheck) - .map(|result| match result { - Ok(Ok(_)) => { - info!("Healthcheck passed."); - Ok(TaskOutput::Healthcheck) - } - Ok(Err(error)) => { - error!( - msg = "Healthcheck failed.", - %error, - component_kind = "sink", - component_type = typetag, - component_id = %component_key.id(), - // maintained for compatibility - component_name = %component_key.id(), - ); - Err(TaskError::wrapped(error)) - } - Err(e) => { - error!( - msg = "Healthcheck timed out.", - component_kind = "sink", - component_type = typetag, - component_id = %component_key.id(), - // maintained for compatibility - component_name = %component_key.id(), - ); - Err(TaskError::wrapped(Box::new(e))) - } - }) - .await - } else { - info!("Healthcheck disabled."); - Ok(TaskOutput::Healthcheck) - } - }; + Ok(built) => built, + }; - let healthcheck_task = Task::new(key.clone(), typetag, healthcheck_task); + let (trigger, tripwire) = Tripwire::new(); + + let sink = 
async move { + debug!("Sink starting."); + + // Why is this Arc>> needed you ask. + // In case when this function build_pieces errors + // this future won't be run so this rx won't be taken + // which will enable us to reuse rx to rebuild + // old configuration by passing this Arc>> + // yet again. + let rx = rx + .lock() + .unwrap() + .take() + .expect("Task started but input has been taken."); + + let mut rx = wrap(rx); + + let events_received = register!(EventsReceived); + sink.run( + rx.by_ref() + .filter(|events: &EventArray| ready(filter_events_type(events, input_type))) + .inspect(|events| { + events_received.emit(CountByteSize( + events.len(), + events.estimated_json_encoded_size_of(), + )) + }) + .take_until_if(tripwire), + ) + .await + .map(|_| { + debug!("Sink finished normally."); + TaskOutput::Sink(rx) + }) + .map_err(|_| { + debug!("Sink finished with an error."); + TaskError::Opaque + }) + }; - inputs.insert(key.clone(), (tx, sink_inputs.clone())); - healthchecks.insert(key.clone(), healthcheck_task); - tasks.insert(key.clone(), task); - detach_triggers.insert(key.clone(), trigger); - } + let task = Task::new(key.clone(), typetag, sink); + + let component_key = key.clone(); + let healthcheck_task = async move { + if enable_healthcheck { + let duration = Duration::from_secs(10); + timeout(duration, healthcheck) + .map(|result| match result { + Ok(Ok(_)) => { + info!("Healthcheck passed."); + Ok(TaskOutput::Healthcheck) + } + Ok(Err(error)) => { + error!( + msg = "Healthcheck failed.", + %error, + component_kind = "sink", + component_type = typetag, + component_id = %component_key.id(), + // maintained for compatibility + component_name = %component_key.id(), + ); + Err(TaskError::wrapped(error)) + } + Err(e) => { + error!( + msg = "Healthcheck timed out.", + component_kind = "sink", + component_type = typetag, + component_id = %component_key.id(), + // maintained for compatibility + component_name = %component_key.id(), + ); + 
Err(TaskError::wrapped(Box::new(e))) + } + }) + .await + } else { + info!("Healthcheck disabled."); + Ok(TaskOutput::Healthcheck) + } + }; - // We should have all the data for the enrichment tables loaded now, so switch them over to - // readonly. - enrichment_tables.finish_load(); + let healthcheck_task = Task::new(key.clone(), typetag, healthcheck_task); - let mut finalized_outputs = HashMap::new(); - for (id, output) in outputs { - let entry = finalized_outputs - .entry(id.component) - .or_insert_with(HashMap::new); - entry.insert(id.port, output); + self.inputs.insert(key.clone(), (tx, sink_inputs.clone())); + self.healthchecks.insert(key.clone(), healthcheck_task); + self.tasks.insert(key.clone(), task); + self.detach_triggers.insert(key.clone(), trigger); + } } +} - if errors.is_empty() { - let pieces = Pieces { - inputs, - outputs: finalized_outputs, - tasks, - source_tasks, - healthchecks, - shutdown_coordinator, - detach_triggers, - }; - - Ok(pieces) - } else { - Err(errors) - } +pub struct Pieces { + pub(super) inputs: HashMap, Inputs)>, + pub(crate) outputs: HashMap, fanout::ControlChannel>>, + pub(super) tasks: HashMap, + pub(crate) source_tasks: HashMap, + pub(super) healthchecks: HashMap, + pub(crate) shutdown_coordinator: SourceShutdownCoordinator, + pub(crate) detach_triggers: HashMap, } const fn filter_events_type(events: &EventArray, data_type: DataType) -> bool { From 42e77bb495e731e7c1936797556f56d8ce265079 Mon Sep 17 00:00:00 2001 From: Stephen Wakely Date: Fri, 14 Apr 2023 10:01:42 +0100 Subject: [PATCH 3/3] Revert "Revert "chore(topology): Transform outputs hash table of OutputId -> Definition (#17059)"" This reverts commit 8916ec179815cb22c1a95fe766bd6ceb297046f8. 
--- benches/remap.rs | 6 +- lib/vector-core/src/config/mod.rs | 20 +++- lib/vector-core/src/config/output_id.rs | 90 +++++++++++++++++ src/config/graph.rs | 17 ++-- src/config/id.rs | 89 +---------------- src/config/mod.rs | 6 +- src/config/transform.rs | 8 +- src/test_util/mock/transforms/basic.rs | 2 +- src/test_util/mock/transforms/noop.rs | 2 +- src/topology/schema.rs | 96 ++++++++++++------- src/transforms/aggregate.rs | 2 +- src/transforms/aws_ec2_metadata.rs | 4 +- src/transforms/dedupe.rs | 7 +- src/transforms/filter.rs | 7 +- src/transforms/log_to_metric.rs | 4 +- src/transforms/lua/v1/mod.rs | 12 ++- src/transforms/lua/v2/mod.rs | 12 ++- src/transforms/metric_to_log.rs | 10 +- src/transforms/reduce/mod.rs | 10 +- src/transforms/remap.rs | 45 +++++---- src/transforms/route.rs | 33 +++---- src/transforms/sample.rs | 2 +- .../tag_cardinality_limit/config.rs | 4 +- src/transforms/throttle.rs | 7 +- 24 files changed, 283 insertions(+), 212 deletions(-) create mode 100644 lib/vector-core/src/config/output_id.rs diff --git a/benches/remap.rs b/benches/remap.rs index d3c2c6930a144..3523c541b09ed 100644 --- a/benches/remap.rs +++ b/benches/remap.rs @@ -28,7 +28,7 @@ fn benchmark_remap(c: &mut Criterion) { let add_fields_runner = |tform: &mut Box, event: Event| { let mut outputs = TransformOutputsBuf::new_with_capacity( - vec![TransformOutput::new(DataType::all(), vec![])], + vec![TransformOutput::new(DataType::all(), HashMap::new())], 1, ); tform.transform(event, &mut outputs); @@ -80,7 +80,7 @@ fn benchmark_remap(c: &mut Criterion) { let json_parser_runner = |tform: &mut Box, event: Event| { let mut outputs = TransformOutputsBuf::new_with_capacity( - vec![TransformOutput::new(DataType::all(), vec![])], + vec![TransformOutput::new(DataType::all(), HashMap::new())], 1, ); tform.transform(event, &mut outputs); @@ -134,7 +134,7 @@ fn benchmark_remap(c: &mut Criterion) { let coerce_runner = |tform: &mut Box, event: Event, timestamp: DateTime| { let mut outputs = 
TransformOutputsBuf::new_with_capacity( - vec![TransformOutput::new(DataType::all(), vec![])], + vec![TransformOutput::new(DataType::all(), HashMap::new())], 1, ); tform.transform(event, &mut outputs); diff --git a/lib/vector-core/src/config/mod.rs b/lib/vector-core/src/config/mod.rs index 07774b73f40e3..bff0afcb04023 100644 --- a/lib/vector-core/src/config/mod.rs +++ b/lib/vector-core/src/config/mod.rs @@ -1,4 +1,4 @@ -use std::{fmt, num::NonZeroUsize}; +use std::{collections::HashMap, fmt, num::NonZeroUsize}; use bitmask_enum::bitmask; use bytes::Bytes; @@ -6,12 +6,14 @@ use chrono::{DateTime, Utc}; mod global_options; mod log_schema; +pub mod output_id; pub mod proxy; use crate::event::LogEvent; pub use global_options::GlobalOptions; pub use log_schema::{init_log_schema, log_schema, LogSchema}; use lookup::{lookup_v2::ValuePath, path, PathPrefix}; +pub use output_id::OutputId; use serde::{Deserialize, Serialize}; use value::Value; pub use vector_common::config::ComponentKey; @@ -199,14 +201,14 @@ pub struct TransformOutput { /// enabled, at least one definition should be output. If the transform /// has multiple connected sources, it is possible to have multiple output /// definitions - one for each input. - pub log_schema_definitions: Vec, + pub log_schema_definitions: HashMap, } impl TransformOutput { /// Create a `TransformOutput` of the given data type that contains multiple [`schema::Definition`]s. /// Designed for use in transforms. #[must_use] - pub fn new(ty: DataType, schema_definitions: Vec) -> Self { + pub fn new(ty: DataType, schema_definitions: HashMap) -> Self { Self { port: None, ty, @@ -222,6 +224,18 @@ impl TransformOutput { } } +/// Simple utility function that can be used by transforms that make no changes to +/// the schema definitions of events. +/// Takes a list of definitions with [`OutputId`] returns them as a [`HashMap`]. 
+pub fn clone_input_definitions( + input_definitions: &[(OutputId, schema::Definition)], +) -> HashMap { + input_definitions + .iter() + .map(|(output, definition)| (output.clone(), definition.clone())) + .collect() +} + /// Source-specific end-to-end acknowledgements configuration. /// /// This type exists solely to provide a source-specific description of the `acknowledgements` diff --git a/lib/vector-core/src/config/output_id.rs b/lib/vector-core/src/config/output_id.rs new file mode 100644 index 0000000000000..81c35279f6db2 --- /dev/null +++ b/lib/vector-core/src/config/output_id.rs @@ -0,0 +1,90 @@ +use std::fmt; + +use vector_common::config::ComponentKey; + +use crate::{config::configurable_component, schema}; + +/// Component output identifier. +#[configurable_component] +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct OutputId { + /// The component to which the output belongs. + pub component: ComponentKey, + + /// The output port name, if not the default. + pub port: Option, +} + +impl OutputId { + /// Some situations, for example when validating a config file requires running the + /// `transforms::output` function to retrieve the outputs, but we don't have an + /// `OutputId` from a source. This gives us an `OutputId` that we can use. + /// + /// TODO: This is not a pleasant solution, but would require some significant refactoring + /// to the topology code to avoid. + pub fn dummy() -> Self { + Self { + component: "dummy".into(), + port: None, + } + } + + /// Given a list of [`schema::Definition`]s, returns a [`Vec`] of tuples of + /// this `OutputId` with each `Definition`. 
+ pub fn with_definitions( + &self, + definitions: impl IntoIterator, + ) -> Vec<(OutputId, schema::Definition)> { + definitions + .into_iter() + .map(|definition| (self.clone(), definition)) + .collect() + } +} + +impl fmt::Display for OutputId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.port { + None => self.component.fmt(f), + Some(port) => write!(f, "{}.{port}", self.component), + } + } +} + +impl From for OutputId { + fn from(key: ComponentKey) -> Self { + Self { + component: key, + port: None, + } + } +} + +impl From<&ComponentKey> for OutputId { + fn from(key: &ComponentKey) -> Self { + Self::from(key.clone()) + } +} + +impl From<(&ComponentKey, String)> for OutputId { + fn from((key, name): (&ComponentKey, String)) -> Self { + Self { + component: key.clone(), + port: Some(name), + } + } +} + +// This panicking implementation is convenient for testing, but should never be enabled for use +// outside of tests. +#[cfg(any(test, feature = "test"))] +impl From<&str> for OutputId { + fn from(s: &str) -> Self { + assert!( + !s.contains('.'), + "Cannot convert dotted paths to strings without more context" + ); + let component = ComponentKey::from(s); + component.into() + } +} diff --git a/src/config/graph.rs b/src/config/graph.rs index 3c1e27439a78d..1b91ed11d9ccc 100644 --- a/src/config/graph.rs +++ b/src/config/graph.rs @@ -399,7 +399,7 @@ mod test { in_ty, outputs: vec![TransformOutput::new( out_ty, - vec![Definition::default_legacy_namespace()], + [("test".into(), Definition::default_legacy_namespace())].into(), )], }, ); @@ -415,8 +415,11 @@ mod test { let id = id.into(); match self.nodes.get_mut(&id) { Some(Node::Transform { outputs, .. 
}) => outputs.push( - TransformOutput::new(ty, vec![Definition::default_legacy_namespace()]) - .with_port(name), + TransformOutput::new( + ty, + [("test".into(), Definition::default_legacy_namespace())].into(), + ) + .with_port(name), ), _ => panic!("invalid transform"), } @@ -651,11 +654,11 @@ mod test { outputs: vec![ TransformOutput::new( DataType::all(), - vec![Definition::default_legacy_namespace()], + [("test".into(), Definition::default_legacy_namespace())].into(), ), TransformOutput::new( DataType::all(), - vec![Definition::default_legacy_namespace()], + [("test".into(), Definition::default_legacy_namespace())].into(), ) .with_port("bar"), ], @@ -676,11 +679,11 @@ mod test { outputs: vec![ TransformOutput::new( DataType::all(), - vec![Definition::default_legacy_namespace()], + [("test".into(), Definition::default_legacy_namespace())].into(), ), TransformOutput::new( DataType::all(), - vec![Definition::default_legacy_namespace()], + [("test".into(), Definition::default_legacy_namespace())].into(), ) .with_port("errors"), ], diff --git a/src/config/id.rs b/src/config/id.rs index caa561b68352d..8355fd22f1033 100644 --- a/src/config/id.rs +++ b/src/config/id.rs @@ -1,10 +1,8 @@ -use std::{fmt, ops::Deref}; +use std::ops::Deref; use vector_config::configurable_component; pub use vector_core::config::ComponentKey; -use super::schema; - /// A list of upstream [source][sources] or [transform][transforms] IDs. /// /// Wildcards (`*`) are supported. @@ -96,88 +94,3 @@ impl From> for Inputs { Self(inputs) } } - -/// Component output identifier. -#[configurable_component] -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub struct OutputId { - /// The component to which the output belongs. - pub component: ComponentKey, - - /// The output port name, if not the default. 
- pub port: Option, -} - -impl OutputId { - /// Some situations, for example when validating a config file requires running the - /// transforms::output function to retrieve the outputs, but we don't have an - /// `OutputId` from a source. This gives us an `OutputId` that we can use. - /// - /// TODO: This is not a pleasant solution, but would require some significant refactoring - /// to the topology code to avoid. - pub fn dummy() -> Self { - Self { - component: "dummy".into(), - port: None, - } - } - - /// Given a list of [`schema::Definition`]s, returns a [`Vec`] of tuples of - /// this `OutputId` with each `Definition`. - pub fn with_definitions( - &self, - definitions: impl IntoIterator, - ) -> Vec<(OutputId, schema::Definition)> { - definitions - .into_iter() - .map(|definition| (self.clone(), definition)) - .collect() - } -} - -impl fmt::Display for OutputId { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match &self.port { - None => self.component.fmt(f), - Some(port) => write!(f, "{}.{}", self.component, port), - } - } -} - -impl From for OutputId { - fn from(key: ComponentKey) -> Self { - Self { - component: key, - port: None, - } - } -} - -impl From<&ComponentKey> for OutputId { - fn from(key: &ComponentKey) -> Self { - Self::from(key.clone()) - } -} - -impl From<(&ComponentKey, String)> for OutputId { - fn from((key, name): (&ComponentKey, String)) -> Self { - Self { - component: key.clone(), - port: Some(name), - } - } -} - -// This panicking implementation is convenient for testing, but should never be enabled for use -// outside of tests. 
-#[cfg(test)] -impl From<&str> for OutputId { - fn from(s: &str) -> Self { - assert!( - !s.contains('.'), - "Cannot convert dotted paths to strings without more context" - ); - let component = ComponentKey::from(s); - component.into() - } -} diff --git a/src/config/mod.rs b/src/config/mod.rs index 1cd6b5aff96af..8c3ceb9c999eb 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -45,7 +45,7 @@ pub use cmd::{cmd, Opts}; pub use diff::ConfigDiff; pub use enrichment_table::{EnrichmentTableConfig, EnrichmentTableOuter}; pub use format::{Format, FormatHint}; -pub use id::{ComponentKey, Inputs, OutputId}; +pub use id::{ComponentKey, Inputs}; pub use loading::{ load, load_builder_from_paths, load_from_paths, load_from_paths_with_provider_and_secrets, load_from_str, load_source_from_paths, merge_path_lists, process_paths, CONFIG_PATHS, @@ -57,7 +57,9 @@ pub use source::{BoxedSource, SourceConfig, SourceContext, SourceOuter}; pub use transform::{BoxedTransform, TransformConfig, TransformContext, TransformOuter}; pub use unit_test::{build_unit_tests, build_unit_tests_main, UnitTestResult}; pub use validation::warnings; -pub use vector_core::config::{init_log_schema, log_schema, proxy::ProxyConfig, LogSchema}; +pub use vector_core::config::{ + init_log_schema, log_schema, proxy::ProxyConfig, LogSchema, OutputId, +}; #[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)] pub enum ConfigPath { diff --git a/src/config/transform.rs b/src/config/transform.rs index f3271c3422e7b..e2447a70e8ad1 100644 --- a/src/config/transform.rs +++ b/src/config/transform.rs @@ -111,7 +111,7 @@ pub struct TransformContext { /// /// Given a transform can expose multiple [`TransformOutput`] channels, the ID is tied to the identifier of /// that `TransformOutput`. - pub schema_definitions: HashMap, Vec>, + pub schema_definitions: HashMap, HashMap>, /// The schema definition created by merging all inputs of the transform. 
/// @@ -129,7 +129,7 @@ impl Default for TransformContext { key: Default::default(), globals: Default::default(), enrichment_tables: Default::default(), - schema_definitions: HashMap::from([(None, vec![schema::Definition::any()])]), + schema_definitions: HashMap::from([(None, HashMap::new())]), merged_schema_definition: schema::Definition::any(), schema: SchemaOptions::default(), } @@ -148,7 +148,9 @@ impl TransformContext { } #[cfg(any(test, feature = "test"))] - pub fn new_test(schema_definitions: HashMap, Vec>) -> Self { + pub fn new_test( + schema_definitions: HashMap, HashMap>, + ) -> Self { Self { schema_definitions, ..Default::default() diff --git a/src/test_util/mock/transforms/basic.rs b/src/test_util/mock/transforms/basic.rs index ce8673c408994..90e4484a547bd 100644 --- a/src/test_util/mock/transforms/basic.rs +++ b/src/test_util/mock/transforms/basic.rs @@ -58,7 +58,7 @@ impl TransformConfig for BasicTransformConfig { DataType::all(), definitions .iter() - .map(|(_output, definition)| definition.clone()) + .map(|(output, definition)| (output.clone(), definition.clone())) .collect(), )] } diff --git a/src/test_util/mock/transforms/noop.rs b/src/test_util/mock/transforms/noop.rs index b6712e4eec21f..18aadec304d03 100644 --- a/src/test_util/mock/transforms/noop.rs +++ b/src/test_util/mock/transforms/noop.rs @@ -48,7 +48,7 @@ impl TransformConfig for NoopTransformConfig { DataType::all(), definitions .iter() - .map(|(_output, definition)| definition.clone()) + .map(|(output, definition)| (output.clone(), definition.clone())) .collect(), )] } diff --git a/src/topology/schema.rs b/src/topology/schema.rs index 088967ec5b211..8e9fcd51f8704 100644 --- a/src/topology/schema.rs +++ b/src/topology/schema.rs @@ -62,7 +62,9 @@ pub fn possible_definitions( &input.port ) }) - .log_schema_definitions, + .log_schema_definitions + .values() + .cloned(), ); definitions.append(&mut transform_definition); @@ -141,7 +143,9 @@ pub(super) fn expanded_definitions( .iter() 
.find_map(|output| { if output.port == input.port { - Some(input.with_definitions(output.log_schema_definitions.clone())) + Some( + input.with_definitions(output.log_schema_definitions.values().cloned()), + ) } else { None } @@ -214,7 +218,9 @@ pub(crate) fn input_definitions( &input.port ) }) - .log_schema_definitions, + .log_schema_definitions + .values() + .cloned(), ); definitions.append(&mut transform_definitions); @@ -528,11 +534,15 @@ mod tests { vec![OutputId::from("source-foo")], vec![TransformOutput::new( DataType::all(), - vec![Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("baz"), - Kind::regex(), - Some("baz"), - )], + [( + "source-foo".into(), + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("baz"), + Kind::regex(), + Some("baz"), + ), + )] + .into(), )], ), )]), @@ -599,11 +609,15 @@ mod tests { vec![OutputId::from("Source 1")], vec![TransformOutput::new( DataType::all(), - vec![Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("transform-1"), - Kind::regex(), - None, - )], + [( + "Source 1".into(), + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("transform-1"), + Kind::regex(), + None, + ), + )] + .into(), )], ), ), @@ -613,11 +627,15 @@ mod tests { vec![OutputId::from("Source 2")], vec![TransformOutput::new( DataType::all(), - vec![Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("transform-2"), - Kind::float().or_null(), - Some("transform-2"), - )], + [( + "Source 2".into(), + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("transform-2"), + Kind::float().or_null(), + Some("transform-2"), + ), + )] + .into(), )], ), ), @@ -627,11 +645,15 @@ mod tests { vec![OutputId::from("Source 2")], vec![TransformOutput::new( DataType::all(), - vec![Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("transform-3"), - Kind::integer(), - Some("transform-3"), - )], + [( + 
"Source 2".into(), + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("transform-3"), + Kind::integer(), + Some("transform-3"), + ), + )] + .into(), )], ), ), @@ -641,11 +663,15 @@ mod tests { vec![OutputId::from("Source 2")], vec![TransformOutput::new( DataType::all(), - vec![Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("transform-4"), - Kind::timestamp().or_bytes(), - Some("transform-4"), - )], + [( + "Source 2".into(), + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("transform-4"), + Kind::timestamp().or_bytes(), + Some("transform-4"), + ), + )] + .into(), )], ), ), @@ -655,11 +681,15 @@ mod tests { vec![OutputId::from("Transform 3"), OutputId::from("Transform 4")], vec![TransformOutput::new( DataType::all(), - vec![Definition::empty_legacy_namespace().with_event_field( - &owned_value_path!("transform-5"), - Kind::boolean(), - Some("transform-5"), - )], + [( + "Transform 3".into(), + Definition::empty_legacy_namespace().with_event_field( + &owned_value_path!("transform-5"), + Kind::boolean(), + Some("transform-5"), + ), + )] + .into(), )], ), ), diff --git a/src/transforms/aggregate.rs b/src/transforms/aggregate.rs index fdeb73eaa00a8..95212582601e0 100644 --- a/src/transforms/aggregate.rs +++ b/src/transforms/aggregate.rs @@ -51,7 +51,7 @@ impl TransformConfig for AggregateConfig { _: &[(OutputId, schema::Definition)], _: LogNamespace, ) -> Vec { - vec![TransformOutput::new(DataType::Metric, vec![])] + vec![TransformOutput::new(DataType::Metric, HashMap::new())] } } diff --git a/src/transforms/aws_ec2_metadata.rs b/src/transforms/aws_ec2_metadata.rs index 560e82ea917be..9a66140de50fe 100644 --- a/src/transforms/aws_ec2_metadata.rs +++ b/src/transforms/aws_ec2_metadata.rs @@ -270,7 +270,7 @@ impl TransformConfig for Ec2Metadata { let schema_definition = input_definitions .iter() - .map(|(_output, definition)| { + .map(|(output, definition)| { let mut schema_definition = 
definition.clone(); for path in paths { @@ -278,7 +278,7 @@ impl TransformConfig for Ec2Metadata { schema_definition.with_field(path, Kind::bytes().or_undefined(), None); } - schema_definition + (output.clone(), schema_definition) }) .collect(); diff --git a/src/transforms/dedupe.rs b/src/transforms/dedupe.rs index c0fe99f0204cd..c62f40b31a339 100644 --- a/src/transforms/dedupe.rs +++ b/src/transforms/dedupe.rs @@ -4,7 +4,7 @@ use bytes::Bytes; use futures::{Stream, StreamExt}; use lru::LruCache; use vector_config::configurable_component; -use vector_core::config::LogNamespace; +use vector_core::config::{clone_input_definitions, LogNamespace}; use crate::{ config::{ @@ -160,10 +160,7 @@ impl TransformConfig for DedupeConfig { ) -> Vec { vec![TransformOutput::new( DataType::Log, - input_definitions - .iter() - .map(|(_output, definition)| definition.clone()) - .collect(), + clone_input_definitions(input_definitions), )] } } diff --git a/src/transforms/filter.rs b/src/transforms/filter.rs index f0bed3c180bc6..9351c1d3c724b 100644 --- a/src/transforms/filter.rs +++ b/src/transforms/filter.rs @@ -1,6 +1,6 @@ use vector_common::internal_event::{Count, InternalEventHandle as _, Registered}; use vector_config::configurable_component; -use vector_core::config::LogNamespace; +use vector_core::config::{clone_input_definitions, LogNamespace}; use crate::{ conditions::{AnyCondition, Condition}, @@ -58,10 +58,7 @@ impl TransformConfig for FilterConfig { ) -> Vec { vec![TransformOutput::new( DataType::all(), - input_definitions - .iter() - .map(|(_output, definition)| definition.clone()) - .collect(), + clone_input_definitions(input_definitions), )] } diff --git a/src/transforms/log_to_metric.rs b/src/transforms/log_to_metric.rs index 86e2eb9eb30e6..59c0eea625da8 100644 --- a/src/transforms/log_to_metric.rs +++ b/src/transforms/log_to_metric.rs @@ -1,4 +1,4 @@ -use std::num::ParseFloatError; +use std::{collections::HashMap, num::ParseFloatError}; use chrono::Utc; use 
indexmap::IndexMap; @@ -163,7 +163,7 @@ impl TransformConfig for LogToMetricConfig { _: LogNamespace, ) -> Vec { // Converting the log to a metric means we lose all incoming `Definition`s. - vec![TransformOutput::new(DataType::Metric, Vec::new())] + vec![TransformOutput::new(DataType::Metric, HashMap::new())] } fn enable_concurrency(&self) -> bool { diff --git a/src/transforms/lua/v1/mod.rs b/src/transforms/lua/v1/mod.rs index efab62a686dcd..4aab930ede76b 100644 --- a/src/transforms/lua/v1/mod.rs +++ b/src/transforms/lua/v1/mod.rs @@ -58,9 +58,17 @@ impl LuaConfig { .flat_map(|(_output, definition)| definition.log_namespaces().clone()) .collect(); - let definition = Definition::default_for_namespace(&namespaces); + let definition = input_definitions + .iter() + .map(|(output, _definition)| { + ( + output.clone(), + Definition::default_for_namespace(&namespaces), + ) + }) + .collect(); - vec![TransformOutput::new(DataType::Log, vec![definition])] + vec![TransformOutput::new(DataType::Log, definition)] } } diff --git a/src/transforms/lua/v2/mod.rs b/src/transforms/lua/v2/mod.rs index 88bf5fd2086f4..caa4f7d92a775 100644 --- a/src/transforms/lua/v2/mod.rs +++ b/src/transforms/lua/v2/mod.rs @@ -188,11 +188,19 @@ impl LuaConfig { .flat_map(|(_output, definition)| definition.log_namespaces().clone()) .collect(); - let definition = Definition::default_for_namespace(&namespaces); + let definition = input_definitions + .iter() + .map(|(output, _definition)| { + ( + output.clone(), + Definition::default_for_namespace(&namespaces), + ) + }) + .collect(); vec![TransformOutput::new( DataType::Metric | DataType::Log, - vec![definition], + definition, )] } } diff --git a/src/transforms/metric_to_log.rs b/src/transforms/metric_to_log.rs index 155f32ae48c7b..e744ab02b94c1 100644 --- a/src/transforms/metric_to_log.rs +++ b/src/transforms/metric_to_log.rs @@ -94,7 +94,7 @@ impl TransformConfig for MetricToLogConfig { fn outputs( &self, - _: &[(OutputId, Definition)], + 
input_definitions: &[(OutputId, Definition)], global_log_namespace: LogNamespace, ) -> Vec { let log_namespace = global_log_namespace.merge(self.log_namespace); @@ -229,7 +229,13 @@ impl TransformConfig for MetricToLogConfig { } } - vec![TransformOutput::new(DataType::Log, vec![schema_definition])] + vec![TransformOutput::new( + DataType::Log, + input_definitions + .iter() + .map(|(output, _)| (output.clone(), schema_definition.clone())) + .collect(), + )] } fn enable_concurrency(&self) -> bool { diff --git a/src/transforms/reduce/mod.rs b/src/transforms/reduce/mod.rs index 618f44fcbb1f2..1e98086a52053 100644 --- a/src/transforms/reduce/mod.rs +++ b/src/transforms/reduce/mod.rs @@ -130,9 +130,9 @@ impl TransformConfig for ReduceConfig { input_definitions: &[(OutputId, schema::Definition)], _: LogNamespace, ) -> Vec { - let mut output_definitions = Vec::new(); + let mut output_definitions = HashMap::new(); - for (_output, input) in input_definitions { + for (output, input) in input_definitions { let mut schema_definition = input.clone(); for (key, merge_strategy) in self.merge_strategies.iter() { @@ -217,7 +217,7 @@ impl TransformConfig for ReduceConfig { schema_definition = schema_definition.with_field(&key, new_kind, None); } - output_definitions.push(schema_definition); + output_definitions.insert(output.clone(), schema_definition); } vec![TransformOutput::new(DataType::Log, output_definitions)] @@ -558,7 +558,7 @@ group_by = [ "request_id" ] assert_eq!(output_1["counter"], Value::from(8)); assert_eq!(output_1.metadata(), &metadata_1); schema_definitions - .iter() + .values() .for_each(|definition| definition.assert_valid_for_event(&output_1.clone().into())); let output_2 = out.recv().await.unwrap().into_log(); @@ -567,7 +567,7 @@ group_by = [ "request_id" ] assert_eq!(output_2["counter"], Value::from(7)); assert_eq!(output_2.metadata(), &metadata_2); schema_definitions - .iter() + .values() .for_each(|definition| 
definition.assert_valid_for_event(&output_2.clone().into())); drop(tx); diff --git a/src/transforms/remap.rs b/src/transforms/remap.rs index b1b35823175dd..7a2efa07b7f6a 100644 --- a/src/transforms/remap.rs +++ b/src/transforms/remap.rs @@ -1,3 +1,4 @@ +use std::collections::HashMap; use std::sync::Arc; use std::{ collections::BTreeMap, @@ -251,10 +252,10 @@ impl TransformConfig for RemapConfig { }) .map_err(|_| ()); - let mut dropped_definitions = Vec::new(); - let mut default_definitions = Vec::new(); + let mut dropped_definitions = HashMap::new(); + let mut default_definitions = HashMap::new(); - for (_output_id, input_definition) in input_definitions { + for (output_id, input_definition) in input_definitions { let default_definition = compiled .clone() .map(|(state, meaning)| { @@ -338,8 +339,8 @@ impl TransformConfig for RemapConfig { ); } - default_definitions.push(default_definition); - dropped_definitions.push(dropped_definition); + default_definitions.insert(output_id.clone(), default_definition); + dropped_definitions.insert(output_id.clone(), dropped_definition); } let default_output = TransformOutput::new(DataType::all(), default_definitions); @@ -445,8 +446,9 @@ where // TODO we can now have multiple possible definitions. // This is going to need to be updated to store these possible definitions and then // choose the correct one based on the input the event has come from. 
- .get(0) - .cloned() + .iter() + .map(|(_output, definition)| definition.clone()) + .next() .unwrap_or_else(Definition::any); let dropped_schema_definition = context @@ -454,8 +456,9 @@ where .get(&Some(DROPPED.to_owned())) .or_else(|| context.schema_definitions.get(&None)) .expect("dropped schema required") - .get(0) - .cloned() + .iter() + .map(|(_output, definition)| definition.clone()) + .next() .unwrap_or_else(Definition::any); Ok(Remap { @@ -702,10 +705,13 @@ mod tests { fn remap(config: RemapConfig) -> Result> { let schema_definitions = HashMap::from([ - (None, vec![test_default_schema_definition()]), + ( + None, + [("source".into(), test_default_schema_definition())].into(), + ), ( Some(DROPPED.to_owned()), - vec![test_dropped_schema_definition()], + [("source".into(), test_dropped_schema_definition())].into(), ), ]); @@ -1176,10 +1182,13 @@ mod tests { ..Default::default() }; let schema_definitions = HashMap::from([ - (None, vec![test_default_schema_definition()]), + ( + None, + [("source".into(), test_default_schema_definition())].into(), + ), ( Some(DROPPED.to_owned()), - vec![test_dropped_schema_definition()], + [("source".into(), test_dropped_schema_definition())].into(), ), ]); let context = TransformContext { @@ -1448,7 +1457,7 @@ mod tests { ), vec![TransformOutput::new( DataType::all(), - vec![schema_definition] + [("test".into(), schema_definition)].into() )] ); @@ -1514,8 +1523,8 @@ mod tests { fn collect_outputs(ft: &mut dyn SyncTransform, event: Event) -> CollectedOuput { let mut outputs = TransformOutputsBuf::new_with_capacity( vec![ - TransformOutput::new(DataType::all(), vec![]), - TransformOutput::new(DataType::all(), vec![]).with_port(DROPPED), + TransformOutput::new(DataType::all(), HashMap::new()), + TransformOutput::new(DataType::all(), HashMap::new()).with_port(DROPPED), ], 1, ); @@ -1541,8 +1550,8 @@ mod tests { ) -> std::result::Result { let mut outputs = TransformOutputsBuf::new_with_capacity( vec![ - 
TransformOutput::new(DataType::all(), vec![]), - TransformOutput::new(DataType::all(), vec![]).with_port(DROPPED), + TransformOutput::new(DataType::all(), HashMap::new()), + TransformOutput::new(DataType::all(), HashMap::new()).with_port(DROPPED), ], 1, ); diff --git a/src/transforms/route.rs b/src/transforms/route.rs index 1a456daff6b3d..971d678ffe170 100644 --- a/src/transforms/route.rs +++ b/src/transforms/route.rs @@ -1,6 +1,6 @@ use indexmap::IndexMap; use vector_config::configurable_component; -use vector_core::config::LogNamespace; +use vector_core::config::{clone_input_definitions, LogNamespace}; use vector_core::transform::SyncTransform; use crate::{ @@ -113,25 +113,13 @@ impl TransformConfig for RouteConfig { .route .keys() .map(|output_name| { - TransformOutput::new( - DataType::all(), - input_definitions - .iter() - .map(|(_output, definition)| definition.clone()) - .collect(), - ) - .with_port(output_name) + TransformOutput::new(DataType::all(), clone_input_definitions(input_definitions)) + .with_port(output_name) }) .collect(); result.push( - TransformOutput::new( - DataType::all(), - input_definitions - .iter() - .map(|(_output, definition)| definition.clone()) - .collect(), - ) - .with_port(UNMATCHED_ROUTE), + TransformOutput::new(DataType::all(), clone_input_definitions(input_definitions)) + .with_port(UNMATCHED_ROUTE), ); result } @@ -143,6 +131,8 @@ impl TransformConfig for RouteConfig { #[cfg(test)] mod test { + use std::collections::HashMap; + use indoc::indoc; use vector_core::transform::TransformOutputsBuf; @@ -201,7 +191,8 @@ mod test { output_names .iter() .map(|output_name| { - TransformOutput::new(DataType::all(), vec![]).with_port(output_name.to_owned()) + TransformOutput::new(DataType::all(), HashMap::new()) + .with_port(output_name.to_owned()) }) .collect(), 1, @@ -242,7 +233,8 @@ mod test { output_names .iter() .map(|output_name| { - TransformOutput::new(DataType::all(), vec![]).with_port(output_name.to_owned()) + 
TransformOutput::new(DataType::all(), HashMap::new()) + .with_port(output_name.to_owned()) }) .collect(), 1, @@ -282,7 +274,8 @@ mod test { output_names .iter() .map(|output_name| { - TransformOutput::new(DataType::all(), vec![]).with_port(output_name.to_owned()) + TransformOutput::new(DataType::all(), HashMap::new()) + .with_port(output_name.to_owned()) }) .collect(), 1, diff --git a/src/transforms/sample.rs b/src/transforms/sample.rs index 40e16e2e653d9..eec1a2652c4ac 100644 --- a/src/transforms/sample.rs +++ b/src/transforms/sample.rs @@ -78,7 +78,7 @@ impl TransformConfig for SampleConfig { DataType::Log | DataType::Trace, input_definitions .iter() - .map(|(_output, definition)| definition.clone()) + .map(|(output, definition)| (output.clone(), definition.clone())) .collect(), )] } diff --git a/src/transforms/tag_cardinality_limit/config.rs b/src/transforms/tag_cardinality_limit/config.rs index 6a83d70d98ff3..8eca913f8c416 100644 --- a/src/transforms/tag_cardinality_limit/config.rs +++ b/src/transforms/tag_cardinality_limit/config.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + use crate::config::{ DataType, GenerateConfig, Input, OutputId, TransformConfig, TransformContext, TransformOutput, }; @@ -115,6 +117,6 @@ impl TransformConfig for TagCardinalityLimitConfig { _: &[(OutputId, schema::Definition)], _: LogNamespace, ) -> Vec { - vec![TransformOutput::new(DataType::Metric, vec![])] + vec![TransformOutput::new(DataType::Metric, HashMap::new())] } } diff --git a/src/transforms/throttle.rs b/src/transforms/throttle.rs index bdc61383db18b..4b97a40410e0b 100644 --- a/src/transforms/throttle.rs +++ b/src/transforms/throttle.rs @@ -6,7 +6,7 @@ use governor::{clock, Quota, RateLimiter}; use serde_with::serde_as; use snafu::Snafu; use vector_config::configurable_component; -use vector_core::config::LogNamespace; +use vector_core::config::{clone_input_definitions, LogNamespace}; use crate::{ conditions::{AnyCondition, Condition}, @@ -67,10 +67,7 @@ impl 
TransformConfig for ThrottleConfig { // The event is not modified, so the definition is passed through as-is vec![TransformOutput::new( DataType::Log, - input_definitions - .iter() - .map(|(_output, definition)| definition.clone()) - .collect(), + clone_input_definitions(input_definitions), )] } }