diff --git a/Cargo.lock b/Cargo.lock index c128248471ca3..adc6025f897f7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2830,6 +2830,20 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "docs-renderer" +version = "0.1.0" +dependencies = [ + "anyhow", + "serde", + "serde_json", + "snafu", + "tracing 0.1.37", + "tracing-subscriber", + "vector-config", + "vector-config-common", +] + [[package]] name = "duct" version = "0.13.5" @@ -9699,6 +9713,7 @@ dependencies = [ "inventory", "no-proxy", "num-traits", + "once_cell", "pretty_assertions", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index cbcfd908b5ced..f6a704fc8a294 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -87,6 +87,7 @@ members = [ ".", "lib/codecs", "lib/dnsmsg-parser", + "lib/docs-renderer", "lib/enrichment", "lib/fakedata", "lib/file-source", diff --git a/lib/docs-renderer/Cargo.toml b/lib/docs-renderer/Cargo.toml new file mode 100644 index 0000000000000..c44645fb01474 --- /dev/null +++ b/lib/docs-renderer/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "docs-renderer" +version = "0.1.0" +authors = ["Vector Contributors "] +edition = "2021" +publish = false + +[dependencies] +anyhow = { version = "1.0.66", default-features = false, features = ["std"] } +serde = { version = "1.0", default-features = false } +serde_json = { version = "1.0", default-features = false, features = ["std"] } +snafu = { version = "0.7.4", default-features = false } +tracing = { version = "0.1.34", default-features = false } +tracing-subscriber = { version = "0.3.16", default-features = false, features = ["ansi", "env-filter", "fmt", "json", "registry", "tracing-log"] } +vector-config = { path = "../vector-config" } +vector-config-common = { path = "../vector-config-common" } diff --git a/lib/docs-renderer/src/main.rs b/lib/docs-renderer/src/main.rs new file mode 100644 index 
0000000000000..dfd9c829d858c --- /dev/null +++ b/lib/docs-renderer/src/main.rs @@ -0,0 +1,82 @@ +mod renderer; + +use std::collections::HashMap; + +use crate::renderer::SchemaRenderer; +use anyhow::{Context, Result}; +use tracing::debug; +use vector_config::schema::parser::{component::ComponentSchema, query::SchemaQuerier}; +use vector_config_common::constants::{self, ComponentType}; + +fn main() -> Result<()> { + let querier = SchemaQuerier::from_schema("/tmp/vector-config-schema.json") + .context("Failed to create querier from given schema file path.")?; + + let base_component_types = &[ + ComponentType::Source, + ComponentType::Transform, + ComponentType::Sink, + ]; + for base_component_type in base_component_types { + // Find the base component schema for the component type itself, which is analogous to + // `SourceOuter`, `SinkOuter`, etc. We render the schema for that separately as it's meant + // to be common across components of the same type, etc. + let base_component_schema = querier + .query() + .with_custom_attribute_kv( + constants::DOCS_META_COMPONENT_BASE_TYPE, + base_component_type, + ) + .run_single()?; + + debug!( + "Got base component schema for component type '{}'.", + base_component_type.as_str() + ); + + // Find all component schemas of the same component type. + let maybe_component_schemas = querier + .query() + .with_custom_attribute_kv(constants::DOCS_META_COMPONENT_TYPE, base_component_type) + .run() + .into_iter() + .map(ComponentSchema::try_from) + .collect::, _>>()?; + + debug!( + "Found {} component schema(s) for component type '{}'.", + maybe_component_schemas.len(), + base_component_type.as_str() + ); + + let mut rendered_component_schemas = HashMap::new(); + + // Render the base component schema. 
+ let base_component_schema_renderer = SchemaRenderer::new(&querier, base_component_schema); + let rendered_base_component_schema = + base_component_schema_renderer.render().context(format!( + "Failed to render the base component schema for component type '{}'.", + base_component_type.as_str() + ))?; + rendered_component_schemas.insert( + format!("base/{}", base_component_type.as_str()), + rendered_base_component_schema, + ); + + // Render each of the component schemas for this component type. + for component_schema in maybe_component_schemas { + let component_name = component_schema.component_name().to_string(); + let component_schema_renderer = SchemaRenderer::new(&querier, component_schema); + let rendered_component_schema = component_schema_renderer.render().context(format!( + "Failed to render the '{}' component schema.", + component_name + ))?; + rendered_component_schemas.insert( + format!("{}s/base/{}", base_component_type.as_str(), component_name), + rendered_component_schema, + ); + } + } + + Ok(()) +} diff --git a/lib/docs-renderer/src/renderer.rs b/lib/docs-renderer/src/renderer.rs new file mode 100644 index 0000000000000..49065c761ae67 --- /dev/null +++ b/lib/docs-renderer/src/renderer.rs @@ -0,0 +1,424 @@ +use std::collections::{HashMap, VecDeque}; + +use anyhow::Result; +use serde::Serialize; +use serde_json::{Map, Value}; +use snafu::Snafu; +use tracing::debug; +use vector_config::schema::parser::query::{ + QueryError, QueryableSchema, SchemaQuerier, SchemaType, +}; +use vector_config_common::constants; + +#[derive(Debug, Snafu)] +pub enum RenderError { + #[snafu(display("rendering failed: {reason}"))] + Failed { reason: String }, + + #[snafu(display("query error during rendering: {source}"), context(false))] + Query { source: QueryError }, +} + +#[derive(Serialize)] +#[serde(transparent)] +pub struct RenderData { + root: Value, +} + +impl RenderData { + fn with_mut_object(&mut self, f: F) -> V + where + F: FnOnce(&mut Map) -> V, + { + // TODO: We 
should refactor this method so that it takes the desired path, a boolean for + // whether or not to create missing path nodes, and a closure to call with the object + // reference/object key if it exists.. and then this way, `write` and `delete` become simple + // calls with simple closures that just do `map.insert(...)` and `map.delete(...)` and so + // on. + // + // tl;dr: make it DRY. + let map = self + .root + .as_object_mut() + .expect("Render data should always have an object value as root."); + f(map) + } + + /// Writes a value at the given path. + /// + /// The path follows the form of `/part1/part/.../partN`, where each slash-separated segment + /// represents a nested object within the overall object hierarchy. For example, a path of + /// `/root/nested/key2` would map to the value "weee!" if applied against the following JSON + /// object: + /// + /// { "root": { "nested": { "key2": "weee!" } } } + /// + /// # Panics + /// + /// If the path does not start with a forward slash, this method will panic. Likewise, if the + /// path is _only_ a forward slash (aka there is no segment to describe the key within the + /// object to write the value to), this method will panic. + /// + /// If any nested object within the path does not yet exist, it will be created. If any segment, + /// other than the leaf segment, points to a value that is not an object/map, this method will + /// panic. + pub fn write>(&mut self, path: &str, value: V) { + if !path.starts_with('/') { + panic!("Paths must always start with a leading forward slash (`/`)."); + } + + self.with_mut_object(|map| { + // Split the path, and take the last element as the actual map key to write to. + let mut segments = path.split('/').collect::>(); + let key = segments.pop_back().expect("Path must end with a key."); + + // Iterate over the remaining elements, traversing into the root object one level at a + // time, based on using `token` as the map key. 
If there's no map at the given key, + // we'll create one. If there's something other than a map, we'll panic. + let mut destination = map; + while let Some(segment) = segments.pop_front() { + if destination.contains_key(segment) { + match destination.get_mut(segment) { + Some(Value::Object(ref mut next)) => { + destination = next; + continue; + } + Some(_) => { + panic!("Only leaf nodes should be allowed to be non-object values.") + } + None => unreachable!("Already asserted that the given key exists."), + } + } else { + destination.insert(segment.to_string(), Value::Object(Map::new())); + match destination.get_mut(segment) { + Some(Value::Object(ref mut next)) => { + destination = next; + } + _ => panic!("New object was just inserted."), + } + } + } + + destination.insert(key.to_string(), value.into()); + }); + } + + /// Deletes the value at the given path. + /// + /// The path follows the form of `/part1/part/.../partN`, where each slash-separated segment + /// represents a nested object within the overall object hierarchy. For example, a path of + /// `/root/nested/key2` would map to the value "weee!" if applied against the following JSON + /// object: + /// + /// { "root": { "nested": { "key2": "weee!" } } } + /// + /// # Panics + /// + /// If the path does not start with a forward slash, this method will panic. Likewise, if the + /// path is _only_ a forward slash (aka there is no segment to describe the key within the + /// object to write the value to), this method will panic. + /// + /// If any nested object within the path does not yet exist, it will be created. If any segment, + /// other than the leaf segment, points to a value that is not an object/map, this method will + /// panic. + pub fn delete(&mut self, path: &str) -> bool { + if !path.starts_with('/') { + panic!("Paths must always start with a leading forward slash (`/`)."); + } + + self.with_mut_object(|map| { + // Split the path, and take the last element as the actual map key to write to. 
let mut segments = path.split('/').collect::<VecDeque<_>>();
This + /// strategy behaves as follows: + /// + /// - strings, booleans, integers, numbers, and nulls are "highest priority wins" (`self` has + /// highest priority) + /// - arrays are merged together without any deduplication, with the items from `self` appearing + /// first + /// - objects have their properties merged together, but if an overlapping property is + /// encountered: + /// - if it has the same type on both sides, the property is merged normally (using the + /// standard merge behavior) + /// - if it does not have the same type on both sides, the property value on the `self` side + /// takes precedence + /// + /// The only exception to the merge behavior above is if an overlapping object property does not + /// have the same type on both sides, but the type on the `self` side is an array. When the type + /// is an array, the value on the `other` side is appended to that array, regardless of the + /// contents of the array. + pub fn merge(&mut self, _other: Self) { + todo!() + } +} + +impl Default for RenderData { + fn default() -> Self { + Self { + root: Value::Object(Map::new()), + } + } +} + +pub struct SchemaRenderer<'a, T> { + querier: &'a SchemaQuerier, + schema: T, + data: RenderData, +} + +impl<'a, T> SchemaRenderer<'a, T> +where + T: QueryableSchema, +{ + pub fn new(querier: &'a SchemaQuerier, schema: T) -> Self { + Self { + querier, + schema, + data: RenderData::default(), + } + } + + pub fn render(self) -> Result { + let Self { + querier, + schema, + mut data, + } = self; + + // If a schema is hidden, then we intentionally do not want to render it. + if schema.has_flag_attribute(constants::DOCS_META_HIDDEN)? { + debug!("Schema is marked as hidden. Skipping rendering."); + + return Ok(data); + } + + // If a schema has an overridden type, we return some barebones render data. + if schema.has_flag_attribute(constants::DOCS_META_TYPE_OVERRIDE)? 
{ + debug!("Schema has overridden type."); + + data.write("type", "blank"); + apply_schema_description(&schema, &mut data)?; + + return Ok(data); + } + + // Now that we've handled any special cases, attempt to render the schema. + render_bare_schema(querier, &schema, &mut data)?; + + // If the rendered schema represents an array schema, remove any description that is present + // for the schema of the array items themselves. We want the description of whatever object + // property that is using this array schema to be the one that is used. + // + // We just do this blindly because the control flow doesn't change depending on whether or + // not it's an array schema and we do or don't delete anything. + if data.delete("/type/array/items/description") { + debug!("Cleared description for items schema from top-level array schema."); + } + + // Apply any necessary defaults, descriptions, and so on, to the rendered schema. + // + // This must happen here because there could be callsite-specific overrides to default + // values/descriptions/etc which must take precedence, so that must occur after any nested + // rendering in order to maintain that precedence. + apply_schema_default_value(&schema, &mut data)?; + apply_schema_metadata(&schema, &mut data)?; + apply_schema_description(&schema, &mut data)?; + + Ok(data) + } +} + +fn render_bare_schema( + querier: &SchemaQuerier, + schema: T, + data: &mut RenderData, +) -> Result<(), RenderError> { + match schema.schema_type() { + SchemaType::AllOf(subschemas) => { + // Composite (`allOf`) schemas are indeed the sum of all of their parts, so render each + // subschema and simply merge the rendered subschemas together. 
+ for subschema in subschemas { + let subschema_renderer = SchemaRenderer::new(querier, subschema); + let rendered_subschema = subschema_renderer.render()?; + data.merge(rendered_subschema); + } + } + SchemaType::OneOf(_subschemas) => {} + SchemaType::AnyOf(_subschemas) => {} + SchemaType::Constant(const_value) => { + // All we need to do is figure out the rendered type for the constant value, so we can + // generate the right type path and stick the constant value in it. + let rendered_const_type = get_rendered_value_type(&schema, const_value)?; + let const_type_path = format!("/type/{}/const", rendered_const_type); + data.write(const_type_path.as_str(), const_value.clone()); + } + SchemaType::Enum(enum_values) => { + // Similar to constant schemas, we just need to figure out the rendered type for each + // enum value, so that we can group them together and then write the grouped values to + // each of their respective type paths. + let mut type_map = HashMap::new(); + + for enum_value in enum_values { + let rendered_enum_type = get_rendered_value_type(&schema, enum_value)?; + let type_group_entry = type_map.entry(rendered_enum_type).or_insert_with(Vec::new); + type_group_entry.push(enum_value.clone()); + } + + let structured_type_map = type_map + .into_iter() + .map(|(key, values)| { + let mut nested = Map::new(); + nested.insert("enum".into(), Value::Array(values)); + + (key, Value::Object(nested)) + }) + .collect::>(); + + data.write("/type", structured_type_map); + } + SchemaType::Typed(_instance_types) => { + // TODO: Technically speaking, we could have multiple instance types declared here, + // which is _entirely_ valid for JSON Schema. The trick is simply that we'll likely want + // to do something equivalent to how we handle composite schemas where we just render + // the schema in the context of each instance type, and then merge that rendered data + // together. 
+ // + // This means that we'll need another render method that operates on a schema + instance + // type basis, since trying to do it all in `render_bare_schema` would get ugly fast. + // + // Practically, all of this is fine for regular ol' data types because they don't + // intersect, but the tricky bit would be if we encountered the null instance type. It's + // a real/valid data type, but the main problem is that there's nothing that really + // makes sense to do with it. + // + // An object property, for example, that can be X or null, is essentially an optional + // field. We handle that by including, or excluding, that property from the object's + // required fields, which is specific to object. + // + // The only real world scenario where we would theoretically hit that is for an untagged + // enum, as a unit variant in an untagged enum is represented by `null` in JSON, in + // terms of its serialized value. _However_, we only generate enums as `oneOf`/`anyOf` + // schemas, so the `null` instance type should only ever show up by itself. + // + // Long story short, we can likely have a hard-coded check that rejects any "X or null" + // instance type groupings, knowing that _we_ never generate schemas like that, but it's + // still technically possible in a real-world JSON Schema document... so we should at + // least make the error message half-way decent so that it explains as much. + todo!() + } + } + + Ok(()) +} + +fn apply_schema_default_value( + _schema: T, + _data: &mut RenderData, +) -> Result<(), RenderError> { + Ok(()) +} + +fn apply_schema_metadata( + schema: T, + data: &mut RenderData, +) -> Result<(), RenderError> { + // If the schema is marked as being templateable, update the syntax of the string type field to + // use the special `template` sentinel value, which drives template-specific logic during the + // documentation generation phase. + if schema.has_flag_attribute(constants::DOCS_META_TEMPLATEABLE)? 
&& data.exists("/type/string") + { + data.write("/type/string/syntax", "template"); + } + + // TODO: Add examples. + // TODO: Add units. + // TODO: Syntax override. + + Ok(()) +} + +fn apply_schema_description( + schema: T, + data: &mut RenderData, +) -> Result<(), RenderError> { + if let Some(description) = render_schema_description(schema)? { + data.write("/description", description); + } + + Ok(()) +} + +fn get_rendered_value_type( + _schema: T, + _value: &Value, +) -> Result { + todo!() +} + +fn render_schema_description(schema: T) -> Result, RenderError> { + let maybe_title = schema.title(); + let maybe_description = schema.description(); + + match (maybe_title, maybe_description) { + (Some(_title), None) => Err(RenderError::Failed { + reason: "a schema should never have a title without a description".into(), + }), + (None, None) => Ok(None), + (None, Some(description)) => Ok(Some(description.trim().to_string())), + (Some(title), Some(description)) => { + let concatenated = format!("{}\n\n{}", title, description); + Ok(Some(concatenated.trim().to_string())) + } + } +} diff --git a/lib/vector-config-common/src/attributes.rs b/lib/vector-config-common/src/attributes.rs index 0b6f60b45077f..8cc81136e2cd8 100644 --- a/lib/vector-config-common/src/attributes.rs +++ b/lib/vector-config-common/src/attributes.rs @@ -47,4 +47,12 @@ impl CustomAttribute { value: serde_json::to_value(value).expect("should not fail to serialize value to JSON"), } } + + pub const fn is_flag(&self) -> bool { + matches!(self, Self::Flag(_)) + } + + pub const fn is_kv(&self) -> bool { + matches!(self, Self::KeyValue { .. 
}) + } } diff --git a/lib/vector-config-common/src/constants.rs b/lib/vector-config-common/src/constants.rs new file mode 100644 index 0000000000000..c7a06a0d3a6ec --- /dev/null +++ b/lib/vector-config-common/src/constants.rs @@ -0,0 +1,90 @@ +use serde_json::Value; +use syn::Path; + +pub const COMPONENT_TYPE_ENRICHMENT_TABLE: &str = "enrichment_table"; +pub const COMPONENT_TYPE_PROVIDER: &str = "provider"; +pub const COMPONENT_TYPE_SECRETS: &str = "secrets"; +pub const COMPONENT_TYPE_SINK: &str = "sink"; +pub const COMPONENT_TYPE_SOURCE: &str = "source"; +pub const COMPONENT_TYPE_TRANSFORM: &str = "transform"; +pub const DOCS_META_ADDITIONAL_PROPS_DESC: &str = "docs::additional_props_description"; +pub const DOCS_META_ADVANCED: &str = "docs::advanced"; +pub const DOCS_META_COMPONENT_BASE_TYPE: &str = "docs::component_base_type"; +pub const DOCS_META_COMPONENT_NAME: &str = "docs::component_name"; +pub const DOCS_META_COMPONENT_TYPE: &str = "docs::component_type"; +pub const DOCS_META_ENUM_CONTENT_FIELD: &str = "docs::enum_content_field"; +pub const DOCS_META_ENUM_TAG_DESCRIPTION: &str = "docs::enum_tag_description"; +pub const DOCS_META_ENUM_TAG_FIELD: &str = "docs::enum_tag_field"; +pub const DOCS_META_ENUM_TAGGING: &str = "docs::enum_tagging"; +pub const DOCS_META_EXAMPLES: &str = "docs::examples"; +pub const DOCS_META_HIDDEN: &str = "docs::hidden"; +pub const DOCS_META_LABEL: &str = "docs::label"; +pub const DOCS_META_NUMERIC_TYPE: &str = "docs::numeric_type"; +pub const DOCS_META_OPTIONAL: &str = "docs::optional"; +pub const DOCS_META_SYNTAX_OVERRIDE: &str = "docs::syntax_override"; +pub const DOCS_META_TEMPLATEABLE: &str = "docs::templateable"; +pub const DOCS_META_TYPE_OVERRIDE: &str = "docs::type_override"; +pub const DOCS_META_TYPE_UNIT: &str = "docs::type_unit"; +pub const METADATA: &str = "_metadata"; + +/// Well-known component types. 
+#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum ComponentType { + EnrichmentTable, + Provider, + Secrets, + Sink, + Source, + Transform, +} + +impl ComponentType { + /// Gets the type of this component as a string. + pub const fn as_str(&self) -> &'static str { + match self { + ComponentType::EnrichmentTable => COMPONENT_TYPE_ENRICHMENT_TABLE, + ComponentType::Provider => COMPONENT_TYPE_PROVIDER, + ComponentType::Secrets => COMPONENT_TYPE_SECRETS, + ComponentType::Sink => COMPONENT_TYPE_SINK, + ComponentType::Source => COMPONENT_TYPE_SOURCE, + ComponentType::Transform => COMPONENT_TYPE_TRANSFORM, + } + } + + pub fn is_valid_type(path: &Path) -> bool { + ComponentType::try_from(path).is_ok() + } +} + +impl<'a> TryFrom<&'a str> for ComponentType { + type Error = (); + + fn try_from(value: &'a str) -> Result { + match value { + COMPONENT_TYPE_ENRICHMENT_TABLE => Ok(ComponentType::EnrichmentTable), + COMPONENT_TYPE_PROVIDER => Ok(ComponentType::Provider), + COMPONENT_TYPE_SECRETS => Ok(ComponentType::Secrets), + COMPONENT_TYPE_SINK => Ok(ComponentType::Sink), + COMPONENT_TYPE_SOURCE => Ok(ComponentType::Source), + COMPONENT_TYPE_TRANSFORM => Ok(ComponentType::Transform), + _ => Err(()), + } + } +} + +impl<'a> TryFrom<&'a Path> for ComponentType { + type Error = (); + + fn try_from(path: &'a Path) -> Result { + path.get_ident() + .ok_or(()) + .map(|id| id.to_string()) + .and_then(|s| Self::try_from(s.as_str())) + } +} + +impl From<&ComponentType> for Value { + fn from(value: &ComponentType) -> Self { + Value::String(value.as_str().to_string()) + } +} diff --git a/lib/vector-config-common/src/lib.rs b/lib/vector-config-common/src/lib.rs index 37f1c2f6d794a..35286bb11036c 100644 --- a/lib/vector-config-common/src/lib.rs +++ b/lib/vector-config-common/src/lib.rs @@ -12,6 +12,7 @@ #![deny(warnings)] pub mod attributes; +pub mod constants; pub mod num; pub mod schema; pub mod validation; diff --git a/lib/vector-config-macros/src/ast/mod.rs 
b/lib/vector-config-macros/src/ast/mod.rs index aa1fa568e5dcf..7688d6f2df511 100644 --- a/lib/vector-config-macros/src/ast/mod.rs +++ b/lib/vector-config-macros/src/ast/mod.rs @@ -11,6 +11,7 @@ pub use container::Container; pub use field::Field; use syn::{Expr, NestedMeta}; pub use variant::Variant; +use vector_config_common::constants; const INVALID_VALUE_EXPR: &str = "got function call-style literal value but could not parse as expression"; @@ -86,17 +87,23 @@ impl Tagging { /// purpose of generating usable documentation from the schema. pub fn as_enum_metadata(&self) -> Vec { match self { - Self::External => vec![LazyCustomAttribute::kv("docs::enum_tagging", "external")], + Self::External => vec![LazyCustomAttribute::kv( + constants::DOCS_META_ENUM_TAGGING, + "external", + )], Self::Internal { tag } => vec![ - LazyCustomAttribute::kv("docs::enum_tagging", "internal"), - LazyCustomAttribute::kv("docs::enum_tag_field", tag), + LazyCustomAttribute::kv(constants::DOCS_META_ENUM_TAGGING, "internal"), + LazyCustomAttribute::kv(constants::DOCS_META_ENUM_TAG_FIELD, tag), ], Self::Adjacent { tag, content } => vec![ - LazyCustomAttribute::kv("docs::enum_tagging", "adjacent"), - LazyCustomAttribute::kv("docs::enum_tag_field", tag), - LazyCustomAttribute::kv("docs::enum_content_field", content), + LazyCustomAttribute::kv(constants::DOCS_META_ENUM_TAGGING, "adjacent"), + LazyCustomAttribute::kv(constants::DOCS_META_ENUM_TAG_FIELD, tag), + LazyCustomAttribute::kv(constants::DOCS_META_ENUM_CONTENT_FIELD, content), ], - Self::None => vec![LazyCustomAttribute::kv("docs::enum_tagging", "untagged")], + Self::None => vec![LazyCustomAttribute::kv( + constants::DOCS_META_ENUM_TAGGING, + "untagged", + )], } } } diff --git a/lib/vector-config-macros/src/attrs.rs b/lib/vector-config-macros/src/attrs.rs index 00873061c82ef..0348b693d50c0 100644 --- a/lib/vector-config-macros/src/attrs.rs +++ b/lib/vector-config-macros/src/attrs.rs @@ -10,12 +10,6 @@ impl AttributeIdent { } } -pub const 
ENRICHMENT_TABLE: AttributeIdent = AttributeIdent("enrichment_table"); -pub const PROVIDER: AttributeIdent = AttributeIdent("provider"); -pub const SECRETS: AttributeIdent = AttributeIdent("secrets"); -pub const SINK: AttributeIdent = AttributeIdent("sink"); -pub const SOURCE: AttributeIdent = AttributeIdent("source"); -pub const TRANSFORM: AttributeIdent = AttributeIdent("transform"); pub const NO_SER: AttributeIdent = AttributeIdent("no_ser"); pub const NO_DESER: AttributeIdent = AttributeIdent("no_deser"); pub const ENRICHMENT_TABLE_COMPONENT: AttributeIdent = AttributeIdent("enrichment_table_component"); diff --git a/lib/vector-config-macros/src/configurable_component.rs b/lib/vector-config-macros/src/configurable_component.rs index 16a3c0664d0b7..59d72264fe85f 100644 --- a/lib/vector-config-macros/src/configurable_component.rs +++ b/lib/vector-config-macros/src/configurable_component.rs @@ -7,83 +7,10 @@ use syn::{ parse_macro_input, parse_quote, parse_quote_spanned, punctuated::Punctuated, spanned::Spanned, token::Comma, AttributeArgs, DeriveInput, Lit, LitStr, Meta, MetaList, NestedMeta, Path, }; +use vector_config_common::constants::ComponentType; use crate::attrs; -#[derive(Clone, Debug)] -enum ComponentType { - EnrichmentTable, - Provider, - Secrets, - Sink, - Source, - Transform, -} - -impl ComponentType { - /// Gets the ident of the component type-specific helper attribute for the `NamedComponent` derive. - /// - /// When we emit code for a configurable item that has been marked as a typed component, we - /// optionally emit the code to generate an implementation of `NamedComponent` if that component - /// is supposed to be named. - /// - /// This function returns the appropriate ident for the helper attribute specific to the - /// component, as we must pass the component type being named -- source vs transform, etc -- - /// down to the derive for `NamedComponent`. 
This allows it to emit error messages that _look_ - /// like they're coming from `configurable_component`, even though they're coming from the - /// derive for `NamedComponent`. - fn get_named_component_helper_ident(&self) -> Ident { - let attr = match self { - ComponentType::EnrichmentTable => attrs::ENRICHMENT_TABLE_COMPONENT, - ComponentType::Provider => attrs::PROVIDER_COMPONENT, - ComponentType::Secrets => attrs::SECRETS_COMPONENT, - ComponentType::Sink => attrs::SINK_COMPONENT, - ComponentType::Source => attrs::SOURCE_COMPONENT, - ComponentType::Transform => attrs::TRANSFORM_COMPONENT, - }; - - attr.as_ident(Span::call_site()) - } - - fn is_valid_type(path: &Path) -> bool { - ComponentType::try_from(path).is_ok() - } - - /// Gets the type of this component as a string. - fn as_str(&self) -> &'static str { - match self { - ComponentType::EnrichmentTable => "enrichment_table", - ComponentType::Provider => "provider", - ComponentType::Secrets => "secrets", - ComponentType::Sink => "sink", - ComponentType::Source => "source", - ComponentType::Transform => "transform", - } - } -} - -impl<'a> TryFrom<&'a Path> for ComponentType { - type Error = (); - - fn try_from(path: &'a Path) -> Result { - if path == attrs::ENRICHMENT_TABLE { - Ok(Self::EnrichmentTable) - } else if path == attrs::PROVIDER { - Ok(Self::Provider) - } else if path == attrs::SECRETS { - Ok(Self::Secrets) - } else if path == attrs::SINK { - Ok(Self::Sink) - } else if path == attrs::SOURCE { - Ok(Self::Source) - } else if path == attrs::TRANSFORM { - Ok(Self::Transform) - } else { - Err(()) - } - } -} - #[derive(Clone, Debug)] struct TypedComponent { span: Span, @@ -197,7 +124,7 @@ impl TypedComponent { /// Creates the component name registration code. 
fn get_component_name_registration(&self) -> proc_macro2::TokenStream { - let helper_attr = self.component_type.get_named_component_helper_ident(); + let helper_attr = get_named_component_helper_ident(self.component_type); match self.component_name.as_ref() { None => quote_spanned! {self.span=> #[derive(::vector_config_macros::NamedComponent)] @@ -411,3 +338,27 @@ fn capitalize(s: &str) -> String { fn capitalize_words(s: &str) -> String { s.split('_').map(capitalize).join(" ") } + +/// Gets the ident of the component type-specific helper attribute for the `NamedComponent` derive. +/// +/// When we emit code for a configurable item that has been marked as a typed component, we +/// optionally emit the code to generate an implementation of `NamedComponent` if that component +/// is supposed to be named. +/// +/// This function returns the appropriate ident for the helper attribute specific to the +/// component, as we must pass the component type being named -- source vs transform, etc -- +/// down to the derive for `NamedComponent`. This allows it to emit error messages that _look_ +/// like they're coming from `configurable_component`, even though they're coming from the +/// derive for `NamedComponent`. 
+fn get_named_component_helper_ident(component_type: ComponentType) -> Ident { + let attr = match component_type { + ComponentType::EnrichmentTable => attrs::ENRICHMENT_TABLE_COMPONENT, + ComponentType::Provider => attrs::PROVIDER_COMPONENT, + ComponentType::Secrets => attrs::SECRETS_COMPONENT, + ComponentType::Sink => attrs::SINK_COMPONENT, + ComponentType::Source => attrs::SOURCE_COMPONENT, + ComponentType::Transform => attrs::TRANSFORM_COMPONENT, + }; + + attr.as_ident(Span::call_site()) +} diff --git a/lib/vector-config/Cargo.toml b/lib/vector-config/Cargo.toml index c16e17a6e854b..05df325556fee 100644 --- a/lib/vector-config/Cargo.toml +++ b/lib/vector-config/Cargo.toml @@ -18,6 +18,7 @@ indexmap = { version = "1.9", default-features = false } inventory = { version = "0.3" } no-proxy = { version = "0.3.1", default-features = false, features = ["serialize"] } num-traits = { version = "0.2.15", default-features = false } +once_cell = { version = "1", default-features = false } serde = { version = "1.0", default-features = false } serde_json = { version = "1.0", default-features = false, features = ["std"] } serde_with = { version = "2.3.2", default-features = false, features = ["std"] } diff --git a/lib/vector-config/src/component/description.rs b/lib/vector-config/src/component/description.rs index 432d7d316f2a8..df18fa919f612 100644 --- a/lib/vector-config/src/component/description.rs +++ b/lib/vector-config/src/component/description.rs @@ -2,7 +2,7 @@ use std::{cell::RefCell, marker::PhantomData}; use snafu::Snafu; use toml::Value; -use vector_config_common::attributes::CustomAttribute; +use vector_config_common::{attributes::CustomAttribute, constants}; use super::{ComponentMarker, GenerateConfig}; use crate::schema::{SchemaGenerator, SchemaObject}; @@ -119,7 +119,8 @@ where let mut variant_metadata = Metadata::default(); variant_metadata.set_description(self.description); - variant_metadata.add_custom_attribute(CustomAttribute::kv("docs::label", 
self.label)); + variant_metadata + .add_custom_attribute(CustomAttribute::kv(constants::DOCS_META_LABEL, self.label)); variant_metadata .add_custom_attribute(CustomAttribute::kv("logical_name", self.logical_name)); schema::apply_base_metadata(&mut subschema, variant_metadata); diff --git a/lib/vector-config/src/external/datetime.rs b/lib/vector-config/src/external/datetime.rs index 8f49b88362daa..341df8076a950 100644 --- a/lib/vector-config/src/external/datetime.rs +++ b/lib/vector-config/src/external/datetime.rs @@ -8,7 +8,7 @@ use crate::{ use chrono_tz::Tz; use serde_json::Value; use std::cell::RefCell; -use vector_config_common::attributes::CustomAttribute; +use vector_config_common::{attributes::CustomAttribute, constants}; use vrl_core::TimeZone; // TODO: Consider an approach for generating schema of "fixed string value, or remainder" structure @@ -24,10 +24,19 @@ impl Configurable for TimeZone { metadata.set_description(r#"This can refer to any valid timezone as defined in the [TZ database][tzdb], or "local" which refers to the system local timezone. 
[tzdb]: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones"#); - metadata.add_custom_attribute(CustomAttribute::kv("docs::enum_tagging", "untagged")); - metadata.add_custom_attribute(CustomAttribute::kv("docs::examples", "local")); - metadata.add_custom_attribute(CustomAttribute::kv("docs::examples", "America/New_York")); - metadata.add_custom_attribute(CustomAttribute::kv("docs::examples", "EST5EDT")); + metadata.add_custom_attribute(CustomAttribute::kv( + constants::DOCS_META_ENUM_TAGGING, + "untagged", + )); + metadata.add_custom_attribute(CustomAttribute::kv(constants::DOCS_META_EXAMPLES, "local")); + metadata.add_custom_attribute(CustomAttribute::kv( + constants::DOCS_META_EXAMPLES, + "America/New_York", + )); + metadata.add_custom_attribute(CustomAttribute::kv( + constants::DOCS_META_EXAMPLES, + "EST5EDT", + )); metadata } diff --git a/lib/vector-config/src/external/serde_with.rs b/lib/vector-config/src/external/serde_with.rs index 96e7aeba0f93f..05de17e3bf605 100644 --- a/lib/vector-config/src/external/serde_with.rs +++ b/lib/vector-config/src/external/serde_with.rs @@ -1,6 +1,6 @@ use std::cell::RefCell; -use vector_config_common::attributes::CustomAttribute; +use vector_config_common::{attributes::CustomAttribute, constants}; use crate::{ num::NumberClass, @@ -59,10 +59,13 @@ impl Configurable for serde_with::DurationSeconds, + }, +} + +impl SchemaError { + pub fn invalid_component_schema>>( + pointer: &'static str, + reason: S, + ) -> Self { + Self::InvalidComponentSchema { + pointer, + reason: reason.into(), + } + } +} + +/// A schema object that represents the schema of a single Vector component. +/// +/// The schema represents the equivalent of the component's configuration type, excluding any common +/// configuration fields that appear on a per-component type basis. 
This means that, for a sink +/// component, this schema would include the configuration fields of the specific sink component, +/// but wouldn't contain the common sink configuration fields such as `inputs` or `buffer`. +pub struct ComponentSchema<'a> { + schema: &'a SchemaObject, + component_name: String, + component_type: ComponentType, +} + +impl<'a> ComponentSchema<'a> { + /// The type of the component represented by this schema. + pub fn component_type(&self) -> ComponentType { + self.component_type + } + + /// The name of the component represented by this schema. + /// + /// This refers to the configuration-specific identifier used to specify the component type + /// within the `type` field. + /// + /// For example, the AWS S3 sink would be `aws_s3`. + pub fn component_name(&self) -> &str { + &self.component_name + } +} + +impl<'a> QueryableSchema for ComponentSchema<'a> { + fn schema_type(&self) -> SchemaType { + self.schema.schema_type() + } + + fn description(&self) -> Option<&str> { + self.schema.description() + } + + fn title(&self) -> Option<&str> { + self.schema.title() + } + + fn get_attributes(&self, key: &str) -> Option> { + self.schema.get_attributes(key) + } + + fn get_attribute(&self, key: &str) -> Result, QueryError> { + self.schema.get_attribute(key) + } + + fn has_flag_attribute(&self, key: &str) -> Result { + self.schema.has_flag_attribute(key) + } +} + +impl<'a> TryFrom> for ComponentSchema<'a> { + type Error = SchemaError; + + fn try_from(value: SimpleSchema<'a>) -> Result { + // Component schemas must have a component type _and_ component name defined. 
+ let component_type = + get_component_metadata_kv_str(&value, constants::DOCS_META_COMPONENT_TYPE).and_then( + |s| { + ComponentType::try_from(s.as_str()).map_err(|_| { + SchemaError::invalid_component_schema( + constants::DOCS_META_COMPONENT_TYPE, + "value was not a valid component type", + ) + }) + }, + )?; + + let component_name = + get_component_metadata_kv_str(&value, constants::DOCS_META_COMPONENT_NAME)?; + + Ok(Self { + schema: value.into_inner(), + component_name, + component_type, + }) + } +} + +fn get_component_metadata_kv_str<'a>( + schema: &'a SimpleSchema<'a>, + key: &'static str, +) -> Result { + schema + .get_attribute(key) + .map_err(|e| SchemaError::invalid_component_schema(key, e.to_string()))? + .ok_or_else(|| SchemaError::invalid_component_schema(key, "attribute must be present")) + .and_then(|attr| match attr { + CustomAttribute::Flag(_) => Err(SchemaError::invalid_component_schema( + key, + "expected key/value attribute, got flag instead", + )), + CustomAttribute::KeyValue { value, .. 
} => Ok(value), + }) + .and_then(|v| match v { + Value::String(name) => Ok(name), + _ => Err(SchemaError::invalid_component_schema( + key, + format!("`{}` must be a string", key), + )), + }) +} diff --git a/lib/vector-config/src/schema/parser/mod.rs b/lib/vector-config/src/schema/parser/mod.rs new file mode 100644 index 0000000000000..423ef678b2f61 --- /dev/null +++ b/lib/vector-config/src/schema/parser/mod.rs @@ -0,0 +1,2 @@ +pub mod component; +pub mod query; diff --git a/lib/vector-config/src/schema/parser/query.rs b/lib/vector-config/src/schema/parser/query.rs new file mode 100644 index 0000000000000..a2e719353b08c --- /dev/null +++ b/lib/vector-config/src/schema/parser/query.rs @@ -0,0 +1,435 @@ +use std::{fs::File, io::BufReader, path::Path}; + +use once_cell::sync::OnceCell; +use serde_json::Value; +use snafu::Snafu; +use vector_config_common::{ + attributes::CustomAttribute, + constants, + schema::{InstanceType, RootSchema, Schema, SchemaObject, SingleOrVec}, +}; + +#[derive(Debug, Snafu)] +#[snafu(module, context(suffix(false)))] +pub enum QueryError { + #[snafu(display("I/O error during opening schema: {source}"), context(false))] + Io { source: std::io::Error }, + + #[snafu(display("deserialization failed: {source}"), context(false))] + Deserialization { source: serde_json::Error }, + + #[snafu(display("no schemas matched the query"))] + NoMatches, + + #[snafu(display("multiple schemas matched the query ({len})"))] + MultipleMatches { len: usize }, + + #[snafu(display("found matching attribute but was not a flag"))] + AttributeNotFlag, + + #[snafu(display( + "found matching attribute but expected single value; multiple values present" + ))] + AttributeMultipleValues, +} + +pub struct SchemaQuerier { + schema: RootSchema, +} + +impl SchemaQuerier { + /// Creates a `SchemaQuerier` based on the schema file located at `schema_path`. 
+ /// + /// # Errors + /// + /// If no file exists at the given schema path, or there is an I/O error during loading the file + /// (permissions, etc), then an error variant will be returned. + /// + /// If the file does not contain valid JSON, or cannot be deserialized as a schema, then an + /// error variant will be returned. + pub fn from_schema>(schema_path: P) -> Result { + let reader = File::open(schema_path).map(BufReader::new)?; + let schema = serde_json::from_reader(reader)?; + + Ok(Self { schema }) + } + + pub fn query(&self) -> SchemaQueryBuilder<'_> { + SchemaQueryBuilder::from_schema(&self.schema) + } +} + +/// A query builder for querying against a root schema. +/// +/// All constraints are applied in a boolean AND fashion. +pub struct SchemaQueryBuilder<'a> { + schema: &'a RootSchema, + attributes: Vec, +} + +impl<'a> SchemaQueryBuilder<'a> { + fn from_schema(schema: &'a RootSchema) -> Self { + Self { + schema, + attributes: Vec::new(), + } + } + + /// Adds a constraint on the given custom attribute key/value. + /// + /// Can be used multiple times to match schemas against multiple attributes. + /// + /// Custom attributes are strongly matched: a flag attribute can only match a flag attribute, + /// not a key/value attribute, and vice versa. For key/value attributes where the attribute in + /// the schema itself has multiple values, the schema is considered a match so long as it + /// contains the value specified in the query. + pub fn with_custom_attribute_kv(mut self, key: K, value: V) -> Self + where + K: Into, + V: Into, + { + self.attributes.push(CustomAttribute::KeyValue { + key: key.into(), + value: value.into(), + }); + self + } + + /// Executes the query, returning all matching schemas. + pub fn run(self) -> Vec> { + let mut matches = Vec::new(); + + // Search through all defined schemas. 
+ 'schema: for schema_definition in self.schema.definitions.values() { + match schema_definition { + // We don't match against boolean schemas because there's nothing to match against. + Schema::Bool(_) => continue, + Schema::Object(schema_object) => { + // If we have custom attribute matches defined, but the schema has no metadata, + // it's not possible for it to match, so just bail out early. + let has_attribute_matchers = !self.attributes.is_empty(); + let schema_metadata = schema_object.extensions.get(constants::METADATA); + if has_attribute_matchers && schema_metadata.is_none() { + continue 'schema; + } + + if let Some(Value::Object(schema_attributes)) = schema_metadata { + for self_attribute in &self.attributes { + let attr_matched = match self_attribute { + CustomAttribute::Flag(key) => schema_attributes + .get(key) + .map_or(false, |value| matches!(value, Value::Bool(true))), + CustomAttribute::KeyValue { + key, + value: attr_value, + } => { + schema_attributes + .get(key) + .map_or(false, |value| match value { + // Check string values directly. + Value::String(schema_attr_value) => { + schema_attr_value == attr_value + } + // For arrays, try and convert each item to a string, and + // for the values that are strings, see if they match. + Value::Array(schema_attr_values) => { + schema_attr_values.iter().any(|value| { + value + .as_str() + .map_or(false, |s| s == attr_value) + }) + } + _ => false, + }) + } + }; + + if !attr_matched { + continue 'schema; + } + } + } + + matches.push(schema_object.into()); + } + } + } + + matches + } + + /// Executes the query, returning a single matching schema. + /// + /// # Errors + /// + /// If no schemas match, or more than one schema matches, then an error variant will be + /// returned. 
+ pub fn run_single(self) -> Result, QueryError> { + let mut matches = self.run(); + match matches.len() { + 0 => Err(QueryError::NoMatches), + 1 => Ok(matches.remove(0)), + len => Err(QueryError::MultipleMatches { len }), + } + } +} + +pub enum OneOrMany { + One(T), + Many(Vec), +} + +pub enum SchemaType<'a> { + /// A set of subschemas in which all must match. + /// + /// Referred to as an `allOf` schema in JSON Schema. + /// + /// For a given input, the input is only valid if it is valid against all specified subschemas. + AllOf(Vec>), + + /// A set of subschemas in which only one must match. + /// + /// Referred to as a `oneOf` schema in JSON Schema. + /// + /// For a given input, the input is only valid if it is valid against exactly one of the + /// specified subschemas. + OneOf(Vec>), + + /// A set of subschemas in which at least one must match. + /// + /// Referred to as a `anyOf` schema in JSON Schema. + /// + /// For a given input, the input is only valid if it is valid against at least one of the + /// specified subschemas. + AnyOf(Vec>), + + /// A schema that matches a well-known, constant value. + /// + /// Referred to by the `const` field in JSON Schema. + /// + /// For a given input, the input is only valid if it matches the value specified by `const` + /// exactly. The value can be any valid JSON value. + Constant(&'a Value), + + /// A schema that matches one of many well-known, constant values. + /// + /// Referred to by the `enum` field in JSON Schema. + /// + /// For a given input, the input is only valid if it matches one of the values specified by + /// `enum` exactly. The values can be any valid JSON value. + Enum(&'a Vec), + + /// A typed schema that matches a JSON data type. + /// + /// Referred to by the `type` field in JSON Schema. + /// + /// For a given input, the input is only valid if it is the same type as one of the types + /// specified by `type`. A schema can allow multiple data types. 
+ Typed(OneOrMany), +} + +pub trait QueryableSchema { + fn schema_type(&self) -> SchemaType; + fn description(&self) -> Option<&str>; + fn title(&self) -> Option<&str>; + fn get_attributes(&self, key: &str) -> Option>; + fn get_attribute(&self, key: &str) -> Result, QueryError>; + fn has_flag_attribute(&self, key: &str) -> Result; +} + +impl<'a, T> QueryableSchema for &'a T +where + T: QueryableSchema, +{ + fn schema_type(&self) -> SchemaType { + (*self).schema_type() + } + + fn description(&self) -> Option<&str> { + (*self).description() + } + + fn title(&self) -> Option<&str> { + (*self).title() + } + + fn get_attributes(&self, key: &str) -> Option> { + (*self).get_attributes(key) + } + + fn get_attribute(&self, key: &str) -> Result, QueryError> { + (*self).get_attribute(key) + } + + fn has_flag_attribute(&self, key: &str) -> Result { + (*self).has_flag_attribute(key) + } +} + +impl<'a> QueryableSchema for &'a SchemaObject { + fn schema_type(&self) -> SchemaType { + // TODO: Technically speaking, it is allowed to use the "X of" schema types in conjunction + // with other schema types i.e. `allOf` in conjunction with specifying a `type`. + // + // Right now, the configuration schema codegen should not actually be emitting anything like + // this, so our logic below is written against what we generate, not against what is + // technically possible. This _may_ need to change in the future if we end up using any "X + // of" schema composition mechanisms for richer validation (i.e. sticking special validation + // logic in various subschemas under `allOf`, while defining the main data schema via + // `type`, etc.) + if let Some(subschemas) = self.subschemas.as_ref() { + // Of all the possible "subschema" validation mechanism, we only support `allOf` and + // `oneOf`, based on what the configuration schema codegen will spit out. 
+ if let Some(all_of) = subschemas.all_of.as_ref() { + return SchemaType::AllOf(all_of.iter().map(schema_to_simple_schema).collect()); + } else if let Some(one_of) = subschemas.one_of.as_ref() { + return SchemaType::OneOf(one_of.iter().map(schema_to_simple_schema).collect()); + } else if let Some(any_of) = subschemas.any_of.as_ref() { + return SchemaType::AnyOf(any_of.iter().map(schema_to_simple_schema).collect()); + } else { + panic!("Encountered schema with subschema validation that wasn't one of the supported types: allOf, oneOf, anyOf."); + } + } + + if let Some(instance_types) = self.instance_type.as_ref() { + return match instance_types { + SingleOrVec::Single(single) => SchemaType::Typed(OneOrMany::One(*single.clone())), + SingleOrVec::Vec(many) => SchemaType::Typed(OneOrMany::Many(many.clone())), + }; + } + + if let Some(const_value) = self.const_value.as_ref() { + return SchemaType::Constant(const_value); + } + + if let Some(enum_values) = self.enum_values.as_ref() { + return SchemaType::Enum(enum_values); + } + + panic!("Schema type was not able to be detected!"); + } + + fn description(&self) -> Option<&str> { + self.metadata + .as_ref() + .and_then(|metadata| metadata.description.as_deref()) + } + + fn title(&self) -> Option<&str> { + self.metadata + .as_ref() + .and_then(|metadata| metadata.title.as_deref()) + } + + fn get_attributes(&self, key: &str) -> Option> { + self.extensions.get(constants::METADATA) + .map(|metadata| match metadata { + Value::Object(attributes) => attributes, + _ => panic!("Found metadata extension in schema that was not of type 'object'."), + }) + .and_then(|attributes| attributes.get(key)) + .map(|attribute| match attribute { + Value::Bool(b) => match b { + true => OneOrMany::One(CustomAttribute::flag(key)), + false => panic!("Custom attribute flags should never be false."), + }, + Value::String(s) => OneOrMany::One(CustomAttribute::kv(key, s)), + Value::Array(values) => { + let mapped = values.iter() + .map(|value| if let 
Value::String(s) = value { + CustomAttribute::kv(key, s) + } else { + panic!("Custom attribute key/value pair had array of values with a non-string value.") + }) + .collect(); + OneOrMany::Many(mapped) + }, + _ => panic!("Custom attribute had unexpected non-flag/non-KV value."), + }) + } + + fn get_attribute(&self, key: &str) -> Result, QueryError> { + self.get_attributes(key) + .map(|attrs| match attrs { + OneOrMany::One(attr) => Ok(attr), + OneOrMany::Many(_) => Err(QueryError::AttributeMultipleValues), + }) + .transpose() + } + + fn has_flag_attribute(&self, key: &str) -> Result { + self.get_attribute(key) + .and_then(|maybe_attr| match maybe_attr { + None => Ok(false), + Some(attr) => { + if attr.is_flag() { + Ok(true) + } else { + Err(QueryError::AttributeNotFlag) + } + } + }) + } +} + +pub struct SimpleSchema<'a> { + schema: &'a SchemaObject, +} + +impl<'a> SimpleSchema<'a> { + pub fn into_inner(self) -> &'a SchemaObject { + self.schema + } +} + +impl<'a> From<&'a SchemaObject> for SimpleSchema<'a> { + fn from(schema: &'a SchemaObject) -> Self { + Self { schema } + } +} + +impl<'a> QueryableSchema for SimpleSchema<'a> { + fn schema_type(&self) -> SchemaType { + self.schema.schema_type() + } + + fn description(&self) -> Option<&str> { + self.schema.description() + } + + fn title(&self) -> Option<&str> { + self.schema.title() + } + + fn get_attributes(&self, key: &str) -> Option> { + self.schema.get_attributes(key) + } + + fn get_attribute(&self, key: &str) -> Result, QueryError> { + self.schema.get_attribute(key) + } + + fn has_flag_attribute(&self, key: &str) -> Result { + self.schema.has_flag_attribute(key) + } +} + +fn schema_to_simple_schema(schema: &Schema) -> SimpleSchema<'_> { + static TRUE_SCHEMA_OBJECT: OnceCell = OnceCell::new(); + static FALSE_SCHEMA_OBJECT: OnceCell = OnceCell::new(); + + let schema_object = match schema { + Schema::Bool(bool) => { + if *bool { + TRUE_SCHEMA_OBJECT.get_or_init(|| Schema::Bool(true).into_object()) + } else { + 
FALSE_SCHEMA_OBJECT.get_or_init(|| Schema::Bool(false).into_object()) + } + } + Schema::Object(object) => object, + }; + + SimpleSchema { + schema: schema_object, + } +} diff --git a/lib/vector-config/src/stdlib.rs b/lib/vector-config/src/stdlib.rs index e0697e582d32b..7958c42d62609 100644 --- a/lib/vector-config/src/stdlib.rs +++ b/lib/vector-config/src/stdlib.rs @@ -13,7 +13,7 @@ use std::{ use indexmap::IndexMap; use serde_json::{Number, Value}; -use vector_config_common::{attributes::CustomAttribute, validation::Validation}; +use vector_config_common::{attributes::CustomAttribute, constants, validation::Validation}; use crate::{ num::ConfigurableNumber, @@ -141,8 +141,10 @@ macro_rules! impl_configurable_numeric { fn metadata() -> Metadata { let mut metadata = Metadata::with_transparent(true); let numeric_type = ::class(); - metadata - .add_custom_attribute(CustomAttribute::kv("docs::numeric_type", numeric_type)); + metadata.add_custom_attribute(CustomAttribute::kv( + constants::DOCS_META_NUMERIC_TYPE, + numeric_type, + )); metadata } diff --git a/lib/vector-core/src/stream/partitioned_batcher.rs b/lib/vector-core/src/stream/partitioned_batcher.rs index 6976dfc8e93d7..aa2f7e6b58bcd 100644 --- a/lib/vector-core/src/stream/partitioned_batcher.rs +++ b/lib/vector-core/src/stream/partitioned_batcher.rs @@ -436,7 +436,7 @@ where // We have no batch yet for this partition key, so // create one and create the expiration entries as well. // This allows the batch to expire before filling up, - // and vise versa. + // and vice versa. 
let batch = Batch::new(item_limit, alloc_limit).with(item); this.batches.insert(item_key.clone(), batch); this.timer.insert(item_key); diff --git a/rfcs/2022-11-04-automatic-component-validation.md b/rfcs/2022-11-04-automatic-component-validation.md index c8f714ba7e0b0..1c76bcdcc84a1 100644 --- a/rfcs/2022-11-04-automatic-component-validation.md +++ b/rfcs/2022-11-04-automatic-component-validation.md @@ -249,7 +249,7 @@ what the sink is sending. While the existing encoding/decoding enums -- where w standard supported codecs -- do not have parity between each other to seamlessly facilitate this, we'll look to remedy that situation as part of the work to actually implement support for each of these codecs. There's no fundamental reason why all of the currently supported encodings cannot have -reciprocal decoders, and vise versa. +reciprocal decoders, and vice versa. #### Test cases diff --git a/scripts/generate-component-docs.rb b/scripts/generate-component-docs.rb index b9a7e98381696..f499e13968ddf 100755 --- a/scripts/generate-component-docs.rb +++ b/scripts/generate-component-docs.rb @@ -88,21 +88,6 @@ def find_command_on_path(command) # Helpers for caching resolved/expanded schemas and detecting schema resolution cycles. @resolved_schema_cache = {} @expanded_schema_cache = {} -@schema_resolution_queue = {} - -def add_to_schema_resolution_stack(schema_name) - @logger.debug "Adding '#{schema_name}' to resolution stack." - @schema_resolution_queue[schema_name] = true -end - -def remove_from_schema_resolution_stack(schema_name) - @logger.debug "Removing '#{schema_name}' from resolution stack." - @schema_resolution_queue.delete(schema_name) -end - -def schema_resolution_cycle?(schema_name) - @schema_resolution_queue.key?(schema_name) -end # Gets the schema of the given `name` from the resolved schema cache, if it exists. 
def get_cached_resolved_schema(schema_name) @@ -519,11 +504,6 @@ def apply_object_property_fields!(parent_schema, property_schema, property_name, # For any overlapping fields in the given schema and the referenced schema, the fields from the # given schema will win. def expand_schema_references(root_schema, unexpanded_schema) - # Break any cycles during expansion, the same as we do during resolution. - if get_schema_metadata(unexpanded_schema, 'docs::cycle_entrypoint') - return unexpanded_schema - end - schema = deep_copy(unexpanded_schema) # Grab the existing title/description from our unexpanded schema, and reset them after @@ -710,21 +690,9 @@ def resolve_schema_by_name(root_schema, schema_name) resolved = get_cached_resolved_schema(schema_name) return deep_copy(resolved) unless resolved.nil? - if schema_resolution_cycle?(schema_name) - @logger.error "Cycle detected while resolving schema '#{schema_name}'. \ - \ - Cycles must be broken manually at the source code level by annotating fields that induce \ - cycles with `#[configurable(metadata(docs::cycle_entrypoint))]`. As such a field will have no type \ - information rendered, it is advised to supply a sufficiently detailed field description that \ - describes the allowable values, etc." - exit 1 - end - # It wasn't already cached, so we actually have to resolve it. schema = get_schema_by_name(root_schema, schema_name) - add_to_schema_resolution_stack(schema_name) resolved = resolve_schema(root_schema, schema) - remove_from_schema_resolution_stack(schema_name) @resolved_schema_cache[schema_name] = resolved deep_copy(resolved) end @@ -756,21 +724,6 @@ def resolve_schema(root_schema, schema) return end - # Avoid schemas that represent a resolution cycle. - # - # When a schema is marked as a "cycle entrypoint", this means the schema is self-referential (i.e. - # the `pipelines` transform, which is part of `Transforms`, having a field that references - # `Transforms`) and we have to break the cycle. 
- # - # We have to return _something_, as it's a real part of the schema, so we just return a basic - # schema with no type information but with any description that is specified, etc. - if get_schema_metadata(schema, 'docs::cycle_entrypoint') - resolved = { 'type' => 'blank' } - description = get_rendered_description_from_schema(schema) - resolved['description'] = description unless description.empty? - return resolved - end - # Handle schemas that have type overrides. # # In order to better represent specific field types in the documentation, we may opt to use a @@ -1590,7 +1543,7 @@ def reconcile_resolved_schema!(resolved_schema) # This means that we can generate output for a field that says it has a default value of `null` # but is a required field, which is a logical inconsistency in terms of the Cue schema where we # import the generated output of this script: it doesn't allow setting a default value for a field - # if the field is required, and vise versa. + # if the field is required, and vice versa. if resolved_schema['required'] # For all schema type fields, see if they have a default value equal to `nil`. If so, remove # the `default` field entirely. @@ -1635,7 +1588,6 @@ def reconcile_resolved_schema!(resolved_schema) enum_values = if const_type_field.is_a?(Array) const_type_field .map { |const| [const['value'], const['description']] } - .add_to_schema_resolution_stack else # If the value isn't already an array, we'll create the enum values map directly. { const_type_field['value'] => const_type_field['description'] } diff --git a/src/components/validation/resources/mod.rs b/src/components/validation/resources/mod.rs index 93c6d2350d95f..b6427bd7f6ab5 100644 --- a/src/components/validation/resources/mod.rs +++ b/src/components/validation/resources/mod.rs @@ -64,7 +64,7 @@ impl ResourceCodec { /// Gets an encoder for this codec. 
/// /// The encoder is generated as an inverse to the input codec: if a decoding configuration was - /// given, we generate an encoder that satisfies that decoding configuration, and vise versa. + /// given, we generate an encoder that satisfies that decoding configuration, and vice versa. pub fn into_encoder(&self) -> Encoder { let (framer, serializer) = match self { Self::Encoding(config) => ( @@ -95,7 +95,7 @@ impl ResourceCodec { /// Gets a decoder for this codec. /// /// The decoder is generated as an inverse to the input codec: if an encoding configuration was - /// given, we generate a decoder that satisfies that encoding configuration, and vise versa. + /// given, we generate a decoder that satisfies that encoding configuration, and vice versa. pub fn into_decoder(&self) -> Decoder { let (framer, deserializer) = match self { Self::Decoding(config) => return config.build(), diff --git a/src/internal_telemetry/allocations/allocator/tracer.rs b/src/internal_telemetry/allocations/allocator/tracer.rs index 14db77136d20e..d453e03cf2fcc 100644 --- a/src/internal_telemetry/allocations/allocator/tracer.rs +++ b/src/internal_telemetry/allocations/allocator/tracer.rs @@ -7,7 +7,7 @@ pub trait Tracer { /// All allocations/deallocations that occur within the call to `Tracer::trace_allocation` are ignored, so /// implementors can allocate/deallocate without risk of reentrancy bugs. It does mean, however, that the /// allocations/deallocations that occur will be effectively lost, so implementors should ensure that the only data - /// they deallocate in the tracer is data that was similarly allocated, and vise versa. + /// they deallocate in the tracer is data that was similarly allocated, and vice versa. /// /// The object size is from the original layout excluding the group ID size. 
fn trace_allocation(&self, object_size: usize, group_id: AllocationGroupId); @@ -21,7 +21,7 @@ pub trait Tracer { /// All allocations/deallocations that occur within the call to `Tracer::trace_deallocation` are ignored, so /// implementors can allocate/deallocate without risk of reentrancy bugs. It does mean, however, that the /// allocations/deallocations that occur will be effectively lost, so implementors should ensure that the only data - /// they deallocate in the tracer is data that was similarly allocated, and vise versa. + /// they deallocate in the tracer is data that was similarly allocated, and vice versa. /// /// The object size is from the original layout excluding the group ID size. fn trace_deallocation(&self, object_size: usize, source_group_id: AllocationGroupId);