From 697bfb0635e7cd49929020203a2f67194f3c3bf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kat=20March=C3=A1n?= Date: Thu, 4 Apr 2024 09:24:07 -0700 Subject: [PATCH] checkpoint: parser done, now to hook up everything else --- src/document.rs | 13 +--- src/error.rs | 12 +-- src/v2_parser.rs | 198 ++++++++++++++++++++++++++++++++++++++++++++--- src/value.rs | 2 + 4 files changed, 194 insertions(+), 31 deletions(-) diff --git a/src/document.rs b/src/document.rs index d2bd654..48e21db 100644 --- a/src/document.rs +++ b/src/document.rs @@ -1,8 +1,8 @@ #[cfg(feature = "span")] use miette::SourceSpan; -use std::{fmt::Display, str::FromStr}; +use std::fmt::Display; -use crate::{v1_parser, IntoKdlQuery, KdlDiagnostic, KdlNode, KdlQueryIterator, KdlValue, NodeKey}; +use crate::{IntoKdlQuery, KdlDiagnostic, KdlNode, KdlQueryIterator, KdlValue, NodeKey}; /// Represents a KDL /// [`Document`](https://github.com/kdl-org/kdl/blob/main/SPEC.md#document). @@ -373,15 +373,6 @@ impl IntoIterator for KdlDocument { } } -impl FromStr for KdlDocument { - type Err = KdlDiagnostic; - - fn from_str(input: &str) -> Result { - let kdl_parser = v1_parser::KdlParser::new(input); - kdl_parser.parse(v1_parser::document(&kdl_parser)) - } -} - #[cfg(test)] mod test { #[cfg(feature = "span")] diff --git a/src/error.rs b/src/error.rs index d0af3b2..9371640 100644 --- a/src/error.rs +++ b/src/error.rs @@ -51,20 +51,16 @@ pub struct KdlParseFailure { #[derive(Debug, Diagnostic, Clone, Eq, PartialEq, Error)] #[error("{kind}")] pub struct KdlDiagnostic { - /// Source string for the KDL document that failed to parse. - #[source_code] - pub input: String, - /// Offset in chars of the error. - #[label("{}", label.unwrap_or("here"))] + #[label("{}", label.unwrap_or("here".into()))] pub span: SourceSpan, /// Label text for this span. Defaults to `"here"`. - pub label: Option<&'static str>, + pub label: Option, /// Suggestion for fixing the parser error. #[help] - pub help: Option<&'static str>, + pub help: Option, /// Severity level for the Diagnostic. #[diagnostic(severity)] @@ -91,7 +87,7 @@ pub enum KdlErrorKind { /// that failed to parse. #[error("Expected {0}.")] #[diagnostic(code(kdl::parse_component))] - Context(&'static str), + Context(String), /// Generic unspecified error. If this is returned, the call site should /// be annotated with context, if possible. diff --git a/src/v2_parser.rs b/src/v2_parser.rs index d366839..b9b1114 100644 --- a/src/v2_parser.rs +++ b/src/v2_parser.rs @@ -3,27 +3,53 @@ // swiss-cheese, it's simpler to just hush the compiler about it #![cfg_attr(not(feature = "span"), allow(dead_code, unused_variables))] -use std::f32::consts::E; - -use miette::SourceSpan; +use miette::{Severity, SourceSpan}; use winnow::{ ascii::{digit1, escaped_transform, hex_digit1, oct_digit1, Caseless}, combinator::{alt, cut_err, eof, not, opt, peek, preceded, repeat, repeat_till}, - error::{ContextError, ErrorKind, ParserError, StrContext}, + error::{ContextError, StrContext}, prelude::*, - stream::{AsChar, Recoverable, Stream}, - token::{any, none_of, one_of, take, take_till, take_until, take_while}, + stream::{AsChar, Recoverable}, + token::{any, none_of, one_of, take, take_until, take_while}, Located, }; use crate::{ KdlDiagnostic, KdlDocument, KdlEntry, KdlErrorKind, KdlIdentifier, KdlNode, KdlParseError, - KdlValue, + KdlParseFailure, KdlValue, }; type Input<'a> = Recoverable, ContextError>; +impl std::str::FromStr for KdlDocument { + type Err = KdlParseFailure; + + fn from_str(s: &str) -> Result { + let (_, maybe_doc, errs) = document.recoverable_parse(Located::new(s)); + if let Some(doc) = maybe_doc { + Ok(doc) + } else { + Err(KdlParseFailure { + input: String::from(s), + diagnostics: errs + .into_iter() + .map(|e| { + let labels = e.context().map(|c| c.to_string()).collect::>(); + KdlDiagnostic { + span: SourceSpan::new(0.into(), 0.into()), + label: Default::default(), + help: None, + severity: Severity::Error, + kind: KdlErrorKind::Context(labels.join(", ")), + } + }) + .collect(), + }) + } + } +} + fn new_input<'s>(input: &'s str) -> Input<'s> { Recoverable::new(Located::new(input)) } @@ -40,6 +66,143 @@ fn lbl(label: &'static str) -> StrContext { StrContext::Label(label) } +/// `document := bom? nodes` +fn document<'s>(input: &mut Input<'s>) -> PResult { + opt(bom).parse_next(input)?; + nodes.parse_next(input) +} + +/// `nodes := (line-space* node)* line-space*` +fn nodes<'s>(input: &mut Input<'s>) -> PResult { + let nodes = repeat( + 0.., + (repeat(0.., line_space).map(|_: ()| ()), node).map(|(_, n)| n), + ) + .parse_next(input)?; + repeat(0.., line_space).parse_next(input)?; + Ok(KdlDocument { + leading: None, + nodes, + trailing: None, + span: SourceSpan::new(0.into(), 0.into()), + }) +} + +/// `base-node := type? optional-node-space string (required-node-space node-prop-or-arg)* (required-node-space node-children)?` +fn base_node<'s>(input: &mut Input<'s>) -> PResult { + let ty = opt(ty).parse_next(input)?.flatten(); + optional_node_space.parse_next(input)?; + let name = identifier.parse_next(input)?; + let entries = repeat( + 0.., + (required_node_space, node_entry).map(|(_, e): ((), _)| e), + ) + .map(|e: Vec>| e.into_iter().filter_map(|e| e).collect::>()) + .parse_next(input)?; + let children = (required_node_space, opt(node_children)) + .map(|(_, c)| c) + .parse_next(input)?; + Ok(KdlNode { + ty, + name, + entries, + children, + span: SourceSpan::new(0.into(), 0.into()), + before_children: None, + leading: None, + trailing: None, + }) +} + +/// `node := base-node optional-node-space node-terminator` +fn node<'s>(input: &mut Input<'s>) -> PResult { + let node = base_node.parse_next(input)?; + optional_node_space.parse_next(input)?; + node_terminator.parse_next(input)?; + Ok(node) +} + +/// `final-node := base-node optional-node-space node-terminator?` +fn final_node<'s>(input: &mut Input<'s>) -> PResult { + let node = base_node.parse_next(input)?; + optional_node_space.parse_next(input)?; + opt(node_terminator).parse_next(input)?; + Ok(node) +} + +/// `node-prop-or-arg := prop | value` +fn node_entry<'s>(input: &mut Input<'s>) -> PResult> { + alt((prop, value)) + .context(lbl("node entry")) + .parse_next(input) +} + +/// `node-children := '{' nodes final-node? '}'` +fn node_children<'s>(input: &mut Input<'s>) -> PResult { + "{".parse_next(input)?; + let ns = cut_err(nodes).parse_next(input)?; + cut_err("}").parse_next(input)?; + Ok(ns) +} + +/// `node-terminator := single-line-comment | newline | ';' | eof` +fn node_terminator<'s>(input: &mut Input<'s>) -> PResult<()> { + alt((single_line_comment, newline, ";".void(), eof.void())).parse_next(input) +} + +/// `prop := string optional-node-space equals-sign optional-node-space value` +fn prop<'s>(input: &mut Input<'s>) -> PResult> { + let key = identifier.parse_next(input)?; + optional_node_space.parse_next(input)?; + equals_sign.parse_next(input)?; + optional_node_space.parse_next(input)?; + let value = cut_err(value) + .resume_after(badval) + .parse_next(input)? + .flatten(); + Ok(value.map(|value| KdlEntry { + ty: value.ty, + name: Some(key), + value: value.value, + value_repr: None, + leading: None, + trailing: None, + span: SourceSpan::new(0.into(), 0.into()), + })) +} + +/// `value := type? optional-node-space (string | number | keyword)` +fn value<'s>(input: &mut Input<'s>) -> PResult> { + let ty = opt(ty).parse_next(input)?.flatten(); + optional_node_space.parse_next(input)?; + let val = alt((string, number, keyword)) + .context(lbl("value")) + .resume_after(badval) + .parse_next(input)? + .flatten(); + Ok(val.map(|value| KdlEntry { + ty, + value, + leading: None, + value_repr: None, + name: None, + trailing: None, + span: SourceSpan::new(0.into(), 0.into()), + })) +} + +/// `type := '(' optional-node-space string optional-node-space ')'` +fn ty<'s>(input: &mut Input<'s>) -> PResult> { + "(".parse_next(input)?; + optional_node_space.parse_next(input)?; + let ty = cut_err(identifier.context(lbl("type identifier"))) + .resume_after(badval) + .parse_next(input)?; + optional_node_space.parse_next(input)?; + cut_err(")").parse_next(input)?; + Ok(ty) +} + /// `plain-line-space := newline | ws | single-line-comment` fn plain_line_space<'s>(input: &mut Input<'s>) -> PResult<()> { alt((newline, ws, single_line_comment)).parse_next(input) @@ -53,8 +216,8 @@ fn plain_node_space<'s>(input: &mut Input<'s>) -> PResult<()> { /// `line-space := plain-line-space+ | '/-' plain-node-space* node` fn line_space<'s>(input: &mut Input<'s>) -> PResult<()> { alt(( - repeat(1.., plain_line_space).map(|_: ()| ()), - ("/-", repeat(0.., plain_node_space).map(|_: ()| ()), node), + repeat(1.., plain_line_space).map(|_: ()| ()).void(), + ("/-", repeat(0.., plain_node_space).map(|_: ()| ()), node).void(), )) .parse_next(input) } @@ -67,7 +230,7 @@ fn node_space<'s>(input: &mut Input<'s>) -> PResult<()> { opt(( "/-", repeat(0.., plain_node_space).map(|_: ()| ()), - alt((node_or_prop_arg, node_children)), + alt((node_entry.void(), node_children.void())), )) .void() .parse_next(input) @@ -89,6 +252,17 @@ fn string<'s>(input: &mut Input<'s>) -> PResult> { alt((identifier_string, quoted_string, raw_string)).parse_next(input) } +fn identifier<'s>(input: &mut Input<'s>) -> PResult { + identifier_string + .verify_map(|i| { + i.and_then(|v| match v { + KdlValue::String(s) => Some(KdlIdentifier::from(s)), + _ => None, + }) + }) + .parse_next(input) +} + /// `identifier-string := unambiguous-ident | signed-ident | dotted-ident` fn identifier_string<'s>(input: &mut Input<'s>) -> PResult> { alt(( @@ -194,7 +368,7 @@ fn quoted_string<'s>(input: &mut Input<'s>) -> PResult> { .resume_after(raw_string_badval) .parse_next(input)? }; - Ok(body.map(|body| KdlValue::RawString(body))) + Ok(body.map(|body| KdlValue::String(body))) } /// ```text @@ -293,7 +467,7 @@ fn raw_string<'s>(input: &mut Input<'s>) -> PResult> { .resume_after(raw_string_badval) .parse_next(input)? }; - Ok(body.map(|body| KdlValue::RawString(body))) + Ok(body.map(|body| KdlValue::String(body))) } /// Like badval, but is able to slurp up invalid raw strings, which contain whitespace. diff --git a/src/value.rs b/src/value.rs index a155cf4..51624f6 100644 --- a/src/value.rs +++ b/src/value.rs @@ -4,6 +4,8 @@ use std::fmt::Display; #[derive(Debug, Clone, PartialOrd)] pub enum KdlValue { /// A [KDL Raw String](https://github.com/kdl-org/kdl/blob/main/SPEC.md#raw-string). + // TODO: remove this and use `String` for all strings. We can use the repr + // to keep track of whether it's raw or not. RawString(String), /// A [KDL String](https://github.com/kdl-org/kdl/blob/main/SPEC.md#string).