From a20104f3cde042beb5257d186bed9709066b69bb Mon Sep 17 00:00:00 2001 From: Alex Hansen Date: Thu, 15 Apr 2021 17:35:17 -0700 Subject: [PATCH] Control flow analysis + more (#28) * begin documentation of control flow analysis algorithm * begin graph construction * begin graph construction * generate graphs; further control flow work * some improvements in control flow; still need to wire up imports into the control flow graph * control flow graph improvements * fix up implicit return printing in control flow graph * organize graph code * find dead code * unreachable code warnings * parse unit type as type info * start enum expressions * fix enum lookup bug * type check all enum instantiations * Resolved types (#29) * begin swappnig over to resolved types * progress in refactore * progress on converting types * progress in switching to resolved types * progress in switching to resolved types * finalize refactor to resolved types * remove unused type variants * fix tests * enum variant construction code analysis * resolve warnings * missing enum instantiator error * trait declarations control flow * fix trait type mismatch error message * individual method dead code warnings * fix method call bug * respect public visibility modifiers; control flow analysis on libraries * add public traits * move ident file * begin analysis on return paths * refactor of files * begin work on analyzing return paths * basic return path graph construction done * documentation * Return path analysis (#30) * begin analysis on return paths * refactor of files * begin work on analyzing return paths * basic return path graph construction done * documentation * fix code block type parsing * control flow analysis on if and code blocks * struct fields in graph * individual struct field warnings --- parser/Cargo.toml | 1 + parser/README.md | 3 + .../analyze_return_paths.rs | 294 +++++++ .../dead_code_analysis.rs | 799 ++++++++++++++++++ .../control_flow_analysis/flow_graph/mod.rs | 154 ++++ 
.../flow_graph/namespace.rs | 137 +++ parser/src/control_flow_analysis/mod.rs | 42 + parser/src/error.rs | 73 +- parser/src/hll.pest | 21 +- parser/src/ident.rs | 51 ++ parser/src/lib.rs | 88 +- parser/src/parse_tree/call_path.rs | 37 +- .../declaration/enum_declaration.rs | 78 +- .../declaration/function_declaration.rs | 6 +- parser/src/parse_tree/declaration/mod.rs | 3 +- .../parse_tree/declaration/reassignment.rs | 3 +- .../declaration/struct_declaration.rs | 43 +- .../declaration/trait_declaration.rs | 19 +- .../declaration/variable_declaration.rs | 4 +- parser/src/parse_tree/expression/asm.rs | 2 +- parser/src/parse_tree/expression/mod.rs | 139 +-- parser/src/parse_tree/mod.rs | 4 +- parser/src/parse_tree/variable_declaration.rs | 1 - parser/src/semantics/ast_node/code_block.rs | 67 +- parser/src/semantics/ast_node/declaration.rs | 275 +++++- .../ast_node/expression/enum_instantiation.rs | 86 ++ .../src/semantics/ast_node/expression/mod.rs | 8 +- .../ast_node/expression/struct_expr_field.rs | 3 +- .../ast_node/expression/typed_expression.rs | 196 +++-- .../expression/typed_expression_variant.rs | 66 +- parser/src/semantics/ast_node/impl_trait.rs | 139 +-- parser/src/semantics/ast_node/mod.rs | 149 ++-- parser/src/semantics/ast_node/while_loop.rs | 6 + parser/src/semantics/mod.rs | 2 +- parser/src/semantics/namespace.rs | 147 +++- parser/src/semantics/syntax_tree.rs | 8 +- parser/src/types/mod.rs | 4 + .../src/{types.rs => types/resolved_type.rs} | 142 ++-- parser/src/types/type_info.rs | 89 ++ stdlib/src/main.fm | 51 +- 40 files changed, 2907 insertions(+), 533 deletions(-) create mode 100644 parser/src/control_flow_analysis/analyze_return_paths.rs create mode 100644 parser/src/control_flow_analysis/dead_code_analysis.rs create mode 100644 parser/src/control_flow_analysis/flow_graph/mod.rs create mode 100644 parser/src/control_flow_analysis/flow_graph/namespace.rs create mode 100644 parser/src/control_flow_analysis/mod.rs create mode 100644 parser/src/ident.rs 
delete mode 100644 parser/src/parse_tree/variable_declaration.rs create mode 100644 parser/src/semantics/ast_node/expression/enum_instantiation.rs create mode 100644 parser/src/types/mod.rs rename parser/src/{types.rs => types/resolved_type.rs} (66%) create mode 100644 parser/src/types/type_info.rs diff --git a/parser/Cargo.toml b/parser/Cargo.toml index d88b6d4c615..12c33ad974e 100644 --- a/parser/Cargo.toml +++ b/parser/Cargo.toml @@ -12,3 +12,4 @@ pest_derive = "2.0" thiserror = "1.0" either = "1.6" Inflector = "0.11" +petgraph = "0.5" diff --git a/parser/README.md b/parser/README.md index 523cd096a0c..12a154eb605 100644 --- a/parser/README.md +++ b/parser/README.md @@ -1,2 +1,5 @@ # fuel-vm-hll High Level Language (Name Subject to Change) for the FuelVM + +# Minimum supported Rust version +As of now, this code was developed on and is guaranteed to run on Rust 1.50 stable. diff --git a/parser/src/control_flow_analysis/analyze_return_paths.rs b/parser/src/control_flow_analysis/analyze_return_paths.rs new file mode 100644 index 00000000000..65e32493849 --- /dev/null +++ b/parser/src/control_flow_analysis/analyze_return_paths.rs @@ -0,0 +1,294 @@ +//! This is the flow graph, a graph which contains edges that represent possible steps of program +//! execution. 
+ +use super::*; +use super::{ControlFlowGraph, EntryPoint, ExitPoint, Graph}; +use crate::semantics::{ + ast_node::{ + TypedCodeBlock, TypedDeclaration, TypedExpression, TypedFunctionDeclaration, + TypedReassignment, TypedVariableDeclaration, TypedWhileLoop, + }, + TypedAstNode, TypedAstNodeContent, +}; +use crate::types::ResolvedType; +use crate::Ident; +use crate::{error::*, semantics::TypedParseTree}; +use pest::Span; +use petgraph::prelude::NodeIndex; + +impl<'sc> ControlFlowGraph<'sc> { + pub(crate) fn construct_return_path_graph(ast: &TypedParseTree<'sc>) -> Self { + let mut graph = ControlFlowGraph { + graph: Graph::new(), + entry_points: vec![], + namespace: Default::default(), + }; + // do a depth first traversal and cover individual inner ast nodes + let mut leaves = vec![]; + for ast_entrypoint in ast.root_nodes.iter() { + let l_leaves = connect_node(ast_entrypoint, &mut graph, &leaves); + + match l_leaves { + NodeConnection::NextStep(nodes) => leaves = nodes, + _ => (), + } + } + + graph + } + /// This function looks through the control flow graph and ensures that all paths that are + /// required to return a value do, indeed, return a value of the correct type. + /// It does this by checking every function declaration in both the methods namespace + /// and the functions namespace and validating that all paths leading to the function exit node + /// return the same type. Additionally, if a function has a return type, all paths must indeed + /// lead to the function exit node. 
+ pub(crate) fn analyze_return_paths(&self) -> Vec> { + let mut errors = vec![]; + for ( + name, + FunctionNamespaceEntry { + entry_point, + exit_point, + return_type, + }, + ) in &self.namespace.function_namespace + { + // For every node connected to the entry point + errors.append(&mut self.ensure_all_paths_reach_exit( + *entry_point, + *exit_point, + name.primary_name, + return_type, + )); + } + errors + } + fn ensure_all_paths_reach_exit( + &self, + entry_point: EntryPoint, + exit_point: ExitPoint, + function_name: &'sc str, + return_ty: &ResolvedType<'sc>, + ) -> Vec> { + let mut rovers = vec![entry_point]; + let mut errors = vec![]; + let mut max_iterations = 50; + while rovers.len() >= 1 && rovers[0] != exit_point && max_iterations > 0 { + max_iterations -= 1; + /* + println!( + "{:?}", + rovers + .iter() + .map(|ix| self.graph[*ix].clone()) + .collect::>() + ); + */ + rovers = rovers + .into_iter() + .filter(|idx| *idx != exit_point) + .collect(); + let mut next_rovers = vec![]; + for rover in rovers { + let mut neighbors = self + .graph + .neighbors_directed(rover, petgraph::Direction::Outgoing) + .collect::>(); + if neighbors.is_empty() && *return_ty != ResolvedType::Unit { + errors.push(CompileError::PathDoesNotReturn { + // TODO: unwrap_to_node is a shortcut. In reality, the graph type should be + // different. To save some code duplication, + span: self.graph[rover].unwrap_to_node().span.clone(), + function_name, + ty: return_ty.friendly_type_str(), + }); + } + next_rovers.append(&mut neighbors); + } + rovers = next_rovers; + } + + errors + } +} + +/// The resulting edges from connecting a node to the graph. +enum NodeConnection { + /// This represents a node that steps on to the next node. + NextStep(Vec), + /// This represents a return or implicit return node, which aborts the stepwise flow. 
+ Return(NodeIndex), +} + +fn connect_node<'sc>( + node: &TypedAstNode<'sc>, + graph: &mut ControlFlowGraph<'sc>, + leaves: &[NodeIndex], +) -> NodeConnection { + let span = node.span.clone(); + match &node.content { + TypedAstNodeContent::ReturnStatement(_) + | TypedAstNodeContent::ImplicitReturnExpression(_) => { + let this_index = graph.add_node(node.into()); + for leaf_ix in leaves { + graph.add_edge(*leaf_ix, this_index, "".into()); + } + NodeConnection::Return(this_index) + } + TypedAstNodeContent::WhileLoop(TypedWhileLoop { .. }) => { + // An abridged version of the dead code analysis for a while loop + // since we don't really care about what the loop body contains when detecting + // divergent paths + NodeConnection::NextStep(vec![graph.add_node(node.into())]) + } + TypedAstNodeContent::Expression(TypedExpression { .. }) => { + let entry = graph.add_node(node.into()); + // insert organizational dominator node + // connected to all current leaves + for leaf in leaves { + graph.add_edge(*leaf, entry, "".into()); + } + NodeConnection::NextStep(vec![entry]) + } + TypedAstNodeContent::SideEffect => NodeConnection::NextStep(leaves.to_vec()), + TypedAstNodeContent::Declaration(decl) => { + NodeConnection::NextStep(connect_declaration(node, &decl, graph, span, leaves)) + } + } +} + +fn connect_declaration<'sc>( + node: &TypedAstNode<'sc>, + decl: &TypedDeclaration<'sc>, + graph: &mut ControlFlowGraph<'sc>, + span: Span<'sc>, + leaves: &[NodeIndex], +) -> Vec { + use TypedDeclaration::*; + match decl { + TraitDeclaration(_) | StructDeclaration(_) | EnumDeclaration(_) => vec![], + VariableDeclaration(TypedVariableDeclaration { .. 
}) => { + let entry_node = graph.add_node(node.into()); + for leaf in leaves { + graph.add_edge(*leaf, entry_node, "".into()); + } + vec![entry_node] + } + FunctionDeclaration(fn_decl) => { + let entry_node = graph.add_node(node.into()); + for leaf in leaves { + graph.add_edge(*leaf, entry_node, "".into()); + } + connect_typed_fn_decl(fn_decl, graph, entry_node, span); + vec![] + } + Reassignment(TypedReassignment { .. }) => { + let entry_node = graph.add_node(node.into()); + for leaf in leaves { + graph.add_edge(*leaf, entry_node, "".into()); + } + vec![entry_node] + } + ImplTrait { + trait_name, + methods, + .. + } => { + let entry_node = graph.add_node(node.into()); + for leaf in leaves { + graph.add_edge(*leaf, entry_node, "".into()); + } + connect_impl_trait(trait_name, graph, methods, entry_node); + vec![] + } + SideEffect | ErrorRecovery => { + unreachable!("These are error cases and should be removed in the type checking stage. ") + } + } +} + +/// Implementations of traits are top-level things that are not conditional, so +/// we insert an edge from the function's starting point to the declaration to show +/// that the declaration was indeed at some point implemented. +/// Additionally, we insert the trait's methods into the method namespace in order to +/// track which exact methods are dead code. 
+fn connect_impl_trait<'sc>( + trait_name: &Ident<'sc>, + graph: &mut ControlFlowGraph<'sc>, + methods: &[TypedFunctionDeclaration<'sc>], + entry_node: NodeIndex, +) { + let mut methods_and_indexes = vec![]; + // insert method declarations into the graph + for fn_decl in methods { + let fn_decl_entry_node = graph.add_node(ControlFlowGraphNode::MethodDeclaration { + span: fn_decl.span.clone(), + method_name: fn_decl.name.clone(), + }); + graph.add_edge(entry_node, fn_decl_entry_node, "".into()); + // connect the impl declaration node to the functions themselves, as all trait functions are + // public if the trait is in scope + connect_typed_fn_decl(&fn_decl, graph, fn_decl_entry_node, fn_decl.span.clone()); + methods_and_indexes.push((fn_decl.name.clone(), fn_decl_entry_node)); + } + // Now, insert the methods into the trait method namespace. + graph + .namespace + .insert_trait_methods(trait_name.clone(), methods_and_indexes); +} + +/// The strategy here is to populate the trait namespace with just one singular trait +/// and if it is ever implemented, by virtue of type checking, we know all interface points +/// were met. +/// Upon implementation, we can populate the methods namespace and track dead functions that way. +/// TL;DR: At this point, we _only_ track the wholistic trait declaration and not the functions +/// contained within. +/// +/// The trait node itself has already been added (as `entry_node`), so we just need to insert that +/// node index into the namespace for the trait. + +/// When connecting a function declaration, we are inserting a new root node into the graph that +/// has no entry points, since it is just a declaration. +/// When something eventually calls it, it gets connected to the declaration. 
+fn connect_typed_fn_decl<'sc>( + fn_decl: &TypedFunctionDeclaration<'sc>, + graph: &mut ControlFlowGraph<'sc>, + entry_node: NodeIndex, + _span: Span<'sc>, +) { + let fn_exit_node = graph.add_node(format!("\"{}\" fn exit", fn_decl.name.primary_name).into()); + let return_nodes = depth_first_insertion_code_block(&fn_decl.body, graph, &[entry_node]); + for node in return_nodes { + graph.add_edge(node, fn_exit_node, "return".into()); + } + + let namespace_entry = FunctionNamespaceEntry { + entry_point: entry_node, + exit_point: fn_exit_node, + return_type: fn_decl.return_type.clone(), + }; + graph + .namespace + .insert_function(fn_decl.name.clone(), namespace_entry); +} + +type ReturnStatementNodes = Vec; + +fn depth_first_insertion_code_block<'sc>( + node_content: &TypedCodeBlock<'sc>, + graph: &mut ControlFlowGraph<'sc>, + leaves: &[NodeIndex], +) -> ReturnStatementNodes { + let mut leaves = leaves.to_vec(); + let mut return_nodes = vec![]; + for node in node_content.contents.iter() { + let this_node = connect_node(node, graph, &leaves); + match this_node { + NodeConnection::NextStep(nodes) => leaves = nodes, + NodeConnection::Return(node) => { + return_nodes.push(node); + } + } + } + return_nodes +} diff --git a/parser/src/control_flow_analysis/dead_code_analysis.rs b/parser/src/control_flow_analysis/dead_code_analysis.rs new file mode 100644 index 00000000000..aafaa793153 --- /dev/null +++ b/parser/src/control_flow_analysis/dead_code_analysis.rs @@ -0,0 +1,799 @@ +use super::*; +use crate::semantics::ast_node::TypedStructExpressionField; +use crate::types::ResolvedType; +use crate::{ + parse_tree::Visibility, + semantics::ast_node::{TypedExpressionVariant, TypedStructDeclaration, TypedTraitDeclaration}, + Ident, TreeType, +}; +use crate::{ + semantics::{ + ast_node::{ + TypedCodeBlock, TypedDeclaration, TypedEnumDeclaration, TypedExpression, + TypedFunctionDeclaration, TypedReassignment, TypedVariableDeclaration, TypedWhileLoop, + }, + TypedAstNode, 
TypedAstNodeContent, TypedParseTree, + }, + CompileWarning, Warning, +}; +use pest::Span; +use petgraph::algo::has_path_connecting; +use petgraph::prelude::NodeIndex; + +impl<'sc> ControlFlowGraph<'sc> { + pub(crate) fn find_dead_code(&self) -> Vec> { + // dead code is code that has no path to the entry point + let mut dead_nodes = vec![]; + for destination in self.graph.node_indices() { + let mut is_connected = false; + for entry in &self.entry_points { + if has_path_connecting(&self.graph, *entry, destination, None) { + is_connected = true; + break; + } + } + if !is_connected { + dead_nodes.push(destination); + } + } + let dead_enum_variant_warnings = dead_nodes + .iter() + .filter_map(|x| match &self.graph[*x] { + ControlFlowGraphNode::EnumVariant { span, variant_name } => Some(CompileWarning { + span: span.clone(), + warning_content: Warning::DeadEnumVariant { + variant_name: variant_name.to_string(), + }, + }), + _ => None, + }) + .collect::>(); + + let dead_ast_node_warnings = dead_nodes + .into_iter() + .filter_map(|x| match &self.graph[x] { + ControlFlowGraphNode::ProgramNode(node) => { + Some(construct_dead_code_warning_from_node(node)) + } + ControlFlowGraphNode::EnumVariant { span, variant_name } => Some(CompileWarning { + span: span.clone(), + warning_content: Warning::DeadEnumVariant { + variant_name: variant_name.to_string(), + }, + }), + ControlFlowGraphNode::MethodDeclaration { span, .. } => Some(CompileWarning { + span: span.clone(), + warning_content: Warning::DeadMethod, + }), + ControlFlowGraphNode::StructField { span, .. } => Some(CompileWarning { + span: span.clone(), + warning_content: Warning::StructFieldNeverRead, + }), + ControlFlowGraphNode::OrganizationalDominator(..) => None, + }) + .collect::>(); + + let all_warnings = [dead_enum_variant_warnings, dead_ast_node_warnings].concat(); + // filter out any overlapping spans -- if a span is contained within another one, + // remove it. 
+ all_warnings + .clone() + .into_iter() + .filter(|CompileWarning { span, .. }| { + // if any other warnings contain a span which completely covers this one, filter + // out this one. + all_warnings + .iter() + .find( + |CompileWarning { + span: other_span, .. + }| { + other_span.end() > span.end() && other_span.start() < span.start() + }, + ) + .is_none() + }) + .collect() + } + /// Constructs a graph that is designed to identify unused declarations and sections of code. + pub(crate) fn construct_dead_code_graph( + ast: &TypedParseTree<'sc>, + tree_type: TreeType, + ) -> Self { + let mut graph = ControlFlowGraph { + graph: Graph::new(), + entry_points: vec![], + namespace: Default::default(), + }; + // do a depth first traversal and cover individual inner ast nodes + let mut leaves = vec![]; + let exit_node = Some(graph.add_node(("Program exit".to_string()).into())); + for ast_entrypoint in ast.root_nodes.iter() { + let (l_leaves, _new_exit_node) = + connect_node(ast_entrypoint, &mut graph, &leaves, exit_node, tree_type); + + leaves = l_leaves; + } + + // calculate the entry points based on the tree type + graph.entry_points = match tree_type { + TreeType::Predicate | TreeType::Script => { + // a predicate or script have a main function as the only entry point + vec![ + graph + .graph + .node_indices() + .find(|i| match graph.graph[*i] { + ControlFlowGraphNode::OrganizationalDominator(_) => false, + ControlFlowGraphNode::ProgramNode(TypedAstNode { + content: + TypedAstNodeContent::Declaration( + TypedDeclaration::FunctionDeclaration( + TypedFunctionDeclaration { ref name, .. }, + ), + ), + .. 
+ }) => name.primary_name == "main", + _ => false, + }) + .unwrap(), + ] + } + TreeType::Contract | TreeType::Library => graph + .graph + .node_indices() + .filter(|i| match graph.graph[*i] { + ControlFlowGraphNode::OrganizationalDominator(_) => false, + ControlFlowGraphNode::ProgramNode(TypedAstNode { + content: + TypedAstNodeContent::Declaration(TypedDeclaration::FunctionDeclaration( + TypedFunctionDeclaration { + visibility: Visibility::Public, + .. + }, + )), + .. + }) => true, + ControlFlowGraphNode::ProgramNode(TypedAstNode { + content: + TypedAstNodeContent::Declaration(TypedDeclaration::TraitDeclaration( + TypedTraitDeclaration { + visibility: Visibility::Public, + .. + }, + )), + .. + }) => true, + ControlFlowGraphNode::ProgramNode(TypedAstNode { + content: + TypedAstNodeContent::Declaration(TypedDeclaration::ImplTrait { .. }), + .. + }) => true, + _ => false, + }) + .collect(), + }; + graph.visualize(); + graph + } +} +fn connect_node<'sc>( + node: &TypedAstNode<'sc>, + graph: &mut ControlFlowGraph<'sc>, + leaves: &[NodeIndex], + exit_node: Option, + tree_type: TreeType, +) -> (Vec, Option) { + // let mut graph = graph.clone(); + let span = node.span.clone(); + match &node.content { + TypedAstNodeContent::ReturnStatement(_) + | TypedAstNodeContent::ImplicitReturnExpression(_) => { + let this_index = graph.add_node(node.into()); + for leaf_ix in leaves { + graph.add_edge(*leaf_ix, this_index, "".into()); + } + // connect return to the exit node + if let Some(exit_node) = exit_node { + graph.add_edge(this_index, exit_node, "return".into()); + (vec![], None) + } else { + (vec![], None) + } + } + TypedAstNodeContent::WhileLoop(TypedWhileLoop { body, .. }) => { + // a while loop can loop back to the beginning, + // or it can terminate. + // so we connect the _end_ of the while loop _both_ to its beginning and the next node. 
+ // the loop could also be entirely skipped + + let entry = graph.add_node(node.into()); + let while_loop_exit = graph.add_node("while loop exit".to_string().into()); + for leaf in leaves { + graph.add_edge(*leaf, entry, "".into()); + } + // it is possible for a whole while loop to be skipped so add edge from + // beginning of while loop straight to exit + graph.add_edge( + entry, + while_loop_exit, + "condition is initially false".into(), + ); + let mut leaves = vec![entry]; + let (l_leaves, _l_exit_node) = + depth_first_insertion_code_block(body, graph, &leaves, exit_node, tree_type); + // insert edges from end of block back to beginning of it + for leaf in &l_leaves { + graph.add_edge(*leaf, entry, "loop repeats".into()); + } + + leaves = l_leaves; + for leaf in leaves { + graph.add_edge(leaf, while_loop_exit, "".into()); + } + (vec![while_loop_exit], exit_node) + } + TypedAstNodeContent::Expression(TypedExpression { + expression: expr_variant, + .. + }) => { + let entry = graph.add_node(node.into()); + // insert organizational dominator node + // connected to all current leaves + for leaf in leaves { + graph.add_edge(*leaf, entry, "".into()); + } + + ( + connect_expression(expr_variant, graph, &[entry], exit_node, "", tree_type), + exit_node, + ) + } + TypedAstNodeContent::SideEffect => (leaves.to_vec(), exit_node), + TypedAstNodeContent::Declaration(decl) => { + // all leaves connect to this node, then this node is the singular leaf + let decl_node = graph.add_node(node.into()); + for leaf in leaves { + graph.add_edge(*leaf, decl_node, "".into()); + } + ( + connect_declaration(&decl, graph, decl_node, span, exit_node, tree_type), + exit_node, + ) + } + } +} + +fn connect_declaration<'sc>( + decl: &TypedDeclaration<'sc>, + graph: &mut ControlFlowGraph<'sc>, + entry_node: NodeIndex, + span: Span<'sc>, + exit_node: Option, + tree_type: TreeType, +) -> Vec { + use TypedDeclaration::*; + match decl { + VariableDeclaration(TypedVariableDeclaration { body, .. 
}) => connect_expression( + &body.expression, + graph, + &[entry_node], + exit_node, + "variable instantiation", + tree_type, + ), + FunctionDeclaration(fn_decl) => { + connect_typed_fn_decl(fn_decl, graph, entry_node, span, exit_node, tree_type); + vec![] + } + TraitDeclaration(trait_decl) => { + connect_trait_declaration(&trait_decl, graph, entry_node); + vec![] + } + StructDeclaration(struct_decl) => { + connect_struct_declaration(&struct_decl, graph, entry_node, tree_type); + vec![] + } + EnumDeclaration(enum_decl) => { + connect_enum_declaration(&enum_decl, graph, entry_node); + vec![] + } + Reassignment(TypedReassignment { rhs, .. }) => connect_expression( + &rhs.expression, + graph, + &[entry_node], + exit_node, + "variable reassignment", + tree_type, + ), + ImplTrait { + trait_name, + methods, + .. + } => { + connect_impl_trait(trait_name, graph, methods, entry_node, tree_type); + vec![] + } + SideEffect | ErrorRecovery => { + unreachable!("These are error cases and should be removed in the type checking stage. ") + } + } +} + +/// Connect each individual struct field, and when that field is accessed in a subfield expression, +/// connect that field. +fn connect_struct_declaration<'sc>( + struct_decl: &TypedStructDeclaration<'sc>, + graph: &mut ControlFlowGraph<'sc>, + entry_node: NodeIndex, + tree_type: TreeType, +) { + let TypedStructDeclaration { + name, + fields, + visibility, + .. + } = struct_decl; + let field_nodes = fields + .into_iter() + .map(|field| (field.name.clone(), graph.add_node(field.into()))) + .collect::>(); + // If this is a library or smart contract, and if this is public, then we want to connect the + // declaration node itself to the individual fields. 
+ // + // this is important because if the struct is public, you want to be able to signal that all + // fields are accessible by just adding an edge to the struct declaration node + if [TreeType::Contract, TreeType::Library].contains(&tree_type) + && *visibility == Visibility::Public + { + for (_name, node) in &field_nodes { + graph.add_edge(entry_node, *node, "".into()); + } + } + + // Now, populate the struct namespace with the location of this struct as well as the indexes + // of the field names + graph + .namespace + .insert_struct(name.clone(), entry_node, field_nodes); +} + +/// Implementations of traits are top-level things that are not conditional, so +/// we insert an edge from the function's starting point to the declaration to show +/// that the declaration was indeed at some point implemented. +/// Additionally, we insert the trait's methods into the method namespace in order to +/// track which exact methods are dead code. +fn connect_impl_trait<'sc>( + trait_name: &Ident<'sc>, + graph: &mut ControlFlowGraph<'sc>, + methods: &[TypedFunctionDeclaration<'sc>], + entry_node: NodeIndex, + tree_type: TreeType, +) { + let graph_c = graph.clone(); + let trait_decl_node = graph_c.namespace.find_trait(trait_name); + match trait_decl_node { + None => { + let edge_ix = graph.add_node("External trait".into()); + graph.add_edge(entry_node, edge_ix, "".into()); + } + Some(trait_decl_node) => { + graph.add_edge_from_entry(entry_node, "".into()); + graph.add_edge(entry_node, *trait_decl_node, "".into()); + } + } + let mut methods_and_indexes = vec![]; + // insert method declarations into the graph + for fn_decl in methods { + let fn_decl_entry_node = graph.add_node(ControlFlowGraphNode::MethodDeclaration { + span: fn_decl.span.clone(), + method_name: fn_decl.name.clone(), + }); + graph.add_edge(entry_node, fn_decl_entry_node, "".into()); + // connect the impl declaration node to the functions themselves, as all trait functions are + // public if the trait is in 
scope + connect_typed_fn_decl( + &fn_decl, + graph, + fn_decl_entry_node, + fn_decl.span.clone(), + None, + tree_type, + ); + methods_and_indexes.push((fn_decl.name.clone(), fn_decl_entry_node)); + } + // Now, insert the methods into the trait method namespace. + graph + .namespace + .insert_trait_methods(trait_name.clone(), methods_and_indexes); +} + +/// The strategy here is to populate the trait namespace with just one singular trait +/// and if it is ever implemented, by virtue of type checking, we know all interface points +/// were met. +/// Upon implementation, we can populate the methods namespace and track dead functions that way. +/// TL;DR: At this point, we _only_ track the wholistic trait declaration and not the functions +/// contained within. +/// +/// The trait node itself has already been added (as `entry_node`), so we just need to insert that +/// node index into the namespace for the trait. +fn connect_trait_declaration<'sc>( + decl: &TypedTraitDeclaration<'sc>, + graph: &mut ControlFlowGraph<'sc>, + entry_node: NodeIndex, +) { + graph.namespace.add_trait(decl.name.clone(), entry_node); +} + +/// For an enum declaration, we want to make a declaration node for every individual enum +/// variant. When a variant is constructed, we can point an edge at that variant. This way, +/// we can see clearly, and thusly warn, when individual variants are not ever constructed. 
+fn connect_enum_declaration<'sc>( + enum_decl: &TypedEnumDeclaration<'sc>, + graph: &mut ControlFlowGraph<'sc>, + entry_node: NodeIndex, +) { + // keep a mapping of each variant + for variant in &enum_decl.variants { + let variant_index = graph.add_node(variant.into()); + + // graph.add_edge(entry_node, variant_index, "".into()); + graph.namespace.insert_enum( + enum_decl.name.clone(), + entry_node, + variant.name.clone(), + variant_index, + ); + } +} + +/// When connecting a function declaration, we are inserting a new root node into the graph that +/// has no entry points, since it is just a declaration. +/// When something eventually calls it, it gets connected to the declaration. +fn connect_typed_fn_decl<'sc>( + fn_decl: &TypedFunctionDeclaration<'sc>, + graph: &mut ControlFlowGraph<'sc>, + entry_node: NodeIndex, + _span: Span<'sc>, + exit_node: Option, + tree_type: TreeType, +) { + let fn_exit_node = graph.add_node(format!("\"{}\" fn exit", fn_decl.name.primary_name).into()); + let (_exit_nodes, _exit_node) = depth_first_insertion_code_block( + &fn_decl.body, + graph, + &[entry_node], + Some(fn_exit_node), + tree_type, + ); + if let Some(exit_node) = exit_node { + graph.add_edge(fn_exit_node, exit_node, "".into()); + } + + let namespace_entry = FunctionNamespaceEntry { + entry_point: entry_node, + exit_point: fn_exit_node, + return_type: fn_decl.return_type.clone(), + }; + + graph + .namespace + .insert_function(fn_decl.name.clone(), namespace_entry); +} + +fn depth_first_insertion_code_block<'sc>( + node_content: &TypedCodeBlock<'sc>, + graph: &mut ControlFlowGraph<'sc>, + leaves: &[NodeIndex], + exit_node: Option, + tree_type: TreeType, +) -> (Vec, Option) { + let mut leaves = leaves.to_vec(); + let mut exit_node = exit_node.clone(); + for node in node_content.contents.iter() { + let (this_node, l_exit_node) = connect_node(node, graph, &leaves, exit_node, tree_type); + leaves = this_node; + exit_node = l_exit_node; + } + (leaves, exit_node) +} + +/// 
connects any inner parts of an expression to the graph +/// note the main expression node has already been inserted +fn connect_expression<'sc>( + expr_variant: &TypedExpressionVariant<'sc>, + graph: &mut ControlFlowGraph<'sc>, + leaves: &[NodeIndex], + exit_node: Option, + label: &'static str, + tree_type: TreeType, +) -> Vec { + use TypedExpressionVariant::*; + match expr_variant { + FunctionApplication { + name, arguments, .. + } => { + let mut is_external = false; + // find the function in the namespace + let (fn_entrypoint, fn_exit_point) = graph + .namespace + .get_function(&name.suffix) + .cloned() + .map( + |FunctionNamespaceEntry { + entry_point, + exit_point, + .. + }| (entry_point, exit_point), + ) + .unwrap_or_else(|| { + let node_idx = + graph.add_node(format!("extern fn {}()", name.suffix.primary_name).into()); + is_external = true; + ( + node_idx, + graph.add_node( + format!("extern fn {} exit", name.suffix.primary_name).into(), + ), + ) + }); + for leaf in leaves { + graph.add_edge(*leaf, fn_entrypoint, label.into()); + } + // we evaluate every one of the function arguments + let mut current_leaf = vec![fn_entrypoint]; + for arg in arguments { + current_leaf = connect_expression( + &arg.expression, + graph, + ¤t_leaf, + exit_node, + "arg eval", + tree_type, + ); + } + // connect final leaf to fn exit + for leaf in current_leaf { + graph.add_edge(leaf, fn_exit_point, "".into()); + } + // the exit points get connected to an exit node for the application + if !is_external { + if let Some(exit_node) = exit_node { + graph.add_edge(fn_exit_point, exit_node, "".into()); + vec![exit_node] + } else { + vec![fn_exit_point] + } + } else { + vec![fn_entrypoint] + } + } + Literal(_) => { + let node = graph.add_node("Literal value".into()); + for leaf in leaves { + graph.add_edge(*leaf, node, "".into()); + } + vec![node] + } + VariableExpression { .. } => leaves.to_vec(), + EnumInstantiation { + enum_name, + variant_name, + .. 
+ } => { + // connect this particular instantiation to its variants declaration + connect_enum_instantiation(enum_name, variant_name, graph, leaves) + } + IfExp { + condition, + then, + r#else, + } => { + let condition_expr = connect_expression( + &(*condition).expression, + graph, + leaves, + exit_node, + "", + tree_type, + ); + let then_expr = connect_expression( + &(*then).expression, + graph, + &condition_expr, + exit_node, + "then branch", + tree_type, + ); + + let else_expr = if let Some(else_expr) = r#else { + connect_expression( + &(*else_expr).expression, + graph, + &condition_expr, + exit_node, + "else branch", + tree_type, + ) + } else { + vec![] + }; + + [then_expr, else_expr].concat() + } + CodeBlock(TypedCodeBlock { contents }) => { + let block_entry = graph.add_node("Code block entry".into()); + for leaf in leaves { + graph.add_edge(*leaf, block_entry, label.into()); + } + let mut current_leaf = vec![block_entry]; + for node in contents { + current_leaf = connect_node(node, graph, ¤t_leaf, exit_node, tree_type).0; + } + + let block_exit = graph.add_node("Code block exit".into()); + for leaf in current_leaf { + graph.add_edge(leaf, block_exit, "".into()); + } + vec![block_exit] + } + StructExpression { + struct_name, + fields, + } => { + let decl = match graph.namespace.find_struct_decl(struct_name) { + Some(ix) => *ix, + None => { + graph.add_node(format!("External struct {}", struct_name.primary_name).into()) + } + }; + let entry = graph.add_node("Struct declaration entry".into()); + let exit = graph.add_node("Struct declaration exit".into()); + // connect current leaves to the beginning of this expr + for leaf in leaves { + graph.add_edge(*leaf, entry, label.into()); + } + // connect the entry to the decl, to denote that the struct has been constructed + graph.add_edge(entry, decl, "".into()); + + let mut current_leaf = vec![entry]; + // for every field, connect its expression + for TypedStructExpressionField { value, .. 
} in fields { + current_leaf = connect_expression( + &value.expression, + graph, + &current_leaf, + exit_node, + "struct field instantiation", + tree_type, + ); + } + + // connect the final field to the exit + for leaf in current_leaf { + graph.add_edge(leaf, exit, "".into()); + } + vec![exit] + } + SubfieldExpression { + name, + resolved_type_of_parent, + .. + } => { + assert!(matches!( + resolved_type_of_parent, + ResolvedType::Struct { .. } + )); + let resolved_type_of_parent = match resolved_type_of_parent { + ResolvedType::Struct { name } => name.clone(), + _ => panic!("Called subfvield on a non-struct"), + }; + let field_name = name.last().unwrap(); + // find the struct field index in the namespace + let field_ix = match graph + .namespace + .find_struct_field_idx(&resolved_type_of_parent, field_name) + { + Some(ix) => ix.clone(), + None => graph.add_node("external struct".into()), + }; + + let this_ix = graph.add_node( + format!( + "Struct field access: {}.{}", + resolved_type_of_parent.primary_name, field_name.primary_name + ) + .into(), + ); + for leaf in leaves { + graph.add_edge(*leaf, this_ix, "".into()); + } + graph.add_edge(this_ix, field_ix, "".into()); + vec![this_ix] + } + a => todo!("{:?}", a), + } +} + +fn connect_enum_instantiation<'sc>( + enum_name: &Ident<'sc>, + variant_name: &Ident<'sc>, + graph: &mut ControlFlowGraph, + leaves: &[NodeIndex], +) -> Vec<NodeIndex> { + let (decl_ix, variant_index) = graph + .namespace + .find_enum_variant_index(enum_name, variant_name) + .unwrap_or_else(|| { + let node_idx = graph.add_node( + format!( + "extern enum {}::{}", + enum_name.primary_name, variant_name.primary_name + ) + .into(), + ); + (node_idx, node_idx) + }); + + // insert organizational nodes for instantiation of enum + let enum_instantiation_entry_idx = graph.add_node("enum instantiation entry".into()); + let enum_instantiation_exit_idx = graph.add_node("enum instantiation exit".into()); + + // connect to declaration node itself to show that the declaration is
used + graph.add_edge(enum_instantiation_entry_idx, decl_ix, "".into()); + for leaf in leaves { + graph.add_edge(*leaf, enum_instantiation_entry_idx, "".into()); + } + + graph.add_edge(decl_ix, variant_index, "".into()); + graph.add_edge(variant_index, enum_instantiation_exit_idx, "".into()); + + vec![enum_instantiation_exit_idx] +} + +fn construct_dead_code_warning_from_node<'sc>(node: &TypedAstNode<'sc>) -> CompileWarning<'sc> { + match node { + // if this is a function, struct, or trait declaration that is never called, then it is dead + // code. + TypedAstNode { + content: + TypedAstNodeContent::Declaration(TypedDeclaration::FunctionDeclaration( + TypedFunctionDeclaration { .. }, + )), + span, + .. + } => CompileWarning { + span: span.clone(), + warning_content: Warning::DeadFunctionDeclaration, + }, + TypedAstNode { + content: TypedAstNodeContent::Declaration(TypedDeclaration::StructDeclaration { .. }), + span, + } => CompileWarning { + span: span.clone(), + warning_content: Warning::DeadStructDeclaration, + }, + TypedAstNode { + content: + TypedAstNodeContent::Declaration(TypedDeclaration::TraitDeclaration( + TypedTraitDeclaration { name, .. }, + )), + .. + } => CompileWarning { + span: name.span.clone(), + warning_content: Warning::DeadTrait, + }, + TypedAstNode { + content: TypedAstNodeContent::Declaration(TypedDeclaration::EnumDeclaration(..)), + span, + } => CompileWarning { + span: span.clone(), + warning_content: Warning::DeadDeclaration, + }, + // otherwise, this is unreachable. + TypedAstNode { span, .. } => CompileWarning { + span: span.clone(), + warning_content: Warning::UnreachableCode, + }, + } +} diff --git a/parser/src/control_flow_analysis/flow_graph/mod.rs b/parser/src/control_flow_analysis/flow_graph/mod.rs new file mode 100644 index 00000000000..6784012d75f --- /dev/null +++ b/parser/src/control_flow_analysis/flow_graph/mod.rs @@ -0,0 +1,154 @@ +//! 
This is the flow graph, a graph which contains edges that represent possible steps of program +//! execution. + +use crate::semantics::{ast_node::TypedStructField, TypedAstNode}; +use crate::{semantics::ast_node::TypedEnumVariant, Ident}; +use pest::Span; + +use petgraph::{graph::EdgeIndex, prelude::NodeIndex}; + +mod namespace; +use namespace::ControlFlowNamespace; +pub(crate) use namespace::FunctionNamespaceEntry; + +pub type EntryPoint = NodeIndex; +pub type ExitPoint = NodeIndex; + +#[derive(Clone)] +/// A graph that can be used to model the control flow of a fuel HLL program. +/// This graph is used as the basis for all of the algorithms in the control flow analysis portion +/// of the compiler. +pub struct ControlFlowGraph<'sc> { + pub(crate) graph: Graph<'sc>, + pub(crate) entry_points: Vec, + pub(crate) namespace: ControlFlowNamespace<'sc>, +} + +pub type Graph<'sc> = petgraph::Graph, ControlFlowGraphEdge>; + +#[derive(Clone)] +pub struct ControlFlowGraphEdge(String); + +impl std::fmt::Debug for ControlFlowGraphEdge { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.0) + } +} + +impl std::convert::From<&str> for ControlFlowGraphEdge { + fn from(o: &str) -> Self { + ControlFlowGraphEdge(o.to_string()) + } +} + +#[derive(Clone)] +pub enum ControlFlowGraphNode<'sc> { + OrganizationalDominator(String), + ProgramNode(TypedAstNode<'sc>), + EnumVariant { + span: Span<'sc>, + variant_name: String, + }, + MethodDeclaration { + span: Span<'sc>, + method_name: Ident<'sc>, + }, + StructField { + struct_field_name: Ident<'sc>, + span: Span<'sc>, + }, +} + +impl<'sc> ControlFlowGraphNode<'sc> { + pub(crate) fn unwrap_to_node(&self) -> TypedAstNode<'sc> { + match self { + ControlFlowGraphNode::ProgramNode(node) => node.clone(), + _ => panic!("Called unwrap_to_node() on a non-program-node value."), + } + } +} + +impl<'sc> std::fmt::Debug for ControlFlowGraphNode<'sc> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> 
std::fmt::Result { + let text = match self { + ControlFlowGraphNode::OrganizationalDominator(s) => s.to_string(), + ControlFlowGraphNode::ProgramNode(node) => format!("{:?}", node), + ControlFlowGraphNode::EnumVariant { variant_name, .. } => { + format!("Enum variant {}", variant_name.to_string()) + } + ControlFlowGraphNode::MethodDeclaration { method_name, .. } => { + format!("Method {}", method_name.primary_name.to_string()) + } + ControlFlowGraphNode::StructField { + struct_field_name, .. + } => { + format!( + "Struct field {}", + struct_field_name.primary_name.to_string() + ) + } + }; + f.write_str(&text) + } +} + +impl<'sc> std::convert::From<&TypedAstNode<'sc>> for ControlFlowGraphNode<'sc> { + fn from(other: &TypedAstNode<'sc>) -> Self { + ControlFlowGraphNode::ProgramNode(other.clone()) + } +} + +impl<'sc> std::convert::From<&TypedEnumVariant<'sc>> for ControlFlowGraphNode<'sc> { + fn from(other: &TypedEnumVariant<'sc>) -> Self { + ControlFlowGraphNode::EnumVariant { + variant_name: other.name.primary_name.to_string(), + span: other.span.clone(), + } + } +} + +impl<'sc> std::convert::From<&TypedStructField<'sc>> for ControlFlowGraphNode<'sc> { + fn from(other: &TypedStructField<'sc>) -> Self { + ControlFlowGraphNode::StructField { + struct_field_name: other.name.clone(), + span: other.span.clone(), + } + } +} +impl std::convert::From<String> for ControlFlowGraphNode<'_> { + fn from(other: String) -> Self { + ControlFlowGraphNode::OrganizationalDominator(other) + } +} + +impl std::convert::From<&str> for ControlFlowGraphNode<'_> { + fn from(other: &str) -> Self { + ControlFlowGraphNode::OrganizationalDominator(other.to_string()) + } +} + +impl<'sc> ControlFlowGraph<'sc> { + pub(crate) fn add_edge_from_entry(&mut self, to: NodeIndex, label: ControlFlowGraphEdge) { + for entry in &self.entry_points { + self.graph.add_edge(*entry, to, label.clone()); + } + } + pub(crate) fn add_node(&mut self, node: ControlFlowGraphNode<'sc>) -> NodeIndex { + self.graph.add_node(node)
+ } + pub(crate) fn add_edge( + &mut self, + from: NodeIndex, + to: NodeIndex, + edge: ControlFlowGraphEdge, + ) -> EdgeIndex { + self.graph.add_edge(from, to, edge) + } + + #[allow(dead_code)] + /// Prints out graphviz for this graph + pub(crate) fn visualize(&self) { + use petgraph::dot::Dot; + println!("{:?}", Dot::with_config(&self.graph, &[])); + } +} diff --git a/parser/src/control_flow_analysis/flow_graph/namespace.rs b/parser/src/control_flow_analysis/flow_graph/namespace.rs new file mode 100644 index 00000000000..6d337067876 --- /dev/null +++ b/parser/src/control_flow_analysis/flow_graph/namespace.rs @@ -0,0 +1,137 @@ +use super::{EntryPoint, ExitPoint}; +use crate::{types::ResolvedType, Ident}; +use petgraph::prelude::NodeIndex; +use std::collections::HashMap; + +#[derive(Default, Clone)] +/// Represents a single entry in the [ControlFlowNamespace]'s function namespace. Contains various +/// metadata about a function including its node indexes in the graph, its return type, and more. +/// Used to both perform control flow analysis on functions as well as produce good error messages. +pub(crate) struct FunctionNamespaceEntry<'sc> { + pub(crate) entry_point: EntryPoint, + pub(crate) exit_point: ExitPoint, + pub(crate) return_type: ResolvedType<'sc>, +} + +#[derive(Default, Clone)] +pub(crate) struct StructNamespaceEntry<'sc> { + pub(crate) struct_decl_ix: NodeIndex, + pub(crate) fields: HashMap, NodeIndex>, +} + +#[derive(Default, Clone)] +/// This namespace holds mappings from various declarations to their indexes in the graph. This is +/// used for connecting those vertices when the declarations are instantiated. +/// +/// Since control flow happens after type checking, we are not concerned about things being out +/// of scope at this point, as that would have been caught earlier and aborted the compilation +/// process. 
+pub struct ControlFlowNamespace<'sc> { + pub(crate) function_namespace: HashMap, FunctionNamespaceEntry<'sc>>, + pub(crate) enum_namespace: HashMap, (NodeIndex, HashMap, NodeIndex>)>, + pub(crate) trait_namespace: HashMap, NodeIndex>, + /// This is a mapping from trait name to method names and their node indexes + pub(crate) trait_method_namespace: HashMap, HashMap, NodeIndex>>, + /// This is a mapping from struct name to field names and their node indexes + pub(crate) struct_namespace: HashMap, StructNamespaceEntry<'sc>>, +} + +impl<'sc> ControlFlowNamespace<'sc> { + pub(crate) fn get_function(&self, ident: &Ident<'sc>) -> Option<&FunctionNamespaceEntry<'sc>> { + self.function_namespace.get(ident) + } + pub(crate) fn insert_function( + &mut self, + ident: Ident<'sc>, + entry: FunctionNamespaceEntry<'sc>, + ) { + self.function_namespace.insert(ident, entry); + } + pub(crate) fn insert_enum( + &mut self, + enum_name: Ident<'sc>, + enum_decl_index: NodeIndex, + variant_name: Ident<'sc>, + variant_index: NodeIndex, + ) { + match self.enum_namespace.get_mut(&enum_name) { + Some((_ix, variants)) => { + variants.insert(variant_name, variant_index); + } + None => { + let variant_space = { + let mut map = HashMap::new(); + map.insert(variant_name, variant_index); + map + }; + self.enum_namespace + .insert(enum_name, (enum_decl_index, variant_space)); + } + } + } + pub(crate) fn find_enum_variant_index( + &self, + enum_name: &Ident<'sc>, + variant_name: &Ident<'sc>, + ) -> Option<(NodeIndex, NodeIndex)> { + let (enum_ix, enum_decl) = self.enum_namespace.get(enum_name)?; + Some((enum_ix.clone(), enum_decl.get(variant_name)?.clone())) + } + + pub(crate) fn add_trait(&mut self, trait_name: Ident<'sc>, trait_idx: NodeIndex) { + self.trait_namespace.insert(trait_name, trait_idx); + } + + pub(crate) fn find_trait(&self, name: &Ident<'sc>) -> Option<&NodeIndex> { + self.trait_namespace.get(name) + } + + pub(crate) fn insert_trait_methods( + &mut self, + trait_name: Ident<'sc>, + 
methods: Vec<(Ident<'sc>, NodeIndex)>, + ) { + match self.trait_method_namespace.get_mut(&trait_name) { + Some(methods_ns) => { + for (name, ix) in methods { + methods_ns.insert(name, ix); + } + } + None => { + let mut ns = HashMap::default(); + for (name, ix) in methods { + ns.insert(name, ix); + } + self.trait_method_namespace.insert(trait_name, ns); + } + } + } + + pub(crate) fn insert_struct( + &mut self, + struct_name: Ident<'sc>, + declaration_node: NodeIndex, + field_nodes: Vec<(Ident<'sc>, NodeIndex)>, + ) { + let entry = StructNamespaceEntry { + struct_decl_ix: declaration_node, + fields: field_nodes.into_iter().collect(), + }; + self.struct_namespace.insert(struct_name, entry); + } + pub(crate) fn find_struct_decl(&self, struct_name: &Ident<'sc>) -> Option<&NodeIndex> { + self.struct_namespace + .get(struct_name) + .map(|StructNamespaceEntry { struct_decl_ix, .. }| struct_decl_ix) + } + pub(crate) fn find_struct_field_idx( + &self, + struct_name: &Ident<'sc>, + field_name: &Ident<'sc>, + ) -> Option<&NodeIndex> { + self.struct_namespace + .get(struct_name)? + .fields + .get(field_name) + } +} diff --git a/parser/src/control_flow_analysis/mod.rs b/parser/src/control_flow_analysis/mod.rs new file mode 100644 index 00000000000..9617eb6bb7f --- /dev/null +++ b/parser/src/control_flow_analysis/mod.rs @@ -0,0 +1,42 @@ +//! +//! This module contains all of the logic related to control flow analysis. +//! +//! # Synopsis of Dead-Code Analysis Algorithm +//! The dead code analysis algorithm constructs a node for every declaration, expression, and +//! statement. Then, from the entry points of the AST, we begin drawing edges along the control +//! flow path. If a declaration is instantiated, we draw an edge to it. If an expression or +//! statement is executed, an edge is drawn to it. Finally, we trace the edges from the entry +//! points of the AST. If there are no paths from any entry point to a node, then it is either a +//! 
dead declaration or an unreachable expression or statement. +//! +//! See the Terms section for details on how entry points are determined. +//! +//! # Synopsis of Return-Path Analysis Algorithm +//! The graph constructed for this algorithm does not go into the details of the contents of any +//! declaration except for function declarations. Inside of every function, it traces the execution +//! path along to ensure that all reachable paths do indeed return a value. We don't need to type +//! check the value that is returned, since type checking of return statements happens in the type +//! checking stage. Here, we know all present return statements have the right type, and we just +//! need to verify that all paths do indeed contain a return statement. +//! +//! +//! # # Terms +//! # # # Node +//! A node is any [crate::semantics::TypedAstNode], with some [crate::semantics::TypedAstNodeContent]. +//! # # # Dominating nodes +//! A dominating node is a node which all previous nodes pass through. These are what we are +//! concerned about in control flow analysis. More formally, +//! A node _M_ dominates a node _N_ if every path from the entry that reaches node _N_ has to pass through node _M_. +//! # # # Reachability +//! A node _N_ is reachable if there is a path to it from any one of the tree's entry points. +//! # # # Entry Points +//! The entry points to an AST depend on what type of AST it is. If it is a predicate or script, +//! then the main function is the sole entry point. If it is a library or contract, then public +//! functions or declarations are entry points. +//! 
+mod analyze_return_paths; +mod dead_code_analysis; +mod flow_graph; +pub use analyze_return_paths::*; +pub use dead_code_analysis::*; +pub use flow_graph::*; diff --git a/parser/src/error.rs b/parser/src/error.rs index da1f9e0bfcf..2a480357b12 100644 --- a/parser/src/error.rs +++ b/parser/src/error.rs @@ -1,5 +1,4 @@ -use crate::parser::Rule; -use crate::types::TypeInfo; +use crate::{parser::Rule, types::ResolvedType}; use inflector::cases::classcase::to_class_case; use inflector::cases::snakecase::to_snake_case; use pest::Span; @@ -113,6 +112,12 @@ impl<'sc, T> CompileResult<'sc, T> { } } } + pub fn ok(&self) -> Option<&T> { + match self { + CompileResult::Ok { value, .. } => Some(value), + _ => None, + } + } } #[derive(Debug, Clone)] @@ -152,11 +157,11 @@ pub enum Warning<'sc> { name: &'sc str, }, LossOfPrecision { - initial_type: TypeInfo<'sc>, - cast_to: TypeInfo<'sc>, + initial_type: ResolvedType<'sc>, + cast_to: ResolvedType<'sc>, }, UnusedReturnValue { - r#type: TypeInfo<'sc>, + r#type: ResolvedType<'sc>, }, SimilarMethodFound { lib: String, @@ -167,6 +172,16 @@ pub enum Warning<'sc> { name: &'sc str, }, OverridingTraitImplementation, + DeadDeclaration, + DeadFunctionDeclaration, + DeadStructDeclaration, + DeadTrait, + UnreachableCode, + DeadEnumVariant { + variant_name: String, + }, + DeadMethod, + StructFieldNeverRead, } impl<'sc> Warning<'sc> { @@ -183,12 +198,20 @@ impl<'sc> Warning<'sc> { UnusedReturnValue { r#type } => format!("This returns a value of type {}, which is not assigned to anything and is ignored.", r#type.friendly_type_str()), SimilarMethodFound { lib, module, name } => format!("A method with the same name was found for type {} in dependency \"{}::{}\". Traits must be in scope in order to access their methods. 
", name, lib, module), OverridesOtherSymbol { name } => format!("This import would override another symbol with the same name \"{}\" in this namespace.", name), - OverridingTraitImplementation => format!("This trait implementation overrides another one that was previously defined.") + OverridingTraitImplementation => format!("This trait implementation overrides another one that was previously defined."), + DeadDeclaration => "This declaration is never used.".into(), + DeadStructDeclaration => "This struct is never instantiated.".into(), + DeadFunctionDeclaration => "This function is never called.".into(), + UnreachableCode => "This code is unreachable.".into(), + DeadEnumVariant { variant_name } => format!("Enum variant {} is never constructed.", variant_name), + DeadTrait => "This trait is never implemented.".into(), + DeadMethod => "This method is never called.".into(), + StructFieldNeverRead => "This struct field is never accessed.".into() } } } -#[derive(Error, Debug)] +#[derive(Error, Debug, Clone)] pub enum CompileError<'sc> { #[error("Variable \"{var_name}\" does not exist in this scope.")] UnknownVariable { var_name: &'sc str, span: Span<'sc> }, @@ -344,8 +367,12 @@ pub enum CompileError<'sc> { NonFinalAsteriskInPath { span: Span<'sc> }, #[error("Module \"{name}\" could not be found.")] ModuleNotFound { span: Span<'sc>, name: String }, - #[error("\"{name}\" is not a struct. Fields can only be accessed on structs.")] - NotAStruct { name: &'sc str, span: Span<'sc> }, + #[error("\"{name}\" is a {actually}, not a struct. Fields can only be accessed on structs.")] + NotAStruct { + name: &'sc str, + span: Span<'sc>, + actually: String, + }, #[error("Field \"{field_name}\" not found on struct \"{struct_name}\". 
Available fields are:\n {available_fields}")] FieldNotFound { field_name: &'sc str, @@ -357,6 +384,26 @@ pub enum CompileError<'sc> { SymbolNotFound { span: Span<'sc>, name: &'sc str }, #[error("Because this if expression's value is used, an \"else\" branch is required and it must return type \"{r#type}\"")] NoElseBranch { span: Span<'sc>, r#type: String }, + #[error("Use of type `Self` outside of a context in which `Self` refers to a type.")] + UnqualifiedSelfType { span: Span<'sc> }, + #[error("Symbol \"{name}\" does not refer to a type, it refers to a {actually_is}. It cannot be used in this position.")] + NotAType { + span: Span<'sc>, + name: String, + actually_is: String, + }, + #[error("This enum variant requires an instantiation expression. Try initializing it with arguments in parentheses.")] + MissingEnumInstantiator { span: Span<'sc> }, + #[error("This path must return a value of type \"{ty}\" from function \"{function_name}\", but it does not.")] + PathDoesNotReturn { + span: Span<'sc>, + ty: String, + function_name: &'sc str, + }, + #[error("Expected block to implicitly return a value of type \"{ty}\".")] + ExpectedImplicitReturnFromBlockWithType { span: Span<'sc>, ty: String }, + #[error("Expected block to implicitly return a value.")] + ExpectedImplicitReturnFromBlock { span: Span<'sc> }, } impl<'sc> std::convert::From> for CompileError<'sc> { @@ -365,7 +412,7 @@ impl<'sc> std::convert::From> for CompileError<'sc> { } } -#[derive(Error, Debug)] +#[derive(Error, Debug, Clone)] pub enum TypeError<'sc> { #[error("Mismatched types: Expected type {expected} but found type {received}. Type {received} is not castable to type {expected}.\n help: {help_text}")] MismatchedType { @@ -479,6 +526,12 @@ impl<'sc> CompileError<'sc> { FieldNotFound { span, .. } => (span.start(), span.end()), SymbolNotFound { span, .. } => (span.start(), span.end()), NoElseBranch { span, .. } => (span.start(), span.end()), + UnqualifiedSelfType { span, .. 
} => (span.start(), span.end()), + NotAType { span, .. } => (span.start(), span.end()), + MissingEnumInstantiator { span, .. } => (span.start(), span.end()), + PathDoesNotReturn { span, .. } => (span.start(), span.end()), + ExpectedImplicitReturnFromBlockWithType { span, .. } => (span.start(), span.end()), + ExpectedImplicitReturnFromBlock { span, .. } => (span.start(), span.end()), } } } diff --git a/parser/src/hll.pest b/parser/src/hll.pest index 48e2b01f245..4b67acab01b 100644 --- a/parser/src/hll.pest +++ b/parser/src/hll.pest @@ -5,6 +5,7 @@ trait_decl_keyword = {"trait"} return_keyword = {"return"} use_keyword = {"use"} enum_keyword = @{"enum"} +struct_keyword = @{"struct"} impl_keyword = {"impl"} asm_keyword = {"asm"} while_keyword = {"while"} @@ -28,7 +29,7 @@ script = { "script" ~ "{" ~ (control_flow|declaration|use_statement)* ~ "}" } predicate = { "predicate" ~ "{" ~ (control_flow|declaration|use_statement)* ~ "}" } // expressions -expr_inner = _{literal_value|if_exp|parenthesized_expression|asm_expression|code_block|func_app|struct_expression|method_exp|subfield_exp|var_exp|array_exp|match_expression} +expr_inner = _{literal_value|if_exp|parenthesized_expression|asm_expression|code_block|func_app|struct_expression|delineated_path|method_exp|subfield_exp|var_exp|array_exp|match_expression} parenthesized_expression = { "(" ~ expr ~ ")" } // // op exps built in to expr to prevent left recursion expr = {expr_inner ~ (op ~ expr_inner)*} @@ -36,7 +37,11 @@ func_app = { fn_name ~ "(" ~ (expr ~ ("," ~ expr)* )? ~ ")" } fn_name = {var_exp} var_exp = { unary_op? ~ var_name_ident } method_exp = { subfield_exp ~ "(" ~ (expr ~ ("," ~ expr)* )? ~ ")" } -subfield_exp = { (ident ~ ".")+ ~ ident } +subfield_exp = { (call_item ~ ".")+ ~ call_item } +call_item = { ident } +delineated_path = { path_component ~ ( "(" ~ expr ~ ")" )? 
} +path_component = { path_ident ~ (path_separator ~ path_ident )+ } +path_ident = { ident } var_name_ident = { ident } @@ -83,15 +88,15 @@ reassignment = { var_exp ~ assign ~ expr ~ ";" } visibility = { "pub"? } -struct_decl = { "struct" ~ struct_name ~ type_params? ~ trait_bounds? ~ "{" ~ struct_fields ~ "}" } +struct_decl = { visibility ~ struct_keyword ~ struct_name ~ type_params? ~ trait_bounds? ~ "{" ~ struct_fields ~ "}" } struct_name = {ident} struct_fields = { struct_field_name ~ ":" ~ type_name ~ ("," ~ struct_field_name ~ ":" ~ type_name)* ~ ","? } -struct_field_name = @{ident} +struct_field_name = {ident} // // enum declaration enum_decl = { enum_keyword ~ enum_name ~ type_params? ~ trait_bounds? ~ "{" ~ enum_fields ~ "}" } enum_fields = { enum_field_name ~ ":" ~ type_name ~ ("," ~ enum_field_name ~ ":" ~ type_name)* ~ ","? } -enum_name = @{ident} -enum_field_name = @{ident} +enum_name = {ident} +enum_field_name = {ident} impl_self = { impl_keyword ~ type_name ~ type_params? ~ trait_bounds? ~ ("{" ~ fn_decl* ~ "}") } @@ -112,7 +117,7 @@ return_statement = {return_keyword ~ expr? ~ ";"} expr_statement = { expr ~ ";" } // traits -trait_decl = { trait_decl_keyword ~ trait_name ~ type_params? ~ trait_bounds? ~ trait_methods } +trait_decl = { visibility ~ trait_decl_keyword ~ trait_name ~ type_params? ~ trait_bounds? 
~ trait_methods } // // just a fn signature denotes a fn something must implement in order for it to be part of the trait // // i like the idea of separating the interface points from the actual // // functional implementations the trait provides, so i have them @@ -156,6 +161,6 @@ char = @{ | "\\" ~ ("\""|"\\"|"/"|"b"|"f"|"n"|"r"|"t") | "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4}) } -ident = @{!(reserved_words) ~ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC|"_")*} +ident = @{(!(reserved_words) ~ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC|"_")*) | "()"} reserved_words = { while_keyword | "struct" | enum_keyword | match_keyword | use_keyword | var_decl_keyword | fn_decl_keyword | trait_decl_keyword | return_keyword } diff --git a/parser/src/ident.rs b/parser/src/ident.rs new file mode 100644 index 00000000000..7850570749a --- /dev/null +++ b/parser/src/ident.rs @@ -0,0 +1,51 @@ +use crate::error::*; +use crate::parser::Rule; +use pest::iterators::Pair; +use pest::Span; +use std::hash::{Hash, Hasher}; +#[derive(Debug, Clone)] +pub struct Ident<'sc> { + pub(crate) primary_name: &'sc str, + // sub-names are the stuff after periods + // like x.test.thing.method() + // `test`, `thing`, and `method` are sub-names + // the primary name is `x` + pub(crate) span: Span<'sc>, +} + +// custom implementation of Hash so that namespacing isn't reliant on the span itself, which will +// always be different. 
+impl Hash for Ident<'_> { + fn hash(&self, state: &mut H) { + self.primary_name.hash(state); + } +} +impl PartialEq for Ident<'_> { + fn eq(&self, other: &Self) -> bool { + self.primary_name == other.primary_name + } +} + +impl Eq for Ident<'_> {} + +impl<'sc> Ident<'sc> { + pub(crate) fn parse_from_pair(pair: Pair<'sc, Rule>) -> CompileResult<'sc, Ident> { + let span = { + let pair = pair.clone(); + if pair.as_rule() != Rule::ident { + pair.into_inner().next().unwrap().as_span() + } else { + pair.as_span() + } + }; + let name = pair.as_str().trim(); + ok( + Ident { + primary_name: name, + span, + }, + Vec::new(), + Vec::new(), + ) + } +} diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 59035492c45..0ce79e40a76 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -3,24 +3,29 @@ extern crate pest_derive; #[macro_use] mod error; +mod control_flow_analysis; +mod ident; mod parse_tree; mod parser; mod semantics; -pub(crate) mod types; -pub(crate) mod utils; + use crate::error::*; -pub use crate::parse_tree::Ident; use crate::parse_tree::*; -pub(crate) use crate::parse_tree::{Expression, UseStatement, WhileLoop}; use crate::parser::{HllParser, Rule}; +use control_flow_analysis::ControlFlowGraph; use pest::iterators::Pair; use pest::Parser; -pub use semantics::{Namespace, TypedDeclaration, TypedFunctionDeclaration}; use semantics::{TreeType, TypedParseTree}; -pub use types::TypeInfo; + +pub(crate) mod types; +pub(crate) mod utils; +pub(crate) use crate::parse_tree::{Expression, UseStatement, WhileLoop}; pub use error::{CompileError, CompileResult, CompileWarning}; +pub use ident::Ident; pub use pest::Span; +pub use semantics::{Namespace, TypedDeclaration, TypedFunctionDeclaration}; +pub use types::TypeInfo; // todo rename to language name #[derive(Debug)] @@ -42,6 +47,7 @@ pub struct HllTypedParseTree<'sc> { #[derive(Debug)] pub struct LibraryExports<'sc> { pub namespace: Namespace<'sc>, + trees: Vec>, } #[derive(Debug)] @@ -248,14 +254,51 @@ pub fn 
compile<'sc, 'manifest>( .collect(); let mut exports = LibraryExports { namespace: Default::default(), + trees: vec![], }; for (ref name, parse_tree) in res { exports .namespace - .insert_module(name.primary_name.to_string(), parse_tree.namespace); + .insert_module(name.primary_name.to_string(), parse_tree.namespace.clone()); + exports.trees.push(parse_tree); } exports }; + // If there are errors, display them now before performing control flow analysis. + // It is necessary that the syntax tree is well-formed for control flow analysis + // to be correct. + if !errors.is_empty() { + return Err((errors, warnings)); + } + + // perform control flow analysis on each branch + let (script_warnings, script_errors) = + perform_control_flow_analysis(&script_ast, TreeType::Script); + let (contract_warnings, contract_errors) = + perform_control_flow_analysis(&contract_ast, TreeType::Contract); + let (predicate_warnings, predicate_errors) = + perform_control_flow_analysis(&predicate_ast, TreeType::Predicate); + let (library_warnings, library_errors) = + perform_control_flow_analysis_on_library_exports(&library_exports); + + let mut l_warnings = [ + script_warnings, + contract_warnings, + predicate_warnings, + library_warnings, + ] + .concat(); + let mut l_errors = [ + script_errors, + contract_errors, + predicate_errors, + library_errors, + ] + .concat(); + + errors.append(&mut l_errors); + warnings.append(&mut l_warnings); + if errors.is_empty() { Ok(( HllTypedParseTree { @@ -271,6 +314,37 @@ pub fn compile<'sc, 'manifest>( } } +fn perform_control_flow_analysis<'sc>( + tree: &Option>, + tree_type: TreeType, +) -> (Vec>, Vec>) { + match tree { + Some(tree) => { + let graph = ControlFlowGraph::construct_dead_code_graph(tree, tree_type); + let mut warnings = vec![]; + let mut errors = vec![]; + warnings.append(&mut graph.find_dead_code()); + let graph = ControlFlowGraph::construct_return_path_graph(tree); + errors.append(&mut graph.analyze_return_paths()); + (warnings, errors) + 
} + None => (vec![], vec![]), + } +} +fn perform_control_flow_analysis_on_library_exports<'sc>( + lib: &LibraryExports<'sc>, +) -> (Vec>, Vec>) { + let mut warnings = vec![]; + let mut errors = vec![]; + for tree in &lib.trees { + let graph = ControlFlowGraph::construct_dead_code_graph(tree, TreeType::Library); + warnings.append(&mut graph.find_dead_code()); + let graph = ControlFlowGraph::construct_return_path_graph(tree); + errors.append(&mut graph.analyze_return_paths()); + } + (warnings, errors) +} + // strategy: parse top level things // and if we encounter a function body or block, recursively call this function and build // sub-nodes diff --git a/parser/src/parse_tree/call_path.rs b/parser/src/parse_tree/call_path.rs index 0b0511b0fe3..4247372ff0e 100644 --- a/parser/src/parse_tree/call_path.rs +++ b/parser/src/parse_tree/call_path.rs @@ -1,6 +1,6 @@ -use super::Ident; use crate::error::*; use crate::parser::Rule; +use crate::Ident; use pest::iterators::Pair; use pest::Span; @@ -32,19 +32,28 @@ impl<'sc> CallPath<'sc> { crate::utils::join_spans(prefixes_span, self.suffix.span.clone()) } pub(crate) fn parse_from_pair(pair: Pair<'sc, Rule>) -> CompileResult { - let warnings = vec![]; - let errors = vec![]; + let mut warnings = vec![]; + let mut errors = vec![]; + let mut pairs_buf = vec![]; + for pair in pair.clone().into_inner() { + if pair.as_rule() != Rule::path_separator { + pairs_buf.push(eval!( + Ident::parse_from_pair, + warnings, + errors, + pair, + continue + )); + } + } + if pairs_buf.len() == 0 { + dbg!(&pair); + } + assert!(pairs_buf.len() > 0); + let suffix = pairs_buf.pop().unwrap(); + let prefixes = pairs_buf; + // TODO eventually we want to be able to call methods with colon-delineated syntax - ok( - CallPath { - prefixes: vec![], - suffix: Ident { - primary_name: pair.as_str().trim(), - span: pair.as_span(), - }, - }, - warnings, - errors, - ) + ok(CallPath { prefixes, suffix }, warnings, errors) } } diff --git 
a/parser/src/parse_tree/declaration/enum_declaration.rs b/parser/src/parse_tree/declaration/enum_declaration.rs index 0ba2d751c62..f14da898901 100644 --- a/parser/src/parse_tree/declaration/enum_declaration.rs +++ b/parser/src/parse_tree/declaration/enum_declaration.rs @@ -1,13 +1,15 @@ -use crate::error::*; -use crate::parse_tree::declaration::TypeParameter; use crate::parser::Rule; use crate::types::TypeInfo; +use crate::Ident; +use crate::Namespace; +use crate::{error::*, semantics::ast_node::TypedEnumDeclaration}; +use crate::{parse_tree::declaration::TypeParameter, semantics::ast_node::TypedEnumVariant}; use inflector::cases::classcase::is_class_case; use pest::iterators::Pair; use pest::Span; #[derive(Debug, Clone)] pub struct EnumDeclaration<'sc> { - pub(crate) name: &'sc str, + pub(crate) name: Ident<'sc>, pub(crate) type_parameters: Vec>, pub(crate) variants: Vec>, pub(crate) span: Span<'sc>, @@ -15,11 +17,27 @@ pub struct EnumDeclaration<'sc> { #[derive(Debug, Clone)] pub(crate) struct EnumVariant<'sc> { - pub(crate) name: &'sc str, + pub(crate) name: Ident<'sc>, pub(crate) r#type: TypeInfo<'sc>, + pub(crate) tag: usize, + pub(crate) span: Span<'sc>, } impl<'sc> EnumDeclaration<'sc> { + /// Looks up the various TypeInfos in the [Namespace] to see if they are generic or refer to + /// something. 
+ pub(crate) fn to_typed_decl(&self, namespace: &Namespace<'sc>) -> TypedEnumDeclaration<'sc> { + TypedEnumDeclaration { + name: self.name.clone(), + type_parameters: self.type_parameters.clone(), + variants: self + .variants + .iter() + .map(|x| x.to_typed_decl(namespace)) + .collect(), + span: self.span.clone(), + } + } pub(crate) fn parse_from_pair(decl_inner: Pair<'sc, Rule>) -> CompileResult<'sc, Self> { let whole_enum_span = decl_inner.as_span(); let mut warnings = Vec::new(); @@ -72,13 +90,20 @@ impl<'sc> EnumDeclaration<'sc> { // unwrap non-optional fields let enum_name = enum_name.unwrap(); - let span = enum_name.as_span(); - let name = enum_name.as_str(); + let name = eval!( + Ident::parse_from_pair, + warnings, + errors, + enum_name, + return err(warnings, errors) + ); assert_or_warn!( - is_class_case(name), + is_class_case(name.primary_name), warnings, - span, - Warning::NonClassCaseEnumName { enum_name: name } + enum_name.as_span(), + Warning::NonClassCaseEnumName { + enum_name: name.primary_name + } ); let variants = eval!( @@ -103,22 +128,39 @@ impl<'sc> EnumDeclaration<'sc> { } impl<'sc> EnumVariant<'sc> { + pub(crate) fn to_typed_decl(&self, namespace: &Namespace<'sc>) -> TypedEnumVariant<'sc> { + TypedEnumVariant { + name: self.name.clone(), + r#type: namespace.resolve_type(&self.r#type), + tag: self.tag, + span: self.span.clone(), + } + } pub(crate) fn parse_from_pairs( decl_inner: Option>, ) -> CompileResult<'sc, Vec> { let mut warnings = Vec::new(); let mut errors = Vec::new(); let mut fields_buf = Vec::new(); + let mut tag = 0; if let Some(decl_inner) = decl_inner { let fields = decl_inner.into_inner().collect::>(); for i in (0..fields.len()).step_by(2) { - let span = fields[i].as_span(); - let name = fields[i].as_str(); + let variant_span = fields[i].as_span(); + let name = eval!( + Ident::parse_from_pair, + warnings, + errors, + fields[i], + return err(warnings, errors) + ); assert_or_warn!( - is_class_case(name), + 
is_class_case(name.primary_name), warnings, - span, - Warning::NonClassCaseEnumVariantName { variant_name: name } + name.span.clone(), + Warning::NonClassCaseEnumVariantName { + variant_name: name.primary_name + } ); let r#type = eval!( TypeInfo::parse_from_pair_inner, @@ -127,7 +169,13 @@ impl<'sc> EnumVariant<'sc> { fields[i + 1].clone(), TypeInfo::Unit ); - fields_buf.push(EnumVariant { name, r#type }); + fields_buf.push(EnumVariant { + name, + r#type, + tag, + span: variant_span, + }); + tag = tag + 1; } } ok(fields_buf, warnings, errors) diff --git a/parser/src/parse_tree/declaration/function_declaration.rs b/parser/src/parse_tree/declaration/function_declaration.rs index 13bffcdcb36..ea821b884fb 100644 --- a/parser/src/parse_tree/declaration/function_declaration.rs +++ b/parser/src/parse_tree/declaration/function_declaration.rs @@ -1,12 +1,12 @@ use crate::error::*; -use crate::parse_tree::{declaration::TypeParameter, Ident}; +use crate::parse_tree::declaration::TypeParameter; use crate::types::TypeInfo; -use crate::{CodeBlock, Rule}; +use crate::{CodeBlock, Ident, Rule}; use inflector::cases::snakecase::is_snake_case; use pest::iterators::Pair; use pest::Span; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(crate) enum Visibility { Public, Private, diff --git a/parser/src/parse_tree/declaration/mod.rs b/parser/src/parse_tree/declaration/mod.rs index 8a154b19de1..991d41a602d 100644 --- a/parser/src/parse_tree/declaration/mod.rs +++ b/parser/src/parse_tree/declaration/mod.rs @@ -17,9 +17,10 @@ pub(crate) use type_parameter::*; pub(crate) use variable_declaration::*; use crate::error::*; -use crate::parse_tree::{Expression, Ident}; +use crate::parse_tree::Expression; use crate::parser::Rule; use crate::types::TypeInfo; +use crate::Ident; use pest::iterators::Pair; #[derive(Debug, Clone)] diff --git a/parser/src/parse_tree/declaration/reassignment.rs b/parser/src/parse_tree/declaration/reassignment.rs index 712f3dafefc..209012d082b 
100644 --- a/parser/src/parse_tree/declaration/reassignment.rs +++ b/parser/src/parse_tree/declaration/reassignment.rs @@ -1,6 +1,7 @@ use crate::error::{err, ok, CompileResult}; -use crate::parse_tree::{Expression, Ident}; +use crate::parse_tree::Expression; use crate::parser::Rule; +use crate::Ident; use pest::iterators::Pair; use pest::Span; diff --git a/parser/src/parse_tree/declaration/struct_declaration.rs b/parser/src/parse_tree/declaration/struct_declaration.rs index 12f759b2337..67fc91a0680 100644 --- a/parser/src/parse_tree/declaration/struct_declaration.rs +++ b/parser/src/parse_tree/declaration/struct_declaration.rs @@ -1,22 +1,27 @@ -use crate::error::*; -use crate::parse_tree::{declaration::TypeParameter, Ident}; +use crate::parse_tree::declaration::TypeParameter; use crate::parser::Rule; use crate::types::TypeInfo; +use crate::{error::*, Ident}; use inflector::cases::classcase::is_class_case; use inflector::cases::snakecase::is_snake_case; use pest::iterators::Pair; +use pest::Span; + +use super::Visibility; #[derive(Debug, Clone)] pub struct StructDeclaration<'sc> { pub(crate) name: Ident<'sc>, pub(crate) fields: Vec>, pub(crate) type_parameters: Vec>, + pub(crate) visibility: Visibility, } #[derive(Debug, Clone)] pub(crate) struct StructField<'sc> { - pub(crate) name: &'sc str, + pub(crate) name: Ident<'sc>, pub(crate) r#type: TypeInfo<'sc>, + pub(crate) span: Span<'sc>, } impl<'sc> StructDeclaration<'sc> { @@ -24,7 +29,8 @@ impl<'sc> StructDeclaration<'sc> { let mut warnings = Vec::new(); let mut errors = Vec::new(); let mut decl = decl.into_inner(); - let name = decl.next().unwrap(); + let mut visibility = Visibility::Private; + let mut name = None; let mut type_params_pair = None; let mut where_clause_pair = None; let mut fields_pair = None; @@ -39,9 +45,17 @@ impl<'sc> StructDeclaration<'sc> { Rule::struct_fields => { fields_pair = Some(pair); } - _ => unreachable!(), + Rule::struct_keyword => (), + Rule::struct_name => { + name = Some(pair); + 
} + Rule::visibility => { + visibility = Visibility::parse_from_pair(pair); + } + a => unreachable!("{:?}", a), } } + let name = name.expect("The parser would not have matched this if there was no name."); let type_parameters = match TypeParameter::parse_from_type_params_and_where_clause( type_params_pair, @@ -99,6 +113,7 @@ impl<'sc> StructDeclaration<'sc> { name, fields, type_parameters, + visibility, }, warnings, errors, @@ -114,12 +129,20 @@ impl<'sc> StructField<'sc> { let mut fields_buf = Vec::new(); for i in (0..fields.len()).step_by(2) { let span = fields[i].as_span(); - let name = fields[i].as_str(); + let name = eval!( + Ident::parse_from_pair, + warnings, + errors, + fields[i], + return err(warnings, errors) + ); assert_or_warn!( - is_snake_case(name), + is_snake_case(name.primary_name), warnings, - span, - Warning::NonSnakeCaseStructFieldName { field_name: name } + span.clone(), + Warning::NonSnakeCaseStructFieldName { + field_name: name.primary_name.clone() + } ); let r#type = eval!( TypeInfo::parse_from_pair_inner, @@ -128,7 +151,7 @@ impl<'sc> StructField<'sc> { fields[i + 1].clone(), TypeInfo::Unit ); - fields_buf.push(StructField { name, r#type }); + fields_buf.push(StructField { name, r#type, span }); } ok(fields_buf, warnings, errors) } diff --git a/parser/src/parse_tree/declaration/trait_declaration.rs b/parser/src/parse_tree/declaration/trait_declaration.rs index 45684433c62..2dade29aa8e 100644 --- a/parser/src/parse_tree/declaration/trait_declaration.rs +++ b/parser/src/parse_tree/declaration/trait_declaration.rs @@ -1,8 +1,8 @@ -use super::{FunctionDeclaration, FunctionParameter}; -use crate::error::*; -use crate::parse_tree::{Ident, TypeParameter}; +use super::{FunctionDeclaration, FunctionParameter, Visibility}; +use crate::parse_tree::TypeParameter; use crate::parser::Rule; use crate::types::TypeInfo; +use crate::{error::*, Ident}; use inflector::cases::classcase::is_class_case; use inflector::cases::snakecase::is_snake_case; use 
pest::iterators::Pair; @@ -13,6 +13,7 @@ pub(crate) struct TraitDeclaration<'sc> { pub(crate) interface_surface: Vec>, pub(crate) methods: Vec>, pub(crate) type_parameters: Vec>, + pub(crate) visibility: Visibility, } impl<'sc> TraitDeclaration<'sc> { @@ -20,7 +21,16 @@ impl<'sc> TraitDeclaration<'sc> { let mut warnings = Vec::new(); let mut errors = Vec::new(); let mut trait_parts = pair.into_inner().peekable(); - let _trait_keyword = trait_parts.next(); + let trait_keyword_or_visibility = trait_parts.next().unwrap(); + let (visibility, _trait_keyword) = + if trait_keyword_or_visibility.as_rule() == Rule::visibility { + ( + Visibility::parse_from_pair(trait_keyword_or_visibility), + trait_parts.next().unwrap(), + ) + } else { + (Visibility::Private, trait_keyword_or_visibility) + }; let name_pair = trait_parts.next().unwrap(); let name = eval!( Ident::parse_from_pair, @@ -108,6 +118,7 @@ impl<'sc> TraitDeclaration<'sc> { name, interface_surface: interface, methods, + visibility, }, warnings, errors, diff --git a/parser/src/parse_tree/declaration/variable_declaration.rs b/parser/src/parse_tree/declaration/variable_declaration.rs index 884e22be19b..c74a6c4ae4f 100644 --- a/parser/src/parse_tree/declaration/variable_declaration.rs +++ b/parser/src/parse_tree/declaration/variable_declaration.rs @@ -1,5 +1,5 @@ -use crate::parse_tree::{Expression, Ident}; -use crate::types::TypeInfo; +use crate::parse_tree::Expression; +use crate::{types::TypeInfo, Ident}; #[derive(Debug, Clone)] pub(crate) struct VariableDeclaration<'sc> { diff --git a/parser/src/parse_tree/expression/asm.rs b/parser/src/parse_tree/expression/asm.rs index 4b5bf2aa953..5c18e951ee6 100644 --- a/parser/src/parse_tree/expression/asm.rs +++ b/parser/src/parse_tree/expression/asm.rs @@ -1,6 +1,6 @@ use crate::error::*; -use crate::parse_tree::Ident; use crate::parser::Rule; +use crate::Ident; use pest::iterators::Pair; use pest::Span; diff --git a/parser/src/parse_tree/expression/mod.rs 
b/parser/src/parse_tree/expression/mod.rs index 2bf8d11fb35..6b0f4592526 100644 --- a/parser/src/parse_tree/expression/mod.rs +++ b/parser/src/parse_tree/expression/mod.rs @@ -1,15 +1,11 @@ use crate::error::*; -use crate::parse_tree::CallPath; -use crate::parse_tree::Literal; +use crate::parse_tree::{CallPath, Literal}; use crate::parser::Rule; -use crate::CodeBlock; +use crate::{CodeBlock, Ident}; use either::Either; use pest::iterators::Pair; use pest::Span; -use std::{ - collections::HashMap, - hash::{Hash, Hasher}, -}; +use std::collections::HashMap; mod asm; pub(crate) use asm::AsmExpression; @@ -72,16 +68,49 @@ pub(crate) enum Expression<'sc> { arguments: Vec>, span: Span<'sc>, }, + /// A subfield expression is anything of the form: + /// ```ignore + /// . + /// ``` + /// + /// Where there are `n >=2` idents. This is typically an access of a structure field. SubfieldExpression { name_parts: Vec>, span: Span<'sc>, unary_op: Option, }, + /// A [DelineatedPath] is anything of the form: + /// ```ignore + /// :: + /// ``` + /// Where there are `n >= 2` idents. + /// These could be either enum variant constructions, or they could be + /// references to some sort of module in the module tree. + /// For example, a reference to a module: + /// ```ignore + /// std::ops::add + /// ``` + /// + /// And, an enum declaration: + /// ```ignore + /// enum MyEnum { + /// Variant1, + /// Variant2 + /// } + /// + /// MyEnum::Variant1 + /// ``` + DelineatedPath { + call_path: CallPath<'sc>, + instantiator: Option>>, + span: Span<'sc>, + type_arguments: Vec>, + }, } #[derive(Debug, Clone)] pub(crate) struct StructExpressionField<'sc> { - pub(crate) name: &'sc str, + pub(crate) name: Ident<'sc>, pub(crate) value: Expression<'sc>, pub(crate) span: Span<'sc>, } @@ -103,6 +132,7 @@ impl<'sc> Expression<'sc> { AsmExpression { span, .. } => span, MethodApplication { span, .. } => span, SubfieldExpression { span, .. } => span, + DelineatedPath { span, .. 
} => span, }) .clone() } @@ -351,7 +381,13 @@ impl<'sc> Expression<'sc> { let fields = expr_iter.next().unwrap().into_inner().collect::>(); let mut fields_buf = Vec::new(); for i in (0..fields.len()).step_by(2) { - let name = fields[i].as_str(); + let name = eval!( + Ident::parse_from_pair, + warnings, + errors, + fields[i], + return err(warnings, errors) + ); let span = fields[i].as_span(); let value = eval!( Expression::parse_from_pair, @@ -515,6 +551,44 @@ impl<'sc> Expression<'sc> { expr, return err(warnings, errors) ), + Rule::delineated_path => { + // this is either an enum expression or looking something + // up in libraries + let span = expr.as_span(); + let mut parts = expr.into_inner(); + let path_component = parts.next().unwrap(); + let instantiator = parts.next(); + let path = eval!( + CallPath::parse_from_pair, + warnings, + errors, + path_component, + return err(warnings, errors) + ); + + let instantiator = if let Some(inst) = instantiator { + Some(Box::new(eval!( + Expression::parse_from_pair_inner, + warnings, + errors, + inst.into_inner().next().unwrap(), + return err(warnings, errors) + ))) + } else { + None + }; + + // if there is an expression in parenthesis, that is the instantiator. + + Expression::DelineatedPath { + call_path: path, + instantiator, + span, + // Eventually, when we support generic enums, we want to be able to parse type + // arguments on the enum name and throw them in here. TODO + type_arguments: vec![], + } + } a => { eprintln!( "Unimplemented expr: {:?} ({:?}) ({:?})", @@ -652,53 +726,6 @@ impl UnaryOp { } } -#[derive(Debug, Clone)] -pub struct Ident<'sc> { - pub(crate) primary_name: &'sc str, - // sub-names are the stuff after periods - // like x.test.thing.method() - // `test`, `thing`, and `method` are sub-names - // the primary name is `x` - pub(crate) span: Span<'sc>, -} - -// custom implementation of Hash so that namespacing isn't reliant on the span itself, which will -// always be different. 
-impl Hash for Ident<'_> { - fn hash(&self, state: &mut H) { - self.primary_name.hash(state); - } -} -impl PartialEq for Ident<'_> { - fn eq(&self, other: &Self) -> bool { - self.primary_name == other.primary_name - } -} - -impl Eq for Ident<'_> {} - -impl<'sc> Ident<'sc> { - pub(crate) fn parse_from_pair(pair: Pair<'sc, Rule>) -> CompileResult<'sc, Ident> { - let span = { - let pair = pair.clone(); - if pair.as_rule() != Rule::ident { - pair.into_inner().next().unwrap().as_span() - } else { - pair.as_span() - } - }; - let name = pair.as_str().trim(); - ok( - Ident { - primary_name: name, - span, - }, - Vec::new(), - Vec::new(), - ) - } -} - fn parse_op<'sc>(op: Pair<'sc, Rule>) -> CompileResult<'sc, Op> { use OpVariant::*; let mut errors = Vec::new(); diff --git a/parser/src/parse_tree/mod.rs b/parser/src/parse_tree/mod.rs index 04a2fa83180..3cb3bc1d564 100644 --- a/parser/src/parse_tree/mod.rs +++ b/parser/src/parse_tree/mod.rs @@ -4,14 +4,12 @@ mod declaration; mod expression; mod literal; mod use_statement; -mod variable_declaration; mod while_loop; pub(crate) use call_path::*; pub(crate) use code_block::*; pub(crate) use declaration::*; -pub use expression::Ident; -pub(crate) use expression::{AsmExpression, Expression, UnaryOp}; +pub(crate) use expression::*; pub(crate) use literal::Literal; pub(crate) use use_statement::{ImportType, UseStatement}; pub(crate) use while_loop::WhileLoop; diff --git a/parser/src/parse_tree/variable_declaration.rs b/parser/src/parse_tree/variable_declaration.rs deleted file mode 100644 index 8b137891791..00000000000 --- a/parser/src/parse_tree/variable_declaration.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/parser/src/semantics/ast_node/code_block.rs b/parser/src/semantics/ast_node/code_block.rs index 972684cec5c..4832afb6fe1 100644 --- a/parser/src/semantics/ast_node/code_block.rs +++ b/parser/src/semantics/ast_node/code_block.rs @@ -1,5 +1,5 @@ use super::*; -use crate::types::TypeInfo; +use crate::types::ResolvedType; use 
crate::CodeBlock; #[derive(Clone, Debug)] @@ -8,13 +8,13 @@ pub(crate) struct TypedCodeBlock<'sc> { } impl<'sc> TypedCodeBlock<'sc> { - pub(crate) fn type_check<'manifest>( + pub(crate) fn type_check( other: CodeBlock<'sc>, namespace: &Namespace<'sc>, // this is for the return or implicit return - type_annotation: Option>, + type_annotation: Option>, help_text: impl Into + Clone, - ) -> CompileResult<'sc, (Self, TypeInfo<'sc>)> { + ) -> CompileResult<'sc, (Self, Option>)> { let mut warnings = Vec::new(); let mut errors = Vec::new(); let mut evaluated_contents = Vec::new(); @@ -57,37 +57,36 @@ impl<'sc> TypedCodeBlock<'sc> { } // find the implicit return, if any, and use it as the code block's return type. // The fact that there is at most one implicit return is an invariant held by the parser. - let return_type = evaluated_contents - .iter() - .find_map(|x| match x { - TypedAstNode { - content: - TypedAstNodeContent::ImplicitReturnExpression(TypedExpression { - ref return_type, - .. - }), - .. - } => Some(return_type.clone()), - _ => None, - }) - .unwrap_or(TypeInfo::Unit); - if let Some(type_annotation) = type_annotation { - let convertability = return_type.is_convertable( - &type_annotation, - implicit_return_span.unwrap_or(other.whole_block_span.clone()), - help_text, - ); - match convertability { - Ok(warning) => { - if let Some(warning) = warning { - warnings.push(CompileWarning { - warning_content: warning, - span: other.whole_block_span, - }); + let return_type = evaluated_contents.iter().find_map(|x| match x { + TypedAstNode { + content: + TypedAstNodeContent::ImplicitReturnExpression(TypedExpression { + ref return_type, + .. + }), + .. 
+ } => Some(return_type.clone()), + _ => None, + }); + if let Some(ref return_type) = return_type { + if let Some(type_annotation) = type_annotation { + let convertability = return_type.is_convertable( + &type_annotation, + implicit_return_span.unwrap_or(other.whole_block_span.clone()), + help_text, + ); + match convertability { + Ok(warning) => { + if let Some(warning) = warning { + warnings.push(CompileWarning { + warning_content: warning, + span: other.whole_block_span, + }); + } + } + Err(err) => { + errors.push(err.into()); } - } - Err(err) => { - errors.push(err.into()); } } } diff --git a/parser/src/semantics/ast_node/declaration.rs b/parser/src/semantics/ast_node/declaration.rs index 4a3939b4664..debadfb0537 100644 --- a/parser/src/semantics/ast_node/declaration.rs +++ b/parser/src/semantics/ast_node/declaration.rs @@ -1,20 +1,24 @@ use super::{ IsConstant, TypedCodeBlock, TypedExpression, TypedExpressionVariant, TypedReturnStatement, }; -use crate::error::*; use crate::parse_tree::*; use crate::semantics::Namespace; -use crate::types::TypeInfo; -use crate::TraitFn; +use crate::{error::*, types::ResolvedType, Ident}; +use pest::Span; #[derive(Clone, Debug)] pub enum TypedDeclaration<'sc> { VariableDeclaration(TypedVariableDeclaration<'sc>), FunctionDeclaration(TypedFunctionDeclaration<'sc>), TraitDeclaration(TypedTraitDeclaration<'sc>), - StructDeclaration(StructDeclaration<'sc>), - EnumDeclaration(EnumDeclaration<'sc>), + StructDeclaration(TypedStructDeclaration<'sc>), + EnumDeclaration(TypedEnumDeclaration<'sc>), Reassignment(TypedReassignment<'sc>), + ImplTrait { + trait_name: Ident<'sc>, + span: Span<'sc>, + methods: Vec>, + }, // no contents since it is a side-effectful declaration, i.e it populates a namespace SideEffect, ErrorRecovery, @@ -31,30 +35,135 @@ impl<'sc> TypedDeclaration<'sc> { StructDeclaration(_) => "struct", EnumDeclaration(_) => "enum", Reassignment(_) => "reassignment", + ImplTrait { .. 
} => "impl trait", SideEffect => "", ErrorRecovery => "error", } } - pub(crate) fn return_type(&self) -> CompileResult<'sc, TypeInfo<'sc>> { + pub(crate) fn return_type(&self) -> CompileResult<'sc, ResolvedType<'sc>> { ok( match self { TypedDeclaration::VariableDeclaration(TypedVariableDeclaration { body, .. }) => body.return_type.clone(), TypedDeclaration::FunctionDeclaration { .. } => todo!("fn pointer type"), - TypedDeclaration::StructDeclaration(StructDeclaration { name, .. }) => { - TypeInfo::Struct { name: name.clone() } + TypedDeclaration::StructDeclaration(TypedStructDeclaration { name, .. }) => { + ResolvedType::Struct { name: name.clone() } } TypedDeclaration::Reassignment(TypedReassignment { rhs, .. }) => { rhs.return_type.clone() } - _ => return err(vec![], vec![todo!("used typeless symbol as type err")]), + decl => { + return err( + vec![], + vec![CompileError::NotAType { + span: decl.span(), + name: decl.pretty_print(), + actually_is: decl.friendly_name().to_string(), + }], + ) + } }, vec![], vec![], ) } + + pub(crate) fn span(&self) -> Span<'sc> { + use TypedDeclaration::*; + match self { + VariableDeclaration(TypedVariableDeclaration { name, .. }) => name.span.clone(), + FunctionDeclaration(TypedFunctionDeclaration { span, .. }) => span.clone(), + TraitDeclaration(TypedTraitDeclaration { name, .. }) => name.span.clone(), + StructDeclaration(TypedStructDeclaration { name, .. }) => name.span.clone(), + EnumDeclaration(TypedEnumDeclaration { span, .. }) => span.clone(), + Reassignment(TypedReassignment { lhs, .. }) => lhs.span.clone(), + ImplTrait { span, .. } => span.clone(), + SideEffect | ErrorRecovery => unreachable!("No span exists for these ast node types"), + } + } + + pub(crate) fn pretty_print(&self) -> String { + format!( + "{} declaration ({})", + self.friendly_name(), + match self { + TypedDeclaration::VariableDeclaration(TypedVariableDeclaration { + is_mutable, + name, + .. 
+ }) => format!( + "{} {}", + if *is_mutable { "mut" } else { "" }, + name.primary_name + ), + TypedDeclaration::FunctionDeclaration(TypedFunctionDeclaration { + name, .. + }) => { + name.primary_name.into() + } + TypedDeclaration::TraitDeclaration(TypedTraitDeclaration { name, .. }) => + name.primary_name.into(), + TypedDeclaration::StructDeclaration(TypedStructDeclaration { name, .. }) => + name.primary_name.into(), + TypedDeclaration::EnumDeclaration(TypedEnumDeclaration { name, .. }) => + name.primary_name.into(), + TypedDeclaration::Reassignment(TypedReassignment { lhs, .. }) => + lhs.primary_name.into(), + _ => String::new(), + } + ) + } +} + +#[derive(Clone, Debug)] +pub struct TypedStructDeclaration<'sc> { + pub(crate) name: Ident<'sc>, + pub(crate) fields: Vec>, + pub(crate) type_parameters: Vec>, + pub(crate) visibility: Visibility, +} + +#[derive(Debug, Clone)] +pub struct TypedStructField<'sc> { + pub(crate) name: Ident<'sc>, + pub(crate) r#type: ResolvedType<'sc>, + pub(crate) span: Span<'sc>, +} + +#[derive(Clone, Debug)] +pub struct TypedEnumDeclaration<'sc> { + pub(crate) name: Ident<'sc>, + pub(crate) type_parameters: Vec>, + pub(crate) variants: Vec>, + pub(crate) span: Span<'sc>, +} +impl<'sc> TypedEnumDeclaration<'sc> { + /// Given type arguments, match them up with the type parameters and return the result. + /// Currently unimplemented as we don't support generic enums yet, but when we do, this will be + /// the place to resolve those typed. + pub(crate) fn resolve_generic_types( + &self, + _type_arguments: Vec>, + ) -> CompileResult<'sc, Self> { + ok(self.clone(), vec![], vec![]) + } + /// Returns the [ResolvedType] corresponding to this enum's type. 
+ pub(crate) fn as_type(&self) -> ResolvedType<'sc> { + ResolvedType::Enum { + name: self.name.clone(), + } + } +} + +#[derive(Debug, Clone)] +pub struct TypedEnumVariant<'sc> { + pub(crate) name: Ident<'sc>, + pub(crate) r#type: ResolvedType<'sc>, + pub(crate) tag: usize, + pub(crate) span: Span<'sc>, } + #[derive(Clone, Debug)] pub struct TypedVariableDeclaration<'sc> { pub(crate) name: Ident<'sc>, @@ -67,18 +176,36 @@ pub struct TypedVariableDeclaration<'sc> { pub struct TypedFunctionDeclaration<'sc> { pub(crate) name: Ident<'sc>, pub(crate) body: TypedCodeBlock<'sc>, - pub(crate) parameters: Vec>, + pub(crate) parameters: Vec>, pub(crate) span: pest::Span<'sc>, - pub(crate) return_type: TypeInfo<'sc>, + pub(crate) return_type: ResolvedType<'sc>, pub(crate) type_parameters: Vec>, + /// Used for error messages -- the span pointing to the return type + /// annotation of the function + pub(crate) return_type_span: Span<'sc>, + pub(crate) visibility: Visibility, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TypedFunctionParameter<'sc> { + pub(crate) name: Ident<'sc>, + pub(crate) r#type: ResolvedType<'sc>, + pub(crate) type_span: Span<'sc>, } #[derive(Clone, Debug)] pub struct TypedTraitDeclaration<'sc> { pub(crate) name: Ident<'sc>, - pub(crate) interface_surface: Vec>, // TODO typed TraitFn which checks geneerics + pub(crate) interface_surface: Vec>, pub(crate) methods: Vec>, pub(crate) type_parameters: Vec>, + pub(crate) visibility: Visibility, +} +#[derive(Clone, Debug)] +pub struct TypedTraitFn<'sc> { + pub(crate) name: Ident<'sc>, + pub(crate) parameters: Vec>, + pub(crate) return_type: ResolvedType<'sc>, } #[derive(Clone, Debug)] @@ -91,8 +218,11 @@ impl<'sc> TypedFunctionDeclaration<'sc> { pub(crate) fn type_check( fn_decl: FunctionDeclaration<'sc>, namespace: &Namespace<'sc>, - _return_type_annotation: Option>, + _return_type_annotation: Option>, _help_text: impl Into, + // If there are any `Self` types in this declaration, + // resolve them to 
this type. + self_type: Option>, ) -> CompileResult<'sc, TypedFunctionDeclaration<'sc>> { let mut warnings = Vec::new(); let mut errors = Vec::new(); @@ -103,58 +233,93 @@ impl<'sc> TypedFunctionDeclaration<'sc> { span, return_type, type_parameters, + return_type_span, + visibility, .. } = fn_decl.clone(); + let return_type = namespace.resolve_type(&return_type); // insert parameters into namespace let mut namespace = namespace.clone(); - parameters - .clone() - .into_iter() - .for_each(|FunctionParameter { name, r#type, .. }| { - namespace.insert( - name.clone(), - TypedDeclaration::VariableDeclaration(TypedVariableDeclaration { - name: name.clone(), - body: TypedExpression { - expression: TypedExpressionVariant::FunctionParameter, - return_type: r#type, - is_constant: IsConstant::No, - }, - is_mutable: false, // TODO allow mutable function params? - }), - ); - }); - let (body, _implicit_block_return) = type_check!( - TypedCodeBlock::type_check( - body, - &namespace, - Some(return_type.clone()), - "Function body's return type does not match up with its return type annotation." - ), - (TypedCodeBlock { contents: vec![] }, TypeInfo::ErrorRecovery), - warnings, - errors - ); + for FunctionParameter { + name, ref r#type, .. + } in parameters.clone() + { + let r#type = namespace.resolve_type(r#type); + namespace.insert( + name.clone(), + TypedDeclaration::VariableDeclaration(TypedVariableDeclaration { + name: name.clone(), + body: TypedExpression { + expression: TypedExpressionVariant::FunctionParameter, + return_type: r#type, + is_constant: IsConstant::No, + }, + is_mutable: false, // TODO allow mutable function params? 
+ }), + ); + } + // check return type for Self types + let return_type = if return_type == ResolvedType::SelfType { + match self_type { + Some(ref ty) => ty.clone(), + None => { + errors.push(CompileError::UnqualifiedSelfType { + span: return_type_span.clone(), + }); + return_type + } + } + } else { + return_type + }; + // If there are no implicit block returns, then we do not want to type check them, so we + // stifle the errors. If there _are_ implicit block returns, we want to type_check them. + let (body, _implicit_block_return) = + type_check!( + TypedCodeBlock::type_check( + body, + &namespace, + Some(return_type.clone()), + "Function body's return type does not match up with its return type annotation." + ), + (TypedCodeBlock { contents: vec![] }, Some(ResolvedType::ErrorRecovery)), + warnings, + errors + ); // check the generic types in the arguments, make sure they are in the type // scope + let mut parameters = parameters + .into_iter() + .map( + |FunctionParameter { + name, + r#type, + type_span, + }| TypedFunctionParameter { + name, + r#type: namespace.resolve_type(&r#type), + type_span, + }, + ) + .collect::>(); let mut generic_params_buf_for_error_message = Vec::new(); for param in parameters.iter() { - if let TypeInfo::Custom { ref name } = param.r#type { + if let ResolvedType::Generic { ref name } = param.r#type { generic_params_buf_for_error_message.push(name.primary_name); } } let comma_separated_generic_params = generic_params_buf_for_error_message.join(", "); - for FunctionParameter { + for TypedFunctionParameter { ref r#type, name, .. } in parameters.iter() { let span = name.span.clone(); - if let TypeInfo::Custom { name, .. } = r#type { + if let ResolvedType::Generic { name, .. } = r#type { let args_span = parameters.iter().fold( parameters[0].name.span.clone(), |acc, - FunctionParameter { + TypedFunctionParameter { name: Ident { span, .. }, .. 
}| crate::utils::join_spans(acc, span.clone()), @@ -213,6 +378,24 @@ impl<'sc> TypedFunctionDeclaration<'sc> { } } } + for TypedFunctionParameter { + ref mut r#type, + type_span, + .. + } in parameters.iter_mut() + { + if *r#type == ResolvedType::SelfType { + match self_type { + Some(ref ty) => *r#type = ty.clone(), + None => { + errors.push(CompileError::UnqualifiedSelfType { + span: type_span.clone(), + }); + continue; + } + } + } + } ok( TypedFunctionDeclaration { @@ -222,6 +405,8 @@ impl<'sc> TypedFunctionDeclaration<'sc> { span: span.clone(), return_type, type_parameters, + return_type_span, + visibility, }, warnings, errors, diff --git a/parser/src/semantics/ast_node/expression/enum_instantiation.rs b/parser/src/semantics/ast_node/expression/enum_instantiation.rs new file mode 100644 index 00000000000..326a61e83aa --- /dev/null +++ b/parser/src/semantics/ast_node/expression/enum_instantiation.rs @@ -0,0 +1,86 @@ +use crate::error::*; +use crate::semantics::ast_node::*; + +/// Given an enum declaration and the instantiation expression/type arguments, construct a valid +/// [TypedExpression]. +pub(crate) fn instantiate_enum<'sc>( + enum_decl: TypedEnumDeclaration<'sc>, + enum_field_name: Ident<'sc>, + instantiator: Option>>, + type_arguments: Vec>, + namespace: &Namespace<'sc>, +) -> CompileResult<'sc, TypedExpression<'sc>> { + let mut warnings = vec![]; + let mut errors = vec![]; + let enum_decl = type_check!( + enum_decl.resolve_generic_types(type_arguments), + return err(warnings, errors), + warnings, + errors + ); + let (enum_field_type, tag, variant_name) = match enum_decl + .variants + .iter() + .find(|x| x.name.primary_name == enum_field_name.primary_name) + { + Some(o) => (o.r#type.clone(), o.tag, o.name.clone()), + None => todo!("Field does not exist on this enum error"), + }; + + // If there is an instantiator, it must match up with the type. If there is not an + // instantiator, then the type of the enum is necessarily the unit type. 
+ + match (instantiator, enum_field_type) { + (None, ResolvedType::Unit) => ok( + TypedExpression { + return_type: ResolvedType::Unit, + expression: TypedExpressionVariant::EnumInstantiation { + tag, + contents: None, + enum_name: enum_decl.name.clone(), + variant_name, + }, + is_constant: IsConstant::No, + }, + warnings, + errors, + ), + (Some(boxed_expr), r#type) => { + let typed_expr = type_check!( + TypedExpression::type_check( + *boxed_expr, + namespace, + Some(r#type.clone()), + "Enum instantiator must match its declared variant type." + ), + return err(warnings, errors), + warnings, + errors + ); + + // we now know that the instantiator type matches the declared type, via the above tpe + // check + + ok( + TypedExpression { + return_type: enum_decl.as_type(), + expression: TypedExpressionVariant::EnumInstantiation { + tag, + contents: Some(Box::new(typed_expr)), + enum_name: enum_decl.name.clone(), + variant_name, + }, + is_constant: IsConstant::No, + }, + warnings, + errors, + ) + } + (None, _) => { + errors.push(CompileError::MissingEnumInstantiator { + span: enum_field_name.span.clone(), + }); + return err(warnings, errors); + } + } +} diff --git a/parser/src/semantics/ast_node/expression/mod.rs b/parser/src/semantics/ast_node/expression/mod.rs index 41d716d119b..f993de5c7a7 100644 --- a/parser/src/semantics/ast_node/expression/mod.rs +++ b/parser/src/semantics/ast_node/expression/mod.rs @@ -1,10 +1,12 @@ +mod enum_instantiation; mod match_branch; mod match_condition; mod struct_expr_field; mod typed_expression; mod typed_expression_variant; -pub(super) use match_branch::TypedMatchBranch; -pub(super) use match_condition::TypedMatchCondition; -pub(super) use struct_expr_field::TypedStructExpressionField; +pub(crate) use enum_instantiation::instantiate_enum; +pub(crate) use match_branch::TypedMatchBranch; +pub(crate) use match_condition::TypedMatchCondition; +pub(crate) use struct_expr_field::TypedStructExpressionField; pub(crate) use 
typed_expression::{TypedExpression, ERROR_RECOVERY_EXPR}; pub(crate) use typed_expression_variant::TypedExpressionVariant; diff --git a/parser/src/semantics/ast_node/expression/struct_expr_field.rs b/parser/src/semantics/ast_node/expression/struct_expr_field.rs index 8fc2b42568a..013e74c4e9f 100644 --- a/parser/src/semantics/ast_node/expression/struct_expr_field.rs +++ b/parser/src/semantics/ast_node/expression/struct_expr_field.rs @@ -1,7 +1,8 @@ use crate::semantics::TypedExpression; +use crate::Ident; #[derive(Clone, Debug)] pub(crate) struct TypedStructExpressionField<'sc> { - pub(crate) name: &'sc str, + pub(crate) name: Ident<'sc>, pub(crate) value: TypedExpression<'sc>, } diff --git a/parser/src/semantics/ast_node/expression/typed_expression.rs b/parser/src/semantics/ast_node/expression/typed_expression.rs index 860317ad7b3..5e01b41ebd8 100644 --- a/parser/src/semantics/ast_node/expression/typed_expression.rs +++ b/parser/src/semantics/ast_node/expression/typed_expression.rs @@ -1,12 +1,12 @@ use super::*; -use crate::error::*; use crate::semantics::ast_node::*; -use crate::types::{IntegerBits, TypeInfo}; +use crate::types::{IntegerBits, ResolvedType}; +use either::Either; #[derive(Clone, Debug)] pub(crate) struct TypedExpression<'sc> { pub(crate) expression: TypedExpressionVariant<'sc>, - pub(crate) return_type: TypeInfo<'sc>, + pub(crate) return_type: ResolvedType<'sc>, /// whether or not this expression is constantly evaluatable (if the result is known at compile /// time) pub(crate) is_constant: IsConstant, @@ -14,16 +14,15 @@ pub(crate) struct TypedExpression<'sc> { pub(crate) const ERROR_RECOVERY_EXPR: TypedExpression = TypedExpression { expression: TypedExpressionVariant::Unit, - return_type: TypeInfo::ErrorRecovery, + return_type: ResolvedType::ErrorRecovery, is_constant: IsConstant::No, }; - impl<'sc> TypedExpression<'sc> { pub(crate) fn type_check( other: Expression<'sc>, namespace: &Namespace<'sc>, - type_annotation: Option>, + type_annotation: 
Option>, help_text: impl Into + Clone, ) -> CompileResult<'sc, Self> { let mut warnings = Vec::new(); @@ -32,15 +31,15 @@ impl<'sc> TypedExpression<'sc> { let mut typed_expression = match other { Expression::Literal { value: lit, .. } => { let return_type = match lit { - Literal::String(_) => TypeInfo::String, - Literal::U8(_) => TypeInfo::UnsignedInteger(IntegerBits::Eight), - Literal::U16(_) => TypeInfo::UnsignedInteger(IntegerBits::Sixteen), - Literal::U32(_) => TypeInfo::UnsignedInteger(IntegerBits::ThirtyTwo), - Literal::U64(_) => TypeInfo::UnsignedInteger(IntegerBits::SixtyFour), - Literal::U128(_) => TypeInfo::UnsignedInteger(IntegerBits::OneTwentyEight), - Literal::Boolean(_) => TypeInfo::Boolean, - Literal::Byte(_) => TypeInfo::Byte, - Literal::Byte32(_) => TypeInfo::Byte32, + Literal::String(_) => ResolvedType::String, + Literal::U8(_) => ResolvedType::UnsignedInteger(IntegerBits::Eight), + Literal::U16(_) => ResolvedType::UnsignedInteger(IntegerBits::Sixteen), + Literal::U32(_) => ResolvedType::UnsignedInteger(IntegerBits::ThirtyTwo), + Literal::U64(_) => ResolvedType::UnsignedInteger(IntegerBits::SixtyFour), + Literal::U128(_) => ResolvedType::UnsignedInteger(IntegerBits::OneTwentyEight), + Literal::Boolean(_) => ResolvedType::Boolean, + Literal::Byte(_) => ResolvedType::Byte, + Literal::Byte32(_) => ResolvedType::Byte32, }; TypedExpression { expression: TypedExpressionVariant::Literal(lit), @@ -78,8 +77,15 @@ impl<'sc> TypedExpression<'sc> { } } } - Expression::FunctionApplication { name, arguments, .. } => { - let function_declaration = type_check!(namespace.get_call_path(&name), return err(warnings, errors), warnings, errors); + Expression::FunctionApplication { + name, arguments, .. 
+ } => { + let function_declaration = type_check!( + namespace.get_call_path(&name), + return err(warnings, errors), + warnings, + errors + ); match function_declaration { TypedDeclaration::FunctionDeclaration(TypedFunctionDeclaration { parameters, @@ -199,7 +205,7 @@ impl<'sc> TypedExpression<'sc> { ERROR_RECOVERY_EXPR.clone() */ } - Expression::CodeBlock { contents, .. } => { + Expression::CodeBlock { contents, span, .. } => { let (typed_block, block_return_type) = type_check!( TypedCodeBlock::type_check( contents.clone(), @@ -207,10 +213,22 @@ impl<'sc> TypedExpression<'sc> { type_annotation.clone(), help_text.clone() ), - (TypedCodeBlock { contents: vec![] }, TypeInfo::Unit), + (TypedCodeBlock { contents: vec![] }, Some(ResolvedType::Unit)), warnings, errors ); + let block_return_type = match block_return_type { + Some(ty) => ty, + None => { + match type_annotation { + Some(ref ty) if ty != &ResolvedType::Unit =>{ + errors.push(CompileError::ExpectedImplicitReturnFromBlockWithType { span: span.clone(), ty: ty.friendly_type_str() }); + ResolvedType::ErrorRecovery + } + _ => ResolvedType::Unit + } + } + }; TypedExpression { expression: TypedExpressionVariant::CodeBlock(TypedCodeBlock { contents: typed_block.contents, @@ -231,7 +249,7 @@ impl<'sc> TypedExpression<'sc> { TypedExpression::type_check( *condition, &namespace, - Some(TypeInfo::Boolean), + Some(ResolvedType::Boolean), "The condition of an if expression must be a boolean expression.", ), ERROR_RECOVERY_EXPR.clone(), @@ -262,13 +280,11 @@ impl<'sc> TypedExpression<'sc> { // if there is a type annotation, then the else branch must exist if let Some(ref annotation) = type_annotation { - if r#else.is_none() { - errors.push(CompileError::NoElseBranch { + errors.push(CompileError::NoElseBranch { span: span.clone(), r#type: annotation.friendly_type_str(), }); - } } @@ -284,9 +300,9 @@ impl<'sc> TypedExpression<'sc> { } Expression::AsmExpression { asm, .. 
} => { let return_type = if asm.returns.is_some() { - TypeInfo::UnsignedInteger(IntegerBits::SixtyFour) + ResolvedType::UnsignedInteger(IntegerBits::SixtyFour) } else { - TypeInfo::Unit + ResolvedType::Unit }; TypedExpression { expression: TypedExpressionVariant::AsmExpression { asm }, @@ -301,7 +317,7 @@ impl<'sc> TypedExpression<'sc> { } => { // TODO in here replace generic types with provided types // find the struct definition in the namespace - let definition: &StructDeclaration = match namespace.get_symbol(&struct_name) { + let definition: &TypedStructDeclaration = match namespace.get_symbol(&struct_name) { Some(TypedDeclaration::StructDeclaration(st)) => st, Some(_) => { errors.push(CompileError::DeclaredNonStructAsStruct { @@ -322,19 +338,19 @@ impl<'sc> TypedExpression<'sc> { // match up the names with their type annotations from the declaration for def_field in definition.fields.iter() { - let expr_field = match fields.iter().find(|x| x.name == def_field.name) { + let expr_field: crate::parse_tree::StructExpressionField = match fields.iter().find(|x| x.name == def_field.name) { Some(val) => val.clone(), None => { errors.push(CompileError::StructMissingField { - field_name: def_field.name, + field_name: def_field.name.primary_name, struct_name: definition.name.primary_name, span: span.clone(), }); typed_fields_buf.push(TypedStructExpressionField { - name: def_field.name, + name: def_field.name.clone(), value: TypedExpression { expression: TypedExpressionVariant::Unit, - return_type: TypeInfo::ErrorRecovery, + return_type: ResolvedType::ErrorRecovery, is_constant: IsConstant::No, }, }); @@ -356,7 +372,7 @@ impl<'sc> TypedExpression<'sc> { typed_fields_buf.push(TypedStructExpressionField { value: typed_field, - name: expr_field.name, + name: expr_field.name.clone(), }); } @@ -369,7 +385,7 @@ impl<'sc> TypedExpression<'sc> { .is_none() { errors.push(CompileError::StructDoesntHaveThisField { - field_name: field.name, + field_name: 
field.name.primary_name.clone(), struct_name: definition.name.primary_name, span: field.span, }); @@ -380,7 +396,7 @@ impl<'sc> TypedExpression<'sc> { struct_name: definition.name.clone(), fields: typed_fields_buf, }, - return_type: TypeInfo::Struct { + return_type: ResolvedType::Struct { name: definition.name.clone(), }, is_constant: IsConstant::No, @@ -391,24 +407,23 @@ impl<'sc> TypedExpression<'sc> { name_parts, span, } => { - // let name_parts = VecDeque::from(name_parts); // this must be >= 2, or else the parser would not have matched it. asserting that // invariant here, since it is an assumption that is acted upon later. assert!(name_parts.len() >= 2); - let return_type = type_check!( + let (return_type, resolved_type_of_parent) = type_check!( namespace.find_subfield(&name_parts), return err(warnings, errors), warnings, errors ); - TypedExpression { return_type, expression: TypedExpressionVariant::SubfieldExpression { - unary_op, + unary_op, name: name_parts, span, + resolved_type_of_parent }, is_constant: IsConstant::No, } @@ -422,7 +437,12 @@ impl<'sc> TypedExpression<'sc> { let (method, parent_type) = if subfield_exp.is_empty() { // if subfield exp is empty, then we are calling a method using either :: // syntax or an operator - let ns = type_check!(namespace.find_module(&method_name.prefixes), return err(warnings, errors), warnings, errors); + let ns = type_check!( + namespace.find_module(&method_name.prefixes), + return err(warnings, errors), + warnings, + errors + ); // a method is defined by the type of the parent, and in this case the parent // is the first argument let parent_expr = match TypedExpression::type_check( @@ -460,21 +480,20 @@ impl<'sc> TypedExpression<'sc> { parent_expr.return_type, ) } else { - let parent_type = type_check!( + let (parent_type, _) = type_check!( namespace.find_subfield(&subfield_exp.clone()), return err(warnings, errors), warnings, errors ); ( - match namespace.find_method_for_type( - &parent_type, - 
method_name.suffix.clone(), - ) { + match namespace + .find_method_for_type(&parent_type, method_name.suffix.clone()) + { Some(o) => o, None => todo!("Method not found error"), }, - parent_type + parent_type, ) }; @@ -482,8 +501,8 @@ impl<'sc> TypedExpression<'sc> { let zipped = method.parameters.iter().zip(arguments.iter()); let mut typed_arg_buf = vec![]; - for (FunctionParameter { r#type, .. }, arg) in zipped { - let un_self_type = if *r#type == TypeInfo::SelfType { + for (TypedFunctionParameter { r#type, .. }, arg) in zipped { + let un_self_type = if r#type == &ResolvedType::SelfType { parent_type.clone() } else { r#type.clone() @@ -507,16 +526,90 @@ impl<'sc> TypedExpression<'sc> { name: method_name.into(), // TODO todo!("put the actual fully-typed function bodies in these applications"), arguments: typed_arg_buf, }, - return_type: method.return_type.clone(), + return_type: method.return_type, is_constant: IsConstant::No, } } Expression::Unit { span: _span } => TypedExpression { expression: TypedExpressionVariant::Unit, - return_type: TypeInfo::Unit, + return_type: ResolvedType::Unit, is_constant: IsConstant::Yes, }, + Expression::DelineatedPath { + call_path, + span, + instantiator, + type_arguments, + } => { + // The first step is to determine if the call path refers to a module or an enum. + // We could rely on the capitalization convention, where modules are lowercase + // and enums are uppercase, but this is not robust in the long term. + // Instead, we try to resolve both paths. + // If only one exists, then we use that one. Otherwise, if both exist, it is + // an ambiguous reference error. + let module_result = namespace.find_module(&call_path.prefixes).ok().cloned(); + /* + let enum_result_result = { + // an enum could be combined with a module path + // e.g. + // ``` + // module1::MyEnum::Variant1 + // ``` + // + // so, in this case, the suffix is Variant1 and the prefixes are module1 and + // MyEnum. 
When looking for an enum, we just want the _last_ prefix entry in the + // namespace of the first 0..len-1 entries' module + namespace.find_enum(&all_path.prefixes[0]) + }; + */ + let enum_module_combined_result = { + // also, check if this is an enum _in_ another module. + let (module_path, enum_name) = + call_path.prefixes.split_at(call_path.prefixes.len() - 1); + let enum_name = enum_name[0].clone(); + let namespace = namespace.find_module(module_path); + let namespace = namespace.ok(); + namespace.map(|ns| ns.find_enum(&enum_name)).flatten() + }; + let type_arguments = type_arguments + .iter() + .map(|x| namespace.resolve_type(x)) + .collect(); + // now we can see if this thing is a symbol (typed declaration) or reference to an + // enum instantiation + let this_thing: Either = + match (module_result, enum_module_combined_result) { + (Some(_module), Some(_enum_res)) => todo!("Ambiguous reference error"), + (Some(module), None) => { + match module.get_symbol(&call_path.suffix).cloned() { + Some(decl) => Either::Left(decl), + None => todo!("symbol not found in module error"), + } + } + (None, Some(enum_decl)) => Either::Right(type_check!( + instantiate_enum( + enum_decl, + call_path.suffix, + instantiator, + type_arguments, + namespace + ), + return err(warnings, errors), + warnings, + errors + )), + (None, None) => todo!("symbol not found error"), + }; + + match this_thing { + Either::Left(_) => { + errors.push(CompileError::Unimplemented("Unable to refer to declarations in other modules directly. 
Try importing it instead.", span)); + return err(warnings, errors); + } + Either::Right(expr) => expr, + } + } a => { println!("Unimplemented semantics for expression: {:?}", a); errors.push(CompileError::Unimplemented( @@ -553,4 +646,11 @@ impl<'sc> TypedExpression<'sc> { ok(typed_expression, warnings, errors) } + pub(crate) fn pretty_print(&self) -> String { + format!( + "{} ({})", + self.expression.pretty_print(), + self.return_type.friendly_type_str() + ) + } } diff --git a/parser/src/semantics/ast_node/expression/typed_expression_variant.rs b/parser/src/semantics/ast_node/expression/typed_expression_variant.rs index 83d8c7d5f36..e09ea5b423e 100644 --- a/parser/src/semantics/ast_node/expression/typed_expression_variant.rs +++ b/parser/src/semantics/ast_node/expression/typed_expression_variant.rs @@ -1,6 +1,6 @@ use super::*; -use crate::parse_tree::Ident; use crate::semantics::ast_node::*; +use crate::Ident; #[derive(Clone, Debug)] pub(crate) enum TypedExpressionVariant<'sc> { Literal(Literal<'sc>), @@ -43,5 +43,69 @@ pub(crate) enum TypedExpressionVariant<'sc> { unary_op: Option, name: Vec>, span: Span<'sc>, + resolved_type_of_parent: ResolvedType<'sc>, }, + EnumInstantiation { + /// for printing + enum_name: Ident<'sc>, + /// for printing + variant_name: Ident<'sc>, + tag: usize, + contents: Option>>, + }, +} + +impl<'sc> TypedExpressionVariant<'sc> { + pub(crate) fn pretty_print(&self) -> String { + match self { + TypedExpressionVariant::Literal(lit) => format!( + "literal {}", + match lit { + Literal::U8(content) => content.to_string(), + Literal::U16(content) => content.to_string(), + Literal::U32(content) => content.to_string(), + Literal::U64(content) => content.to_string(), + Literal::U128(content) => content.to_string(), + Literal::String(content) => content.to_string(), + Literal::Boolean(content) => content.to_string(), + Literal::Byte(content) => content.to_string(), + Literal::Byte32(content) => content + .iter() + .map(|x| x.to_string()) + 
.collect::>() + .join(", "), + } + ), + TypedExpressionVariant::FunctionApplication { name, .. } => { + format!("\"{}\" fn entry", name.suffix.primary_name) + } + TypedExpressionVariant::Unit => "unit".into(), + TypedExpressionVariant::Array { .. } => "array".into(), + TypedExpressionVariant::MatchExpression { .. } => "match exp".into(), + TypedExpressionVariant::StructExpression { struct_name, .. } => { + format!("\"{}\" struct init", struct_name.primary_name) + } + TypedExpressionVariant::CodeBlock(_) => "code block entry".into(), + TypedExpressionVariant::FunctionParameter => "fn param access".into(), + TypedExpressionVariant::IfExp { .. } => "if exp".into(), + TypedExpressionVariant::AsmExpression { .. } => "inline asm".into(), + TypedExpressionVariant::SubfieldExpression { span, .. } => { + format!("\"{}\" subfield access", span.as_str()) + } + TypedExpressionVariant::VariableExpression { name, .. } => { + format!("\"{}\" variable exp", name.primary_name) + } + TypedExpressionVariant::EnumInstantiation { + tag, + enum_name, + variant_name, + .. 
+ } => { + format!( + "{}::{} enum instantiation (tag: {})", + enum_name.primary_name, variant_name.primary_name, tag + ) + } + } + } } diff --git a/parser/src/semantics/ast_node/impl_trait.rs b/parser/src/semantics/ast_node/impl_trait.rs index 02ad80c7763..97d7f0317b7 100644 --- a/parser/src/semantics/ast_node/impl_trait.rs +++ b/parser/src/semantics/ast_node/impl_trait.rs @@ -1,8 +1,10 @@ -use super::ERROR_RECOVERY_DECLARATION; -use crate::error::*; -use crate::parse_tree::{FunctionParameter, Ident, ImplTrait, TraitFn}; +use super::{ + declaration::{TypedFunctionParameter, TypedTraitFn}, + ERROR_RECOVERY_DECLARATION, +}; +use crate::parse_tree::ImplTrait; use crate::semantics::{Namespace, TypedDeclaration, TypedFunctionDeclaration}; -use crate::types::TypeInfo; +use crate::{Ident, error::*, types::ResolvedType}; pub(crate) fn implementation_of_trait<'sc>( impl_trait: ImplTrait<'sc>, @@ -18,6 +20,7 @@ pub(crate) fn implementation_of_trait<'sc>( type_arguments_span, block_span, } = impl_trait; + let type_implementing_for = namespace.resolve_type(&type_implementing_for); match namespace.get_symbol(&trait_name) { Some(TypedDeclaration::TraitDeclaration(tr)) => { let mut tr = tr.clone(); @@ -30,20 +33,20 @@ pub(crate) fn implementation_of_trait<'sc>( } // replace all references to Self in the interface surface with the // concrete type - for TraitFn { + for TypedTraitFn { ref mut parameters, ref mut return_type, .. } in tr.interface_surface.iter_mut() { - parameters - .iter_mut() - .for_each(|FunctionParameter { ref mut r#type, .. }| { - if r#type == &TypeInfo::SelfType { + parameters.iter_mut().for_each( + |TypedFunctionParameter { ref mut r#type, .. 
}| { + if r#type == &ResolvedType::SelfType { *r#type = type_implementing_for.clone(); } - }); - if return_type == &TypeInfo::SelfType { + }, + ); + if return_type == &ResolvedType::SelfType { *return_type = type_implementing_for.clone(); } } @@ -56,21 +59,36 @@ pub(crate) fn implementation_of_trait<'sc>( let mut function_checklist: Vec<&Ident> = tr .interface_surface .iter() - .map(|TraitFn { name, .. }| name) + .map(|TypedTraitFn { name, .. }| name) .collect(); - for mut fn_decl in functions.into_iter() { + for fn_decl in functions.into_iter() { // replace SelfType with type of implementor // i.e. fn add(self, other: u64) -> Self becomes fn // add(self: u64, other: u64) -> u64 + + let mut fn_decl = type_check!( + TypedFunctionDeclaration::type_check( + fn_decl.clone(), + &namespace, + None, + "", + Some(type_implementing_for.clone()) + ), + continue, + warnings, + errors + ); + /* fn_decl .parameters .iter_mut() - .filter(|FunctionParameter { r#type, .. }| r#type == &TypeInfo::SelfType) - .for_each(|FunctionParameter { ref mut r#type, .. }| { + .filter(|TypedFunctionParameter { r#type, .. }| r#type == &ResolvedType::SelfType) + .for_each(|TypedFunctionParameter { ref mut r#type, .. }| { *r#type = type_implementing_for.clone() }); + */ - if fn_decl.return_type == TypeInfo::SelfType { + if fn_decl.return_type == ResolvedType::SelfType { fn_decl.return_type = type_implementing_for.clone(); } @@ -82,49 +100,51 @@ pub(crate) fn implementation_of_trait<'sc>( // ensure this fn decl's parameters and signature lines up with the one // in the trait - if let Some(mut l_e) = tr.interface_surface.iter().find_map(|TraitFn { name, parameters, return_type }| { - if fn_decl.name == *name { - let mut errors = vec![]; - if let Some(mut maybe_err) = parameters.iter().zip(fn_decl.parameters.iter()).find_map(|(fn_decl_param, trait_param)| { - let mut errors = vec![]; - if let TypeInfo::Custom { .. 
/* TODO use trait constraints as part of the type here to implement trait constraint solver */ } = fn_decl_param.r#type { - match trait_param.r#type { - TypeInfo::Custom { .. } => (), - _ => + if let Some(mut l_e) = tr.interface_surface.iter().find_map(|TypedTraitFn { name, parameters, return_type }| { + if fn_decl.name == *name { + let mut errors = vec![]; + if let Some(mut maybe_err) = parameters.iter().zip(fn_decl.parameters.iter()).find_map(|(fn_decl_param, trait_param)| { + let mut errors = vec![]; + if let ResolvedType::Generic { .. /* TODO use trait constraints as part of the type here to implement trait constraint solver */ } = fn_decl_param.r#type { + match trait_param.r#type { + ResolvedType::Generic { .. } => (), + _ => - errors.push(CompileError::MismatchedTypeInTrait { - span: fn_decl_param.type_span.clone(), - given: fn_decl_param.r#type.friendly_type_str(), - expected: trait_param.r#type.friendly_type_str() - }) - } - } else { - if fn_decl_param.r#type != trait_param.r#type { - errors.push(CompileError::MismatchedTypeInTrait {span: fn_decl_param.type_span.clone(), - given: fn_decl_param.r#type.friendly_type_str(), - expected: trait_param.r#type.friendly_type_str()}); - } - } - if errors.is_empty() { None } else { Some(errors) } - }) { - errors.append(&mut maybe_err); + errors.push(CompileError::MismatchedTypeInTrait { + span: trait_param.type_span.clone(), + given: trait_param.r#type.friendly_type_str(), + expected: fn_decl_param.r#type.friendly_type_str() + }) } - if fn_decl.return_type != *return_type { + } else { + if fn_decl_param.r#type != trait_param.r#type { errors.push(CompileError::MismatchedTypeInTrait { - span: fn_decl.return_type_span.clone(), - expected: return_type.friendly_type_str(), - given: fn_decl.return_type.friendly_type_str() + span: trait_param.type_span.clone(), + given: trait_param.r#type.friendly_type_str(), + expected: fn_decl_param.r#type.friendly_type_str() }); } - if errors.is_empty() { None } else { Some(errors) } - } 
else { - None - } - }) - { - errors.append(&mut l_e); - continue; + } + if errors.is_empty() { None } else { Some(errors) } + }) { + errors.append(&mut maybe_err); + } + if fn_decl.return_type != *return_type { + errors.push(CompileError::MismatchedTypeInTrait { + span: fn_decl.return_type_span.clone(), + expected: return_type.friendly_type_str(), + given: fn_decl.return_type.friendly_type_str() + }); } + if errors.is_empty() { None } else { Some(errors) } + } else { + None + } + }) + { + errors.append(&mut l_e); + continue; + } // remove this function from the "checklist" let ix_of_thing_to_remove = match function_checklist .iter() @@ -142,18 +162,13 @@ pub(crate) fn implementation_of_trait<'sc>( }; function_checklist.remove(ix_of_thing_to_remove); - functions_buf.push(type_check!( - TypedFunctionDeclaration::type_check(fn_decl.clone(), &namespace, None, ""), - continue, - warnings, - errors - )); + functions_buf.push(fn_decl); } // check that the implementation checklist is complete if !function_checklist.is_empty() { errors.push(CompileError::MissingInterfaceSurfaceMethods { - span: block_span, + span: block_span.clone(), missing_functions: function_checklist .into_iter() .map(|Ident { primary_name, .. 
}| primary_name.to_string()) @@ -162,8 +177,8 @@ pub(crate) fn implementation_of_trait<'sc>( }); } - namespace.insert_trait_implementation(trait_name, type_implementing_for, functions_buf); - ok(TypedDeclaration::SideEffect, warnings, errors) + namespace.insert_trait_implementation(trait_name.clone(), type_implementing_for, functions_buf.clone()); + ok(TypedDeclaration::ImplTrait { trait_name, span: block_span, methods: functions_buf }, warnings, errors) } Some(_) => { errors.push(CompileError::NotATrait { diff --git a/parser/src/semantics/ast_node/mod.rs b/parser/src/semantics/ast_node/mod.rs index 950e7769f61..503aa549ade 100644 --- a/parser/src/semantics/ast_node/mod.rs +++ b/parser/src/semantics/ast_node/mod.rs @@ -1,8 +1,9 @@ use crate::error::*; use crate::parse_tree::*; use crate::semantics::Namespace; -use crate::types::TypeInfo; -use crate::{AstNode, AstNodeContent, ReturnStatement}; +use crate::types::{ResolvedType, TypeInfo}; +use crate::{AstNode, AstNodeContent, ReturnStatement, Ident}; +use declaration::TypedTraitFn; use pest::Span; mod code_block; @@ -14,11 +15,12 @@ mod while_loop; use super::ERROR_RECOVERY_DECLARATION; pub(crate) use code_block::TypedCodeBlock; -pub use declaration::{TypedDeclaration, TypedFunctionDeclaration}; -pub(crate) use declaration::{TypedReassignment, TypedTraitDeclaration, TypedVariableDeclaration}; -pub(crate) use expression::{ - TypedExpression, TypedExpressionVariant, ERROR_RECOVERY_EXPR, +pub use declaration::{ + TypedDeclaration, TypedEnumDeclaration, TypedEnumVariant, TypedFunctionDeclaration, + TypedFunctionParameter, TypedStructDeclaration, TypedStructField, }; +pub(crate) use declaration::{TypedReassignment, TypedTraitDeclaration, TypedVariableDeclaration}; +pub(crate) use expression::*; use impl_trait::implementation_of_trait; use return_statement::TypedReturnStatement; pub(crate) use while_loop::TypedWhileLoop; @@ -42,21 +44,37 @@ pub(crate) enum TypedAstNodeContent<'sc> { SideEffect, } -#[derive(Clone, Debug)] 
-pub(crate) struct TypedAstNode<'sc> { +#[derive(Clone)] +pub struct TypedAstNode<'sc> { pub(crate) content: TypedAstNodeContent<'sc>, pub(crate) span: Span<'sc>, } +impl<'sc> std::fmt::Debug for TypedAstNode<'sc> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use TypedAstNodeContent::*; + let text = match &self.content { + ReturnStatement(TypedReturnStatement { ref expr }) => { + format!("return {}", expr.pretty_print()) + } + Declaration(ref typed_decl) => typed_decl.pretty_print(), + Expression(exp) => exp.pretty_print(), + ImplicitReturnExpression(exp) => format!("return {}", exp.pretty_print()), + WhileLoop(w_loop) => w_loop.pretty_print(), + SideEffect => "".into(), + }; + f.write_str(&text) + } +} impl<'sc> TypedAstNode<'sc> { - fn type_info(&self) -> TypeInfo<'sc> { + fn type_info(&self) -> ResolvedType<'sc> { // return statement should be () use TypedAstNodeContent::*; match &self.content { - ReturnStatement(_) | Declaration(_) => TypeInfo::Unit, + ReturnStatement(_) | Declaration(_) => ResolvedType::Unit, Expression(TypedExpression { return_type, .. }) => return_type.clone(), ImplicitReturnExpression(TypedExpression { return_type, .. 
}) => return_type.clone(), - WhileLoop(_) | SideEffect => TypeInfo::Unit, + WhileLoop(_) | SideEffect => ResolvedType::Unit, } } } @@ -65,7 +83,7 @@ impl<'sc> TypedAstNode<'sc> { pub(crate) fn type_check( node: AstNode<'sc>, namespace: &mut Namespace<'sc>, - return_type_annotation: Option>, + return_type_annotation: Option>, help_text: impl Into, ) -> CompileResult<'sc, TypedAstNode<'sc>> { let mut warnings = Vec::new(); @@ -90,17 +108,22 @@ impl<'sc> TypedAstNode<'sc> { body, is_mutable, }) => { + let type_ascription = type_ascription.map(|type_ascription| namespace.resolve_type(&type_ascription)); let body = type_check!( - TypedExpression::type_check( - body, - &namespace, - type_ascription.clone(), - format!("Variable declaration's type annotation (type {})\ - does not match up with the assigned expression's type.", - type_ascription.map(|x| x.friendly_type_str()).unwrap_or("none".into()))), - ERROR_RECOVERY_EXPR.clone(), - warnings, - errors); + TypedExpression::type_check( + body, + &namespace, + type_ascription.clone(), + format!("Variable declaration's type annotation (type {}) \ + does not match up with the assigned expression's type.", + type_ascription.map(|x| x.friendly_type_str()).unwrap_or("none".into()) + ) + ), + ERROR_RECOVERY_EXPR.clone(), + warnings, + errors + ); + let body = TypedDeclaration::VariableDeclaration(TypedVariableDeclaration { name: name.clone(), @@ -112,14 +135,15 @@ impl<'sc> TypedAstNode<'sc> { } Declaration::EnumDeclaration(e) => { let span = e.span.clone(); - let primary_name = e.name; - let decl = TypedDeclaration::EnumDeclaration(e); + let primary_name = e.name.primary_name; + let decl = TypedDeclaration::EnumDeclaration(e.to_typed_decl(namespace)); + namespace.insert(Ident { primary_name, span }, decl.clone()); decl } Declaration::FunctionDeclaration(fn_decl) => { let decl = type_check!( - TypedFunctionDeclaration::type_check(fn_decl, &namespace, None, ""), + TypedFunctionDeclaration::type_check(fn_decl, &namespace, None, "", 
None), return err(warnings, errors), warnings, errors @@ -136,8 +160,18 @@ impl<'sc> TypedAstNode<'sc> { interface_surface, methods, type_parameters, + visibility }) => { let mut methods_buf = Vec::new(); + let interface_surface = interface_surface.into_iter().map(|TraitFn { name, parameters, return_type }| TypedTraitFn { + name, + parameters: parameters + .into_iter() + .map(|FunctionParameter { name, r#type, type_span }| + TypedFunctionParameter { name, r#type: namespace.resolve_type(&r#type), type_span } + ).collect(), + return_type: namespace.resolve_type(&return_type) + }).collect::>(); for FunctionDeclaration { body, name: fn_name, @@ -145,12 +179,14 @@ impl<'sc> TypedAstNode<'sc> { span, return_type, type_parameters, + return_type_span, .. } in methods { let mut namespace = namespace.clone(); parameters.clone().into_iter().for_each( |FunctionParameter { name, r#type, .. }| { + let r#type = namespace.resolve_type(&r#type); namespace.insert( name.clone(), TypedDeclaration::VariableDeclaration( @@ -206,7 +242,13 @@ impl<'sc> TypedAstNode<'sc> { } } } + let parameters = parameters.into_iter().map(|FunctionParameter { name, r#type, type_span }| TypedFunctionParameter { + name, + r#type: namespace.resolve_type(&r#type), + type_span + }).collect::>(); // TODO check code block implicit return + let return_type = namespace.resolve_type(&return_type); let (body, _code_block_implicit_return) = type_check!( TypedCodeBlock::type_check( @@ -218,6 +260,7 @@ impl<'sc> TypedAstNode<'sc> { continue, warnings, errors ); + methods_buf.push(TypedFunctionDeclaration { name: fn_name, body, @@ -225,6 +268,10 @@ impl<'sc> TypedAstNode<'sc> { span, return_type, type_parameters, + // For now, any method declared is automatically public. + // We can tweak that later if we want. 
+ visibility: Visibility::Public, + return_type_span }); } let trait_decl = @@ -233,6 +280,7 @@ impl<'sc> TypedAstNode<'sc> { interface_surface, methods: methods_buf, type_parameters, + visibility, }); namespace.insert(name, trait_decl.clone()); trait_decl @@ -303,11 +351,8 @@ impl<'sc> TypedAstNode<'sc> { block_span, .. }) => { + let type_implementing_for_resolved = namespace.resolve_type(&type_implementing_for); // check, if this is a custom type, if it is in scope or a generic. - let type_implementing_for = match type_implementing_for { - TypeInfo::Custom { name } => lookup_in_scope(&name, namespace) , - o => o, - }; let mut functions_buf: Vec = vec![]; for mut fn_decl in functions.into_iter() { let mut type_arguments = type_arguments.clone(); @@ -331,31 +376,38 @@ impl<'sc> TypedAstNode<'sc> { } functions_buf.push(type_check!( - TypedFunctionDeclaration::type_check(fn_decl, &namespace, None, ""), + TypedFunctionDeclaration::type_check(fn_decl, &namespace, None, "", Some(type_implementing_for_resolved.clone())), continue, warnings, errors )); } - namespace.insert_trait_implementation( Ident { primary_name: "r#Self", span: block_span.clone(), }, - type_implementing_for, + type_implementing_for_resolved, functions_buf, ); TypedDeclaration::SideEffect } Declaration::StructDeclaration(decl) => { // look up any generic or struct types in the namespace - let mut decl = decl.clone(); - for ref mut field in decl.fields.iter_mut() { - if let TypeInfo::Custom { ref name } = field.r#type { - field.r#type = lookup_in_scope(name , namespace); + let fields = decl.fields.into_iter().map(|StructField { name, r#type, span }| { + TypedStructField { + name, + r#type: namespace.resolve_type(&r#type), + span } - } + }).collect::>(); + let decl = TypedStructDeclaration { + name: decl.name.clone(), + type_parameters: decl.type_parameters.clone(), + fields, + visibility: decl.visibility + + }; // insert struct into namespace namespace.insert( @@ -410,7 +462,7 @@ impl<'sc> 
TypedAstNode<'sc> { TypedExpression::type_check( condition, &namespace, - Some(TypeInfo::Boolean), + Some(ResolvedType::Boolean), "A while loop's loop condition must be a boolean expression." ), return err(warnings, errors), @@ -421,10 +473,10 @@ impl<'sc> TypedAstNode<'sc> { TypedCodeBlock::type_check( body, &namespace, - Some(TypeInfo::Unit), + Some(ResolvedType::Unit), "A while loop's loop body cannot implicitly return a value.\ Try assigning it to a mutable variable declared outside of the loop instead."), - (TypedCodeBlock { contents: vec![] }, TypeInfo::Unit), + (TypedCodeBlock { contents: vec![] }, Some(ResolvedType::Unit)), warnings, errors ); @@ -445,8 +497,8 @@ impl<'sc> TypedAstNode<'sc> { r#type: node.type_info(), }; assert_or_warn!( - node.type_info() == TypeInfo::Unit - || node.type_info() == TypeInfo::ErrorRecovery, + node.type_info() == ResolvedType::Unit + || node.type_info() == ResolvedType::ErrorRecovery, warnings, node.span.clone(), warning @@ -458,16 +510,3 @@ impl<'sc> TypedAstNode<'sc> { ok(node, warnings, errors) } } - - - /// this function either returns a struct (i.e. custom type), `None`, denoting the type that is - /// being looked for is actually a generic. -fn lookup_in_scope<'sc>(custom_type_name: &Ident<'sc>, namespace: &Namespace<'sc>) -> TypeInfo<'sc> { - match namespace.get_symbol(custom_type_name) { - Some(TypedDeclaration::StructDeclaration(StructDeclaration { - name, .. 
- })) => TypeInfo::Struct { name: name.clone() }, - Some(_) => TypeInfo::Generic { name: custom_type_name.clone() }, - None => TypeInfo::Generic { name: custom_type_name.clone() } - } -} diff --git a/parser/src/semantics/ast_node/while_loop.rs b/parser/src/semantics/ast_node/while_loop.rs index 70ba6d4d6f9..c30e0e25961 100644 --- a/parser/src/semantics/ast_node/while_loop.rs +++ b/parser/src/semantics/ast_node/while_loop.rs @@ -4,3 +4,9 @@ pub(crate) struct TypedWhileLoop<'sc> { pub(crate) condition: TypedExpression<'sc>, pub(crate) body: TypedCodeBlock<'sc>, } + +impl<'sc> TypedWhileLoop<'sc> { + pub(crate) fn pretty_print(&self) -> String { + format!("while loop on {}", self.condition.pretty_print()) + } +} diff --git a/parser/src/semantics/mod.rs b/parser/src/semantics/mod.rs index 54af9d20521..7f29c885741 100644 --- a/parser/src/semantics/mod.rs +++ b/parser/src/semantics/mod.rs @@ -1,4 +1,4 @@ -mod ast_node; +pub(crate) mod ast_node; mod namespace; mod syntax_tree; pub(crate) use ast_node::{TypedAstNode, TypedAstNodeContent, TypedExpression}; diff --git a/parser/src/semantics/namespace.rs b/parser/src/semantics/namespace.rs index 55f2b1740e1..97c98356676 100644 --- a/parser/src/semantics/namespace.rs +++ b/parser/src/semantics/namespace.rs @@ -1,6 +1,11 @@ -use super::{ast_node::TypedVariableDeclaration, TypedExpression}; +use super::{ + ast_node::{ + TypedEnumDeclaration, TypedStructDeclaration, TypedStructField, TypedVariableDeclaration, + }, + TypedExpression, +}; use crate::error::*; -use crate::parse_tree::{StructDeclaration, StructField}; +use crate::types::ResolvedType; use crate::CallPath; use crate::{CompileResult, TypeInfo}; use crate::{Ident, TypedDeclaration, TypedFunctionDeclaration}; @@ -11,12 +16,35 @@ type ModuleName = String; #[derive(Clone, Debug, Default)] pub struct Namespace<'sc> { symbols: HashMap, TypedDeclaration<'sc>>, - implemented_traits: HashMap<(Ident<'sc>, TypeInfo<'sc>), Vec>>, + implemented_traits: + HashMap<(Ident<'sc>, 
ResolvedType<'sc>), Vec>>, /// any imported namespaces associated with an ident which is a library name modules: HashMap>, } impl<'sc> Namespace<'sc> { + /// this function either returns a struct (i.e. custom type), `None`, denoting the type that is + /// being looked for is actually a generic, not-yet-resolved type. + /// Eventually, this should return a [ResolvedType], which currently doesn't exist, + /// to further solidify the bounary between the monomorphized AST and the parameterized one. + pub(crate) fn resolve_type(&self, ty: &TypeInfo<'sc>) -> ResolvedType<'sc> { + let ty = ty.clone(); + match ty { + TypeInfo::Custom { name } => match self.get_symbol(&name) { + Some(TypedDeclaration::StructDeclaration(TypedStructDeclaration { + name, .. + })) => ResolvedType::Struct { name: name.clone() }, + Some(TypedDeclaration::EnumDeclaration(TypedEnumDeclaration { name, .. })) => { + ResolvedType::Enum { name: name.clone() } + } + Some(_) => ResolvedType::Generic { name: name.clone() }, + None => ResolvedType::Generic { name: name.clone() }, + }, + o => o.to_resolved(), + } + } + /// Given a path to a module, import everything from it and merge it into this namespace. + /// This is used when an import path contains an asterisk. pub(crate) fn star_import(&mut self, idents: Vec>) -> CompileResult<()> { let idents_buf = idents.into_iter(); let mut namespace = self.clone(); @@ -38,6 +66,7 @@ impl<'sc> Namespace<'sc> { ok((), vec![], vec![]) } + /// Pull a single item from a module and import it into this namespace. 
pub(crate) fn item_import( &mut self, path: Vec>, @@ -154,7 +183,7 @@ impl<'sc> Namespace<'sc> { } } - pub(crate) fn find_module(&self, path: &Vec>) -> CompileResult<'sc, Namespace<'sc>> { + pub(crate) fn find_module(&self, path: &[Ident<'sc>]) -> CompileResult<'sc, Namespace<'sc>> { let mut namespace = self.clone(); let mut errors = vec![]; let warnings = vec![]; @@ -181,7 +210,7 @@ impl<'sc> Namespace<'sc> { pub(crate) fn insert_trait_implementation( &mut self, trait_name: Ident<'sc>, - type_implementing_for: TypeInfo<'sc>, + type_implementing_for: ResolvedType<'sc>, functions_buf: Vec>, ) -> CompileResult<()> { let mut warnings = vec![]; @@ -207,14 +236,21 @@ impl<'sc> Namespace<'sc> { pub fn insert_module(&mut self, module_name: String, module_contents: Namespace<'sc>) { self.modules.insert(module_name, module_contents); } - + pub(crate) fn find_enum(&self, enum_name: &Ident<'sc>) -> Option> { + match self.get_symbol(enum_name) { + Some(TypedDeclaration::EnumDeclaration(inner)) => Some(inner.clone()), + _ => None, + } + } + /// Returns a tuple where the first element is the [ResolvedType] of the actual expression, + /// and the second is the [ResolvedType] of its parent, for control-flow analysis. 
pub(crate) fn find_subfield( &self, subfield_exp: &[Ident<'sc>], - ) -> CompileResult<'sc, TypeInfo<'sc>> { + ) -> CompileResult<'sc, (ResolvedType<'sc>, ResolvedType<'sc>)> { let mut warnings = vec![]; let mut errors = vec![]; - let mut ident_iter = subfield_exp.into_iter(); + let mut ident_iter = subfield_exp.into_iter().peekable(); let first_ident = ident_iter.next().unwrap(); let symbol = match self.symbols.get(&first_ident) { Some(s) => s, @@ -226,6 +262,15 @@ impl<'sc> Namespace<'sc> { return err(warnings, errors); } }; + if ident_iter.peek().is_none() { + let ty = type_check!( + symbol.return_type(), + return err(warnings, errors), + warnings, + errors + ); + return ok((ty.clone(), ty), warnings, errors); + } let (mut fields, struct_name) = match self.get_struct_type_fields(symbol, &first_ident) { CompileResult::Ok { value, @@ -254,64 +299,69 @@ impl<'sc> Namespace<'sc> { warnings, errors ); + let mut parent_rover = ret_ty.clone(); for ident in ident_iter { // find the ident in the currently available fields - let StructField { r#type, .. } = - match fields.iter().find(|x| x.name == ident.primary_name) { - Some(field) => field.clone(), - None => { - // gather available fields for the error message - let field_name = ident.primary_name.clone(); - let available_fields = - fields.iter().map(|x| x.name.clone()).collect::>(); + let TypedStructField { r#type, .. 
} = match fields.iter().find(|x| x.name == *ident) { + Some(field) => field.clone(), + None => { + // gather available fields for the error message + let field_name = ident.primary_name.clone(); + let available_fields = fields + .iter() + .map(|x| x.name.primary_name.clone()) + .collect::>(); - errors.push(CompileError::FieldNotFound { - field_name, - struct_name: struct_name.primary_name.clone(), - available_fields: available_fields.join(", "), - span: ident.span.clone(), - }); - return err(warnings, errors); - } - }; + errors.push(CompileError::FieldNotFound { + field_name, + struct_name: struct_name.primary_name.clone(), + available_fields: available_fields.join(", "), + span: ident.span.clone(), + }); + return err(warnings, errors); + } + }; match r#type { - TypeInfo::Struct { .. } => { + ResolvedType::Struct { .. } => { let (l_fields, _l_name) = type_check!( self.find_struct_name_and_fields(&r#type, &ident), return err(warnings, errors), warnings, errors ); + parent_rover = ret_ty.clone(); fields = l_fields; } _ => { fields = vec![]; + parent_rover = ret_ty.clone(); ret_ty = r#type; } } } - ok(ret_ty, warnings, errors) + ok((ret_ty, parent_rover), warnings, errors) } pub(crate) fn get_methods_for_type( &self, - r#type: &TypeInfo<'sc>, - ) -> Option>> { - for ((_trait_name, type_info), methods) in &self.implemented_traits { + r#type: &ResolvedType<'sc>, + ) -> Vec> { + let mut methods = vec![]; + for ((_trait_name, type_info), l_methods) in &self.implemented_traits { if type_info == r#type { - return Some(methods.clone()); + methods.append(&mut l_methods.clone()); } } - None + methods } pub(crate) fn find_method_for_type( &self, - r#type: &TypeInfo<'sc>, + r#type: &ResolvedType<'sc>, method_name: Ident<'sc>, ) -> Option> { - let methods = self.get_methods_for_type(r#type)?; + let methods = self.get_methods_for_type(r#type); methods .into_iter() .find(|TypedFunctionDeclaration { name, .. 
}| *name == method_name) @@ -325,17 +375,22 @@ impl<'sc> Namespace<'sc> { &self, decl: &TypedDeclaration<'sc>, debug_ident: &Ident<'sc>, - ) -> CompileResult<'sc, (Vec>, &Ident<'sc>)> { + ) -> CompileResult<'sc, (Vec>, &Ident<'sc>)> { match decl { TypedDeclaration::VariableDeclaration(TypedVariableDeclaration { body: TypedExpression { return_type, .. }, .. }) => self.find_struct_name_and_fields(return_type, debug_ident), - o => todo!( - "err: {} is not a struct with field {}", - o.friendly_name(), - debug_ident.primary_name - ), + a => { + return err( + vec![], + vec![CompileError::NotAStruct { + name: debug_ident.primary_name.clone(), + span: debug_ident.span.clone(), + actually: a.friendly_name().to_string(), + }], + ) + } } } /// given a type, look that type up in the namespace and: @@ -343,21 +398,22 @@ impl<'sc> Namespace<'sc> { /// 2) return its fields and struct name fn find_struct_name_and_fields( &self, - return_type: &TypeInfo<'sc>, + return_type: &ResolvedType<'sc>, debug_ident: &Ident<'sc>, - ) -> CompileResult<'sc, (Vec>, &Ident<'sc>)> { - if let TypeInfo::Struct { name } = return_type { + ) -> CompileResult<'sc, (Vec>, &Ident<'sc>)> { + if let ResolvedType::Struct { name } = return_type { match self.get_symbol(name) { - Some(TypedDeclaration::StructDeclaration(StructDeclaration { + Some(TypedDeclaration::StructDeclaration(TypedStructDeclaration { fields, name, .. 
})) => ok((fields.clone(), name), vec![], vec![]), - Some(_) => err( + Some(a) => err( vec![], vec![CompileError::NotAStruct { name: debug_ident.span.as_str(), span: debug_ident.span.clone(), + actually: a.friendly_name().to_string(), }], ), None => err( @@ -374,6 +430,7 @@ impl<'sc> Namespace<'sc> { vec![CompileError::NotAStruct { name: debug_ident.span.as_str(), span: debug_ident.span.clone(), + actually: return_type.friendly_type_str(), }], ) } diff --git a/parser/src/semantics/syntax_tree.rs b/parser/src/semantics/syntax_tree.rs index 4c83643ebfe..76ad275e092 100644 --- a/parser/src/semantics/syntax_tree.rs +++ b/parser/src/semantics/syntax_tree.rs @@ -1,9 +1,9 @@ use super::{TypedAstNode, TypedAstNodeContent, TypedDeclaration, TypedFunctionDeclaration}; -use crate::error::*; use crate::semantics::Namespace; -use crate::types::TypeInfo; use crate::ParseTree; +use crate::{error::*, types::ResolvedType}; +#[derive(Clone, Copy, PartialEq, Eq)] pub(crate) enum TreeType { Predicate, Script, @@ -13,7 +13,7 @@ pub(crate) enum TreeType { #[derive(Debug)] pub(crate) struct TypedParseTree<'sc> { - root_nodes: Vec>, + pub(crate) root_nodes: Vec>, pub(crate) namespace: Namespace<'sc>, } @@ -88,7 +88,7 @@ impl<'sc> TypedParseTree<'sc> { } let main_func = main_func_vec[0]; match main_func { - (TypeInfo::Boolean, _span) => (), + (ResolvedType::Boolean, _span) => (), (_, span) => { errors.push(CompileError::PredicateMainDoesNotReturnBool(span.clone())) } diff --git a/parser/src/types/mod.rs b/parser/src/types/mod.rs new file mode 100644 index 00000000000..71090553f23 --- /dev/null +++ b/parser/src/types/mod.rs @@ -0,0 +1,4 @@ +mod resolved_type; +mod type_info; +pub use resolved_type::ResolvedType; +pub use type_info::{IntegerBits, TypeInfo}; diff --git a/parser/src/types.rs b/parser/src/types/resolved_type.rs similarity index 66% rename from parser/src/types.rs rename to parser/src/types/resolved_type.rs index d2e27a7e42b..27c17ad0d6b 100644 --- a/parser/src/types.rs +++ 
b/parser/src/types/resolved_type.rs @@ -1,11 +1,14 @@ -use crate::error::*; -use crate::{parse_tree::Ident, Rule}; -use pest::iterators::Pair; +use super::IntegerBits; +use crate::{error::*, Ident}; use pest::Span; - -/// Type information without an associated value, used for type inferencing and definition. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum TypeInfo<'sc> { +/// [ResolvedType] refers to a fully qualified type that has been looked up in the namespace. +/// Type symbols are ambiguous in the beginning of compilation, as any custom symbol could be +/// an enum, struct, or generic type name. This enum is similar to [TypeInfo], except it lacks +/// the capability to be `TypeInfo::Custom`, i.e., pending this resolution of whether it is generic or a +/// known type. This allows us to ensure structurally that no unresolved types bleed into the +/// syntax tree. +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub enum ResolvedType<'sc> { String, UnsignedInteger(IntegerBits), Boolean, @@ -13,9 +16,6 @@ pub enum TypeInfo<'sc> { /// or just a generic parameter if it is not. /// At parse time, there is no sense of scope, so this determination is not made /// until the semantic analysis stage. 
- Custom { - name: Ident<'sc>, - }, Generic { name: Ident<'sc>, }, @@ -26,61 +26,60 @@ pub enum TypeInfo<'sc> { Struct { name: Ident<'sc>, }, + Enum { + name: Ident<'sc>, + }, // used for recovering from errors in the ast ErrorRecovery, } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum IntegerBits { - Eight, - Sixteen, - ThirtyTwo, - SixtyFour, - OneTwentyEight, -} -impl<'sc> TypeInfo<'sc> { - pub(crate) fn parse_from_pair(input: Pair<'sc, Rule>) -> CompileResult<'sc, Self> { - let mut r#type = input.into_inner(); - Self::parse_from_pair_inner(r#type.next().unwrap()) - } - pub(crate) fn parse_from_pair_inner(input: Pair<'sc, Rule>) -> CompileResult<'sc, Self> { - let mut warnings = vec![]; - let mut errors = vec![]; - ok( - match input.as_str().trim() { - "u8" => TypeInfo::UnsignedInteger(IntegerBits::Eight), - "u16" => TypeInfo::UnsignedInteger(IntegerBits::Sixteen), - "u32" => TypeInfo::UnsignedInteger(IntegerBits::ThirtyTwo), - "u64" => TypeInfo::UnsignedInteger(IntegerBits::SixtyFour), - "u128" => TypeInfo::UnsignedInteger(IntegerBits::OneTwentyEight), - "bool" => TypeInfo::Boolean, - "string" => TypeInfo::String, - "unit" => TypeInfo::Unit, - "byte" => TypeInfo::Byte, - "Self" => TypeInfo::SelfType, - _other => TypeInfo::Custom { - name: eval!( - Ident::parse_from_pair, - warnings, - errors, - input, - return err(warnings, errors) - ), - }, - }, - warnings, - errors, - ) +impl Default for ResolvedType<'_> { + fn default() -> Self { + ResolvedType::Unit } +} +impl<'sc> ResolvedType<'sc> { + pub(crate) fn friendly_type_str(&self) -> String { + use ResolvedType::*; + match self { + String => "String".into(), + UnsignedInteger(bits) => { + use IntegerBits::*; + match bits { + Eight => "u8", + Sixteen => "u16", + ThirtyTwo => "u32", + SixtyFour => "u64", + OneTwentyEight => "u128", + } + .into() + } + Boolean => "bool".into(), + Generic { name } => format!("generic {}", name.primary_name), + Unit => "()".into(), + SelfType => "Self".into(), + Byte => 
"byte".into(), + Byte32 => "byte32".into(), + Struct { + name: Ident { primary_name, .. }, + .. + } => format!("struct {}", primary_name), + Enum { + name: Ident { primary_name, .. }, + .. + } => format!("enum {}", primary_name), + ErrorRecovery => "\"unknown due to error\"".into(), + } + } pub(crate) fn is_convertable( &self, - other: &TypeInfo<'sc>, + other: &ResolvedType<'sc>, debug_span: Span<'sc>, help_text: impl Into, ) -> Result>, TypeError<'sc>> { let help_text = help_text.into(); - if *self == TypeInfo::ErrorRecovery || *other == TypeInfo::ErrorRecovery { + if *self == ResolvedType::ErrorRecovery || *other == ResolvedType::ErrorRecovery { return Ok(None); } // TODO actually check more advanced conversion rules like upcasting vs downcasting @@ -102,10 +101,9 @@ impl<'sc> TypeInfo<'sc> { }) } } - - fn numeric_cast_compat(&self, other: &TypeInfo<'sc>) -> Result<(), Warning<'sc>> { + fn numeric_cast_compat(&self, other: &ResolvedType<'sc>) -> Result<(), Warning<'sc>> { assert!(self.is_numeric(), other.is_numeric()); - use TypeInfo::*; + use ResolvedType::*; // if this is a downcast, warn for loss of precision. if upcast, then no warning. match self { UnsignedInteger(IntegerBits::Eight) => Ok(()), @@ -146,38 +144,8 @@ impl<'sc> TypeInfo<'sc> { _ => unreachable!(), } } - - pub(crate) fn friendly_type_str(&self) -> String { - use TypeInfo::*; - match self { - String => "String".into(), - UnsignedInteger(bits) => { - use IntegerBits::*; - match bits { - Eight => "u8", - Sixteen => "u16", - ThirtyTwo => "u32", - SixtyFour => "u64", - OneTwentyEight => "u128", - } - .into() - } - Boolean => "bool".into(), - Generic { name } => format!("generic {}", name.primary_name), - Custom { name } => format!("unknown {}", name.primary_name), - Unit => "()".into(), - SelfType => "Self".into(), - Byte => "byte".into(), - Byte32 => "byte32".into(), - Struct { - name: Ident { primary_name, .. }, - .. 
- } => format!("struct {}", primary_name), - ErrorRecovery => "\"unknown due to error\"".into(), - } - } fn is_numeric(&self) -> bool { - if let TypeInfo::UnsignedInteger(_) = self { + if let ResolvedType::UnsignedInteger(_) = self { true } else { false diff --git a/parser/src/types/type_info.rs b/parser/src/types/type_info.rs new file mode 100644 index 00000000000..9566c8b139d --- /dev/null +++ b/parser/src/types/type_info.rs @@ -0,0 +1,89 @@ +use crate::error::*; +use crate::{Ident, Rule}; +use pest::iterators::Pair; + +use super::ResolvedType; + +/// Type information without an associated value, used for type inferencing and definition. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum TypeInfo<'sc> { + String, + UnsignedInteger(IntegerBits), + Boolean, + /// A custom type could be a struct or similar if the name is in scope, + /// or just a generic parameter if it is not. + /// At parse time, there is no sense of scope, so this determination is not made + /// until the semantic analysis stage. + Custom { + name: Ident<'sc>, + }, + Unit, + SelfType, + Byte, + Byte32, + // used for recovering from errors in the ast + ErrorRecovery, +} +#[derive(Debug, Clone, PartialEq, Eq, Hash, Copy)] +pub enum IntegerBits { + Eight, + Sixteen, + ThirtyTwo, + SixtyFour, + OneTwentyEight, +} + +impl<'sc> TypeInfo<'sc> { + /// This is a shortcut function. It should only be called as a convenience method in match + /// statements resolving types when it has already been verified that this type is _not_ + /// a custom (enum, struct, user-defined) or generic type. + /// This function just passes all the trivial types through to a [ResolvedType]. + pub(crate) fn to_resolved(&self) -> ResolvedType<'sc> { + match self { + TypeInfo::Custom { .. } => panic!("Invalid use of `to_resolved`. 
See documentation of [TypeInfo::to_resolved] for more details."), + TypeInfo::Boolean => ResolvedType::Boolean, + TypeInfo::String => ResolvedType::String, + TypeInfo::UnsignedInteger(bits) => ResolvedType::UnsignedInteger(*bits), + TypeInfo::Unit => ResolvedType::Unit, + TypeInfo::SelfType => ResolvedType::SelfType, + TypeInfo::Byte => ResolvedType::Byte, + TypeInfo::Byte32 => ResolvedType::Byte32, + TypeInfo::ErrorRecovery => ResolvedType::ErrorRecovery + + } + } + pub(crate) fn parse_from_pair(input: Pair<'sc, Rule>) -> CompileResult<'sc, Self> { + let mut r#type = input.into_inner(); + Self::parse_from_pair_inner(r#type.next().unwrap()) + } + pub(crate) fn parse_from_pair_inner(input: Pair<'sc, Rule>) -> CompileResult<'sc, Self> { + let mut warnings = vec![]; + let mut errors = vec![]; + ok( + match input.as_str().trim() { + "u8" => TypeInfo::UnsignedInteger(IntegerBits::Eight), + "u16" => TypeInfo::UnsignedInteger(IntegerBits::Sixteen), + "u32" => TypeInfo::UnsignedInteger(IntegerBits::ThirtyTwo), + "u64" => TypeInfo::UnsignedInteger(IntegerBits::SixtyFour), + "u128" => TypeInfo::UnsignedInteger(IntegerBits::OneTwentyEight), + "bool" => TypeInfo::Boolean, + "string" => TypeInfo::String, + "unit" => TypeInfo::Unit, + "byte" => TypeInfo::Byte, + "Self" | "self" => TypeInfo::SelfType, + "()" => TypeInfo::Unit, + _other => TypeInfo::Custom { + name: eval!( + Ident::parse_from_pair, + warnings, + errors, + input, + return err(warnings, errors) + ), + }, + }, + warnings, + errors, + ) + } +} diff --git a/stdlib/src/main.fm b/stdlib/src/main.fm index 19e1987cf72..629988f45bc 100644 --- a/stdlib/src/main.fm +++ b/stdlib/src/main.fm @@ -1,8 +1,19 @@ library ops { - trait Add { + pub trait Add { fn add(self, other: Self) -> Self; } + pub trait Subtract { + fn subtract(self, other: Self) -> Self; + } + + impl Subtract for u64 { + fn subtract(self, other: Self) -> Self { + // TODO write asm + 0 + } + } + impl Add for u64 { fn add(self, other: Self) -> Self { asm(r1: self, 
r2: other, r3) { @@ -21,21 +32,39 @@ library ops { } } + impl Add for u16 { + fn add(self, other: Self) -> Self { + asm(r1: self, r2: other, r3) { + add r3 r2 r1 i10; + r3 + } + } + } -/* - struct Test { - a: u64, - b: u64 + pub trait Cmp { + fn less_than(self, other: Self) -> bool; } - fn test() { - // now, need to work out methods and using the self type on them - let test = Test { a: 5, b: 5 }; - let y: u64 = test.a; + impl Cmp for u64 { + fn less_than(self, other: Self) -> bool { + // TODO write asm + true + } + } + + impl Cmp for u32 { + fn less_than(self, other: Self) -> bool { + // TODO write asm + true + } + } - let z = y.add(test.b); + impl Cmp for u16 { + fn less_than(self, other: Self) -> bool { + // TODO write asm + true + } } - */ } /*