Skip to content

Commit

Permalink
Control flow analysis + more (#28)
Browse files Browse the repository at this point in the history
* begin documentation of control flow analysis algorithm

* begin graph construction

* begin graph construction

* generate graphs; further control flow work

* some improvements in control flow; still need to wire up imports into the control flow graph

* control flow graph improvements

* fix up implicit return printing in control flow graph

* organize graph code

* find dead code

* unreachable code warnings

* parse unit type as type info

* start enum expressions

* fix enum lookup bug

* type check all enum instantiations

* Resolved types (#29)

* begin swapping over to resolved types

* progress in refactor

* progress on converting types

* progress in switching to resolved types

* progress in switching to resolved types

* finalize refactor to resolved types

* remove unused type variants

* fix tests

* enum variant construction code analysis

* resolve warnings

* missing enum instantiator error

* trait declarations control flow

* fix trait type mismatch error message

* individual method dead code warnings

* fix method call bug

* respect public visibility modifiers; control flow analysis on libraries

* add public traits

* move ident file

* begin analysis on return paths

* refactor of files

* begin work on analyzing return paths

* basic return path graph construction done

* documentation

* Return path analysis (#30)

* begin analysis on return paths

* refactor of files

* begin work on analyzing return paths

* basic return path graph construction done

* documentation

* fix code block type parsing

* control flow analysis on if and code blocks

* struct fields in graph

* individual struct field warnings
  • Loading branch information
sezna authored Apr 16, 2021
1 parent ae30683 commit a20104f
Show file tree
Hide file tree
Showing 40 changed files with 2,907 additions and 533 deletions.
1 change: 1 addition & 0 deletions parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ pest_derive = "2.0"
thiserror = "1.0"
either = "1.6"
Inflector = "0.11"
petgraph = "0.5"
3 changes: 3 additions & 0 deletions parser/README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
# fuel-vm-hll
High Level Language (Name Subject to Change) for the FuelVM

# Minimum supported Rust version
As of now, this code was developed on and is guaranteed to run on Rust 1.50 stable.
294 changes: 294 additions & 0 deletions parser/src/control_flow_analysis/analyze_return_paths.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,294 @@
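//! Return path analysis. This module builds a simplified flow graph, whose edges represent
//! possible steps of program execution, and walks it to check that every path through a
//! function that must return a value reaches that function's exit node.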

use super::*;
use super::{ControlFlowGraph, EntryPoint, ExitPoint, Graph};
use crate::semantics::{
ast_node::{
TypedCodeBlock, TypedDeclaration, TypedExpression, TypedFunctionDeclaration,
TypedReassignment, TypedVariableDeclaration, TypedWhileLoop,
},
TypedAstNode, TypedAstNodeContent,
};
use crate::types::ResolvedType;
use crate::Ident;
use crate::{error::*, semantics::TypedParseTree};
use pest::Span;
use petgraph::prelude::NodeIndex;

impl<'sc> ControlFlowGraph<'sc> {
    /// Builds the graph used for return path analysis from a typed parse tree.
    ///
    /// The root nodes of `ast` are connected one after another: the leaves produced by one
    /// root node are the nodes the next root node is attached to. Function declarations found
    /// along the way register their entry and exit nodes in the graph's namespace (see
    /// `connect_typed_fn_decl`), which is what `analyze_return_paths` later iterates over.
    pub(crate) fn construct_return_path_graph(ast: &TypedParseTree<'sc>) -> Self {
        let mut graph = ControlFlowGraph {
            graph: Graph::new(),
            entry_points: vec![],
            namespace: Default::default(),
        };
        // do a depth first traversal and cover individual inner ast nodes
        let mut leaves = vec![];
        for ast_entrypoint in ast.root_nodes.iter() {
            // A `Return` connection at the root does not advance the frontier, so only
            // `NextStep` results replace the current leaves.
            if let NodeConnection::NextStep(nodes) =
                connect_node(ast_entrypoint, &mut graph, &leaves)
            {
                leaves = nodes;
            }
        }

        graph
    }

    /// Checks that every function which is required to return a value does so on all of its
    /// paths.
    ///
    /// Every entry of the function namespace is checked; trait methods are registered there
    /// as well, because `connect_impl_trait` routes them through `connect_typed_fn_decl`.
    /// For each entry, every path starting at the function's entry node must end at the
    /// function's exit node unless the function returns the unit type.
    ///
    /// Note that this only verifies that paths *reach* the exit node; it does not compare
    /// the types of the returned values.
    ///
    /// Returns one `CompileError::PathDoesNotReturn` per dead-end node that was found.
    pub(crate) fn analyze_return_paths(&self) -> Vec<CompileError<'sc>> {
        let mut errors = vec![];
        for (
            name,
            FunctionNamespaceEntry {
                entry_point,
                exit_point,
                return_type,
            },
        ) in &self.namespace.function_namespace
        {
            errors.extend(self.ensure_all_paths_reach_exit(
                *entry_point,
                *exit_point,
                name.primary_name,
                return_type,
            ));
        }
        errors
    }

    /// Walks the graph breadth-first from `entry_point` and reports every node that has no
    /// outgoing edge and is not `exit_point`, provided `return_ty` is not the unit type.
    ///
    /// * `entry_point` - node at which the function starts.
    /// * `exit_point` - node every returning path has to end in; it is never expanded.
    /// * `function_name` - name reported in the resulting errors.
    /// * `return_ty` - declared return type; dead ends are not errors for `Unit`.
    ///
    /// Each node is expanded at most once, so the walk terminates on any graph (including
    /// cyclic ones), covers functions of any length, and reports a given dead end only once
    /// even when several paths lead to it.
    fn ensure_all_paths_reach_exit(
        &self,
        entry_point: EntryPoint,
        exit_point: ExitPoint,
        function_name: &'sc str,
        return_ty: &ResolvedType<'sc>,
    ) -> Vec<CompileError<'sc>> {
        let mut errors = vec![];
        // Nodes that have already been expanded. This replaces a fixed iteration budget:
        // termination is guaranteed because every node is processed at most once.
        let mut visited = std::collections::HashSet::new();
        let mut rovers = vec![entry_point];
        while !rovers.is_empty() {
            let mut next_rovers = vec![];
            for rover in rovers {
                // Paths that arrived at the exit are done. Checking this per rover (rather
                // than only looking at the first one) keeps the remaining rovers of the
                // same generation alive regardless of neighbor iteration order.
                if rover == exit_point || !visited.insert(rover) {
                    continue;
                }
                let mut neighbors = self
                    .graph
                    .neighbors_directed(rover, petgraph::Direction::Outgoing)
                    .collect::<Vec<_>>();
                if neighbors.is_empty() && *return_ty != ResolvedType::Unit {
                    errors.push(CompileError::PathDoesNotReturn {
                        // TODO: unwrap_to_node is a shortcut. In reality, the graph type should be
                        // different. To save some code duplication,
                        // NOTE(review): presumably this panics for graph nodes that do not wrap
                        // an AST node -- confirm against `unwrap_to_node`.
                        span: self.graph[rover].unwrap_to_node().span.clone(),
                        function_name,
                        ty: return_ty.friendly_type_str(),
                    });
                }
                next_rovers.append(&mut neighbors);
            }
            rovers = next_rovers;
        }

        errors
    }
}

/// The result of connecting a single AST node to the graph, telling the caller how the
/// stepwise flow continues after that node.
enum NodeConnection {
/// The node steps on to the next node. The contained indices are the new leaves, i.e. the
/// nodes the following AST node gets connected to.
NextStep(Vec<NodeIndex>),
/// The node is a return or implicit return, which aborts the stepwise flow. The contained
/// index is the return node itself; callers collect these and connect them to the exit
/// node of the enclosing function.
Return(NodeIndex),
}

/// Adds `node` to the graph, wires it up to the current `leaves`, and reports how the flow
/// continues afterwards.
///
/// * `node` - the typed AST node to insert.
/// * `graph` - the graph under construction.
/// * `leaves` - the nodes that control flow may come from when reaching `node`.
///
/// Returns `NodeConnection::Return` for explicit and implicit returns, and
/// `NodeConnection::NextStep` with the new leaves for everything else.
fn connect_node<'sc>(
    node: &TypedAstNode<'sc>,
    graph: &mut ControlFlowGraph<'sc>,
    leaves: &[NodeIndex],
) -> NodeConnection {
    match &node.content {
        TypedAstNodeContent::ReturnStatement(_)
        | TypedAstNodeContent::ImplicitReturnExpression(_) => {
            let this_index = graph.add_node(node.into());
            for leaf_ix in leaves {
                graph.add_edge(*leaf_ix, this_index, "".into());
            }
            NodeConnection::Return(this_index)
        }
        TypedAstNodeContent::WhileLoop(TypedWhileLoop { .. }) => {
            // An abridged version of the dead code analysis for a while loop
            // since we don't really care about what the loop body contains when detecting
            // divergent paths
            let entry = graph.add_node(node.into());
            // The loop still has to be reachable from the preceding statements. Without
            // these edges the loop node is orphaned and every leaf before it becomes a
            // dead end, which is then reported as a path that does not return.
            for leaf in leaves {
                graph.add_edge(*leaf, entry, "".into());
            }
            NodeConnection::NextStep(vec![entry])
        }
        TypedAstNodeContent::Expression(TypedExpression { .. }) => {
            let entry = graph.add_node(node.into());
            // insert organizational dominator node
            // connected to all current leaves
            for leaf in leaves {
                graph.add_edge(*leaf, entry, "".into());
            }
            NodeConnection::NextStep(vec![entry])
        }
        // Side effects add nothing to the graph; flow continues from the same leaves.
        TypedAstNodeContent::SideEffect => NodeConnection::NextStep(leaves.to_vec()),
        TypedAstNodeContent::Declaration(decl) => NodeConnection::NextStep(connect_declaration(
            node,
            decl,
            graph,
            // The span is only needed for declarations, so it is cloned here only.
            node.span.clone(),
            leaves,
        )),
    }
}

/// Adds the declaration `decl` (the content of `node`) to the graph and returns the leaves
/// that the next node in the enclosing block should be connected to.
///
/// * `node` - the AST node wrapping `decl`; it is what gets inserted into the graph.
/// * `decl` - the declaration to connect.
/// * `graph` - the graph under construction.
/// * `span` - span of `node`, forwarded to `connect_typed_fn_decl` for function declarations.
/// * `leaves` - the nodes that control flow may come from when reaching `node`.
///
/// Variable declarations and reassignments are steps of the enclosing flow and become the
/// new single leaf. Type, function and trait-impl declarations are not steps of the
/// enclosing flow, so the incoming leaves are passed through unchanged. Dropping them would
/// disconnect every statement that follows the declaration in the same block.
///
/// # Panics
///
/// Panics on `SideEffect` and `ErrorRecovery` declarations, which are expected to have been
/// removed during type checking.
fn connect_declaration<'sc>(
    node: &TypedAstNode<'sc>,
    decl: &TypedDeclaration<'sc>,
    graph: &mut ControlFlowGraph<'sc>,
    span: Span<'sc>,
    leaves: &[NodeIndex],
) -> Vec<NodeIndex> {
    use TypedDeclaration::*;
    match decl {
        // Nothing is added to the graph; flow continues from the same leaves.
        TraitDeclaration(_) | StructDeclaration(_) | EnumDeclaration(_) => leaves.to_vec(),
        VariableDeclaration(TypedVariableDeclaration { .. })
        | Reassignment(TypedReassignment { .. }) => {
            let entry_node = graph.add_node(node.into());
            for leaf in leaves {
                graph.add_edge(*leaf, entry_node, "".into());
            }
            vec![entry_node]
        }
        FunctionDeclaration(fn_decl) => {
            let entry_node = graph.add_node(node.into());
            for leaf in leaves {
                graph.add_edge(*leaf, entry_node, "".into());
            }
            connect_typed_fn_decl(fn_decl, graph, entry_node, span);
            // The function body is its own sub-graph; the enclosing flow resumes from the
            // leaves it had before the declaration.
            leaves.to_vec()
        }
        ImplTrait {
            trait_name,
            methods,
            ..
        } => {
            let entry_node = graph.add_node(node.into());
            for leaf in leaves {
                graph.add_edge(*leaf, entry_node, "".into());
            }
            connect_impl_trait(trait_name, graph, methods, entry_node);
            // Same as for function declarations: the enclosing flow is unaffected.
            leaves.to_vec()
        }
        SideEffect | ErrorRecovery => {
            unreachable!("These are error cases and should be removed in the type checking stage. ")
        }
    }
}

/// Connects a trait implementation to the graph.
///
/// Every method of the implementation gets its own `MethodDeclaration` node, reachable
/// through an edge from `entry_node` (the node of the impl itself). The method body is then
/// connected like any other function declaration. Finally, all methods and their node
/// indices are recorded in the trait method namespace under `trait_name`, so that individual
/// methods can be tracked.
///
/// * `trait_name` - name of the trait being implemented.
/// * `graph` - the graph under construction.
/// * `methods` - the methods defined by the implementation.
/// * `entry_node` - the already inserted node representing the impl.
fn connect_impl_trait<'sc>(
    trait_name: &Ident<'sc>,
    graph: &mut ControlFlowGraph<'sc>,
    methods: &[TypedFunctionDeclaration<'sc>],
    entry_node: NodeIndex,
) {
    // Insert one declaration node per method and remember where each one ended up.
    let methods_and_indexes = methods
        .iter()
        .map(|method| {
            let method_entry = graph.add_node(ControlFlowGraphNode::MethodDeclaration {
                span: method.span.clone(),
                method_name: method.name.clone(),
            });
            // The impl node leads to each of its methods, as all trait functions are public
            // if the trait is in scope.
            graph.add_edge(entry_node, method_entry, "".into());
            connect_typed_fn_decl(method, &mut *graph, method_entry, method.span.clone());
            (method.name.clone(), method_entry)
        })
        .collect::<Vec<_>>();

    // Register the collected methods in the trait method namespace.
    graph
        .namespace
        .insert_trait_methods(trait_name.clone(), methods_and_indexes);
}

// NOTE: the following describes how *trait declarations* are handled. It is not documentation
// for `connect_typed_fn_decl` below, so it is kept as a plain comment rather than a doc comment.
//
// The strategy here is to populate the trait namespace with just one singular trait
// and if it is ever implemented, by virtue of type checking, we know all interface points
// were met.
// Upon implementation, we can populate the methods namespace and track dead functions that way.
// TL;DR: At this point, we _only_ track the holistic trait declaration and not the functions
// contained within.
//
// The trait node itself has already been added (as `entry_node`), so we just need to insert that
// node index into the namespace for the trait.

/// Connects the body of a function declaration to the graph and registers the function in
/// the function namespace.
///
/// A dedicated exit node is created for the function. The body is inserted starting from
/// `entry_node`, and every return node found directly in the body is linked to the exit node
/// with a `"return"` edge. The entry node, exit node and declared return type are then stored
/// in the namespace under the function's name.
///
/// * `fn_decl` - the function declaration whose body is connected.
/// * `graph` - the graph under construction.
/// * `entry_node` - the already inserted node representing the declaration.
/// * `_span` - currently unused.
fn connect_typed_fn_decl<'sc>(
    fn_decl: &TypedFunctionDeclaration<'sc>,
    graph: &mut ControlFlowGraph<'sc>,
    entry_node: NodeIndex,
    _span: Span<'sc>,
) {
    // The exit node is created before the body is inserted.
    let exit_label = format!("\"{}\" fn exit", fn_decl.name.primary_name);
    let fn_exit_node = graph.add_node(exit_label.into());

    for return_node in depth_first_insertion_code_block(&fn_decl.body, graph, &[entry_node]) {
        graph.add_edge(return_node, fn_exit_node, "return".into());
    }

    graph.namespace.insert_function(
        fn_decl.name.clone(),
        FunctionNamespaceEntry {
            entry_point: entry_node,
            exit_point: fn_exit_node,
            return_type: fn_decl.return_type.clone(),
        },
    );
}

/// Indices of the return nodes collected while inserting a code block.
type ReturnStatementNodes = Vec<NodeIndex>;

/// Inserts the contents of a code block into the graph, one node after another.
///
/// The first node is connected to `leaves`; afterwards each node is connected to the leaves
/// produced by the most recent `NextStep` connection. A return node does not change the
/// current leaves.
///
/// * `node_content` - the code block whose contents are inserted.
/// * `graph` - the graph under construction.
/// * `leaves` - the nodes that control flow may come from when entering the block.
///
/// Returns the indices of all return nodes found directly in the block, in source order.
fn depth_first_insertion_code_block<'sc>(
    node_content: &TypedCodeBlock<'sc>,
    graph: &mut ControlFlowGraph<'sc>,
    leaves: &[NodeIndex],
) -> ReturnStatementNodes {
    let mut frontier = leaves.to_vec();
    let mut returns = ReturnStatementNodes::new();
    for statement in node_content.contents.iter() {
        match connect_node(statement, graph, &frontier) {
            NodeConnection::Return(return_ix) => returns.push(return_ix),
            NodeConnection::NextStep(next_leaves) => frontier = next_leaves,
        }
    }
    returns
}
Loading

0 comments on commit a20104f

Please sign in to comment.