From 5d1849943b30d809d9597ecf0b4f09e1e6de1dbc Mon Sep 17 00:00:00 2001 From: Seemann Date: Tue, 16 Jan 2024 20:29:26 -0500 Subject: [PATCH] support annotations, visibility zones, refactor source parser --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/language_service/ffi.rs | 87 ++-- src/language_service/scanner.rs | 650 ++++++++++++++++++++------- src/language_service/server.rs | 57 +-- src/language_service/symbol_table.rs | 49 +- src/parser/declaration.rs | 400 ++++++++++++++++- src/parser/interface.rs | 21 + src/parser/mod.rs | 4 +- src/utils/compiler_const.rs | 2 + 10 files changed, 1052 insertions(+), 222 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 85066e4..31977b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -605,7 +605,7 @@ dependencies = [ [[package]] name = "sanny_builder_core" -version = "0.3.1" +version = "0.4.0" dependencies = [ "base64", "ctor", diff --git a/Cargo.toml b/Cargo.toml index 7e299c7..0e4c7a5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sanny_builder_core" -version = "0.3.1" +version = "0.4.0" authors = ["Seemann "] edition = "2021" diff --git a/src/language_service/ffi.rs b/src/language_service/ffi.rs index 7aef2ad..897ac8d 100644 --- a/src/language_service/ffi.rs +++ b/src/language_service/ffi.rs @@ -3,53 +3,24 @@ use std::ffi::CString; use crate::{ common_ffi::{pchar_to_str, pchar_to_string, ptr_free, ptr_new, PChar}, language_service::server::LanguageServer, + v4::helpers::token_str, }; -#[repr(C)] -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum SymbolType { - Number = 0, - String = 1, - Var = 2, - Label = 3, - ModelName = 4, -} -pub struct SymbolInfo { - pub line_number: u32, // defined on this line - pub end_line_number: u32, // not visible on and after this line - pub _type: SymbolType, - pub value: Option, // value of the symbol (for literals) -} +use super::symbol_table::SymbolType; + #[repr(C)] #[derive(Debug, Clone)] pub struct SymbolInfoRaw { - pub line_number: u32, - pub end_line_number: u32, pub _type: SymbolType, pub value: PChar, + pub name_no_format: PChar, + pub annotation: PChar, } pub struct DocumentInfo { pub is_active: bool, } -#[derive(Clone, Debug)] -pub struct SymbolInfoMap { - pub line_number: u32, - pub stack_id: u32, - pub end_line_number: u32, - pub _type: SymbolType, - pub value: Option, - pub name_no_format: String, // used for autocomplete -} - -impl SymbolInfoMap { - pub fn is_visible(&self, line_number: u32) -> bool { - line_number >= self.line_number - && (self.end_line_number == 0 || line_number < self.end_line_number) - } -} - #[repr(C)] #[derive(Clone, Copy)] #[allow(dead_code)] @@ -153,9 +124,14 @@ pub unsafe extern "C" fn language_service_find( let s = server.find(pchar_to_str(symbol)?, handle, line_number)?; let out_value = out_value.as_mut()?; out_value._type = s._type; - out_value.line_number = s.line_number; - out_value.end_line_number = s.end_line_number; + + // don't return line numbers as they are not used on the client side + // out_value.line_number = s.line_number; + // out_value.end_line_number = s.end_line_number; + out_value.value = CString::new(s.value.unwrap_or_default()).unwrap().into_raw(); + out_value.name_no_format = CString::new(s.name_no_format).unwrap().into_raw(); + out_value.annotation = CString::new(s.annotation.unwrap_or_default()).unwrap().into_raw(); Some(()) }} } @@ -181,8 +157,45 @@ pub unsafe extern "C" fn language_service_filter_constants_by_name( boolclosure! {{ let items = server.as_mut()?.filter_constants_by_name(pchar_to_str(needle)?, handle, line_number)?; for item in items { - dict.as_mut()?.add(CString::new(item.0).ok()?, CString::new(item.1).ok()?) + dict.as_mut()?.add(CString::new(item).ok()?, CString::new("").ok()?) // todo: use simple list instead of dict } Some(()) }} } + +#[no_mangle] +pub unsafe extern "C" fn language_service_format_function_signature( + server: *mut LanguageServer, + value: PChar, + out: *mut PChar, +) -> bool { + boolclosure! {{ + let _server = server.as_mut()?; + use crate::parser::{function_arguments_and_return_types, Span}; + + let line = pchar_to_str(value)?; + let (_, ref signature) = function_arguments_and_return_types(Span::from(line)).ok()?; + + let params = signature.0 + .iter() + .map(|param|{ + let type_token = token_str(line, ¶m._type); + let name_token = param.name.as_ref().map(|name| token_str(line, name)); + + match name_token { + Some(name) => format!("\"{}: {}\"", name, type_token), + None => format!("\"{}\"", type_token), + } + }) + .collect::>() + .join(", "); + + // let return_types = signature.1 + // .iter() + // .map(|ret_type| format!("\"{}\"", token_str(line, &ret_type.token))) + // .collect::>() + // .join(", "); + *out = CString::new(format!("{params}")).unwrap().into_raw(); + Some(()) + }} +} diff --git a/src/language_service/scanner.rs b/src/language_service/scanner.rs index 33063ff..f53b51d 100644 --- a/src/language_service/scanner.rs +++ b/src/language_service/scanner.rs @@ -1,8 +1,10 @@ -use super::ffi::{Source, SymbolInfoMap, SymbolType}; -use super::symbol_table::SymbolTable; +use super::ffi::Source; +use super::symbol_table::{SymbolInfoMap, SymbolTable, SymbolType, VisibilityZone}; use crate::dictionary::DictNumByString; use crate::language_service::server::CACHE_FILE_SYMBOLS; +use crate::parser::FunctionSignature; use crate::utils::compiler_const::*; +use crate::v4::helpers::token_str; use std::collections::HashSet; use std::fs; use std::path::Path; @@ -33,7 +35,7 @@ fn file_walk( // create a new table for this file and its descendants let mut local_table = SymbolTable::new(); - find_constants( + scan_text( &content, reserved_words, class_names, @@ -52,11 +54,11 @@ fn file_walk( .insert(file_name.clone(), local_table); } -fn resolve_path(p: String, parent_file: &Option) -> Option { - let path = Path::new(&p); +fn resolve_path(p: &str, parent_file: &Option) -> Option { + let path = Path::new(p); if path.is_absolute() { - return Some(p); + return Some(p.to_string()); } match parent_file { @@ -101,7 +103,7 @@ pub fn scan_document<'a>( ); } - find_constants( + scan_text( text, reserved_words, class_names, @@ -113,7 +115,7 @@ pub fn scan_document<'a>( ); } -pub fn find_constants<'a>( +pub fn scan_text<'a>( content: &String, reserved_words: &DictNumByString, class_names: &Vec, @@ -123,88 +125,45 @@ pub fn find_constants<'a>( table: &mut SymbolTable, line_number: Option, ) { - let mut lines: Vec = vec![]; - let mut line = String::new(); - let mut chars = content.chars().peekable(); - - 'outer: while let Some(c) = chars.next() { - match c { - '\n' => { - lines.push(line); - line = String::new(); - } - '{' if chars.peek() != Some(&'$') => { - // directives {$...} - // { } block - loop { - match chars.next() { - Some('}') => break, - Some(_) => {} // ignore other chars inside block - None => break 'outer, - } - } - } - '/' => match chars.next() { - // /* */ comment - Some('*') => loop { - match chars.next() { - Some('*') => { - if chars.next() == Some('/') { - break; - } - } - Some(_) => {} // ignore other chars inside comment - None => break 'outer, - } - }, - // // comment - Some('/') => { - loop { - match chars.next() { - Some('\n') => { - lines.push(line); - line = String::new(); - break; - } - Some(_) => {} // ignore other chars inside comment - None => break 'outer, - } - } - } - Some(c) => { - line.push('/'); - line.push(c); - } - None => { - break 'outer; - } - }, - - c => { - // trim left - if !c.is_ascii_whitespace() || !line.is_empty() { - line.push(c); - } - } - } - } - lines.push(line); - - // let mut lines = content.lines().enumerate(); let mut inside_const = false; let file_name = match source { Source::File(path) => Some(path.clone()), Source::Memory => None, }; - for (_index, line) in lines.iter().enumerate() { - if line.is_empty() { + + let mut inside_comment = false; + let mut inside_comment2 = false; + let mut next_annotation: Option = None; + + let lines = content.lines(); + for (_index, line1) in lines.enumerate() { + let (first, rest) = strip_comments(line1, &mut inside_comment, &mut inside_comment2); + + if first.is_empty() { + continue; + } + + if first.eq("///") { + //append or create next_annotation + + if let Some(ref mut annotation) = next_annotation { + annotation.push_str("\n"); + annotation.push_str(rest.as_str()); + } else { + next_annotation = Some(rest.to_string()); + } + continue; } - let mut words = line.split_ascii_whitespace(); - let first = match words.next() { - Some(word) => word.to_ascii_lowercase(), - None => continue, - }; + + let first_lower = first.to_ascii_lowercase(); + let token_id = reserved_words.map.get(&first_lower); + + // reset annotation if this line is not a function + if token_id != Some(&TOKEN_FUNCTION) && token_id != Some(&TOKEN_DEFINE) { + next_annotation = None; + } + /* if the file is a $include in the current document, then we need all its symbols (including deep $include's) to have a line number of the $include statement in the current document @@ -212,17 +171,16 @@ pub fn find_constants<'a>( if the file is an implicit include (constants.txt), then all its symbols have a line number of 0 */ let line_number = line_number.unwrap_or(_index); - let stack_id = scope_stack.len() as u32; - match reserved_words.map.get(&first) { + match token_id { Some(token) => match *token { TOKEN_INCLUDE | TOKEN_INCLUDE_ONCE => { - let mut include_path = words.collect::(); - - if include_path.ends_with('}') { - include_path.pop(); - } + let include_path = if rest.ends_with('}') { + &rest[..rest.len() - 1] + } else { + rest.as_str() + }; let Some(path) = resolve_path(include_path, &file_name) else { continue }; @@ -237,8 +195,6 @@ pub fn find_constants<'a>( ); } TOKEN_CONST => { - let rest = words.collect::(); - if !rest.is_empty() { let declarations = split_const_line(&rest); @@ -251,26 +207,50 @@ pub fn find_constants<'a>( } TOKEN_END if inside_const => inside_const = false, TOKEN_END => { - log::debug!("Found end of block in line {}", line_number + 1); if stack_id < 2 { // global scope, only ends when the file ends continue; } // number of nested blocks in the current function - let mut fn_blocks = scope_stack.last_mut().unwrap(); + let fn_blocks = scope_stack.last_mut().unwrap(); if *fn_blocks == 0 { // there are no other open blocks in this function, this is the function's end - // find all symbols with stack_id - for (_, symbol) in table.symbols.iter_mut() { - if symbol.stack_id == stack_id { - symbol.end_line_number = line_number as u32; - symbol.stack_id = 0; // mark as processed + // find all symbols defined in the current scope and close their visibility zone + for (_, symbols) in table.symbols.iter_mut() { + for symbol in symbols { + if symbol.stack_id == stack_id { + let Some(last_zone) = symbol.zones.last_mut() else { continue }; + + if last_zone.end != 0 { + // should not happen + log::error!( + "Symbol {} does not have an open visibility zone", + symbol.name_no_format + ); + continue; + } + + // this local variable is not visible inside the function + last_zone.end = line_number as u32; + + symbol.stack_id = 0; // mark as processed + } } } // delete function scope scope_stack.pop(); + + let stack_id = scope_stack.len() as u32; + // open visibility zone for the local variables of the parent scope + for (_, symbols) in table.symbols.iter_mut() { + for symbol in symbols { + if symbol.stack_id == stack_id && symbol._type == SymbolType::Var { + symbol.add_zone(line_number as u32) + } + } + } } else { // exit block *fn_blocks -= 1; @@ -279,17 +259,84 @@ pub fn find_constants<'a>( TOKEN_INT | TOKEN_FLOAT | TOKEN_STRING | TOKEN_LONGSTRING | TOKEN_HANDLE | TOKEN_BOOL => { // inline variable declaration - - let rest = words.collect::(); let names = split_const_line(&rest); for name in names { - process_var_declaration(&name, table, line_number, stack_id) + process_var_declaration(&name, table, line_number, stack_id, &first) } } + TOKEN_DEFINE => { + // define function + // todo: other defines? + + use crate::parser::{function_signature, Span}; + + // parse function signature and add its parameters to the symbol table as variables + let Ok((_, ref signature)) = function_signature(Span::from(rest.as_str())) else { continue }; + + register_function( + table, + line_number, + stack_id, + rest.as_str(), + signature, + next_annotation.take(), + ); + } TOKEN_FUNCTION => { // push new scope scope_stack.push(0); + + // end visibility zone for the local variables of the parent scope + // because parent local variables can not be seen in functions + // todo: make sure global vars is an exception + for (_, symbols) in table.symbols.iter_mut() { + for symbol in symbols { + if symbol.stack_id == stack_id && symbol._type == SymbolType::Var { + let Some(last_zone) = symbol.zones.last_mut() else { continue }; + + if last_zone.end != 0 { + // should not happen + log::error!( + "Symbol {} does not have an open visibility zone", + symbol.name_no_format + ); + continue; + } + + last_zone.end = line_number as u32; + } + } + } + + use crate::parser::{function_signature, Span}; + + // parse function signature and add its parameters to the symbol table as variables + let line = first + " " + &rest; + let line = line.as_str(); + let Ok((_, ref signature)) = function_signature(Span::from(line)) else { continue }; + + register_function( + table, + line_number, + stack_id, + line, + signature, + next_annotation.take(), + ); + + for param in &signature.parameters { + if let Some(ref name) = param.name { + register_var( + table, + line_number, + stack_id + 1, // register function parameters in the function's stack + token_str(&line, name), + Some(token_str(&line, ¶m._type).to_string()), + None, + ); + } + } } TOKEN_IF | TOKEN_FOR | TOKEN_WHILE | TOKEN_SWITCH => { @@ -304,19 +351,20 @@ pub fn find_constants<'a>( _ => {} }, _ if inside_const => { + let line = first + " " + &rest; + let line = line.as_str(); let declarations = split_const_line(line); for declaration in declarations.iter() { process_const_declaration(&declaration, table, line_number, stack_id); } } - _ if class_names.contains(&first) => { + _ if class_names.contains(&first_lower) => { // class declaration - let rest = words.collect::(); let names = split_const_line(&rest); for name in names { - process_var_declaration(&name, table, line_number, stack_id) + process_var_declaration(&name, table, line_number, stack_id, &first) } } _ => { @@ -326,6 +374,175 @@ pub fn find_constants<'a>( } } +fn register_symbol(table: &mut SymbolTable, map: SymbolInfoMap) { + let name_lower = map.name_no_format.to_ascii_lowercase(); + match table.symbols.get_mut(&name_lower) { + Some(symbols) => { + for symbol in symbols.iter() { + if symbol.stack_id == map.stack_id { + log::debug!( + "Found duplicate symbol declaration {} in line {}", + map.name_no_format, + // map.line_number + 1 + map.zones[0].start + 1 + ); + return; + } + } + + symbols.push(map); + } + None => { + table.symbols.insert(name_lower, vec![map]); + } + } +} + +fn register_function( + table: &mut SymbolTable, + line_number: usize, + stack_id: u32, + line: &str, + signature: &FunctionSignature, + annotation: Option, +) { + let map = SymbolInfoMap { + zones: vec![VisibilityZone { + start: line_number as u32, + end: 0, + }], + // line_number: line_number as u32, + _type: SymbolType::Function, + stack_id, // register function in parent stack + // end_line_number: 0, + value: Some(function_params_and_return_types(line, signature)), + name_no_format: token_str(&line, &signature.name).to_string(), + annotation, + }; + register_symbol(table, map); +} + +fn register_var( + table: &mut SymbolTable, + line_number: usize, + stack_id: u32, + name: &str, + _type: Option, + annotation: Option, +) { + register_const( + table, + line_number, + stack_id, + name, + _type, + SymbolType::Var, + annotation, + ); +} + +fn register_const( + table: &mut SymbolTable, + line_number: usize, + stack_id: u32, + name: &str, + value: Option, + _type: SymbolType, + annotation: Option, +) { + let map = SymbolInfoMap { + zones: vec![VisibilityZone { + start: line_number as u32, + end: 0, + }], + // line_number: line_number as u32, + _type, + stack_id, + // end_line_number: 0, + value, + name_no_format: name.to_string(), + annotation, + }; + register_symbol(table, map); +} + +pub fn strip_comments( + s: &str, + inside_comment: &mut bool, + inside_comment2: &mut bool, +) -> (String, String) { + let mut chars = s.chars().peekable(); + let mut first_word = String::new(); + let mut rest = String::new(); + let mut buf = &mut first_word; + + // iterate over all chars, skip comment fragments (/* */ and //) + while let Some(c) = chars.next() { + match c { + _ if *inside_comment => { + // skip until the end of the comment + if c == '*' { + if let Some('/') = chars.next() { + *inside_comment = false; + } + } + } + _ if *inside_comment2 => { + // skip until the end of the comment + if c == '}' { + *inside_comment2 = false; + } + } + '/' if chars.peek() == Some(&'/') => { + chars.next(); // skip / + + // annotation /// + if chars.peek() == Some(&'/') { + chars.next(); // skip / + + if buf.is_empty() { + // start of the line + buf.push_str("///"); // first word is /// + buf = &mut rest; // the rest of the line is the annotation + continue; + } + } + + // line comment // + // there is nothing left on this line, exiting + break; + } + '/' if chars.peek() == Some(&'*') => { + // block comment /* */ + *inside_comment = true; + chars.next(); // skip * + } + + '{' if chars.peek() != Some(&'$') => { + // block comment {} but not directives {$...} + *inside_comment2 = true; + } + _ if c.is_ascii_whitespace() => { + if buf.is_empty() { + // skip leading whitespace + continue; + } else { + buf = &mut rest; + if !buf.is_empty() { + buf.push(c); + } + } + } + _ => { + // line_without_comments.push(c); + buf.push(c); + } + } + } + + return (first_word, rest.trim_end().to_string()); +} + pub fn process_const_declaration( line: &str, table: &mut SymbolTable, @@ -336,37 +553,45 @@ pub fn process_const_declaration( let Some(name) = tokens.next() else { return }; let name = name.trim(); - let name_lower = name.to_ascii_lowercase(); - if let Some(symbol) = table.symbols.get(&name_lower) { - if symbol.stack_id == stack_id { - log::debug!( - "Found duplicate const declaration {} in line {}", - name, - line_number + 1 - ); - return; - } - } + // let name_lower = name.to_ascii_lowercase(); + // if let Some(symbols) = table.symbols.get(&name_lower) { + // for symbol in symbols { + // if symbol.stack_id == stack_id { + // log::debug!( + // "Found duplicate const declaration {} in line {}", + // name, + // line_number + 1 + // ); + // return; + // } + // } + // } let Some(value) = tokens.next() else { return }; let value = value.trim(); - let Some(_type) = get_type(value).or_else(|| table.symbols.get(value).map(|x| x._type)) else { return }; + let value_lower = value.to_ascii_lowercase(); + let Some(_type) = get_type(value_lower.as_str()) + .or_else(|| { + table.symbols.get(value_lower.as_str()).and_then(|symbols| { + symbols.iter().find(|symbol| symbol.stack_id == stack_id).map(|symbol| symbol._type) + }) + }) + else { return }; log::debug!( "Found const declaration {} in line {}", name, line_number + 1 ); - table.symbols.insert( - name_lower, - SymbolInfoMap { - line_number: line_number as u32, - _type, - stack_id, - end_line_number: 0, - value: Some(String::from(value)), - name_no_format: name.to_string(), - }, + + register_const( + table, + line_number, + stack_id, + name, + Some(String::from(value)), + _type, + None, ); } @@ -375,6 +600,7 @@ pub fn process_var_declaration( table: &mut SymbolTable, line_number: usize, stack_id: u32, + _type: &str, ) { let mut tokens = line.split('='); @@ -385,29 +611,27 @@ pub fn process_var_declaration( } let name = name.trim(); - let name_lower = name.to_ascii_lowercase(); - if let Some(symbol) = table.symbols.get(&name_lower) { - if symbol.stack_id == stack_id { - log::debug!( - "Found duplicate var declaration {} in line {}", - name, - line_number + 1 - ); - return; - } - } - // todo: try_insert - // todo: value should be vector of SymbolInfoMap for each possible scope (functions may declare the same variable name) - table.symbols.insert( - name_lower, - SymbolInfoMap { - line_number: line_number as u32, - _type: SymbolType::Var, - stack_id, - end_line_number: 0, - value: None, - name_no_format: name.to_string(), - }, + // let name_lower = name.to_ascii_lowercase(); + // if let Some(symbols) = table.symbols.get(&name_lower) { + // for symbol in symbols { + // if symbol.stack_id == stack_id { + // log::debug!( + // "Found duplicate var declaration {} in line {}", + // name, + // line_number + 1 + // ); + // return; + // } + // } + // } + + register_var( + table, + line_number, + stack_id, + name, + Some(String::from(_type)), + None, ); } @@ -480,13 +704,137 @@ pub fn get_type(value: &str) -> Option { return None; } +fn function_params_and_return_types(line: &str, signature: &FunctionSignature) -> String { + let params = signature + .parameters + .iter() + .map(|param| { + let type_token = token_str(line, ¶m._type); + let name_token = param.name.as_ref().map(|name| token_str(line, name)); + + match name_token { + Some(name) => format!("{}: {}", name, type_token), + None => format!("{}", type_token), + } + }) + // .map(|param| [token_str(line, ¶m.name), token_str(line, ¶m._type)].join(": ")) + .collect::>() + .join(", "); + + let return_types = signature + .return_types + .iter() + .map(|_type| token_str(line, &_type.token).to_string()) + .collect::>() + .join(", "); + + if return_types.is_empty() { + format!("({params})") + } else { + format!("({params}): {return_types}") + } +} + #[cfg(test)] mod tests { use super::*; #[test] fn test1() { - let p = resolve_path(String::from("2.txt"), &Some(String::from("C:/dev/1.txt"))).unwrap(); + let p = resolve_path("2.txt", &Some(String::from("C:/dev/1.txt"))).unwrap(); assert_eq!(p, String::from("C:/dev\\2.txt")); } + + #[test] + fn test2() { + let s = "test line"; + let mut inside_comment = false; + let mut inside_comment2 = false; + let s = strip_comments(s, &mut inside_comment, &mut inside_comment2); + assert_eq!(s, ("test".to_string(), "line".to_string())); + + let s = "test line // comment"; + let mut inside_comment = false; + let mut inside_comment2 = false; + let s = strip_comments(s, &mut inside_comment, &mut inside_comment2); + assert_eq!(s, ("test".to_string(), "line".to_string())); + + let s = "test line /* comment */"; + let mut inside_comment = false; + let mut inside_comment2 = false; + let s = strip_comments(s, &mut inside_comment, &mut inside_comment2); + assert_eq!(s, ("test".to_string(), "line".to_string())); + + let s = "test line /* comment */ test line"; + let mut inside_comment = false; + let mut inside_comment2 = false; + let s = strip_comments(s, &mut inside_comment, &mut inside_comment2); + assert_eq!(s, ("test".to_string(), "line test line".to_string())); + + let s = "test line /* comment */ test line /* comment */"; + let mut inside_comment = false; + let mut inside_comment2 = false; + let s = strip_comments(s, &mut inside_comment, &mut inside_comment2); + assert_eq!(s, ("test".to_string(), "line test line".to_string())); + + let s = "test line"; + let mut inside_comment = true; + let mut inside_comment2 = false; + let s = strip_comments(s, &mut inside_comment, &mut inside_comment2); + assert_eq!(s, ("".to_string(), "".to_string())); + + let s = "test line */ after comment"; + let mut inside_comment = true; + let mut inside_comment2 = false; + let s = strip_comments(s, &mut inside_comment, &mut inside_comment2); + assert_eq!(s, ("after".to_string(), "comment".to_string())); + + let s = " leading whitespace"; + let mut inside_comment = false; + let mut inside_comment2 = false; + let s = strip_comments(s, &mut inside_comment, &mut inside_comment2); + assert_eq!(s, ("leading".to_string(), "whitespace".to_string())); + + let s = " {comment} test {comment} line"; + let mut inside_comment = false; + let mut inside_comment2 = false; + let s = strip_comments(s, &mut inside_comment, &mut inside_comment2); + assert_eq!(s, ("test".to_string(), "line".to_string())); + + let s = " comment} test {comment} line {comment} "; + let mut inside_comment = false; + let mut inside_comment2 = false; + let s = strip_comments(s, &mut inside_comment, &mut inside_comment2); + assert_eq!(s, ("comment}".to_string(), "test line".to_string())); + + let s = " comment} test {comment} line {comment} "; + let mut inside_comment = false; + let mut inside_comment2 = true; + let s = strip_comments(s, &mut inside_comment, &mut inside_comment2); + assert_eq!(s, ("test".to_string(), "line".to_string())); + + let s = "test{ /* */ } line"; + let mut inside_comment = false; + let mut inside_comment2 = false; + let s = strip_comments(s, &mut inside_comment, &mut inside_comment2); + assert_eq!(s, ("test".to_string(), "line".to_string())); + + let s = "test/* {} */ line"; + let mut inside_comment = false; + let mut inside_comment2 = false; + let s = strip_comments(s, &mut inside_comment, &mut inside_comment2); + assert_eq!(s, ("test".to_string(), "line".to_string())); + + let s = "*/test"; + let mut inside_comment = true; + let mut inside_comment2 = false; + let s = strip_comments(s, &mut inside_comment, &mut inside_comment2); + assert_eq!(s, ("test".to_string(), "".to_string())); + + let s = "}test"; + let mut inside_comment = false; + let mut inside_comment2 = true; + let s = strip_comments(s, &mut inside_comment, &mut inside_comment2); + assert_eq!(s, ("test".to_string(), "".to_string())); + } } diff --git a/src/language_service/server.rs b/src/language_service/server.rs index 086a394..ad6a3da 100644 --- a/src/language_service/server.rs +++ b/src/language_service/server.rs @@ -1,7 +1,10 @@ use super::{ - ffi::{DocumentInfo, EditorHandle, Source, Status, SymbolInfo}, + ffi::{DocumentInfo, EditorHandle, Source, Status}, watcher::FileWatcher, - {scanner, symbol_table::SymbolTable}, + { + scanner, + symbol_table::{SymbolInfoMap, SymbolTable}, + }, }; use crate::{ dictionary::{config, ffi::CaseFormat, DictNumByString}, @@ -115,21 +118,18 @@ impl LanguageServer { symbol: &str, handle: EditorHandle, line_number: u32, - ) -> Option { + ) -> Option { let st = SYMBOL_TABLES.lock().unwrap(); let table = st.get(&handle)?; - let map = table.symbols.get(&symbol.to_ascii_lowercase())?; + let symbol_infos = table.symbols.get(&symbol.to_ascii_lowercase())?; - // check if symbol is visible in current scope in current line - if !map.is_visible(line_number) { - return None; + for symbol_info in symbol_infos { + if symbol_info.is_visible_at(line_number) { + // check if symbol is visible in current scope in current line + return Some(symbol_info.clone()); + } } - Some(SymbolInfo { - line_number: map.line_number, - end_line_number: map.end_line_number, - _type: map._type, - value: map.value.clone(), - }) + None } pub fn get_document_info(&self, handle: EditorHandle) -> DocumentInfo { @@ -143,20 +143,27 @@ impl LanguageServer { needle: &str, handle: EditorHandle, line_number: u32, - ) -> Option> { + ) -> Option> { let st = SYMBOL_TABLES.lock().unwrap(); let table = st.get(&handle)?; let needle = needle.to_ascii_lowercase(); - Some( - table - .symbols - .iter() - .filter_map(|(name, map)| { - (name.to_ascii_lowercase().starts_with(&needle) && map.is_visible(line_number)) - .then_some((map.name_no_format.clone(), map.value.clone()?.clone())) - }) - .collect::>(), - ) + + let list = table + .symbols + .iter() + .filter_map(|(name, map)| { + if name.to_ascii_lowercase().starts_with(&needle) { + for symbol_info in map { + if symbol_info.is_visible_at(line_number) { + return Some(name.clone()); + } + } + } + return None; + }) + .collect::>(); + // list.sort_by(|v1, v2| v1.0.cmp(&v2.0)); + Some(list) } fn setup_message_queue() -> Sender<(EditorHandle, String)> { @@ -283,8 +290,6 @@ impl LanguageServer { ); LanguageServer::update_watchers(&visited, handle); - log::debug!("Symbol table is ready: {:?} symbols", table.symbols); - symbol_table.insert(handle, table); status_change(handle, Status::Idle); } diff --git a/src/language_service/symbol_table.rs b/src/language_service/symbol_table.rs index 0809b90..d9b9bbd 100644 --- a/src/language_service/symbol_table.rs +++ b/src/language_service/symbol_table.rs @@ -1,8 +1,53 @@ -use super::ffi::SymbolInfoMap; use std::collections::HashMap; + +#[repr(C)] +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum SymbolType { + Number = 0, + String = 1, + Var = 2, + Label = 3, + ModelName = 4, + Function = 5, +} + +#[derive(Clone, Debug)] +pub struct VisibilityZone { + pub start: u32, // line number where the symbol is defined + pub end: u32, // line number where the symbol can no longer be seen (start of a new function or end of the file) +} + +impl VisibilityZone { + pub fn is_visible_at(&self, line_number: u32) -> bool { + line_number >= self.start && (self.end == 0 || line_number < self.end) + } +} + +#[derive(Clone, Debug)] +pub struct SymbolInfoMap { + pub zones: Vec, + pub stack_id: u32, + pub _type: SymbolType, + pub value: Option, // value of the symbol (for literals) + pub name_no_format: String, // used for autocomplete + pub annotation: Option, +} + +impl SymbolInfoMap { + pub fn is_visible_at(&self, line_number: u32) -> bool { + self.zones + .iter() + .any(|zone| zone.is_visible_at(line_number)) + } + + pub fn add_zone(&mut self, start: u32) { + self.zones.push(VisibilityZone { start, end: 0 }); + } +} + pub struct SymbolTable { - pub symbols: HashMap, + pub symbols: HashMap>, } impl SymbolTable { diff --git a/src/parser/declaration.rs b/src/parser/declaration.rs index 4cefd58..372da83 100644 --- a/src/parser/declaration.rs +++ b/src/parser/declaration.rs @@ -1,8 +1,9 @@ use crate::parser::interface::*; use nom::bytes::complete::{tag, tag_no_case}; -use nom::combinator::map; -use nom::multi::many1; -use nom::sequence::{delimited, separated_pair}; +use nom::character::complete::space1; +use nom::combinator::{map, opt}; +use nom::multi::{many1, separated_list0, separated_list1}; +use nom::sequence::{delimited, pair, preceded, separated_pair, tuple}; use nom::{branch::alt, combinator::consumed}; use nom::{character::complete::multispace0, sequence::terminated}; @@ -15,6 +16,85 @@ pub fn declaration(s: Span) -> R { terminated(alt((statement::statement, const_declaration)), multispace0)(s) } +pub fn function_signature(s: Span) -> R { + map( + consumed(helpers::line(tuple(( + helpers::ws(terminated(tag_no_case("function"), space1)), + helpers::ws(literal::identifier), + function_arguments_and_return_types, + )))), + |(span, (_, name, (parameters, return_types)))| FunctionSignature { + name, + parameters, + return_types, + token: Token::from(span, SyntaxKind::FunctionSignature), + }, + )(s) +} + +pub fn function_arguments_and_return_types( + s: Span, +) -> R<(Vec, Vec)> { + map( + tuple(( + helpers::ws(opt(function_arguments)), + helpers::ws(opt(function_return_types)), + )), + |(parameters, return_types)| { + ( + parameters.unwrap_or_default(), + return_types.unwrap_or_default(), + ) + }, + )(s) +} + +fn function_arguments(s: Span) -> R> { + map( + delimited( + helpers::ws(tag("(")), + separated_list0( + helpers::ws(tag(",")), + consumed(pair( + opt(terminated( // param names are optional in define function + helpers::ws(literal::identifier), + helpers::ws(tag(":")), + )), + helpers::ws(literal::identifier), + )), + ), + helpers::ws(tag(")")), + ), + |args| { + args.into_iter() + .map(|(span, (name, _type))| FunctionParameter { + name, + _type, + token: Token::from(span, SyntaxKind::LocalVariable), + }) + .collect() + }, + )(s) +} + +fn function_return_types(s: Span) -> R> { + map( + preceded( + helpers::ws(tag(":")), + separated_list1(helpers::ws(tag(",")), helpers::ws(literal::identifier)), + ), + |types| { + types + .into_iter() + .map(|_type| FunctionReturnType { + token: _type.clone(), + _type, + }) + .collect() + }, + )(s) +} + pub fn const_declaration(s: Span) -> R { map( consumed(delimited( @@ -107,4 +187,318 @@ end"#, }) ) } + + #[test] + fn test_function_signature() { + let (_, node) = function_signature(Span::from(r#" function foo "#)).unwrap(); + + assert_eq!( + node, + FunctionSignature { + name: Token { + start: 11, + len: 3, + syntax_kind: SyntaxKind::Identifier + }, + parameters: vec![], + return_types: vec![], + token: Token { + start: 1, + len: 14, + syntax_kind: SyntaxKind::FunctionSignature + } + } + ); + + let (_, node) = function_signature(Span::from(r#"function foo: string"#)).unwrap(); + + assert_eq!( + node, + FunctionSignature { + name: Token { + start: 10, + len: 3, + syntax_kind: SyntaxKind::Identifier + }, + parameters: vec![], + return_types: vec![FunctionReturnType { + token: Token { + start: 15, + len: 6, + syntax_kind: SyntaxKind::Identifier + }, + _type: Token { + start: 15, + len: 6, + syntax_kind: SyntaxKind::Identifier + } + }], + token: Token { + start: 1, + len: 20, + syntax_kind: SyntaxKind::FunctionSignature + } + } + ); + + let (_, node) = function_signature(Span::from(r#"function foo()"#)).unwrap(); + + assert_eq!( + node, + FunctionSignature { + name: Token { + start: 10, + len: 3, + syntax_kind: SyntaxKind::Identifier + }, + parameters: vec![], + return_types: vec![], + token: Token { + start: 1, + len: 14, + syntax_kind: SyntaxKind::FunctionSignature + } + } + ); + + let (_, node) = function_signature(Span::from(r#"function foo(): int"#)).unwrap(); + + assert_eq!( + node, + FunctionSignature { + name: Token { + start: 10, + len: 3, + syntax_kind: SyntaxKind::Identifier + }, + parameters: vec![], + return_types: vec![FunctionReturnType { + token: Token { + start: 17, + len: 3, + syntax_kind: SyntaxKind::Identifier + }, + _type: Token { + start: 17, + len: 3, + syntax_kind: SyntaxKind::Identifier + } + }], + token: Token { + start: 1, + len: 19, + syntax_kind: SyntaxKind::FunctionSignature + } + } + ); + + let (_, node) = function_signature(Span::from(r#"function foo(a: int): int"#)).unwrap(); + + assert_eq!( + node, + FunctionSignature { + name: Token { + start: 10, + len: 3, + syntax_kind: SyntaxKind::Identifier + }, + parameters: vec![FunctionParameter { + name: Some(Token { + start: 14, + len: 1, + syntax_kind: SyntaxKind::Identifier + }), + _type: Token { + start: 17, + len: 3, + syntax_kind: SyntaxKind::Identifier + }, + token: Token { + start: 14, + len: 6, + syntax_kind: SyntaxKind::LocalVariable + } + }], + return_types: vec![FunctionReturnType { + token: Token { + start: 23, + len: 3, + syntax_kind: SyntaxKind::Identifier + }, + _type: Token { + start: 23, + len: 3, + syntax_kind: SyntaxKind::Identifier + } + }], + token: Token { + start: 1, + len: 25, + syntax_kind: SyntaxKind::FunctionSignature + } + } + ); + + let (_, node) = + function_signature(Span::from(r#"function foo(a: int, b: string): int"#)).unwrap(); + + assert_eq!( + node, + FunctionSignature { + name: Token { + start: 10, + len: 3, + syntax_kind: SyntaxKind::Identifier + }, + parameters: vec![ + FunctionParameter { + name: Some(Token { + start: 14, + len: 1, + syntax_kind: SyntaxKind::Identifier + }), + _type: Token { + start: 17, + len: 3, + syntax_kind: SyntaxKind::Identifier + }, + token: Token { + start: 14, + len: 6, + syntax_kind: SyntaxKind::LocalVariable + } + }, + FunctionParameter { + name: Some(Token { + start: 22, + len: 1, + syntax_kind: SyntaxKind::Identifier + }), + _type: Token { + start: 25, + len: 6, + syntax_kind: SyntaxKind::Identifier + }, + token: Token { + start: 22, + len: 9, + syntax_kind: SyntaxKind::LocalVariable + } + } + ], + return_types: vec![FunctionReturnType { + token: Token { + start: 34, + len: 3, + syntax_kind: SyntaxKind::Identifier + }, + _type: Token { + start: 34, + len: 3, + syntax_kind: SyntaxKind::Identifier + } + }], + token: Token { + start: 1, + len: 36, + syntax_kind: SyntaxKind::FunctionSignature + } + } + ); + + let (_, node) = function_signature(Span::from( + r#"function foo(a: int, b: string): int, int, int"#, + )) + .unwrap(); + + assert_eq!( + node, + FunctionSignature { + name: Token { + start: 10, + len: 3, + syntax_kind: SyntaxKind::Identifier + }, + parameters: vec![ + FunctionParameter { + name: Some(Token { + start: 14, + len: 1, + syntax_kind: SyntaxKind::Identifier + }), + _type: Token { + start: 17, + len: 3, + syntax_kind: SyntaxKind::Identifier + }, + token: Token { + start: 14, + len: 6, + syntax_kind: SyntaxKind::LocalVariable + } + }, + FunctionParameter { + name: Some(Token { + start: 22, + len: 1, + syntax_kind: SyntaxKind::Identifier + }), + _type: Token { + start: 25, + len: 6, + syntax_kind: SyntaxKind::Identifier + }, + token: Token { + start: 22, + len: 9, + syntax_kind: SyntaxKind::LocalVariable + } + } + ], + return_types: vec![ + FunctionReturnType { + token: Token { + start: 34, + len: 3, + syntax_kind: SyntaxKind::Identifier + }, + _type: Token { + start: 34, + len: 3, + syntax_kind: SyntaxKind::Identifier + } + }, + FunctionReturnType { + token: Token { + start: 39, + len: 3, + syntax_kind: SyntaxKind::Identifier + }, + _type: Token { + start: 39, + len: 3, + syntax_kind: SyntaxKind::Identifier + } + }, + FunctionReturnType { + token: Token { + start: 44, + len: 3, + syntax_kind: SyntaxKind::Identifier + }, + _type: Token { + start: 44, + len: 3, + syntax_kind: SyntaxKind::Identifier + } + } + ], + token: Token { + start: 1, + len: 46, + syntax_kind: SyntaxKind::FunctionSignature + } + } + ); + } } diff --git a/src/parser/interface.rs b/src/parser/interface.rs index 3d64cb8..7f2a7ef 100644 --- a/src/parser/interface.rs +++ b/src/parser/interface.rs @@ -15,6 +15,7 @@ pub enum SyntaxKind { BinaryExpr, ConstDeclaration, ConstInitialization, + FunctionSignature, OperatorBitwiseNot, // ~ OperatorBitwiseAnd, // & @@ -78,6 +79,7 @@ pub enum Node { /// Unary expression, e.g. `~var` Unary(UnaryPrefixExpr), ConstDeclaration(ConstDeclaration), + FunctionSignature(FunctionSignature), } #[derive(Debug, PartialEq, Clone)] @@ -196,5 +198,24 @@ pub struct AST { pub body: Vec, } +#[derive(Debug, PartialEq, Clone)] +pub struct FunctionSignature { + pub name: Token, + pub parameters: Vec, + pub return_types: Vec, + pub token: Token, +} +#[derive(Debug, PartialEq, Clone)] +pub struct FunctionParameter { + pub name: Option, + pub _type: Token, + pub token: Token, +} +#[derive(Debug, PartialEq, Clone)] +pub struct FunctionReturnType { + pub _type: Token, + pub token: Token, +} + pub type Span<'a> = LocatedSpan<&'a str>; pub type R<'a, T> = IResult, T>; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d89c79e..bfeceb3 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3,7 +3,7 @@ use nom::combinator::map; use nom::multi::many1; pub mod interface; -use interface::*; +pub use interface::*; mod binary; mod declaration; @@ -15,6 +15,8 @@ mod statement; mod unary; mod variable; +pub use declaration::{function_signature, function_arguments_and_return_types}; // used in LanguageService + pub fn parse(s: &str) -> R { all_consuming(map(many1(declaration::declaration), |body| AST { body }))(Span::from(s)) } diff --git a/src/utils/compiler_const.rs b/src/utils/compiler_const.rs index e1a2563..0641545 100644 --- a/src/utils/compiler_const.rs +++ b/src/utils/compiler_const.rs @@ -9,6 +9,8 @@ pub const TOKEN_LONGSTRING: i32 = 4; pub const TOKEN_HANDLE: i32 = 5; pub const TOKEN_BOOL: i32 = 6; +pub const TOKEN_DEFINE: i32 = 43; + // blocks pub const TOKEN_HEX: i32 = 45; pub const TOKEN_IF: i32 = 50;