diff --git a/.gitignore b/.gitignore index 3aa22b9f..260e3e77 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /target history.txt *.cast +/target* diff --git a/CHANGELOG.md b/CHANGELOG.md index 0480f21d..0309e88a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,7 @@ # Changelog -All notable changes to this project will be documented in this file. - -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +### [0.2.0-lineclip] - 2025-3-29 +- Use `\n` or `;` to separate statements, which means you don't have to type `;` at the end of every line. +- Use `\\n` (a backslash immediately followed by a newline) to continue a line. ## [Unreleased] diff --git a/src/bin.rs b/src/bin.rs index c85931be..e17f17e3 100644 --- a/src/bin.rs +++ b/src/bin.rs @@ -273,6 +273,12 @@ fn syntax_highlight(line: &str) -> String { (TokenKind::Whitespace, w) => { result.push_str(w); } + (TokenKind::LineBreak, w) => { + result.push_str(w); + } + // (TokenKind::LineContinuation, w) => { + // result.push_str(w); + // } (TokenKind::Comment, w) => { result.push_str("\x1b[38;5;247m"); is_colored = true; @@ -546,14 +552,14 @@ fn run_file(path: PathBuf, env: &mut Environment) -> Result { fn main() -> Result<(), Error> { let matches = App::new( r#" - 888 - 888 - 888 - .d88888 888 888 88888b. .d88b. - d88" 888 888 888 888 "88b d8P Y8b - 888 888 888 888 888 888 88888888 - Y88b 888 Y88b 888 888 888 Y8b. - "Y88888 "Y88888 888 888 "Y8888 + 888 + 888 + 888 + .d88888 888 888 88888b. .d88b. + d88" 888 888 888 888 "88b d8P Y8b + 888 888 888 888 888 888 88888888 + Y88b 888 Y88b 888 888 888 Y8b. 
+ "Y88888 "Y88888 888 888 "Y8888 "#, ) .author(crate_authors!()) diff --git a/src/parser.rs b/src/parser.rs index 32f8567f..9a097b8c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -183,38 +183,62 @@ fn is_symbol_like(kind: TokenKind) -> bool { } fn parse_statement(input: Tokens<'_>) -> IResult, Expression, SyntaxError> { + // let (input, _) = opt(kind(TokenKind::LineBreak))(input)?; let (input, expr) = parse_expression(input)?; - match (&expr, text(";")(input)) { - (Expression::For(_, _, _), Ok((input, _))) => Ok((input, expr)), - (Expression::For(_, _, _), Err(_)) => Ok((input, expr)), - (Expression::If(_, _, _), Ok((input, _))) => Ok((input, expr)), - (Expression::If(_, _, _), Err(_)) => Ok((input, expr)), - - (_, Ok((input, _))) => Ok((input, expr)), - (_, Err(_)) => Err(SyntaxError::expected( - input.get_str_slice(), - ";", - None, - Some("try adding a semicolon"), - )), - } + // dbg!("---[got expression]---", &expr, input.get_str_slice()); + + // Try to consume the statement terminator (a semicolon or a newline) + match &expr { + // Control structures need no explicit terminator + Expression::For(_, _, _) | Expression::If(_, _, _) => { + // opt(kind(TokenKind::NewLine))(input)?; // consume the newline + return Ok((input, expr)); + } + // Ordinary expressions require a terminator + _ => { + // Try to match a semicolon or a newline + // (input, _) = lineterminator(input)?; + // Check whether another terminator (such as a semicolon) is present + // Allow a line continuation at the end of a statement + // let (input, _) = alt(( + // map(kind(TokenKind::LineContinuation), |_| ()), + // map(kind(TokenKind::LineBreak), |_| ()), + // ))(input)?; + let (input, _) = kind(TokenKind::LineBreak)(input)?; + // let (input, _) = opt(kind(TokenKind::LineBreak))(input)?; + // let (input, _) = opt(text(";"))(input)?; + + // dbg!("---[got expression lineEnd]---"); + return Ok((input, expr)); + } + }; } fn parse_script_tokens( input: Tokens<'_>, require_eof: bool, ) -> IResult, Expression, SyntaxError> { - // println!("hmm {}", input); + // print!("passing scripot tokens") + // dbg!("parse script tokens ------>", input); let (input, mut exprs) = many0(parse_statement)(input)?; + // dbg!("parse_statement-->", 
input.get_str_slice(), &exprs); - let (mut input, last) = opt(terminated(parse_expression, opt(text(";"))))(input)?; + // Parse the last line; its trailing LineBreak terminator is optional + let (input, last) = opt(terminated( + parse_expression, + opt(kind(TokenKind::LineBreak)), + ))(input)?; + // dbg!("after terminated-->", input.get_str_slice(), &last); if let Some(expr) = last { exprs.push(expr); } + // New: consume all trailing line breaks + let (input, _) = many0(kind(TokenKind::LineBreak))(input)?; if require_eof { - input = eof(input) + // input.is_empty() + eof(input) .map_err(|_: nom::Err| { SyntaxError::expected(input.get_str_slice(), "end of input", None, None) })? @@ -451,7 +475,8 @@ fn parse_for_loop(input: Tokens<'_>) -> IResult, Expression, SyntaxEr fn parse_if(input: Tokens<'_>) -> IResult, Expression, SyntaxError> { let (input, _) = text("if")(input)?; - + // + // dbg!(input); let (input, cond) = parse_expression_prec_six(input).map_err(|_| { SyntaxError::unrecoverable( input.get_str_slice(), @@ -543,10 +568,19 @@ fn parse_apply_operator(input: Tokens<'_>) -> IResult, Expression, Sy fn parse_expression(input: Tokens<'_>) -> IResult, Expression, SyntaxError> { no_terminating_punctuation(input)?; + // Skip one optional leading LineBreak token before the expression + let (input, _) = opt(kind(TokenKind::LineBreak))(input)?; // consume the optional LineBreak + + // let (input, _) = many0(alt(( + // // match zero or more times + // kind(TokenKind::LineContinuation), // consume the line continuation + // kind(TokenKind::LineBreak), // consume the line break + // )))(input)?; let expr_parser = parse_expression_prec_seven; let (input, head) = expr_parser(input)?; + // let (input, _) = opt(kind(TokenKind::LineContinuation))(input)?; // consume the line continuation let (input, list) = many0(pair( alt((text("|"), text(">>>"), text(">>"), text("<<"))), diff --git a/src/tokenizer.rs b/src/tokenizer.rs index c0941937..62c7fe7f 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -42,7 +42,13 @@ fn parse_token(input: Input) -> TokenizationResult<'_, (Token, Diagnostic)> { if input.is_empty() { Err(NOT_FOUND) } else { + // dbg!("------>", input); + Ok(alt(( + // Handle line continuations and line breaks first (new) 
map_valid_token(line_continuation, TokenKind::Whitespace), + // triple_quote_string, + map_valid_token(linebreak, TokenKind::LineBreak), map_valid_token(long_operator, TokenKind::Operator), map_valid_token(any_punctuation, TokenKind::Punctuation), map_valid_token(any_keyword, TokenKind::Keyword), @@ -141,6 +147,59 @@ fn string_literal(input: Input<'_>) -> TokenizationResult<'_, (Token, Diagnostic let token = Token::new(TokenKind::StringLiteral, range); Ok((rest, (token, diagnostics))) } +// New function: dedicated handling of triple-quoted strings +// fn triple_quote_string(input: Input<'_>) -> TokenizationResult<'_, (Token, Diagnostic)> { +// // 1. Match the opening """ +// let (rest, _) = input.strip_prefix("\"\"\"").ok_or(NOT_FOUND)?; + +// let mut content = String::new(); +// let mut errors = Vec::new(); +// let mut current = rest; +// let start_offset = input.get(offset); + +// // 2. Iterate until the closing """ is found or the input ends +// loop { +// // Detect the closing """ marker +// if let Some(new_rest) = current.strip_prefix("\"\"\"") { +// current = new_rest; +// break; +// } + +// // Handle escape characters (optional, depending on requirements) +// if let Some('\\') = current.chars().next() { +// let (r, escaped_char) = parse_escape(current)?; +// content.push(escaped_char); +// current = r; +// continue; +// } + +// // Consume ordinary characters +// let next_special = current.find(|c| c == '\\' || c == '"'); +// let (text_part, remaining) = match next_special { +// Some(pos) => current.split_at(pos), +// None => current.split_at(current.len()), +// }; + +// content.push_str(text_part.to_str(current.get(str))); +// current = remaining; + +// // Input exhausted without finding the terminator +// if current.is_empty() { +// errors.push(input.get(str).get(start_offset..input.len())); +// break; +// } +// } + +// // 3. 
生成Token和诊断信息 +// let (rest, range) = input.split_until(current); +// let token = Token::new(TokenKind::StringLiteral, range); +// let diag = if errors.is_empty() { +// Diagnostic::Valid +// } else { +// Diagnostic::InvalidStringEscapes(errors.into_boxed_slice()) +// }; +// Ok((rest, (token, diag))) +// } fn number_literal(input: Input<'_>) -> TokenizationResult<'_, (Token, Diagnostic)> { // skip sign @@ -211,6 +270,86 @@ fn whitespace(input: Input<'_>) -> TokenizationResult<'_> { Ok(input.split_at(ws_chars)) } +fn find_prev_char(original_str: &str, current_offset: usize) -> Option { + // let current_offset = original_str.len() - rest.len(); + let first_c = original_str.get(..current_offset); + match first_c { + Some(c) => { + if !c.is_empty() { + return c.chars().last(); + } + // 2. Scanning backwards, sum the byte length of the whitespace (excluding '\n') at the end of the prefix. NOTE(review): this point is only reached when the prefix is empty, because the non-empty case returns above, so steps 2-4 never skip any whitespace - confirm whether the early return is intended + let ws_len = c + .chars() + .rev() + .take_while(|c| c.is_whitespace() && *c != '\n') + .map(|c| c.len_utf8()) + .sum(); + + // 3. Safely cut off the whitespace part + let ws_start = current_offset.saturating_sub(ws_len); + let before_nl = original_str.get(..ws_start).unwrap_or(""); + + // 4. Take the last character before that whitespace + return before_nl.chars().last(); + } + None => return None, + } +} + +fn linebreak(input: Input<'_>) -> TokenizationResult<'_> { + // dbg!("--->", input.as_str_slice()); + + if let Some((rest, nl_slice)) = input.strip_prefix("\n") { + // dbg!(nl_slice); + let original_str = input.as_original_str(); + + // 1. Compute the byte position of the newline + let current_offset = original_str.len().saturating_sub(rest.len() + 1); + + match find_prev_char(original_str, current_offset) { + Some(c) => { + // dbg!(c); + if matches!(c, '{' | '(' | '[' | ',' | '>' | '=' | ';' | '\n' | '\\') { + // skip ; and \n because there's already a linebreak parsed. 
+ // > is for -> + // dbg!("=== skip "); + return Err(NOT_FOUND); + } + } + // No preceding character could be read: skip + None => return Err(NOT_FOUND), + } + // dbg!("---> LineBreak "); + + Ok((rest, nl_slice)) + } else if let Some((rest, matched)) = input.strip_prefix(";") { + Ok((rest, matched)) + } else { + Err(NOT_FOUND) + } +} +// New: line-continuation tokenizer function +fn line_continuation(input: Input<'_>) -> TokenizationResult<'_> { + if let Some((rest, matched)) = input.strip_prefix("\\\n") { + // println!("rest={},matched=", rest, matched); + // // dbg!(rest, matched); + Ok((rest, matched)) + } else { + Err(NOT_FOUND) + } +} +// New: line-continuation recognition logic +// fn line_continuation(input: Input<'_>) -> TokenizationResult<'_> { +// if let Some((rest, _)) = input.strip_prefix("\\") { +// // Consume all following whitespace (including newlines) +// let ws = rest.chars().take_while(char::is_ascii_digit).count(); +// let (rest, _) = rest.split_at(ws); +// Ok((rest, input.split_at(1).1)) +// } else { +// Err(NOT_FOUND) +// } +// } fn comment(input: Input<'_>) -> TokenizationResult<'_> { if input.starts_with('#') { let len = input diff --git a/src/tokens.rs b/src/tokens.rs index 339cbfa9..67ad7928 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -105,6 +105,8 @@ pub enum TokenKind { BooleanLiteral, Symbol, Whitespace, + LineBreak, //add newline + // LineContinuation, //add linecontinue Comment, }