Skip to content

Commit 50565e0

Browse files
committed
Add calclet
1 parent 3d76242 commit 50565e0

File tree

5 files changed

+380
-0
lines changed

5 files changed

+380
-0
lines changed

calclet/Cargo.lock

+117
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

calclet/Cargo.toml

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
[package]
2+
name = "calclet"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
7+
8+
[dependencies]
9+
anyhow = "1"
10+
regex = "1"
11+
thiserror = "1"

calclet/src/lexer.rs

+205
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
use crate::token::Token;
2+
use regex::Regex;
3+
use std::str::FromStr;
4+
use std::sync::OnceLock;
5+
6+
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
7+
pub enum LexicalError {
8+
#[error("unexpected character: '{0}'")]
9+
UnexpectedCharacter(char),
10+
11+
#[error("unexpected end of file")]
12+
UnexpectedEndOfFile,
13+
14+
#[error("undefined escape sequence: '\\{0}'")]
15+
UndefinedEscapeSequence(char),
16+
}
17+
18+
// Success: Ok(Some((token, next_index)))
19+
// Failure: Err(LexicalError)
20+
// EOF: Ok(None)
21+
type LexResult = Result<Option<(Token, usize)>, LexicalError>;
22+
23+
fn ok(token: Token, bytes_consumed: usize) -> LexResult {
24+
Ok(Some((token, bytes_consumed)))
25+
}
26+
27+
// Evaluates to a reference to a `Regex` built from `$pattern`.
//
// The regex is compiled the first time the expression is evaluated and is
// then served from a `static` cache; every use of the macro expands to its
// own cache. Panics if `$pattern` is not a valid regular expression.
macro_rules! static_regex {
    ($pattern:expr) => {{
        static CACHE: OnceLock<Regex> = OnceLock::new();
        let compile = || Regex::new($pattern).unwrap();
        CACHE.get_or_init(compile)
    }};
}
33+
34+
// Cuts a single token from `input` and returns `(token, bytes_consumed)`.
35+
fn lex(input: &str) -> LexResult {
36+
let Some(first) = input.chars().next() else {
37+
return Ok(None); // EOF
38+
};
39+
match first {
40+
'!' => {
41+
return if second(input) == Some('=') {
42+
ok(Token::NotEq, 2)
43+
} else {
44+
ok(Token::Exclamation, 1)
45+
}
46+
}
47+
'(' => return ok(Token::LParen, 1),
48+
')' => return ok(Token::RParen, 1),
49+
'*' => return ok(Token::Asterisk, 1),
50+
'+' => return ok(Token::Plus, 1),
51+
'-' => return ok(Token::Minus, 1),
52+
'/' => return ok(Token::Slash, 1),
53+
';' => return ok(Token::Semicolon, 1),
54+
'<' => {
55+
return if second(input) == Some('=') {
56+
ok(Token::LtEq, 2)
57+
} else {
58+
ok(Token::Lt, 1)
59+
}
60+
}
61+
'=' => {
62+
return if second(input) == Some('=') {
63+
ok(Token::EqEq, 2)
64+
} else {
65+
ok(Token::Eq, 1)
66+
}
67+
}
68+
'>' => {
69+
return if second(input) == Some('=') {
70+
ok(Token::GtEq, 2)
71+
} else {
72+
ok(Token::Gt, 1)
73+
}
74+
}
75+
'"' => return lex_string_literal(input),
76+
_ => {} // fallthrough
77+
}
78+
79+
let re_identifier_or_reserved = static_regex!("^[a-zA-Z_][a-zA-Z0-9_]*");
80+
if let Some(m) = re_identifier_or_reserved.find(input) {
81+
let s = m.as_str();
82+
let token = match s {
83+
"let" => Token::Let,
84+
"if" => Token::If,
85+
"then" => Token::Then,
86+
"else" => Token::Else,
87+
_ => Token::Identifier(s.into()),
88+
};
89+
return ok(token, m.end());
90+
}
91+
92+
#[rustfmt::skip]
93+
let re_number = static_regex!(r"(?x)^
94+
(0|[1-9][0-9]*) # integer
95+
([.][0-9]+)? # fraction
96+
([eE][-+]?[0-9]+)? # exponent
97+
");
98+
if let Some(m) = re_number.find(input) {
99+
let n = f64::from_str(m.as_str()).unwrap();
100+
return ok(Token::Number(n), m.end());
101+
}
102+
103+
Err(LexicalError::UnexpectedCharacter(first))
104+
}
105+
106+
fn lex_string_literal(input: &str) -> LexResult {
107+
let mut chars = input.chars();
108+
chars.next(); // skip '"'
109+
110+
let mut string_closed = false;
111+
let mut buffer = String::new();
112+
while let Some(c) = chars.next() {
113+
match c {
114+
'\\' => {
115+
let Some(c2) = chars.next() else {
116+
return Err(LexicalError::UnexpectedEndOfFile);
117+
};
118+
match c2 {
119+
'"' => buffer.push('"'),
120+
'\\' => buffer.push('\\'),
121+
'/' => buffer.push('/'),
122+
'n' => buffer.push('\n'),
123+
'r' => buffer.push('\r'),
124+
't' => buffer.push('\t'),
125+
_ => return Err(LexicalError::UndefinedEscapeSequence(c2)),
126+
}
127+
}
128+
'"' => {
129+
string_closed = true;
130+
break;
131+
}
132+
_ => buffer.push(c),
133+
}
134+
}
135+
if !string_closed {
136+
return Err(LexicalError::UnexpectedEndOfFile);
137+
}
138+
139+
let bytes_consumed = input.len() - chars.as_str().len();
140+
ok(Token::String(buffer), bytes_consumed)
141+
}
142+
143+
// Same as `lex` except that it ignores leading whitespaces and comments.
144+
fn lex_strip(input: &str) -> LexResult {
145+
#[rustfmt::skip]
146+
let re_whitespaces = static_regex!(r"(?x)^
147+
[\t\n\r\ ]* # whitespaces
148+
(
149+
//.*(\n|$) # comment
150+
[\t\n\r\ ]* # whitespaces
151+
)*
152+
");
153+
match re_whitespaces.find(input) {
154+
Some(m) if !m.is_empty() => {
155+
let r = lex(&input[m.end()..]);
156+
match r {
157+
Ok(Some((token, bytes_consumed))) => ok(token, m.end() + bytes_consumed),
158+
_ => r,
159+
}
160+
}
161+
_ => lex(input),
162+
}
163+
}
164+
165+
// Returns the second character of `input` if any.
166+
fn second(input: &str) -> Option<char> {
167+
let mut chars = input.chars();
168+
chars.next();
169+
chars.next()
170+
}
171+
172+
// Custom lexer for lalrpop.
173+
pub struct Lexer<'input> {
174+
input: &'input str,
175+
bytes_consumed: usize,
176+
}
177+
178+
impl<'input> Lexer<'input> {
179+
pub fn new(input: &'input str) -> Self {
180+
Self {
181+
input,
182+
bytes_consumed: 0,
183+
}
184+
}
185+
}
186+
187+
impl<'input> Iterator for Lexer<'input> {
188+
type Item = Result<(usize, Token, usize), LexicalError>;
189+
190+
fn next(&mut self) -> Option<Self::Item> {
191+
match lex_strip(&self.input[self.bytes_consumed..]) {
192+
// Success
193+
Ok(Some((token, bytes_consumed))) => {
194+
let span_start = self.bytes_consumed;
195+
let span_end = self.bytes_consumed + bytes_consumed;
196+
self.bytes_consumed = span_end;
197+
Some(Ok((span_start, token, span_end)))
198+
}
199+
// Failure
200+
Err(e) => Some(Err(e)),
201+
// EOF
202+
Ok(None) => None,
203+
}
204+
}
205+
}

calclet/src/main.rs

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
mod lexer;
2+
mod token;
3+
4+
// Sample calclet source text that `main` feeds to `lexer::Lexer`.
static SAMPLE_CODE: &str = "// This is a comment
5+
let x = 42;
6+
let y = 3.14;
7+
let z = \"Hello, World!\";
8+
if y >= 0 then
9+
x + y
10+
else
11+
x - y
12+
";
13+
14+
fn main() -> anyhow::Result<()> {
15+
for r in lexer::Lexer::new(SAMPLE_CODE) {
16+
let (span_start, token, span_end) = r?;
17+
println!("[{:3}:{:3}] {:?}", span_start, span_end, token);
18+
}
19+
Ok(())
20+
}

calclet/src/token.rs

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Tokens produced by the lexer.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // ---- tokens that carry a value ----
    // Numeric literal, stored as a float.
    Number(f64),
    // String literal with its escape sequences already decoded.
    String(String),
    // Any name that is not a reserved word.
    Identifier(String),

    // ---- reserved words ----
    // `let`
    Let,
    // `if`
    If,
    // `then`
    Then,
    // `else`
    Else,

    // ---- one-character punctuation ----
    // `!`
    Exclamation,
    // `(`
    LParen,
    // `)`
    RParen,
    // `*`
    Asterisk,
    // `+`
    Plus,
    // `-`
    Minus,
    // `/`
    Slash,
    // `;`
    Semicolon,
    // `<`
    Lt,
    // `=`
    Eq,
    // `>`
    Gt,

    // ---- two-character operators ----
    // `<=`
    LtEq,
    // `==`
    EqEq,
    // `!=`
    NotEq,
    // `>=`
    GtEq,
}

0 commit comments

Comments
 (0)