Skip to content

Commit bc04936

Browse files
committed
ls: Implement semantic tokens
1 parent 635d7b3 commit bc04936

File tree

6 files changed

+238
-22
lines changed

6 files changed

+238
-22
lines changed

Cargo.lock

+2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

omniwsa-ls/Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ keywords = ["whitespace", "language-server"]
1010
categories = ["compilers"]
1111

1212
[dependencies]
13+
bstr = "1.11"
1314
lsp-server = "0.7"
1415
lsp-types = "0.97"
16+
omniwsa = { path = ".." }
1517
serde = "1.0"
1618
serde_json = "1.0"

omniwsa-ls/src/main.rs

+193-15
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,32 @@
11
use std::{
22
error::Error,
3+
fs,
34
io::{self, ErrorKind},
45
process::exit,
56
};
67

8+
use bstr::ByteSlice;
79
use lsp_server::{Connection, Message, Request, Response};
810
use lsp_types::{
911
request::{Request as _, SemanticTokensFullRequest},
1012
InitializeParams, SemanticToken, SemanticTokenModifier, SemanticTokenType, SemanticTokens,
1113
SemanticTokensFullOptions, SemanticTokensLegend, SemanticTokensOptions, SemanticTokensParams,
1214
SemanticTokensResult, SemanticTokensServerCapabilities, ServerCapabilities,
1315
};
16+
use omniwsa::{
17+
dialects::{Dialect, Palaiologos},
18+
tokens::{
19+
comment::BlockCommentError,
20+
string::{CharError, QuotedError, StringError},
21+
Token,
22+
},
23+
};
1424
use serde_json::{from_value as from_json, to_value as to_json};
1525

26+
// TODO:
27+
// - Implement text document API, instead of reading from disk.
28+
// - Record spans in tokens.
29+
1630
fn main() {
1731
if let Err(err) = do_main() {
1832
eprintln!("Error: {err}");
@@ -39,11 +53,13 @@ fn server_capabilities() -> ServerCapabilities {
3953
work_done_progress_options: Default::default(),
4054
legend: SemanticTokensLegend {
4155
token_types: vec![
56+
SemanticTokenType::VARIABLE,
4257
SemanticTokenType::FUNCTION,
4358
SemanticTokenType::KEYWORD,
4459
SemanticTokenType::COMMENT,
4560
SemanticTokenType::STRING,
4661
SemanticTokenType::NUMBER,
62+
SemanticTokenType::OPERATOR,
4763
],
4864
token_modifiers: vec![
4965
SemanticTokenModifier::DECLARATION,
@@ -58,40 +74,119 @@ fn server_capabilities() -> ServerCapabilities {
5874
}
5975
}
6076

77+
// The order corresponds to the index in SemanticTokensLegend::token_types.
// Each variant's discriminant is cast with `as _` into
// SemanticToken::token_type, so the declaration order here must stay in
// lockstep with the `token_types` vec built in server_capabilities().
#[derive(Clone, Copy, Debug)]
enum TokenType {
    Variable,
    Function,
    Keyword,
    Comment,
    String,
    Number,
    Operator,
}
88+
89+
// The order corresponds to the index in SemanticTokensLegend::token_modifiers.
// NOTE(review): main_loop assigns these discriminants directly to
// SemanticToken::token_modifiers_bitset (`TokenModifier::Declaration as _`),
// but the LSP encodes modifiers as a bitset where bit i corresponds to
// token_modifiers[i], i.e. `1 << index`. As written, Declaration (0) sets no
// bits at all and Definition (1) sets the Declaration bit. Consider a
// `bit()` helper returning `1 << self as u32` — verify against the 3.17 spec.
#[derive(Clone, Copy, Debug)]
enum TokenModifier {
    Declaration,
    Definition,
}
95+
6196
fn main_loop(
6297
connection: Connection,
6398
_initialize_params: InitializeParams,
6499
) -> Result<(), Box<dyn Error + Send + Sync>> {
65100
for msg in &connection.receiver {
66101
match msg {
67102
Message::Request(req) => {
68-
eprintln!(
69-
"Received request {} #{}: {:?}",
70-
req.method, req.id, req.params,
71-
);
103+
eprintln!("Receive {req:?}");
72104
if connection.handle_shutdown(&req)? {
73105
return Ok(());
74106
}
75107
let Request { id, method, params } = req;
76108
match &*method {
77109
SemanticTokensFullRequest::METHOD => {
78-
let _params: SemanticTokensParams = from_json(params)?;
79-
let tokens = vec![SemanticToken {
80-
delta_line: 1,
81-
delta_start: 2,
82-
length: 3,
83-
token_type: 0,
84-
token_modifiers_bitset: 0,
85-
}];
110+
// https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocument_semanticTokens
111+
// https://code.visualstudio.com/api/language-extensions/semantic-highlight-guide
112+
// https://github.com/rust-lang/rust-analyzer/blob/master/crates/rust-analyzer/src/lsp/semantic_tokens.rs
113+
// https://github.com/rust-lang/rust-analyzer/blob/c4e040ea8dc4651569514bd8a8d8b293b49390a6/crates/rust-analyzer/src/lsp/capabilities.rs#L123
114+
115+
let params: SemanticTokensParams = from_json(params)?;
116+
// TODO: Implement text document API, instead of reading
117+
// from disk.
118+
let path = params.text_document.uri.as_str();
119+
let path = path.strip_prefix("file://").unwrap_or(path);
120+
let src = fs::read(path)?;
121+
let tokens = Palaiologos::new().lex(&src);
122+
123+
let mut tokens_out = Vec::with_capacity(tokens.len());
124+
let (mut curr_line, mut curr_col) = (0, 0);
125+
let (mut prev_line, mut prev_col) = (0, 0);
126+
for tok in &tokens {
127+
eprintln!("{tok:?}");
128+
let ungrouped = tok.ungroup();
129+
let ty = match ungrouped {
130+
Token::Mnemonic(_) => Some(TokenType::Keyword),
131+
Token::Integer(_) => Some(TokenType::Number),
132+
Token::String(_) | Token::Char(_) => Some(TokenType::String),
133+
Token::Variable(_) => Some(TokenType::Variable),
134+
Token::Label(_) => Some(TokenType::Function),
135+
Token::LabelColon(_) => Some(TokenType::Operator),
136+
Token::Space(_) | Token::LineTerm(_) | Token::Eof(_) => None,
137+
Token::InstSep(_) | Token::ArgSep(_) => Some(TokenType::Operator),
138+
Token::LineComment(_) | Token::BlockComment(_) => {
139+
Some(TokenType::Comment)
140+
}
141+
Token::Word(_) => Some(TokenType::Variable),
142+
Token::Quoted(_) | Token::Spliced(_) => panic!("not ungrouped"),
143+
Token::Error(_) => None,
144+
Token::Placeholder => panic!("placeholder"),
145+
};
146+
let modifiers = match ungrouped {
147+
Token::Label(_) => TokenModifier::Declaration as _,
148+
Token::Variable(_) => TokenModifier::Definition as _,
149+
_ => 0,
150+
};
151+
let (len, hlen, vlen) = token_len(tok);
152+
let (mut next_line, mut next_col) = (curr_line, curr_col);
153+
if vlen != 0 {
154+
next_col = 0;
155+
next_line += vlen;
156+
}
157+
next_col += hlen;
158+
if let Some(ty) = ty {
159+
let token_out = SemanticToken {
160+
delta_line: (curr_line - prev_line) as _,
161+
delta_start: if curr_line == prev_line {
162+
(curr_col - prev_col) as _
163+
} else {
164+
curr_col as _
165+
},
166+
length: len as _,
167+
token_type: ty as _,
168+
token_modifiers_bitset: modifiers,
169+
};
170+
eprintln!("=> {token_out:?}");
171+
tokens_out.push(token_out);
172+
prev_line = curr_line;
173+
prev_col = curr_col;
174+
}
175+
curr_line = next_line;
176+
curr_col = next_col;
177+
}
178+
86179
let result = Some(SemanticTokensResult::Tokens(SemanticTokens {
87180
result_id: None,
88-
data: tokens,
181+
data: tokens_out,
89182
}));
90183
let resp = Response {
91184
id,
92185
result: Some(to_json(&result)?),
93186
error: None,
94187
};
188+
eprintln!("Send {resp:?}");
189+
eprintln!();
95190
connection.sender.send(Message::Response(resp))?;
96191
}
97192
_ => {
@@ -103,12 +198,95 @@ fn main_loop(
103198
}
104199
}
105200
Message::Response(resp) => {
106-
eprintln!("Received response: {resp:?}");
201+
eprintln!("Receive {resp:?}");
107202
}
108203
Message::Notification(notif) => {
109-
eprintln!("Received notification: {notif:?}");
204+
eprintln!("Receive {notif:?}");
110205
}
111206
}
112207
}
113208
Ok(())
114209
}
210+
211+
/// Computes the length of the token. Returns the linear, horizontal, vertical
/// lengths in chars.
///
/// - linear: total char count of the token, including its delimiters.
/// - horizontal: char count on the token's final line (resets at each `'\n'`),
///   i.e. the column advance contributed by the token.
/// - vertical: number of `'\n'` chars contained in the token.
///
/// Delimiter lengths (`sigil()`, `quote()`, comment markers) are byte lengths
/// from `.len()`, while `text` is measured in decoded chars (bstr's
/// `ByteSlice::chars`); these agree only for ASCII delimiters.
/// NOTE(review): assumes delimiters are ASCII — verify for all dialects.
// TODO: Record spans in tokens instead of this hack.
fn token_len(tok: &Token<'_>) -> (usize, usize, usize) {
    // Simple tokens are measured as `text` plus fixed-size delimiters:
    // `len_before` is the opening sigil/quote/marker (assumed to sit on the
    // token's first line) and `len_after` the closing delimiter, which is 0
    // when the token is unterminated (no closer was consumed).
    let (text, len_before, len_after): (&[u8], usize, usize) = match tok {
        Token::Mnemonic(tok) => (&tok.mnemonic, 0, 0),
        Token::Integer(tok) => (&tok.literal, 0, 0),
        Token::String(tok) => (
            &tok.literal,
            tok.quotes.quote().len(),
            if tok.errors.contains(StringError::Unterminated) {
                0
            } else {
                tok.quotes.quote().len()
            },
        ),
        Token::Char(tok) => (
            &tok.literal,
            tok.quotes.quote().len(),
            if tok.errors.contains(CharError::Unterminated) {
                0
            } else {
                tok.quotes.quote().len()
            },
        ),
        Token::Variable(tok) => (&tok.ident, tok.style.sigil().len(), 0),
        Token::Label(tok) => (&tok.label, tok.style.sigil().len(), 0),
        Token::LabelColon(_) => (b":", 0, 0),
        Token::Space(tok) => (&tok.space, 0, 0),
        Token::LineTerm(tok) => (tok.style.as_str().as_bytes(), 0, 0),
        Token::Eof(_) => (b"", 0, 0),
        Token::InstSep(tok) => (tok.style.as_str().as_bytes(), 0, 0),
        Token::ArgSep(tok) => (tok.style.as_str().as_bytes(), 0, 0),
        Token::LineComment(tok) => (tok.text, tok.style.prefix().len(), 0),
        Token::BlockComment(tok) => (
            tok.text,
            tok.style.open().len(),
            if tok.errors.contains(BlockCommentError::Unterminated) {
                0
            } else {
                tok.style.close().len()
            },
        ),
        Token::Word(tok) => (&tok.word, 0, 0),
        Token::Quoted(tok) => {
            // Recurse into the wrapped token, then account for the quotes.
            // NOTE(review): this accounting looks suspect — for a single-line
            // (vlen == 0) terminated token only ONE quote's length is added
            // (the opening quote appears uncounted), and for a multiline token
            // the opening quote is also added to `hlen`, which should describe
            // only the LAST line. Verify intended behavior.
            let (len, hlen, vlen) = token_len(&tok.inner);
            let mut quotes = 0;
            if vlen != 0 {
                quotes += tok.quotes.quote().len();
            }
            if !tok.errors.contains(QuotedError::Unterminated) {
                quotes += tok.quotes.quote().len();
            }
            return (len + quotes, hlen + quotes, vlen);
        }
        Token::Spliced(tok) => {
            // Sum the parts: linear and vertical lengths accumulate, while
            // the horizontal length restarts whenever a part contains a
            // newline (so the final hlen describes the last line only).
            let (mut spliced_len, mut spliced_hlen, mut spliced_vlen) = (0, 0, 0);
            for tok in &tok.tokens {
                let (len, hlen, vlen) = token_len(tok);
                if vlen != 0 {
                    spliced_hlen = 0;
                    spliced_vlen += vlen;
                }
                spliced_len += len;
                spliced_hlen += hlen;
            }
            return (spliced_len, spliced_hlen, spliced_vlen);
        }
        Token::Error(tok) => (&tok.text, 0, 0),
        Token::Placeholder => panic!("placeholder"),
    };
    // Count chars in `text`, seeding both accumulators with the opening
    // delimiter (it lies on the first line, so `hlen` correctly resets past
    // it if a newline follows). `'\n'` resets the horizontal count and bumps
    // the vertical count; every char contributes to the linear count.
    let (len, hlen, vlen) =
        text.chars()
            .fold((len_before, len_before, 0), |(len, hlen, vlen), ch| {
                if ch == '\n' {
                    (len + 1, 0, vlen + 1)
                } else {
                    (len + 1, hlen + 1, vlen)
                }
            });
    // The closing delimiter lands on the final line, so it extends both the
    // linear and horizontal lengths.
    (len + len_after, hlen + len_after, vlen)
}

src/dialects/burghard/dialect.rs

+16-2
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,14 @@
22
33
use crate::{
44
dialects::{
5-
burghard::{option::OptionNester, parse::Parser},
5+
burghard::{lex::Lexer, option::OptionNester, parse::Parser},
66
define_mnemonics,
77
dialect::DialectState,
88
Dialect,
99
},
10+
lex::Lex,
1011
syntax::Cst,
11-
tokens::integer::IntegerSyntax,
12+
tokens::{integer::IntegerSyntax, Token},
1213
};
1314

1415
// TODO:
@@ -66,6 +67,19 @@ impl Dialect for Burghard {
6667
OptionNester::new().nest(&mut Parser::new(src, dialect))
6768
}
6869

70+
fn lex<'s>(src: &'s [u8], _dialect: &DialectState<Self>) -> Vec<Token<'s>> {
71+
let mut lex = Lexer::new(src);
72+
let mut toks = Vec::new();
73+
loop {
74+
let tok = lex.next_token();
75+
if let Token::Eof(_) = tok {
76+
break;
77+
}
78+
toks.push(tok);
79+
}
80+
toks
81+
}
82+
6983
fn make_integers() -> IntegerSyntax {
7084
IntegerSyntax::haskell()
7185
}

src/dialects/dialect.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,7 @@ pub trait Dialect {
3434
fn parse<'s>(src: &'s [u8], dialect: &DialectState<Self>) -> Cst<'s>;
3535

3636
/// Lexes a Whitespace assembly program in the dialect.
37-
fn lex<'s>(_src: &'s [u8], _dialect: &DialectState<Self>) -> Vec<Token<'s>> {
38-
unimplemented!();
39-
}
37+
fn lex<'s>(src: &'s [u8], dialect: &DialectState<Self>) -> Vec<Token<'s>>;
4038

4139
/// Constructs an integer syntax description for this dialect.
4240
fn make_integers() -> IntegerSyntax;

src/dialects/palaiologos/dialect.rs

+24-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,18 @@
11
//! Parsing for the Palaiologos Whitespace assembly dialect.
22
33
use crate::{
4-
dialects::{define_mnemonics, dialect::DialectState, palaiologos::parse::Parser, Dialect},
4+
dialects::{
5+
define_mnemonics,
6+
dialect::DialectState,
7+
palaiologos::{lex::Lexer, parse::Parser},
8+
Dialect,
9+
},
10+
lex::Lex,
511
syntax::Cst,
6-
tokens::integer::{BaseStyle, DigitSep, Integer, IntegerSyntax, SignStyle},
12+
tokens::{
13+
integer::{BaseStyle, DigitSep, Integer, IntegerSyntax, SignStyle},
14+
Token,
15+
},
716
};
817

918
/// Palaiologos Whitespace assembly dialect.
@@ -55,6 +64,19 @@ impl Dialect for Palaiologos {
5564
Parser::new(src, dialect).parse()
5665
}
5766

67+
fn lex<'s>(src: &'s [u8], dialect: &DialectState<Self>) -> Vec<Token<'s>> {
68+
let mut lex = Lexer::new(src, dialect);
69+
let mut toks = Vec::new();
70+
loop {
71+
let tok = lex.next_token();
72+
if let Token::Eof(_) = tok {
73+
break;
74+
}
75+
toks.push(tok);
76+
}
77+
toks
78+
}
79+
5880
/// Constructs an integer syntax description for this dialect.
5981
///
6082
/// # Syntax

0 commit comments

Comments
 (0)