Skip to content

Commit

Permalink
Add lingua franca language (XAMPPRocky#993)
Browse files Browse the repository at this point in the history
  • Loading branch information
oowekyala authored and ErikSchierboom committed Jun 7, 2024
1 parent b7624ab commit fde0238
Show file tree
Hide file tree
Showing 7 changed files with 188 additions and 24 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,7 @@ Kotlin
Lean
Less
LFE
Lingua Franca
LinkerScript
Liquid
Lisp
Expand Down
9 changes: 9 additions & 0 deletions languages.json
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,15 @@
"extensions": ["liquid"],
"multi_line_comments": [["<!--", "-->"], ["{% comment %}", "{% endcomment %}"]]
},
"LinguaFranca": {
"name": "Lingua Franca",
"line_comment": ["//", "#"],
"important_syntax": ["{="],
"multi_line_comments": [["/*", "*/"]],
"quotes": [["\\\"", "\\\""]],
"nested": true,
"extensions": ["lf"]
},
"LinkerScript": {
"name": "LD Script",
"multi_line_comments": [["/*", "*/"]],
Expand Down
36 changes: 24 additions & 12 deletions src/language/embedding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ pub static END_TEMPLATE: Lazy<Regex> = Lazy::new(|| Regex::new(r#"</template>"#)
pub static STARTING_MARKDOWN_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r#"```\S+\s"#).unwrap());
pub static ENDING_MARKDOWN_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r#"```\s?"#).unwrap());

/// Matches the `{=` token that opens an embedded target-language block in a
/// Lingua Franca file.
pub static STARTING_LF_BLOCK_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r#"\{="#).unwrap());
/// Matches the `=}` token that closes an embedded target-language block in a
/// Lingua Franca file.
pub static ENDING_LF_BLOCK_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r#"=}"#).unwrap());

/// A memory of a regex matched.
/// The values provided by `Self::start` and `Self::end` are in the same space as the
/// start value supplied to `RegexCache::build`
Expand Down Expand Up @@ -61,7 +64,8 @@ pub(crate) struct RegexCache<'a> {
/// as well as the actual matches
pub(crate) enum RegexFamily<'a> {
HtmlLike(HtmlLike<'a>),
Markdown(Markdown<'a>),
LinguaFranca(SimpleCapture<'a>),
Markdown(SimpleCapture<'a>),
Rust,
}

Expand All @@ -71,10 +75,11 @@ pub(crate) struct HtmlLike<'a> {
start_template: Option<Box<[Capture<'a>]>>,
}

pub(crate) struct Markdown<'a> {
/// Saved matches of a single "start of embedded block" regex.
///
/// Used as the payload of both the `Markdown` and the `LinguaFranca` regex
/// families, which only need to remember where their opening delimiters are.
pub(crate) struct SimpleCapture<'a> {
// Result of `save_captures` for the start regex; `make_capture` never keeps
// a `SimpleCapture` whose `starts` is `None`.
starts: Option<Box<[Capture<'a>]>>,
}


impl<'a> HtmlLike<'a> {
pub fn start_script_in_range(
&'a self,
Expand All @@ -101,10 +106,22 @@ impl<'a> HtmlLike<'a> {
}
}

impl<'a> Markdown<'a> {
impl<'a> SimpleCapture<'a> {
    /// Returns the first saved start capture that `filter_range` yields for
    /// `start..end`, or `None` when nothing was saved or nothing is in range.
    pub fn starts_in_range(&'a self, start: usize, end: usize) -> Option<&Capture<'a>> {
        let saved = self.starts.as_ref()?;
        let mut in_range = filter_range(saved, start, end)?;
        in_range.next()
    }

    /// Runs `regex` over `lines` via `save_captures` and wraps the result.
    ///
    /// Yields `None` when `save_captures` found nothing, so callers can skip
    /// building a regex family that would never match.
    fn make_capture(regex: &Regex, lines: &'a [u8], start: usize, end: usize) -> Option<SimpleCapture<'a>> {
        save_captures(regex, lines, start, end).map(|found| SimpleCapture {
            starts: Some(found),
        })
    }
}

fn filter_range<'a>(
Expand Down Expand Up @@ -139,17 +156,12 @@ impl<'a> RegexCache<'a> {
pub(crate) fn build(lang: LanguageType, lines: &'a [u8], start: usize, end: usize) -> Self {
let inner = match lang {
LanguageType::Markdown | LanguageType::UnrealDeveloperMarkdown => {
let markdown = Markdown {
starts: save_captures(&STARTING_MARKDOWN_REGEX, lines, start, end),
};

if markdown.starts.is_some() {
Some(RegexFamily::Markdown(markdown))
} else {
None
}
SimpleCapture::make_capture(&STARTING_MARKDOWN_REGEX, lines, start, end).map(RegexFamily::Markdown)
}
LanguageType::Rust => Some(RegexFamily::Rust),
LanguageType::LinguaFranca => {
SimpleCapture::make_capture(&STARTING_LF_BLOCK_REGEX, lines, start, end).map(RegexFamily::LinguaFranca)
},
LanguageType::Html
| LanguageType::RubyHtml
| LanguageType::Svelte
Expand Down
70 changes: 59 additions & 11 deletions src/language/language_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use crate::{

use encoding_rs_io::DecodeReaderBytesBuilder;
use grep_searcher::{LineIter, LineStep};
use once_cell::sync::Lazy;
use rayon::prelude::*;
use serde::Serialize;

Expand Down Expand Up @@ -65,13 +66,19 @@ impl LanguageType {
pub fn parse_from_slice<A: AsRef<[u8]>>(self, text: A, config: &Config) -> CodeStats {
let text = text.as_ref();

if self == LanguageType::Jupyter {
if self == Jupyter {
return self
.parse_jupyter(text.as_ref(), config)
.unwrap_or_else(CodeStats::new);
}

let syntax = SyntaxCounter::new(self);
let syntax = {
let mut syntax_mut = SyntaxCounter::new(self);
if self == LinguaFranca {
syntax_mut.lf_embedded_language = self.find_lf_target_language(text);
}
syntax_mut
};

if let Some(end) = syntax
.shared
Expand Down Expand Up @@ -178,6 +185,10 @@ impl LanguageType {
// Add all the markdown blobs.
*stats.blobs.entry(LanguageType::Markdown).or_default() += blob;
}
LanguageContext::LinguaFranca => {
let child_lang = syntax.get_lf_target_language();
*stats.blobs.entry(child_lang).or_default() += blob;
}
LanguageContext::Html { language } => {
stats.code += 1;
// Add all the markdown blobs.
Expand Down Expand Up @@ -275,6 +286,28 @@ impl LanguageType {

Some(jupyter_stats)
}

/// Detects the target language embedded in a Lingua Franca file.
///
/// LF declares it with a statement such as `target C;` or `target Python`,
/// which is the first thing in the file (although comments may come before it).
/// Returns `None` when no declaration is found or the declared name is not a
/// known language.
fn find_lf_target_language(&self, bytes: &[u8]) -> Option<LanguageType> {
    use regex::bytes::Regex;
    static LF_TARGET_REGEX: Lazy<Regex> =
        Lazy::new(|| Regex::new(r#"(?m)\btarget\s+(\w+)\s*($|;|\{)"#).unwrap());

    let declaration = LF_TARGET_REGEX.captures(bytes)?;
    let target = declaration.get(1).unwrap().as_bytes();
    if target == b"CCpp" {
        // `CCpp` is a special alias for the C target in LF.
        return Some(C);
    }

    let target_name = String::from_utf8_lossy(target);
    let language = LanguageType::from_name(&target_name);
    if language.is_none() {
        trace!("LF target not recognized: {}", target_name);
    }
    language
}
}

#[cfg(test)]
Expand All @@ -288,22 +321,37 @@ mod tests {
assert!(LanguageType::Rust.allows_nested());
}


fn assert_stats(stats: &CodeStats, blanks: usize, code: usize, comments: usize) {
assert_eq!(stats.blanks, blanks, "expected {} blank lines", blanks);
assert_eq!(stats.code, code, "expected {} code lines", code);
assert_eq!(stats.comments, comments, "expected {} comment lines", comments);
}

#[test]
fn jupyter_notebook_has_correct_totals() {
let sample_notebook =
fs::read_to_string(Path::new("tests").join("data").join("jupyter.ipynb")).unwrap();

let CodeStats {
blanks,
code,
comments,
..
} = LanguageType::Jupyter
let stats = LanguageType::Jupyter
.parse_jupyter(sample_notebook.as_bytes(), &Config::default())
.unwrap();

assert_eq!(blanks, 115);
assert_eq!(code, 528);
assert_eq!(comments, 333);
assert_stats(&stats, 115, 528, 333);
}

#[test]
fn lf_embedded_language_is_counted() {
    let fixture = Path::new("tests").join("data").join("linguafranca.lf");
    let source = fs::read_to_string(fixture).unwrap();

    let stats = LinguaFranca.parse_from_str(source, &Config::default());

    // Counters for the Lingua Franca lines themselves (blanks, code, comments).
    assert_stats(&stats, 9, 11, 8);

    // The fixture declares `target Rust;`, so its embedded blocks must show up
    // as exactly one blob, attributed to Rust.
    assert_eq!(stats.blobs.len(), 1, "num embedded languages");
    let embedded = stats.blobs.get(&Rust).expect("should have a Rust entry");
    assert_stats(embedded, 2, 5, 1);
}
}
25 changes: 25 additions & 0 deletions src/language/language_type.tera.rs
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,31 @@ impl LanguageType {
}
}

/// Get language from its name.
///
/// For every entry of the template's `languages` table this accepts the
/// entry's key and, when the entry has a `name` that differs from the key,
/// that name as well. The comparison is an exact string match. An
/// unrecognised name logs a warning and yields `None`.
///
/// ```no_run
/// use tokei::LanguageType;
///
/// let rust = LanguageType::from_name("Rust");
///
/// assert_eq!(rust, Some(LanguageType::Rust));
/// ```
#[must_use]
pub fn from_name(name: &str) -> Option<Self> {
match name {
{% for key, value in languages -%}
{% if value.name and value.name != key -%}
| "{{value.name}}"
{% endif -%}
| "{{key}}" => Some({{key}}),
{% endfor %}
unknown => {
warn!("Unknown language name: {}", unknown);
None
},
}
}

/// Get language from its MIME type if available.
///
/// ```no_run
Expand Down
35 changes: 34 additions & 1 deletion src/language/syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ use log::Level::Trace;
use once_cell::sync::Lazy;

use super::embedding::{
RegexCache, RegexFamily, ENDING_MARKDOWN_REGEX, END_SCRIPT, END_STYLE, END_TEMPLATE,
RegexCache, RegexFamily, ENDING_MARKDOWN_REGEX, ENDING_LF_BLOCK_REGEX, END_SCRIPT, END_STYLE, END_TEMPLATE
};
use crate::{stats::CodeStats, utils::ext::SliceExt, Config, LanguageType};
use crate::LanguageType::LinguaFranca;

/// Tracks the syntax of the language as well as the current state in the file.
/// Currently has what could be considered three types of mode.
Expand All @@ -29,6 +30,7 @@ pub(crate) struct SyntaxCounter {
pub(crate) quote_is_doc_quote: bool,
pub(crate) stack: Vec<&'static str>,
pub(crate) quote_is_verbatim: bool,
pub(crate) lf_embedded_language: Option<LanguageType>
}

#[derive(Clone, Debug)]
Expand All @@ -53,6 +55,7 @@ pub(crate) enum LanguageContext {
Html {
language: LanguageType,
},
LinguaFranca,
Markdown {
balanced: bool,
language: LanguageType,
Expand Down Expand Up @@ -133,6 +136,7 @@ impl SyntaxCounter {
quote_is_doc_quote: false,
quote_is_verbatim: false,
stack: Vec::with_capacity(1),
lf_embedded_language: None,
quote: None,
}
}
Expand All @@ -152,6 +156,12 @@ impl SyntaxCounter {
!self.stack.is_empty()
}

/// Returns the language recorded in `lf_embedded_language`, i.e. the one used
/// to count Lingua Franca embedded blocks.
pub(crate) fn get_lf_target_language(&self) -> LanguageType {
    match self.lf_embedded_language {
        Some(target) => target,
        // No target declaration was found: fall back to Lingua Franca itself.
        None => LinguaFranca,
    }
}

#[inline]
pub(crate) fn parse_line_comment(&self, window: &[u8]) -> bool {
if self.quote.is_some() || !self.stack.is_empty() {
Expand Down Expand Up @@ -434,6 +444,29 @@ impl SyntaxCounter {
doc_block,
))
}
RegexFamily::LinguaFranca(lf) => {
let opening_fence = lf.starts_in_range(start, end)?;
let start_of_code = opening_fence.end();
let closing_fence = ENDING_LF_BLOCK_REGEX.find(&lines[start_of_code..]);
let end_of_code = closing_fence
.map_or_else(|| lines.len(),
|fence| start_of_code + fence.start());

let block_contents = &lines[start_of_code..end_of_code];
trace!(
"LF block: {:?}",
String::from_utf8_lossy(block_contents)
);
let stats =
self.get_lf_target_language().parse_from_slice(block_contents.trim_first_and_last_line_of_whitespace(), config);
trace!("-> stats: {:?}", stats);

Some(FileContext::new(
LanguageContext::LinguaFranca,
end_of_code,
stats,
))
}
RegexFamily::HtmlLike(html) => {
if let Some(mut captures) = html.start_script_in_range(start, end) {
let start_of_code = captures.next().unwrap().end();
Expand Down
36 changes: 36 additions & 0 deletions tests/data/linguafranca.lf
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// 36 lines 16 code 9 comments 11 blanks

target Rust;

// A C style comment
import KeyboardEvents from "KeyboardEvents.lf";

/* A block comment */
# a python like comment

main reactor Snake(grid_side: usize(32),
food_limit: u32(2)) {

// counts as 2 lines of Rust code and one blank
preamble {=
use crate::snakes::*;

use rand::prelude::*;
=}

/// rust doc comment
keyboard = new KeyboardEvents();

// T
state snake: CircularSnake ({= CircularSnake::new(grid_side) =});
state grid: SnakeGrid ({= SnakeGrid::new(grid_side, &snake) =});
state food_on_grid: u32(0);


// 1 line of rust code
reaction(shutdown) {=
// comment in Rust

println!("New high score: {}", self.snake.len());
=}
}

0 comments on commit fde0238

Please sign in to comment.