-
Notifications
You must be signed in to change notification settings - Fork 740
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Antonio Ognio
committed
Sep 15, 2024
1 parent
7ebb8d1
commit c5806d2
Showing
3 changed files
with
263 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
# -*- coding: utf-8 -*- # | ||
# frozen_string_literal: true | ||
|
||
module Rouge
  module Lexers
    # Lexer for the Gleam programming language (https://gleam.run/).
    #
    # State overview:
    # * +:root+             - entry state; opens strings and bit arrays and
    #                         otherwise defers to +:simple_tokens+.
    # * +:simple_tokens+    - whitespace, comments, names, operators, numbers.
    # * +:func_call_params+ - tracks one level of balanced parentheses.
    # * +:bitarray+         - contents of a +<< ... >>+ literal.
    # * +:string+, +:char+, +:triple_string+, +:raw_string+ - literal bodies.
    #
    # Rule order matters: Rouge tries the rules of a state top to bottom and
    # takes the first that matches at the current position, so more specific
    # rules are always listed before the generic ones they overlap with.
    class Gleam < RegexLexer
      title 'Gleam'
      desc 'The Gleam programming language (https://gleam.run/)'
      tag 'gleam'
      filenames '*.gleam'
      mimetypes 'text/x-gleam'

      # Character sets
      ID = /[a-z_][a-zA-Z0-9_]*/.freeze
      TYPE_ID = /[A-Z][a-zA-Z0-9_]*/.freeze
      MODULE_METHOD_CALL = %r{([a-z_][a-zA-Z0-9_]*)(\.)([a-zA-Z_][a-zA-Z0-9_]*)}.freeze
      WHITESPACE = /\s+/.freeze
      # Kept for backward compatibility; WHITESPACE already covers newlines.
      NEWLINE = /\n/.freeze

      # Keywords, built-ins, constants
      KEYWORDS_LIST = %w[
        as assert case const external fn if import let opaque pub todo try type use
        module else panic test
      ].freeze

      KEYWORDS = %r{\b(?:#{KEYWORDS_LIST.join('|')})\b}.freeze

      BUILTINS_LIST = %w[
        Int Float Bool String List Result Option Iterator
      ].freeze

      BUILTINS = %r{\b(?:#{BUILTINS_LIST.join('|')})\b}.freeze

      CONSTANTS = %r{\b(?:Nil|Ok|Error|Stop|Continue|True|False)\b}.freeze

      BIT_STRING_KEYWORDS_LIST = %w[
        binary bits bytes int float bit_string bit_array utf8 utf16 utf32
        utf8_codepoint utf16_codepoint utf32_codepoint signed unsigned big little
        native unit size
      ].freeze

      BIT_STRING_KEYWORDS = %r{\b(?:#{BIT_STRING_KEYWORDS_LIST.join('|')})\b}.freeze

      # Operators and punctuation
      OPERATORS = %r{>>|<<|\|\||&&|==|!=|<=|>=|->|=>|<-|<>|\|>|[-+\/*%=!<>&|^~]}.freeze
      # `#` is included so that tuple syntax `#(a, b)` does not yield an
      # Error token.
      PUNCTUATION = /[()\[\]{}.,:;\#]/.freeze

      # Numbers
      BINARY_NUMBER = /\b0b[01](?:_?[01]+)*\b/.freeze
      OCTAL_NUMBER = /\b0o[0-7](?:_?[0-7]+)*\b/.freeze
      HEX_NUMBER = /\b0x[0-9a-fA-F](?:_?[0-9a-fA-F]+)*\b/.freeze
      FLOAT_NUMBER = /\b\d[\d_]*\.\d[\d_]*(e[+-]?\d[\d_]*)?\b/.freeze
      INTEGER_NUMBER = /\b\d[\d_]*\b/.freeze

      # Strings
      # NOTE: the two whole-literal patterns below are not used by any rule
      # (string bodies are lexed by dedicated states); they are kept so that
      # any external reference to the constants keeps working.
      DOUBLE_QUOTED_STRING = %r{"(\\\\|\\"|[^"])*"}.freeze
      SINGLE_QUOTED_STRING = %r{'(\\\\|\\'|[^'])*'}.freeze
      # Single-character escapes plus `\u{XXXXXX}` code point escapes.
      ESCAPE_SEQUENCE = /\\(?:[nrtf\\"'0]|u\{\h{1,6}\})/.freeze

      # Comments
      LINE_COMMENT = %r{//.*?$}.freeze

      state :root do
        # Bit arrays. This must precede :simple_tokens, whose OPERATORS rule
        # also matches `<<` and would otherwise prevent :bitarray from ever
        # being entered.
        rule %r{<<}, Operator, :bitarray

        mixin :simple_tokens

        # Raw strings (backticks)
        rule %r{`}, Str::Backtick, :raw_string

        # Triple-quoted strings (must precede the single `"` rule)
        rule %r{"""}, Str::Double, :triple_string

        # Double-quoted strings
        rule %r{"}, Str::Double, :string

        # Single-quoted strings (characters)
        rule %r{'}, Str::Char, :char
      end

      state :simple_tokens do
        # Whitespace (`\s+` already includes newlines)
        rule WHITESPACE, Text::Whitespace

        # Comments
        rule LINE_COMMENT, Comment::Single

        # Attributes
        rule %r{[@]#{ID}}, Name::Decorator

        # Function definitions. Listed before KEYWORDS so the name following
        # `fn` is tagged as a function; a leading `pub` is handled by the
        # KEYWORDS rule, and an anonymous `fn(` falls through to it as well.
        rule %r{\b(fn)(\s+)(#{ID})} do
          groups Keyword, Text::Whitespace, Name::Function
        end

        # Keywords, built-ins, constants
        rule KEYWORDS, Keyword
        rule BUILTINS, Name::Builtin
        rule CONSTANTS, Name::Constant

        # Type names (user-defined)
        rule %r{\b#{TYPE_ID}\b}, Name::Class

        # Module-qualified function calls (e.g., list.map(...)). Listed before
        # the plain `module.name` rule, which matches a prefix of this one.
        rule %r{(#{ID})(\.)(#{ID})(\s*)(\()} do
          groups Name::Namespace, Punctuation, Name::Function, Text::Whitespace, Punctuation
          push :func_call_params
        end

        # Module and method references without a call (e.g., list.map)
        rule MODULE_METHOD_CALL do
          groups Name::Namespace, Punctuation, Name::Function
        end

        # Function calls
        rule %r{(#{ID})(\s*)(\()} do
          groups Name::Function, Text::Whitespace, Punctuation
          push :func_call_params
        end

        # Discard names (e.g., _var or a bare _). Listed before the generic
        # identifier rule, because ID also accepts a leading underscore.
        rule %r{\b_[a-zA-Z0-9_]*\b}, Name::Builtin::Pseudo

        # Identifiers (variables, fields)
        rule %r{\b#{ID}\b}, Name::Variable

        # Operators and punctuation
        rule OPERATORS, Operator
        rule PUNCTUATION, Punctuation

        # Numbers (prefixed and float forms before plain integers)
        rule BINARY_NUMBER, Num::Bin
        rule OCTAL_NUMBER, Num::Oct
        rule HEX_NUMBER, Num::Hex
        rule FLOAT_NUMBER, Num::Float
        rule INTEGER_NUMBER, Num::Integer
      end

      # Function call parameters: everything between a call's parentheses.
      # Arguments are lexed with the normal :root rules so they are
      # highlighted, and nested parentheses push this state again so that
      # only the matching `)` pops it.
      state :func_call_params do
        rule %r{\)}, Punctuation, :pop!
        rule %r{\(}, Punctuation, :func_call_params
        mixin :root
      end

      # Raw strings
      state :raw_string do
        rule %r{[^`]+}, Str::Backtick
        rule %r{`}, Str::Backtick, :pop!
      end

      # Triple-quoted strings
      state :triple_string do
        rule %r{"""}, Str::Double, :pop!
        rule %r{[^"]+}, Str::Double
        rule %r{"}, Str::Double
      end

      # Single-line strings
      state :string do
        rule %r{[^"\\]+}, Str::Double
        rule ESCAPE_SEQUENCE, Str::Escape
        # A backslash that does not start a known escape: keep it as string
        # content instead of falling through to an Error token.
        rule %r{\\}, Str::Double
        rule %r{"}, Str::Double, :pop!
      end

      # Character literals
      state :char do
        rule %r{[^'\\]+}, Str::Char
        rule ESCAPE_SEQUENCE, Str::Escape
        # Same fallback as in :string for an unrecognised backslash.
        rule %r{\\}, Str::Char
        rule %r{'}, Str::Char, :pop!
      end

      # Bit arrays: `>>` closes the literal, segment options are tagged as
      # keywords, and everything else (values, sizes, nested bit arrays) is
      # lexed with the normal :root rules.
      state :bitarray do
        rule %r{>>}, Operator, :pop!
        rule BIT_STRING_KEYWORDS, Keyword
        mixin :root
      end
    end
  end
end
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
# -*- coding: utf-8 -*- | ||
# frozen_string_literal: true | ||
|
||
# Spec for Rouge::Lexers::Gleam: checks filename/mimetype guessing and that
# representative snippets produce the expected token types.
#
# `subject.lex(text)` yields [token, value] pairs where `token` is a
# Rouge::Token::Tokens constant, so expectations are written against those
# constants rather than against symbols.
describe Rouge::Lexers::Gleam do
  let(:subject) { Rouge::Lexers::Gleam.new }
  # Shorthand for the token namespace, e.g. tok::Keyword.
  let(:tok) { Rouge::Token::Tokens }

  describe 'guessing' do
    include Support::Guessing

    it 'guesses by filename' do
      assert_guess(filename: 'example.gleam')
    end

    it 'guesses by mimetype' do
      assert_guess(mimetype: 'text/x-gleam')
    end
  end

  describe 'lexing keywords' do
    it 'recognizes keywords' do
      # Only words present in the lexer's KEYWORDS_LIST are checked here.
      %w[let fn import pub case type as if else try opaque assert todo].each do |keyword|
        assert_includes subject.lex(keyword).to_a, [tok::Keyword, keyword]
      end
    end
  end

  describe 'lexing built-in types' do
    it 'recognizes built-in types' do
      # Nil, Ok and Error are lexed as constants and are covered below.
      %w[Int Float Bool String List Result Option].each do |builtin|
        assert_includes subject.lex(builtin).to_a, [tok::Name::Builtin, builtin]
      end
    end
  end

  describe 'lexing constants' do
    it 'recognizes constants' do
      %w[Nil Ok Error Stop Continue True False].each do |constant|
        assert_includes subject.lex(constant).to_a, [tok::Name::Constant, constant]
      end
    end
  end

  describe 'lexing numbers' do
    it 'recognizes integers' do
      assert_includes subject.lex('42').to_a, [tok::Num::Integer, '42']
    end

    it 'recognizes floating-point numbers' do
      assert_includes subject.lex('3.14').to_a, [tok::Num::Float, '3.14']
    end

    it 'recognizes hexadecimal numbers' do
      assert_includes subject.lex('0x1A3F').to_a, [tok::Num::Hex, '0x1A3F']
    end
  end

  describe 'lexing strings' do
    it 'recognizes double-quoted strings' do
      # NOTE(review): relies on the lexer merging adjacent tokens of the same
      # type, so the quotes and the body arrive as one pair - confirm.
      assert_includes subject.lex('"Hello, Gleam!"').to_a, [tok::Str::Double, '"Hello, Gleam!"']
    end
  end

  describe 'lexing module and method calls' do
    it 'recognizes module and method calls' do
      tokens = subject.lex('list.map').to_a

      assert_includes tokens, [tok::Name::Namespace, 'list']
      assert_includes tokens, [tok::Punctuation, '.']
      assert_includes tokens, [tok::Name::Function, 'map']
    end
  end

  describe 'lexing operators' do
    it 'recognizes pipeline operator' do
      assert_includes subject.lex('|>').to_a, [tok::Operator, '|>']
    end
  end
end