Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 24 additions & 7 deletions src/lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,23 @@ let error_nest start lexbuf msg =
lexbuf.Lexing.lex_start_p <- start;
error lexbuf msg

let unicode lexbuf s i =
(* Classify the first byte [ch] of a UTF-8 sequence by its high-order bit
   pattern and return how many further bytes belong to the same sequence:
     0xxxxxxx -> 0, 110xxxxx -> 1, 1110xxxx -> 2, 11110xxx -> 3.
   Any other pattern is reported through [error lexbuf]. *)
let classify_utf8_leader lexbuf ch =
  (* Clear the payload bits named by [payload], keeping only the prefix. *)
  let prefix_without payload = Int32.logand ch (Int32.lognot payload) in
  if prefix_without 0b01111111l = 0b00000000l then 0
  else if prefix_without 0b00011111l = 0b11000000l then 1
  else if prefix_without 0b00001111l = 0b11100000l then 2
  else if prefix_without 0b00000111l = 0b11110000l then 3
  else
    error lexbuf
      (Printf.sprintf "invalid utf-8 character: 0x%x" (Int32.to_int ch))

(* Decode the UTF-8 encoded character that starts at [s.[!i]] and return its
   code point.

   [l] is the length bound for [s]; the visible caller passes
   [String.length s]. On return [i] points at the LAST byte of the decoded
   sequence (it is left unchanged for a single-byte character).

   A sequence whose continuation bytes would extend past [l], or that
   [Utf8.decode] does not turn into exactly one code point, is reported
   through [error lexbuf] rather than escaping as [Invalid_argument] from
   [String.sub]. *)
let utf8_decoder l lexbuf s i =
  let first = Char.code s.[!i] in
  let trailing = classify_utf8_leader lexbuf (Int32.of_int first) in
  if trailing = 0 then first
  else if !i + trailing >= l then
    (* Truncated sequence: the announced continuation bytes are not there. *)
    error lexbuf "can not interpret unicode character"
  else
    match Utf8.decode (String.sub s !i (1 + trailing)) with
    | [code] -> i := !i + trailing; code
    | _ -> error lexbuf "can not interpret unicode character"

let unicode lexbuf s i decoder =
let u =
if s.[!i] <> '\\' then Char.code s.[!i] else
if s.[!i] <> '\\' then decoder lexbuf s i else
match (incr i; s.[!i]) with
| 'n' -> Char.code '\n'
| 'r' -> Char.code '\r'
Expand All @@ -44,14 +58,17 @@ let unicode lexbuf s i =
int_of_string ("0x" ^ String.make 1 h ^ String.make 1 s.[!i])
in incr i; u

let char lexbuf s =
unicode lexbuf s (ref 1)
(* Interpret the character-literal token [s] and return its code point.
   Escape sequences are handled by [unicode]; for anything else the whole
   token is decoded at once and must be exactly one code point between two
   apostrophes. *)
let char lexbuf s =
  let decode_quoted _ _ _ =
    match Utf8.decode s with
    | [39; code; 39] -> code (* 39 is the apostrophe on either side *)
    | _ -> error lexbuf "can not interpret unicode character"
  in
  unicode lexbuf s (ref 1) decode_quoted

(* Build the contents of the text token [s]: starting at index 1 and stopping
   before the last byte of [s] (presumably the surrounding quotes -- confirm
   against the lexer rules), each character or escape is turned into a code
   point by [unicode], re-encoded with [Utf8.encode] and appended to a buffer
   whose contents are returned. *)
let text lexbuf s =
let b = Buffer.create (String.length s) in
let l = String.length s in
let b = Buffer.create l in
let i = ref 1 in
while !i < String.length s - 1 do
let bs = Utf8.encode [unicode lexbuf s i] in
while !i < l - 1 do
let bs = Utf8.encode [unicode lexbuf s i (utf8_decoder l)] in
Buffer.add_substring b bs 0 (String.length bs)
done;
Buffer.contents b
Expand Down
1 change: 1 addition & 0 deletions test/run/conversions.as
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ println(word32ToInt 4294967295); // == (-1) // 2**32 - 1

assert(charToWord32 '\u{00}' == (0 : Word32));
assert(charToWord32 '*' == (42 : Word32));
assert(charToWord32 'П' == (1055 : Word32));
assert(charToWord32 '\u{ffff}' == (65535 : Word32)); // 2**16 - 1
assert(charToWord32 '\u{10ffff}' == (0x10FFFF : Word32));

Expand Down
3 changes: 3 additions & 0 deletions test/run/literals.as
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@ let byte : Word8 = 0xFF : Word8;
let short : Word16 = 0xFFFF : Word16;
let word : Word32 = 0xFFFF_FFFF : Word32;
let u = '\u{a34}';
let gu = '🎸';
let ru = "Приветствую, мир!\n";
let s = "a \t\22\00bb\'bc\\de \74xx\\x\"\u{000_234_42}\n";
let emojis = "🙈🎸😋";