From 126eecc28f9f100e286977e4ec65a045d2841640 Mon Sep 17 00:00:00 2001 From: Andreas Rossberg Date: Fri, 22 Mar 2019 12:31:57 +0100 Subject: [PATCH] Fast path for byte --- src/lexer.mll | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/lexer.mll b/src/lexer.mll index b8aa9bbb8f5..7f202103d7c 100644 --- a/src/lexer.mll +++ b/src/lexer.mll @@ -23,18 +23,15 @@ let error_nest start lexbuf msg = error lexbuf msg -let utf8 lexbuf s i = - let len = - if s.[!i] < '\x80' then 0 else - if s.[!i] < '\xe0' then 1 else - if s.[!i] < '\xf0' then 2 else 3 - in +let utf8 s i = + let len = if s.[!i] < '\xe0' then 1 else if s.[!i] < '\xf0' then 2 else 3 in i := !i + len; List.hd (Utf8.decode (String.sub s (!i - len) (1 + len))) -let unicode lexbuf s i = +let codepoint lexbuf s i = let u = - if s.[!i] <> '\\' then utf8 lexbuf s i else + if s.[!i] >= '\x80' then utf8 s i else + if s.[!i] <> '\\' then Char.code s.[!i] else match (incr i; s.[!i]) with | 'n' -> Char.code '\n' | 'r' -> Char.code '\r' @@ -55,13 +52,13 @@ let unicode lexbuf s i = in incr i; u let char lexbuf s = - unicode lexbuf s (ref 1) + codepoint lexbuf s (ref 1) let text lexbuf s = let b = Buffer.create (String.length s) in let i = ref 1 in while !i < String.length s - 1 do - let bs = Utf8.encode [unicode lexbuf s i] in + let bs = Utf8.encode [codepoint lexbuf s i] in Buffer.add_substring b bs 0 (String.length bs) done; Buffer.contents b