diff --git a/spec/std/json/parser_spec.cr b/spec/std/json/parser_spec.cr
index 96cfd52277a2..7b8578f522c7 100644
--- a/spec/std/json/parser_spec.cr
+++ b/spec/std/json/parser_spec.cr
@@ -2,9 +2,13 @@ require "spec"
 require "json"
 
 private def it_parses(string, expected_value, file = __FILE__, line = __LINE__)
-  it "parses #{string}", file, line do
+  it "parses #{string} from String", file, line do
     JSON.parse(string).raw.should eq(expected_value)
   end
+
+  it "parses #{string} from IO", file, line do
+    JSON.parse(IO::Memory.new(string)).raw.should eq(expected_value)
+  end
 end
 
 private def it_raises_on_parse(string, file = __FILE__, line = __LINE__)
@@ -31,6 +35,7 @@ describe JSON::Parser do
   it_parses "[true]", [true]
   it_parses "[false]", [false]
   it_parses %(["hello"]), ["hello"]
+  it_parses %(["hello", 1]), ["hello", 1]
   it_parses "[0]", [0]
   it_parses " [ 0 ] ", [0]
 
diff --git a/spec/std/json/pull_parser_spec.cr b/spec/std/json/pull_parser_spec.cr
index 8de524e86c87..28ef4c6cf527 100644
--- a/spec/std/json/pull_parser_spec.cr
+++ b/spec/std/json/pull_parser_spec.cr
@@ -92,8 +92,8 @@ class JSON::PullParser
   end
 end
 
-private def assert_pull_parse(string)
-  it "parses #{string}" do
+private def assert_pull_parse(string, file = __FILE__, line = __LINE__)
+  it "parses #{string}", file, line do
     parser = JSON::PullParser.new string
     parser.assert JSON.parse(string).raw
     parser.kind.should eq(JSON::PullParser::Kind::EOF)
diff --git a/src/json/lexer.cr b/src/json/lexer.cr
index 3e61179b9844..067d595aeff3 100644
--- a/src/json/lexer.cr
+++ b/src/json/lexer.cr
@@ -220,8 +220,16 @@ abstract class JSON::Lexer
   private def consume_number
     number_start
 
+    # Integer values of up to 18 digits can be computed by doing math:
+    # no need to store a string value and later parse it.
+    # For larger numbers, or floats, we store the entire string and later parse it.
+    @token.int_value = nil
+    integer = 0_i64
+    negative = false
+
     if current_char == '-'
       append_number_char
+      negative = true
       next_char
     end
 
@@ -238,13 +246,19 @@ abstract class JSON::Lexer
         unexpected_char
       else
         @token.kind = :int
+        @token.int_value = 0
         number_end
       end
     when '1'..'9'
       append_number_char
+      digits = 1
+      integer = (current_char - '0').to_i64
       char = next_char
       while '0' <= char <= '9'
         append_number_char
+        digits += 1
+        integer &*= 10
+        integer &+= char - '0'
         char = next_char
       end
 
@@ -255,7 +269,13 @@ abstract class JSON::Lexer
         consume_exponent
       else
         @token.kind = :int
-        number_end
+        # Int64::MAX is 9223372036854775807 which has 19 digits.
+        # With 18 digits we know the number we computed is the one we read.
+        if digits > 18
+          number_end
+        else
+          @token.int_value = negative ? -integer : integer
+        end
       end
     else
       unexpected_char
diff --git a/src/json/lexer/io_based.cr b/src/json/lexer/io_based.cr
index d3989c54f0a8..f1f5346306db 100644
--- a/src/json/lexer/io_based.cr
+++ b/src/json/lexer/io_based.cr
@@ -2,17 +2,66 @@
 class JSON::Lexer::IOBased < JSON::Lexer
   def initialize(@io : IO)
     super()
-    @current_char = @io.read_char || '\0'
+    @current_char = @io.read_byte.try(&.chr) || '\0'
   end
 
   private getter current_char
 
   private def next_char_no_column_increment
-    @current_char = @io.read_char || '\0'
+    @current_char = @io.read_byte.try(&.chr) || '\0'
   end
 
   private def consume_string
-    consume_string_with_buffer
+    peek = @io.peek
+    if !peek || peek.empty?
+      return consume_string_with_buffer
+    end
+
+    pos = 0
+
+    while true
+      if pos >= peek.size
+        # We don't have enough data in the peek buffer to create a string:
+        # default to the slow method
+        return consume_string_with_buffer
+      end
+
+      char = peek[pos]
+      case char
+      when '\\'
+        # If we find an escape character, go to the slow method
+        @column_number += pos
+        return consume_string_at_escape_char(peek, pos)
+      when '"'
+        break
+      else
+        # Raw control characters (below 0x20) are invalid inside a JSON string.
+        # Test the peeked byte: `current_char` is never advanced by this loop.
+        if char < 32
+          unexpected_char
+        else
+          pos += 1
+        end
+      end
+    end
+
+    @column_number += pos
+    @token.string_value =
+      if @expects_object_key
+        @string_pool.get(peek.to_unsafe, pos)
+      else
+        String.new(peek.to_unsafe, pos)
+      end
+
+    @io.skip(pos + 1)
+    next_char
+  end
+
+  private def consume_string_at_escape_char(peek, pos)
+    consume_string_with_buffer do
+      @buffer.write peek[0, pos]
+      @io.skip(pos)
+    end
   end
 
   private def number_start
diff --git a/src/json/lexer/string_based.cr b/src/json/lexer/string_based.cr
index 5696bc6f78b2..d8b3b64f1940 100644
--- a/src/json/lexer/string_based.cr
+++ b/src/json/lexer/string_based.cr
@@ -1,8 +1,9 @@
 # :nodoc:
 class JSON::Lexer::StringBased < JSON::Lexer
-  def initialize(string)
+  def initialize(string : String)
     super()
-    @reader = Char::Reader.new(string)
+    @string = string
+    @pos = 0
     @number_start = 0
   end
 
@@ -33,7 +34,7 @@ class JSON::Lexer::StringBased < JSON::Lexer
     if @expects_object_key
       start_pos += 1
       end_pos = current_pos - 1
-      @token.string_value = @string_pool.get(@reader.string.to_unsafe + start_pos, end_pos - start_pos)
+      @token.string_value = @string_pool.get(@string.to_unsafe + start_pos, end_pos - start_pos)
     else
       @token.string_value = string_range(start_pos + 1, current_pos - 1)
     end
@@ -47,27 +48,30 @@ class JSON::Lexer::StringBased < JSON::Lexer
   end
 
   private def current_pos
-    @reader.pos
+    @pos
   end
 
   def string_range(start_pos, end_pos) : String
-    @reader.string.byte_slice(start_pos, end_pos - start_pos)
+    @string.byte_slice(start_pos, end_pos - start_pos)
   end
 
   def slice_range(start_pos, end_pos) : Bytes
-    @reader.string.to_slice[start_pos, end_pos - start_pos]
+    @string.to_slice[start_pos, end_pos - start_pos]
   end
 
   private def next_char_no_column_increment
-    char = @reader.next_char
-    if char == '\0' && @reader.pos != @reader.string.bytesize
+    @pos += 1
+
+    char = current_char
+    if char == '\0' && @pos != @string.bytesize
       unexpected_char
     end
+
     char
   end
 
   private def current_char
-    @reader.current_char
+    @string.to_unsafe[@pos].chr
   end
 
   private def number_start
diff --git a/src/json/token.cr b/src/json/token.cr
index 436709aec233..f1862ce676f5 100644
--- a/src/json/token.cr
+++ b/src/json/token.cr
@@ -19,7 +19,7 @@ class JSON::Token
   property string_value : String
 
   def int_value : Int64
-    raw_value.to_i64
+    @int_value || raw_value.to_i64
   rescue exc : ArgumentError
     raise ParseException.new(exc.message, line_number, column_number)
   end
@@ -32,7 +32,8 @@ class JSON::Token
 
   property line_number : Int32
   property column_number : Int32
-  property raw_value : String
+  setter raw_value : String
+  setter int_value : Int64?
 
   def initialize
     @kind = :EOF
@@ -40,6 +41,16 @@ class JSON::Token
     @column_number = 0
     @string_value = ""
     @raw_value = ""
+    @int_value = nil
+  end
+
+  def raw_value
+    case @kind
+    when .int?
+      @int_value.try(&.to_s) || @raw_value
+    else
+      @raw_value
+    end
   end
 
   def to_s(io : IO) : Nil
@@ -51,7 +62,11 @@ class JSON::Token
     when .true?
       io << "true"
     when .int?
-      raw_value.to_s(io)
+      if int_value = @int_value
+        int_value.to_s(io)
+      else
+        raw_value.to_s(io)
+      end
     when .float?
       raw_value.to_s(io)
     when .string?