From 1bdd91a3ea3dbc39b16d5d901d8866f0b4762777 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20M=C3=BCller?= Date: Wed, 10 Jul 2024 17:22:02 +0200 Subject: [PATCH] Revert "Optimize JSON parsing a bit (#14366)" This reverts commit 9ef636644100b3ad41ab6094da20dbd506e439c6. --- spec/std/json/parser_spec.cr | 7 +--- spec/std/json/pull_parser_spec.cr | 4 +-- src/json/lexer.cr | 22 +------------ src/json/lexer/io_based.cr | 53 ++----------------------------- src/json/lexer/string_based.cr | 22 ++++++------- src/json/token.cr | 21 ++---------- 6 files changed, 19 insertions(+), 110 deletions(-) diff --git a/spec/std/json/parser_spec.cr b/spec/std/json/parser_spec.cr index 7b8578f522c7..96cfd52277a2 100644 --- a/spec/std/json/parser_spec.cr +++ b/spec/std/json/parser_spec.cr @@ -2,13 +2,9 @@ require "spec" require "json" private def it_parses(string, expected_value, file = __FILE__, line = __LINE__) - it "parses #{string} from String", file, line do + it "parses #{string}", file, line do JSON.parse(string).raw.should eq(expected_value) end - - it "parses #{string} from IO", file, line do - JSON.parse(IO::Memory.new(string)).raw.should eq(expected_value) - end end private def it_raises_on_parse(string, file = __FILE__, line = __LINE__) @@ -35,7 +31,6 @@ describe JSON::Parser do it_parses "[true]", [true] it_parses "[false]", [false] it_parses %(["hello"]), ["hello"] - it_parses %(["hello", 1]), ["hello", 1] it_parses "[0]", [0] it_parses " [ 0 ] ", [0] diff --git a/spec/std/json/pull_parser_spec.cr b/spec/std/json/pull_parser_spec.cr index 28ef4c6cf527..8de524e86c87 100644 --- a/spec/std/json/pull_parser_spec.cr +++ b/spec/std/json/pull_parser_spec.cr @@ -92,8 +92,8 @@ class JSON::PullParser end end -private def assert_pull_parse(string, file = __FILE__, line = __LINE__) - it "parses #{string}", file, line do +private def assert_pull_parse(string) + it "parses #{string}" do parser = JSON::PullParser.new string parser.assert JSON.parse(string).raw parser.kind.should eq(JSON::PullParser::Kind::EOF) diff --git a/src/json/lexer.cr b/src/json/lexer.cr index 067d595aeff3..3e61179b9844 100644 --- a/src/json/lexer.cr +++ b/src/json/lexer.cr @@ -220,16 +220,8 @@ abstract class JSON::Lexer private def consume_number number_start - # Integer values of up to 18 digits can be computed by doing math: - # no need to store a string value and later parse it. - # For larger numbers, or floats, we store the entire string and later parse it. - @token.int_value = nil - integer = 0_i64 - negative = false - if current_char == '-' append_number_char - negative = true next_char end @@ -246,19 +238,13 @@ abstract class JSON::Lexer unexpected_char else @token.kind = :int - @token.int_value = 0 number_end end when '1'..'9' append_number_char - digits = 1 - integer = (current_char - '0').to_i64 char = next_char while '0' <= char <= '9' append_number_char - digits += 1 - integer &*= 10 - integer &+= char - '0' char = next_char end @@ -269,13 +255,7 @@ abstract class JSON::Lexer consume_exponent else @token.kind = :int - # Int64::MAX is 9223372036854775807 which has 19 digits. - # With 18 digits we know the number we computed is the one we read. - if digits > 18 - number_end - else - @token.int_value = negative ? -integer : integer - end + number_end end else unexpected_char diff --git a/src/json/lexer/io_based.cr b/src/json/lexer/io_based.cr index f1f5346306db..d3989c54f0a8 100644 --- a/src/json/lexer/io_based.cr +++ b/src/json/lexer/io_based.cr @@ -2,64 +2,17 @@ class JSON::Lexer::IOBased < JSON::Lexer def initialize(@io : IO) super() - @current_char = @io.read_byte.try(&.chr) || '\0' + @current_char = @io.read_char || '\0' end private getter current_char private def next_char_no_column_increment - @current_char = @io.read_byte.try(&.chr) || '\0' + @current_char = @io.read_char || '\0' end private def consume_string - peek = @io.peek - if !peek || peek.empty? - return consume_string_with_buffer - end - - pos = 0 - - while true - if pos >= peek.size - # We don't have enough data in the peek buffer to create a string: - # default to the slow method - return consume_string_with_buffer - end - - char = peek[pos] - case char - when '\\' - # If we find an escape character, go to the slow method - @column_number += pos - return consume_string_at_escape_char(peek, pos) - when '"' - break - else - if 0 <= current_char.ord < 32 - unexpected_char - else - pos += 1 - end - end - end - - @column_number += pos - @token.string_value = - if @expects_object_key - @string_pool.get(peek.to_unsafe, pos) - else - String.new(peek.to_unsafe, pos) - end - - @io.skip(pos + 1) - next_char - end - - private def consume_string_at_escape_char(peek, pos) - consume_string_with_buffer do - @buffer.write peek[0, pos] - @io.skip(pos) - end + consume_string_with_buffer end private def number_start diff --git a/src/json/lexer/string_based.cr b/src/json/lexer/string_based.cr index d8b3b64f1940..5696bc6f78b2 100644 --- a/src/json/lexer/string_based.cr +++ b/src/json/lexer/string_based.cr @@ -1,9 +1,8 @@ # :nodoc: class JSON::Lexer::StringBased < JSON::Lexer - def initialize(string : String) + def initialize(string) super() - @string = string - @pos = 0 + @reader = Char::Reader.new(string) @number_start = 0 end @@ -34,7 +33,7 @@ class JSON::Lexer::StringBased < JSON::Lexer if @expects_object_key start_pos += 1 end_pos = current_pos - 1 - @token.string_value = @string_pool.get(@string.to_unsafe + start_pos, end_pos - start_pos) + @token.string_value = @string_pool.get(@reader.string.to_unsafe + start_pos, end_pos - start_pos) else @token.string_value = string_range(start_pos + 1, current_pos - 1) end @@ -48,30 +47,27 @@ class JSON::Lexer::StringBased < JSON::Lexer end private def current_pos - @pos + @reader.pos end def string_range(start_pos, end_pos) : String - @string.byte_slice(start_pos, end_pos - start_pos) + @reader.string.byte_slice(start_pos, end_pos - start_pos) end def slice_range(start_pos, end_pos) : Bytes - @string.to_slice[start_pos, end_pos - start_pos] + @reader.string.to_slice[start_pos, end_pos - start_pos] end private def next_char_no_column_increment - @pos += 1 - - char = current_char - if char == '\0' && @pos != @string.bytesize + char = @reader.next_char + if char == '\0' && @reader.pos != @reader.string.bytesize unexpected_char end - char end private def current_char - @string.to_unsafe[@pos].chr + @reader.current_char end private def number_start diff --git a/src/json/token.cr b/src/json/token.cr index f1862ce676f5..436709aec233 100644 --- a/src/json/token.cr +++ b/src/json/token.cr @@ -19,7 +19,7 @@ class JSON::Token property string_value : String def int_value : Int64 - @int_value || raw_value.to_i64 + raw_value.to_i64 rescue exc : ArgumentError raise ParseException.new(exc.message, line_number, column_number) end @@ -32,8 +32,7 @@ class JSON::Token property line_number : Int32 property column_number : Int32 - setter raw_value : String - setter int_value : Int64? + property raw_value : String def initialize @kind = :EOF @@ -41,16 +40,6 @@ class JSON::Token @column_number = 0 @string_value = "" @raw_value = "" - @int_value = nil - end - - def raw_value - case @kind - when .int? - @int_value.try(&.to_s) || @raw_value - else - @raw_value - end end def to_s(io : IO) : Nil @@ -62,11 +51,7 @@ class JSON::Token when .true? io << "true" when .int? - if int_value = @int_value - int_value.to_s(io) - else - raw_value.to_s(io) - end + raw_value.to_s(io) when .float? raw_value.to_s(io) when .string?