Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion spec/std/json/parser_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@ require "spec"
require "json"

private def it_parses(string, expected_value, file = __FILE__, line = __LINE__)
it "parses #{string}", file, line do
it "parses #{string} from String", file, line do
JSON.parse(string).raw.should eq(expected_value)
end

it "parses #{string} from IO", file, line do
JSON.parse(IO::Memory.new(string)).raw.should eq(expected_value)
end
end

private def it_raises_on_parse(string, file = __FILE__, line = __LINE__)
Expand All @@ -31,6 +35,7 @@ describe JSON::Parser do
it_parses "[true]", [true]
it_parses "[false]", [false]
it_parses %(["hello"]), ["hello"]
it_parses %(["hello", 1]), ["hello", 1]
it_parses "[0]", [0]
it_parses " [ 0 ] ", [0]

Expand Down
4 changes: 2 additions & 2 deletions spec/std/json/pull_parser_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ class JSON::PullParser
end
end

private def assert_pull_parse(string)
it "parses #{string}" do
private def assert_pull_parse(string, file = __FILE__, line = __LINE__)
it "parses #{string}", file, line do
parser = JSON::PullParser.new string
parser.assert JSON.parse(string).raw
parser.kind.should eq(JSON::PullParser::Kind::EOF)
Expand Down
22 changes: 21 additions & 1 deletion src/json/lexer.cr
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,16 @@ abstract class JSON::Lexer
private def consume_number
number_start

# Integer values of up to 18 digits can be computed by doing math:
# no need to store a string value and later parse it.
# For larger numbers, or floats, we store the entire string and later parse it.
@token.int_value = nil
integer = 0_i64
negative = false

if current_char == '-'
append_number_char
negative = true
next_char
end

Expand All @@ -238,13 +246,19 @@ abstract class JSON::Lexer
unexpected_char
else
@token.kind = :int
@token.int_value = 0
number_end
end
when '1'..'9'
append_number_char
digits = 1
integer = (current_char - '0').to_i64
char = next_char
while '0' <= char <= '9'
append_number_char
digits += 1
integer &*= 10
integer &+= char - '0'
char = next_char
end

Expand All @@ -255,7 +269,13 @@ abstract class JSON::Lexer
consume_exponent
else
@token.kind = :int
number_end
# Int64::MAX is 9223372036854775807 which has 19 digits.
# With 18 digits we know the number we computed is the one we read.
if digits > 18
number_end
else
@token.int_value = negative ? -integer : integer
end
end
else
unexpected_char
Expand Down
53 changes: 50 additions & 3 deletions src/json/lexer/io_based.cr
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,64 @@
class JSON::Lexer::IOBased < JSON::Lexer
def initialize(@io : IO)
super()
@current_char = @io.read_char || '\0'
@current_char = @io.read_byte.try(&.chr) || '\0'
end

private getter current_char

private def next_char_no_column_increment
@current_char = @io.read_char || '\0'
@current_char = @io.read_byte.try(&.chr) || '\0'
end

private def consume_string
consume_string_with_buffer
peek = @io.peek
if !peek || peek.empty?
return consume_string_with_buffer
end

pos = 0

while true
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thought: I'm wondering if the strategy here could be based on byte search (peek.index('"')) instead? The implementation for that can be more efficient than iterating over each byte. Slice#index on a byte buffer is backed by memchr, so it depends on how much the libc implementation is optimized.

We still need to sanity check for escape sequences and unallowed characters in the potential string, though. So that would reduce effectiveness. I think it could overall be better that way, but I'm not sure. Just wanted to leave this thought here. It should be good to take the current implementation for now.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be true. When I needed to find the end of a JSON string in a Bytes, I came up with something like this:

def json_bytes_end_of_string_index( haystack : Bytes, offset : Int32 = 0 ) : Int32?
  index = haystack.index( '"'.ord.to_u8, offset )  # memchr()
  return nil  if ! index
  return index  unless haystack[ index - 1 ] == '\\'.ord.to_u8
  # Fall back to the more complete bytewise implementation below.
  # ...
end

I did not check if it makes sense to use strpbrk() to find the next "interesting" byte, such as quote, backslash, ...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's the other optimization I was thinking. It makes string parsing much faster. I didn't want to include it in this PR though!

if pos >= peek.size
# We don't have enough data in the peek buffer to create a string:
# default to the slow method
return consume_string_with_buffer
end

char = peek[pos]
case char
when '\\'
# If we find an escape character, go to the slow method
@column_number += pos
return consume_string_at_escape_char(peek, pos)
when '"'
break
else
if 0 <= current_char.ord < 32
unexpected_char
else
pos += 1
end
end
end

@column_number += pos
@token.string_value =
if @expects_object_key
@string_pool.get(peek.to_unsafe, pos)
else
String.new(peek.to_unsafe, pos)
end

@io.skip(pos + 1)
next_char
end

private def consume_string_at_escape_char(peek, pos)
consume_string_with_buffer do
@buffer.write peek[0, pos]
@io.skip(pos)
end
end

private def number_start
Expand Down
22 changes: 13 additions & 9 deletions src/json/lexer/string_based.cr
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# :nodoc:
class JSON::Lexer::StringBased < JSON::Lexer
def initialize(string)
def initialize(string : String)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

polish: This could be @string : String.

super()
@reader = Char::Reader.new(string)
@string = string
@pos = 0
@number_start = 0
end

Expand Down Expand Up @@ -33,7 +34,7 @@ class JSON::Lexer::StringBased < JSON::Lexer
if @expects_object_key
start_pos += 1
end_pos = current_pos - 1
@token.string_value = @string_pool.get(@reader.string.to_unsafe + start_pos, end_pos - start_pos)
@token.string_value = @string_pool.get(@string.to_unsafe + start_pos, end_pos - start_pos)
else
@token.string_value = string_range(start_pos + 1, current_pos - 1)
end
Expand All @@ -47,27 +48,30 @@ class JSON::Lexer::StringBased < JSON::Lexer
end

private def current_pos
@reader.pos
@pos
end

def string_range(start_pos, end_pos) : String
@reader.string.byte_slice(start_pos, end_pos - start_pos)
@string.byte_slice(start_pos, end_pos - start_pos)
end

def slice_range(start_pos, end_pos) : Bytes
@reader.string.to_slice[start_pos, end_pos - start_pos]
@string.to_slice[start_pos, end_pos - start_pos]
end

private def next_char_no_column_increment
char = @reader.next_char
if char == '\0' && @reader.pos != @reader.string.bytesize
@pos += 1

char = current_char
if char == '\0' && @pos != @string.bytesize
unexpected_char
end

char
end

private def current_char
@reader.current_char
@string.to_unsafe[@pos].chr
end

private def number_start
Expand Down
21 changes: 18 additions & 3 deletions src/json/token.cr
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class JSON::Token
property string_value : String

def int_value : Int64
raw_value.to_i64
@int_value || raw_value.to_i64
rescue exc : ArgumentError
raise ParseException.new(exc.message, line_number, column_number)
end
Expand All @@ -32,14 +32,25 @@ class JSON::Token

property line_number : Int32
property column_number : Int32
property raw_value : String
setter raw_value : String
setter int_value : Int64?

def initialize
@kind = :EOF
@line_number = 0
@column_number = 0
@string_value = ""
@raw_value = ""
@int_value = nil
end

def raw_value
case @kind
when .int?
@int_value.try(&.to_s) || @raw_value
else
@raw_value
end
end

def to_s(io : IO) : Nil
Expand All @@ -51,7 +62,11 @@ class JSON::Token
when .true?
io << "true"
when .int?
raw_value.to_s(io)
if int_value = @int_value
int_value.to_s(io)
else
raw_value.to_s(io)
end
when .float?
raw_value.to_s(io)
when .string?
Expand Down