diff --git a/lib/syntax_error_search.rb b/lib/syntax_error_search.rb index 5109a6c..943de76 100644 --- a/lib/syntax_error_search.rb +++ b/lib/syntax_error_search.rb @@ -14,13 +14,72 @@ def self.indent(string) end end + # This will tell you if the `code_lines` would be valid + # if you removed the `without_lines`. In short it's a + # way to detect if we've found the lines with syntax errors + # in our document yet. + # + # code_lines = [ + # CodeLine.new(line: "def foo\n", index: 0), + # CodeLine.new(line: " def bar\n", index: 1), + # CodeLine.new(line: "end\n", index: 2) + # ] + # + # SyntaxErrorSearch.valid_without?( + # without_lines: code_lines[1], + # code_lines: code_lines + # ) # => true + # + # SyntaxErrorSearch.valid?(code_lines) # => false + def self.valid_without?(without_lines: , code_lines:) + lines = code_lines - Array(without_lines).flatten + if lines.empty? + return true + else + return valid?(lines) + end + end + + # Returns truthy if a given input source is valid syntax + # + # SyntaxErrorSearch.valid?(<<~EOM) # => true + # def foo + # end + # EOM + # + # SyntaxErrorSearch.valid?(<<~EOM) # => false + # def foo + # def bar # Syntax error here + # end + # EOM + # + # You can also pass in an array of lines and they'll be + # joined before evaluating + # + # SyntaxErrorSearch.valid?( + # [ + # "def foo\n", + # "end\n" + # ] + # ) # => true + # + # SyntaxErrorSearch.valid?( + # [ + # "def foo\n", + # " def bar\n", # Syntax error here + # "end\n" + # ] + # ) # => false + # + # As an FYI the CodeLine class instances respond to `to_s` + # so passing a CodeLine in as an object or as an array + # will convert it to its code representation. 
def self.valid?(source) source = source.join if source.is_a?(Array) source = source.to_s # Parser writes to stderr even if you catch the error - # stderr = $stderr $stderr = StringIO.new @@ -37,3 +96,4 @@ def self.valid?(source) require_relative "syntax_error_search/code_block" require_relative "syntax_error_search/code_frontier" require_relative "syntax_error_search/code_search" +require_relative "syntax_error_search/display_invalid_blocks" diff --git a/lib/syntax_error_search/code_frontier.rb b/lib/syntax_error_search/code_frontier.rb index f026cc8..1a23301 100644 --- a/lib/syntax_error_search/code_frontier.rb +++ b/lib/syntax_error_search/code_frontier.rb @@ -1,5 +1,143 @@ module SyntaxErrorSearch # This class is responsible for generating, storing, and sorting code blocks + # + # The search algorithm for finding our syntax errors isn't in this class, but + # this class holds the bulk of the logic for generating, storing, detecting + # and filtering invalid code. + # + # This is loosely based on the idea of a "frontier" for searching for a path + # example: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm + # + # In this case our path is going from code with a syntax error to code without a + # syntax error. We're currently doing that by evaluating individual lines + # with respect to indentation and other whitespace (empty lines), as represented + # by individual "code blocks". + # + # This class does not just store the frontier that we're searching, but is responsible + # for generating new code blocks as well. This is not ideal, but the state of generating + # and evaluating paths i.e. codeblocks is very tightly coupled. 
+ # + # ## Creation + # + # This example code is re-used in the other sections + # + # Example: + # + # code_lines = [ + # CodeLine.new(line: "def cinco\n", index: 0), + # CodeLine.new(line: " def dog\n", index: 1), # Syntax error 1 + # CodeLine.new(line: " def cat\n", index: 2), # Syntax error 2 + # CodeLine.new(line: "end\n", index: 3) + # ] + # + # frontier = CodeFrontier.new(code_lines: code_lines) + # + # frontier << frontier.next_block if frontier.next_block? + # frontier << frontier.next_block if frontier.next_block? + # + # frontier.holds_all_syntax_errors? # => true + # block = frontier.pop + # frontier.holds_all_syntax_errors? # => false + # frontier << block + # frontier.holds_all_syntax_errors? # => true + # + # frontier.detect_invalid_blocks.map(&:to_s) # => + # [ + # "def dog\n", + # "def cat\n" + # ] + # + # ## Block Generation + # + # Currently code blocks are generated based on indentation, with the idea that blocks are, + # well, indented. Once a code block is added to the frontier or it is expanded, or it is generated + # then we also need to remove those lines from our generation code so we don't generate the same block + # twice by accident. + # + # Block generation is currently done via the "indent_hash" internally by starting at the + # largest indentation. + # + # Example: + # + # ``` + # def river + # puts "lol" # <=== Start looking here and expand outwards + # end + # ``` + # + # Generating new code blocks is a little verbose but looks like this: + # + # frontier << frontier.next_block if frontier.next_block? + # + # Once a block is in the frontier, it can be popped off: + # + # frontier.pop + # # => <# CodeBlock > + # + # ## Block (frontier) storage, ordering and retrieval + # + # Once a block is generated it is stored internally in a frontier array. This is very similar to a search algorithm. 
+ # The array is sorted by indentation order, so that when a block is popped off the array, the one with + # the largest current indentation is evaluated first. + # + # For example, if we have these two blocks in the frontier: + # + # ``` + # # Block A - 0 spaces for indentation + # + # def cinco + # puts "lol" + # end + # ``` + # + # ``` + # # Block B - 2 spaces for indentation + # + # def river + # puts "hehe" + # end + # ``` + # + # The "Block B" has more current indentation, so it would be evaluated first. + # + # ## Frontier evaluation (Find the syntax error) + # + # Another key difference between this and a normal search "frontier" is that we're not checking if + # an individual code block meets the goal (turning invalid code to valid code) since there can + # be multiple syntax errors and this will require multiple code blocks. To handle this, we're + # evaluating all the contents of the frontier at the same time to see if the solution exists in any + # of our search blocks. + # + # # Using the previously generated frontier + # + # frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines) + # frontier.holds_all_syntax_errors? # => false + # + # frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines) + # frontier.holds_all_syntax_errors? # => true + # + # ## Detect invalid blocks (Filter for smallest solution) + # + # After we prove that a solution exists and we've found it to be in our frontier, we can stop searching. + # Once we've done this, we need to search through the existing frontier code blocks to find the minimum combination + # of blocks that hold the solution. This is done in: `detect_invalid_blocks`. 
+ # + # # Using the previously generated frontier + # + # frontier << CodeBlock.new(lines: code_lines[0], code_lines: code_lines) + # frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines) + # frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines) + # frontier << CodeBlock.new(lines: code_lines[3], code_lines: code_lines) + # + # frontier.count # => 4 + # frontier.detect_invalid_blocks.length # => 2 + # frontier.detect_invalid_blocks.map(&:to_s) # => + # [ + # "def dog\n", + # "def cat\n" + # ] + # + # Once invalid blocks are found and filtered, then they can be passed to a formatter. class CodeFrontier def initialize(code_lines: ) @code_lines = code_lines @@ -13,33 +151,36 @@ def initialize(code_lines: ) end end + def count + @frontier.count + end + # Returns true if the document is valid with all lines # removed. By default it checks all blocks in present in # the frontier array, but can be used for arbitrary arrays # of codeblocks as well def holds_all_syntax_errors?(block_array = @frontier) - lines = @code_lines - block_array.each do |block| - lines -= block.lines + without_lines = block_array.map do |block| + block.lines end - return true if lines.empty? - - CodeBlock.new( - code_lines: @code_lines, - lines: lines - ).valid? + SyntaxErrorSearch.valid_without?( + without_lines: without_lines, + code_lines: @code_lines + ) end # Returns a code block with the largest indentation possible def pop return nil if empty? - self << next_block unless @indent_hash.empty? - return @frontier.pop end + def next_block? + !@indent_hash.empty? 
+ end + def next_block indent = @indent_hash.keys.sort.last lines = @indent_hash[indent].first diff --git a/lib/syntax_error_search/code_search.rb b/lib/syntax_error_search/code_search.rb index cb39430..2f780f7 100644 --- a/lib/syntax_error_search/code_search.rb +++ b/lib/syntax_error_search/code_search.rb @@ -1,4 +1,27 @@ module SyntaxErrorSearch + # Searches code for a syntax error + # + # The bulk of the heavy lifting is done by the CodeFrontier + # + # The flow looks like this: + # + # ## Syntax error detection + # + # When the frontier holds the syntax error, we can stop searching + # + # + # search = CodeSearch.new(<<~EOM) + # def dog + # def lol + # end + # EOM + # + # search.call + # + # search.invalid_blocks.map(&:to_s) # => + # # => ["def lol\n"] + # + # class CodeSearch private; attr_reader :frontier; public public; attr_reader :invalid_blocks @@ -13,6 +36,8 @@ def initialize(string) def call until frontier.holds_all_syntax_errors? + frontier << frontier.next_block if frontier.next_block? + block = frontier.pop if block.valid? diff --git a/lib/syntax_error_search/display_invalid_blocks.rb b/lib/syntax_error_search/display_invalid_blocks.rb new file mode 100644 index 0000000..e25b157 --- /dev/null +++ b/lib/syntax_error_search/display_invalid_blocks.rb @@ -0,0 +1,63 @@ +module SyntaxErrorSearch + # Used for formatting invalid blocks + class DisplayInvalidBlocks + attr_reader :filename + + def initialize(block_array, io: $stderr, filename: nil) + @filename = filename + @io = io + @blocks = block_array + @lines = @blocks.map(&:lines).flatten + @digit_count = @lines.last.line_number.to_s.length + @code_lines = @blocks.first.code_lines + + @invalid_line_hash = @lines.each_with_object({}) {|line, h| h[line] = true} + end + + def call + @io.puts <<~EOM + + SyntaxErrorSearch: A syntax error was detected + + This code has an unmatched `end` this is caused by either + missing a syntax keyword (`def`, `do`, etc.) 
or inclusion + of an extra `end` line + + EOM + @io.puts("file: #{filename}") if filename + @io.puts <<~EOM + simplified: + + #{code_with_filename(indent: 2)} + EOM + end + + + def code_with_filename(indent: 0) + string = String.new("") + string << "```\n" + # string << "#".rjust(@digit_count) + " filename: #{filename}\n\n" if filename + string << code_with_lines + string << "```\n" + + string.each_line.map {|l| " " * indent + l }.join + end + + def code_with_lines + @code_lines.map do |line| + next if line.hidden? + number = line.line_number.to_s.rjust(@digit_count) + if line.empty? + "#{number.to_s}#{line}" + else + string = String.new + string << "\e[1;3m" if @invalid_line_hash[line] # Bold, italics + string << "#{number.to_s} " + string << line.to_s + string << "\e[0m" + string + end + end.join + end + end +end diff --git a/spec/fixtures/this_project_extra_def.rb.txt b/spec/fixtures/this_project_extra_def.rb.txt new file mode 100644 index 0000000..1ad29ad --- /dev/null +++ b/spec/fixtures/this_project_extra_def.rb.txt @@ -0,0 +1,64 @@ +module SyntaxErrorSearch + # Used for formatting invalid blocks + class DisplayInvalidBlocks + attr_reader :filename + + def initialize(block_array, io: $stderr, filename: nil) + @filename = filename + @io = io + @blocks = block_array + @lines = @blocks.map(&:lines).flatten + @digit_count = @lines.last.line_number.to_s.length + @code_lines = @blocks.first.code_lines + + @invalid_line_hash = @lines.each_with_object({}) {|line, h| h[line] = true} + end + + def call + @io.puts <<~EOM + + SyntaxErrorSearch: A syntax error was detected + + This code has an unmatched `end` this is caused by either + missing a syntax keyword (`def`, `do`, etc.) 
or inclusion + of an extra `end` line: + EOM + + @io.puts(<<~EOM) if filename + file: #{filename} + EOM + + @io.puts <<~EOM + #{code_with_filename} + EOM + end + + def filename + + def code_with_filename + string = String.new("") + string << "```\n" + string << "#".rjust(@digit_count) + " filename: #{filename}\n\n" if filename + string << code_with_lines + string << "```\n" + string + end + + def code_with_lines + @code_lines.map do |line| + next if line.hidden? + number = line.line_number.to_s.rjust(@digit_count) + if line.empty? + "#{number.to_s}#{line}" + else + string = String.new + string << "\e[1;3m" if @invalid_line_hash[line] # Bold, italics + string << "#{number.to_s} " + string << line.to_s + string << "\e[0m" + string + end + end.join + end + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index b3bf001..434a7cc 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -13,6 +13,18 @@ end end +def spec_dir + Pathname(__dir__) +end + +def root_dir + spec_dir.join("..") +end + +def fixtures_dir + spec_dir.join("fixtures") +end + def code_line_array(string) code_lines = [] string.lines.each_with_index do |line, index| @@ -34,6 +46,10 @@ def indent(number) end end.join end + + def strip_control_codes + self.gsub(/\e\[[^\x40-\x7E]*[\x40-\x7E]/, "") + end end diff --git a/spec/unit/code_frontier_spec.rb b/spec/unit/code_frontier_spec.rb index e263cc6..1f963a2 100644 --- a/spec/unit/code_frontier_spec.rb +++ b/spec/unit/code_frontier_spec.rb @@ -16,8 +16,10 @@ module SyntaxErrorSearch EOM frontier = CodeFrontier.new(code_lines: code_lines) + frontier << frontier.next_block if frontier.next_block? until frontier.holds_all_syntax_errors? + frontier << frontier.next_block if frontier.next_block? block = frontier.pop if block.valid? @@ -87,6 +89,8 @@ def foo EOM frontier = CodeFrontier.new(code_lines: code_lines) + + frontier << frontier.next_block if frontier.next_block? 
block = frontier.pop expect(block.to_s).to eq(<<~EOM.indent(2)) end @@ -110,6 +114,7 @@ def foo EOM frontier = CodeFrontier.new(code_lines: code_lines) + frontier << frontier.next_block if frontier.next_block? block = frontier.pop expect(block.to_s).to eq(<<~EOM.indent(2)) puts "lol" @@ -131,6 +136,7 @@ def foo EOM frontier = CodeFrontier.new(code_lines: code_lines) + frontier << frontier.next_block if frontier.next_block? expect(frontier.pop.to_s).to eq(<<~EOM.indent(2)) puts "lol1" puts "lol2" @@ -140,11 +146,13 @@ def foo expect(frontier.generate_new_block?).to be_truthy + frontier << frontier.next_block if frontier.next_block? expect(frontier.pop.to_s).to eq(<<~EOM.indent(2)) puts "lol4" EOM + frontier << frontier.next_block if frontier.next_block? expect(frontier.pop.to_s).to eq(<<~EOM) def foo EOM diff --git a/spec/unit/code_search_spec.rb b/spec/unit/code_search_spec.rb index 4d3c779..31796c0 100644 --- a/spec/unit/code_search_spec.rb +++ b/spec/unit/code_search_spec.rb @@ -2,9 +2,99 @@ module SyntaxErrorSearch RSpec.describe CodeSearch do + it "def with missing end" do + search = CodeSearch.new(<<~EOM) + class OH + def hello + def hai + end + end + EOM + search.call + + expect(search.invalid_blocks.join).to eq(<<~EOM.indent(2)) + def hello + def hai + end + EOM + end + # For code that's not perfectly formatted, we ideally want to do our best # These examples represent the results that exist today, but I would like to improve upon them describe "needs improvement" do + describe "missing describe/do line" do + + it "this project" do + skip("Lol the results are really bad on this one") + search = CodeSearch.new(fixtures_dir.join("this_project_extra_def.rb.txt").read) + + search.call + + blocks = search.invalid_blocks + io = StringIO.new + display = DisplayInvalidBlocks.new(blocks, io: io, filename: "fake/spec/lol.rb") + display.call + puts io.string + + expect(display.code_with_lines.strip_control_codes).to eq(<<~EOM) + 36 def filename + EOM + end + + it 
"Format Code blocks real world example" do + search = CodeSearch.new(<<~EOM) + require 'rails_helper' + + RSpec.describe AclassNameHere, type: :worker do + describe "thing" do + context "when" do + let(:thing) { stuff } + let(:another_thing) { moarstuff } + subject { foo.new.perform(foo.id, true) } + + it "stuff" do + subject + + expect(foo.foo.foo).to eq(true) + end + end + end # here + + context "stuff" do + let(:thing) { create(:foo, foo: stuff) } + let(:another_thing) { create(:stuff) } + + subject { described_class.new.perform(foo.id, false) } + + it "more stuff" do + subject + + expect(foo.foo.foo).to eq(false) + end + end + end + end + EOM + search.call + + blocks = search.invalid_blocks + io = StringIO.new + display = DisplayInvalidBlocks.new(blocks, io: io, filename: "fake/spec/lol.rb") + display.call + # puts io.string + + expect(display.code_with_lines.strip_control_codes).to eq(<<~EOM) + 1 require 'rails_helper' + 2 + 3 RSpec.describe AclassNameHere, type: :worker do + 4 describe "thing" do + 16 end # here + 30 end + 31 end + EOM + end + end + describe "mis-matched-indentation" do it "stacked ends " do search = CodeSearch.new(<<~EOM)