From abfd59cec4bded8b007927feeb54b501ae1d84cf Mon Sep 17 00:00:00 2001 From: schneems Date: Sat, 14 Nov 2020 09:57:04 -0600 Subject: [PATCH] Refactor: Immutable CodeBlocks To be able to have CodeBlock class not depend on knowledge of all code lines, we need to move the code expansion logic outside of the code class. It's now being represented in the BlockExpand class which can be independently tested. To simplify writing this class, a helper class AroundBlockScan was introduced. This class automates some of the chores around searching before/after a given block. The IndentScan class has been renamed to ParseBlocksFromIndentLine to be a little more descriptive about its purpose and goal, but I'm still struggling to name it. --- Gemfile | 1 + Gemfile.lock | 2 + lib/syntax_search.rb | 7 +- lib/syntax_search/around_block_scan.rb | 91 +++++++ lib/syntax_search/block_expand.rb | 78 ++++++ lib/syntax_search/code_block.rb | 177 +------------ lib/syntax_search/code_frontier.rb | 241 +++--------------- lib/syntax_search/code_search.rb | 49 ++-- lib/syntax_search/display_invalid_blocks.rb | 4 +- .../parse_blocks_from_indent_line.rb | 56 ++++ spec/unit/around_block_scan_spec.rb | 81 ++++++ spec/unit/block_expand_spec.rb | 205 +++++++++++++++ spec/unit/code_block_spec.rb | 156 ++---------- spec/unit/code_frontier_spec.rb | 164 +++--------- spec/unit/code_search_spec.rb | 23 +- spec/unit/display_invalid_blocks_spec.rb | 27 +- 16 files changed, 695 insertions(+), 667 deletions(-) create mode 100644 lib/syntax_search/around_block_scan.rb create mode 100644 lib/syntax_search/block_expand.rb create mode 100644 lib/syntax_search/parse_blocks_from_indent_line.rb create mode 100644 spec/unit/around_block_scan_spec.rb create mode 100644 spec/unit/block_expand_spec.rb diff --git a/Gemfile b/Gemfile index 57fb696..f046253 100644 --- a/Gemfile +++ b/Gemfile @@ -7,3 +7,4 @@ gemspec gem "rake", "~> 12.0" gem "rspec", "~> 3.0" +gem "stackprof" diff --git a/Gemfile.lock b/Gemfile.lock index
a8aa0f2..a341000 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -25,6 +25,7 @@ GEM diff-lcs (>= 1.2.0, < 2.0) rspec-support (~> 3.10.0) rspec-support (3.10.0) + stackprof (0.2.16) PLATFORMS ruby @@ -32,6 +33,7 @@ PLATFORMS DEPENDENCIES rake (~> 12.0) rspec (~> 3.0) + stackprof syntax_search! BUNDLED WITH diff --git a/lib/syntax_search.rb b/lib/syntax_search.rb index 86912c4..f8936c0 100644 --- a/lib/syntax_search.rb +++ b/lib/syntax_search.rb @@ -40,6 +40,7 @@ def self.call(source: , filename: , terminal: false, record_dir: nil) blocks: blocks, filename: filename, terminal: terminal, + code_lines: search.code_lines, invalid_type: invalid_type(source), io: $stderr ).call @@ -152,5 +153,9 @@ def self.invalid_type(source) require_relative "syntax_search/code_line" require_relative "syntax_search/code_block" require_relative "syntax_search/code_frontier" -require_relative "syntax_search/code_search" require_relative "syntax_search/display_invalid_blocks" +require_relative "syntax_search/around_block_scan" +require_relative "syntax_search/block_expand" +require_relative "syntax_search/parse_blocks_from_indent_line" + +require_relative "syntax_search/code_search" diff --git a/lib/syntax_search/around_block_scan.rb b/lib/syntax_search/around_block_scan.rb new file mode 100644 index 0000000..40f0e49 --- /dev/null +++ b/lib/syntax_search/around_block_scan.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true +# +module SyntaxErrorSearch + # This class is useful for exploring contents before and after + # a block + # + # It searches above and below the passed in block to match for + # whatever criteria you give it: + # + # Example: + # + # def dog + # puts "bark" + # puts "bark" + # end + # + # scan = AroundBlockScan.new( + # code_lines: code_lines + # block: CodeBlock.new(lines: code_lines[1]) + # ) + # + # scan.scan_while { true } + # + # puts scan.before_index # => 0 + # puts scan.after_index # => 3 + # + # Contents can also be filtered using AroundBlockScan#skip + # + # To 
grab the next surrounding indentation use AroundBlockScan#scan_adjacent_indent + class AroundBlockScan + def initialize(code_lines: , block:) + @code_lines = code_lines + @orig_before_index = block.lines.first.index + @orig_after_index = block.lines.last.index + @skip_array = [] + @after_array = [] + @before_array = [] + end + + def skip(name) + @skip_array << name + self + end + + def scan_while(&block) + @before_index = before_lines.reverse_each.take_while do |line| + next true if @skip_array.detect {|meth| line.send(meth) } + + block.call(line) + end.reverse.first&.index + + @after_index = after_lines.take_while do |line| + next true if @skip_array.detect {|meth| line.send(meth) } + + block.call(line) + end.last&.index + self + end + + def scan_adjacent_indent + before_indent = @code_lines[@orig_before_index.pred]&.indent || 0 + after_indent = @code_lines[@orig_after_index.next]&.indent || 0 + + indent = [before_indent, after_indent].min + @before_index = before_index.pred if before_indent >= indent + @after_index = after_index.next if after_indent >= indent + + self + end + + def code_block + CodeBlock.new(lines: @code_lines[before_index..after_index]) + end + + def before_index + @before_index || @orig_before_index + end + + def after_index + @after_index || @orig_after_index + end + + private def before_lines + @code_lines[0...@orig_before_index] + end + + private def after_lines + @code_lines[@orig_after_index.next..-1] + end + end +end diff --git a/lib/syntax_search/block_expand.rb b/lib/syntax_search/block_expand.rb new file mode 100644 index 0000000..fc1c989 --- /dev/null +++ b/lib/syntax_search/block_expand.rb @@ -0,0 +1,78 @@ +# frozen_string_literal: true +module SyntaxErrorSearch + # This class is responsible for taking a code block that exists + # at a far indentation and then iteratively increasing the block + # so that it captures everything within the same indentation block.
+ # + # def dog + # puts "bow" + # puts "wow" + # end + # + # block = BlockExpand.new(code_lines: code_lines) + # .call(CodeBlock.new(lines: code_lines[1])) + # + # puts block.to_s + # # => puts "bow" + # puts "wow" + # + # + # Once a code block has captured everything at a given indentation level + # then it will expand to capture surrounding indentation. + # + # block = BlockExpand.new(code_lines: code_lines) + # .call(block) + # + # block.to_s + # # => def dog + # puts "bow" + # puts "wow" + # end + # + class BlockExpand + def initialize(code_lines: ) + @code_lines = code_lines + end + + def call(block) + if (next_block = expand_neighbors(block, grab_empty: true)) + return next_block + end + + expand_indent(block) + end + + def expand_indent(block) + block = AroundBlockScan.new(code_lines: @code_lines, block: block) + .scan_adjacent_indent + .code_block + + # Handle if/else/end case + if (next_block = expand_neighbors(block, grab_empty: false)) + return next_block + else + return block + end + end + + def expand_neighbors(block, grab_empty: true) + scan = AroundBlockScan.new(code_lines: @code_lines, block: block) + .skip(:hidden?) + .scan_while {|line| line.not_empty? && line.indent >= block.current_indent } + + # Slurp up empties + if grab_empty + scan = AroundBlockScan.new(code_lines: @code_lines, block: scan.code_block) + .scan_while {|line| line.empty? || line.hidden? } + end + + new_block = scan.code_block + + if block.lines == new_block.lines + return nil + else + return new_block + end + end + end +end diff --git a/lib/syntax_search/code_block.rb b/lib/syntax_search/code_block.rb index 2f53a11..56d303e 100644 --- a/lib/syntax_search/code_block.rb +++ b/lib/syntax_search/code_block.rb @@ -3,11 +3,7 @@ module SyntaxErrorSearch # Multiple lines form a singular CodeBlock # - # Source code is made of multiple CodeBlocks. 
A code block - # has a reference to the source code that created itself, this allows - # a code block to "expand" when needed - # - # The most important ability of a CodeBlock is this ability to expand: + # Source code is made of multiple CodeBlocks. # # Example: # @@ -16,21 +12,19 @@ module SyntaxErrorSearch # # puts "foo" # # end # - # code_block.expand_until_next_boundry + # code_block.valid? # => true + # code_block.invalid? # => false # - # code_block.to_s # => - # # class Foo - # # def foo - # # puts "foo" - # # end - # # end # class CodeBlock attr_reader :lines - def initialize(code_lines: nil, lines: []) + def initialize(lines: []) @lines = Array(lines) - @code_lines = code_lines + end + + def mark_invisible + @lines.map(&:mark_invisible) end def is_end? @@ -38,11 +32,11 @@ def is_end? end def starts_at - @lines.first&.line_number + @starts_at ||= @lines.first&.line_number end - def code_lines - @code_lines + def ends_at + @ends_at ||= @lines.last&.line_number end # This is used for frontier ordering, we are searching from @@ -53,155 +47,8 @@ def <=>(other) self.current_indent <=> other.current_indent end - # Only the lines that are not empty and visible - def visible_lines - @lines - .select(&:not_empty?) - .select(&:visible?) - end - - # This method is used to expand a code block to capture it's calling context - def expand_until_next_boundry - expand_to_indent(next_indent) - self - end - - # This method expands the given code block until it captures - # its nearest neighbors. This is used to expand a single line of code - # to its smallest likely block.
- # - # code_block.to_s # => - # # puts "foo" - # code_block.expand_until_neighbors - # - # code_block.to_s # => - # # puts "foo" - # # puts "bar" - # # puts "baz" - # - def expand_until_neighbors - expand_to_indent(current_indent) - - expand_hidden_parner_line if self.to_s.strip == "end" - self - end - - def expand_hidden_parner_line - index = @lines.first.index - indent = current_indent - partner_line = code_lines.select {|line| line.index < index && line.indent == indent }.last - - if partner_line&.hidden? - partner_line.mark_visible - @lines.prepend(partner_line) - end - end - - # This method expands the existing code block up (before) - # and down (after). It will break on change in indentation - # and empty lines. - # - # code_block.to_s # => - # # def foo - # # puts "foo" - # # end - # - # code_block.expand_to_indent(0) - # code_block.to_s # => - # # class Foo - # # def foo - # # puts "foo" - # # end - # # end - # - private def expand_to_indent(indent) - array = [] - before_lines(skip_empty: false).each do |line| - if line.empty? - array.prepend(line) - break - end - - if line.indent == indent - array.prepend(line) - else - break - end - end - - array << @lines - - after_lines(skip_empty: false).each do |line| - if line.empty? - array << line - break - end - - if line.indent == indent - array << line - else - break - end - end - - @lines = array.flatten - end - - def next_indent - [ - before_line&.indent || 0, - after_line&.indent || 0 - ].max - end - def current_indent - lines.detect(&:not_empty?)&.indent || 0 - end - - def before_line - before_lines.first - end - - def after_line - after_lines.first - end - - def before_lines(skip_empty: true) - index = @lines.first.index - lines = code_lines.select {|line| line.index < index } - lines.select!(&:not_empty?) if skip_empty - lines.select!(&:visible?) - lines.reverse! 
- - lines - end - - def after_lines(skip_empty: true) - index = @lines.last.index - lines = code_lines.select {|line| line.index > index } - lines.select!(&:not_empty?) if skip_empty - lines.select!(&:visible?) - lines - end - - # Returns a code block of the source that does not include - # the current lines. This is useful for checking if a source - # with the given lines removed parses successfully. If so - # - # Then it's proof that the current block is invalid - def block_without - @block_without ||= CodeBlock.new( - source: @source, - lines: @source.code_lines - @lines - ) - end - - def document_valid_without? - block_without.valid? - end - - def valid_without? - block_without.valid? + @current_indent ||= lines.select(&:not_empty?).map(&:indent).min || 0 end def invalid? diff --git a/lib/syntax_search/code_frontier.rb b/lib/syntax_search/code_frontier.rb index 79cd042..f8b87f4 100644 --- a/lib/syntax_search/code_frontier.rb +++ b/lib/syntax_search/code_frontier.rb @@ -1,178 +1,43 @@ # frozen_string_literal: true module SyntaxErrorSearch - # This class is responsible for generating, storing, and sorting code blocks + # The main function of the frontier is to hold the edges of our search and to + # evaluate when we can stop searching. # - # The search algorithm for finding our syntax errors isn't in this class, but - # this is class holds the bulk of the logic for generating, storing, detecting - # and filtering invalid code. + # ## Knowing where we've been # - # This is loosely based on the idea of a "frontier" for searching for a path - # example: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm + # Once a code block is generated it is added onto the frontier where it will be + # sorted and then the frontier can be filtered. Large blocks that totally contain a + # smaller block will cause the smaller block to be evicted. # - # In this case our path is going from code with a syntax error to code without a - # syntax error. 
We're currently doing that by evaluating individual lines - # with respect to indentation and other whitespace (empty lines). As represented - # by individual "code blocks". + # CodeFrontier#<< + # CodeFrontier#pop # - # This class does not just store the frontier that we're searching, but is responsible - # for generating new code blocks as well. This is not ideal, but the state of generating - # and evaluating paths i.e. codeblocks is very tightly coupled. + # ## Knowing where we can go # - # ## Creation + # Internally it keeps track of an "indent hash" which is exposed via `next_indent_line` + # when called this will return a line of code with the most indentation. # - # This example code is re-used in the other sections + # This line of code can be used to build a CodeBlock, and then when that code block + # is added back to the frontier, then the lines in the code block are removed from the + # indent hash so we don't double-create the same block. # - # Example: + # CodeFrontier#next_indent_line + # CodeFrontier#register_indent_block # - # code_lines = [ - # CodeLine.new(line: "def cinco\n", index: 0) - # CodeLine.new(line: " def dog\n", index: 1) # Syntax error 1 - # CodeLine.new(line: " def cat\n", index: 2) # Syntax error 2 - # CodeLine.new(line: "end\n", index: 3) - # ] + # ## Knowing when to stop # - # frontier = CodeFrontier.new(code_lines: code_lines) + # The frontier holds the syntax error when removing all code blocks from the original + # source document allows it to be parsed as syntactically valid: # - # frontier << frontier.next_block if frontier.next_block? - # frontier << frontier.next_block if frontier.next_block? + # CodeFrontier#holds_all_syntax_errors? # - # frontier.holds_all_syntax_errors? # => true - # block = frontier.pop - # frontier.holds_all_syntax_errors? # => false - # frontier << block - # frontier.holds_all_syntax_errors?
# => true + # ## Filtering false positives # - # frontier.detect_invalid_blocks.map(&:to_s) # => - # [ - # "def dog\n", - # "def cat\n" - # ] + # Once the search is completed, the frontier will have many blocks that do not contain + # the syntax error. To filter to the smallest subset that does call: # - # ## Block Generation - # - # Currently code blocks are generated based off of indentation. With the idea that blocks are, - # well, indented. Once a code block is added to the frontier or it is expanded, or it is generated - # then we also need to remove those lines from our generation code so we don't generate the same block - # twice by accident. - # - # This is block generation is currently done via the "indent_hash" internally by starting at the outer - # most indentation. - # - # Example: - # - # ``` - # def river - # puts "lol" # <=== Start looking here and expand outwards - # end - # ``` - # - # Generating new code blocks is a little verbose but looks like this: - # - # frontier << frontier.next_block if frontier.next_block? - # - # Once a block is in the frontier, it can be popped off: - # - # frontier.pop - # # => <# CodeBlock > - # - # ## Block (frontier) storage, ordering and retrieval - # - # Once a block is generated it is stored internally in a frontier array. This is very similar to a search algorithm. - # The array is sorted by indentation order, so that when a block is popped off the array, the one with - # the largest current indentation is evaluated first. - # - # For example, if we have these two blocks in the frontier: - # - # ``` - # # Block A - 0 spaces for indentation - # - # def cinco - # puts "lol" - # end - # ``` - # - # ``` - # # Block B - 2 spaces for indentation - # - # def river - # puts "hehe" - # end - # ``` - # - # The "Block B" has more current indentation, so it would be evaluated first. 
- # - # ## Frontier evaluation (Find the syntax error) - # - # Another key difference between this and a normal search "frontier" is that we're not checking if - # an individual code block meets the goal (turning invalid code to valid code) since there can - # be multiple syntax errors and this will require multiple code blocks. To handle this, we're - # evaluating all the contents of the frontier at the same time to see if the solution exists in any - # of our search blocks. - # - # # Using the previously generated frontier - # - # frontier << Block.new(lines: code_lines[1], code_lines: code_lines) - # frontier.holds_all_syntax_errors? # => false - # - # frontier << Block.new(lines: code_lines[2], code_lines: code_lines) - # frontier.holds_all_syntax_errors? # => true - # - # ## Detect invalid blocks (Filter for smallest solution) - # - # After we prove that a solution exists and we've found it to be in our frontier, we can start stop searching. - # Once we've done this, we need to search through the existing frontier code blocks to find the minimum combination - # of blocks that hold the solution. This is done in: `detect_invalid_blocks`. - # - # # Using the previously generated frontier - # - # frontier << CodeBlock.new(lines: code_lines[0], code_lines: code_lines) - # frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines) - # frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines) - # frontier << CodeBlock.new(lines: code_lines[3], code_lines: code_lines) - # - # frontier.count # => 4 - # frontier.detect_invalid_blocks.length => 2 - # frontier.detect_invalid_blocks.map(&:to_s) # => - # [ - # "def dog\n", - # "def cat\n" - # ] - # - # Once invalid blocks are found and filtered, then they can be passed to a formatter. 
- # - # - # - - class IndentScan - attr_reader :code_lines - - def initialize(code_lines: ) - @code_lines = code_lines - end - - def neighbors_from_top(top_line) - code_lines - .select {|l| l.index >= top_line.index } - .select {|l| l.not_empty? } - .select {|l| l.visible? } - .take_while {|l| l.indent >= top_line.indent } - end - - def each_neighbor_block(top_line) - neighbors = neighbors_from_top(top_line) - - until neighbors.empty? - lines = [neighbors.pop] - while (block = CodeBlock.new(lines: lines, code_lines: code_lines)) && block.invalid? && neighbors.any? - lines.prepend neighbors.pop - end - - yield block if block - end - end - end - + # CodeFrontier#detect_invalid_blocks class CodeFrontier def initialize(code_lines: ) @code_lines = code_lines @@ -207,16 +72,9 @@ def holds_all_syntax_errors?(block_array = @frontier) # Returns a code block with the largest indentation possible def pop - return nil if empty? - return @frontier.pop end - def next_block? - !@indent_hash.empty? - end - - def indent_hash_indent @indent_hash.keys.sort.last end @@ -226,40 +84,25 @@ def next_indent_line @indent_hash[indent]&.first end - def generate_blocks - end - - def next_block - indent = @indent_hash.keys.sort.last - lines = @indent_hash[indent].first - - block = CodeBlock.new( - lines: lines, - code_lines: @code_lines - ).expand_until_neighbors - - register(block) - block - end - def expand? return false if @frontier.empty? return true if @indent_hash.empty? - @frontier.last.current_indent >= @indent_hash.keys.sort.last - end + frontier_indent = @frontier.last.current_indent + hash_indent = @indent_hash.keys.sort.last - # This method is responsible for determining if a new code - # block should be generated instead of evaluating an already - # existing block in the frontier - def generate_new_block? - return false if @indent_hash.empty? - return true if @frontier.empty? 
+ if ENV["DEBUG"] + puts "```" + puts @frontier.last.to_s + puts "```" + puts " @frontier indent: #{frontier_indent}" + puts " @hash indent: #{hash_indent}" + end - @frontier.last.current_indent <= @indent_hash.keys.sort.last + frontier_indent >= hash_indent end - def register(block) + def register_indent_block(block) block.lines.each do |line| @indent_hash[line.indent]&.delete(line) end @@ -273,22 +116,18 @@ def register(block) # and that each code block's lines are removed from the indentation hash so we # don't re-evaluate the same line multiple times. def <<(block) - register(block) + register_indent_block(block) + # Make sure we don't double expand, if a code block fully engulfs another code block, keep the bigger one + @frontier.reject! {|b| + b.starts_at >= block.starts_at && b.ends_at <= block.ends_at + } @frontier << block @frontier.sort! self end - def any? - !empty? - end - - def empty? - @frontier.empty? && @indent_hash.empty? - end - # Example: # # combination([:a, :b, :c, :d]) diff --git a/lib/syntax_search/code_search.rb b/lib/syntax_search/code_search.rb index 33a03fc..61d7481 100644 --- a/lib/syntax_search/code_search.rb +++ b/lib/syntax_search/code_search.rb @@ -3,15 +3,16 @@ module SyntaxErrorSearch # Searches code for a syntax error # - # The bulk of the heavy lifting is done by the CodeFrontier + # The bulk of the heavy lifting is done in: # - # The flow looks like this: + # - CodeFrontier (Holds information for generating blocks and determining if we can stop searching) + # - ParseBlocksFromIndentLine (Creates blocks into the frontier) + # - BlockExpand (Expands existing blocks to search more code) # # ## Syntax error detection # # When the frontier holds the syntax error, we can stop searching # - # # search = CodeSearch.new(<<~EOM) # def dog # def lol @@ -23,7 +24,6 @@ module SyntaxErrorSearch # search.invalid_blocks.map(&:to_s) # => # # => ["def lol\n"] # - # class CodeSearch private; attr_reader :frontier; public public; attr_reader
:invalid_blocks, :record_dir, :code_lines @@ -41,24 +41,33 @@ def initialize(string, record_dir: ENV["SYNTAX_SEARCH_RECORD_DIR"]) @invalid_blocks = [] @name_tick = Hash.new {|hash, k| hash[k] = 0 } @tick = 0 - @scan = IndentScan.new(code_lines: @code_lines) + @block_expand = BlockExpand.new(code_lines: code_lines) + @parse_blocks_from_indent_line = ParseBlocksFromIndentLine.new(code_lines: @code_lines) end + # Used for debugging def record(block:, name: "record") return if !@record_dir @name_tick[name] += 1 filename = "#{@write_count += 1}-#{name}-#{@name_tick[name]}.txt" + if ENV["DEBUG"] + puts "\n\n==== #{filename} ====" + puts "\n```#{block.starts_at}:#{block.ends_at}" + puts "#{block.to_s}" + puts "```" + puts " block indent: #{block.current_indent}" + end @record_dir.join(filename).open(mode: "a") do |f| display = DisplayInvalidBlocks.new( blocks: block, - terminal: false + terminal: false, + code_lines: @code_lines, ) f.write(display.indent display.code_with_lines) end end - def push_if_invalid(block, name: ) - frontier.register(block) + def push(block, name: ) record(block: block, name: name) if block.valid? @@ -69,32 +78,36 @@ def push_if_invalid(block, name: ) end end + # Parses the most indented lines into blocks that are marked + # and added to the frontier def add_invalid_blocks max_indent = frontier.next_indent_line&.indent while (line = frontier.next_indent_line) && (line.indent == max_indent) - neighbors = @scan.neighbors_from_top(frontier.next_indent_line) - @scan.each_neighbor_block(frontier.next_indent_line) do |block| + @parse_blocks_from_indent_line.each_neighbor_block(frontier.next_indent_line) do |block| record(block: block, name: "add") - if block.valid? - block.lines.each(&:mark_invisible) - end - end - block = CodeBlock.new(lines: neighbors, code_lines: @code_lines) - push_if_invalid(block, name: "add") + block.mark_invisible if block.valid? 
+ push(block, name: "add") + end end end + # Given an already existing block in the frontier, expand it to see + # if it contains our invalid syntax def expand_invalid_block block = frontier.pop return unless block - block.expand_until_next_boundry - push_if_invalid(block, name: "expand") + record(block: block, name: "pop") + + # block = block.expand_until_next_boundry + block = @block_expand.call(block) + push(block, name: "expand") end + # Main search loop def call until frontier.holds_all_syntax_errors? @tick += 1 diff --git a/lib/syntax_search/display_invalid_blocks.rb b/lib/syntax_search/display_invalid_blocks.rb index 8fa1310..bf43baa 100644 --- a/lib/syntax_search/display_invalid_blocks.rb +++ b/lib/syntax_search/display_invalid_blocks.rb @@ -5,14 +5,14 @@ module SyntaxErrorSearch class DisplayInvalidBlocks attr_reader :filename - def initialize(blocks:, io: $stderr, filename: nil, terminal: false, invalid_type: :unmatched_end) + def initialize(code_lines: ,blocks:, io: $stderr, filename: nil, terminal: false, invalid_type: :unmatched_end) @terminal = terminal @filename = filename @io = io @blocks = Array(blocks) @lines = @blocks.map(&:lines).flatten - @code_lines = @blocks.first&.code_lines || [] + @code_lines = code_lines @digit_count = @code_lines.last&.line_number.to_s.length @invalid_line_hash = @lines.each_with_object({}) {|line, h| h[line] = true } diff --git a/lib/syntax_search/parse_blocks_from_indent_line.rb b/lib/syntax_search/parse_blocks_from_indent_line.rb new file mode 100644 index 0000000..53cb8b4 --- /dev/null +++ b/lib/syntax_search/parse_blocks_from_indent_line.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +module SyntaxErrorSearch + # This class is responsible for generating initial code blocks + # that will then later be expanded. + # + # The biggest concern when guessing about code blocks, is accidentally + # grabbing one that contains only an "end". 
In this example: + # + # def dog + # begonn # misspelled `begin` + # puts "bark" + # end + # end + # + # The following lines would be matched (from bottom to top): + # + # 1) end + # + # 2) puts "bark" + # end + # + # 3) begonn + # puts "bark" + # end + # + # At this point it has nowhere else to expand, and it will yield this inner + # code as a block + class ParseBlocksFromIndentLine + attr_reader :code_lines + + def initialize(code_lines: ) + @code_lines = code_lines + end + + # Builds blocks from bottom up + def each_neighbor_block(target_line) + scan = AroundBlockScan.new(code_lines: code_lines, block: CodeBlock.new(lines: target_line)) + .skip(:empty?) + .skip(:hidden?) + .scan_while {|line| line.indent >= target_line.indent } + + neighbors = @code_lines[scan.before_index..scan.after_index] + + until neighbors.empty? + lines = [neighbors.pop] + while (block = CodeBlock.new(lines: lines)) && block.invalid? && neighbors.any? + lines.prepend neighbors.pop + end + + yield block if block + end + end + end +end + diff --git a/spec/unit/around_block_scan_spec.rb b/spec/unit/around_block_scan_spec.rb new file mode 100644 index 0000000..6b46f0e --- /dev/null +++ b/spec/unit/around_block_scan_spec.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +require_relative "../spec_helper.rb" + +module SyntaxErrorSearch + RSpec.describe AroundBlockScan do + it "captures multiple empty and hidden lines" do + source_string = <<~EOM + def foo + Foo.call + + puts "lol" + + end + end + EOM + + code_lines = code_line_array(source_string) + block = CodeBlock.new(lines: code_lines[3]) + expand = AroundBlockScan.new(code_lines: code_lines, block: block) + expand.scan_while { true } + + expect(expand.before_index).to eq(0) + expect(expand.after_index).to eq(6) + expect(expand.code_block.to_s).to eq(source_string) + end + + it "only takes what you ask" do + source_string = <<~EOM + def foo + Foo.call + + puts "lol" + + end + end + EOM + + code_lines = code_line_array(source_string) +
block = CodeBlock.new(lines: code_lines[3]) + expand = AroundBlockScan.new(code_lines: code_lines, block: block) + expand.scan_while {|line| line.not_empty? } + + expect(expand.code_block.to_s).to eq(<<~EOM.indent(4)) + puts "lol" + EOM + end + + it "skips what you want" do + source_string = <<~EOM + def foo + Foo.call + + puts "haha" + # hide me + + puts "lol" + + end + end + EOM + + code_lines = code_line_array(source_string) + code_lines[4].mark_invisible + + block = CodeBlock.new(lines: code_lines[3]) + expand = AroundBlockScan.new(code_lines: code_lines, block: block) + expand.skip(:empty?) + expand.skip(:hidden?) + expand.scan_while {|line| line.indent >= block.current_indent } + + expect(expand.code_block.to_s).to eq(<<~EOM.indent(4)) + + puts "haha" + + puts "lol" + + EOM + end + end +end diff --git a/spec/unit/block_expand_spec.rb b/spec/unit/block_expand_spec.rb new file mode 100644 index 0000000..500c61d --- /dev/null +++ b/spec/unit/block_expand_spec.rb @@ -0,0 +1,205 @@ +# frozen_string_literal: true + +require_relative "../spec_helper.rb" + + +module SyntaxErrorSearch + + RSpec.describe BlockExpand do + it "captures multiple empty and hidden lines" do + source_string = <<~EOM + def foo + Foo.call + + + puts "lol" + + # hidden + end + end + EOM + + code_lines = code_line_array(source_string) + + code_lines[6].mark_invisible + + block = CodeBlock.new(lines: [code_lines[3]]) + expansion = BlockExpand.new(code_lines: code_lines) + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM.indent(4)) + + + puts "lol" + + EOM + end + + it "captures multiple empty lines" do + source_string = <<~EOM + def foo + Foo.call + + + puts "lol" + + end + end + EOM + + code_lines = code_line_array(source_string) + block = CodeBlock.new(lines: [code_lines[3]]) + expansion = BlockExpand.new(code_lines: code_lines) + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM.indent(4)) + + + puts "lol" + + EOM + end + + it "expands neighbors then 
indentation" do + source_string = <<~EOM + def foo + Foo.call + puts "hey" + puts "lol" + puts "sup" + end + end + EOM + + code_lines = code_line_array(source_string) + block = CodeBlock.new(lines: [code_lines[3]]) + expansion = BlockExpand.new(code_lines: code_lines) + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM.indent(4)) + puts "hey" + puts "lol" + puts "sup" + EOM + + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM.indent(2)) + Foo.call + puts "hey" + puts "lol" + puts "sup" + end + EOM + end + + it "handles else code" do + source_string = <<~EOM + Foo.call + if blerg + puts "lol" + else + puts "haha" + end + end + EOM + + code_lines = code_line_array(source_string) + block = CodeBlock.new(lines: [code_lines[2]]) + expansion = BlockExpand.new(code_lines: code_lines) + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM.indent(2)) + if blerg + puts "lol" + else + puts "haha" + end + EOM + + block = expansion.call(block) + end + + it "expand until next boundry (indentation)" do + source_string = <<~EOM + describe "what" do + Foo.call + end + + describe "hi" + Bar.call do + Foo.call + end + end + + it "blerg" do + end + EOM + + code_lines = code_line_array(source_string) + + block = CodeBlock.new( + lines: code_lines[6] + ) + + expansion = BlockExpand.new(code_lines: code_lines) + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM.indent(2)) + Bar.call do + Foo.call + end + EOM + + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM) + describe "hi" + Bar.call do + Foo.call + end + end + EOM + end + + + it "expand until next boundry (empty lines)" do + source_string = <<~EOM + describe "what" do + end + + describe "hi" + end + + it "blerg" do + end + EOM + + code_lines = code_line_array(source_string) + expansion = BlockExpand.new(code_lines: code_lines) + + block = CodeBlock.new(lines: code_lines[3]) + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM) + + describe 
"hi" + end + + EOM + + block = expansion.call(block) + + expect(block.to_s).to eq(<<~EOM) + describe "what" do + end + + describe "hi" + end + + it "blerg" do + end + EOM + end + end +end diff --git a/spec/unit/code_block_spec.rb b/spec/unit/code_block_spec.rb index 5c19a09..d160af4 100644 --- a/spec/unit/code_block_spec.rb +++ b/spec/unit/code_block_spec.rb @@ -4,130 +4,6 @@ module SyntaxErrorSearch RSpec.describe CodeBlock do - - it "expand until next boundry (indentation)" do - source_string = <<~EOM - def foo - Foo.call - end - end - EOM - - code_lines = code_line_array(source_string) - - scan = IndentScan.new(code_lines: code_lines) - neighbors = scan.neighbors_from_top(code_lines[1]) - - block = CodeBlock.new( - lines: neighbors.last, - code_lines: code_lines - ) - - expect(block.valid?).to be_falsey - expect(block.to_s).to eq(<<~EOM.indent(2)) - end - EOM - - frontier = [] - - scan.each_neighbor_block(code_lines[1]) do |block| - if block.valid? - block.lines.map(&:mark_valid) - else - frontier << block - end - end - - expect(frontier.join).to eq(<<~EOM.indent(2)) - Foo.call - end - EOM - end - - it "expand until next boundry (indentation)" do - source_string = <<~EOM - describe "what" do - Foo.call - end - - describe "hi" - Bar.call do - Foo.call - end - end - - it "blerg" do - end - EOM - - code_lines = code_line_array(source_string) - - block = CodeBlock.new( - lines: code_lines[6], - code_lines: code_lines - ) - - block.expand_until_next_boundry - - expect(block.to_s).to eq(<<~EOM.indent(2)) - Bar.call do - Foo.call - end - EOM - - block.expand_until_next_boundry - - expect(block.to_s).to eq(<<~EOM) - - describe "hi" - Bar.call do - Foo.call - end - end - - EOM - end - - it "expand until next boundry (empty lines)" do - source_string = <<~EOM - describe "what" do - end - - describe "hi" - end - - it "blerg" do - end - EOM - - code_lines = code_line_array(source_string) - block = CodeBlock.new( - lines: code_lines[0], - code_lines: code_lines - ) - 
block.expand_until_next_boundry - - expect(block.to_s.strip).to eq(<<~EOM.strip) - describe "what" do - end - EOM - - block = CodeBlock.new( - lines: code_lines[3], - code_lines: code_lines - ) - block.expand_until_next_boundry - - expect(block.to_s.strip).to eq(<<~EOM.strip) - describe "hi" - end - EOM - - block.expand_until_next_boundry - - expect(block.to_s.strip).to eq(source_string.strip) - end - it "can detect if it's valid or not" do code_lines = code_line_array(<<~EOM) def foo @@ -135,7 +11,7 @@ def foo end EOM - block = CodeBlock.new(code_lines: code_lines, lines: code_lines[1]) + block = CodeBlock.new(lines: code_lines[1]) expect(block.valid?).to be_truthy end @@ -146,9 +22,9 @@ def foo end EOM - block_0 = CodeBlock.new(code_lines: code_lines, lines: code_lines[0]) - block_1 = CodeBlock.new(code_lines: code_lines, lines: code_lines[1]) - block_2 = CodeBlock.new(code_lines: code_lines, lines: code_lines[2]) + block_0 = CodeBlock.new(lines: code_lines[0]) + block_1 = CodeBlock.new(lines: code_lines[1]) + block_2 = CodeBlock.new(lines: code_lines[2]) expect(block_0 <=> block_0).to eq(0) expect(block_1 <=> block_0).to eq(1) @@ -157,7 +33,7 @@ def foo array = [block_2, block_1, block_0].sort expect(array.last).to eq(block_2) - block = CodeBlock.new(code_lines: code_lines, lines: CodeLine.new(line: " " * 8 + "foo", index: 4)) + block = CodeBlock.new(lines: CodeLine.new(line: " " * 8 + "foo", index: 4)) array.prepend(block) expect(array.sort.last).to eq(block) end @@ -169,17 +45,23 @@ def foo end EOM - block = CodeBlock.new(code_lines: code_lines, lines: code_lines[1]) + block = CodeBlock.new(lines: code_lines[1]) expect(block.current_indent).to eq(2) - expect(block.before_lines).to eq([code_lines[0]]) - expect(block.before_line).to eq(code_lines[0]) - expect(block.after_lines).to eq([code_lines[2]]) - expect(block.after_line).to eq(code_lines[2]) - block = CodeBlock.new(code_lines: code_lines, lines: code_lines[0]) + block = CodeBlock.new(lines: code_lines[0]) 
expect(block.current_indent).to eq(0) end + it "knows it's current indentation level when mismatched indents" do + code_lines = code_line_array(<<~EOM) + def foo + puts 'lol' + end + EOM + + block = CodeBlock.new(lines: [code_lines[1], code_lines[2]]) + expect(block.current_indent).to eq(1) + end it "before lines and after lines" do code_lines = code_line_array(<<~EOM) @@ -188,10 +70,8 @@ def foo end EOM - block = CodeBlock.new(code_lines: code_lines, lines: code_lines[1]) + block = CodeBlock.new(lines: code_lines[1]) expect(block.valid?).to be_falsey - expect(block.before_lines).to eq([code_lines[0]]) - expect(block.after_lines).to eq([code_lines[2]]) end end end diff --git a/spec/unit/code_frontier_spec.rb b/spec/unit/code_frontier_spec.rb index 292308c..3b621fc 100644 --- a/spec/unit/code_frontier_spec.rb +++ b/spec/unit/code_frontier_spec.rb @@ -4,42 +4,6 @@ module SyntaxErrorSearch RSpec.describe CodeFrontier do - it "search example" do - code_lines = code_line_array(<<~EOM) - describe "lol" do - foo - end - end - - it "lol" do - bar - end - end - EOM - - frontier = CodeFrontier.new(code_lines: code_lines) - frontier << frontier.next_block if frontier.next_block? - - until frontier.holds_all_syntax_errors? - frontier << frontier.next_block if frontier.next_block? - block = frontier.pop - - if block.valid? 
- block.lines.each(&:mark_invisible) - - else - block.expand_until_neighbors - frontier << block - end - end - - expect(frontier.detect_invalid_blocks.join).to eq(<<~EOM.indent(2)) - foo - end - bar - end - EOM - end it "detect_bad_blocks" do code_lines = code_line_array(<<~EOM) describe "lol" do @@ -53,8 +17,8 @@ module SyntaxErrorSearch frontier = CodeFrontier.new(code_lines: code_lines) blocks = [] - blocks << CodeBlock.new(lines: code_lines[1], code_lines: code_lines) - blocks << CodeBlock.new(lines: code_lines[5], code_lines: code_lines) + blocks << CodeBlock.new(lines: code_lines[1]) + blocks << CodeBlock.new(lines: code_lines[5]) blocks.each do |b| frontier << b end @@ -83,133 +47,87 @@ module SyntaxErrorSearch ) end - it "detects if multiple syntax errors are found" do - code_lines = code_line_array(<<~EOM) - def foo - end - end - EOM - - frontier = CodeFrontier.new(code_lines: code_lines) - - frontier << frontier.next_block if frontier.next_block? - block = frontier.pop - expect(block.to_s).to eq(<<~EOM.indent(2)) - end - EOM - frontier << block - - expect(frontier.holds_all_syntax_errors?).to be_truthy - end - - it "detects if it has not captured all syntax errors" do + it "doesn't duplicate blocks" do code_lines = code_line_array(<<~EOM) def foo puts "lol" - end - - describe "lol" - end - - it "lol" + puts "lol" + puts "lol" end EOM frontier = CodeFrontier.new(code_lines: code_lines) - frontier << frontier.next_block if frontier.next_block? 
- block = frontier.pop - expect(block.to_s).to eq(<<~EOM.indent(2)) + frontier << CodeBlock.new(lines: [code_lines[2]]) + expect(frontier.count).to eq(1) + + frontier << CodeBlock.new(lines: [code_lines[1],code_lines[2],code_lines[3]]) + expect(frontier.count).to eq(1) + expect(frontier.pop.to_s).to eq(<<~EOM.indent(2)) + puts "lol" + puts "lol" puts "lol" EOM - frontier << block - expect(frontier.holds_all_syntax_errors?).to be_falsey - end - - it "generates a block when popping" do code_lines = code_line_array(<<~EOM) def foo - puts "lol1" - puts "lol2" - puts "lol3" - - puts "lol4" + puts "lol" + puts "lol" + puts "lol" end EOM frontier = CodeFrontier.new(code_lines: code_lines) - frontier << frontier.next_block if frontier.next_block? - expect(frontier.pop.to_s).to eq(<<~EOM.indent(2)) - puts "lol1" - puts "lol2" - puts "lol3" + frontier << CodeBlock.new(lines: [code_lines[2]]) + expect(frontier.count).to eq(1) - EOM - - expect(frontier.generate_new_block?).to be_truthy - - frontier << frontier.next_block if frontier.next_block? + frontier << CodeBlock.new(lines: [code_lines[3]]) + expect(frontier.count).to eq(2) expect(frontier.pop.to_s).to eq(<<~EOM.indent(2)) - - puts "lol4" - EOM - - frontier << frontier.next_block if frontier.next_block? 
- expect(frontier.pop.to_s).to eq(<<~EOM) - def foo + puts "lol" EOM end - it "generates continuous block lines" do + it "detects if multiple syntax errors are found" do code_lines = code_line_array(<<~EOM) def foo - puts "lol1" - puts "lol2" - puts "lol3" - - puts "lol4" + end end EOM frontier = CodeFrontier.new(code_lines: code_lines) - block = frontier.next_block - expect(block.to_s).to eq(<<~EOM.indent(2)) - puts "lol1" - puts "lol2" - puts "lol3" - EOM - - expect(frontier.generate_new_block?).to be_truthy - - frontier << block - - block = frontier.next_block + frontier << CodeBlock.new(lines: code_lines[1]) + block = frontier.pop expect(block.to_s).to eq(<<~EOM.indent(2)) - - puts "lol4" + end EOM frontier << block - expect(frontier.generate_new_block?).to be_falsey + expect(frontier.holds_all_syntax_errors?).to be_truthy end - it "detects empty" do + it "detects if it has not captured all syntax errors" do code_lines = code_line_array(<<~EOM) def foo puts "lol" end - EOM - frontier = CodeFrontier.new(code_lines: code_lines) + describe "lol" + end - expect(frontier.empty?).to be_falsey - expect(frontier.any?).to be_truthy + it "lol" + end + EOM - frontier = CodeFrontier.new(code_lines: []) + frontier = CodeFrontier.new(code_lines: code_lines) + frontier << CodeBlock.new(lines: [code_lines[1]]) + block = frontier.pop + expect(block.to_s).to eq(<<~EOM.indent(2)) + puts "lol" + EOM + frontier << block - expect(frontier.empty?).to be_truthy - expect(frontier.any?).to be_falsey + expect(frontier.holds_all_syntax_errors?).to be_falsey end end end diff --git a/spec/unit/code_search_spec.rb b/spec/unit/code_search_spec.rb index 82ceece..7113c1d 100644 --- a/spec/unit/code_search_spec.rb +++ b/spec/unit/code_search_spec.rb @@ -69,15 +69,15 @@ def hello describe "real world cases" do it "finds hanging def in this project" do - search = CodeSearch.new( - fixtures_dir.join("this_project_extra_def.rb.txt").read, - ) + source_string = 
fixtures_dir.join("this_project_extra_def.rb.txt").read + search = CodeSearch.new(source_string) search.call blocks = search.invalid_blocks io = StringIO.new display = DisplayInvalidBlocks.new( + code_lines: search.code_lines, blocks: blocks, io: io, ) @@ -127,7 +127,12 @@ def hello blocks = search.invalid_blocks io = StringIO.new - display = DisplayInvalidBlocks.new(blocks: blocks, io: io, filename: "fake/spec/lol.rb") + display = DisplayInvalidBlocks.new( + io: io, + blocks: blocks, + code_lines: search.code_lines, + filename: "fake/spec/lol.rb" + ) display.call # io.string @@ -135,7 +140,8 @@ def hello 1 require 'rails_helper' 2 3 RSpec.describe AclassNameHere, type: :worker do - ❯ 12 + ❯ 4 describe "thing" do + ❯ 16 end # line 16 accidental end, but valid block ❯ 30 end # mismatched due to 16 31 end EOM @@ -181,9 +187,9 @@ def foo EOM search.call - # TODO improve here, grab the two end instead of one - expect(search.invalid_blocks.join).to eq(<<~EOM.indent(3)) - end # one + expect(search.invalid_blocks.join).to eq(<<~EOM) + Foo.call + end # two EOM end @@ -271,7 +277,6 @@ def foo it "doesn't just return an empty `end`" do search = CodeSearch.new(<<~EOM) Foo.call - end EOM search.call diff --git a/spec/unit/display_invalid_blocks_spec.rb b/spec/unit/display_invalid_blocks_spec.rb index c8f54fc..ecb3be0 100644 --- a/spec/unit/display_invalid_blocks_spec.rb +++ b/spec/unit/display_invalid_blocks_spec.rb @@ -14,11 +14,14 @@ def hai end EOM + search = CodeSearch.new(syntax_string) + search.call io = StringIO.new display = DisplayInvalidBlocks.new( - blocks: CodeSearch.new(syntax_string).call.invalid_blocks, + io: io, + blocks: search.invalid_blocks, terminal: false, - io: io + code_lines: search.code_lines, ) display.call expect(io.string).to include("Syntax OK") @@ -34,11 +37,12 @@ def hai EOM io = StringIO.new - block = CodeBlock.new(code_lines: code_lines, lines: code_lines[1]) + block = CodeBlock.new(lines: code_lines[1]) display = DisplayInvalidBlocks.new( + 
io: io, blocks: block, terminal: false, - io: io + code_lines: code_lines, ) display.call expect(io.string).to include("❯ 2 def hello") @@ -54,10 +58,11 @@ def hai end EOM - block = CodeBlock.new(code_lines: code_lines, lines: code_lines[1]) + block = CodeBlock.new(lines: code_lines[1]) display = DisplayInvalidBlocks.new( blocks: block, - terminal: false + terminal: false, + code_lines: code_lines ) expect(display.code_block).to eq(<<~EOM) 1 class OH @@ -76,10 +81,11 @@ def hai end EOM - block = CodeBlock.new(code_lines: code_lines, lines: code_lines[1]) + block = CodeBlock.new(lines: code_lines[1]) display = DisplayInvalidBlocks.new( blocks: block, - terminal: false + terminal: false, + code_lines: code_lines ) expect(display.code_with_lines).to eq( @@ -93,10 +99,11 @@ def hai ].join($/) ) - block = CodeBlock.new(code_lines: code_lines, lines: code_lines[1]) + block = CodeBlock.new(lines: code_lines[1]) display = DisplayInvalidBlocks.new( blocks: block, - terminal: true + terminal: true, + code_lines: code_lines ) expect(display.code_with_lines).to eq(