From 45e1c9586467043ab4758ad78c4451f14e81f044 Mon Sep 17 00:00:00 2001 From: schneems Date: Thu, 13 Jan 2022 15:56:01 -0600 Subject: [PATCH 1/2] Refactor UnvisitedLines logic to own class The logic that tracks if the frontier has created a block based on a given line or not is tightly coupled to the Frontier class. By moving it out we enable re-use and possible ergonomic or performance optimizations as well as allowing separation of concerns for testing. --- lib/dead_end/api.rb | 1 + lib/dead_end/code_frontier.rb | 23 ++++++++------------- lib/dead_end/unvisited_lines.rb | 36 +++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 15 deletions(-) create mode 100644 lib/dead_end/unvisited_lines.rb diff --git a/lib/dead_end/api.rb b/lib/dead_end/api.rb index f457862..99b754b 100644 --- a/lib/dead_end/api.rb +++ b/lib/dead_end/api.rb @@ -190,6 +190,7 @@ def self.valid?(source) require_relative "block_expand" require_relative "ripper_errors" require_relative "priority_queue" +require_relative "unvisited_lines" require_relative "around_block_scan" require_relative "pathname_from_message" require_relative "display_invalid_blocks" diff --git a/lib/dead_end/code_frontier.rb b/lib/dead_end/code_frontier.rb index 920ffcb..c6f56cb 100644 --- a/lib/dead_end/code_frontier.rb +++ b/lib/dead_end/code_frontier.rb @@ -50,13 +50,12 @@ module DeadEnd # CodeFrontier#detect_invalid_blocks # class CodeFrontier - def initialize(code_lines:) + + def initialize(code_lines:, unvisited: UnvisitedLines.new(code_lines: code_lines)) @code_lines = code_lines - @frontier = PriorityQueue.new - @unvisited_lines = @code_lines.sort_by(&:indent_index) - @visited_lines = {} + @unvisited = unvisited + @queue = PriorityEngulfQueue.new - @has_run = false @check_next = true end @@ -107,12 +106,12 @@ def pop end def next_indent_line - @unvisited_lines.last + @unvisited.peek end def expand? return false if @frontier.empty? - return true if @unvisited_lines.to_a.empty? + return true if @unvisited.empty? frontier_indent = @frontier.peek.current_indent unvisited_indent = next_indent_line.indent @@ -132,13 +131,7 @@ def expand? # Keeps track of what lines have been added to blocks and which are not yet # visited. def register_indent_block(block) - block.lines.each do |line| - next if @visited_lines[line] - @visited_lines[line] = true - - index = @unvisited_lines.bsearch_index { |l| line.indent_index <=> l.indent_index } - @unvisited_lines.delete_at(index) - end + @unvisited.visit_block(block) self end @@ -171,7 +164,7 @@ def register_engulf_block(block) # and that each code block's lines are removed from the indentation hash so we # don't re-evaluate the same line multiple times. def <<(block) - register_indent_block(block) + @unvisited.visit_block(block) register_engulf_block(block) @check_next = true if block.invalid? diff --git a/lib/dead_end/unvisited_lines.rb b/lib/dead_end/unvisited_lines.rb new file mode 100644 index 0000000..e616d85 --- /dev/null +++ b/lib/dead_end/unvisited_lines.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +module DeadEnd + # Tracks which lines various code blocks have expanded to + # and which are still unexplored + class UnvisitedLines + def initialize(code_lines: ) + @unvisited = code_lines.sort_by(&:indent_index) + @visited_lines = {} + @visited_lines.compare_by_identity + end + + def empty? + @unvisited.empty? + end + + def peek + @unvisited.last + end + + def pop + @unvisited.pop + end + + def visit_block(block) + block.lines.each do |line| + next if @visited_lines[line] + @visited_lines[line] = true + end + + while @visited_lines[@unvisited.last] + @unvisited.pop + end + end + end +end From b2ce3f73697d0c50337d37888b80b6ac4247bea5 Mon Sep 17 00:00:00 2001 From: schneems Date: Thu, 13 Jan 2022 16:31:22 -0600 Subject: [PATCH 2/2] Move queue logic and engulf logic to a class The queuing and engulfing logic are tied together. We can bundle the two of them into a single class with a specific interface. --- lib/dead_end/api.rb | 1 + lib/dead_end/code_frontier.rb | 36 ++++----------- lib/dead_end/priority_engulf_queue.rb | 63 +++++++++++++++++++++++++++ lib/dead_end/unvisited_lines.rb | 2 +- 4 files changed, 74 insertions(+), 28 deletions(-) create mode 100644 lib/dead_end/priority_engulf_queue.rb diff --git a/lib/dead_end/api.rb b/lib/dead_end/api.rb index 99b754b..0a52d28 100644 --- a/lib/dead_end/api.rb +++ b/lib/dead_end/api.rb @@ -192,6 +192,7 @@ def self.valid?(source) require_relative "priority_queue" require_relative "unvisited_lines" require_relative "around_block_scan" +require_relative "priority_engulf_queue" require_relative "pathname_from_message" require_relative "display_invalid_blocks" require_relative "parse_blocks_from_indent_line" diff --git a/lib/dead_end/code_frontier.rb b/lib/dead_end/code_frontier.rb index c6f56cb..f9e6920 100644 --- a/lib/dead_end/code_frontier.rb +++ b/lib/dead_end/code_frontier.rb @@ -50,7 +50,6 @@ module DeadEnd # CodeFrontier#detect_invalid_blocks # class CodeFrontier - def initialize(code_lines:, unvisited: UnvisitedLines.new(code_lines: code_lines)) @code_lines = code_lines @unvisited = unvisited @@ -60,7 +59,7 @@ def initialize(code_lines:, unvisited: UnvisitedLines.new(code_lines: code_lines end def count - @frontier.length + @queue.length end # Performance optimization @@ -87,7 +86,7 @@ def count # removed. By default it checks all blocks in present in # the frontier array, but can be used for arbitrary arrays # of codeblocks as well - def holds_all_syntax_errors?(block_array = @frontier, can_cache: true) + def holds_all_syntax_errors?(block_array = @queue, can_cache: true) return false if can_cache && can_skip_check? without_lines = block_array.to_a.flat_map do |block| @@ -102,7 +101,7 @@ def holds_all_syntax_errors?(block_array = @frontier, can_cache: true) # Returns a code block with the largest indentation possible def pop - @frontier.pop + @queue.pop end def next_indent_line @@ -110,15 +109,15 @@ def next_indent_line end def expand? - return false if @frontier.empty? + return false if @queue.empty? return true if @unvisited.empty? - frontier_indent = @frontier.peek.current_indent + frontier_indent = @queue.peek.current_indent unvisited_indent = next_indent_line.indent if ENV["DEBUG"] puts "```" - puts @frontier.peek.to_s + puts @queue.peek.to_s puts "```" puts " @frontier indent: #{frontier_indent}" puts " @unvisited indent: #{unvisited_indent}" @@ -139,23 +138,6 @@ def register_indent_block(block) # block from the frontier. This prevents double expansions and all-around # weird behavior. However this guarantee is quite expensive to maintain def register_engulf_block(block) - # If we're about to pop off the same block, we can skip deleting - # things from the frontier this iteration since we'll get it - # on the next iteration - return if @frontier.peek && (block <=> @frontier.peek) == 1 - - if block.starts_at != block.ends_at # A block of size 1 cannot engulf another - @frontier.to_a.each { |b| - if b.starts_at >= block.starts_at && b.ends_at <= block.ends_at - b.delete - true - end - } - end - - while (last = @frontier.peek) && last.deleted? - @frontier.pop - end end # Add a block to the frontier @@ -165,10 +147,10 @@ def register_engulf_block(block) # don't re-evaluate the same line multiple times. def <<(block) @unvisited.visit_block(block) - register_engulf_block(block) + + @queue.push(block) @check_next = true if block.invalid? - @frontier << block self end @@ -188,7 +170,7 @@ def self.combination(array) # Given that we know our syntax error exists somewhere in our frontier, we want to find # the smallest possible set of blocks that contain all the syntax errors def detect_invalid_blocks - self.class.combination(@frontier.to_a.select(&:invalid?)).detect do |block_array| + self.class.combination(@queue.to_a.select(&:invalid?)).detect do |block_array| holds_all_syntax_errors?(block_array, can_cache: false) end || [] end diff --git a/lib/dead_end/priority_engulf_queue.rb b/lib/dead_end/priority_engulf_queue.rb new file mode 100644 index 0000000..6254609 --- /dev/null +++ b/lib/dead_end/priority_engulf_queue.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +module DeadEnd + # Keeps track of what elements are in the queue in + # priority and also ensures that when one element + # engulfs/covers/eats another that the larger element + # evicts the smaller element + class PriorityEngulfQueue + def initialize + @queue = PriorityQueue.new + end + + def to_a + @queue.to_a + end + + def empty? + @queue.empty? + end + + def length + @queue.length + end + + def peek + @queue.peek + end + + def pop + @queue.pop + end + + def push(block) + prune_engulf(block) + @queue << block + flush_deleted + + self + end + + private def flush_deleted + while @queue&.peek&.deleted? + @queue.pop + end + end + + private def prune_engulf(block) + # If we're about to pop off the same block, we can skip deleting + # things from the frontier this iteration since we'll get it + # on the next iteration + return if @queue.peek && (block <=> @queue.peek) == 1 + + if block.starts_at != block.ends_at # A block of size 1 cannot engulf another + @queue.to_a.each { |b| + if b.starts_at >= block.starts_at && b.ends_at <= block.ends_at + b.delete + true + end + } + end + end + end +end diff --git a/lib/dead_end/unvisited_lines.rb b/lib/dead_end/unvisited_lines.rb index e616d85..3f5b496 100644 --- a/lib/dead_end/unvisited_lines.rb +++ b/lib/dead_end/unvisited_lines.rb @@ -4,7 +4,7 @@ module DeadEnd # Tracks which lines various code blocks have expanded to # and which are still unexplored class UnvisitedLines - def initialize(code_lines: ) + def initialize(code_lines:) @unvisited = code_lines.sort_by(&:indent_index) @visited_lines = {} @visited_lines.compare_by_identity