From 149a7e5a15c9650c10be023504eb3be74110afe9 Mon Sep 17 00:00:00 2001 From: schneems Date: Fri, 4 Dec 2020 10:50:44 -0600 Subject: [PATCH 1/2] Make debugging easier --- lib/syntax_search/code_search.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/syntax_search/code_search.rb b/lib/syntax_search/code_search.rb index 6d741c2..556bcf0 100644 --- a/lib/syntax_search/code_search.rb +++ b/lib/syntax_search/code_search.rb @@ -28,7 +28,7 @@ class CodeSearch private; attr_reader :frontier; public public; attr_reader :invalid_blocks, :record_dir, :code_lines - def initialize(source, record_dir: ENV["SYNTAX_SEARCH_RECORD_DIR"]) + def initialize(source, record_dir: ENV["SYNTAX_SEARCH_RECORD_DIR"] || ENV["DEBUG"] ? "tmp" : nil) @source = source if record_dir @time = Time.now.strftime('%Y-%m-%d-%H-%M-%s-%N') From bb9f7cc7f7c0b539aa6b7d08411b302bd15e6bb3 Mon Sep 17 00:00:00 2001 From: schneems Date: Fri, 4 Dec 2020 10:58:27 -0600 Subject: [PATCH 2/2] [Close #22] Lines are lexically aware of keywords CodeLines are now lexically aware of keywords. This is used by the AroundBlockScan to allow stopping after a keyword has been hit. In addition to stripping heredocs before searching, the CodeSearch now also strips lines that contain only comments. The combination of stripping heredocs and comments prevents false positives from lexing individual lines (since a comment might contain valid code, e.g. `# def foo; end`). New APIs: - CodeLine#is_comment? - CodeLine#is_kw? - CodeLine#is_end? 
- AroundBlockScan.stop_after_kw - AroundBlockScan.scan_neighbors --- CHANGELOG.md | 1 + exe/syntax_search | 1 + lib/syntax_search/around_block_scan.rb | 36 ++++++++++++++++++-- lib/syntax_search/block_expand.rb | 11 ++---- lib/syntax_search/code_line.rb | 31 +++++++++++++++++ lib/syntax_search/code_search.rb | 8 ++++- spec/spec_helper.rb | 2 ++ spec/unit/around_block_scan_spec.rb | 46 +++++++++++++++++++++++++- spec/unit/code_line_spec.rb | 23 +++++++++++++ spec/unit/code_search_spec.rb | 23 ++++++++++--- spec/unit/exe_spec.rb | 10 ++---- 11 files changed, 168 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d941a26..ac08ded 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## HEAD (unreleased) +- Block expansion is now lexically aware of keywords (def/do/end etc.) (https://github.com/zombocom/syntax_search/pull/24) - Fix bug where not all of a source is lexed which is used in heredoc detection/removal (https://github.com/zombocom/syntax_search/pull/23) ## 0.1.5 diff --git a/exe/syntax_search b/exe/syntax_search index d506a9b..5102f9d 100755 --- a/exe/syntax_search +++ b/exe/syntax_search @@ -62,6 +62,7 @@ if file.nil? || file.empty? 
end file = Pathname(file) +options[:record_dir] = "tmp" if ENV["DEBUG"] $stderr.puts "Record dir: #{options[:record_dir]}" if options[:record_dir] diff --git a/lib/syntax_search/around_block_scan.rb b/lib/syntax_search/around_block_scan.rb index 40f0e49..837dbc1 100644 --- a/lib/syntax_search/around_block_scan.rb +++ b/lib/syntax_search/around_block_scan.rb @@ -32,9 +32,11 @@ def initialize(code_lines: , block:) @code_lines = code_lines @orig_before_index = block.lines.first.index @orig_after_index = block.lines.last.index + @orig_indent = block.current_indent @skip_array = [] @after_array = [] @before_array = [] + @stop_after_kw = false end def skip(name) @@ -42,28 +44,58 @@ def skip(name) self end + def stop_after_kw + @stop_after_kw = true + self + end + def scan_while(&block) + stop_next = false + + kw_count = 0 + end_count = 0 @before_index = before_lines.reverse_each.take_while do |line| + next false if stop_next next true if @skip_array.detect {|meth| line.send(meth) } + kw_count += 1 if line.is_kw? + end_count += 1 if line.is_end? + if @stop_after_kw && kw_count > end_count + stop_next = true + end + block.call(line) end.reverse.first&.index + stop_next = false + kw_count = 0 + end_count = 0 @after_index = after_lines.take_while do |line| + next false if stop_next next true if @skip_array.detect {|meth| line.send(meth) } + kw_count += 1 if line.is_kw? + end_count += 1 if line.is_end? + if @stop_after_kw && end_count > kw_count + stop_next = true + end + block.call(line) end.last&.index self end + def scan_neighbors + self.scan_while {|line| line.not_empty? && line.indent >= @orig_indent } + end + def scan_adjacent_indent before_indent = @code_lines[@orig_before_index.pred]&.indent || 0 after_indent = @code_lines[@orig_after_index.next]&.indent || 0 + indent = [before_indent, after_indent].min - @before_index = before_index.pred if before_indent >= indent - @after_index = after_index.next if after_indent >= indent + self.scan_while {|line| line.not_empty? 
&& line.indent >= indent } self end diff --git a/lib/syntax_search/block_expand.rb b/lib/syntax_search/block_expand.rb index fc1c989..3950539 100644 --- a/lib/syntax_search/block_expand.rb +++ b/lib/syntax_search/block_expand.rb @@ -44,21 +44,16 @@ def call(block) def expand_indent(block) block = AroundBlockScan.new(code_lines: @code_lines, block: block) + .stop_after_kw .scan_adjacent_indent .code_block - - # Handle if/else/end case - if (next_block = expand_neighbors(block, grab_empty: false)) - return next_block - else - return block - end end def expand_neighbors(block, grab_empty: true) scan = AroundBlockScan.new(code_lines: @code_lines, block: block) .skip(:hidden?) - .scan_while {|line| line.not_empty? && line.indent >= block.current_indent } + .stop_after_kw + .scan_neighbors # Slurp up empties if grab_empty diff --git a/lib/syntax_search/code_line.rb b/lib/syntax_search/code_line.rb index 8b6fa5e..4b6cce4 100644 --- a/lib/syntax_search/code_line.rb +++ b/lib/syntax_search/code_line.rb @@ -39,6 +39,37 @@ def initialize(line: , index:) @indent = SpaceCount.indent(line) @status = nil # valid, invalid, unknown @invalid = false + + @kw_count = 0 + @end_count = 0 + @lex = LexAll.new(source: line) + @lex.each do |lex| + next unless lex.type == :on_kw + + case lex.token + when 'def', 'case', 'for', 'begin', 'class', 'module', 'if', 'unless', 'while', 'until' , 'do' + @kw_count += 1 + when 'end' + @end_count += 1 + end + end + + @is_comment = true if @lex.detect {|lex| lex.type != :on_sp}&.type == :on_comment + + @is_kw = (@kw_count - @end_count) > 0 + @is_end = (@end_count - @kw_count) > 0 + end + + def is_comment? + @is_comment + end + + def is_kw? + @is_kw + end + + def is_end? 
+ @is_end end def mark_invalid diff --git a/lib/syntax_search/code_search.rb b/lib/syntax_search/code_search.rb index 556bcf0..4f01640 100644 --- a/lib/syntax_search/code_search.rb +++ b/lib/syntax_search/code_search.rb @@ -108,7 +108,6 @@ def expand_invalid_block push(block, name: "expand") end - def sweep_heredocs HeredocBlockParse.new( source: @source, @@ -118,9 +117,16 @@ def sweep_heredocs end end + def sweep_comments + @code_lines.select(&:is_comment?).each do |line| + line.mark_invisible + end + end + # Main search loop def call sweep_heredocs + sweep_comments until frontier.holds_all_syntax_errors? @tick += 1 diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index d9cf0bd..b6394ba 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -3,6 +3,8 @@ require "bundler/setup" require "syntax_search" +require 'tempfile' + RSpec.configure do |config| # Enable flags like --only-failures and --next-failure config.example_status_persistence_file_path = ".rspec_status" diff --git a/spec/unit/around_block_scan_spec.rb b/spec/unit/around_block_scan_spec.rb index 6b46f0e..93401e9 100644 --- a/spec/unit/around_block_scan_spec.rb +++ b/spec/unit/around_block_scan_spec.rb @@ -4,6 +4,50 @@ module SyntaxErrorSearch RSpec.describe AroundBlockScan do + it "expands indentation" do + source_string = <<~EOM + def foo + if [options.output_format_tty, options.output_format_block].include?(nil) + raise("Bad output mode '\#{v}'; each must be one of \#{lookups.output_formats.keys}.") + end + end + EOM + + code_lines = code_line_array(source_string) + block = CodeBlock.new(lines: code_lines[2]) + expand = AroundBlockScan.new(code_lines: code_lines, block: block) + .stop_after_kw + .scan_adjacent_indent + + expect(expand.code_block.to_s).to eq(<<~EOM.indent(2)) + if [options.output_format_tty, options.output_format_block].include?(nil) + raise("Bad output mode '\#{v}'; each must be one of \#{lookups.output_formats.keys}.") + end + EOM + end + + it "can stop before hitting 
another end" do + source_string = <<~EOM + def lol + end + def foo + puts "lol" + end + EOM + + code_lines = code_line_array(source_string) + block = CodeBlock.new(lines: code_lines[3]) + expand = AroundBlockScan.new(code_lines: code_lines, block: block) + expand.stop_after_kw + expand.scan_while {true} + + expect(expand.code_block.to_s).to eq(<<~EOM) + def foo + puts "lol" + end + EOM + end + it "captures multiple empty and hidden lines" do source_string = <<~EOM def foo @@ -67,7 +111,7 @@ def foo expand = AroundBlockScan.new(code_lines: code_lines, block: block) expand.skip(:empty?) expand.skip(:hidden?) - expand.scan_while {|line| line.indent >= block.current_indent } + expand.scan_neighbors expect(expand.code_block.to_s).to eq(<<~EOM.indent(4)) diff --git a/spec/unit/code_line_spec.rb b/spec/unit/code_line_spec.rb index 5a82ed3..73664d9 100644 --- a/spec/unit/code_line_spec.rb +++ b/spec/unit/code_line_spec.rb @@ -4,6 +4,29 @@ module SyntaxErrorSearch RSpec.describe CodeLine do + it "knows it's a comment" do + line = CodeLine.new(line: " # iama comment", index: 0) + expect(line.is_comment?).to be_truthy + expect(line.is_end?).to be_falsey + expect(line.is_kw?).to be_falsey + end + + it "knows it's got an end" do + line = CodeLine.new(line: " end", index: 0) + + expect(line.is_comment?).to be_falsey + expect(line.is_end?).to be_truthy + expect(line.is_kw?).to be_falsey + end + + it "knows it's got a keyword" do + line = CodeLine.new(line: " if", index: 0) + + expect(line.is_comment?).to be_falsey + expect(line.is_end?).to be_falsey + expect(line.is_kw?).to be_truthy + end + it "can be marked as invalid or valid" do code_lines = code_line_array(<<~EOM) def foo diff --git a/spec/unit/code_search_spec.rb b/spec/unit/code_search_spec.rb index 7113c1d..fa3b85d 100644 --- a/spec/unit/code_search_spec.rb +++ b/spec/unit/code_search_spec.rb @@ -4,6 +4,24 @@ module SyntaxErrorSearch RSpec.describe CodeSearch do + it "handles no spaces between blocks" do + search = 
CodeSearch.new(<<~EOM) + context "timezones workaround" do + it "should receive a time in UTC format and return the time with the" do + travel_to DateTime.new(2020, 10, 1, 10, 0, 0) do + end + end + end + context "test" do + it "should" do + end + EOM + + search.call + + expect(search.invalid_blocks.join.strip).to eq('it "should" do') + end + it "recording" do Dir.mktmpdir do |dir| dir = Pathname(dir) @@ -140,8 +158,6 @@ def hello 1 require 'rails_helper' 2 3 RSpec.describe AclassNameHere, type: :worker do - ❯ 4 describe "thing" do - ❯ 16 end # line 16 accidental end, but valid block ❯ 30 end # mismatched due to 16 31 end EOM @@ -227,11 +243,8 @@ def foo EOM search.call - # TODO improve here, eliminate inner def foo expect(search.invalid_blocks.join).to eq(<<~EOM) Foo.call - def foo - end end EOM end diff --git a/spec/unit/exe_spec.rb b/spec/unit/exe_spec.rb index c0dee01..92be5fe 100644 --- a/spec/unit/exe_spec.rb +++ b/spec/unit/exe_spec.rb @@ -27,20 +27,16 @@ def exe(cmd) end it "handles heredocs" do - Dir.mktmpdir do |dir| - dir = Pathname(dir) + Tempfile.create do |file| lines = fixtures_dir.join("rexe.rb").read.lines lines.delete_at(85 - 1) - ruby_file = dir.join("tmp.rb") - ruby_file.write(lines.join) + Pathname(file.path).write(lines.join) - out = exe("#{ruby_file} --no-terminal") + out = exe("#{file.path} --no-terminal") expect(out.strip).to include(<<~EOM.indent(4)) 77 class Lookups ❯ 78 def input_modes - ❯ 87 def input_formats - ❯ 94 end EOM end end