From 2ed4c148d8c6f662be676125f7cbabf4e28125f2 Mon Sep 17 00:00:00 2001 From: schneems Date: Wed, 10 Nov 2021 20:04:38 -0600 Subject: [PATCH 1/4] Combine lex detection loops There are several places where lex items are looped through. This commit moves them all into one single loop so we only iterate over every element once instead of N times. (N=3) --- lib/dead_end/code_line.rb | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/lib/dead_end/code_line.rb b/lib/dead_end/code_line.rb index 8718fdb..f4b9e96 100644 --- a/lib/dead_end/code_line.rb +++ b/lib/dead_end/code_line.rb @@ -53,17 +53,8 @@ def initialize(line:, index:, lex:) @indent = SpaceCount.indent(line) end - kw_count = 0 - end_count = 0 - @lex.each do |lex| - kw_count += 1 if lex.is_kw? - end_count += 1 if lex.is_end? - end - - kw_count -= oneliner_method_count - @is_kw = (kw_count - end_count) > 0 - @is_end = (end_count - kw_count) > 0 + set_kw_end end # Used for stable sort via indentation level @@ -179,8 +170,7 @@ def <=>(other) # # For some reason this introduces `on_ignore_newline` but with BEG type def ignore_newline_not_beg? - lex_value = lex.detect { |l| l.type == :on_ignored_nl } - !!(lex_value && !lex_value.expr_beg?) + @ignore_newline_not_beg end # Determines if the given line has a trailing slash @@ -206,11 +196,22 @@ def trailing_slash? # # ENDFN -> BEG (token = '=' ) -> END # - private def oneliner_method_count + private def set_kw_end oneliner_count = 0 in_oneliner_def = nil + kw_count = 0 + end_count = 0 + + @ignore_newline_not_beg = false @lex.each do |lex| + kw_count += 1 if lex.is_kw? + end_count += 1 if lex.is_end? + + if lex.type == :on_ignored_nl + @ignore_newline_not_beg = !lex.expr_beg? + end + if in_oneliner_def.nil? in_oneliner_def = :ENDFN if lex.state.allbits?(Ripper::EXPR_ENDFN) elsif lex.state.allbits?(Ripper::EXPR_ENDFN) @@ -227,7 +228,10 @@ def trailing_slash? 
end end - oneliner_count + kw_count -= oneliner_count + + @is_kw = (kw_count - end_count) > 0 + @is_end = (end_count - kw_count) > 0 end end end From ac2bcc77bcdbecdd21b9b41d8b98232aa04f86c7 Mon Sep 17 00:00:00 2001 From: schneems Date: Wed, 10 Nov 2021 20:21:48 -0600 Subject: [PATCH 2/4] Only generate lines once There are several places where we call `String#lines` on the source code. Instead of splitting the same source multiple times we can split it once and pass that array in via dependency injection. --- lib/dead_end/clean_document.rb | 6 +++--- lib/dead_end/code_line.rb | 7 ++++--- lib/dead_end/lex_all.rb | 6 +++--- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/lib/dead_end/clean_document.rb b/lib/dead_end/clean_document.rb index d83e6c2..e4c5f18 100644 --- a/lib/dead_end/clean_document.rb +++ b/lib/dead_end/clean_document.rb @@ -85,8 +85,8 @@ module DeadEnd # class CleanDocument def initialize(source:) - @source = clean_sweep(source: source) - @document = CodeLine.from_source(@source) + lines = clean_sweep(source: source) + @document = CodeLine.from_source(lines.join, lines: lines) end # Call all of the document "cleaners" @@ -161,7 +161,7 @@ def clean_sweep(source:) else line end - end.join + end end # Smushes all heredoc lines into one line diff --git a/lib/dead_end/code_line.rb b/lib/dead_end/code_line.rb index f4b9e96..7cca3e0 100644 --- a/lib/dead_end/code_line.rb +++ b/lib/dead_end/code_line.rb @@ -26,9 +26,10 @@ class CodeLine # Returns an array of CodeLine objects # from the source string - def self.from_source(source) - lex_array_for_line = LexAll.new(source: source).each_with_object(Hash.new { |h, k| h[k] = [] }) { |lex, hash| hash[lex.line] << lex } - source.lines.map.with_index do |line, index| + def self.from_source(source, lines: nil) + lines ||= source.lines + lex_array_for_line = LexAll.new(source: source, source_lines: lines).each_with_object(Hash.new { |h, k| h[k] = [] }) { |lex, hash| hash[lex.line] << lex } + 
lines.map.with_index do |line, index| CodeLine.new( line: line, index: index, diff --git a/lib/dead_end/lex_all.rb b/lib/dead_end/lex_all.rb index 8bee05a..1fe8767 100644 --- a/lib/dead_end/lex_all.rb +++ b/lib/dead_end/lex_all.rb @@ -8,11 +8,11 @@ module DeadEnd class LexAll include Enumerable - def initialize(source:) + def initialize(source:, source_lines: nil) @lex = Ripper.lex(source) lineno = @lex.last.first.first + 1 - source_lines = source.lines - last_lineno = source_lines.count + source_lines ||= source.lines + last_lineno = source_lines.length until lineno >= last_lineno lines = source_lines[lineno..-1] From 6eddfaf6790b252767a9314fbbd19c0c39e3c6e3 Mon Sep 17 00:00:00 2001 From: schneems Date: Wed, 10 Nov 2021 20:46:45 -0600 Subject: [PATCH 3/4] Faster indent detection Before: 0.140603 0.003740 0.144343 ( 0.145287) After: 0.128684 0.003244 0.131928 ( 0.132673) Before: ![](https://www.dropbox.com/s/smiyw7imd1319a3/Screen%20Shot%202021-11-10%20at%208.48.14%20PM.png?raw=1) After: ![](https://www.dropbox.com/s/23sswxyol7s9yli/Screen%20Shot%202021-11-10%20at%208.51.53%20PM.png?raw=1) Calling `String#split` allocates many objects. Since we are already stripping the string to see if it is empty, we can use that same information to determine the indentation count. --- lib/dead_end/code_line.rb | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/dead_end/code_line.rb b/lib/dead_end/code_line.rb index 7cca3e0..6520518 100644 --- a/lib/dead_end/code_line.rb +++ b/lib/dead_end/code_line.rb @@ -43,18 +43,19 @@ def initialize(line:, index:, lex:) @lex = lex @line = line @index = index - @original = line.freeze + @original = line @line_number = @index + 1 + strip_line = line.dup + strip_line.lstrip! - if line.strip.empty? + if strip_line.empty? 
@empty = true @indent = 0 else @empty = false - @indent = SpaceCount.indent(line) + @indent = line.length - strip_line.length end - set_kw_end end From d2d992bc16bccbbdf2cd83ba26abf5cc349e2958 Mon Sep 17 00:00:00 2001 From: schneems Date: Fri, 5 Nov 2021 08:36:39 -0500 Subject: [PATCH 4/4] Use raw Lexer::Elem Related to https://github.com/ruby/ruby/pull/5093 Instead of building a bunch of objects then converting them into arrays, we can natively use the objects. Total allocated: 25407256 bytes (261284 objects) Total retained: 1937110 bytes (30266 objects) allocated memory by location ----------------------------------- 10045332 :90 5273752 Documents/projects/dead_end/lib/dead_end/code_frontier.rb:131 2267256 Documents/projects/dead_end/lib/dead_end.rb:65 986760 .rubies/ruby-3.0.2/lib/ruby/3.0.0/ripper/lexer.rb:189 938301 .rubies/ruby-3.0.2/lib/ruby/3.0.0/ripper/lexer.rb:132 734800 Documents/projects/dead_end/lib/dead_end/lex_all.rb:24 668000 .rubies/ruby-3.0.2/lib/ruby/3.0.0/ripper/lexer.rb:97 <== HERE 471980 Documents/projects/dead_end/lib/dead_end.rb:97 334040 .rubies/ruby-3.0.2/lib/ruby/3.0.0/ripper/lexer.rb:58 334040 .rubies/ruby-3.0.2/lib/ruby/3.0.0/ripper/lexer.rb:72 After Total allocated: 19648672 bytes (252933 objects) Total retained: 1944374 bytes (30266 objects) allocated memory by location ----------------------------------- 10045332 :90 5273752 Documents/projects/dead_end/lib/dead_end/code_frontier.rb:131 2267256 Documents/projects/dead_end/lib/dead_end.rb:65 986760 .rubies/ruby-3.0.2/lib/ruby/3.0.0/ripper/lexer.rb:189 938301 .rubies/ruby-3.0.2/lib/ruby/3.0.0/ripper/lexer.rb:132 734800 Documents/projects/dead_end/lib/dead_end/lex_all.rb:26 471980 Documents/projects/dead_end/lib/dead_end.rb:97 334040 .rubies/ruby-3.0.2/lib/ruby/3.0.0/ripper/lexer.rb:58 334040 .rubies/ruby-3.0.2/lib/ruby/3.0.0/ripper/lexer.rb:72 329051 Documents/projects/dead_end/lib/dead_end/code_line.rb:31 218144 Documents/projects/dead_end/lib/dead_end/code_line.rb:30 --- 
lib/dead_end/lex_all.rb | 14 +++++++++----- lib/dead_end/lex_value.rb | 2 ++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/lib/dead_end/lex_all.rb b/lib/dead_end/lex_all.rb index 1fe8767..08973ce 100644 --- a/lib/dead_end/lex_all.rb +++ b/lib/dead_end/lex_all.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + module DeadEnd # Ripper.lex is not guaranteed to lex the entire source document # @@ -9,19 +11,21 @@ class LexAll include Enumerable def initialize(source:, source_lines: nil) - @lex = Ripper.lex(source) - lineno = @lex.last.first.first + 1 + @lex = Ripper::Lexer.new(source, "-", 1).parse.sort_by(&:pos) + lineno = @lex.last.pos.first + 1 source_lines ||= source.lines last_lineno = source_lines.length until lineno >= last_lineno lines = source_lines[lineno..-1] - @lex.concat(Ripper.lex(lines.join, "-", lineno + 1)) - lineno = @lex.last.first.first + 1 + @lex.concat( + Ripper::Lexer.new(lines.join, "-", lineno + 1).parse.sort_by(&:pos) + ) + lineno = @lex.last.pos.first + 1 end - @lex.map! { |(line, _), type, token, state| LexValue.new(line, type, token, state) } + @lex.map! { |elem| LexValue.new(elem.pos.first, elem.event, elem.tok, elem.state) } end def to_a diff --git a/lib/dead_end/lex_value.rb b/lib/dead_end/lex_value.rb index 2ddb9ea..3119953 100644 --- a/lib/dead_end/lex_value.rb +++ b/lib/dead_end/lex_value.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + module DeadEnd # Value object for accessing lex values #