From baf8bf98dfbffa3e233cab606c3849d87d1e28f4 Mon Sep 17 00:00:00 2001
From: schneems <richard.scheeman+foo@gmail.com>
Date: Fri, 8 Oct 2021 14:00:17 -0500
Subject: [PATCH] ## [close #64 #76] Refactor CodeLine add CleanDocument
 "sweep" class

## Refactor CodeLine

Previously CodeLine knew how to "lex" itself. This strategy worked for many cases but failed pretty hard in cases where context can make the same line return drastically different results.

For example, by itself this line:

```
EOM
```

Will be "lex"-d into a constant, while with more lines:

```
foo = <<~EOM
  Hello world
EOM
```

It would be understood as a heredoc.

## Distinct "clean" phase added via `CleanDocument`

Even after we've "injected" the lexical information into `CodeLine` based on lexing the entire document, there are still problems with special cases such as #64, where multiple lines represent a single logical operation. Heredocs are also painful to work with.

Previously, a HeredocBlockParse would put all heredocs into a `CodeBlock` at the beginning of the `CodeSearch`. There was also a `TrailingSlashJoin` and a method for removing code comments.

All of that functionality has been moved to a single class, `CleanDocument`, responsible for prepping the source to be in a good state to be fed into `CodeSearch`.

## Join consecutive logical blocks in `CleanDocument`

To close #64, the `CleanDocument` can use information in the lex line to join multiple "consecutive" lines like:

```
User.
  where(name: 'schneems')
```

This code style is a problem because the second line looks like valid ruby code in isolation, but if it's removed, an error might be introduced (due to the trailing dot on `User.`)

More notes around this here https://gist.github.com/schneems/6a7d7f988d3329fb3bd4b5be3e2efc0c and #76.

Essentially it uses lex information to join into a single line:

```
"User.\nwhere(name: 'schneems')\n"
```

Once in a single line, the search algorithm cannot accidentally introduce an error into the document by removing part of it.

## CaptureCodeContext

This class is now being billed as a "third" phase for re-introducing ambiguity where it should logically exist. Previously we were handling the case where a keyword was missing an end at the end of a block like:

```
class Dog
  def bark
end
```

It was also determined there's a logical inverse of this that wasn't being covered:

```
class Dog
  end
end
```

It's now handled.

For the `capture_before_after_kws` method, it was determined that it only needed to remove ambiguity if there's only one visible line. Adding this check cleaned up several outputs and didn't remove any critical (or ambiguous) lines.


## Other

- Add a bunch of docs all over the place
- Add deprecation to requiring `dead_end/fyi`
- Move LexValue class to its own file
---
 CHANGELOG.md                                  |   3 +
 lib/dead_end/around_block_scan.rb             |  13 +-
 lib/dead_end/capture_code_context.rb          | 139 +++++++-
 lib/dead_end/clean_document.rb                | 313 ++++++++++++++++++
 lib/dead_end/code_frontier.rb                 |  37 ++-
 lib/dead_end/code_line.rb                     | 201 ++++++-----
 lib/dead_end/code_search.rb                   |  50 +--
 .../display_code_with_line_numbers.rb         |   1 -
 lib/dead_end/fyi.rb                           |   2 +
 lib/dead_end/heredoc_block_parse.rb           |  34 --
 lib/dead_end/internals.rb                     |  18 +-
 lib/dead_end/lex_all.rb                       |  36 +-
 lib/dead_end/lex_value.rb                     |  62 ++++
 lib/dead_end/parse_blocks_from_indent_line.rb |   2 +-
 lib/dead_end/trailing_slash_join.rb           |  53 ---
 lib/dead_end/who_dis_syntax_error.rb          |   2 +-
 spec/fixtures/webmock.rb.txt                  |  35 ++
 spec/integration/exe_cli_spec.rb              |   4 -
 .../improvement_regression_spec.rb            |  22 ++
 spec/perf/perf_spec.rb                        |   2 -
 spec/spec_helper.rb                           |   8 +-
 spec/unit/capture_code_context_spec.rb        |  46 ++-
 spec/unit/clean_document_spec.rb              | 259 +++++++++++++++
 spec/unit/code_block_spec.rb                  |   2 +-
 spec/unit/code_line_spec.rb                   |  50 +--
 spec/unit/code_search_spec.rb                 |   6 +-
 spec/unit/display_invalid_blocks_spec.rb      |   9 +-
 spec/unit/heredoc_block_parse_spec.rb         |  37 ---
 spec/unit/trailing_slash_join_spec.rb         |  90 -----
 29 files changed, 1086 insertions(+), 450 deletions(-)
 create mode 100644 lib/dead_end/clean_document.rb
 delete mode 100644 lib/dead_end/heredoc_block_parse.rb
 create mode 100644 lib/dead_end/lex_value.rb
 delete mode 100644 lib/dead_end/trailing_slash_join.rb
 create mode 100644 spec/fixtures/webmock.rb.txt
 create mode 100644 spec/unit/clean_document_spec.rb
 delete mode 100644 spec/unit/heredoc_block_parse_spec.rb
 delete mode 100644 spec/unit/trailing_slash_join_spec.rb

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b5309bc..b716f8f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 ## HEAD (unreleased)
 
+- Logically consecutive lines (such as chained methods) are now joined (https://github.com/zombocom/dead_end/pull/78)
+- Output improvement for cases where the only line is a single `end` (https://github.com/zombocom/dead_end/pull/78)
+
 ## 1.2.0
 
 - Output improvements via less greedy unmatched kw capture https://github.com/zombocom/dead_end/pull/73
diff --git a/lib/dead_end/around_block_scan.rb b/lib/dead_end/around_block_scan.rb
index b1ef1ee..9f5abbb 100644
--- a/lib/dead_end/around_block_scan.rb
+++ b/lib/dead_end/around_block_scan.rb
@@ -9,10 +9,10 @@ module DeadEnd
   #
   # Example:
   #
-  #   def dog
-  #     puts "bark"
-  #     puts "bark"
-  #   end
+  #   def dog         # 1
+  #     puts "bark"   # 2
+  #     puts "bark"   # 3
+  #   end             # 4
   #
   #   scan = AroundBlockScan.new(
   #     code_lines: code_lines
@@ -22,7 +22,7 @@ module DeadEnd
   #   scan.scan_while { true }
   #
   #   puts scan.before_index # => 0
-  #   puts scan.after_index # => 3
+  #   puts scan.after_index  # => 3
   #
   # Contents can also be filtered using AroundBlockScan#skip
   #
@@ -109,8 +109,6 @@ def capture_neighbor_context
       kw_count = 0
       end_count = 0
       after_lines.each do |line|
-        # puts "line: #{line.number} #{line.original_line}, indent: #{line.indent}, #{line.empty?} #{line.indent == @orig_indent}"
-
         next if line.empty?
         break if line.indent < @orig_indent
         next if line.indent != @orig_indent
@@ -124,7 +122,6 @@ def capture_neighbor_context
 
         lines << line
       end
-      lines.select! { |line| !line.is_comment? }
 
       lines
     end
diff --git a/lib/dead_end/capture_code_context.rb b/lib/dead_end/capture_code_context.rb
index da72a21..4134a39 100644
--- a/lib/dead_end/capture_code_context.rb
+++ b/lib/dead_end/capture_code_context.rb
@@ -1,13 +1,27 @@
 # frozen_string_literal: true
 
 module DeadEnd
-  # Given a block, this method will capture surrounding
-  # code to give the user more context for the location of
-  # the problem.
+  # Turns "invalid block(s)" into useful context
   #
-  # Return is an array of CodeLines to be rendered.
+  # There are three main phases in the algorithm:
   #
-  # Surrounding code is captured regardless of visible state
+  # 1. Sanitize/format input source
+  # 2. Search for invalid blocks
+  # 3. Format invalid blocks into something meaningful
+  #
+  # This class handles the third part.
+  #
+  # The algorithm is very good at capturing all of a syntax
+  # error in a single block in number 2, however the results
+  # can contain ambiguities. Humans are good at pattern matching
+  # and filtering and can mentally remove extraneous data, but
+  # they can't add extra data that's not present.
+  #
+  # In the case of known ambiguous cases, this class adds context
+  # back to the ambiguity so the programmer has full information.
+  #
+  # Beyond handling these ambiguities, it also captures surrounding
+  # code context information:
   #
   #   puts block.to_s # => "def bark"
   #
@@ -16,7 +30,8 @@ module DeadEnd
   #     code_lines: code_lines
   #   )
   #
-  #   puts context.call.join
+  #   lines = context.call.map(&:original)
+  #   puts lines.join
   #   # =>
   #     class Dog
   #       def bark
@@ -34,19 +49,34 @@ def initialize(blocks:, code_lines:)
 
     def call
       @blocks.each do |block|
+        capture_first_kw_end_same_indent(block)
         capture_last_end_same_indent(block)
         capture_before_after_kws(block)
         capture_falling_indent(block)
       end
 
       @lines_to_output.select!(&:not_empty?)
-      @lines_to_output.select!(&:not_comment?)
       @lines_to_output.uniq!
       @lines_to_output.sort!
 
       @lines_to_output
     end
 
+    # Shows the context around code provided by "falling" indentation
+    #
+    # Converts:
+    #
+    #       it "foo" do
+    #
+    # into:
+    #
+    #   class OH
+    #     def hello
+    #       it "foo" do
+    #     end
+    #   end
+    #
+    #
     def capture_falling_indent(block)
       AroundBlockScan.new(
         block: block,
@@ -56,7 +86,36 @@ def capture_falling_indent(block)
       end
     end
 
+    # Shows surrounding kw/end pairs
+    #
+    # The purpose of showing these extra pairs is due to cases
+    # of ambiguity when only one visible line is matched.
+    #
+    # For example:
+    #
+    #     1  class Dog
+    #     2    def bark
+    #     4    def eat
+    #     5    end
+    #     6  end
+    #
+    # In this case either line 2 could be missing an `end` or
+    # line 4 was an extra line added by mistake (it happens).
+    #
+    # When we detect the above problem it shows the issue
+    # as only being on line 2
+    #
+    #     2    def bark
+    #
+    # Showing "neighbor" keyword pairs gives extra context:
+    #
+    #     2    def bark
+    #     4    def eat
+    #     5    end
+    #
     def capture_before_after_kws(block)
+      return unless block.visible_lines.count == 1
+
       around_lines = AroundBlockScan.new(code_lines: @code_lines, block: block)
         .start_at_next_line
         .capture_neighbor_context
@@ -66,9 +125,10 @@ def capture_before_after_kws(block)
       @lines_to_output.concat(around_lines)
     end
 
-    # When there is an invalid with a keyword
-    # right before an end, it's unclear where
-    # the correct code should be.
+    # When there is an invalid block with a keyword
+    # missing an end right before another end,
+    # it is unclear which keyword is missing the
+    # end
     #
     # Take this example:
     #
@@ -87,20 +147,21 @@ def capture_before_after_kws(block)
     # line 4. Also work backwards and if there's a mis-matched keyword, show it
     # too
     def capture_last_end_same_indent(block)
-      start_index = block.visible_lines.first.index
-      lines = @code_lines[start_index..block.lines.last.index]
+      return if block.visible_lines.length != 1
+      return unless block.visible_lines.first.is_kw?
+
+      visible_line = block.visible_lines.first
+      lines = @code_lines[visible_line.index..block.lines.last.index]
 
       # Find first end with same indent
       # (this would return line 4)
       #
       #   end             # 4
-      matching_end = lines.find { |line| line.indent == block.current_indent && line.is_end? }
+      matching_end = lines.detect { |line| line.indent == block.current_indent && line.is_end? }
       return unless matching_end
 
       @lines_to_output << matching_end
 
-      lines = @code_lines[start_index..matching_end.index]
-
       # Work backwards from the end to
       # see if there are mis-matched
       # keyword/end pairs
@@ -113,7 +174,7 @@ def capture_last_end_same_indent(block)
       #   end             # 4
       end_count = 0
       kw_count = 0
-      kw_line = lines.reverse.detect do |line|
+      kw_line = @code_lines[visible_line.index..matching_end.index].reverse.detect do |line|
         end_count += 1 if line.is_end?
         kw_count += 1 if line.is_kw?
 
@@ -122,5 +183,51 @@ def capture_last_end_same_indent(block)
       return unless kw_line
       @lines_to_output << kw_line
     end
+
+    # The logical inverse of `capture_last_end_same_indent`
+    #
+    # When there is an invalid block with an `end`
+    # missing a keyword right after another `end`,
+    # it is unclear which end is missing the
+    # keyword.
+    #
+    # Take this example:
+    #
+    #   class Dog       # 1
+    #       puts "woof" # 2
+    #     end           # 3
+    #   end             # 4
+    #
+    # the problem line will be identified as:
+    #
+    #  ❯ end            # 4
+    #
+    # This happens because lines 1, 2, and 3 are technically valid code and are expanded
+    # first, deemed valid, and hidden. We need to un-hide the matching keyword on
+    # line 1. Also work backwards and if there's a mis-matched end, show it
+    # too
+    def capture_first_kw_end_same_indent(block)
+      return if block.visible_lines.length != 1
+      return unless block.visible_lines.first.is_end?
+
+      visible_line = block.visible_lines.first
+      lines = @code_lines[block.lines.first.index..visible_line.index]
+      matching_kw = lines.reverse.detect { |line| line.indent == block.current_indent && line.is_kw? }
+      return unless matching_kw
+
+      @lines_to_output << matching_kw
+
+      kw_count = 0
+      end_count = 0
+      orphan_end = @code_lines[matching_kw.index..visible_line.index].detect do |line|
+        kw_count += 1 if line.is_kw?
+        end_count += 1 if line.is_end?
+
+        end_count >= kw_count
+      end
+
+      return unless orphan_end
+      @lines_to_output << orphan_end
+    end
   end
 end
diff --git a/lib/dead_end/clean_document.rb b/lib/dead_end/clean_document.rb
new file mode 100644
index 0000000..11bced4
--- /dev/null
+++ b/lib/dead_end/clean_document.rb
@@ -0,0 +1,313 @@
+# frozen_string_literal: true
+
+module DeadEnd
+  # Parses and sanitizes source into a lexically aware document
+  #
+  # Internally the document is represented by an array with each
+  # index containing a CodeLine correlating to a line from the source code.
+  #
+  # There are three main phases in the algorithm:
+  #
+  # 1. Sanitize/format input source
+  # 2. Search for invalid blocks
+  # 3. Format invalid blocks into something meaningful
+  #
+  # This class handles the first part.
+  #
+  # The reason this class exists is to format input source
+  # for better/easier/cleaner exploration.
+  #
+  # The CodeSearch class operates at the line level so
+  # we must be careful to not introduce lines that look
+  # valid by themselves, but when removed will trigger syntax errors
+  # or strange behavior.
+  #
+  # ## Join Trailing slashes
+  #
+  # Code with a trailing slash is logically treated as a single line:
+  #
+  #     1 it "code can be split" \
+  #     2    "across multiple lines" do
+  #
+  # In this case removing line 2 would add a syntax error. We get around
+  # this by internally joining the two lines into a single "line" object
+  #
+  # ## Logically Consecutive lines
+  #
+  # Code such as chained method calls can be broken over
+  # multiple lines while still being one logical statement:
+  #
+  #     1 User.
+  #     2   where(name: "schneems").
+  #     3   first
+  #
+  # Removing line 2 can introduce a syntax error. To fix this, all lines
+  # are joined into one.
+  #
+  # ## Heredocs
+  #
+  # A heredoc is a way of defining a multi-line string. They can cause many
+  # problems. If left as a single line, Ripper would try to parse the contents
+  # as ruby code rather than as a string. Even without this problem, we still
+  # hit an issue with indentation
+  #
+  #    1 foo = <<~HEREDOC
+  #    2  "Be yourself; everyone else is already taken.""
+  #    3    ― Oscar Wilde
+  #    4      puts "I look like ruby code" # but i'm still a heredoc
+  #    5 HEREDOC
+  #
+  # If we didn't join these lines then our algorithm would think that line 4
+  # is separate from the rest, has a higher indentation, then look at it first
+  # and remove it.
+  #
+  # If the code evaluates line 5 by itself it will think line 5 is a constant,
+  # remove it, and introduce a syntax error.
+  #
+  # All of these problems are fixed by joining the whole heredoc into a single
+  # line.
+  #
+  # ## Comments and whitespace
+  #
+  # Comments can throw off the way the lexer tells us that the line
+  # logically belongs with the next line. This is valid ruby but
+  # results in a different lex output than before:
+  #
+  #     1 User.
+  #     2   where(name: "schneems").
+  #     3   # Comment here
+  #     4   first
+  #
+  # To handle this we can replace comment lines with empty lines
+  # and then re-lex the source. This removal and re-lexing preserves
+  # line index and document size, but generates an easier to work with
+  # document.
+  #
+  class CleanDocument
+    def initialize(source:)
+      @source = source
+      @document = CodeLine.from_source(@source)
+    end
+
+    # Call all of the document "cleaners"
+    # and return self
+    def call
+      clean_sweep
+        .join_trailing_slash!
+        .join_consecutive!
+        .join_heredoc!
+
+      self
+    end
+
+    # Return an array of CodeLines in the
+    # document
+    def lines
+      @document
+    end
+
+    # Renders the document back to a string
+    def to_s
+      @document.join
+    end
+
+    # Remove comments and whitespace only lines
+    #
+    # replace with empty newlines
+    #
+    #     source = <<~'EOM'
+    #       # Comment 1
+    #       puts "hello"
+    #       # Comment 2
+    #       puts "world"
+    #     EOM
+    #
+    #     lines = CleanDocument.new(source: source).clean_sweep.lines
+    #     expect(lines[0].to_s).to eq("\n")
+    #     expect(lines[1].to_s).to eq("puts \"hello\"\n")
+    #     expect(lines[2].to_s).to eq("\n")
+    #     expect(lines[3].to_s).to eq("puts \"world\"\n")
+    #
+    # WARNING:
+    # If you run this after any of the "join" commands, they
+    # will be un-joined.
+    #
+    # After this change is made, we re-lex the document because
+    # removing comments can change how the doc is parsed.
+    #
+    # For example:
+    #
+    #     values = LexAll.new(source: <<~EOM)
+    #       User.
+    #         # comment
+    #         where(name: 'schneems')
+    #     EOM
+    #     expect(values.count {|v| v.type == :on_ignored_nl}).to eq(1)
+    #
+    # After the comment is removed:
+    #
+    #     values = LexAll.new(source: <<~EOM)
+    #       User.
+    #
+    #         where(name: 'schneems')
+    #     EOM
+    #     expect(values.count {|v| v.type == :on_ignored_nl}).to eq(2)
+    #
+    def clean_sweep
+      source = @document.map do |code_line|
+        # Clean trailing whitespace on empty line
+        if code_line.line.strip.empty?
+          next CodeLine.new(line: "\n", index: code_line.index, lex: [])
+        end
+
+        # Remove comments
+        if code_line.lex.detect { |lex| lex.type != :on_sp }&.type == :on_comment
+          next CodeLine.new(line: "\n", index: code_line.index, lex: [])
+        end
+
+        code_line
+      end.join
+
+      @source = source
+      @document = CodeLine.from_source(source)
+      self
+    end
+
+    # Smushes all heredoc lines into one line
+    #
+    #     source = <<~'EOM'
+    #       foo = <<~HEREDOC
+    #          lol
+    #          hehehe
+    #       HEREDOC
+    #     EOM
+    #
+    #     lines = CleanDocument.new(source: source).join_heredoc!.lines
+    #     expect(lines[0].to_s).to eq(source)
+    #     expect(lines[1].to_s).to eq("")
+    def join_heredoc!
+      start_index_stack = []
+      heredoc_beg_end_index = []
+      lines.each do |line|
+        line.lex.each do |lex_value|
+          case lex_value.type
+          when :on_heredoc_beg
+            start_index_stack << line.index
+          when :on_heredoc_end
+            start_index = start_index_stack.pop
+            end_index = line.index
+            heredoc_beg_end_index << [start_index, end_index]
+          end
+        end
+      end
+
+      heredoc_groups = heredoc_beg_end_index.map { |start_index, end_index| @document[start_index..end_index] }
+
+      join_groups(heredoc_groups)
+      self
+    end
+
+    # Smushes logically "consecutive" lines
+    #
+    #     source = <<~'EOM'
+    #       User.
+    #         where(name: 'schneems').
+    #         first
+    #     EOM
+    #
+    #     lines = CleanDocument.new(source: source).join_consecutive!.lines
+    #     expect(lines[0].to_s).to eq(source)
+    #     expect(lines[1].to_s).to eq("")
+    #
+    # The one known case this doesn't handle is:
+    #
+    #     Ripper.lex <<~EOM
+    #       a &&
+    #        b ||
+    #        c
+    #     EOM
+    #
+    # For some reason this introduces `on_ignored_nl` but with BEG type
+    #
+    def join_consecutive!
+      consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line|
+        take_while_including(code_line.index..) do |line|
+          line.ignore_newline_not_beg?
+        end
+      end
+
+      join_groups(consecutive_groups)
+      self
+    end
+
+    # Join lines with a trailing slash
+    #
+    #     source = <<~'EOM'
+    #       it "code can be split" \
+    #          "across multiple lines" do
+    #     EOM
+    #
+    #     lines = CleanDocument.new(source: source).join_trailing_slash!.lines
+    #     expect(lines[0].to_s).to eq(source)
+    #     expect(lines[1].to_s).to eq("")
+    def join_trailing_slash!
+      trailing_groups = @document.select(&:trailing_slash?).map do |code_line|
+        take_while_including(code_line.index..) { |x| x.trailing_slash? }
+      end
+      join_groups(trailing_groups)
+      self
+    end
+
+    # Helper method for joining "groups" of lines
+    #
+    # Input is expected to be type Array<Array<CodeLine>>
+    #
+    # The outer array holds the various "groups" while the
+    # inner array holds code lines.
+    #
+    # All code lines are "joined" into the first line in
+    # their group.
+    #
+    # To preserve document size, empty lines are placed
+    # in the place of the lines that were "joined"
+    def join_groups(groups)
+      groups.each do |lines|
+        line = lines.first
+
+        # Handle the case of multiple groups in a row
+        # if one is already replaced, move on
+        next if @document[line.index].empty?
+
+        # Join group into the first line
+        @document[line.index] = CodeLine.new(
+          lex: lines.map(&:lex).flatten,
+          line: lines.join,
+          index: line.index
+        )
+
+        # Hide the rest of the lines
+        lines[1..].each do |line|
+          # The above lines already have newlines in them, if we add more
+          # then there will be double newlines, use an empty line instead
+          @document[line.index] = CodeLine.new(line: "", index: line.index, lex: [])
+        end
+      end
+      self
+    end
+
+    # Helper method for grabbing elements from document
+    #
+    # Like `take_while` except when it stops
+    # iterating, it also returns the line
+    # that caused it to stop
+    def take_while_including(range = 0..)
+      take_next_and_stop = false
+      @document[range].take_while do |line|
+        next if take_next_and_stop
+
+        take_next_and_stop = !(yield line)
+        true
+      end
+    end
+  end
+end
diff --git a/lib/dead_end/code_frontier.rb b/lib/dead_end/code_frontier.rb
index 5b1e481..f9e0792 100644
--- a/lib/dead_end/code_frontier.rb
+++ b/lib/dead_end/code_frontier.rb
@@ -3,11 +3,19 @@
 module DeadEnd
   # The main function of the frontier is to hold the edges of our search and to
   # evaluate when we can stop searching.
+  #
+  # There are three main phases in the algorithm:
+  #
+  # 1. Sanitize/format input source
+  # 2. Search for invalid blocks
+  # 3. Format invalid blocks into something meaningful
+  #
+  # The Code frontier is a critical part of the second step
   #
   # ## Knowing where we've been
   #
-  # Once a code block is generated it is added onto the frontier where it will be
-  # sorted and then the frontier can be filtered. Large blocks that totally contain a
+  # Once a code block is generated it is added onto the frontier. Then it will be
+  # sorted by indentation and the frontier can be filtered. Large blocks that fully enclose a
   # smaller block will cause the smaller block to be evicted.
   #
   #   CodeFrontier#<<(block) # Adds block to frontier
@@ -15,11 +23,11 @@ module DeadEnd
   #
   # ## Knowing where we can go
   #
-  # Internally it keeps track of "unvisited" lines which is exposed via `next_indent_line`
-  # when called this will return a line of code with the most indentation.
+  # Internally the frontier keeps track of "unvisited" lines which are exposed via `next_indent_line`.
+  # When called, this method returns a line of code with the highest indentation.
   #
-  # This line of code can be used to build a CodeBlock and then when that code block
-  # is added back to the frontier, then the lines are removed from the
+  # The returned line of code can be used to build a CodeBlock and then that code block
+  # is added back to the frontier. Then, the lines are removed from the
   # "unvisited" so we don't double-create the same block.
   #
   #   CodeFrontier#next_indent_line # Shows next line
@@ -27,17 +35,20 @@ module DeadEnd
   #
   # ## Knowing when to stop
   #
-  # The frontier holds the syntax error when removing all code blocks from the original
-  # source document allows it to be parsed as syntatically valid:
+  # The frontier knows how to check the entire document for a syntax error. When blocks
+  # are added onto the frontier, they're removed from the document. When all code containing
+  # syntax errors has been added to the frontier, the document will be parsable without a
+  # syntax error and the search can stop.
   #
-  #   CodeFrontier#holds_all_syntax_errors?
+  #   CodeFrontier#holds_all_syntax_errors? # Returns true when frontier holds all syntax errors
   #
   # ## Filtering false positives
   #
-  # Once the search is completed, the frontier will have many blocks that do not contain
-  # the syntax error. To filter to the smallest subset that does call:
+  # Once the search is completed, the frontier may have multiple blocks that do not contain
+  # the syntax error. To limit the result to the smallest subset of "invalid blocks" call:
   #
   #   CodeFrontier#detect_invalid_blocks
+  #
   class CodeFrontier
     def initialize(code_lines:)
       @code_lines = code_lines
@@ -84,8 +95,8 @@ def expand?
         puts "```"
         puts @frontier.last.to_s
         puts "```"
-        puts "  @frontier indent: #{frontier_indent}"
-        puts "  @unvisited indent:     #{unvisited_indent}"
+        puts "  @frontier indent:  #{frontier_indent}"
+        puts "  @unvisited indent: #{unvisited_indent}"
       end
 
       # Expand all blocks before moving to unvisited lines
diff --git a/lib/dead_end/code_line.rb b/lib/dead_end/code_line.rb
index 83311ff..ebf8a49 100644
--- a/lib/dead_end/code_line.rb
+++ b/lib/dead_end/code_line.rb
@@ -4,44 +4,47 @@ module DeadEnd
   # Represents a single line of code of a given source file
   #
   # This object contains metadata about the line such as
-  # amount of indentation. An if it is empty or not.
+  # amount of indentation, if it is empty or not, and
+  # lexical data, such as if it has an `end` or a keyword
+  # in it.
   #
-  # While a given search for syntax errors is being performed
-  # state about the search can be stored in individual lines such
-  # as :valid or :invalid.
-  #
-  # Visibility of lines can be toggled on and off.
+  # Visibility of lines can be toggled off. Marking a line as invisible
+  # indicates that it should not be used for syntax checks.
+  # It's functionally the same as commenting it out.
   #
   # Example:
   #
-  #   line = CodeLine.new(line: "def foo\n", index: 0)
-  #   line.line_number => 1
+  #   line = CodeLine.from_source("def foo\n").first
+  #   line.number => 1
   #   line.empty? # => false
   #   line.visible? # => true
   #   line.mark_invisible
   #   line.visible? # => false
   #
-  # A CodeBlock is made of multiple CodeLines
-  #
-  # Marking a line as invisible indicates that it should not be used
-  # for syntax checks. It's essentially the same as commenting it out
-  #
-  # Marking a line as invisible also lets the overall program know
-  # that it should not check that area for syntax errors.
   class CodeLine
     TRAILING_SLASH = ("\\" + $/).freeze
 
-    def self.parse(source)
+    # Returns an array of CodeLine objects
+    # from the source string
+    def self.from_source(source)
+      lex_array_for_line = LexAll.new(source: source).each_with_object(Hash.new { |h, k| h[k] = [] }) { |lex, hash| hash[lex.line] << lex }
       source.lines.map.with_index do |line, index|
-        CodeLine.new(line: line, index: index)
+        CodeLine.new(
+          line: line,
+          index: index,
+          lex: lex_array_for_line[index + 1]
+        )
       end
     end
 
-    attr_reader :line, :index, :indent, :original_line
+    attr_reader :line, :index, :lex, :line_number, :indent
+    def initialize(line:, index:, lex:)
+      @lex = lex
+      @line = line
+      @index = index
+      @original = line.freeze
+      @line_number = @index + 1
 
-    def initialize(line:, index:)
-      @original_line = line.freeze
-      @line = @original_line
       if line.strip.empty?
         @empty = true
         @indent = 0
@@ -49,102 +52,148 @@ def initialize(line:, index:)
         @empty = false
         @indent = SpaceCount.indent(line)
       end
-      @index = index
-      @status = nil # valid, invalid, unknown
-      @invalid = false
 
-      lex_detect!
-    end
-
-    private def lex_detect!
-      lex_array = LexAll.new(source: line)
       kw_count = 0
       end_count = 0
-      lex_array.each_with_index do |lex, index|
-        next unless lex.type == :on_kw
-
-        case lex.token
-        when "if", "unless", "while", "until"
-          # Only count if/unless when it's not a "trailing" if/unless
-          # https://github.com/ruby/ruby/blob/06b44f819eb7b5ede1ff69cecb25682b56a1d60c/lib/irb/ruby-lex.rb#L374-L375
-          kw_count += 1 unless lex.expr_label?
-        when "def", "case", "for", "begin", "class", "module", "do"
-          kw_count += 1
-        when "end"
-          end_count += 1
-        end
+      @lex.each do |lex|
+        kw_count += 1 if lex.is_kw?
+        end_count += 1 if lex.is_end?
       end
 
-      @is_comment = lex_array.detect { |lex| lex.type != :on_sp }&.type == :on_comment
-      return if @is_comment
       @is_kw = (kw_count - end_count) > 0
       @is_end = (end_count - kw_count) > 0
-      @is_trailing_slash = lex_array.last.token == TRAILING_SLASH
-    end
-
-    alias_method :original, :original_line
-
-    def trailing_slash?
-      @is_trailing_slash
     end
 
+    # Used for stable sort via indentation level
+    #
+    # Ruby's sort is not "stable" meaning that when
+    # multiple elements have the same value, they are
+    # not guaranteed to return in the same order they
+    # were put in.
+    #
+    # So when multiple code lines have the same indentation
+    # level, they're sorted by their index value which is unique
+    # and consistent.
+    #
+    # This is mostly needed for consistency of the test suite
     def indent_index
       @indent_index ||= [indent, index]
     end
+    alias_method :number, :line_number
 
-    def <=>(other)
-      index <=> other.index
-    end
-
-    def is_comment?
-      @is_comment
-    end
-
-    def not_comment?
-      !is_comment?
-    end
-
+    # Returns true if the code line is determined
+    # to contain a keyword that matches with an `end`
+    #
+    # For example: `def`, `do`, `begin`, `class`, etc.
     def is_kw?
       @is_kw
     end
 
+    # Returns true if the code line is determined
+    # to contain an `end` keyword
     def is_end?
       @is_end
     end
 
+    # Used to hide lines
+    #
+    # The search algorithm will group lines into blocks
+    # then if those blocks are determined to represent
+    # valid code they will be hidden
     def mark_invisible
       @line = ""
-      self
-    end
-
-    def mark_visible
-      @line = @original_line
-      self
     end
 
+    # Means the line was marked as "invisible"
+    # Confusingly, "empty" lines are visible...they
+    # just don't contain any source code other than a newline ("\n").
     def visible?
       !line.empty?
     end
 
+    # Opposite of `visible?` (note: different than `empty?`)
     def hidden?
       !visible?
     end
 
-    def line_number
-      index + 1
+    # An `empty?` line is one that was originally left
+    # empty in the source code, while a "hidden" line
+    # is one that we've since marked as "invisible"
+    def empty?
+      @empty
     end
-    alias_method :number, :line_number
 
+    # Opposite of `empty?` (note: different than `visible?`)
     def not_empty?
       !empty?
     end
 
-    def empty?
-      @empty
-    end
-
+    # Renders the given line
+    #
+    # Also allows us to represent source code as
+    # an array of code lines.
+    #
+    # When we have an array of code line elements
+    # calling `join` on the array will call `to_s`
+    # on each element, which essentially converts
+    # it back into its original source string.
     def to_s
       line
     end
+
+    # When the code line is marked invisible
+    # we retain the original value of its line
+    # this is useful for debugging and for
+    # showing extra context
+    #
+    # DisplayCodeWithLineNumbers will render
+    # all lines given to it, not just visible
+    # lines, it uses the original method to
+    # obtain them.
+    attr_reader :original
+
+    # Comparison operator, needed for equality
+    # and sorting
+    def <=>(other)
+      index <=> other.index
+    end
+
+    # [Not stable API]
+    #
+    # Lines that have a `on_ignored_nl` type token and NOT
+    # a `BEG` type seem to be a good proxy for the ability
+    # to join multiple lines into one.
+    #
+    # This predicate method is used to determine when those
+    # two criteria have been met.
+    #
+    # The one known case this doesn't handle is:
+    #
+    #     Ripper.lex <<~EOM
+    #       a &&
+    #        b ||
+    #        c
+    #     EOM
+    #
+    # For some reason this introduces `on_ignored_nl` but with BEG type
+    def ignore_newline_not_beg?
+      lex_value = lex.detect { |l| l.type == :on_ignored_nl }
+      !!(lex_value && !lex_value.expr_beg?)
+    end
+
+    # Determines if the given line has a trailing slash
+    #
+    #     lines = CodeLine.from_source(<<~EOM)
+    #       it "foo" \
+    #     EOM
+    #     expect(lines.first.trailing_slash?).to eq(true)
+    #
+    def trailing_slash?
+      last = @lex.last
+      return false unless last
+      return false unless last.type == :on_sp
+
+      last.token == TRAILING_SLASH
+    end
   end
 end
diff --git a/lib/dead_end/code_search.rb b/lib/dead_end/code_search.rb
index 3e73941..79818f6 100644
--- a/lib/dead_end/code_search.rb
+++ b/lib/dead_end/code_search.rb
@@ -3,11 +3,19 @@
 module DeadEnd
   # Searches code for a syntax error
   #
+  # There are three main phases in the algorithm:
+  #
+  # 1. Sanitize/format input source
+  # 2. Search for invalid blocks
+  # 3. Format invalid blocks into something meaningful
+  #
+  # This class handles the second part (the search).
+  #
   # The bulk of the heavy lifting is done in:
   #
   #  - CodeFrontier (Holds information for generating blocks and determining if we can stop searching)
   #  - ParseBlocksFromLine (Creates blocks into the frontier)
-  #  - BlockExpand (Expands existing blocks to search more code
+  #  - BlockExpand (Expands existing blocks to search more code)
   #
   # ## Syntax error detection
   #
@@ -31,28 +39,24 @@ class CodeSearch
 
     public
 
-    public
-
     attr_reader :invalid_blocks, :record_dir, :code_lines
 
     def initialize(source, record_dir: ENV["DEAD_END_RECORD_DIR"] || ENV["DEBUG"] ? "tmp" : nil)
-      @source = source
       if record_dir
         @time = Time.now.strftime("%Y-%m-%d-%H-%M-%s-%N")
         @record_dir = Pathname(record_dir).join(@time).tap { |p| p.mkpath }
         @write_count = 0
       end
-      code_lines = source.lines.map.with_index do |line, i|
-        CodeLine.new(line: line, index: i)
-      end
 
-      @code_lines = TrailingSlashJoin.new(code_lines: code_lines).call
+      @tick = 0
+      @source = source
+      @name_tick = Hash.new { |hash, k| hash[k] = 0 }
+      @invalid_blocks = []
+
+      @code_lines = CleanDocument.new(source: source).call.lines
 
       @frontier = CodeFrontier.new(code_lines: @code_lines)
-      @invalid_blocks = []
-      @name_tick = Hash.new { |hash, k| hash[k] = 0 }
-      @tick = 0
-      @block_expand = BlockExpand.new(code_lines: code_lines)
+      @block_expand = BlockExpand.new(code_lines: @code_lines)
       @parse_blocks_from_indent_line = ParseBlocksFromIndentLine.new(code_lines: @code_lines)
     end
 
@@ -63,10 +67,10 @@ def record(block:, name: "record")
       filename = "#{@write_count += 1}-#{name}-#{@name_tick[name]}.txt"
       if ENV["DEBUG"]
         puts "\n\n==== #{filename} ===="
-        puts "\n```#{block.starts_at}:#{block.ends_at}"
+        puts "\n```#{block.starts_at}..#{block.ends_at}"
         puts block.to_s
         puts "```"
-        puts "  block indent:     #{block.current_indent}"
+        puts "  block indent:      #{block.current_indent}"
       end
       @record_dir.join(filename).open(mode: "a") do |f|
         display = DisplayInvalidBlocks.new(
@@ -122,26 +126,8 @@ def expand_invalid_block
       push(block, name: "expand")
     end
 
-    def sweep_heredocs
-      HeredocBlockParse.new(
-        source: @source,
-        code_lines: @code_lines
-      ).call.each do |block|
-        push(block, name: "heredoc")
-      end
-    end
-
-    def sweep_comments
-      lines = @code_lines.select(&:is_comment?)
-      return if lines.empty?
-      block = CodeBlock.new(lines: lines)
-      sweep(block: block, name: "comments")
-    end
-
     # Main search loop
     def call
-      sweep_heredocs
-      sweep_comments
       until frontier.holds_all_syntax_errors?
         @tick += 1
 
diff --git a/lib/dead_end/display_code_with_line_numbers.rb b/lib/dead_end/display_code_with_line_numbers.rb
index 52c8880..0e12860 100644
--- a/lib/dead_end/display_code_with_line_numbers.rb
+++ b/lib/dead_end/display_code_with_line_numbers.rb
@@ -7,7 +7,6 @@ module DeadEnd
   # even if it is "marked invisible" any filtering of
   # output should be done before calling this class.
   #
-  #
   #   DisplayCodeWithLineNumbers.new(
   #     lines: lines,
   #     highlight_lines: [lines[2], lines[3]]
diff --git a/lib/dead_end/fyi.rb b/lib/dead_end/fyi.rb
index 3645201..57b08da 100644
--- a/lib/dead_end/fyi.rb
+++ b/lib/dead_end/fyi.rb
@@ -4,3 +4,5 @@
 
 DeadEnd.send(:remove_const, :SEARCH_SOURCE_ON_ERROR_DEFAULT)
 DeadEnd::SEARCH_SOURCE_ON_ERROR_DEFAULT = false
+
+warn "DEPRECATED: calling `require 'dead_end/fyi'` is deprecated, `require 'dead_end'` instead"
diff --git a/lib/dead_end/heredoc_block_parse.rb b/lib/dead_end/heredoc_block_parse.rb
deleted file mode 100644
index ff5ff6d..0000000
--- a/lib/dead_end/heredoc_block_parse.rb
+++ /dev/null
@@ -1,34 +0,0 @@
-# frozen_string_literal: true
-
-module DeadEnd
-  # Takes in a source, and returns blocks containing each heredoc
-  class HeredocBlockParse
-    private
-
-    attr_reader :code_lines, :lex
-
-    public
-
-    def initialize(source:, code_lines:)
-      @code_lines = code_lines
-      @lex = LexAll.new(source: source)
-    end
-
-    def call
-      blocks = []
-      beginning = []
-      @lex.each do |lex|
-        case lex.type
-        when :on_heredoc_beg
-          beginning << lex.line
-        when :on_heredoc_end
-          start_index = beginning.pop - 1
-          end_index = lex.line - 1
-          blocks << CodeBlock.new(lines: code_lines[start_index..end_index])
-        end
-      end
-
-      blocks
-    end
-  end
-end
diff --git a/lib/dead_end/internals.rb b/lib/dead_end/internals.rb
index b38ead7..6a79845 100644
--- a/lib/dead_end/internals.rb
+++ b/lib/dead_end/internals.rb
@@ -1,8 +1,7 @@
 # frozen_string_literal: true
 
-#
 # This is the top level file, but is moved to `internals`
-# so the top level file can instead enable the "automatic" behavior
+# so the top level require can instead enable the "automatic" behavior
 
 require_relative "version"
 
@@ -145,14 +144,13 @@ def self.invalid_type(source)
 
 require_relative "code_line"
 require_relative "code_block"
+require_relative "code_search"
 require_relative "code_frontier"
-require_relative "display_invalid_blocks"
-require_relative "around_block_scan"
-require_relative "block_expand"
-require_relative "parse_blocks_from_indent_line"
+require_relative "clean_document"
 
-require_relative "code_search"
-require_relative "who_dis_syntax_error"
-require_relative "heredoc_block_parse"
 require_relative "lex_all"
-require_relative "trailing_slash_join"
+require_relative "block_expand"
+require_relative "around_block_scan"
+require_relative "who_dis_syntax_error"
+require_relative "display_invalid_blocks"
+require_relative "parse_blocks_from_indent_line"
diff --git a/lib/dead_end/lex_all.rb b/lib/dead_end/lex_all.rb
index 73b5993..8851c8b 100644
--- a/lib/dead_end/lex_all.rb
+++ b/lib/dead_end/lex_all.rb
@@ -24,6 +24,10 @@ def initialize(source:)
       @lex.map! { |(line, _), type, token, state| LexValue.new(line, type, token, state) }
     end
 
+    def to_a
+      @lex
+    end
+
     def each
       return @lex.each unless block_given?
       @lex.each do |x|
@@ -31,34 +35,14 @@ def each
       end
     end
 
-    def last
-      @lex.last
+    def [](index)
+      @lex[index]
     end
 
-    # Value object for accessing lex values
-    #
-    # This lex:
-    #
-    #   [1, 0], :on_ident, "describe", CMDARG
-    #
-    # Would translate into:
-    #
-    #  lex.line # => 1
-    #  lex.type # => :on_indent
-    #  lex.token # => "describe"
-    class LexValue
-      attr_reader :line, :type, :token, :state
-
-      def initialize(line, type, token, state)
-        @line = line
-        @type = type
-        @token = token
-        @state = state
-      end
-
-      def expr_label?
-        state.allbits?(Ripper::EXPR_LABEL)
-      end
+    def last
+      @lex.last
     end
   end
 end
+
+require_relative "lex_value"
diff --git a/lib/dead_end/lex_value.rb b/lib/dead_end/lex_value.rb
new file mode 100644
index 0000000..2ddb9ea
--- /dev/null
+++ b/lib/dead_end/lex_value.rb
@@ -0,0 +1,62 @@
+module DeadEnd
+  # Value object for accessing lex values
+  #
+  # This lex:
+  #
+  #   [1, 0], :on_ident, "describe", CMDARG
+  #
+  # Would translate into:
+  #
+  #  lex.line # => 1
+  #  lex.type # => :on_indent
+  #  lex.type # => :on_ident
+  class LexValue
+    attr_reader :line, :type, :token, :state
+
+    def initialize(line, type, token, state)
+      @line = line
+      @type = type
+      @token = token
+      @state = state
+
+      set_kw_end
+    end
+
+    private def set_kw_end
+      @is_end = false
+      @is_kw = false
+      return if type != :on_kw
+
+      case token
+      when "if", "unless", "while", "until"
+        # Only count if/unless when it's not a "trailing" if/unless
+        # https://github.com/ruby/ruby/blob/06b44f819eb7b5ede1ff69cecb25682b56a1d60c/lib/irb/ruby-lex.rb#L374-L375
+        @is_kw = true unless expr_label?
+      when "def", "case", "for", "begin", "class", "module", "do"
+        @is_kw = true
+      when "end"
+        @is_end = true
+      end
+    end
+
+    def ignore_newline?
+      type == :on_ignored_nl
+    end
+
+    def is_end?
+      @is_end
+    end
+
+    def is_kw?
+      @is_kw
+    end
+
+    def expr_beg?
+      state.anybits?(Ripper::EXPR_BEG)
+    end
+
+    def expr_label?
+      state.allbits?(Ripper::EXPR_LABEL)
+    end
+  end
+end
diff --git a/lib/dead_end/parse_blocks_from_indent_line.rb b/lib/dead_end/parse_blocks_from_indent_line.rb
index 83789f8..9f26cc9 100644
--- a/lib/dead_end/parse_blocks_from_indent_line.rb
+++ b/lib/dead_end/parse_blocks_from_indent_line.rb
@@ -4,7 +4,7 @@ module DeadEnd
   # This class is responsible for generating initial code blocks
   # that will then later be expanded.
   #
-  # The biggest concern when guessing about code blocks, is accidentally
+  # The biggest concern when guessing code blocks, is accidentally
   # grabbing one that contains only an "end". In this example:
   #
   #   def dog
diff --git a/lib/dead_end/trailing_slash_join.rb b/lib/dead_end/trailing_slash_join.rb
deleted file mode 100644
index 8011dcb..0000000
--- a/lib/dead_end/trailing_slash_join.rb
+++ /dev/null
@@ -1,53 +0,0 @@
-# frozen_string_literal: true
-
-module DeadEnd
-  # Handles code that contains trailing slashes
-  # by turning multiple lines with trailing slash(es) into
-  # a single code line
-  #
-  #   expect(code_lines.join).to eq(<<~EOM)
-  #     it "trailing \
-  #        "slash" do
-  #     end
-  #   EOM
-  #
-  #   lines = TrailngSlashJoin(code_lines: code_lines).call
-  #   expect(lines.first.to_s).to eq(<<~EOM)
-  #     it "trailing \
-  #        "slash" do
-  #   EOM
-  #
-  class TrailingSlashJoin
-    def initialize(code_lines:)
-      @code_lines = code_lines
-      @code_lines_dup = code_lines.dup
-    end
-
-    def call
-      @trailing_lines = []
-      @code_lines.select(&:trailing_slash?).each do |trailing|
-        stop_next = false
-        lines = @code_lines[trailing.index..].take_while do |line|
-          next false if stop_next
-
-          if !line.trailing_slash?
-            stop_next = true
-          end
-
-          true
-        end
-
-        joined_line = CodeLine.new(line: lines.map(&:original_line).join, index: trailing.index)
-
-        @code_lines_dup[trailing.index] = joined_line
-
-        @trailing_lines << joined_line
-
-        lines.shift # Don't hide first trailing slash line
-        lines.each(&:mark_invisible)
-      end
-
-      @code_lines_dup
-    end
-  end
-end
diff --git a/lib/dead_end/who_dis_syntax_error.rb b/lib/dead_end/who_dis_syntax_error.rb
index cb4c1d1..a421bc9 100644
--- a/lib/dead_end/who_dis_syntax_error.rb
+++ b/lib/dead_end/who_dis_syntax_error.rb
@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 
 module DeadEnd
-  # Determines what type of syntax error is in the source
+  # Determines the type of syntax error that is in the source
   #
   # Example:
   #
diff --git a/spec/fixtures/webmock.rb.txt b/spec/fixtures/webmock.rb.txt
new file mode 100644
index 0000000..16da0d2
--- /dev/null
+++ b/spec/fixtures/webmock.rb.txt
@@ -0,0 +1,35 @@
+describe "webmock tests" do
+  before(:each) do
+    WebMock.enable!
+  end
+
+  after(:each) do
+    WebMock.disable!
+  end
+
+  it "port" do
+    port = rand(1000...9999)
+    stub_request(:any, "localhost:#{port}")
+
+    query = Cutlass::FunctionQuery.new(
+      port: port
+    ).call
+
+    expect(WebMock).to have_requested(:post, "localhost:#{port}").
+      with(body: "{}")
+  end
+
+  it "body" do
+    body = { lol: "hi" }
+    port = 8080
+    stub_request(:any, "localhost:#{port}")
+
+    query = Cutlass::FunctionQuery.new(
+      port: port
+      body: body
+    ).call
+
+    expect(WebMock).to have_requested(:post, "localhost:#{port}").
+      with(body: body.to_json)
+  end
+end
diff --git a/spec/integration/exe_cli_spec.rb b/spec/integration/exe_cli_spec.rb
index 4a2aae0..779b2f9 100644
--- a/spec/integration/exe_cli_spec.rb
+++ b/spec/integration/exe_cli_spec.rb
@@ -39,13 +39,9 @@ def exe(cmd)
 
         expect(out).to include(<<~EOM.indent(4))
              16  class Rexe
-             40    class Options < Struct.new(
-             71    end
           ❯  77    class Lookups
           ❯  78      def input_modes
           ❯ 148    end
-            152    class CommandLineParser
-            418    end
             551  end
         EOM
       end
diff --git a/spec/integration/improvement_regression_spec.rb b/spec/integration/improvement_regression_spec.rb
index c8906b2..6934756 100644
--- a/spec/integration/improvement_regression_spec.rb
+++ b/spec/integration/improvement_regression_spec.rb
@@ -4,6 +4,28 @@
 
 module DeadEnd
   RSpec.describe "Library only integration to test regressions and improvements" do
+    it "handles multi-line-methods issues/64" do
+      source = fixtures_dir.join("webmock.rb.txt").read
+
+      io = StringIO.new
+      DeadEnd.call(
+        io: io,
+        source: source,
+        filename: "none"
+      )
+
+      expect(io.string).to include(<<~'EOM'.indent(4))
+           1  describe "webmock tests" do
+          22    it "body" do
+          27      query = Cutlass::FunctionQuery.new(
+        ❯ 28        port: port
+        ❯ 29        body: body
+          30      ).call
+          34    end
+          35  end
+      EOM
+    end
+
     it "handles derailed output issues/50" do
       source = fixtures_dir.join("derailed_require_tree.rb.txt").read
 
diff --git a/spec/perf/perf_spec.rb b/spec/perf/perf_spec.rb
index 449a576..221d5ba 100644
--- a/spec/perf/perf_spec.rb
+++ b/spec/perf/perf_spec.rb
@@ -19,8 +19,6 @@ module DeadEnd
 
       expect(io.string).to include(<<~'EOM'.indent(4))
            1  Rails.application.routes.draw do
-          107    constraints -> { Rails.application.config.non_production } do
-          111    end
         ❯ 113    namespace :admin do
         ❯ 116    match "/foobar(*path)", via: :all, to: redirect { |_params, req|
         ❯ 120    }
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
index 5adba21..d822da7 100644
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -33,12 +33,8 @@ def fixtures_dir
   spec_dir.join("fixtures")
 end
 
-def code_line_array(string)
-  code_lines = []
-  string.lines.each_with_index do |line, index|
-    code_lines << DeadEnd::CodeLine.new(line: line, index: index)
-  end
-  code_lines
+def code_line_array(source)
+  DeadEnd::CleanDocument.new(source: source).call.lines
 end
 
 def run!(cmd)
diff --git a/spec/unit/capture_code_context_spec.rb b/spec/unit/capture_code_context_spec.rb
index 4dff668..f001e0e 100644
--- a/spec/unit/capture_code_context_spec.rb
+++ b/spec/unit/capture_code_context_spec.rb
@@ -4,10 +4,36 @@
 
 module DeadEnd
   RSpec.describe CaptureCodeContext do
-    it "doesn't capture trailing if or unless" do
+    it "handles ambiguous end" do
+      source = <<~'EOM'
+        def call          # 1
+            puts "lol"    # 2
+          end # one       # 3
+        end # two         # 4
+      EOM
+
+      search = CodeSearch.new(source)
+      search.call
+
+      display = CaptureCodeContext.new(
+        blocks: search.invalid_blocks,
+        code_lines: search.code_lines
+      )
+      lines = display.call
+
+      lines = lines.sort.map(&:original)
+
+      expect(lines.join).to eq(<<~EOM)
+        def call          # 1
+          end # one       # 3
+        end # two         # 4
+      EOM
+    end
+
+    it "finds internal end associated with missing do" do
       source = <<~'EOM'
         def call
-          # try do
+          trydo
 
             @options = CommandLineParser.new.parse
 
@@ -44,6 +70,7 @@ def call
 
       expect(lines.join).to eq(<<~EOM)
         def call
+          trydo
           end # one
         end # two
       EOM
@@ -57,7 +84,6 @@ def call
       search = CodeSearch.new(source)
       search.call
 
-      # expect(search.invalid_blocks.join.strip).to eq('class Dog')
       display = CaptureCodeContext.new(
         blocks: search.invalid_blocks,
         code_lines: search.code_lines
@@ -72,8 +98,6 @@ class Rexe
           class Lookups
             def format_requires
           end
-          class CommandLineParser
-          end
         end
       EOM
     end
@@ -148,9 +172,8 @@ def nope
           def lol
           end
 
-          it "foo"
             puts "here"
-          end
+          end # here
 
           def haha
           end
@@ -171,9 +194,7 @@ class Zerg
         code_lines: search.code_lines
       )
 
-      # Finds lines previously hidden
       lines = code_context.call
-      # expect(lines.select(&:hidden?).map(&:line_number)).to eq([11, 12])
 
       out = DisplayCodeWithLineNumbers.new(
         lines: lines
@@ -183,11 +204,8 @@ class Zerg
          3  class OH
          8    def lol
          9    end
-        11    it "foo"
-        13    end
-        15    def haha
-        16    end
-        20  end
+        12    end # here
+        19  end
       EOM
     end
   end
diff --git a/spec/unit/clean_document_spec.rb b/spec/unit/clean_document_spec.rb
new file mode 100644
index 0000000..859d89a
--- /dev/null
+++ b/spec/unit/clean_document_spec.rb
@@ -0,0 +1,259 @@
+# frozen_string_literal: true
+
+require_relative "../spec_helper"
+
+module DeadEnd
+  RSpec.describe CleanDocument do
+    it "heredoc: blerg" do
+      source = fixtures_dir.join("this_project_extra_def.rb.txt").read
+      code_lines = CleanDocument.new(source: source).call.lines
+
+      expect(code_lines[18 - 1].to_s).to eq(<<-'EOL')
+      @io.puts <<~EOM
+
+        DeadEnd: A syntax error was detected
+
+        This code has an unmatched `end` this is caused by either
+        missing a syntax keyword (`def`,  `do`, etc.) or inclusion
+        of an extra `end` line:
+      EOM
+      EOL
+      expect(code_lines[18].to_s).to eq("")
+
+      expect(code_lines[27 - 1].to_s).to eq(<<-'EOL')
+      @io.puts(<<~EOM) if filename
+        file: #{filename}
+      EOM
+      EOL
+      expect(code_lines[27].to_s).to eq("")
+
+      expect(code_lines[31 - 1].to_s).to eq(<<-'EOL')
+      @io.puts <<~EOM
+        #{code_with_filename}
+      EOM
+      EOL
+      expect(code_lines[31].to_s).to eq("")
+    end
+
+    it "joins: multi line methods" do
+      source = <<~EOM
+        User
+          .where(name: 'schneems')
+          .first
+      EOM
+
+      doc = CleanDocument.new(source: source).join_consecutive!
+
+      expect(doc.lines[0].to_s).to eq(source)
+      expect(doc.lines[1].to_s).to eq("")
+      expect(doc.lines[2].to_s).to eq("")
+      expect(doc.lines[3]).to eq(nil)
+
+      lines = doc.lines
+      expect(
+        DisplayCodeWithLineNumbers.new(
+          lines: lines
+        ).call
+      ).to eq(<<~'EOM'.indent(2))
+        1  User
+        2    .where(name: 'schneems')
+        3    .first
+      EOM
+
+      expect(
+        DisplayCodeWithLineNumbers.new(
+          lines: lines,
+          highlight_lines: lines[0]
+        ).call
+      ).to eq(<<~'EOM')
+        ❯ 1  User
+        ❯ 2    .where(name: 'schneems')
+        ❯ 3    .first
+      EOM
+    end
+
+    it "helper method: take_while_including" do
+      source = <<~EOM
+        User
+          .where(name: 'schneems')
+          .first
+      EOM
+
+      doc = CleanDocument.new(source: source)
+
+      lines = doc.take_while_including { |line| !line.to_s.include?("where") }
+      expect(lines.count).to eq(2)
+    end
+
+    it "comments: removes comments" do
+      source = <<~EOM
+        # lol
+        puts "what"
+          # yolo
+      EOM
+
+      out = CleanDocument.new(source: source).clean_sweep
+      expect(out.to_s).to eq(<<~EOM)
+
+        puts "what"
+
+      EOM
+    end
+
+    it "whitespace: removes whitespace" do
+      source = "  \n" + <<~EOM
+        puts "what"
+      EOM
+
+      out = CleanDocument.new(source: source).clean_sweep
+      expect(out.to_s).to eq(<<~EOM)
+
+        puts "what"
+      EOM
+
+      expect(source.lines.first.to_s).to_not eq("\n")
+      expect(out.lines.first.to_s).to eq("\n")
+    end
+
+    it "trailing slash: does not join trailing do" do
+      # Some keywords and syntaxes trigger the "ignored line"
+      # lex output, we ignore them by filtering by BEG
+      #
+      # The `do` keyword is one of these:
+      # https://gist.github.com/schneems/6a7d7f988d3329fb3bd4b5be3e2efc0c
+      source = <<~EOM
+        foo do
+          puts "lol"
+        end
+      EOM
+
+      doc = CleanDocument.new(source: source).join_consecutive!
+
+      expect(doc.lines[0].to_s).to eq(source.lines[0])
+      expect(doc.lines[1].to_s).to eq(source.lines[1])
+      expect(doc.lines[2].to_s).to eq(source.lines[2])
+    end
+
+    it "trailing slash: formats output" do
+      source = <<~'EOM'
+        context "timezones workaround" do
+          it "should receive a time in UTC format and return the time with the"\
+            "office's UTC offset substracted from it" do
+            travel_to DateTime.new(2020, 10, 1, 10, 0, 0) do
+              office = build(:office)
+            end
+          end
+        end
+      EOM
+
+      code_lines = CleanDocument.new(source: source).call.lines
+      expect(
+        DisplayCodeWithLineNumbers.new(
+          lines: code_lines.select(&:visible?)
+        ).call
+      ).to eq(<<~'EOM'.indent(2))
+        1  context "timezones workaround" do
+        2    it "should receive a time in UTC format and return the time with the"\
+        3      "office's UTC offset substracted from it" do
+        4      travel_to DateTime.new(2020, 10, 1, 10, 0, 0) do
+        5        office = build(:office)
+        6      end
+        7    end
+        8  end
+      EOM
+
+      expect(
+        DisplayCodeWithLineNumbers.new(
+          lines: code_lines.select(&:visible?),
+          highlight_lines: code_lines[1]
+        ).call
+      ).to eq(<<~'EOM')
+          1  context "timezones workaround" do
+        ❯ 2    it "should receive a time in UTC format and return the time with the"\
+        ❯ 3      "office's UTC offset substracted from it" do
+          4      travel_to DateTime.new(2020, 10, 1, 10, 0, 0) do
+          5        office = build(:office)
+          6      end
+          7    end
+          8  end
+      EOM
+    end
+
+    it "trailing slash: basic detection" do
+      source = <<~'EOM'
+        it "trailing s" \
+           "lash" do
+      EOM
+
+      code_lines = CleanDocument.new(source: source).call.lines
+
+      expect(code_lines[0]).to_not be_hidden
+      expect(code_lines[1]).to be_hidden
+
+      expect(
+        code_lines.join
+      ).to eq(code_lines.map(&:original).join)
+    end
+
+    it "trailing slash: joins multiple lines" do
+      source = <<~'EOM'
+        it "should " \
+           "keep " \
+           "going " do
+        end
+      EOM
+
+      doc = CleanDocument.new(source: source).join_trailing_slash!
+      expect(doc.lines[0].to_s).to eq(source.lines[0..2].join)
+      expect(doc.lines[1].to_s).to eq("")
+      expect(doc.lines[2].to_s).to eq("")
+      expect(doc.lines[3].to_s).to eq(source.lines[3])
+
+      lines = doc.lines
+      expect(
+        DisplayCodeWithLineNumbers.new(
+          lines: lines
+        ).call
+      ).to eq(<<~'EOM'.indent(2))
+        1  it "should " \
+        2     "keep " \
+        3     "going " do
+        4  end
+      EOM
+
+      expect(
+        DisplayCodeWithLineNumbers.new(
+          lines: lines,
+          highlight_lines: lines[0]
+        ).call
+      ).to eq(<<~'EOM')
+        ❯ 1  it "should " \
+        ❯ 2     "keep " \
+        ❯ 3     "going " do
+          4  end
+      EOM
+    end
+
+    it "trailing slash: no false positives" do
+      source = <<~'EOM'
+        def formatters
+          @formatters ||=  {
+              amazing_print: ->(obj)  { obj.ai + "\n" },
+              inspect:       ->(obj)  { obj.inspect + "\n" },
+              json:          ->(obj)  { obj.to_json },
+              marshal:       ->(obj)  { Marshal.dump(obj) },
+              none:          ->(_obj) { nil },
+              pretty_json:   ->(obj)  { JSON.pretty_generate(obj) },
+              pretty_print:  ->(obj)  { obj.pretty_inspect },
+              puts:          ->(obj)  { require 'stringio'; sio = StringIO.new; sio.puts(obj); sio.string },
+              to_s:          ->(obj)  { obj.to_s + "\n" },
+              yaml:          ->(obj)  { obj.to_yaml },
+          }
+        end
+      EOM
+
+      code_lines = CleanDocument.new(source: source).call.lines
+      expect(code_lines.join).to eq(code_lines.join)
+    end
+  end
+end
diff --git a/spec/unit/code_block_spec.rb b/spec/unit/code_block_spec.rb
index 40aa62c..41b2096 100644
--- a/spec/unit/code_block_spec.rb
+++ b/spec/unit/code_block_spec.rb
@@ -33,7 +33,7 @@ def foo
       array = [block_2, block_1, block_0].sort
       expect(array.last).to eq(block_2)
 
-      block = CodeBlock.new(lines: CodeLine.new(line: " " * 8 + "foo", index: 4))
+      block = CodeBlock.new(lines: CodeLine.new(line: " " * 8 + "foo", index: 4, lex: []))
       array.prepend(block)
       expect(array.max).to eq(block)
     end
diff --git a/spec/unit/code_line_spec.rb b/spec/unit/code_line_spec.rb
index 13cbc80..29324cf 100644
--- a/spec/unit/code_line_spec.rb
+++ b/spec/unit/code_line_spec.rb
@@ -4,8 +4,29 @@
 
 module DeadEnd
   RSpec.describe CodeLine do
+    it "retains original line value, after being marked invisible" do
+      line = CodeLine.from_source(<<~'EOM').first
+        puts "lol"
+      EOM
+      expect(line.line).to match('puts "lol"')
+      line.mark_invisible
+      expect(line.line).to eq("")
+      expect(line.original).to match('puts "lol"')
+    end
+
+    it "knows which lines can be joined" do
+      code_lines = CodeLine.from_source(<<~'EOM')
+        user = User.
+          where(name: 'schneems').
+          first
+        puts user.name
+      EOM
+
+      # Indicates line 1 can join 2, 2 can join 3, but 3 won't join its next line
+      expect(code_lines.map(&:ignore_newline_not_beg?)).to eq([true, true, false, false])
+    end
     it "trailing if" do
-      code_lines = code_line_array(<<~'EOM')
+      code_lines = CodeLine.from_source(<<~'EOM')
         puts "lol" if foo
         if foo
         end
@@ -15,7 +36,7 @@ module DeadEnd
     end
 
     it "trailing unless" do
-      code_lines = code_line_array(<<~'EOM')
+      code_lines = CodeLine.from_source(<<~'EOM')
         puts "lol" unless foo
         unless foo
         end
@@ -25,44 +46,35 @@ module DeadEnd
     end
 
     it "trailing slash" do
-      code_lines = code_line_array(<<~'EOM')
+      code_lines = CodeLine.from_source(<<~'EOM')
         it "trailing s" \
            "lash" do
       EOM
 
       expect(code_lines.map(&:trailing_slash?)).to eq([true, false])
 
-      code_lines = code_line_array(<<~'EOM')
+      code_lines = CodeLine.from_source(<<~'EOM')
         amazing_print: ->(obj)  { obj.ai + "\n" },
       EOM
       expect(code_lines.map(&:trailing_slash?)).to eq([false])
     end
 
-    it "knows it's a comment" do
-      line = CodeLine.new(line: "   # iama comment", index: 0)
-      expect(line.is_comment?).to be_truthy
-      expect(line.is_end?).to be_falsey
-      expect(line.is_kw?).to be_falsey
-    end
-
     it "knows it's got an end" do
-      line = CodeLine.new(line: "   end", index: 0)
+      line = CodeLine.from_source("   end").first
 
-      expect(line.is_comment?).to be_falsey
       expect(line.is_end?).to be_truthy
       expect(line.is_kw?).to be_falsey
     end
 
     it "knows it's got a keyword" do
-      line = CodeLine.new(line: "  if", index: 0)
+      line = CodeLine.from_source("  if").first
 
-      expect(line.is_comment?).to be_falsey
       expect(line.is_end?).to be_falsey
       expect(line.is_kw?).to be_truthy
     end
 
     it "ignores marked lines" do
-      code_lines = code_line_array(<<~EOM)
+      code_lines = CodeLine.from_source(<<~EOM)
         def foo
           Array(value) |x|
           end
@@ -94,7 +106,7 @@ def foo
     end
 
     it "knows empty lines" do
-      code_lines = code_line_array(<<~EOM)
+      code_lines = CodeLine.from_source(<<~EOM)
         # Not empty
 
         # Not empty
@@ -106,7 +118,7 @@ def foo
     end
 
     it "counts indentations" do
-      code_lines = code_line_array(<<~EOM)
+      code_lines = CodeLine.from_source(<<~EOM)
         def foo
           Array(value) |x|
             puts 'lol'
@@ -118,7 +130,7 @@ def foo
     end
 
     it "doesn't count empty lines as having an indentation" do
-      code_lines = code_line_array(<<~EOM)
+      code_lines = CodeLine.from_source(<<~EOM)
 
 
       EOM
diff --git a/spec/unit/code_search_spec.rb b/spec/unit/code_search_spec.rb
index 4272221..2341aed 100644
--- a/spec/unit/code_search_spec.rb
+++ b/spec/unit/code_search_spec.rb
@@ -418,13 +418,13 @@ def blerg
     it "finds a naked end" do
       search = CodeSearch.new(<<~EOM)
         def foo
-          end
-        end
+          end # one
+        end # two
       EOM
       search.call
 
       expect(search.invalid_blocks.join).to eq(<<~EOM.indent(2))
-        end
+        end # one
       EOM
     end
 
diff --git a/spec/unit/display_invalid_blocks_spec.rb b/spec/unit/display_invalid_blocks_spec.rb
index 85dc035..6280c20 100644
--- a/spec/unit/display_invalid_blocks_spec.rb
+++ b/spec/unit/display_invalid_blocks_spec.rb
@@ -25,7 +25,7 @@ class Cat
           lol = {
         end
       EOM
-      code_lines = code_line_array(source)
+      code_lines = CleanDocument.new(source: source).call.lines
 
       display = DisplayInvalidBlocks.new(
         code_lines: code_lines,
@@ -109,7 +109,7 @@ def hai
     end
 
     it "outputs to io when using `call`" do
-      code_lines = code_line_array(<<~EOM)
+      source = <<~EOM
         class OH
           def hello
           def hai
@@ -117,6 +117,8 @@ def hai
         end
       EOM
 
+      code_lines = CleanDocument.new(source: source).call.lines
+
       io = StringIO.new
       block = CodeBlock.new(lines: code_lines[1])
       display = DisplayInvalidBlocks.new(
@@ -131,7 +133,7 @@ def hai
     end
 
     it " wraps code with github style codeblocks" do
-      code_lines = code_line_array(<<~EOM)
+      source = <<~EOM
         class OH
           def hello
 
@@ -140,6 +142,7 @@ def hai
         end
       EOM
 
+      code_lines = CleanDocument.new(source: source).call.lines
       block = CodeBlock.new(lines: code_lines[1])
       display = DisplayInvalidBlocks.new(
         blocks: block,
diff --git a/spec/unit/heredoc_block_parse_spec.rb b/spec/unit/heredoc_block_parse_spec.rb
deleted file mode 100644
index 197be00..0000000
--- a/spec/unit/heredoc_block_parse_spec.rb
+++ /dev/null
@@ -1,37 +0,0 @@
-# frozen_string_literal: true
-
-require_relative "../spec_helper"
-
-module DeadEnd
-  RSpec.describe "HeredocBlockParse" do
-    it "works" do
-      source = fixtures_dir.join("this_project_extra_def.rb.txt").read
-      code_lines = code_line_array(source)
-      blocks = HeredocBlockParse.new(source: source, code_lines: code_lines).call
-      expect(blocks[0].to_s).to eq(<<-'EOL')
-      @io.puts <<~EOM
-
-        DeadEnd: A syntax error was detected
-
-        This code has an unmatched `end` this is caused by either
-        missing a syntax keyword (`def`,  `do`, etc.) or inclusion
-        of an extra `end` line:
-      EOM
-      EOL
-
-      expect(blocks[1].to_s).to eq(<<-'EOL')
-      @io.puts(<<~EOM) if filename
-        file: #{filename}
-      EOM
-      EOL
-
-      expect(blocks[2].to_s).to eq(<<-'EOL')
-      @io.puts <<~EOM
-        #{code_with_filename}
-      EOM
-      EOL
-
-      expect(blocks[3]).to be_nil
-    end
-  end
-end
diff --git a/spec/unit/trailing_slash_join_spec.rb b/spec/unit/trailing_slash_join_spec.rb
deleted file mode 100644
index 3d7ac4e..0000000
--- a/spec/unit/trailing_slash_join_spec.rb
+++ /dev/null
@@ -1,90 +0,0 @@
-# frozen_string_literal: true
-
-require_relative "../spec_helper"
-
-module DeadEnd
-  RSpec.describe TrailingSlashJoin do
-    it "formats output" do
-      code_lines = code_line_array(<<~'EOM')
-        context "timezones workaround" do
-          it "should receive a time in UTC format and return the time with the"\
-            "office's UTC offset substracted from it" do
-            travel_to DateTime.new(2020, 10, 1, 10, 0, 0) do
-              office = build(:office)
-            end
-          end
-        end
-      EOM
-
-      out_code_lines = TrailingSlashJoin.new(code_lines: code_lines).call
-      expect(
-        DisplayCodeWithLineNumbers.new(
-          lines: out_code_lines.select(&:visible?)
-        ).call
-      ).to eq(<<~'EOM'.indent(2))
-        1  context "timezones workaround" do
-        2    it "should receive a time in UTC format and return the time with the"\
-        3      "office's UTC offset substracted from it" do
-        4      travel_to DateTime.new(2020, 10, 1, 10, 0, 0) do
-        5        office = build(:office)
-        6      end
-        7    end
-        8  end
-      EOM
-
-      expect(
-        DisplayCodeWithLineNumbers.new(
-          lines: out_code_lines.select(&:visible?),
-          highlight_lines: out_code_lines[1]
-        ).call
-      ).to eq(<<~'EOM')
-          1  context "timezones workaround" do
-        ❯ 2    it "should receive a time in UTC format and return the time with the"\
-        ❯ 3      "office's UTC offset substracted from it" do
-          4      travel_to DateTime.new(2020, 10, 1, 10, 0, 0) do
-          5        office = build(:office)
-          6      end
-          7    end
-          8  end
-      EOM
-    end
-
-    it "trailing slash" do
-      code_lines = code_line_array(<<~'EOM')
-        it "trailing s" \
-           "lash" do
-      EOM
-
-      out_code_lines = TrailingSlashJoin.new(code_lines: code_lines).call
-
-      expect(code_lines[0]).to_not be_hidden
-      expect(code_lines[1]).to be_hidden
-
-      expect(
-        out_code_lines.join
-      ).to eq(code_lines.map(&:original).join)
-    end
-
-    it "doesn't falsely identify trailing slashes" do
-      code_lines = code_line_array(<<~'EOM')
-        def formatters
-          @formatters ||=  {
-              amazing_print: ->(obj)  { obj.ai + "\n" },
-              inspect:       ->(obj)  { obj.inspect + "\n" },
-              json:          ->(obj)  { obj.to_json },
-              marshal:       ->(obj)  { Marshal.dump(obj) },
-              none:          ->(_obj) { nil },
-              pretty_json:   ->(obj)  { JSON.pretty_generate(obj) },
-              pretty_print:  ->(obj)  { obj.pretty_inspect },
-              puts:          ->(obj)  { require 'stringio'; sio = StringIO.new; sio.puts(obj); sio.string },
-              to_s:          ->(obj)  { obj.to_s + "\n" },
-              yaml:          ->(obj)  { obj.to_yaml },
-          }
-        end
-      EOM
-
-      out_code_lines = TrailingSlashJoin.new(code_lines: code_lines).call
-      expect(out_code_lines.join).to eq(code_lines.join)
-    end
-  end
-end