From 293b673a3bd1fa0b35a7bc6d99285e806ebb90f0 Mon Sep 17 00:00:00 2001 From: schneems Date: Fri, 4 Dec 2020 10:03:04 -0600 Subject: [PATCH] Lex whole source document For some weird reason Ripper.lex doesn't lex the whole source: ``` source = <<~EOM describe "cat" # 1 Cat.call do # 2 end # 3 end # 4 # 5 it "dog" do # 6 Dog.call do # 7 end # 8 end # 9 EOM require 'ripper' pp Ripper.lex(source) # => # [[[1, 0], :on_ident, "describe", CMDARG], # [[1, 8], :on_sp, " ", CMDARG], # [[1, 9], :on_tstring_beg, "\"", CMDARG], # [[1, 10], :on_tstring_content, "cat", CMDARG], # [[1, 13], :on_tstring_end, "\"", END], # [[1, 14], :on_sp, " ", END], # [[1, 15], :on_comment, "# 1\n", END], # [[2, 0], :on_sp, " ", BEG], # [[2, 2], :on_const, "Cat", CMDARG], # [[2, 5], :on_period, ".", DOT], # [[2, 6], :on_ident, "call", ARG], # [[2, 10], :on_sp, " ", ARG], # [[2, 11], :on_kw, "do", BEG], # [[2, 13], :on_sp, " ", BEG], # [[2, 15], :on_comment, "# 2\n", BEG], # [[3, 0], :on_sp, " ", BEG], # [[3, 2], :on_kw, "end", END], # [[3, 5], :on_sp, " ", END], # [[3, 15], :on_comment, "# 3\n", END], # [[4, 0], :on_kw, "end", END]]# ``` Note that there's no "dog" in this lex even though it's in our source. To handle this I'm introducing a new class LexAll. It also wraps lex values in an easier-to-use value object. 
--- CHANGELOG.md | 2 + lib/syntax_search.rb | 1 + lib/syntax_search/heredoc_block_parse.rb | 10 ++-- lib/syntax_search/lex_all.rb | 58 ++++++++++++++++++++++++ spec/unit/lex_all_spec.rb | 29 ++++++++++++ 5 files changed, 95 insertions(+), 5 deletions(-) create mode 100644 lib/syntax_search/lex_all.rb create mode 100644 spec/unit/lex_all_spec.rb diff --git a/CHANGELOG.md b/CHANGELOG.md index 66bfa5a..d941a26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ ## HEAD (unreleased) +- Fix bug where not all of a source is lexed which is used in heredoc detection/removal (https://github.com/zombocom/syntax_search/pull/23) + ## 0.1.5 - Strip out heredocs in documents first (https://github.com/zombocom/syntax_search/pull/19) diff --git a/lib/syntax_search.rb b/lib/syntax_search.rb index 2658f6e..c58a844 100644 --- a/lib/syntax_search.rb +++ b/lib/syntax_search.rb @@ -142,3 +142,4 @@ def self.invalid_type(source) require_relative "syntax_search/code_search" require_relative "syntax_search/who_dis_syntax_error" require_relative "syntax_search/heredoc_block_parse" +require_relative "syntax_search/lex_all" diff --git a/lib/syntax_search/heredoc_block_parse.rb b/lib/syntax_search/heredoc_block_parse.rb index c521baf..9617530 100644 --- a/lib/syntax_search/heredoc_block_parse.rb +++ b/lib/syntax_search/heredoc_block_parse.rb @@ -7,19 +7,19 @@ class HeredocBlockParse def initialize(source:, code_lines: ) @code_lines = code_lines - @lex = Ripper.lex(source) + @lex = LexAll.new(source: source) end def call blocks = [] beginning = [] - @lex.each do |(line, col), event, *_| - case event + @lex.each do |lex| + case lex.type when :on_heredoc_beg - beginning << line + beginning << lex.line when :on_heredoc_end start_index = beginning.pop - 1 - end_index = line - 1 + end_index = lex.line - 1 blocks << CodeBlock.new(lines: code_lines[start_index..end_index]) end end diff --git a/lib/syntax_search/lex_all.rb b/lib/syntax_search/lex_all.rb new file mode 100644 index 
0000000..11a4305 --- /dev/null +++ b/lib/syntax_search/lex_all.rb @@ -0,0 +1,58 @@ +module SyntaxErrorSearch + # Ripper.lex is not guaranteed to lex the entire source document + # + # lex = LexAll.new(source: source) + # lex.each do |value| + # puts value.line + # end + class LexAll + include Enumerable + + def initialize(source: ) + @lex = Ripper.lex(source) + lineno = @lex.last&.first&.first + 1 + source_lines = source.lines + last_lineno = source_lines.count + + until lineno >= last_lineno + lines = source_lines[lineno..-1] + + @lex.concat(Ripper.lex(lines.join, '-', lineno + 1)) + lineno = @lex.last&.first&.first + 1 + end + + @lex.map! {|(line, _), type, token| LexValue.new(line, _, type, token) } + end + + def each + return @lex.each unless block_given? + @lex.each do |x| + yield x + end + end + + def last + @lex.last + end + + # Value object for accessing lex values + # + # This lex: + # + # [1, 0], :on_ident, "describe", CMDARG + # + # Would translate into: + # + # lex.line # => 1 + # lex.type # => :on_ident + # lex.token # => "describe" + class LexValue + attr_reader :line, :type, :token + def initialize(line, _, type, token) + @line = line + @type = type + @token = token + end + end + end +end diff --git a/spec/unit/lex_all_spec.rb b/spec/unit/lex_all_spec.rb new file mode 100644 index 0000000..10c69f2 --- /dev/null +++ b/spec/unit/lex_all_spec.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +require_relative "../spec_helper.rb" + +module SyntaxErrorSearch + RSpec.describe "EndBlockParse" do + it "finds blocks based on `end` keyword" do + source = <<~EOM + describe "cat" # 1 + Cat.call do # 2 + end # 3 + end # 4 + # 5 + it "dog" do # 6 + Dog.call do # 7 + end # 8 + end # 9 + EOM + + raw_lex = Ripper.lex(source) + expect(raw_lex.to_s).to_not include("dog") + + lex = LexAll.new(source: source) + expect(lex.map(&:token).to_s).to include("dog") + expect(lex.first.line).to eq(1) + expect(lex.last.line).to eq(9) + end + end +end