From a7d6991cc446c733c6e677f99405d86c81910857 Mon Sep 17 00:00:00 2001 From: Schneems Date: Fri, 1 Dec 2023 12:14:06 -0600 Subject: [PATCH 1/4] Initial support for the prism parser Prism will be the parser in Ruby 3.3. We need to support 3.0+ so we will have to "dual boot" both parsers. Todo: - LexAll to support Prism lex output - Add tests that exercise both Ripper and prism codepaths on CI - Handle https://github.com/ruby/prism/issues/1972 in `ripper_errors.rb` - Update docs to not mention Ripper explicitly - Consider different/cleaner APIs for separating out Ripper and Prism --- Gemfile | 1 + Gemfile.lock | 34 ++++++++++---------- lib/syntax_suggest/api.rb | 46 ++++++++++++++++++++++++---- lib/syntax_suggest/explain_syntax.rb | 12 +++++++- lib/syntax_suggest/lex_all.rb | 24 ++++++++++++--- spec/unit/explain_syntax_spec.rb | 8 +++-- 6 files changed, 95 insertions(+), 30 deletions(-) diff --git a/Gemfile b/Gemfile index 420c84b..b63d595 100644 --- a/Gemfile +++ b/Gemfile @@ -12,3 +12,4 @@ gem "standard" gem "ruby-prof" gem "benchmark-ips" +gem "prism" diff --git a/Gemfile.lock b/Gemfile.lock index 9193c2a..98888a2 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -7,8 +7,8 @@ GEM remote: https://rubygems.org/ specs: ast (2.4.2) - benchmark-ips (2.9.2) - diff-lcs (1.4.4) + benchmark-ips (2.12.0) + diff-lcs (1.5.0) json (2.7.0) language_server-protocol (3.17.0.3) lint_roller (1.1.0) @@ -16,24 +16,25 @@ GEM parser (3.2.2.4) ast (~> 2.4.1) racc + prism (0.18.0) racc (1.7.3) rainbow (3.1.1) rake (12.3.3) regexp_parser (2.8.3) rexml (3.2.6) - rspec (3.10.0) - rspec-core (~> 3.10.0) - rspec-expectations (~> 3.10.0) - rspec-mocks (~> 3.10.0) - rspec-core (3.10.0) - rspec-support (~> 3.10.0) - rspec-expectations (3.10.0) + rspec (3.12.0) + rspec-core (~> 3.12.0) + rspec-expectations (~> 3.12.0) + rspec-mocks (~> 3.12.0) + rspec-core (3.12.2) + rspec-support (~> 3.12.0) + rspec-expectations (3.12.3) diff-lcs (>= 1.2.0, < 2.0) - rspec-support (~> 3.10.0) - rspec-mocks (3.10.0) + rspec-support (~> 3.12.0) + rspec-mocks (3.12.6) diff-lcs (>= 1.2.0, < 2.0) - rspec-support (~> 3.10.0) - rspec-support (3.10.0) + rspec-support (~> 3.12.0) + rspec-support (3.12.1) rubocop (1.57.2) json (~> 2.3) language_server-protocol (>= 3.17.0) @@ -50,9 +51,9 @@ GEM rubocop-performance (1.19.1) rubocop (>= 1.7.0, < 2.0) rubocop-ast (>= 0.4.0) - ruby-prof (1.4.3) + ruby-prof (1.6.3) ruby-progressbar (1.13.0) - stackprof (0.2.16) + stackprof (0.2.25) standard (1.32.1) language_server-protocol (~> 3.17.0.2) lint_roller (~> 1.0) @@ -72,6 +73,7 @@ PLATFORMS DEPENDENCIES benchmark-ips + prism rake (~> 12.0) rspec (~> 3.0) ruby-prof @@ -80,4 +82,4 @@ DEPENDENCIES syntax_suggest! BUNDLED WITH - 2.3.14 + 2.4.21 diff --git a/lib/syntax_suggest/api.rb b/lib/syntax_suggest/api.rb index 74e53c2..e8b39f2 100644 --- a/lib/syntax_suggest/api.rb +++ b/lib/syntax_suggest/api.rb @@ -5,7 +5,22 @@ require "tmpdir" require "stringio" require "pathname" -require "ripper" + +# rubocop:disable Style/IdenticalConditionalBranches +if ENV["SYNTAX_SUGGEST_DISABLE_PRISM"] # For testing dual ripper/prism support + require "ripper" +else + # TODO remove require + # Allow both to be loaded to enable more atomic commits + require "ripper" + begin + require "prism" + rescue LoadError + require "ripper" + end +end +# rubocop:enable Style/IdenticalConditionalBranches + require "timeout" module SyntaxSuggest @@ -16,6 +31,14 @@ module SyntaxSuggest class Error < StandardError; end TIMEOUT_DEFAULT = ENV.fetch("SYNTAX_SUGGEST_TIMEOUT", 1).to_i + # SyntaxSuggest.use_prism_parser? [Private] + # + # Tells us if the prism parser is available for use + # or if we should fallback to `Ripper` + def self.use_prism_parser? + defined?(Prism) + end + # SyntaxSuggest.handle_error [Public] # # Takes a `SyntaxError` exception, uses the @@ -129,11 +152,20 @@ def self.valid_without?(without_lines:, code_lines:) # SyntaxSuggest.invalid? [Private] # # Opposite of `SyntaxSuggest.valid?` - def self.invalid?(source) - source = source.join if source.is_a?(Array) - source = source.to_s + if defined?(Prism) + def self.invalid?(source) + source = source.join if source.is_a?(Array) + source = source.to_s + + Prism.parse(source).failure? + end + else + def self.invalid?(source) + source = source.join if source.is_a?(Array) + source = source.to_s - Ripper.new(source).tap(&:parse).error? + Ripper.new(source).tap(&:parse).error? + end end # SyntaxSuggest.valid? [Private] @@ -191,7 +223,9 @@ def self.valid?(source) require_relative "code_line" require_relative "code_block" require_relative "block_expand" -require_relative "ripper_errors" +if !SyntaxSuggest.use_prism_parser? + require_relative "ripper_errors" +end require_relative "priority_queue" require_relative "unvisited_lines" require_relative "around_block_scan" diff --git a/lib/syntax_suggest/explain_syntax.rb b/lib/syntax_suggest/explain_syntax.rb index 142ed2e..8de962c 100644 --- a/lib/syntax_suggest/explain_syntax.rb +++ b/lib/syntax_suggest/explain_syntax.rb @@ -3,6 +3,16 @@ require_relative "left_right_lex_count" module SyntaxSuggest + class GetParseErrors + def self.errors(source) + if SyntaxSuggest.use_prism_parser? + Prism.parse(source).errors.map(&:message) + else + RipperErrors.new(source).call.errors + end + end + end + # Explains syntax errors based on their source # # example: @@ -94,7 +104,7 @@ def why(miss) # on the original ripper error messages def errors if missing.empty? - return RipperErrors.new(@code_lines.map(&:original).join).call.errors + return GetParseErrors.errors(@code_lines.map(&:original).join) end missing.map { |miss| why(miss) } diff --git a/lib/syntax_suggest/lex_all.rb b/lib/syntax_suggest/lex_all.rb index 962d0d5..b197118 100644 --- a/lib/syntax_suggest/lex_all.rb +++ b/lib/syntax_suggest/lex_all.rb @@ -11,8 +11,8 @@ class LexAll include Enumerable def initialize(source:, source_lines: nil) - @lex = Ripper::Lexer.new(source, "-", 1).parse.sort_by(&:pos) - lineno = @lex.last.pos.first + 1 + @lex = self.class.lex(source, 1) + lineno = @lex.last[0][0] + 1 source_lines ||= source.lines last_lineno = source_lines.length @@ -20,17 +20,31 @@ def initialize(source:, source_lines: nil) lines = source_lines[lineno..] @lex.concat( - Ripper::Lexer.new(lines.join, "-", lineno + 1).parse.sort_by(&:pos) + self.class.lex(lines.join, lineno + 1) ) - lineno = @lex.last.pos.first + 1 + + lineno = @lex.last[0].first + 1 end last_lex = nil @lex.map! { |elem| - last_lex = LexValue.new(elem.pos.first, elem.event, elem.tok, elem.state, last_lex) + last_lex = LexValue.new(elem[0].first, elem[1], elem[2], elem[3], last_lex) } end + # rubocop:disable Style/IdenticalConditionalBranches + if SyntaxSuggest.use_prism_parser? + def self.lex(source, line_number) + # Prism.lex_compat(source, line: line_number).value.sort_by {|values| values[0] } + Ripper::Lexer.new(source, "-", line_number).parse.sort_by(&:pos) + end + else + def self.lex(source, line_number) + Ripper::Lexer.new(source, "-", line_number).parse.sort_by(&:pos) + end + end + # rubocop:enable Style/IdenticalConditionalBranches + def to_a @lex end diff --git a/spec/unit/explain_syntax_spec.rb b/spec/unit/explain_syntax_spec.rb index 394981d..76437bc 100644 --- a/spec/unit/explain_syntax_spec.rb +++ b/spec/unit/explain_syntax_spec.rb @@ -14,7 +14,11 @@ module SyntaxSuggest ).call expect(explain.missing).to eq([]) - expect(explain.errors.join).to include("unterminated string") + if SyntaxSuggest.use_prism_parser? + expect(explain.errors.join).to include("Expected a closing delimiter for the interpolated string") + else + expect(explain.errors.join).to include("unterminated string") + end end it "handles %w[]" do @@ -191,7 +195,7 @@ def meow ).call expect(explain.missing).to eq([]) - expect(explain.errors).to eq(RipperErrors.new(source).call.errors) + expect(explain.errors).to eq(GetParseErrors.errors(source)) end it "handles an unexpected rescue" do From 7f4176a914d7ed4e18e016c461c0bd666f7aae28 Mon Sep 17 00:00:00 2001 From: Schneems Date: Mon, 4 Dec 2023 15:23:41 -0600 Subject: [PATCH 2/4] Support lexing with Prism --- .github/workflows/ci.yml | 19 +++++++++++++++++++ CHANGELOG.md | 1 + lib/syntax_suggest/api.rb | 24 ++++++++++++++---------- lib/syntax_suggest/code_line.rb | 17 ++++++++++++----- lib/syntax_suggest/lex_all.rb | 5 +---- spec/unit/api_spec.rb | 6 ++++++ 6 files changed, 53 insertions(+), 19 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 48637d3..da7b0f3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,3 +43,22 @@ jobs: - name: test run: bin/rake test continue-on-error: ${{ matrix.ruby == 'head' }} + + test-disable-prism: + needs: ruby-versions + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + ruby: ${{ fromJson(needs.ruby-versions.outputs.versions) }} + steps: + - name: Checkout code + uses: actions/checkout@v4.1.1 + - name: Set up Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{ matrix.ruby }} + bundler-cache: true + - name: test + run: SYNTAX_SUGGEST_DISABLE_PRISM=1 bin/rake test + continue-on-error: ${{ matrix.ruby == 'head' }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a34422..5162177 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## HEAD (unreleased) +- Support prism parser (https://github.com/ruby/syntax_suggest/pull/208). - No longer supports EOL versions of Ruby. (https://github.com/ruby/syntax_suggest/pull/210) - Handle Ruby 3.3 new eval source location format (https://github.com/ruby/syntax_suggest/pull/200). diff --git a/lib/syntax_suggest/api.rb b/lib/syntax_suggest/api.rb index e8b39f2..59f47ee 100644 --- a/lib/syntax_suggest/api.rb +++ b/lib/syntax_suggest/api.rb @@ -5,23 +5,27 @@ require "tmpdir" require "stringio" require "pathname" +require "timeout" -# rubocop:disable Style/IdenticalConditionalBranches -if ENV["SYNTAX_SUGGEST_DISABLE_PRISM"] # For testing dual ripper/prism support - require "ripper" +# We need Ripper loaded for `Prism.lex_compat` even if we're using Prism +# for lexing and parsing +require "ripper" + +# Prism is the new parser, replacing Ripper +# +# We need to "dual boot" both for now because syntax_suggest +# supports older rubies that do not ship with syntax suggest. +# +# We also need the ability to control loading of this library +# so we can test that both modes work correctly in CI. +if (value = ENV["SYNTAX_SUGGEST_DISABLE_PRISM"]) + warn "Skipping loading prism due to SYNTAX_SUGGEST_DISABLE_PRISM=#{value}" else - # TODO remove require - # Allow both to be loaded to enable more atomic commits - require "ripper" begin require "prism" rescue LoadError - require "ripper" end end -# rubocop:enable Style/IdenticalConditionalBranches - -require "timeout" module SyntaxSuggest # Used to indicate a default value that cannot diff --git a/lib/syntax_suggest/code_line.rb b/lib/syntax_suggest/code_line.rb index a20f34a..58197e9 100644 --- a/lib/syntax_suggest/code_line.rb +++ b/lib/syntax_suggest/code_line.rb @@ -180,12 +180,19 @@ def ignore_newline_not_beg? # EOM # expect(lines.first.trailing_slash?).to eq(true) # - def trailing_slash? - last = @lex.last - return false unless last - return false unless last.type == :on_sp + if SyntaxSuggest.use_prism_parser? + def trailing_slash? + last = @lex.last + last&.type == :on_tstring_end + end + else + def trailing_slash? + last = @lex.last + return false unless last + return false unless last.type == :on_sp - last.token == TRAILING_SLASH + last.token == TRAILING_SLASH + end end # Endless method detection diff --git a/lib/syntax_suggest/lex_all.rb b/lib/syntax_suggest/lex_all.rb index b197118..e9509c4 100644 --- a/lib/syntax_suggest/lex_all.rb +++ b/lib/syntax_suggest/lex_all.rb @@ -32,18 +32,15 @@ def initialize(source:, source_lines: nil) } end - # rubocop:disable Style/IdenticalConditionalBranches if SyntaxSuggest.use_prism_parser? def self.lex(source, line_number) - # Prism.lex_compat(source, line: line_number).value.sort_by {|values| values[0] } - Ripper::Lexer.new(source, "-", line_number).parse.sort_by(&:pos) + Prism.lex_compat(source, line: line_number).value.sort_by { |values| values[0] } end else def self.lex(source, line_number) Ripper::Lexer.new(source, "-", line_number).parse.sort_by(&:pos) end end - # rubocop:enable Style/IdenticalConditionalBranches def to_a @lex diff --git a/spec/unit/api_spec.rb b/spec/unit/api_spec.rb index 079a91e..e900b9e 100644 --- a/spec/unit/api_spec.rb +++ b/spec/unit/api_spec.rb @@ -8,6 +8,12 @@ module SyntaxSuggest RSpec.describe "Top level SyntaxSuggest api" do + it "doesn't load prism if env var is set" do + skip("SYNTAX_SUGGEST_DISABLE_PRISM not set") unless ENV["SYNTAX_SUGGEST_DISABLE_PRISM"] + + expect(SyntaxSuggest.use_prism_parser?).to be_falsey + end + it "has a `handle_error` interface" do fake_error = Object.new def fake_error.message From 08aaa3f50a247b129b500ca38395b0cf11ae1e07 Mon Sep 17 00:00:00 2001 From: Schneems Date: Mon, 4 Dec 2023 16:59:10 -0600 Subject: [PATCH 3/4] Update docs, clean up PR Removes or updates mentions of Ripper --- README.md | 4 ++-- lib/syntax_suggest/api.rb | 3 --- lib/syntax_suggest/clean_document.rb | 4 ++-- lib/syntax_suggest/code_block.rb | 2 +- lib/syntax_suggest/explain_syntax.rb | 10 +++++++--- lib/syntax_suggest/lex_all.rb | 16 ++++++++++++---- lib/syntax_suggest/ripper_errors.rb | 5 ++++- spec/unit/lex_all_spec.rb | 3 --- 8 files changed, 28 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 805c052..3e554ca 100644 --- a/README.md +++ b/README.md @@ -122,7 +122,7 @@ Unmatched `(', missing `)' ? 5 end ``` -- Any ambiguous or unknown errors will be annotated by the original ripper error output: +- Any ambiguous or unknown errors will be annotated by the original parser error output: ``` -syntax error, unexpected end-of-input +Expected an expression after the operator 1 class Dog 2 def meals_last_month diff --git a/lib/syntax_suggest/api.rb b/lib/syntax_suggest/api.rb index 59f47ee..65660ec 100644 --- a/lib/syntax_suggest/api.rb +++ b/lib/syntax_suggest/api.rb @@ -227,9 +227,6 @@ def self.valid?(source) require_relative "code_line" require_relative "code_block" require_relative "block_expand" -if !SyntaxSuggest.use_prism_parser? - require_relative "ripper_errors" -end require_relative "priority_queue" require_relative "unvisited_lines" require_relative "around_block_scan" diff --git a/lib/syntax_suggest/clean_document.rb b/lib/syntax_suggest/clean_document.rb index 44aae26..0847a62 100644 --- a/lib/syntax_suggest/clean_document.rb +++ b/lib/syntax_suggest/clean_document.rb @@ -47,9 +47,9 @@ module SyntaxSuggest # ## Heredocs # # A heredoc is an way of defining a multi-line string. They can cause many - # problems. If left as a single line, Ripper would try to parse the contents + # problems. If left as a single line, the parser would try to parse the contents # as ruby code rather than as a string. Even without this problem, we still - # hit an issue with indentation + # hit an issue with indentation: # # 1 foo = <<~HEREDOC # 2 "Be yourself; everyone else is already taken."" diff --git a/lib/syntax_suggest/code_block.rb b/lib/syntax_suggest/code_block.rb index 61e7986..d842890 100644 --- a/lib/syntax_suggest/code_block.rb +++ b/lib/syntax_suggest/code_block.rb @@ -81,7 +81,7 @@ def valid? # lines then the result cannot be invalid # # That means there's no reason to re-check all - # lines with ripper (which is expensive). + # lines with the parser (which is expensive). # Benchmark in commit message @valid = if lines.all? { |l| l.hidden? || l.empty? } true diff --git a/lib/syntax_suggest/explain_syntax.rb b/lib/syntax_suggest/explain_syntax.rb index 8de962c..ee7d44e 100644 --- a/lib/syntax_suggest/explain_syntax.rb +++ b/lib/syntax_suggest/explain_syntax.rb @@ -2,6 +2,10 @@ require_relative "left_right_lex_count" +if !SyntaxSuggest.use_prism_parser? + require_relative "ripper_errors" +end + module SyntaxSuggest class GetParseErrors def self.errors(source) @@ -25,8 +29,8 @@ def self.errors(source) # # => "Unmatched keyword, missing `end' ?" # # When the error cannot be determined by lexical counting - # then ripper is run against the input and the raw ripper - # errors returned. + # then the parser is run against the input and the raw + # errors are returned. # # Example: # @@ -101,7 +105,7 @@ def why(miss) # Returns an array of syntax error messages # # If no missing pairs are found it falls back - # on the original ripper error messages + # on the original error messages def errors if missing.empty? return GetParseErrors.errors(@code_lines.map(&:original).join) diff --git a/lib/syntax_suggest/lex_all.rb b/lib/syntax_suggest/lex_all.rb index e9509c4..c16fbb5 100644 --- a/lib/syntax_suggest/lex_all.rb +++ b/lib/syntax_suggest/lex_all.rb @@ -3,10 +3,18 @@ module SyntaxSuggest # Ripper.lex is not guaranteed to lex the entire source document # - # lex = LexAll.new(source: source) - # lex.each do |value| - # puts value.line - # end + # This class guarantees the whole document is lex-ed by iteratively + # lexing the document where ripper stopped. + # + # Prism likely doesn't have the same problem. Once ripper support is removed + # we can likely reduce the complexity here if not remove the whole concept. + # + # Example usage: + # + # lex = LexAll.new(source: source) + # lex.each do |value| + # puts value.line + # end class LexAll include Enumerable diff --git a/lib/syntax_suggest/ripper_errors.rb b/lib/syntax_suggest/ripper_errors.rb index 48eb206..4e2bc90 100644 --- a/lib/syntax_suggest/ripper_errors.rb +++ b/lib/syntax_suggest/ripper_errors.rb @@ -1,7 +1,10 @@ # frozen_string_literal: true module SyntaxSuggest - # Capture parse errors from ripper + # Capture parse errors from Ripper + # + # Prism returns the errors with their messages, but Ripper + # does not. To get them we must make a custom subclass. # # Example: # diff --git a/spec/unit/lex_all_spec.rb b/spec/unit/lex_all_spec.rb index 0c0df7c..9621c9e 100644 --- a/spec/unit/lex_all_spec.rb +++ b/spec/unit/lex_all_spec.rb @@ -17,9 +17,6 @@ module SyntaxSuggest end # 9 EOM - # raw_lex = Ripper.lex(source) - # expect(raw_lex.to_s).to_not include("dog") - lex = LexAll.new(source: source) expect(lex.map(&:token).to_s).to include("dog") expect(lex.first.line).to eq(1) From becf097e5e07569bcb1b0916c1b54cc51c0d658d Mon Sep 17 00:00:00 2001 From: Schneems Date: Mon, 4 Dec 2023 17:34:12 -0600 Subject: [PATCH 4/4] Remove duplicate error messages Before: ``` Expected a newline or semicolon after the statement Cannot parse the expression Expected a newline or semicolon after the statement Cannot parse the expression 1 describe "webmock tests" do 22 it "body" do 27 query = Cutlass::FunctionQuery.new( > 28 port: port > 29 body: body 30 ).call 34 end 35 end ``` After: ``` Expected a newline or semicolon after the statement Cannot parse the expression 1 describe "webmock tests" do 22 it "body" do 27 query = Cutlass::FunctionQuery.new( > 28 port: port > 29 body: body 30 ).call 34 end 35 end ``` --- lib/syntax_suggest/explain_syntax.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/syntax_suggest/explain_syntax.rb b/lib/syntax_suggest/explain_syntax.rb index ee7d44e..0d80c4d 100644 --- a/lib/syntax_suggest/explain_syntax.rb +++ b/lib/syntax_suggest/explain_syntax.rb @@ -108,7 +108,7 @@ def why(miss) # on the original error messages def errors if missing.empty? - return GetParseErrors.errors(@code_lines.map(&:original).join) + return GetParseErrors.errors(@code_lines.map(&:original).join).uniq end missing.map { |miss| why(miss) }