From d0671e1d24021746d47ba734b471e9027491dc4a Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Tue, 15 Oct 2024 22:00:00 +0900 Subject: [PATCH] [JRuby] Optimize `scan()` method: Use `strBL.getBegin() + curr` instead of `currPtr()` ## Why? Because they are identical. https://github.com/ruby/strscan/blob/d31274f41b7c1e28f23d58cf7bfea03baa818cb7/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java#L267-L268 https://github.com/ruby/strscan/blob/d31274f41b7c1e28f23d58cf7bfea03baa818cb7/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java#L359-L361 ## Benchmark It shows String as a pattern is 2.33x faster than Regexp as a pattern. ``` $ benchmark-driver benchmark/check_until.yaml Warming up -------------------------------------- regexp 7.421M i/s - 7.378M times in 0.994235s (134.75ns/i) regexp_var 7.302M i/s - 7.307M times in 1.000706s (136.95ns/i) string 12.715M i/s - 12.707M times in 0.999388s (78.65ns/i) string_var 13.575M i/s - 13.533M times in 0.996914s (73.66ns/i) Calculating ------------------------------------- regexp 8.287M i/s - 22.263M times in 2.686415s (120.67ns/i) regexp_var 10.180M i/s - 21.905M times in 2.151779s (98.23ns/i) string 20.148M i/s - 38.144M times in 1.893226s (49.63ns/i) string_var 23.695M i/s - 40.726M times in 1.718753s (42.20ns/i) Comparison: string_var: 23694846.7 i/s string: 20147598.6 i/s - 1.18x slower regexp_var: 10180018.3 i/s - 2.33x slower regexp: 8287384.8 i/s - 2.86x slower ``` --- .../org/jruby/ext/strscan/RubyStringScanner.java | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java b/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java index db33717881..a1e81effcd 100644 --- a/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java +++ b/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java @@ -263,20 +263,18 @@ private IRubyObject extractBegLen(Ruby runtime, int beg, int len) { private IRubyObject scan(ThreadContext context, IRubyObject regex, 
boolean succptr, boolean getstr, boolean headonly) { final Ruby runtime = context.runtime; check(context); - - ByteList strBL = str.getByteList(); - int strBeg = strBL.getBegin(); - clearMatched(); if (restLen() < 0) { return context.nil; } + ByteList strBL = str.getByteList(); + int currPtr = strBL.getBegin() + curr; + if (regex instanceof RubyRegexp) { pattern = ((RubyRegexp) regex).preparePattern(str); - int currPtr = currPtr(); int range = currPtr + restLen(); Matcher matcher = pattern.matcher(strBL.getUnsafeBytes(), matchTarget(), range); @@ -311,12 +309,12 @@ private IRubyObject scan(ThreadContext context, IRubyObject regex, boolean succp int patternSize = patternBL.realSize(); if (headonly) { - if (ByteList.memcmp(strBL.unsafeBytes(), strBeg + curr, patternBL.unsafeBytes(), patternBL.begin(), patternSize) != 0) { + if (ByteList.memcmp(strBL.unsafeBytes(), currPtr, patternBL.unsafeBytes(), patternBL.begin(), patternSize) != 0) { return context.nil; } setRegisters(patternSize); } else { - int pos = StringSupport.index(strBL, patternBL, strBeg + curr, patternEnc); + int pos = StringSupport.index(strBL, patternBL, currPtr, patternEnc); if (pos == -1) { return context.nil; }