From b8535a67f8a13d7b2f7727911649ad5e0396ff1a Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Fri, 18 Oct 2024 23:32:42 +0900 Subject: [PATCH] [JRuby] Optimize `scan()`: Remove duplicate `if (restLen() < pattern.size()) return context.nil;` checks in `!headonly`. ## Why? https://github.com/ruby/strscan/blob/d31274f41b7c1e28f23d58cf7bfea03baa818cb7/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java#L371-L373 This means the following : `if (str.size() - curr < pattern.size()) return context.nil;` A similar check is made within `StringSupport#index()` within `!headonly`. https://github.com/jruby/jruby/blob/be7815ec02356a58891c8727bb448f0c6a826d96/core/src/main/java/org/jruby/util/StringSupport.java#L1706-L1720 ```Java public static int index(ByteList source, ByteList other, int offset, Encoding enc) { int sourceLen = source.realSize(); int sourceBegin = source.begin(); int otherLen = other.realSize(); if (otherLen == 0) return offset; if (sourceLen - offset < otherLen) return -1; ``` - source = `strBL` - other = `patternBL` - offset = `strBeg + curr` This means the following : `if (strBL.realSize() - (strBeg + curr) < patternBL.realSize()) return -1;` Both checks are the same. ## Benchmark It shows String as a pattern is 2.40x faster than Regexp as a pattern. ``` $ benchmark-driver benchmark/check_until.yaml Warming up -------------------------------------- regexp 7.613M i/s - 7.593M times in 0.997350s (131.35ns/i) regexp_var 7.793M i/s - 7.772M times in 0.997364s (128.32ns/i) string 13.222M i/s - 13.199M times in 0.998297s (75.63ns/i) string_var 15.283M i/s - 15.216M times in 0.995667s (65.43ns/i) Calculating ------------------------------------- regexp 10.003M i/s - 22.840M times in 2.283361s (99.97ns/i) regexp_var 9.991M i/s - 23.378M times in 2.340019s (100.09ns/i) string 23.454M i/s - 39.666M times in 1.691221s (42.64ns/i) string_var 23.998M i/s - 45.848M times in 1.910447s (41.67ns/i) Comparison: string_var: 23998466.3 i/s string: 23453777.5 i/s - 1.02x slower regexp: 10002809.4 i/s - 2.40x slower regexp_var: 9990580.1 i/s - 2.40x slower ``` --- .../org/jruby/ext/strscan/RubyStringScanner.java | 14 ++++++-------- test/strscan/test_stringscanner.rb | 6 ++++++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java b/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java index a1e81effcd..7d3e7494fc 100644 --- a/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java +++ b/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java @@ -265,7 +265,8 @@ private IRubyObject scan(ThreadContext context, IRubyObject regex, boolean succp check(context); clearMatched(); - if (restLen() < 0) { + int restLen = restLen(); + if (restLen < 0) { return context.nil; } @@ -275,7 +276,7 @@ private IRubyObject scan(ThreadContext context, IRubyObject regex, boolean succp if (regex instanceof RubyRegexp) { pattern = ((RubyRegexp) regex).preparePattern(str); - int range = currPtr + restLen(); + int range = currPtr + restLen; Matcher matcher = pattern.matcher(strBL.getUnsafeBytes(), matchTarget(), range); final int ret; @@ -298,17 +299,14 @@ private IRubyObject scan(ThreadContext context, IRubyObject regex, boolean succp if (ret < 0) return context.nil; } else { RubyString pattern = regex.convertToString(); - Encoding patternEnc = str.checkEncoding(pattern); - - if (restLen() < pattern.size()) { - return context.nil; - } - ByteList patternBL = pattern.getByteList(); int patternSize = patternBL.realSize(); if (headonly) { + if (restLen < pattern.size()) { + return context.nil; + } if (ByteList.memcmp(strBL.unsafeBytes(), currPtr, patternBL.unsafeBytes(), patternBL.begin(), patternSize) != 0) { return context.nil; } diff --git a/test/strscan/test_stringscanner.rb b/test/strscan/test_stringscanner.rb index 9b7b7910d0..54fd5027cf 100644 --- a/test/strscan/test_stringscanner.rb +++ b/test/strscan/test_stringscanner.rb @@ -325,6 +325,9 @@ def test_scan_string s = create_string_scanner(str, false) matched = s.scan('str') assert_equal 'str', matched + + s = create_string_scanner("str") + assert_equal nil, s.scan("str\0\0") end def test_skip @@ -710,6 +713,9 @@ def test_scan_until_string assert_equal(nil, s.skip_until("Qux")) assert_equal("\u0000Baz", s.scan_until("Baz")) assert_equal(11, s.pos) + + s = create_string_scanner("str") + assert_equal nil, s.scan_until("str\0\0") end def test_skip_until