From 85db8e79a1177c437b70b77d152a2c8abbf4fb32 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Fri, 3 Sep 2021 00:04:41 -0500 Subject: [PATCH 1/5] Import JRuby's strscan --- .gitignore | 2 +- Rakefile | 14 +- .../jruby/ext/strscan/RubyStringScanner.java | 676 ++++++++++++++++++ .../ext/strscan/StringScannerLibrary.java | 22 + lib/strscan.rb | 6 + 5 files changed, 717 insertions(+), 3 deletions(-) create mode 100644 ext/java/org/jruby/ext/strscan/RubyStringScanner.java create mode 100644 ext/java/org/jruby/ext/strscan/StringScannerLibrary.java create mode 100644 lib/strscan.rb diff --git a/.gitignore b/.gitignore index 925f17687f..ae5ddc6c68 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ /_yardoc/ /coverage/ /doc/ -/lib/ +/lib/*.{jar,so,dylib} /pkg/ /spec/reports/ /tmp/ diff --git a/Rakefile b/Rakefile index e9274ed41b..f90009b69a 100644 --- a/Rakefile +++ b/Rakefile @@ -15,8 +15,18 @@ namespace :version do end end -require 'rake/extensiontask' -Rake::ExtensionTask.new("strscan") +if RUBY_ENGINE == "jruby" + require 'rake/javaextensiontask' + Rake::JavaExtensionTask.new("strscan") do |ext| + require 'maven/ruby/maven' + ext.source_version = '1.8' + ext.target_version = '1.8' + ext.ext_dir = 'ext/java' + end +else + require 'rake/extensiontask' + Rake::ExtensionTask.new("strscan") +end desc "Run test" task :test do diff --git a/ext/java/org/jruby/ext/strscan/RubyStringScanner.java b/ext/java/org/jruby/ext/strscan/RubyStringScanner.java new file mode 100644 index 0000000000..ac4914903d --- /dev/null +++ b/ext/java/org/jruby/ext/strscan/RubyStringScanner.java @@ -0,0 +1,676 @@ +/* + ***** BEGIN LICENSE BLOCK ***** + * Version: EPL 2.0/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Eclipse Public + * License Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.eclipse.org/legal/epl-v20.html + * + * Software distributed under the License is distributed on an "AS + * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or + * implied. See the License for the specific language governing + * rights and limitations under the License. + * + * Alternatively, the contents of this file may be used under the terms of + * either of the GNU General Public License Version 2 or later (the "GPL"), + * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the EPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the EPL, the GPL or the LGPL. + ***** END LICENSE BLOCK *****/ + +package org.jruby.ext.strscan; + +import org.jcodings.Encoding; +import org.joni.Matcher; +import org.joni.Option; +import org.joni.Regex; +import org.joni.Region; +import org.jruby.Ruby; +import org.jruby.RubyArray; +import org.jruby.RubyBoolean; +import org.jruby.RubyClass; +import org.jruby.RubyFixnum; +import org.jruby.RubyMatchData; +import org.jruby.RubyNumeric; +import org.jruby.RubyObject; +import org.jruby.RubyRegexp; +import org.jruby.RubyString; +import org.jruby.RubySymbol; +import org.jruby.RubyThread; +import org.jruby.anno.JRubyClass; +import org.jruby.anno.JRubyMethod; +import org.jruby.common.IRubyWarnings.ID; +import org.jruby.exceptions.RaiseException; +import org.jruby.runtime.Block; +import org.jruby.runtime.Helpers; +import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; +import org.jruby.util.ByteList; +import org.jruby.util.StringSupport; + +import static org.jruby.runtime.Visibility.PRIVATE; + +/** + * @author kscott + * + */ +@JRubyClass(name="StringScanner") +public class RubyStringScanner extends RubyObject { + + private RubyString str; + private int pos = 0; + private int lastPos = -1; + + private Region regs; + private Regex pattern; + private int beg = -1; + private int end = -1; + // not to be confused with RubyObject's flags + private int scannerFlags; + + private static final int MATCHED_STR_SCN_F = 1 << 11; + + public static RubyClass createScannerClass(final Ruby runtime) { + RubyClass scannerClass = runtime.defineClass("StringScanner", runtime.getObject(), RubyStringScanner::new); + scannerClass.defineAnnotatedMethods(RubyStringScanner.class); + ThreadContext context = runtime.getCurrentContext(); + scannerClass.setConstant("Version", runtime.newString("0.7.0").freeze(context)); + scannerClass.setConstant("Id", runtime.newString("$Id: strscan.c 13506 2007-09-24 08:56:24Z nobu $").freeze(context)); + + RubyClass standardError = runtime.getStandardError(); + RubyClass error = scannerClass.defineClassUnder( + "Error", standardError, standardError.getAllocator()); + + RubyClass objClass = runtime.getObject(); + if (!objClass.isConstantDefined("ScanError")) { + objClass.defineConstant("ScanError", error); + } + + return scannerClass; + } + + private void clearMatched() { + scannerFlags &= ~MATCHED_STR_SCN_F; + } + + private void setMatched() { + scannerFlags |= MATCHED_STR_SCN_F; + } + + private boolean isMatched() { + return (scannerFlags & MATCHED_STR_SCN_F) != 0; + } + + private void check() { + if (str == null) throw getRuntime().newArgumentError("uninitialized StringScanner object"); + } + + protected RubyStringScanner(Ruby runtime, RubyClass type) { + super(runtime, type); + } + + // second argument is allowed, but ignored (MRI) + @JRubyMethod(required = 1, optional = 1, visibility = PRIVATE) + public IRubyObject initialize(IRubyObject[] args, Block unusedBlock) { + str = args[0].convertToString(); + return this; + } + + @JRubyMethod(visibility = PRIVATE) + @Override + public IRubyObject initialize_copy(IRubyObject other) { + if (this == other) return this; + if (!(other instanceof RubyStringScanner)) { + throw getRuntime().newTypeError("wrong argument type " + + other.getMetaClass() + " (expected StringScanner)"); + } + + RubyStringScanner otherScanner = (RubyStringScanner)other; + str = otherScanner.str; + pos = otherScanner.pos; + lastPos = otherScanner.lastPos; + scannerFlags = otherScanner.scannerFlags; + + regs = otherScanner.regs != null ? otherScanner.regs.clone() : null; + pattern = otherScanner.pattern; + beg = otherScanner.beg; + end = otherScanner.end; + + return this; + } + + @JRubyMethod(name = "reset") + public IRubyObject reset() { + check(); + pos = 0; + clearMatched(); + return this; + } + + @JRubyMethod(name = "terminate") + public IRubyObject terminate() { + check(); + pos = str.getByteList().getRealSize(); + clearMatched(); + return this; + } + + @JRubyMethod(name = "clear") + public IRubyObject clear(ThreadContext context) { + check(); + Ruby runtime = context.runtime; + if (runtime.isVerbose()) { + runtime.getWarnings().warning(ID.DEPRECATED_METHOD, "StringScanner#clear is obsolete; use #terminate instead"); + } + return terminate(); + } + + @JRubyMethod(name = "string") + public RubyString string() { + return str; + } + + @JRubyMethod(name = "string=", required = 1) + public IRubyObject set_string(ThreadContext context, IRubyObject str) { + this.str = RubyString.stringValue(str); + pos = 0; + clearMatched(); + return str; + } + + @JRubyMethod(name = {"concat", "<<"}, required = 1) + public IRubyObject concat(IRubyObject obj) { + check(); + str.append(obj.convertToString()); + return this; + } + + @JRubyMethod(name = {"pos", "pointer"}) + public RubyFixnum pos() { + check(); + return RubyFixnum.newFixnum(getRuntime(), pos); + } + + @JRubyMethod(name = {"pos=", "pointer="}) + public IRubyObject set_pos(IRubyObject pos) { + check(); + int i = RubyNumeric.num2int(pos); + int size = str.getByteList().getRealSize(); + if (i < 0) i += size; + if (i < 0 || i > size) throw getRuntime().newRangeError("index out of range."); + this.pos = i; + return RubyFixnum.newFixnum(getRuntime(), i); + } + + @JRubyMethod(name = "charpos") + public IRubyObject charpos(ThreadContext context) { + Ruby runtime = context.runtime; + RubyString sub = (RubyString)Helpers.invoke(context, str, "byteslice", runtime.newFixnum(0), runtime.newFixnum(pos)); + return runtime.newFixnum(sub.strLength()); + } + + private IRubyObject extractRange(Ruby runtime, int beg, int end) { + int size = str.getByteList().getRealSize(); + if (beg > size) return runtime.getNil(); + if (end > size) end = size; + return str.makeSharedString(runtime, beg, end - beg); + } + + private IRubyObject extractBegLen(Ruby runtime, int beg, int len) { + assert len >= 0; + int size = str.getByteList().getRealSize(); + if (beg > size) return runtime.getNil(); + if (beg + len > size) len = size - beg; + return str.makeSharedString(runtime, beg, len); + } + + final ThreadLocal currentMatcher = new ThreadLocal<>(); + final RubyThread.Task task = new RubyThread.Task() { + @Override + public Integer run(ThreadContext context, RubyStringScanner rubyStringScanner) throws InterruptedException { + ByteList value = str.getByteList(); + return currentMatcher.get().matchInterruptible(value.begin() + pos, value.begin() + value.realSize(), Option.NONE); + } + + @Override + public void wakeup(RubyThread thread, RubyStringScanner rubyStringScanner) { + thread.getNativeThread().interrupt(); + } + }; + + private IRubyObject scan(ThreadContext context, IRubyObject regex, boolean succptr, boolean getstr, boolean headonly) { + final Ruby runtime = getRuntime(); + if (!(regex instanceof RubyRegexp)) throw runtime.newTypeError("wrong argument type " + regex.getMetaClass() + " (expected Regexp)"); + check(); + + pattern = ((RubyRegexp)regex).preparePattern(str); + + clearMatched(); + int rest = str.getByteList().getRealSize() - pos; + if (rest < 0) return runtime.getNil(); + + ByteList value = str.getByteList(); + int beg = value.getBegin() + pos; + int range = value.getBegin() + value.getRealSize(); + + Matcher matcher = pattern.matcher(value.getUnsafeBytes(), beg, range); + final int ret; + if (headonly) { + if (runtime.getInstanceConfig().isInterruptibleRegexps()) { + currentMatcher.set(matcher); + try { + ret = runtime.getCurrentContext().getThread().executeTask(context, this, task); + } catch (InterruptedException ie) { + throw runtime.newInterruptedRegexpError("Regexp Interrupted"); + } + } else { + ret = matcher.match(beg, range, Option.NONE); + } + } else { + ret = RubyRegexp.matcherSearch(context, matcher, beg, range, Option.NONE); + } + + regs = matcher.getRegion(); + if (regs == null) { + this.beg = matcher.getBegin(); + this.end = matcher.getEnd(); + } else { + this.beg = regs.beg[0]; + this.end = regs.end[0]; + } + + if (ret < 0) return context.nil; + setMatched(); + + lastPos = pos; + if (succptr) pos += end; + return getstr ? extractBegLen(runtime, lastPos, end) : RubyFixnum.newFixnum(runtime, end); + } + + @JRubyMethod(name = "scan", required = 1) + public IRubyObject scan(ThreadContext context, IRubyObject regex) { + return scan(context, regex, true, true, true); + } + + @JRubyMethod(name = "match?", required = 1) + public IRubyObject match_p(ThreadContext context, IRubyObject regex) { + return scan(context, regex, false, false, true); + } + + @JRubyMethod(name = "skip", required = 1) + public IRubyObject skip(ThreadContext context, IRubyObject regex) { + return scan(context, regex, true, false, true); + } + + @JRubyMethod(name = "check", required = 1) + public IRubyObject check(ThreadContext context, IRubyObject regex) { + return scan(context, regex, false, true, true); + } + + @JRubyMethod(name = "scan_full", required = 3) + public IRubyObject scan_full(ThreadContext context, IRubyObject regex, IRubyObject s, IRubyObject f) { + return scan(context, regex, s.isTrue(), f.isTrue(), true); + } + + @JRubyMethod(name = "scan_until", required = 1) + public IRubyObject scan_until(ThreadContext context, IRubyObject regex) { + return scan(context, regex, true, true, false); + } + + @JRubyMethod(name = "exist?", required = 1) + public IRubyObject exist_p(ThreadContext context, IRubyObject regex) { + return scan(context, regex, false, false, false); + } + + @JRubyMethod(name = "skip_until", required = 1) + public IRubyObject skip_until(ThreadContext context, IRubyObject regex) { + return scan(context, regex, true, false, false); + } + + @JRubyMethod(name = "check_until", required = 1) + public IRubyObject check_until(ThreadContext context, IRubyObject regex) { + return scan(context, regex, false, true, false); + } + + @JRubyMethod(name = "search_full", required = 3) + public IRubyObject search_full(ThreadContext context, IRubyObject regex, IRubyObject s, IRubyObject f) { + return scan(context, regex, s.isTrue(), f.isTrue(), false); + } + + private void adjustRegisters() { + beg = 0; + end = pos - lastPos; + regs = null; + } + + public IRubyObject getch(ThreadContext context) { + return getch19(context); + } + + @JRubyMethod(name = "getch") + public IRubyObject getch19(ThreadContext context) { + return getchCommon(context, true); + } + + public IRubyObject getchCommon(ThreadContext context, boolean is1_9) { + check(); + clearMatched(); + + Ruby runtime = context.runtime; + ByteList value = str.getByteList(); + + if (pos >= value.getRealSize()) return context.nil; + int len; + if (is1_9) { + Encoding enc = str.getEncoding(); + len = enc.isSingleByte() ? 1 : StringSupport.length(enc, value.getUnsafeBytes(), value.getBegin() + pos, value.getBegin() + value.getRealSize()); + } else { + Encoding enc = runtime.getKCode().getEncoding(); + len = enc.isSingleByte() ? 1 : enc.length(value.getUnsafeBytes(), value.getBegin() + pos, value.getBegin() + value.getRealSize()); + } + + if (pos + len > value.getRealSize()) len = value.getRealSize() - pos; + lastPos = pos; + pos += len; + + setMatched(); + adjustRegisters(); + + return extractRange(runtime, lastPos + beg, lastPos + end); + } + + @JRubyMethod(name = "get_byte") + public IRubyObject get_byte(ThreadContext context) { + check(); + clearMatched(); + if (pos >= str.getByteList().getRealSize()) return context.nil; + + lastPos = pos; + pos++; + + setMatched(); + adjustRegisters(); + + return extractRange(context.runtime, lastPos + beg, lastPos + end); + } + + @JRubyMethod(name = "getbyte") + public IRubyObject getbyte(ThreadContext context) { + Ruby runtime = context.runtime; + if (runtime.isVerbose()) { + runtime.getWarnings().warning(ID.DEPRECATED_METHOD, + "StringScanner#getbyte is obsolete; use #get_byte instead"); + } + return get_byte(context); + } + + @JRubyMethod(name = "peek", required = 1) + public IRubyObject peek(ThreadContext context, IRubyObject length) { + check(); + + int len = RubyNumeric.num2int(length); + if (len < 0) { + throw context.runtime.newArgumentError("negative string size (or size too big)"); + } + + ByteList value = str.getByteList(); + if (pos >= value.getRealSize()) return RubyString.newEmptyString(context.runtime).infectBy(str); + if (pos + len > value.getRealSize()) len = value.getRealSize() - pos; + + return extractBegLen(context.runtime, pos, len); + } + + @JRubyMethod(name = "peep", required = 1) + public IRubyObject peep(ThreadContext context, IRubyObject length) { + Ruby runtime = context.runtime; + if (runtime.isVerbose()) { + runtime.getWarnings().warning( + ID.DEPRECATED_METHOD, "StringScanner#peep is obsolete; use #peek instead"); + } + return peek(context, length); + } + + @JRubyMethod(name = "unscan") + public IRubyObject unscan() { + check(); + Ruby runtime = getRuntime(); + + if (!isMatched()) { + RubyClass errorClass = runtime.getClass("StringScanner").getClass("Error"); + throw RaiseException.from(runtime, errorClass, "unscan failed: previous match had failed"); + } + pos = lastPos; + clearMatched(); + return this; + } + + @JRubyMethod(name = "beginning_of_line?", alias = "bol?") + public IRubyObject bol_p() { + check(); + Ruby runtime = getRuntime(); + + ByteList value = str.getByteList(); + if (pos > value.getRealSize()) return runtime.getNil(); + if (pos == 0) return runtime.getTrue(); + return value.getUnsafeBytes()[(value.getBegin() + pos) - 1] == (byte)'\n' ? runtime.getTrue() : runtime.getFalse(); + } + + @JRubyMethod(name = "eos?") + public RubyBoolean eos_p(ThreadContext context) { + check(); + return pos >= str.getByteList().getRealSize() ? context.tru : context.fals; + } + + @JRubyMethod(name = "empty?") + public RubyBoolean empty_p(ThreadContext context) { + Ruby runtime = context.runtime; + if (runtime.isVerbose()) { + runtime.getWarnings().warning(ID.DEPRECATED_METHOD, "StringScanner#empty? is obsolete; use #eos? instead"); + } + return eos_p(context); + } + + @JRubyMethod(name = "rest?") + public RubyBoolean rest_p(ThreadContext context) { + check(); + return pos >= str.getByteList().getRealSize() ? context.fals : context.tru; + } + + @JRubyMethod(name = "matched?") + public RubyBoolean matched_p(ThreadContext context) { + check(); + return isMatched() ? context.tru : context.fals; + } + + @JRubyMethod(name = "matched") + public IRubyObject matched(ThreadContext context) { + check(); + if (!isMatched()) return context.nil; + return extractRange(context.runtime, lastPos + beg, lastPos + end); + } + + @JRubyMethod(name = "matched_size") + public IRubyObject matched_size() { + check(); + if (!isMatched()) return getRuntime().getNil(); + return RubyFixnum.newFixnum(getRuntime(), end - beg); + } + + @JRubyMethod(name = "matchedsize") + public IRubyObject matchedsize(ThreadContext context) { + Ruby runtime = context.runtime; + if (runtime.isVerbose()) { + runtime.getWarnings().warning(ID.DEPRECATED_METHOD, "StringScanner#matchedsize is obsolete; use #matched_size instead"); + } + return matched_size(); + } + + @JRubyMethod(name = "[]", required = 1) + public IRubyObject op_aref(ThreadContext context, IRubyObject idx) { + Ruby runtime = context.runtime; + check(); + if (!isMatched()) { + return context.nil; + } + + if (idx instanceof RubySymbol || idx instanceof RubyString) { + if (pattern == null) return context.nil; + } + int i = RubyMatchData.backrefNumber(runtime, pattern, regs, idx); + int numRegs = regs == null ? 1 : regs.numRegs; + + if (i < 0) i += numRegs; + if (i < 0 || i >= numRegs) { + return context.nil; + } + + if (regs == null) { + assert i == 0; + if (beg == -1) return context.nil; + return extractRange(runtime, lastPos + beg, lastPos + end); + } else { + if (regs.beg[i] == -1) return context.nil; + return extractRange(context.runtime, lastPos + regs.beg[i], lastPos + regs.end[i]); + } + } + + @JRubyMethod(name = "pre_match") + public IRubyObject pre_match(ThreadContext context) { + check(); + if (!isMatched()) { + return context.nil; + } + return extractRange(context.runtime, 0, lastPos + beg); + } + + @JRubyMethod(name = "post_match") + public IRubyObject post_match(ThreadContext context) { + check(); + if (!isMatched()) { + return context.nil; + } + return extractRange(context.runtime, lastPos + end, str.getByteList().getRealSize()); + } + + @JRubyMethod(name = "rest") + public IRubyObject rest(ThreadContext context) { + check(); + ByteList value = str.getByteList(); + if (pos >= value.getRealSize()) { + return RubyString.newEmptyString(context.runtime).infectBy(str); + } + return extractRange(context.runtime, pos, value.getRealSize()); + } + + @JRubyMethod(name = "rest_size") + public RubyFixnum rest_size() { + check(); + ByteList value = str.getByteList(); + if (pos >= value.getRealSize()) return RubyFixnum.zero(getRuntime()); + return RubyFixnum.newFixnum(getRuntime(), value.getRealSize() - pos); + } + + @JRubyMethod(name = "restsize") + public RubyFixnum restsize(ThreadContext context) { + Ruby runtime = context.runtime; + if (runtime.isVerbose()) { + runtime.getWarnings().warning(ID.DEPRECATED_METHOD, "StringScanner#restsize is obsolete; use #rest_size instead"); + } + return rest_size(); + } + + @JRubyMethod(name = "inspect") + @Override + public IRubyObject inspect() { + if (str == null) return inspect("(uninitialized)"); + if (pos >= str.getByteList().getRealSize()) return inspect("fin"); + if (pos == 0) return inspect(pos + "/" + str.getByteList().getRealSize() + " @ " + inspect2()); + return inspect(pos + "/" + str.getByteList().getRealSize() + " " + inspect1() + " @ " + inspect2()); + } + + private IRubyObject inspect(String msg) { + RubyString result = getRuntime().newString("#<" + getMetaClass() + " " + msg + ">"); + if (str != null) result.infectBy(str); + return result; + } + + private static final int INSPECT_LENGTH = 5; + + private static final byte[] DOT_BYTES = "...".getBytes(); + + private IRubyObject inspect1() { + final Ruby runtime = getRuntime(); + if (pos == 0) return RubyString.newEmptyString(runtime); + if (pos > INSPECT_LENGTH) { + return RubyString.newStringNoCopy(runtime, DOT_BYTES). + append(str.substr(runtime, pos - INSPECT_LENGTH, INSPECT_LENGTH)).inspect(); + } + return str.substr(runtime, 0, pos).inspect(); + } + + private IRubyObject inspect2() { + final Ruby runtime = getRuntime(); + if (pos >= str.getByteList().getRealSize()) return RubyString.newEmptyString(runtime); + int len = str.getByteList().getRealSize() - pos; + if (len > INSPECT_LENGTH) { + return ((RubyString) str.substr(runtime, pos, INSPECT_LENGTH)).cat(DOT_BYTES).inspect(); + } + return str.substr(runtime, pos, len).inspect(); + } + + @JRubyMethod(name = "must_C_version", meta = true) + public static IRubyObject mustCversion(IRubyObject recv) { + return recv; + } + + @JRubyMethod(name = "size") + public IRubyObject size(ThreadContext context) { + if (!isMatched()) return context.nil; + return context.runtime.newFixnum(regs.numRegs); + } + + @JRubyMethod(name = "captures") + public IRubyObject captures(ThreadContext context) { + int i, numRegs; + RubyArray newAry; + + if (!isMatched()) return context.nil; + + Ruby runtime = context.runtime; + + numRegs = regs.numRegs; + newAry = RubyArray.newArray(runtime, numRegs); + + for (i = 1; i < numRegs; i++) { + IRubyObject str = extractRange(runtime, lastPos + regs.beg[i], + lastPos + regs.end[i]); + newAry.push(str); + } + + return newAry; + } + + @JRubyMethod(name = "values_at", rest = true) + public IRubyObject values_at(ThreadContext context, IRubyObject[] args) { + int i; + RubyArray newAry; + + if (!isMatched()) return context.nil; + + Ruby runtime = context.runtime; + + newAry = RubyArray.newArray(runtime, args.length); + for (i = 0; i < args.length; i++) { + newAry.push(op_aref(context, args[i])); + } + + return newAry; + } +} diff --git a/ext/java/org/jruby/ext/strscan/StringScannerLibrary.java b/ext/java/org/jruby/ext/strscan/StringScannerLibrary.java new file mode 100644 index 0000000000..3ffb641a22 --- /dev/null +++ b/ext/java/org/jruby/ext/strscan/StringScannerLibrary.java @@ -0,0 +1,22 @@ +package org.jruby.ext.strscan; + +import java.io.IOException; + +import org.jruby.Ruby; +import org.jruby.ext.strscan.RubyStringScanner; +import org.jruby.runtime.load.Library; + +/** + * @author kscott + * + */ +public class StringScannerLibrary implements Library { + + /** + * @see org.jruby.runtime.load.Library#load(org.jruby.Ruby) + */ + public void load(Ruby runtime, boolean wrap) throws IOException { + RubyStringScanner.createScannerClass(runtime); + } + +} diff --git a/lib/strscan.rb b/lib/strscan.rb new file mode 100644 index 0000000000..19baccff26 --- /dev/null +++ b/lib/strscan.rb @@ -0,0 +1,6 @@ +if RUBY_ENGINE == 'jruby' + require 'strscan.jar' + JRuby::Util.load_ext("org.jruby.ext.strscan.StringScannerLibrary") +else + require 'strscan.so' +end From 0d258c8742c3c0739d5cd367a5f07442b74ae61a Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Mon, 7 Feb 2022 17:42:17 -0600 Subject: [PATCH 2/5] Update with fully functional strscan --- .../jruby/ext/strscan/RubyStringScanner.java | 591 +++++++++++------- 1 file changed, 382 insertions(+), 209 deletions(-) diff --git a/ext/java/org/jruby/ext/strscan/RubyStringScanner.java b/ext/java/org/jruby/ext/strscan/RubyStringScanner.java index ac4914903d..9336f67550 100644 --- a/ext/java/org/jruby/ext/strscan/RubyStringScanner.java +++ b/ext/java/org/jruby/ext/strscan/RubyStringScanner.java @@ -46,6 +46,7 @@ import org.jruby.RubyThread; import org.jruby.anno.JRubyClass; import org.jruby.anno.JRubyMethod; +import org.jruby.ast.util.ArgsUtil; import org.jruby.common.IRubyWarnings.ID; import org.jruby.exceptions.RaiseException; import org.jruby.runtime.Block; @@ -59,40 +60,41 @@ /** * @author kscott - * */ -@JRubyClass(name="StringScanner") +@JRubyClass(name = "StringScanner") public class RubyStringScanner extends RubyObject { private RubyString str; - private int pos = 0; - private int lastPos = -1; + private int curr = 0; + private int prev = -1; private Region regs; private Regex pattern; - private int beg = -1; - private int end = -1; - // not to be confused with RubyObject's flags private int scannerFlags; + private boolean fixedAnchor; private static final int MATCHED_STR_SCN_F = 1 << 11; public static RubyClass createScannerClass(final Ruby runtime) { - RubyClass scannerClass = runtime.defineClass("StringScanner", runtime.getObject(), RubyStringScanner::new); - scannerClass.defineAnnotatedMethods(RubyStringScanner.class); - ThreadContext context = runtime.getCurrentContext(); - scannerClass.setConstant("Version", runtime.newString("0.7.0").freeze(context)); - scannerClass.setConstant("Id", runtime.newString("$Id: strscan.c 13506 2007-09-24 08:56:24Z nobu $").freeze(context)); + RubyClass Object = runtime.getObject(); - RubyClass standardError = runtime.getStandardError(); - RubyClass error = scannerClass.defineClassUnder( - "Error", standardError, standardError.getAllocator()); + RubyClass scannerClass = runtime.defineClass("StringScanner", Object, RubyStringScanner::new); - RubyClass objClass = runtime.getObject(); - if (!objClass.isConstantDefined("ScanError")) { - objClass.defineConstant("ScanError", error); + RubyClass standardError = runtime.getStandardError(); + RubyClass error = scannerClass.defineClassUnder("Error", standardError, standardError.getAllocator()); + if (!Object.isConstantDefined("ScanError")) { + Object.defineConstant("ScanError", error); } + RubyString version = runtime.newString("3.0.2"); + version.setFrozen(true); + scannerClass.setConstant("Version", version); + RubyString id = runtime.newString("$Id$"); + id.setFrozen(true); + scannerClass.setConstant("Id", id); + + scannerClass.defineAnnotatedMethods(RubyStringScanner.class); + return scannerClass; } @@ -108,68 +110,77 @@ private boolean isMatched() { return (scannerFlags & MATCHED_STR_SCN_F) != 0; } - private void check() { - if (str == null) throw getRuntime().newArgumentError("uninitialized StringScanner object"); + private void check(ThreadContext context) { + if (str == null) throw context.runtime.newArgumentError("uninitialized StringScanner object"); } protected RubyStringScanner(Ruby runtime, RubyClass type) { super(runtime, type); } - // second argument is allowed, but ignored (MRI) - @JRubyMethod(required = 1, optional = 1, visibility = PRIVATE) - public IRubyObject initialize(IRubyObject[] args, Block unusedBlock) { - str = args[0].convertToString(); + @JRubyMethod(visibility = PRIVATE) + public IRubyObject initialize(ThreadContext context, IRubyObject string) { + return initialize(context, string, context.nil); + } + + @JRubyMethod(visibility = PRIVATE) + public IRubyObject initialize(ThreadContext context, IRubyObject string, IRubyObject dupOrOpts) { + this.str = string.convertToString(); + this.fixedAnchor = ArgsUtil.extractKeywordArg(context, "fixed_anchor", dupOrOpts).isTrue(); + this.regs = new Region(0, 0); + return this; } @JRubyMethod(visibility = PRIVATE) - @Override - public IRubyObject initialize_copy(IRubyObject other) { + public IRubyObject initialize(ThreadContext context, IRubyObject string, IRubyObject dup, IRubyObject opts) { + return initialize(context, string, opts); + } + + @JRubyMethod(visibility = PRIVATE) + public IRubyObject initialize_copy(ThreadContext context, IRubyObject other) { if (this == other) return this; if (!(other instanceof RubyStringScanner)) { - throw getRuntime().newTypeError("wrong argument type " - + other.getMetaClass() + " (expected StringScanner)"); + throw context.runtime.newTypeError("wrong argument type " + other.getMetaClass() + " (expected StringScanner)"); } - RubyStringScanner otherScanner = (RubyStringScanner)other; + RubyStringScanner otherScanner = (RubyStringScanner) other; str = otherScanner.str; - pos = otherScanner.pos; - lastPos = otherScanner.lastPos; + curr = otherScanner.curr; + prev = otherScanner.prev; scannerFlags = otherScanner.scannerFlags; - regs = otherScanner.regs != null ? otherScanner.regs.clone() : null; + regs = otherScanner.regs.clone(); pattern = otherScanner.pattern; - beg = otherScanner.beg; - end = otherScanner.end; + fixedAnchor = otherScanner.fixedAnchor; return this; } @JRubyMethod(name = "reset") - public IRubyObject reset() { - check(); - pos = 0; + public IRubyObject reset(ThreadContext context) { + check(context); + curr = 0; clearMatched(); return this; } @JRubyMethod(name = "terminate") - public IRubyObject terminate() { - check(); - pos = str.getByteList().getRealSize(); + public IRubyObject terminate(ThreadContext context) { + check(context); + curr = str.getByteList().getRealSize(); clearMatched(); return this; } @JRubyMethod(name = "clear") public IRubyObject clear(ThreadContext context) { - check(); + check(context); Ruby runtime = context.runtime; if (runtime.isVerbose()) { runtime.getWarnings().warning(ID.DEPRECATED_METHOD, "StringScanner#clear is obsolete; use #terminate instead"); } - return terminate(); + return terminate(context); } @JRubyMethod(name = "string") @@ -177,57 +188,69 @@ public RubyString string() { return str; } - @JRubyMethod(name = "string=", required = 1) + @JRubyMethod(name = "string=") public IRubyObject set_string(ThreadContext context, IRubyObject str) { this.str = RubyString.stringValue(str); - pos = 0; + curr = 0; clearMatched(); return str; } - @JRubyMethod(name = {"concat", "<<"}, required = 1) - public IRubyObject concat(IRubyObject obj) { - check(); + @JRubyMethod(name = {"concat", "<<"}) + public IRubyObject concat(ThreadContext context, IRubyObject obj) { + check(context); str.append(obj.convertToString()); return this; } @JRubyMethod(name = {"pos", "pointer"}) - public RubyFixnum pos() { - check(); - return RubyFixnum.newFixnum(getRuntime(), pos); + public RubyFixnum pos(ThreadContext context) { + check(context); + return RubyFixnum.newFixnum(context.runtime, curr); } @JRubyMethod(name = {"pos=", "pointer="}) - public IRubyObject set_pos(IRubyObject pos) { - check(); + public IRubyObject set_pos(ThreadContext context, IRubyObject pos) { + check(context); + + Ruby runtime = context.runtime; + int i = RubyNumeric.num2int(pos); int size = str.getByteList().getRealSize(); if (i < 0) i += size; - if (i < 0 || i > size) throw getRuntime().newRangeError("index out of range."); - this.pos = i; - return RubyFixnum.newFixnum(getRuntime(), i); + if (i < 0 || i > size) throw runtime.newRangeError("index out of range."); + this.curr = i; + + return RubyFixnum.newFixnum(runtime, i); } @JRubyMethod(name = "charpos") public IRubyObject charpos(ThreadContext context) { Ruby runtime = context.runtime; - RubyString sub = (RubyString)Helpers.invoke(context, str, "byteslice", runtime.newFixnum(0), runtime.newFixnum(pos)); - return runtime.newFixnum(sub.strLength()); + + ByteList strBL = str.getByteList(); + int strBeg = strBL.begin(); + + return runtime.newFixnum(StringSupport.strLength(strBL.getEncoding(), strBL.unsafeBytes(), strBeg, strBeg + curr)); } private IRubyObject extractRange(Ruby runtime, int beg, int end) { int size = str.getByteList().getRealSize(); + if (beg > size) return runtime.getNil(); if (end > size) end = size; + return str.makeSharedString(runtime, beg, end - beg); } private IRubyObject extractBegLen(Ruby runtime, int beg, int len) { assert len >= 0; + int size = str.getByteList().getRealSize(); + if (beg > size) return runtime.getNil(); - if (beg + len > size) len = size - beg; + len = Math.min(len, size - beg); + return str.makeSharedString(runtime, beg, len); } @@ -236,7 +259,7 @@ private IRubyObject extractBegLen(Ruby runtime, int beg, int len) { @Override public Integer run(ThreadContext context, RubyStringScanner rubyStringScanner) throws InterruptedException { ByteList value = str.getByteList(); - return currentMatcher.get().matchInterruptible(value.begin() + pos, value.begin() + value.realSize(), Option.NONE); + return currentMatcher.get().matchInterruptible(value.begin() + curr, value.begin() + value.realSize(), Option.NONE); } @Override @@ -245,175 +268,251 @@ public void wakeup(RubyThread thread, RubyStringScanner rubyStringScanner) { } }; + // MRI: strscan_do_scan private IRubyObject scan(ThreadContext context, IRubyObject regex, boolean succptr, boolean getstr, boolean headonly) { - final Ruby runtime = getRuntime(); - if (!(regex instanceof RubyRegexp)) throw runtime.newTypeError("wrong argument type " + regex.getMetaClass() + " (expected Regexp)"); - check(); + final Ruby runtime = context.runtime; + + if (headonly) { + if (!(regex instanceof RubyRegexp)) { + regex = regex.convertToString(); + } + } else { + if (!(regex instanceof RubyRegexp)) { + throw runtime.newTypeError("wrong argument type " + regex.getMetaClass() + " (expected Regexp)"); + } + } - pattern = ((RubyRegexp)regex).preparePattern(str); + check(context); + + ByteList strBL = str.getByteList(); + int strBeg = strBL.getBegin(); clearMatched(); - int rest = str.getByteList().getRealSize() - pos; - if (rest < 0) return runtime.getNil(); - ByteList value = str.getByteList(); - int beg = value.getBegin() + pos; - int range = value.getBegin() + value.getRealSize(); + if (restLen() < 0) { + return context.nil; + } - Matcher matcher = pattern.matcher(value.getUnsafeBytes(), beg, range); - final int ret; - if (headonly) { - if (runtime.getInstanceConfig().isInterruptibleRegexps()) { - currentMatcher.set(matcher); - try { - ret = runtime.getCurrentContext().getThread().executeTask(context, this, task); - } catch (InterruptedException ie) { - throw runtime.newInterruptedRegexpError("Regexp Interrupted"); - } + if (regex instanceof RubyRegexp) { + pattern = ((RubyRegexp) regex).preparePattern(str); + + int currPtr = currPtr(); + int range = currPtr + restLen(); + + Matcher matcher = pattern.matcher(strBL.getUnsafeBytes(), matchTarget(), range); + final int ret; + if (headonly) { + ret = RubyRegexp.matcherMatch(context, matcher, currPtr, range, Option.NONE); } else { - ret = matcher.match(beg, range, Option.NONE); + ret = RubyRegexp.matcherSearch(context, matcher, currPtr, range, Option.NONE); } + + Region matchRegion = matcher.getRegion(); + if (matchRegion == null) { + regs.beg[0] = matcher.getBegin(); + regs.end[0] = matcher.getEnd(); + } else { + regs = matchRegion; + } + + if (ret == -2) { + throw runtime.newRaiseException((RubyClass) getMetaClass().getConstant("ScanError"), "regexp buffer overflow"); + } + if (ret < 0) return context.nil; } else { - ret = RubyRegexp.matcherSearch(context, matcher, beg, range, Option.NONE); + RubyString pattern = (RubyString) regex; + + str.checkEncoding(pattern); + + if (restLen() < pattern.size()) { + return context.nil; + } + + ByteList patternBL = pattern.getByteList(); + int patternSize = patternBL.realSize(); + + if (ByteList.memcmp(strBL.unsafeBytes(), strBeg + curr, patternBL.unsafeBytes(), patternBL.begin(), patternSize) != 0) { + return context.nil; + } + + setRegisters(patternSize); } - regs = matcher.getRegion(); - if (regs == null) { - this.beg = matcher.getBegin(); - this.end = matcher.getEnd(); + setMatched(); + prev = curr; + + if (succptr) { + succ(); + } + + int length = lastMatchLength(); + + if (getstr) { + return extractBegLen(runtime, prev, length); + } + + return RubyFixnum.newFixnum(runtime, length); + } + + private int lastMatchLength() { + if (fixedAnchor) { + return regs.end[0] - prev; } else { - this.beg = regs.beg[0]; - this.end = regs.end[0]; + return regs.end[0]; } + } - if (ret < 0) return context.nil; - setMatched(); + private void succ() { + if (fixedAnchor) { + this.curr = this.regs.end[0]; + } else { + this.curr += this.regs.end[0]; + } + } - lastPos = pos; - if (succptr) pos += end; - return getstr ? extractBegLen(runtime, lastPos, end) : RubyFixnum.newFixnum(runtime, end); + private int currPtr() { + return str.getByteList().getBegin() + curr; } - @JRubyMethod(name = "scan", required = 1) + private int matchTarget() { + if (fixedAnchor) { + return str.getByteList().getBegin(); + } else { + return str.getByteList().getBegin() + curr; + } + } + + private int restLen() { + return str.size() - curr; + } + + // MRI: set_registers + private void setRegisters(int length) { + if (fixedAnchor) { + regs = new Region(curr, curr + length); + } else { + regs = new Region(0, length); + } + } + + @JRubyMethod(name = "scan") public IRubyObject scan(ThreadContext context, IRubyObject regex) { return scan(context, regex, true, true, true); } - @JRubyMethod(name = "match?", required = 1) + @JRubyMethod(name = "match?") public IRubyObject match_p(ThreadContext context, IRubyObject regex) { return scan(context, regex, false, false, true); } - @JRubyMethod(name = "skip", required = 1) + @JRubyMethod(name = "skip") public IRubyObject skip(ThreadContext context, IRubyObject regex) { return scan(context, regex, true, false, true); } - @JRubyMethod(name = "check", required = 1) + @JRubyMethod(name = "check") public IRubyObject check(ThreadContext context, IRubyObject regex) { return scan(context, regex, false, true, true); } - @JRubyMethod(name = "scan_full", required = 3) + @JRubyMethod(name = "scan_full") public IRubyObject scan_full(ThreadContext context, IRubyObject regex, IRubyObject s, IRubyObject f) { return scan(context, regex, s.isTrue(), f.isTrue(), true); } - @JRubyMethod(name = "scan_until", required = 1) + @JRubyMethod(name = "scan_until") public IRubyObject scan_until(ThreadContext context, IRubyObject regex) { return scan(context, regex, true, true, false); } - @JRubyMethod(name = "exist?", required = 1) + @JRubyMethod(name = "exist?") public IRubyObject exist_p(ThreadContext context, IRubyObject regex) { return scan(context, regex, false, false, false); } - @JRubyMethod(name = "skip_until", required = 1) + @JRubyMethod(name = "skip_until") public IRubyObject skip_until(ThreadContext context, IRubyObject regex) { return scan(context, regex, true, false, false); } - @JRubyMethod(name = "check_until", required = 1) + @JRubyMethod(name = "check_until") public IRubyObject check_until(ThreadContext context, IRubyObject regex) { return scan(context, regex, false, true, false); } - @JRubyMethod(name = "search_full", required = 3) + @JRubyMethod(name = "search_full") public IRubyObject search_full(ThreadContext context, IRubyObject regex, IRubyObject s, IRubyObject f) { return scan(context, regex, s.isTrue(), f.isTrue(), false); } + // MRI: adjust_register_to_matched private void adjustRegisters() { - beg = 0; - end = pos - lastPos; - regs = null; - } - - public IRubyObject getch(ThreadContext context) { - return getch19(context); + if (fixedAnchor) { + regs = new Region(prev, curr); + } else { + regs = new Region(0, curr - prev); + } } @JRubyMethod(name = "getch") - public IRubyObject getch19(ThreadContext context) { - return getchCommon(context, true); + public IRubyObject getch(ThreadContext context) { + return getchCommon(context); } - public IRubyObject getchCommon(ThreadContext context, boolean is1_9) { - check(); + public IRubyObject getchCommon(ThreadContext context) { + check(context); clearMatched(); + ByteList strBL = str.getByteList(); + int strSize = strBL.getRealSize(); + + if (curr >= strSize) return context.nil; Ruby runtime = context.runtime; - ByteList value = str.getByteList(); - if (pos >= value.getRealSize()) return context.nil; - int len; - if (is1_9) { - Encoding enc = str.getEncoding(); - len = enc.isSingleByte() ? 1 : StringSupport.length(enc, value.getUnsafeBytes(), value.getBegin() + pos, value.getBegin() + value.getRealSize()); - } else { - Encoding enc = runtime.getKCode().getEncoding(); - len = enc.isSingleByte() ? 1 : enc.length(value.getUnsafeBytes(), value.getBegin() + pos, value.getBegin() + value.getRealSize()); - } + Encoding strEnc = strBL.getEncoding(); + int setBeg = strBL.getBegin(); + + int len = strEnc.isSingleByte() ? 1 : StringSupport.length(strEnc, strBL.getUnsafeBytes(), setBeg + curr, setBeg + strSize); + len = Math.min(len, restLen()); - if (pos + len > value.getRealSize()) len = value.getRealSize() - pos; - lastPos = pos; - pos += len; + prev = curr; + curr += len; setMatched(); adjustRegisters(); - return extractRange(runtime, lastPos + beg, lastPos + end); + return extractRange(runtime, + prev + regs.beg[0], + prev + regs.end[0]); } @JRubyMethod(name = "get_byte") public IRubyObject get_byte(ThreadContext context) { - check(); + check(context); clearMatched(); - if (pos >= str.getByteList().getRealSize()) return context.nil; + if (curr >= str.getByteList().getRealSize()) return context.nil; - lastPos = pos; - pos++; + prev = curr; + curr++; setMatched(); adjustRegisters(); - return extractRange(context.runtime, lastPos + beg, lastPos + end); + return extractRange(context.runtime, prev + regs.beg[0], prev + regs.end[0]); } @JRubyMethod(name = "getbyte") public IRubyObject getbyte(ThreadContext context) { Ruby runtime = context.runtime; if (runtime.isVerbose()) { - runtime.getWarnings().warning(ID.DEPRECATED_METHOD, - "StringScanner#getbyte is obsolete; use #get_byte instead"); + runtime.getWarnings().warning(ID.DEPRECATED_METHOD, "StringScanner#getbyte is obsolete; use #get_byte instead"); } return get_byte(context); } - @JRubyMethod(name = "peek", required = 1) + @JRubyMethod(name = "peek") public IRubyObject peek(ThreadContext context, IRubyObject length) { - check(); + check(context); int len = RubyNumeric.num2int(length); if (len < 0) { @@ -421,51 +520,52 @@ public IRubyObject peek(ThreadContext context, IRubyObject length) { } ByteList value = str.getByteList(); - if (pos >= value.getRealSize()) return RubyString.newEmptyString(context.runtime).infectBy(str); - if (pos + len > value.getRealSize()) len = value.getRealSize() - pos; + if (curr >= value.getRealSize()) return RubyString.newEmptyString(context.runtime); + if (curr + len > value.getRealSize()) len = value.getRealSize() - curr; - return extractBegLen(context.runtime, pos, len); + return extractBegLen(context.runtime, curr, len); } - @JRubyMethod(name = "peep", required = 1) + @JRubyMethod(name = "peep") public IRubyObject peep(ThreadContext context, IRubyObject length) { Ruby runtime = context.runtime; if (runtime.isVerbose()) { - runtime.getWarnings().warning( - ID.DEPRECATED_METHOD, "StringScanner#peep is obsolete; use #peek instead"); + runtime.getWarnings().warning(ID.DEPRECATED_METHOD, "StringScanner#peep is obsolete; use #peek instead"); } return peek(context, length); } @JRubyMethod(name = "unscan") - public IRubyObject unscan() { - check(); - Ruby runtime = getRuntime(); + public IRubyObject unscan(ThreadContext context) { + check(context); if (!isMatched()) { + Ruby runtime = context.runtime; + RubyClass errorClass = runtime.getClass("StringScanner").getClass("Error"); throw RaiseException.from(runtime, errorClass, "unscan failed: previous match had failed"); } - pos = lastPos; + + curr = prev; clearMatched(); + return this; } @JRubyMethod(name = "beginning_of_line?", alias = "bol?") - public IRubyObject bol_p() { - check(); - Ruby runtime = getRuntime(); + public IRubyObject bol_p(ThreadContext context) { + check(context); ByteList value = str.getByteList(); - if (pos > value.getRealSize()) return runtime.getNil(); - if (pos == 0) return runtime.getTrue(); - return value.getUnsafeBytes()[(value.getBegin() + pos) - 1] == (byte)'\n' ? runtime.getTrue() : runtime.getFalse(); + if (curr > value.getRealSize()) return context.nil; + if (curr == 0) return context.tru; + return value.getUnsafeBytes()[(value.getBegin() + curr) - 1] == (byte) '\n' ? context.tru : context.fals; } @JRubyMethod(name = "eos?") public RubyBoolean eos_p(ThreadContext context) { - check(); - return pos >= str.getByteList().getRealSize() ? context.tru : context.fals; + check(context); + return curr >= str.getByteList().getRealSize() ? context.tru : context.fals; } @JRubyMethod(name = "empty?") @@ -479,28 +579,28 @@ public RubyBoolean empty_p(ThreadContext context) { @JRubyMethod(name = "rest?") public RubyBoolean rest_p(ThreadContext context) { - check(); - return pos >= str.getByteList().getRealSize() ? context.fals : context.tru; + check(context); + return curr >= str.getByteList().getRealSize() ? context.fals : context.tru; } @JRubyMethod(name = "matched?") public RubyBoolean matched_p(ThreadContext context) { - check(); + check(context); return isMatched() ? context.tru : context.fals; } @JRubyMethod(name = "matched") public IRubyObject matched(ThreadContext context) { - check(); + check(context); if (!isMatched()) return context.nil; - return extractRange(context.runtime, lastPos + beg, lastPos + end); + return extractRange(context.runtime, prev + regs.beg[0], prev + regs.end[0]); } @JRubyMethod(name = "matched_size") - public IRubyObject matched_size() { - check(); - if (!isMatched()) return getRuntime().getNil(); - return RubyFixnum.newFixnum(getRuntime(), end - beg); + public IRubyObject matched_size(ThreadContext context) { + check(context); + if (!isMatched()) return context.nil; + return RubyFixnum.newFixnum(context.runtime, regs.end[0] - regs.beg[0]); } @JRubyMethod(name = "matchedsize") @@ -512,10 +612,10 @@ public IRubyObject matchedsize(ThreadContext context) { return matched_size(); } - @JRubyMethod(name = "[]", required = 1) + @JRubyMethod(name = "[]") public IRubyObject op_aref(ThreadContext context, IRubyObject idx) { - Ruby runtime = context.runtime; - check(); + check(context); + if (!isMatched()) { return context.nil; } @@ -523,58 +623,64 @@ public IRubyObject op_aref(ThreadContext context, IRubyObject idx) { if (idx instanceof RubySymbol || idx instanceof RubyString) { if (pattern == null) return context.nil; } + + Ruby runtime = context.runtime; + int i = RubyMatchData.backrefNumber(runtime, pattern, regs, idx); - int numRegs = regs == null ? 1 : regs.numRegs; + int numRegs = regs.numRegs; if (i < 0) i += numRegs; - if (i < 0 || i >= numRegs) { + if (i < 0 || i >= numRegs || regs.beg[i] == -1) { return context.nil; } - if (regs == null) { - assert i == 0; - if (beg == -1) return context.nil; - return extractRange(runtime, lastPos + beg, lastPos + end); - } else { - if (regs.beg[i] == -1) return context.nil; - return extractRange(context.runtime, lastPos + regs.beg[i], lastPos + regs.end[i]); - } + return extractRange(context.runtime, prev + regs.beg[i], prev + regs.end[i]); } @JRubyMethod(name = "pre_match") public IRubyObject pre_match(ThreadContext context) { - check(); + check(context); if (!isMatched()) { return context.nil; } - return extractRange(context.runtime, 0, lastPos + beg); + return extractRange(context.runtime, 0, prev + regs.beg[0]); } @JRubyMethod(name = "post_match") public IRubyObject post_match(ThreadContext context) { - check(); + check(context); + if (!isMatched()) { return context.nil; } - return extractRange(context.runtime, lastPos + end, str.getByteList().getRealSize()); + + return extractRange(context.runtime, prev + regs.end[0], str.getByteList().getRealSize()); } @JRubyMethod(name = "rest") public IRubyObject rest(ThreadContext context) { - check(); + check(context); + Ruby runtime = context.runtime; + ByteList value = str.getByteList(); - if (pos >= value.getRealSize()) { - return RubyString.newEmptyString(context.runtime).infectBy(str); + + if (curr >= value.getRealSize()) { + return RubyString.newEmptyString(runtime); } - return extractRange(context.runtime, pos, value.getRealSize()); + + return extractRange(runtime, curr, value.getRealSize()); } @JRubyMethod(name = "rest_size") - public RubyFixnum rest_size() { - check(); + public RubyFixnum rest_size(ThreadContext context) { + check(context); + Ruby runtime = context.runtime; + ByteList value = str.getByteList(); - if (pos >= value.getRealSize()) return RubyFixnum.zero(getRuntime()); - return RubyFixnum.newFixnum(getRuntime(), value.getRealSize() - pos); + + if (curr >= value.getRealSize()) return RubyFixnum.zero(runtime); + + return RubyFixnum.newFixnum(runtime, value.getRealSize() - curr); } @JRubyMethod(name = "restsize") @@ -583,21 +689,25 @@ public RubyFixnum restsize(ThreadContext context) { if (runtime.isVerbose()) { runtime.getWarnings().warning(ID.DEPRECATED_METHOD, "StringScanner#restsize is obsolete; use #rest_size instead"); } - return rest_size(); + return rest_size(context); } @JRubyMethod(name = "inspect") @Override public IRubyObject inspect() { if (str == null) return inspect("(uninitialized)"); - if (pos >= str.getByteList().getRealSize()) return inspect("fin"); - if (pos == 0) return inspect(pos + "/" + str.getByteList().getRealSize() + " @ " + inspect2()); - return inspect(pos + "/" + str.getByteList().getRealSize() + " " + inspect1() + " @ " + inspect2()); + if (curr >= str.getByteList().getRealSize()) return inspect("fin"); + if (curr == 0) return inspect(curr + "/" + str.getByteList().getRealSize() + " @ " + inspect2()); + return inspect(curr + "/" + str.getByteList().getRealSize() + " " + inspect1() + " @ " + inspect2()); + } + + @JRubyMethod(name = "fixed_anchor?") + public IRubyObject fixed_anchor_p(ThreadContext context) { + return RubyBoolean.newBoolean(context, fixedAnchor); } private IRubyObject inspect(String msg) { RubyString result = getRuntime().newString("#<" + getMetaClass() + " " + msg + ">"); - if (str != null) result.infectBy(str); return result; } @@ -607,27 +717,21 @@ private IRubyObject inspect(String msg) { private IRubyObject inspect1() { final Ruby runtime = getRuntime(); - if (pos == 0) return RubyString.newEmptyString(runtime); - if (pos > INSPECT_LENGTH) { - return RubyString.newStringNoCopy(runtime, DOT_BYTES). - append(str.substr(runtime, pos - INSPECT_LENGTH, INSPECT_LENGTH)).inspect(); + if (curr == 0) return RubyString.newEmptyString(runtime); + if (curr > INSPECT_LENGTH) { + return RubyString.newStringNoCopy(runtime, DOT_BYTES).append(str.substr(runtime, curr - INSPECT_LENGTH, INSPECT_LENGTH)).inspect(); } - return str.substr(runtime, 0, pos).inspect(); + return str.substr(runtime, 0, curr).inspect(); } private IRubyObject inspect2() { final Ruby runtime = getRuntime(); - if (pos >= str.getByteList().getRealSize()) return RubyString.newEmptyString(runtime); - int len = str.getByteList().getRealSize() - pos; + if (curr >= str.getByteList().getRealSize()) return RubyString.newEmptyString(runtime); + int len = str.getByteList().getRealSize() - curr; if (len > INSPECT_LENGTH) { - return ((RubyString) str.substr(runtime, pos, INSPECT_LENGTH)).cat(DOT_BYTES).inspect(); + return ((RubyString) str.substr(runtime, curr, INSPECT_LENGTH)).cat(DOT_BYTES).inspect(); } - return str.substr(runtime, pos, len).inspect(); - } - - @JRubyMethod(name = "must_C_version", meta = true) - public static IRubyObject mustCversion(IRubyObject recv) { - return recv; + return str.substr(runtime, curr, len).inspect(); } @JRubyMethod(name = "size") @@ -646,11 +750,10 @@ public IRubyObject captures(ThreadContext context) { Ruby runtime = context.runtime; numRegs = regs.numRegs; - newAry = RubyArray.newArray(runtime, numRegs); + newAry = RubyArray.newArray(runtime, numRegs); for (i = 1; i < numRegs; i++) { - IRubyObject str = extractRange(runtime, lastPos + regs.beg[i], - lastPos + regs.end[i]); + IRubyObject str = extractRange(runtime, prev + regs.beg[i], prev + regs.end[i]); newAry.push(str); } @@ -673,4 +776,74 @@ public IRubyObject values_at(ThreadContext context, IRubyObject[] args) { return newAry; } + + @Deprecated + public IRubyObject initialize(IRubyObject[] args, Block unusedBlock) { + str = args[0].convertToString(); + return this; + } + + @Deprecated + public IRubyObject initialize_copy(IRubyObject other) { + return initialize_copy(getRuntime().getCurrentContext(), other); + } + + @Deprecated + public IRubyObject concat(IRubyObject obj) { + return concat(getRuntime().getCurrentContext(), obj); + } + + @Deprecated + public RubyFixnum pos() { + return pos(getRuntime().getCurrentContext()); + } + + @Deprecated + public IRubyObject set_pos(IRubyObject pos) { + return set_pos(getRuntime().getCurrentContext(), pos); + } + + @Deprecated + public IRubyObject getch19(ThreadContext context) { + return getch(context); + } + + @Deprecated + public IRubyObject reset() { + return reset(getRuntime().getCurrentContext()); + } + + @Deprecated + public IRubyObject unscan() { + return unscan(getRuntime().getCurrentContext()); + } + + @Deprecated + public IRubyObject matched_size() { + return matched_size(getRuntime().getCurrentContext()); + } + + @Deprecated + public IRubyObject bol_p() { + return bol_p(getRuntime().getCurrentContext()); + } + + @Deprecated + public RubyFixnum rest_size() { + return rest_size(getRuntime().getCurrentContext()); + } + + @Deprecated + public IRubyObject getchCommon(ThreadContext context, boolean is1_9) { + return getchCommon(context); + } + + /** + * @deprecated Only defined for backward compatibility in CRuby. + */ + @Deprecated + @JRubyMethod(name = "must_C_version", meta = true) + public static IRubyObject mustCversion(IRubyObject recv) { + return recv; + } } From f053115ef906b9d5cbc568c676e777c55df441ad Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Mon, 7 Feb 2022 17:53:56 -0600 Subject: [PATCH 3/5] Add JRuby logic to gemspec and CI --- .github/workflows/ci.yml | 1 + strscan.gemspec | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 987ae8fa25..d4ea930827 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,6 +24,7 @@ jobs: - 2.5 - 2.4 - debug + - jruby-9.3 include: - { os: windows-latest , ruby: mingw } - { os: windows-latest , ruby: mswin } diff --git a/strscan.gemspec b/strscan.gemspec index 5d8119ea4c..871636d86c 100644 --- a/strscan.gemspec +++ b/strscan.gemspec @@ -17,12 +17,20 @@ Gem::Specification.new do |s| s.description = "Provides lexical scanning operations on a String." s.require_path = %w{lib} - s.files = %w{ext/strscan/extconf.rb ext/strscan/strscan.c} - s.extensions = %w{ext/strscan/extconf.rb} + + jruby = true if Gem::Platform.new('java') =~ s.platform or RUBY_ENGINE == 'jruby' + + if jruby + s.files = %w{lib/strscan.jar lib/strscan.rb} + s.platform = "java" + else + s.files = %w{ext/strscan/extconf.rb ext/strscan/strscan.c} + s.extensions = %w{ext/strscan/extconf.rb} + end s.required_ruby_version = ">= 2.4.0" - s.authors = ["Minero Aoki", "Sutou Kouhei"] - s.email = [nil, "kou@cozmixng.org"] + s.authors = ["Minero Aoki", "Sutou Kouhei", "Charles Oliver Nutter"] + s.email = [nil, "kou@cozmixng.org", "headius@headius.com"] s.homepage = "https://github.com/ruby/strscan" s.licenses = ["Ruby", "BSD-2-Clause"] end From 8d22c0c88bbcf58cfc1de2cf4cdd0803235a0594 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Mon, 7 Feb 2022 17:58:01 -0600 Subject: [PATCH 4/5] Add ruby-maven to dev dependencies for JRuby --- Gemfile | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Gemfile b/Gemfile index 7057b125b5..cebc9de0e7 100644 --- a/Gemfile +++ b/Gemfile @@ -2,6 +2,9 @@ source 'https://rubygems.org' gemspec -gem "rake-compiler" -gem "benchmark-driver" -gem "test-unit" +group :development do + gem "rake-compiler" + gem "benchmark-driver" + gem "test-unit" + gem "ruby-maven", :platforms => :jruby +end From 502e8910700e89f3681cb168a67cf218764d437f Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Mon, 7 Feb 2022 18:05:03 -0600 Subject: [PATCH 5/5] Try jruby-head JRuby 9.3 does not load strscan as a default gem, so it does not pick up the new version in this gem. --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d4ea930827..ba3c0ab427 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: - 2.5 - 2.4 - debug - - jruby-9.3 + - jruby-head include: - { os: windows-latest , ruby: mingw } - { os: windows-latest , ruby: mswin }