From acf7b67895dbc24ea7efdfd3a196fc50f466204f Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 12 Dec 2024 16:26:41 -0600 Subject: [PATCH 1/5] Re-port scan_base16_integer from the C code This may help prevent ArrayIndexOutOfBounds randomly seen in CI. See https://github.com/ruby/strscan/issues/125 --- .../jruby/ext/strscan/RubyStringScanner.java | 44 ++++++++++--------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java b/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java index 7e7c492b9b..23364d35d8 100644 --- a/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java +++ b/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java @@ -608,40 +608,44 @@ public IRubyObject scan_base16_integer(ThreadContext context) { throw runtime.newEncodingCompatibilityError("ASCII incompatible encoding: " + str.getEncoding()); } - ByteList bytes = str.getByteList(); int curr = this.curr; - int bite = bytes.get(curr); - if (bite == '-' || bite == '+') { - curr++; - bite = bytes.get(curr); + int remaining_len = bytes.realSize() - curr; + + if (remaining_len <= 0) { + return context.nil; } - if (bite == '0' && bytes.get(curr + 1) == 'x') { - curr += 2; - bite = bytes.get(curr); + int len = 0; + + if (bytes.get(len) == '-' || bytes.get(len) == '+') { + len++; } - if (!((bite >= '0' && bite <= '9') || (bite >= 'a' && bite <= 'f') || (bite >= 'A' && bite <= 'F'))) { - return context.nil; + if ((remaining_len >= (len + 2)) && bytes.get(len) == '0' && bytes.get(len + 1) == 'x') { + len += 2; } - while ((bite >= '0' && bite <= '9') || (bite >= 'a' && bite <= 'f') || (bite >= 'A' && bite <= 'F')) { - curr++; - if (curr >= bytes.getRealSize()) { - break; - } - bite = bytes.get(curr); + if (len >= remaining_len || !isHexChar(bytes.get(len))) { + return context.nil; } - int length = curr - this.curr; - prev = this.curr; - this.curr = curr; setMatched(); adjustRegisters(); + prev = curr; + + while (len < remaining_len && isHexChar(bytes.get(len))) { + len++; + } + + this.curr = curr + len; + + return ConvertBytes.byteListToInum(runtime, bytes, 0, len, 16, true); + } - return ConvertBytes.byteListToInum(runtime, bytes, prev, curr, 16, true); + private static boolean isHexChar(int c) { + return Character.isDigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'); } @JRubyMethod(name = "unscan") From 854c3e3877a111a1c217713978a0ff32a58fcc2b Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 12 Dec 2024 16:36:39 -0600 Subject: [PATCH 2/5] Additional tweaks for scan_base16_integer * Parse base should be from current pointer so add that to the get calls. * Pull out ascii check into util method. * Rename curr local var to ptr to better match C version. --- .../jruby/ext/strscan/RubyStringScanner.java | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java b/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java index 23364d35d8..0a57129c44 100644 --- a/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java +++ b/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java @@ -604,14 +604,12 @@ public IRubyObject scan_base16_integer(ThreadContext context) { check(context); clearMatched(); - if (!str.getEncoding().isAsciiCompatible()) { - throw runtime.newEncodingCompatibilityError("ASCII incompatible encoding: " + str.getEncoding()); - } + strscanMustAsciiCompat(runtime); ByteList bytes = str.getByteList(); - int curr = this.curr; + int ptr = this.curr; - int remaining_len = bytes.realSize() - curr; + int remaining_len = bytes.realSize() - ptr; if (remaining_len <= 0) { return context.nil; @@ -619,31 +617,37 @@ public IRubyObject scan_base16_integer(ThreadContext context) { int len = 0; - if (bytes.get(len) == '-' || bytes.get(len) == '+') { + if (bytes.get(ptr + len) == '-' || bytes.get(ptr + len) == '+') { len++; } - if ((remaining_len >= (len + 2)) && bytes.get(len) == '0' && bytes.get(len + 1) == 'x') { + if ((remaining_len >= (len + 2)) && bytes.get(ptr + len) == '0' && bytes.get(ptr + len + 1) == 'x') { len += 2; } - if (len >= remaining_len || !isHexChar(bytes.get(len))) { + if (len >= remaining_len || !isHexChar(bytes.get(ptr + len))) { return context.nil; } setMatched(); adjustRegisters(); - prev = curr; + prev = ptr; - while (len < remaining_len && isHexChar(bytes.get(len))) { + while (len < remaining_len && isHexChar(bytes.get(ptr + len))) { len++; } - this.curr = curr + len; + this.curr = ptr + len; return ConvertBytes.byteListToInum(runtime, bytes, 0, len, 16, true); } + private void strscanMustAsciiCompat(Ruby runtime) { + if (!str.getEncoding().isAsciiCompatible()) { + throw runtime.newEncodingCompatibilityError("ASCII incompatible encoding: " + str.getEncoding()); + } + } + private static boolean isHexChar(int c) { return Character.isDigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'); } From de7d9c0ac814441fe7527320b7c57a96c3a6409c Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 12 Dec 2024 16:38:17 -0600 Subject: [PATCH 3/5] Re-port scan_base10_integer from the C code This is to align the base10 code with the freshly-ported base16 code. See https://github.com/ruby/strscan/issues/125 --- .../jruby/ext/strscan/RubyStringScanner.java | 41 +++++++++---------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java b/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java index 0a57129c44..ad700284a8 100644 --- a/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java +++ b/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java @@ -563,39 +563,36 @@ public IRubyObject scan_base10_integer(ThreadContext context) { check(context); clearMatched(); - if (!str.getEncoding().isAsciiCompatible()) { - throw runtime.newEncodingCompatibilityError("ASCII incompatible encoding: " + str.getEncoding()); - } - + strscanMustAsciiCompat(runtime); ByteList bytes = str.getByteList(); - int curr = this.curr; + int ptr = curr; + int len = 0; - int bite = bytes.get(curr); - if (bite == '-' || bite == '+') { - curr++; - bite = bytes.get(curr); - } + int remaining_len = bytes.realSize() - curr; - if (!(bite >= '0' && bite <= '9')) { + if (remaining_len <= 0) { return context.nil; } - while (bite >= '0' && bite <= '9') { - curr++; - if (curr >= bytes.getRealSize()) { - break; - } - bite = bytes.get(curr); + if (bytes.get(ptr + len) == '-' || bytes.get(ptr + len) == '+') { + len++; + } + + if (!Character.isDigit(bytes.get(ptr + len))) { + return context.nil; } - int length = curr - this.curr; - prev = this.curr; - this.curr = curr; setMatched(); - adjustRegisters(); + prev = ptr; + + while (len < remaining_len && Character.isDigit(bytes.get(ptr + len))) { + len++; + } + + this.curr = ptr + len; - return ConvertBytes.byteListToInum(runtime, bytes, prev, curr, 10, true); + return ConvertBytes.byteListToInum(runtime, bytes, 0, len, 10, true); } @JRubyMethod(name = "scan_base16_integer", visibility = PRIVATE) From 79f682fe96effdbbc46d5642081dee97fb949049 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 12 Dec 2024 16:44:30 -0600 Subject: [PATCH 4/5] Add env for JRuby to force compile for better errors This is temporary until we're sure that the AIOOB from ruby/strscan#125 has been fixed by ruby/strscan#127. --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 56d1dce1fc..a03fbbcda8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,6 +4,9 @@ on: - push - pull_request +env: + JRUBY_OPTS: "-X+C" # temporarily force JRuby to compile, so Java exception trace will contain .rb lines + jobs: ruby-versions: uses: ruby/actions/.github/workflows/ruby_versions.yml@master From 9b6f02cb5287589696d4ff2a6e7c1442c48ad211 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 12 Dec 2024 17:08:40 -0600 Subject: [PATCH 5/5] Add strscanParseInteger equivalent This aligns with C code that does the final parse and curr update in strscan_parse_integer. --- .../org/jruby/ext/strscan/RubyStringScanner.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java b/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java index ad700284a8..c68c57edea 100644 --- a/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java +++ b/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java @@ -39,6 +39,7 @@ import org.jruby.RubyClass; import org.jruby.RubyFixnum; import org.jruby.RubyHash; +import org.jruby.RubyInteger; import org.jruby.RubyMatchData; import org.jruby.RubyNumeric; import org.jruby.RubyObject; @@ -590,9 +591,7 @@ public IRubyObject scan_base10_integer(ThreadContext context) { len++; } - this.curr = ptr + len; - - return ConvertBytes.byteListToInum(runtime, bytes, 0, len, 10, true); + return strscanParseInteger(runtime, bytes, ptr, len, 10); } @JRubyMethod(name = "scan_base16_integer", visibility = PRIVATE) @@ -634,9 +633,13 @@ public IRubyObject scan_base16_integer(ThreadContext context) { len++; } + return strscanParseInteger(runtime, bytes, ptr, len, 16); + } + + private RubyInteger strscanParseInteger(Ruby runtime, ByteList bytes, int ptr, int len, int base) { this.curr = ptr + len; - return ConvertBytes.byteListToInum(runtime, bytes, 0, len, 16, true); + return ConvertBytes.byteListToInum(runtime, bytes, ptr, len, base, true); } private void strscanMustAsciiCompat(Ruby runtime) {