Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ext/jruby/lib/strscan.rb
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
require 'strscan.jar'
JRuby::Util.load_ext("org.jruby.ext.strscan.StringScannerLibrary")
require "strscan/strscan"
50 changes: 48 additions & 2 deletions ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -557,8 +557,8 @@ public IRubyObject peep(ThreadContext context, IRubyObject length) {
return peek(context, length);
}

@JRubyMethod(name = "scan_integer")
public IRubyObject scan_integer(ThreadContext context) {
@JRubyMethod(name = "scan_base10_integer", visibility = PRIVATE)
public IRubyObject scan_base10_integer(ThreadContext context) {
final Ruby runtime = context.runtime;
check(context);
clearMatched();
Expand Down Expand Up @@ -598,6 +598,52 @@ public IRubyObject scan_integer(ThreadContext context) {
return ConvertBytes.byteListToInum(runtime, bytes, prev, curr, 10, true);
}

/**
 * Scans a hexadecimal integer starting at the current scan position.
 *
 * Accepts an optional leading sign, an optional lowercase "0x" prefix and
 * then one or more hex digits (0-9, a-f, A-F). On success the scan position
 * is advanced past the digits, the match state is recorded and the parsed
 * Integer is returned. When no hex digit is found, nil is returned and the
 * scan position is left untouched.
 *
 * @param context the current thread context
 * @return the parsed Integer, or nil when nothing matched
 * @throws org.jruby.exceptions.RaiseException Encoding::CompatibilityError
 *         when the scanned string uses an ASCII incompatible encoding
 */
@JRubyMethod(name = "scan_base16_integer", visibility = PRIVATE)
public IRubyObject scan_base16_integer(ThreadContext context) {
    final Ruby runtime = context.runtime;
    check(context);
    clearMatched();

    if (!str.getEncoding().isAsciiCompatible()) {
        throw runtime.newEncodingCompatibilityError("ASCII incompatible encoding: " + str.getEncoding());
    }

    ByteList bytes = str.getByteList();
    // Logical end of the string; every read below is checked against it so
    // we never look at bytes past the real size of the ByteList.
    final int end = bytes.getRealSize();
    int curr = this.curr;

    // Nothing left to scan.
    if (curr >= end) {
        return context.nil;
    }

    // Optional sign.
    int bite = bytes.get(curr);
    if (bite == '-' || bite == '+') {
        curr++;
    }

    // Optional "0x" prefix: only consumed when both bytes are really there.
    if (curr + 1 < end && bytes.get(curr) == '0' && bytes.get(curr + 1) == 'x') {
        curr += 2;
    }

    // At least one hex digit is required after the sign/prefix.
    if (curr >= end || !isBase16Digit(bytes.get(curr))) {
        return context.nil;
    }

    // The byte at curr is known to be a digit, so consume it and keep going
    // while further digits are available.
    do {
        curr++;
    } while (curr < end && isBase16Digit(bytes.get(curr)));

    prev = this.curr;
    this.curr = curr;
    setMatched();
    adjustRegisters();

    return ConvertBytes.byteListToInum(runtime, bytes, prev, curr, 16, true);
}

/** Returns true when the given byte value is an ASCII hexadecimal digit. */
private static boolean isBase16Digit(int bite) {
    return (bite >= '0' && bite <= '9')
            || (bite >= 'a' && bite <= 'f')
            || (bite >= 'A' && bite <= 'F');
}

@JRubyMethod(name = "unscan")
public IRubyObject unscan(ThreadContext context) {
check(context);
Expand Down
90 changes: 66 additions & 24 deletions ext/strscan/strscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
extern size_t onig_region_memsize(const struct re_registers *regs);
#endif

#include <ctype.h>
#include <stdbool.h>

#define STRSCAN_VERSION "3.1.1"
Expand Down Expand Up @@ -116,7 +115,7 @@ static VALUE strscan_get_byte _((VALUE self));
static VALUE strscan_getbyte _((VALUE self));
static VALUE strscan_peek _((VALUE self, VALUE len));
static VALUE strscan_peep _((VALUE self, VALUE len));
static VALUE strscan_scan_integer _((VALUE self));
static VALUE strscan_scan_base10_integer _((VALUE self));
static VALUE strscan_unscan _((VALUE self));
static VALUE strscan_bol_p _((VALUE self));
static VALUE strscan_eos_p _((VALUE self));
Expand Down Expand Up @@ -1268,21 +1267,26 @@ strscan_peep(VALUE self, VALUE vlen)
return strscan_peek(self, vlen);
}

/*
* call-seq:
* scan_integer
*
* Equivalent to #scan with a [+-]?\d+ pattern, and returns an Integer or nil.
*
* The scanned string must be encoded with an ASCII compatible encoding, otherwise
* Encoding::CompatibilityError will be raised.
*/
static VALUE
strscan_scan_integer(VALUE self)
strscan_parse_integer(struct strscanner *p, int base, long len)
{
char *ptr, *buffer;
long len = 0;
VALUE buffer_v, integer;

char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);

MEMCPY(buffer, CURPTR(p), char, len);
buffer[len] = '\0';
integer = rb_cstr2inum(buffer, base);
RB_ALLOCV_END(buffer_v);
p->curr += len;
return integer;
}

static VALUE
strscan_scan_base10_integer(VALUE self)
{
char *ptr;
long len = 0;
struct strscanner *p;

GET_SCANNER(self, p);
Expand All @@ -1302,25 +1306,60 @@ strscan_scan_integer(VALUE self)
len++;
}

if (!isdigit(ptr[len])) {
if (!rb_isdigit(ptr[len])) {
return Qnil;
}

MATCHED(p);
p->prev = p->curr;

while (len < remaining_len && isdigit(ptr[len])) {
while (len < remaining_len && rb_isdigit(ptr[len])) {
len++;
}

buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
return strscan_parse_integer(p, 10, len);
}

MEMCPY(buffer, CURPTR(p), char, len);
buffer[len] = '\0';
integer = rb_cstr2inum(buffer, 10);
RB_ALLOCV_END(buffer_v);
p->curr += len;
return integer;
/*
 * Scans a hexadecimal integer at the current scan position: an optional
 * sign, an optional lowercase "0x" prefix, then one or more hex digits.
 *
 * Returns the parsed Integer, or nil when no hex digit is found.  The
 * string must be ASCII compatible (enforced by rb_must_asciicompat).
 */
static VALUE
strscan_scan_base16_integer(VALUE self)
{
    struct strscanner *p;
    char *cursor;
    long avail;
    long consumed = 0;

    GET_SCANNER(self, p);
    CLEAR_MATCH_STATUS(p);

    rb_must_asciicompat(p->str);

    cursor = CURPTR(p);
    avail = S_RESTLEN(p);

    /* Nothing left to scan. */
    if (avail < 1) {
        return Qnil;
    }

    /* Optional sign. */
    switch (cursor[0]) {
      case '+':
      case '-':
        consumed = 1;
        break;
      default:
        break;
    }

    /* Optional "0x" prefix, only when both bytes are available. */
    if (avail - consumed >= 2 &&
        cursor[consumed] == '0' &&
        cursor[consumed + 1] == 'x') {
        consumed += 2;
    }

    /* At least one hex digit must follow the sign/prefix. */
    if (consumed >= avail) {
        return Qnil;
    }
    if (!rb_isxdigit(cursor[consumed])) {
        return Qnil;
    }

    MATCHED(p);
    p->prev = p->curr;

    /* The byte at `consumed` is a known digit; take it and any that follow. */
    do {
        consumed++;
    } while (consumed < avail && rb_isxdigit(cursor[consumed]));

    return strscan_parse_integer(p, 16, consumed);
}

/*
Expand Down Expand Up @@ -2261,7 +2300,8 @@ Init_strscan(void)
rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0);
rb_define_method(StringScanner, "peep", strscan_peep, 1);

rb_define_method(StringScanner, "scan_integer", strscan_scan_integer, 0);
rb_define_private_method(StringScanner, "scan_base10_integer", strscan_scan_base10_integer, 0);
rb_define_private_method(StringScanner, "scan_base16_integer", strscan_scan_base16_integer, 0);

rb_define_method(StringScanner, "unscan", strscan_unscan, 0);

Expand Down Expand Up @@ -2290,4 +2330,6 @@ Init_strscan(void)
rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);

rb_define_method(StringScanner, "named_captures", strscan_named_captures, 0);

rb_require("strscan/strscan");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We may need to use rb_funcall(rb_mKernel, rb_intern("require"), 1, rb_str_new_cstr("strscan/strscan")) or something because RubyGems may replace rb_require().

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you sure? Other gems like psych, json etc use rb_require directly without known issues.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I didn't know it.
OK. Let's use this. I hope that this is a temporary workaround...

}
25 changes: 25 additions & 0 deletions lib/strscan/strscan.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# frozen_string_literal: true

class StringScanner
  # call-seq:
  #   scan_integer(base: 10)
  #
  # Scans an integer at the current position and returns it as an Integer,
  # or returns nil when nothing matches.
  #
  # With `base: 10` (the default) this behaves like `#scan` with a
  # `[+-]?\d+` pattern.
  #
  # With `base: 16` this behaves like `#scan` with a
  # `[+-]?(0x)?[0-9a-fA-F]+` pattern.
  #
  # Any other `base` raises ArgumentError.
  #
  # The scanned string must use an ASCII compatible encoding, otherwise
  # Encoding::CompatibilityError is raised.
  def scan_integer(base: 10)
    if 10 == base
      scan_base10_integer
    elsif 16 == base
      scan_base16_integer
    else
      raise ArgumentError, "Unsupported integer base: #{base.inspect}, expected 10 or 16"
    end
  end
end
9 changes: 6 additions & 3 deletions strscan.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,17 @@ Gem::Specification.new do |s|
files = [
"COPYING",
"LICENSE.txt",
"lib/strscan/strscan.rb"
]

s.require_paths = %w{lib}

if RUBY_ENGINE == "jruby"
s.require_paths = %w{ext/jruby/lib lib}
files << "ext/jruby/lib/strscan.rb"
files << "lib/strscan.jar"
files << "ext/jruby/lib/strscan.rb"
s.require_paths += %w{ext/jruby/lib}
s.platform = "java"
else
s.require_paths = %w{lib}
files << "ext/strscan/extconf.rb"
files << "ext/strscan/strscan.c"
s.rdoc_options << "-idoc"
Expand Down
75 changes: 75 additions & 0 deletions test/strscan/test_stringscanner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -945,6 +945,81 @@ def test_scan_integer_encoding
s.scan_integer
end
end

# Exercises scan_integer(base: 16): plain hex digits, the optional "0x"
# prefix, optional signs, inputs that must not match, and a very large value.
def test_scan_integer_base_16
  omit "scan_integer isn't implemented on TruffleRuby yet" if RUBY_ENGINE == "truffleruby"

  # Each row is [input, expected Integer, expected position after the scan].
  matching_cases = [
    ['0',         0x0,       1],
    ['abc',       0xabc,     3],
    ['123abc',    0x123abc,  6],
    ['0x123abc',  0x123abc,  8],
    ['0x123ABC',  0x123abc,  8],
    ['-0x123ABC', -0x123abc, 9],
    ['+0x123ABC', 0x123abc,  9],
    ['-123abc',   -0x123abc, 7],
    ['+123',      0x123,     4],
    ['-abc',      -0xabc,    4],
  ]

  # Calls are parenthesized so that negative expected values are never
  # parsed as an ambiguous first argument.
  matching_cases.each do |input, expected, expected_pos|
    s = create_string_scanner(input)
    assert_equal(expected, s.scan_integer(base: 16), input)
    assert_equal(expected_pos, s.pos, input)
    assert_predicate(s, :matched?, input)
  end

  # A sign and/or "0x" prefix with no digit after it must not match and
  # must leave the position untouched.
  ['0x', '-0x', '+0x'].each do |input|
    s = create_string_scanner(input)
    assert_nil(s.scan_integer(base: 16), input)
    assert_equal(0, s.pos, input)
    refute_predicate(s, :matched?, input)
  end

  # A value with far more digits than any fixed-width integer can hold.
  huge_integer = 'F' * 2_000
  s = create_string_scanner(huge_integer)
  assert_equal(huge_integer.to_i(16), s.scan_integer(base: 16))
  assert_equal(2_000, s.pos)
  assert_predicate(s, :matched?)
end
end

class TestStringScanner < Test::Unit::TestCase
Expand Down