Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.util.ByteList;
import org.jruby.util.StringSupport;
import org.jruby.util.ConvertBytes;

import java.util.Iterator;

Expand Down Expand Up @@ -556,6 +557,47 @@ public IRubyObject peep(ThreadContext context, IRubyObject length) {
return peek(context, length);
}

/**
 * Scans an optionally signed base-10 integer ({@code [+-]?\d+}) starting at
 * the current scan position.
 *
 * <p>On success the scan position is advanced past the digits, the match
 * state is recorded, and the parsed value is returned. When no integer is
 * present at the current position (including when the scanner is already at
 * the end of the string, or only a lone sign remains) the scanner is left
 * unmatched, the position is not moved, and {@code nil} is returned.
 *
 * @param context the current thread context
 * @return the scanned Integer, or {@code nil} if no integer starts here
 * @throws org.jruby.exceptions.RaiseException Encoding::CompatibilityError
 *         when the scanned string's encoding is not ASCII compatible
 */
@JRubyMethod(name = "scan_integer")
public IRubyObject scan_integer(ThreadContext context) {
    final Ruby runtime = context.runtime;
    check(context);
    clearMatched();

    if (!str.getEncoding().isAsciiCompatible()) {
        throw runtime.newEncodingCompatibilityError("ASCII incompatible encoding: " + str.getEncoding());
    }

    final ByteList bytes = str.getByteList();
    final int end = bytes.getRealSize();
    final int start = this.curr;
    int pos = start;

    // Nothing left to scan: bail out before touching any byte. The index is
    // guarded explicitly rather than relying on ByteList.get to reject it.
    if (pos >= end) {
        return context.nil;
    }

    int bite = bytes.get(pos);
    if (bite == '-' || bite == '+') {
        pos++;
        // A lone sign as the last byte is not an integer.
        if (pos >= end) {
            return context.nil;
        }
        bite = bytes.get(pos);
    }

    if (!(bite >= '0' && bite <= '9')) {
        return context.nil;
    }

    // pos is on the first digit; consume the remaining run of ASCII digits.
    while (pos < end) {
        bite = bytes.get(pos);
        if (bite < '0' || bite > '9') {
            break;
        }
        pos++;
    }

    // Record the match only now that it is known to have succeeded.
    prev = start;
    this.curr = pos;
    setMatched();
    adjustRegisters();

    return ConvertBytes.byteListToInum(runtime, bytes, start, pos, 10, true);
}

@JRubyMethod(name = "unscan")
public IRubyObject unscan(ThreadContext context) {
check(context);
Expand Down
59 changes: 59 additions & 0 deletions ext/strscan/strscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
extern size_t onig_region_memsize(const struct re_registers *regs);
#endif

#include <ctype.h>
#include <stdbool.h>

#define STRSCAN_VERSION "3.1.1"
Expand Down Expand Up @@ -115,6 +116,7 @@ static VALUE strscan_get_byte _((VALUE self));
static VALUE strscan_getbyte _((VALUE self));
static VALUE strscan_peek _((VALUE self, VALUE len));
static VALUE strscan_peep _((VALUE self, VALUE len));
static VALUE strscan_scan_integer _((VALUE self));
static VALUE strscan_unscan _((VALUE self));
static VALUE strscan_bol_p _((VALUE self));
static VALUE strscan_eos_p _((VALUE self));
Expand Down Expand Up @@ -1266,6 +1268,61 @@ strscan_peep(VALUE self, VALUE vlen)
return strscan_peek(self, vlen);
}

/*
 * call-seq:
 *   scan_integer
 *
 * Equivalent to #scan with a <tt>[+-]?\d+</tt> pattern, and returns an
 * Integer or +nil+.
 *
 * When no integer starts at the current position (including at the end of
 * the string, or when only a lone sign remains) the scanner is left
 * unmatched, the position is not moved, and +nil+ is returned.
 *
 * The scanned string must be encoded with an ASCII compatible encoding,
 * otherwise Encoding::CompatibilityError will be raised.
 */
static VALUE
strscan_scan_integer(VALUE self)
{
    char *ptr, *buffer;
    long len = 0;
    long remaining_len;
    VALUE buffer_v, integer;
    struct strscanner *p;

    GET_SCANNER(self, p);
    CLEAR_MATCH_STATUS(p);

    rb_must_asciicompat(p->str);

    ptr = CURPTR(p);
    remaining_len = S_RESTLEN(p);

    if (remaining_len <= 0) {
        return Qnil;
    }

    if (ptr[len] == '-' || ptr[len] == '+') {
        len++;
    }

    /*
     * Check the bound before reading: after consuming a sign, len may equal
     * remaining_len. Digits are tested with explicit ASCII range comparisons
     * instead of isdigit(), because passing a plain (possibly negative) char
     * to isdigit() is undefined behaviour for non-ASCII bytes.
     */
    if (len >= remaining_len || !(ptr[len] >= '0' && ptr[len] <= '9')) {
        return Qnil;
    }

    MATCHED(p);
    p->prev = p->curr;

    while (len < remaining_len && ptr[len] >= '0' && ptr[len] <= '9') {
        len++;
    }

    /* rb_cstr2inum() needs a NUL-terminated string, so copy the match out. */
    buffer = RB_ALLOCV_N(char, buffer_v, len + 1);

    MEMCPY(buffer, ptr, char, len);
    buffer[len] = '\0';
    integer = rb_cstr2inum(buffer, 10);
    RB_ALLOCV_END(buffer_v);
    p->curr += len;
    return integer;
}

/*
* :markup: markdown
* :include: strscan/link_refs.txt
Expand Down Expand Up @@ -2204,6 +2261,8 @@ Init_strscan(void)
rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0);
rb_define_method(StringScanner, "peep", strscan_peep, 1);

rb_define_method(StringScanner, "scan_integer", strscan_scan_integer, 0);

rb_define_method(StringScanner, "unscan", strscan_unscan, 0);

rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0);
Expand Down
55 changes: 55 additions & 0 deletions test/strscan/test_stringscanner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,61 @@ def test_named_captures
assert_equal(9, scan.match?(/(?<f>foo)(?<r>bar)(?<z>baz)/))
assert_equal({"f" => "foo", "r" => "bar", "z" => "baz"}, scan.named_captures)
end

# Exercises scan_integer on unsigned, signed, non-numeric and very large
# inputs, checking the returned value, the scan position and the match state.
def test_scan_integer
  omit "scan_integer isn't implemented on TruffleRuby yet" if RUBY_ENGINE == "truffleruby"

  # No digits at the scan position: no match, position untouched.
  s = create_string_scanner('abc')
  assert_nil s.scan_integer
  assert_equal 0, s.pos
  refute_predicate s, :matched?

  # Plain digits followed by other text.
  s = create_string_scanner('123abc')
  assert_equal 123, s.scan_integer
  assert_equal 3, s.pos
  assert_predicate s, :matched?

  # Leading minus sign. The argument is parenthesised so Ruby does not warn
  # about an ambiguous first argument (`assert_equal -123` vs. subtraction).
  s = create_string_scanner('-123abc')
  assert_equal(-123, s.scan_integer)
  assert_equal 4, s.pos
  assert_predicate s, :matched?

  # Leading plus sign, digits run to the end of the string.
  s = create_string_scanner('+123')
  assert_equal 123, s.scan_integer
  assert_equal 4, s.pos
  assert_predicate s, :matched?

  # A sign that is not followed by a digit is not consumed.
  s = create_string_scanner('-abc')
  assert_nil s.scan_integer
  assert_equal 0, s.pos
  refute_predicate s, :matched?

  # A 2000-digit number must be scanned in full.
  huge_integer = '1' * 2_000
  s = create_string_scanner(huge_integer)
  assert_equal huge_integer.to_i, s.scan_integer
  assert_equal 2_000, s.pos
  assert_predicate s, :matched?
end

# A successful scan_integer can be undone with unscan.
def test_scan_integer_unmatch
  if RUBY_ENGINE == "truffleruby"
    omit "scan_integer isn't implemented on TruffleRuby yet"
  end

  scanner = create_string_scanner('123abc')
  assert_equal(123, scanner.scan_integer)
  assert_equal(3, scanner.pos)

  # unscan must put the position back where it was before the scan.
  scanner.unscan
  assert_equal(0, scanner.pos)
end

def test_scan_integer_encoding
omit "scan_integer isn't implemented on TruffleRuby yet" if RUBY_ENGINE == "truffleruby"

s = create_string_scanner('123abc'.encode(Encoding::UTF_32LE))
assert_raise(Encoding::CompatibilityError) do
s.scan_integer
end
end
end

class TestStringScanner < Test::Unit::TestCase
Expand Down