From 7de4ff5b2ce8034b6e72684e3750455cdd1e57c6 Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Thu, 22 Feb 2024 15:09:26 -0800 Subject: [PATCH 1/3] Add a method for peeking and reading bytes as integers This commit adds `scan_byte` and `peek_byte`. `scan_byte` will read the current byte, return it as an integer, and advance the cursor. `peek_byte` will return the current byte as an integer without advancing the cursor. --- .../jruby/ext/strscan/RubyStringScanner.java | 27 +++++++++ ext/strscan/strscan.c | 55 +++++++++++++++++++ run-test.rb | 1 + test/strscan/test_stringscanner.rb | 23 ++++++++ 4 files changed, 106 insertions(+) diff --git a/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java b/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java index a1e4895b2a..df08af63ce 100644 --- a/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java +++ b/ext/jruby/org/jruby/ext/strscan/RubyStringScanner.java @@ -508,6 +508,33 @@ public IRubyObject getbyte(ThreadContext context) { return get_byte(context); } + @JRubyMethod(name = "scan_byte") + public IRubyObject scan_byte(ThreadContext context) { + Ruby runtime = context.runtime; + check(context); + clearMatched(); + if (curr >= str.getByteList().getRealSize()) return context.nil; + + byte[] bytes = str.getBytes(); + + byte bite = bytes[curr]; + prev = curr; + curr++; + + setMatched(); + adjustRegisters(); + return RubyFixnum.newFixnum(context.runtime, bite & 0xff); + } + + @JRubyMethod(name = "peek_byte") + public IRubyObject peek_byte(ThreadContext context) { + Ruby runtime = context.runtime; + check(context); + if (curr >= str.getByteList().getRealSize()) return context.nil; + + return RubyFixnum.newFixnum(context.runtime, (str.getBytes()[curr]) & 0xff); + } + @JRubyMethod(name = "peek") public IRubyObject peek(ThreadContext context, IRubyObject length) { check(context); diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c index bed1c87cdc..a167ffde04 100644 --- a/ext/strscan/strscan.c +++ b/ext/strscan/strscan.c @@ -902,6 +902,57 @@ strscan_getch(VALUE self) adjust_register_position(p, p->regs.end[0])); } +/* + * Scans one byte and returns it as an integer. + * This method is not multibyte character sensitive. + * See also: #getch. + * + * s = StringScanner.new('ab') + * s.scan_byte # => 97 + * s.scan_byte # => 98 + * s.scan_byte # => nil + * + * s = StringScanner.new("\244\242".force_encoding("euc-jp")) + * s.scan_byte # => 0xA4 + * s.scan_byte # => 0xA2 + * s.scan_byte # => nil + */ +static VALUE +strscan_scan_byte(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + CLEAR_MATCH_STATUS(p); + if (EOS_P(p)) + return Qnil; + + VALUE ret = INT2FIX((unsigned char)*CURPTR(p)); + p->prev = p->curr; + p->curr++; + MATCHED(p); + adjust_registers_to_matched(p); + return ret; +} + +/* + * Peeks at the current byte and returns it as an integer. + * + * s = StringScanner.new('ab') + * s.peek_byte # => 97 + */ +static VALUE +strscan_peek_byte(VALUE self) +{ + struct strscanner *p; + + GET_SCANNER(self, p); + if (EOS_P(p)) + return Qnil; + + return INT2FIX((unsigned char)*CURPTR(p)); +} + /* * Scans one byte and returns it. * This method is not multibyte character sensitive. @@ -1605,6 +1656,7 @@ strscan_named_captures(VALUE self) * * - #getch * - #get_byte + * - #scan_byte * - #scan * - #scan_until * - #skip @@ -1617,6 +1669,7 @@ strscan_named_captures(VALUE self) * - #exist? * - #match? * - #peek + * - #peek_byte * * === Finding Where we Are * @@ -1708,7 +1761,9 @@ Init_strscan(void) rb_define_method(StringScanner, "getch", strscan_getch, 0); rb_define_method(StringScanner, "get_byte", strscan_get_byte, 0); rb_define_method(StringScanner, "getbyte", strscan_getbyte, 0); + rb_define_method(StringScanner, "scan_byte", strscan_scan_byte, 0); rb_define_method(StringScanner, "peek", strscan_peek, 1); + rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0); rb_define_method(StringScanner, "peep", strscan_peep, 1); rb_define_method(StringScanner, "unscan", strscan_unscan, 0); diff --git a/run-test.rb b/run-test.rb index b4390272ad..dd4d8e1551 100755 --- a/run-test.rb +++ b/run-test.rb @@ -1,5 +1,6 @@ #!/usr/bin/env ruby +gem 'strscan' require 'strscan' puts "Loaded strscan from #{$".grep(/\/strscan\./).join(', ')}" puts "Gem from #{Gem.loaded_specs["strscan"]&.full_gem_path}" diff --git a/test/strscan/test_stringscanner.rb b/test/strscan/test_stringscanner.rb index 2a127a773a..a9a62a7cf8 100644 --- a/test/strscan/test_stringscanner.rb +++ b/test/strscan/test_stringscanner.rb @@ -8,6 +8,29 @@ require 'test/unit' module StringScannerTests + def test_peekbyte + s = create_string_scanner('ab') + assert_equal 97, s.peek_byte + assert_equal 97, s.scan_byte + assert_equal 98, s.peek_byte + assert_equal 98, s.scan_byte + assert_nil s.peek_byte + assert_nil s.scan_byte + end + + def test_scan_byte + s = create_string_scanner('ab') + assert_equal 97, s.scan_byte + assert_equal 98, s.scan_byte + assert_nil s.scan_byte + + str = "\244\242".dup.force_encoding("euc-jp") + s = StringScanner.new(str) + assert_equal str.getbyte(s.pos), s.scan_byte + assert_equal str.getbyte(s.pos), s.scan_byte + assert_nil s.scan_byte + end + def test_s_new s = create_string_scanner('test string') assert_instance_of StringScanner, s From 59436e15344a7613266155398c3561b8c1064e13 Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Fri, 23 Feb 2024 08:50:09 -0800 Subject: [PATCH 2/3] Update ext/strscan/strscan.c Co-authored-by: Sutou Kouhei --- ext/strscan/strscan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c index a167ffde04..70a3ce5260 100644 --- a/ext/strscan/strscan.c +++ b/ext/strscan/strscan.c @@ -927,12 +927,12 @@ strscan_scan_byte(VALUE self) if (EOS_P(p)) return Qnil; - VALUE ret = INT2FIX((unsigned char)*CURPTR(p)); + VALUE byte = INT2FIX((unsigned char)*CURPTR(p)); p->prev = p->curr; p->curr++; MATCHED(p); adjust_registers_to_matched(p); - return ret; + return byte; } /* From e035b4d6e16f7131ca6512ba319c0199bc55cca8 Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Fri, 23 Feb 2024 08:50:17 -0800 Subject: [PATCH 3/3] Update test/strscan/test_stringscanner.rb Co-authored-by: Sutou Kouhei --- test/strscan/test_stringscanner.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/strscan/test_stringscanner.rb b/test/strscan/test_stringscanner.rb index a9a62a7cf8..2884b8ef05 100644 --- a/test/strscan/test_stringscanner.rb +++ b/test/strscan/test_stringscanner.rb @@ -8,7 +8,7 @@ require 'test/unit' module StringScannerTests - def test_peekbyte + def test_peek_byte s = create_string_scanner('ab') assert_equal 97, s.peek_byte assert_equal 97, s.scan_byte