-
Notifications
You must be signed in to change notification settings - Fork 43
Implement #scan_integer to efficiently parse Integer #114
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ | |
| extern size_t onig_region_memsize(const struct re_registers *regs); | ||
| #endif | ||
|
|
||
| #include <ctype.h> | ||
| #include <stdbool.h> | ||
|
|
||
| #define STRSCAN_VERSION "3.1.1" | ||
|
|
@@ -115,6 +116,7 @@ static VALUE strscan_get_byte _((VALUE self)); | |
| static VALUE strscan_getbyte _((VALUE self)); | ||
| static VALUE strscan_peek _((VALUE self, VALUE len)); | ||
| static VALUE strscan_peep _((VALUE self, VALUE len)); | ||
| static VALUE strscan_scan_integer _((VALUE self)); | ||
| static VALUE strscan_unscan _((VALUE self)); | ||
| static VALUE strscan_bol_p _((VALUE self)); | ||
| static VALUE strscan_eos_p _((VALUE self)); | ||
|
|
@@ -1266,6 +1268,55 @@ strscan_peep(VALUE self, VALUE vlen) | |
| return strscan_peek(self, vlen); | ||
| } | ||
|
|
||
| /* | ||
| * call-seq: | ||
| * scan_integer | ||
| * | ||
| * Equivalent to #scan with a [+-]?\d+ pattern, and returns an Integer or nil. | ||
| * | ||
| * The scanned string must be encoded with an ASCII compatible encoding, otherwise | ||
| * Encoding::CompatibilityError will be raised. | ||
| */ | ||
| static VALUE | ||
| strscan_scan_integer(VALUE self) | ||
| { | ||
| char *ptr, *buffer; | ||
| long len = 0; | ||
| VALUE buffer_v, integer; | ||
| struct strscanner *p; | ||
|
|
||
| GET_SCANNER(self, p); | ||
| CLEAR_MATCH_STATUS(p); | ||
|
|
||
| rb_must_asciicompat(p->str); | ||
|
|
||
| ptr = CURPTR(p); | ||
|
|
||
| if (ptr[len] == '-' || ptr[len] == '+') { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think so, because (for now?) Ruby strings are guaranteed to be NULL terminated. So if we're EOF,
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems that a
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Technically you can but the documentation clearly states:
So it assume it's a literal, hence is NUL terminated. I can add the checks if you feel strongly about it, but it's a lot of busy code for something I think isn't supposed to happen.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added the extra checks in #115 noetheless. |
||
| len++; | ||
| } | ||
|
|
||
| if (!isdigit(ptr[len])) { | ||
| return Qnil; | ||
| } | ||
|
|
||
| MATCHED(p); | ||
| p->prev = p->curr; | ||
|
|
||
| while (isdigit(ptr[len])) { | ||
| len++; | ||
| } | ||
|
|
||
| buffer = ALLOCV_N(char, buffer_v, len + 1); | ||
|
|
||
| MEMCPY(buffer, CURPTR(p), char, len); | ||
| buffer[len] = '\0'; | ||
| integer = rb_cstr2inum(buffer, 10); | ||
|
Comment on lines
+1310
to
+1314
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, we want to avoid this copy...
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unfortunately, Ruby doesn't provide an API that doesn't require a NULL terminated string. But if you want I can implement a fast path like I did in ruby/json: ruby/json@3a4dc9e
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. We can use |
||
| RB_GC_GUARD(buffer_v); | ||
| p->curr += len; | ||
| return integer; | ||
| } | ||
|
|
||
| /* | ||
| * :markup: markdown | ||
| * :include: strscan/link_refs.txt | ||
|
|
@@ -2204,6 +2255,8 @@ Init_strscan(void) | |
| rb_define_method(StringScanner, "peek_byte", strscan_peek_byte, 0); | ||
| rb_define_method(StringScanner, "peep", strscan_peep, 1); | ||
|
|
||
| rb_define_method(StringScanner, "scan_integer", strscan_scan_integer, 0); | ||
|
|
||
| rb_define_method(StringScanner, "unscan", strscan_unscan, 0); | ||
|
|
||
| rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
byte?Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for asking that, I now re-learned that in Java, there are certain reserved words: https://en.wikipedia.org/wiki/List_of_Java_keywords and
byteis one of them.EDIT: Oh,
(Ah, right, that list of contextually-reserved didn't include
byte.)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
!!!