-
Notifications
You must be signed in to change notification settings - Fork 4k
ARROW-12657: [C++] Adding String hex to numeric conversion #11161
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -273,6 +273,30 @@ inline bool ParseUnsigned(const char* s, size_t length, uint64_t* out) { | |
| #undef PARSE_UNSIGNED_ITERATION | ||
| #undef PARSE_UNSIGNED_ITERATION_LAST | ||
|
|
||
| template <typename T> | ||
| bool ParseHex(const char* s, size_t length, T* out) { | ||
| // lets make sure that the length of the string is not too big | ||
| if (!ARROW_PREDICT_TRUE(sizeof(T) * 2 >= length && length > 0)) { | ||
| return false; | ||
| } | ||
| T result = 0; | ||
| for (size_t i = 0; i < length; i++) { | ||
| result = static_cast<T>(result << 4); | ||
| if (s[i] >= '0' && s[i] <= '9') { | ||
| result = static_cast<T>(result | (s[i] - '0')); | ||
| } else if (s[i] >= 'A' && s[i] <= 'F') { | ||
| result = static_cast<T>(result | (s[i] - 'A' + 10)); | ||
| } else if (s[i] >= 'a' && s[i] <= 'f') { | ||
| result = static_cast<T>(result | (s[i] - 'a' + 10)); | ||
| } else { | ||
| /* Non-digit */ | ||
| return false; | ||
| } | ||
|
||
| } | ||
| *out = result; | ||
| return true; | ||
| } | ||
|
|
||
| template <class ARROW_TYPE> | ||
| struct StringToUnsignedIntConverterMixin { | ||
| using value_type = typename ARROW_TYPE::c_type; | ||
|
|
@@ -281,6 +305,13 @@ struct StringToUnsignedIntConverterMixin { | |
| if (ARROW_PREDICT_FALSE(length == 0)) { | ||
| return false; | ||
| } | ||
| // If it starts with 0x then its hex | ||
| if (length > 2 && s[0] == '0' && ((s[1] == 'x') || (s[1] == 'X'))) { | ||
| length -= 2; | ||
| s += 2; | ||
|
|
||
| return ARROW_PREDICT_TRUE(ParseHex(s, length, out)); | ||
| } | ||
| // Skip leading zeros | ||
| while (length > 0 && *s == '0') { | ||
| length--; | ||
|
|
@@ -329,6 +360,18 @@ struct StringToSignedIntConverterMixin { | |
| if (ARROW_PREDICT_FALSE(length == 0)) { | ||
| return false; | ||
| } | ||
| // If it starts with 0x then its hex | ||
| if (length > 2 && s[0] == '0' && ((s[1] == 'x') || (s[1] == 'X'))) { | ||
| length -= 2; | ||
| s += 2; | ||
|
|
||
| if (!ARROW_PREDICT_TRUE(ParseHex(s, length, &unsigned_value))) { | ||
| return false; | ||
| } | ||
| *out = static_cast<value_type>(unsigned_value); | ||
| return true; | ||
| } | ||
|
|
||
| if (*s == '-') { | ||
| negative = true; | ||
| s++; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a test case for hex with a negative sign (e.g., `-0x34`) and another for multiple prefixed zeros (e.g., `00000x7f`).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Those cases are explicitly not allowed. Do you mean that I should add them to `CheckCastFails`?
A negative sign before a hex value does not make sense. If the value can be negative (signed integer), then you should use the two's-complement binary representation of that negative number; for example, for int8, 0xFF is -1 and 0x80 is -128.
For the case of multiple prefixed zeros, I opted not to allow it, since allowing it would make the logic more complex (and probably slower), and it seems like a very odd way for a hex string to be formatted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ohh, sorry...I meant as failing test cases. It is just a way to ensure more code coverage in tests.