From c96bcecde85f07075ff9e7ab013bc58d09bf55a3 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 24 Jul 2019 16:12:46 +0200 Subject: [PATCH 1/5] Add support for "NaN" as a double when tokenizing --- src/tokenizer.cc | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/tokenizer.cc b/src/tokenizer.cc index f24655c75..b2bdd2a19 100644 --- a/src/tokenizer.cc +++ b/src/tokenizer.cc @@ -99,10 +99,17 @@ std::unique_ptr Tokenizer::NextToken() { data_.substr(current_position_, end_pos - current_position_); current_position_ = end_pos; + // Check for "NaN" explicitly. + bool is_nan = (tok_str.size() == 3 && + std::tolower(tok_str[0]) == 'n' && + std::tolower(tok_str[1]) == 'a' && + std::tolower(tok_str[2]) == 'n'); + // Starts with an alpha is a string. - if (!std::isdigit(tok_str[0]) && - !(tok_str[0] == '-' && std::isdigit(tok_str[1])) && - !(tok_str[0] == '.' && std::isdigit(tok_str[1]))) { + if (!is_nan && + !std::isdigit(tok_str[0]) && + !(tok_str[0] == '-' && tok_str.size() >= 2 && std::isdigit(tok_str[1])) && + !(tok_str[0] == '.' && tok_str.size() >= 2 && std::isdigit(tok_str[1]))) { // If we've got a continuation, skip over the end of line and get the next // token. if (tok_str == "\\") { @@ -126,17 +133,21 @@ std::unique_ptr Tokenizer::NextToken() { } // Handle hex strings - if (tok_str.size() > 2 && tok_str[0] == '0' && tok_str[1] == 'x') { + if (!is_nan && tok_str.size() > 2 && tok_str[0] == '0' && tok_str[1] == 'x') { auto tok = MakeUnique(TokenType::kHex); tok->SetStringValue(tok_str); return tok; } bool is_double = false; - for (const char ch : tok_str) { - if (ch == '.') { - is_double = true; - break; + if (is_nan) { + is_double = true; + } else { + for (const char ch : tok_str) { + if (ch == '.') { + is_double = true; + break; + } } } From d5c3def2c751d4fd1651c8a816801cafd93b30d2 Mon Sep 17 00:00:00 2001 From: Ricardo Garcia Date: Wed, 24 Jul 2019 16:18:56 +0200 Subject: [PATCH 2/5] Add a unit test for NaN double values --- src/tokenizer_test.cc | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/tokenizer_test.cc b/src/tokenizer_test.cc index d50ec467c..fdccb79ea 100644 --- a/src/tokenizer_test.cc +++ b/src/tokenizer_test.cc @@ -15,6 +15,7 @@ #include "src/tokenizer.h" #include +#include #include "gtest/gtest.h" @@ -77,6 +78,33 @@ TEST_F(TokenizerTest, ProcessDouble) { EXPECT_TRUE(next->IsEOS()); } +namespace { + +void TestNaN(const std::string &nan_str) { + Tokenizer t(nan_str); + auto next = t.NextToken(); + ASSERT_TRUE(next != nullptr); + EXPECT_TRUE(next->IsDouble()); + EXPECT_TRUE(std::isnan(next->AsDouble())); + + next = t.NextToken(); + ASSERT_TRUE(next != nullptr); + EXPECT_TRUE(next->IsEOS()); +} + +} // anonymous. + +TEST_F(TokenizerTest, ProcessNaN) { + TestNaN("nan"); + TestNaN("naN"); + TestNaN("nAn"); + TestNaN("nAN"); + TestNaN("Nan"); + TestNaN("NaN"); + TestNaN("NAn"); + TestNaN("NAN"); +} + TEST_F(TokenizerTest, ProcessNegativeDouble) { Tokenizer t("-123.456"); auto next = t.NextToken(); From 303b3c8ae919e0996dde34d31ef417603586c082 Mon Sep 17 00:00:00 2001 From: dan sinclair Date: Wed, 24 Jul 2019 19:58:50 -0400 Subject: [PATCH 3/5] Fix formatter issues. --- src/tokenizer_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokenizer_test.cc b/src/tokenizer_test.cc index fdccb79ea..647386d8a 100644 --- a/src/tokenizer_test.cc +++ b/src/tokenizer_test.cc @@ -92,7 +92,7 @@ void TestNaN(const std::string &nan_str) { EXPECT_TRUE(next->IsEOS()); } -} // anonymous. +} // namespace TEST_F(TokenizerTest, ProcessNaN) { TestNaN("nan"); From d3b50247a618a0b1fb79ca003bb91df05fc52db7 Mon Sep 17 00:00:00 2001 From: dan sinclair Date: Wed, 24 Jul 2019 20:15:13 -0400 Subject: [PATCH 4/5] More formatting --- src/tokenizer.cc | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/tokenizer.cc b/src/tokenizer.cc index b2bdd2a19..54bf88c05 100644 --- a/src/tokenizer.cc +++ b/src/tokenizer.cc @@ -100,14 +100,12 @@ std::unique_ptr Tokenizer::NextToken() { current_position_ = end_pos; // Check for "NaN" explicitly. - bool is_nan = (tok_str.size() == 3 && - std::tolower(tok_str[0]) == 'n' && - std::tolower(tok_str[1]) == 'a' && - std::tolower(tok_str[2]) == 'n'); + bool is_nan = + (tok_str.size() == 3 && std::tolower(tok_str[0]) == 'n' && + std::tolower(tok_str[1]) == 'a' && std::tolower(tok_str[2]) == 'n'); // Starts with an alpha is a string. - if (!is_nan && - !std::isdigit(tok_str[0]) && + if (!is_nan && !std::isdigit(tok_str[0]) && !(tok_str[0] == '-' && tok_str.size() >= 2 && std::isdigit(tok_str[1])) && !(tok_str[0] == '.' && tok_str.size() >= 2 && std::isdigit(tok_str[1]))) { // If we've got a continuation, skip over the end of line and get the next From c38dbed3531eea429f47d62c4d19187e40c08868 Mon Sep 17 00:00:00 2001 From: dan sinclair Date: Wed, 24 Jul 2019 20:15:47 -0400 Subject: [PATCH 5/5] Formatting --- src/tokenizer_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tokenizer_test.cc b/src/tokenizer_test.cc index 647386d8a..fd7dba48d 100644 --- a/src/tokenizer_test.cc +++ b/src/tokenizer_test.cc @@ -14,8 +14,8 @@ #include "src/tokenizer.h" -#include #include +#include #include "gtest/gtest.h" @@ -80,7 +80,7 @@ TEST_F(TokenizerTest, ProcessDouble) { namespace { -void TestNaN(const std::string &nan_str) { +void TestNaN(const std::string& nan_str) { Tokenizer t(nan_str); auto next = t.NextToken(); ASSERT_TRUE(next != nullptr);