Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@ $(libcppdir)/templatesimplifier.o: lib/templatesimplifier.cpp lib/color.h lib/co
$(libcppdir)/timer.o: lib/timer.cpp lib/config.h lib/timer.h
$(CXX) ${INCLUDE_FOR_LIB} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $(libcppdir)/timer.cpp

$(libcppdir)/token.o: lib/token.cpp lib/astutils.h lib/config.h lib/errortypes.h lib/importproject.h lib/library.h lib/mathlib.h lib/platform.h lib/settings.h lib/smallvector.h lib/sourcelocation.h lib/standards.h lib/suppressions.h lib/symboldatabase.h lib/templatesimplifier.h lib/timer.h lib/token.h lib/tokenlist.h lib/tokenrange.h lib/utils.h lib/valueflow.h
$(libcppdir)/token.o: lib/token.cpp externals/simplecpp/simplecpp.h lib/astutils.h lib/config.h lib/errortypes.h lib/importproject.h lib/library.h lib/mathlib.h lib/platform.h lib/settings.h lib/smallvector.h lib/sourcelocation.h lib/standards.h lib/suppressions.h lib/symboldatabase.h lib/templatesimplifier.h lib/timer.h lib/token.h lib/tokenlist.h lib/tokenrange.h lib/utils.h lib/valueflow.h
$(CXX) ${INCLUDE_FOR_LIB} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $(libcppdir)/token.cpp

$(libcppdir)/tokenize.o: lib/tokenize.cpp externals/simplecpp/simplecpp.h lib/check.h lib/color.h lib/config.h lib/errorlogger.h lib/errortypes.h lib/importproject.h lib/library.h lib/mathlib.h lib/platform.h lib/preprocessor.h lib/settings.h lib/sourcelocation.h lib/standards.h lib/summaries.h lib/suppressions.h lib/symboldatabase.h lib/templatesimplifier.h lib/timer.h lib/token.h lib/tokenize.h lib/tokenlist.h lib/utils.h lib/valueflow.h
Expand Down
7 changes: 6 additions & 1 deletion externals/simplecpp/simplecpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ namespace simplecpp {
name = (std::isalpha(static_cast<unsigned char>(string[0])) || string[0] == '_' || string[0] == '$')
&& (std::memchr(string.c_str(), '\'', string.size()) == nullptr);
comment = string.size() > 1U && string[0] == '/' && (string[1] == '/' || string[1] == '*');
number = std::isdigit(static_cast<unsigned char>(string[0])) || (string.size() > 1U && string[0] == '-' && std::isdigit(static_cast<unsigned char>(string[1])));
number = isNumberLike(string);
op = (string.size() == 1U) ? string[0] : '\0';
}

Expand Down Expand Up @@ -162,6 +162,11 @@ namespace simplecpp {

void printAll() const;
void printOut() const;

/**
 * Tell whether a token string looks like it starts a numeric literal.
 *
 * Only the leading characters are examined: the string qualifies when it
 * begins with a decimal digit, or with a '-' immediately followed by a
 * decimal digit. Anything after that (suffixes, separators, ...) is not
 * inspected here.
 */
static bool isNumberLike(const TokenString &string) {
    // A leading minus only counts when there is a character after it.
    const bool hasLeadingMinus = string.size() > 1U && string[0] == '-';
    // Pick the character that has to be a digit: the one after the sign,
    // otherwise the very first one.
    const unsigned char lead = static_cast<unsigned char>(hasLeadingMinus ? string[1] : string[0]);
    return std::isdigit(lead) != 0;
}

private:
TokenString string;

Expand Down
10 changes: 8 additions & 2 deletions lib/token.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "errortypes.h"
#include "library.h"
#include "settings.h"
#include "simplecpp.h"
#include "symboldatabase.h"
#include "tokenlist.h"
#include "utils.h"
Expand Down Expand Up @@ -138,8 +139,13 @@ void Token::update_property_info()
tokType(eKeyword);
else if (mTokType != eVariable && mTokType != eFunction && mTokType != eType && mTokType != eKeyword)
tokType(eName);
} else if (std::isdigit((unsigned char)mStr[0]) || (mStr.length() > 1 && mStr[0] == '-' && std::isdigit((unsigned char)mStr[1])))
tokType(eNumber);
}
else if (simplecpp::Token::isNumberLike(mStr)) {
if (MathLib::isInt(mStr) || MathLib::isFloat(mStr))
tokType(eNumber);
else
tokType(eLiteral); // assume it is a user defined literal
}
else if (mStr == "=" || mStr == "<<=" || mStr == ">>=" ||
(mStr.size() == 2U && mStr[1] == '=' && std::strchr("+-*/%&^|", mStr[0])))
tokType(eAssignmentOp);
Expand Down
2 changes: 1 addition & 1 deletion lib/tokenize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8609,7 +8609,7 @@ void Tokenizer::simplifyAsm()
Token *endasm = tok->next();
const Token *firstSemiColon = nullptr;
int comment = 0;
while (Token::Match(endasm, "%num%|%name%|,|:|;") || (endasm && endasm->linenr() == comment)) {
while (Token::Match(endasm, "%num%|%name%|,|:|;") || (endasm && endasm->isLiteral()) || (endasm && endasm->linenr() == comment)) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unit tests pointed me here: 12h is valid asm (12 in hex), but it is not a valid C++ int. So after my changes, it is seen as a literal, hence this change.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to allow and handle 12h and 101010b until Tokenizer::simplifyAsm() is executed. The checks should not see such tokens at all. My guess is that it doesn't matter whether they are numbers or literals.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That sounds like we need more tests for ASM blocks.

if (Token::Match(endasm, "_asm|__asm|__endasm"))
break;
if (endasm->str() == ";") {
Expand Down
16 changes: 14 additions & 2 deletions test/testtoken.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,7 @@ class TestToken : public TestFixture {
givenACodeSampleToTokenize nonNumeric("abc", true);
ASSERT_EQUALS(false, Token::Match(nonNumeric.tokens(), "%num%"));

givenACodeSampleToTokenize binary("101010b", true);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't know in what context 101010b was a valid integer? C? asm? Something else?
Same for 0.0d below.

I've rewritten them to valid C++ integers

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think 101010b is used in inline assembler.

givenACodeSampleToTokenize binary("0b101010", true);
ASSERT_EQUALS(true, Token::Match(binary.tokens(), "%num%"));

givenACodeSampleToTokenize octal("0123", true);
Expand All @@ -653,7 +653,7 @@ class TestToken : public TestFixture {
givenACodeSampleToTokenize floatingPoint("0.0f", true);
ASSERT_EQUALS(true, Token::Match(floatingPoint.tokens(), "%num%"));

givenACodeSampleToTokenize doublePrecision("0.0d", true);
givenACodeSampleToTokenize doublePrecision("0.0", true);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not know where 0.0d comes from. I don't want to immediately say if we need that or not.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems to come from this commit: acad87c

And I don't see why that was added; it could have been by mistake. I guess we can remove 0.0d.

ASSERT_EQUALS(true, Token::Match(doublePrecision.tokens(), "%num%"));

givenACodeSampleToTokenize signedLong("0L", true);
Expand Down Expand Up @@ -685,6 +685,12 @@ class TestToken : public TestFixture {

givenACodeSampleToTokenize positiveNull("+.0", true);
ASSERT_EQUALS(true, Token::Match(positiveNull.tokens(), "+ %num%"));

givenACodeSampleToTokenize decimalSeparated("123'456'678", true);
ASSERT_EQUALS(true, Token::Match(decimalSeparated.tokens(), "%num%"));

givenACodeSampleToTokenize userDefinedLiteral("123_udl", true);
ASSERT_EQUALS(false, Token::Match(userDefinedLiteral.tokens(), "%num%"));
}


Expand Down Expand Up @@ -950,6 +956,12 @@ class TestToken : public TestFixture {
ASSERT(tok.tokType() == Token::eBoolean);
tok.str("false");
ASSERT(tok.tokType() == Token::eBoolean);
tok.str("\"foo\"_userDefinedLiteral");
ASSERT(tok.tokType() == Token::eOther); // should be eLiteral
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

User defined string literals are still not properly processed, but this seems a lot harder to (properly) accomplish.
In my code base I don't run into issues with strings (I did run into issues with user-defined int literals, which are now fixed), so I left that unchanged.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use TODO_ASSERT so we know this is not the expected result.

tok.str("123_userDefinedLiteral");
ASSERT(tok.tokType() == Token::eLiteral);
tok.str("0x123._userDefinedLiteral");
ASSERT(tok.tokType() == Token::eLiteral);
}

void isStandardType() const {
Expand Down
24 changes: 24 additions & 0 deletions test/testtokenize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,8 @@ class TestTokenizer : public TestFixture {

TEST_CASE(cpp20_default_bitfield_initializer);

TEST_CASE(userDefinedLiterals); // #11438, #10807

TEST_CASE(cpp11init);
}

Expand Down Expand Up @@ -993,6 +995,7 @@ class TestTokenizer : public TestFixture {
ASSERT_EQUALS("asm ( \"mov ax , bx\" ) ;", tokenizeAndStringify("__asm { mov ax,bx };"));
ASSERT_EQUALS("asm ( \"\"mov ax,bx\"\" ) ;", tokenizeAndStringify("__asm__ __volatile__ ( \"mov ax,bx\" );"));
ASSERT_EQUALS("asm ( \"_emit 12h\" ) ;", tokenizeAndStringify("__asm _emit 12h ;"));
ASSERT_EQUALS("asm ( \"_emit 101010b\" ) ;", tokenizeAndStringify("__asm _emit 101010b ;"));
ASSERT_EQUALS("asm ( \"mov a , b\" ) ;", tokenizeAndStringify("__asm mov a, b ;"));
ASSERT_EQUALS("asm ( \"\"fnstcw %0\" : \"= m\" ( old_cw )\" ) ;", tokenizeAndStringify("asm volatile (\"fnstcw %0\" : \"= m\" (old_cw));"));
ASSERT_EQUALS("asm ( \"\"fnstcw %0\" : \"= m\" ( old_cw )\" ) ;", tokenizeAndStringify(" __asm__ (\"fnstcw %0\" : \"= m\" (old_cw));"));
Expand Down Expand Up @@ -7469,6 +7472,27 @@ class TestTokenizer : public TestFixture {
ASSERT_THROW(tokenizeAndStringify(code, settings), InternalError);
}

// Regression test for user-defined literals (#11438, #10807):
//  - a number with a user-defined suffix must be classified as Token::eLiteral
//  - code passing a user-defined literal as a constructor argument must
//    tokenize without throwing
void userDefinedLiterals() {
    Settings settings;

    // #11438
    const char code[] = "bool f () { return 3ms < 3s; }";
    // NOTE(review): settings1 is not declared in this function - presumably a
    // fixture member; confirm it is intended here rather than the local 'settings'.
    Tokenizer tokenizer(&settings1, this);
    std::istringstream istr(code);
    ASSERT(tokenizer.tokenize(istr, "test.cpp"));
    const Token* token = Token::findsimplematch(tokenizer.tokens(), "3ms");
    // Check the pointer before dereferencing it, so that a failed lookup
    // (assumed to yield nullptr) is reported as a failed assertion instead
    // of crashing the test runner.
    ASSERT(token && token->tokType() == Token::eLiteral);

    // #10807
    ASSERT_NO_THROW(tokenizeAndStringify("struct S {\n"
                                         " template <typename T>\n"
                                         " constexpr explicit S(const T& t) {}\n"
                                         " static S zero() {\n"
                                         " return S(0_s);\n"
                                         " }\n"
                                         "};\n", settings));
}

void cpp11init() {
#define testIsCpp11init(...) testIsCpp11init_(__FILE__, __LINE__, __VA_ARGS__)
auto testIsCpp11init_ = [this](const char* file, int line, const char* code, const char* find, TokenImpl::Cpp11init expected) {
Expand Down