From 49866372b594d1985082b75edbb717bcf80ec178 Mon Sep 17 00:00:00 2001 From: Gerbo Engels Date: Mon, 9 Jan 2023 09:25:08 +0100 Subject: [PATCH 1/5] Fix 11438 and 10807: categorize user defined literal as literal instead of number This relies on a big assumption: if a token starts with a digit but cannot be parsed as an int/float, it is treated as a user defined literal as a fallback. --- lib/token.cpp | 9 +++++++-- lib/tokenize.cpp | 2 +- test/testgarbage.cpp | 17 +++++++++++++++++ test/testtoken.cpp | 16 ++++++++++++++-- 4 files changed, 39 insertions(+), 5 deletions(-) diff --git a/lib/token.cpp b/lib/token.cpp index e0e4ac1d500..441a0b2f7a2 100644 --- a/lib/token.cpp +++ b/lib/token.cpp @@ -138,8 +138,13 @@ void Token::update_property_info() tokType(eKeyword); else if (mTokType != eVariable && mTokType != eFunction && mTokType != eType && mTokType != eKeyword) tokType(eName); - } else if (std::isdigit((unsigned char)mStr[0]) || (mStr.length() > 1 && mStr[0] == '-' && std::isdigit((unsigned char)mStr[1]))) - tokType(eNumber); + } + else if (std::isdigit((unsigned char)mStr[0]) || (mStr.length() > 1 && mStr[0] == '-' && std::isdigit((unsigned char)mStr[1]))) { + if (MathLib::isInt(mStr) || MathLib::isFloat(mStr)) + tokType(eNumber); + else + tokType(eLiteral); // assume it is a user defined literal + } else if (mStr == "=" || mStr == "<<=" || mStr == ">>=" || (mStr.size() == 2U && mStr[1] == '=' && std::strchr("+-*/%&^|", mStr[0]))) tokType(eAssignmentOp); diff --git a/lib/tokenize.cpp b/lib/tokenize.cpp index 87c42d9f061..850f11c9ec6 100644 --- a/lib/tokenize.cpp +++ b/lib/tokenize.cpp @@ -8609,7 +8609,7 @@ void Tokenizer::simplifyAsm() Token *endasm = tok->next(); const Token *firstSemiColon = nullptr; int comment = 0; - while (Token::Match(endasm, "%num%|%name%|,|:|;") || (endasm && endasm->linenr() == comment)) { + while (Token::Match(endasm, "%num%|%name%|,|:|;") || (endasm && endasm->isLiteral()) || (endasm && endasm->linenr() == comment)) { if 
(Token::Match(endasm, "_asm|__asm|__endasm")) break; if (endasm->str() == ";") { diff --git a/test/testgarbage.cpp b/test/testgarbage.cpp index 1b5e45cf769..2a550fc4ae9 100644 --- a/test/testgarbage.cpp +++ b/test/testgarbage.cpp @@ -267,6 +267,7 @@ class TestGarbage : public TestFixture { TEST_CASE(enumTrailingComma); TEST_CASE(nonGarbageCode1); // #8346 + TEST_CASE(userDefinedLiterals); // #11438, #10807 } #define checkCodeInternal(code, filename) checkCodeInternal_(code, filename, __FILE__, __LINE__) @@ -1823,6 +1824,22 @@ class TestGarbage : public TestFixture { " auto fn = []() -> foo* { return new foo(); };\n" "}"); } + + void userDefinedLiterals() { + // #11438 + ASSERT_NO_THROW(checkCode("bool f () { return 3ms < 3s; }")); + ASSERT_EQUALS("", errout.str()); + + // #10807 + ASSERT_NO_THROW(checkCode("struct S {\n" + " template \n" + " constexpr explicit S(const T& t) {}\n" + " static S zero() {\n" + " return S(0_s);\n" + " }\n" + "};\n")); + ASSERT_EQUALS("", errout.str()); + } }; REGISTER_TEST(TestGarbage) diff --git a/test/testtoken.cpp b/test/testtoken.cpp index 984b45fd3c4..41dfd8e943d 100644 --- a/test/testtoken.cpp +++ b/test/testtoken.cpp @@ -638,7 +638,7 @@ class TestToken : public TestFixture { givenACodeSampleToTokenize nonNumeric("abc", true); ASSERT_EQUALS(false, Token::Match(nonNumeric.tokens(), "%num%")); - givenACodeSampleToTokenize binary("101010b", true); + givenACodeSampleToTokenize binary("0b101010", true); ASSERT_EQUALS(true, Token::Match(binary.tokens(), "%num%")); givenACodeSampleToTokenize octal("0123", true); @@ -653,7 +653,7 @@ class TestToken : public TestFixture { givenACodeSampleToTokenize floatingPoint("0.0f", true); ASSERT_EQUALS(true, Token::Match(floatingPoint.tokens(), "%num%")); - givenACodeSampleToTokenize doublePrecision("0.0d", true); + givenACodeSampleToTokenize doublePrecision("0.0", true); ASSERT_EQUALS(true, Token::Match(doublePrecision.tokens(), "%num%")); givenACodeSampleToTokenize signedLong("0L", true); @@ -685,6 
+685,12 @@ class TestToken : public TestFixture { givenACodeSampleToTokenize positiveNull("+.0", true); ASSERT_EQUALS(true, Token::Match(positiveNull.tokens(), "+ %num%")); + + givenACodeSampleToTokenize decimalSeparated("123'456'678", true); + ASSERT_EQUALS(true, Token::Match(decimalSeparated.tokens(), "%num%")); + + givenACodeSampleToTokenize userDefinedLiteral("123_udl", true); + ASSERT_EQUALS(false, Token::Match(userDefinedLiteral.tokens(), "%num%")); } @@ -950,6 +956,12 @@ class TestToken : public TestFixture { ASSERT(tok.tokType() == Token::eBoolean); tok.str("false"); ASSERT(tok.tokType() == Token::eBoolean); + tok.str("\"foo\"_userDefinedLiteral"); + ASSERT(tok.tokType() == Token::eOther); // should be eLiteral + tok.str("123_userDefinedLiteral"); + ASSERT(tok.tokType() == Token::eLiteral); + tok.str("0x123._userDefinedLiteral"); + ASSERT(tok.tokType() == Token::eLiteral); } void isStandardType() const { From 69b786d0730d6896b3fc4835608999f6342ce56d Mon Sep 17 00:00:00 2001 From: Gerbo Engels Date: Sat, 21 Jan 2023 22:48:39 +0100 Subject: [PATCH 2/5] Move tests to testtokenize --- test/testgarbage.cpp | 17 ----------------- test/testtokenize.cpp | 23 +++++++++++++++++++++++ 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/test/testgarbage.cpp b/test/testgarbage.cpp index 2a550fc4ae9..1b5e45cf769 100644 --- a/test/testgarbage.cpp +++ b/test/testgarbage.cpp @@ -267,7 +267,6 @@ class TestGarbage : public TestFixture { TEST_CASE(enumTrailingComma); TEST_CASE(nonGarbageCode1); // #8346 - TEST_CASE(userDefinedLiterals); // #11438, #10807 } #define checkCodeInternal(code, filename) checkCodeInternal_(code, filename, __FILE__, __LINE__) @@ -1824,22 +1823,6 @@ class TestGarbage : public TestFixture { " auto fn = []() -> foo* { return new foo(); };\n" "}"); } - - void userDefinedLiterals() { - // #11438 - ASSERT_NO_THROW(checkCode("bool f () { return 3ms < 3s; }")); - ASSERT_EQUALS("", errout.str()); - - // #10807 - ASSERT_NO_THROW(checkCode("struct S 
{\n" - " template \n" - " constexpr explicit S(const T& t) {}\n" - " static S zero() {\n" - " return S(0_s);\n" - " }\n" - "};\n")); - ASSERT_EQUALS("", errout.str()); - } }; REGISTER_TEST(TestGarbage) diff --git a/test/testtokenize.cpp b/test/testtokenize.cpp index ec6f324647a..20312c8508f 100644 --- a/test/testtokenize.cpp +++ b/test/testtokenize.cpp @@ -462,6 +462,8 @@ class TestTokenizer : public TestFixture { TEST_CASE(cpp20_default_bitfield_initializer); + TEST_CASE(userDefinedLiterals); // #11438, #10807 + TEST_CASE(cpp11init); } @@ -7469,6 +7471,27 @@ class TestTokenizer : public TestFixture { ASSERT_THROW(tokenizeAndStringify(code, settings), InternalError); } + void userDefinedLiterals() { + Settings settings; + + // #11438 + const char code[] = "bool f () { return 3ms < 3s; }"; + Tokenizer tokenizer(&settings1, this); + std::istringstream istr(code); + ASSERT(tokenizer.tokenize(istr, "test.cpp")); + const Token* token = Token::findsimplematch(tokenizer.tokens(), "3ms"); + ASSERT(token->tokType() == Token::eLiteral); + + // #10807 + ASSERT_NO_THROW(tokenizeAndStringify("struct S {\n" + " template \n" + " constexpr explicit S(const T& t) {}\n" + " static S zero() {\n" + " return S(0_s);\n" + " }\n" + "};\n", settings)); + } + void cpp11init() { #define testIsCpp11init(...) 
testIsCpp11init_(__FILE__, __LINE__, __VA_ARGS__) auto testIsCpp11init_ = [this](const char* file, int line, const char* code, const char* find, TokenImpl::Cpp11init expected) { From 3b8c0882f11235cdbfa29f0b5d99fba7ef1a3f58 Mon Sep 17 00:00:00 2001 From: Gerbo Engels Date: Sat, 21 Jan 2023 22:53:43 +0100 Subject: [PATCH 3/5] Add test for asm binary token --- test/testtokenize.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test/testtokenize.cpp b/test/testtokenize.cpp index 20312c8508f..922988005b2 100644 --- a/test/testtokenize.cpp +++ b/test/testtokenize.cpp @@ -995,6 +995,7 @@ class TestTokenizer : public TestFixture { ASSERT_EQUALS("asm ( \"mov ax , bx\" ) ;", tokenizeAndStringify("__asm { mov ax,bx };")); ASSERT_EQUALS("asm ( \"\"mov ax,bx\"\" ) ;", tokenizeAndStringify("__asm__ __volatile__ ( \"mov ax,bx\" );")); ASSERT_EQUALS("asm ( \"_emit 12h\" ) ;", tokenizeAndStringify("__asm _emit 12h ;")); + ASSERT_EQUALS("asm ( \"_emit 101010b\" ) ;", tokenizeAndStringify("__asm _emit 101010b ;")); ASSERT_EQUALS("asm ( \"mov a , b\" ) ;", tokenizeAndStringify("__asm mov a, b ;")); ASSERT_EQUALS("asm ( \"\"fnstcw %0\" : \"= m\" ( old_cw )\" ) ;", tokenizeAndStringify("asm volatile (\"fnstcw %0\" : \"= m\" (old_cw));")); ASSERT_EQUALS("asm ( \"\"fnstcw %0\" : \"= m\" ( old_cw )\" ) ;", tokenizeAndStringify(" __asm__ (\"fnstcw %0\" : \"= m\" (old_cw));")); From 2eac468eab9545b307ca887192b7a921b4de88c8 Mon Sep 17 00:00:00 2001 From: Gerbo Engels Date: Sat, 21 Jan 2023 23:10:05 +0100 Subject: [PATCH 4/5] Extract function with shared logic --- externals/simplecpp/simplecpp.h | 7 ++++++- lib/token.cpp | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/externals/simplecpp/simplecpp.h b/externals/simplecpp/simplecpp.h index 24443b074fd..6852f6909fe 100644 --- a/externals/simplecpp/simplecpp.h +++ b/externals/simplecpp/simplecpp.h @@ -113,7 +113,7 @@ namespace simplecpp { name = (std::isalpha(static_cast(string[0])) || string[0] == '_' || string[0] == 
'$') && (std::memchr(string.c_str(), '\'', string.size()) == nullptr); comment = string.size() > 1U && string[0] == '/' && (string[1] == '/' || string[1] == '*'); - number = std::isdigit(static_cast(string[0])) || (string.size() > 1U && string[0] == '-' && std::isdigit(static_cast(string[1]))); + number = isNumberLike(string); op = (string.size() == 1U) ? string[0] : '\0'; } @@ -162,6 +162,11 @@ namespace simplecpp { void printAll() const; void printOut() const; + + static bool isNumberLike(const TokenString &string) { + return std::isdigit(static_cast(string[0])) || (string.size() > 1U && string[0] == '-' && std::isdigit(static_cast(string[1]))); + } + private: TokenString string; diff --git a/lib/token.cpp b/lib/token.cpp index 441a0b2f7a2..748def985d4 100644 --- a/lib/token.cpp +++ b/lib/token.cpp @@ -22,6 +22,7 @@ #include "errortypes.h" #include "library.h" #include "settings.h" +#include "simplecpp.h" #include "symboldatabase.h" #include "tokenlist.h" #include "utils.h" @@ -139,7 +140,7 @@ void Token::update_property_info() else if (mTokType != eVariable && mTokType != eFunction && mTokType != eType && mTokType != eKeyword) tokType(eName); } - else if (std::isdigit((unsigned char)mStr[0]) || (mStr.length() > 1 && mStr[0] == '-' && std::isdigit((unsigned char)mStr[1]))) { + else if (simplecpp::Token::isNumberLike(mStr)) { if (MathLib::isInt(mStr) || MathLib::isFloat(mStr)) tokType(eNumber); else From 6bd09b576258c2d9cdc2de604ae65dc3692de131 Mon Sep 17 00:00:00 2001 From: Gerbo Engels Date: Sun, 22 Jan 2023 00:07:57 +0100 Subject: [PATCH 5/5] Update Makefile for new include dependency --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4ef48391d42..a657745dc42 100644 --- a/Makefile +++ b/Makefile @@ -606,7 +606,7 @@ $(libcppdir)/templatesimplifier.o: lib/templatesimplifier.cpp lib/color.h lib/co $(libcppdir)/timer.o: lib/timer.cpp lib/config.h lib/timer.h $(CXX) ${INCLUDE_FOR_LIB} $(CPPFLAGS) $(CXXFLAGS) -c 
-o $@ $(libcppdir)/timer.cpp -$(libcppdir)/token.o: lib/token.cpp lib/astutils.h lib/config.h lib/errortypes.h lib/importproject.h lib/library.h lib/mathlib.h lib/platform.h lib/settings.h lib/smallvector.h lib/sourcelocation.h lib/standards.h lib/suppressions.h lib/symboldatabase.h lib/templatesimplifier.h lib/timer.h lib/token.h lib/tokenlist.h lib/tokenrange.h lib/utils.h lib/valueflow.h +$(libcppdir)/token.o: lib/token.cpp externals/simplecpp/simplecpp.h lib/astutils.h lib/config.h lib/errortypes.h lib/importproject.h lib/library.h lib/mathlib.h lib/platform.h lib/settings.h lib/smallvector.h lib/sourcelocation.h lib/standards.h lib/suppressions.h lib/symboldatabase.h lib/templatesimplifier.h lib/timer.h lib/token.h lib/tokenlist.h lib/tokenrange.h lib/utils.h lib/valueflow.h $(CXX) ${INCLUDE_FOR_LIB} $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $(libcppdir)/token.cpp $(libcppdir)/tokenize.o: lib/tokenize.cpp externals/simplecpp/simplecpp.h lib/check.h lib/color.h lib/config.h lib/errorlogger.h lib/errortypes.h lib/importproject.h lib/library.h lib/mathlib.h lib/platform.h lib/preprocessor.h lib/settings.h lib/sourcelocation.h lib/standards.h lib/summaries.h lib/suppressions.h lib/symboldatabase.h lib/templatesimplifier.h lib/timer.h lib/token.h lib/tokenize.h lib/tokenlist.h lib/utils.h lib/valueflow.h