-
Notifications
You must be signed in to change notification settings - Fork 15.7k
[-Wunsafe-buffer-usage] Add check for custom printf/scanf functions #173096
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This commit adds the support for functions annotated with `__attribute__((__format__(__printf__, ...)))` (or `__scanf__`). These functions will be treated the same way as printf/scanf functions in the standard C library by `-Wunsafe-buffer-usage` rdar://143233737
|
@llvm/pr-subscribers-clang-analysis @llvm/pr-subscribers-clang Author: Ziqing Luo (ziqingluo-90) ChangesThis commit adds support for functions annotated with rdar://143233737 Full diff: https://github.com/llvm/llvm-project/pull/173096.diff 3 Files Affected:
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index fae5f8b8aa8e3..f9bba5d54e9c7 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -40,6 +40,7 @@ WARNING_GADGET(UnsafeBufferUsageCtorAttr)
WARNING_GADGET(DataInvocation)
WARNING_GADGET(UniquePtrArrayAccess)
WARNING_OPTIONAL_GADGET(UnsafeLibcFunctionCall)
+WARNING_OPTIONAL_GADGET(UnsafeFormatAttributedFunctionCall)
WARNING_OPTIONAL_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)`
FIXABLE_GADGET(ULCArraySubscript) // `DRE[any]` in an Unspecified Lvalue Context
FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 7ef20726d0ab9..7c21ec86af544 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -825,9 +825,11 @@ struct LibcFunNamePrefixSuffixParser {
//
// `UnsafeArg` is the output argument that will be set only if this function
// returns true.
-static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
- const unsigned FmtArgIdx, ASTContext &Ctx,
- bool isKprintf = false) {
+// `FmtArgIdx` is insignificant if its value is negative, meaning that format
+// arguments start at `FmtIdx` + 1.
+static bool hasUnsafeFormatOrSArg(ASTContext &Ctx, const CallExpr *Call,
+ const Expr *&UnsafeArg, const unsigned FmtIdx,
+ int FmtArgIdx = -1, bool isKprintf = false) {
class StringFormatStringHandler
: public analyze_format_string::FormatStringHandler {
const CallExpr *Call;
@@ -850,8 +852,8 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
unsigned PArgIdx = -1;
if (Precision.hasDataArgument())
- PArgIdx = Precision.getPositionalArgIndex() + FmtArgIdx;
- if (0 < PArgIdx && PArgIdx < Call->getNumArgs()) {
+ PArgIdx = Precision.getArgIndex() + FmtArgIdx;
+ if (0 <= PArgIdx && PArgIdx < Call->getNumArgs()) {
const Expr *PArg = Call->getArg(PArgIdx);
// Strip the cast if `PArg` is a cast-to-int expression:
@@ -886,9 +888,9 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
analyze_printf::PrintfConversionSpecifier::sArg)
return true; // continue parsing
- unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx;
+ unsigned ArgIdx = FS.getArgIndex() + FmtArgIdx;
- if (!(0 < ArgIdx && ArgIdx < Call->getNumArgs()))
+ if (!(0 <= ArgIdx && ArgIdx < Call->getNumArgs()))
// If the `ArgIdx` is invalid, give up.
return true; // continue parsing
@@ -921,12 +923,15 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
bool isUnsafeArgSet() { return UnsafeArgSet; }
};
- const Expr *Fmt = Call->getArg(FmtArgIdx);
+ const Expr *Fmt = Call->getArg(FmtIdx);
+ unsigned FmtArgStartingIdx =
+ FmtArgIdx < 0 ? FmtIdx + 1 : static_cast<unsigned>(FmtArgIdx);
if (auto *SL = dyn_cast<clang::StringLiteral>(Fmt->IgnoreParenImpCasts())) {
if (SL->getCharByteWidth() == 1) {
StringRef FmtStr = SL->getString();
- StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg, Ctx);
+ StringFormatStringHandler Handler(Call, FmtArgStartingIdx, UnsafeArg,
+ Ctx);
return analyze_format_string::ParsePrintfString(
Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(),
@@ -935,7 +940,8 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
}
if (auto FmtStr = SL->tryEvaluateString(Ctx)) {
- StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg, Ctx);
+ StringFormatStringHandler Handler(Call, FmtArgStartingIdx, UnsafeArg,
+ Ctx);
return analyze_format_string::ParsePrintfString(
Handler, FmtStr->data(), FmtStr->data() + FmtStr->size(),
Ctx.getLangOpts(), Ctx.getTargetInfo(), isKprintf) &&
@@ -946,7 +952,7 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
// In this case, this call is considered unsafe if at least one argument
// (including the format argument) is unsafe pointer.
return llvm::any_of(
- llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()),
+ llvm::make_range(Call->arg_begin() + FmtArgStartingIdx, Call->arg_end()),
[&UnsafeArg, &Ctx](const Expr *Arg) -> bool {
if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg, Ctx)) {
UnsafeArg = Arg;
@@ -1161,7 +1167,7 @@ static bool hasUnsafePrintfStringArg(const CallExpr &Node, ASTContext &Ctx,
// It is a fprintf:
const Expr *UnsafeArg;
- if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 1, Ctx, false)) {
+ if (hasUnsafeFormatOrSArg(Ctx, &Node, UnsafeArg, 1)) {
Result.addNode(Tag, DynTypedNode::create(*UnsafeArg));
return true;
}
@@ -1175,7 +1181,7 @@ static bool hasUnsafePrintfStringArg(const CallExpr &Node, ASTContext &Ctx,
if (auto *II = FD->getIdentifier())
isKprintf = II->getName() == "kprintf";
- if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 0, Ctx, isKprintf)) {
+ if (hasUnsafeFormatOrSArg(Ctx, &Node, UnsafeArg, 0, -1, isKprintf)) {
Result.addNode(Tag, DynTypedNode::create(*UnsafeArg));
return true;
}
@@ -1190,7 +1196,7 @@ static bool hasUnsafePrintfStringArg(const CallExpr &Node, ASTContext &Ctx,
// second is an integer, it is a snprintf:
const Expr *UnsafeArg;
- if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 2, Ctx, false)) {
+ if (hasUnsafeFormatOrSArg(Ctx, &Node, UnsafeArg, 2)) {
Result.addNode(Tag, DynTypedNode::create(*UnsafeArg));
return true;
}
@@ -2068,6 +2074,7 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget {
constexpr static const char *const UnsafeVaListTag =
"UnsafeLibcFunctionCall_va_list";
+public:
enum UnsafeKind {
OTHERS = 0, // no specific information, the callee function is unsafe
SPRINTF = 1, // never call `-sprintf`s, call `-snprintf`s instead.
@@ -2080,7 +2087,6 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget {
// considered unsafe as it is not compile-time check
} WarnedFunKind = OTHERS;
-public:
UnsafeLibcFunctionCallGadget(const MatchResult &Result)
: WarningGadget(Kind::UnsafeLibcFunctionCall),
Call(Result.getNodeAs<CallExpr>(Tag)) {
@@ -2171,6 +2177,85 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget {
SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
};
+class UnsafeFormatAttributedFunctionCallGadget : public WarningGadget {
+ const CallExpr *const Call;
+ const Expr *UnsafeArg = nullptr;
+ constexpr static const char *const Tag = "UnsafeFormatAttributedFunctionCall";
+ constexpr static const char *const UnsafeStringTag =
+ "UnsafeFormatAttributedFunctionCall_string";
+
+public:
+ UnsafeFormatAttributedFunctionCallGadget(const MatchResult &Result)
+ : WarningGadget(Kind::UnsafeLibcFunctionCall),
+ Call(Result.getNodeAs<CallExpr>(Tag)),
+ UnsafeArg(Result.getNodeAs<Expr>(UnsafeStringTag)) {}
+
+ static bool matches(const Stmt *S, ASTContext &Ctx,
+ const UnsafeBufferUsageHandler *Handler,
+ MatchResult &Result) {
+ if (ignoreUnsafeLibcCall(Ctx, *S, Handler))
+ return false;
+ auto *CE = dyn_cast<CallExpr>(S);
+ if (!CE || !CE->getDirectCallee())
+ return false;
+ const auto *FD = dyn_cast<FunctionDecl>(CE->getDirectCallee());
+ if (!FD)
+ return false;
+
+ const FormatAttr *Attr = nullptr;
+ bool IsPrintf = false;
+ bool AnyAttr = llvm::any_of(
+ FD->specific_attrs<FormatAttr>(),
+ [&Attr, &IsPrintf](const FormatAttr *FA) -> bool {
+ if (const auto *II = FA->getType()) {
+ if (II->getName() == "printf" || II->getName() == "scanf") {
+ Attr = FA;
+ IsPrintf = II->getName() == "printf";
+ return true;
+ }
+ }
+ return false;
+ });
+ const Expr *UnsafeArg;
+
+ if (AnyAttr && !IsPrintf &&
+ (CE->getNumArgs() >= static_cast<unsigned>(Attr->getFirstArg()))) {
+ // for scanf-like functions, any format argument is considered unsafe:
+ Result.addNode(Tag, DynTypedNode::create(*CE));
+ return true;
+ }
+ if (AnyAttr && libc_func_matchers::hasUnsafeFormatOrSArg(
+ Ctx, CE, UnsafeArg,
+ // FormatAttribute indexes are 1-based:
+ Attr->getFormatIdx() - 1, Attr->getFirstArg() - 1)) {
+ Result.addNode(Tag, DynTypedNode::create(*CE));
+ Result.addNode(UnsafeStringTag, DynTypedNode::create(*UnsafeArg));
+ return true;
+ }
+ return false;
+ }
+
+ const Stmt *getBaseStmt() const { return Call; }
+
+ SourceLocation getSourceLoc() const override { return Call->getBeginLoc(); }
+
+ void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
+ bool IsRelatedToDecl,
+ ASTContext &Ctx) const override {
+ if (UnsafeArg)
+ Handler.handleUnsafeLibcCall(
+ Call, UnsafeLibcFunctionCallGadget::UnsafeKind::STRING, Ctx,
+ UnsafeArg);
+ else
+ Handler.handleUnsafeLibcCall(
+ Call, UnsafeLibcFunctionCallGadget::UnsafeKind::OTHERS, Ctx);
+ }
+
+ DeclUseList getClaimedVarUseSites() const override { return {}; }
+
+ SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
+};
+
// Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
// Context (see `findStmtsInUnspecifiedLvalueContext`).
// Note here `[]` is the built-in subscript operator.
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
index 4f1af79609223..8df65ebc2eaf0 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -1,10 +1,10 @@
-// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -Wno-gcc-compat\
// RUN: -verify %s
-// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -Wno-gcc-compat\
// RUN: -verify %s -x objective-c++
-// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call \
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call -Wno-gcc-compat\
// RUN: -verify %s
-// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call \
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call -Wno-gcc-compat\
// RUN: -verify %s -DTEST_STD_NS
typedef struct {} FILE;
@@ -255,3 +255,27 @@ void dontCrashForInvalidFormatString() {
snprintf((char*)0, 0, "%");
snprintf((char*)0, 0, "\0");
}
+
+
+// Also warn about unsafe printf/scanf-like functions:
+void myprintf(const char *F, ...) __attribute__((__format__ (__printf__, 1, 2)));
+void myprintf_2(const char *F, int irrelevant, const char *Str) __attribute__((__format__ (__printf__, 1, 3)));
+void myscanf(const char *F, ...) __attribute__((__format__ (__scanf__, 1, 2)));
+
+void test_myprintf(char * Str, std::string StdStr) {
+ myprintf("hello", Str);
+ myprintf("hello %s", StdStr.c_str());
+ myprintf("hello %s", Str); // expected-warning{{function 'myprintf' is unsafe}} \
+ expected-note{{string argument is not guaranteed to be null-terminated}}
+
+ myprintf_2("hello", 0, Str);
+ myprintf_2("hello %s", 0, StdStr.c_str());
+ myprintf_2("hello %s", 0, Str); // expected-warning{{function 'myprintf_2' is unsafe}} \
+ expected-note{{string argument is not guaranteed to be null-terminated}}
+ myscanf("hello %s");
+ myscanf("hello %s", Str); // expected-warning{{function 'myscanf' is unsafe}}
+
+ int X;
+
+ myscanf("hello %d", &X); // expected-warning{{function 'myscanf' is unsafe}}
+}
|
|
CC @dtarditi |
ojhunt
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are all just style nits, and a request for some additional tests
|
|
||
|
|
||
| // Also warn about unsafe printf/scanf-like functions: | ||
| void myprintf(const char *F, ...) __attribute__((__format__ (__printf__, 1, 2))); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
could we add a few tests where the format string isn't the first arg? and also something where the format string comes equal to and after the first arg index? (only as a "do something sane/don't crash" test, not because anyone should ever ever do this :D
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It will be a compilation error when the format string does not come before the first argument. There are existing tests for such cases:
https://github.com/llvm/llvm-project/blob/38cdadd9c74509be636e41778043e4cd270be04b/clang/test/Sema/attr-format.c#L1C1-L14C1
I will sure add the case where the format string is not the first argument.
|
Thank you @ojhunt for the comments! I've addressed them, please take another look. |
ojhunt
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
| } | ||
| if (AnyAttr && libc_func_matchers::hasUnsafeFormatOrSArg( | ||
| Ctx, CE, UnsafeArg, | ||
| // FormatAttribute indexes are 1-based: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(Old man shakes fist at clouds comment, not a review comment :D)
the 1 based indexing of these attributes will never stop being infuriating to me - I assume that the intent was 0 as the return type but clearly that never seems to have ended up happening
A downstream test recovers a false negative introduced in llvm#173096, where it changed the use of variable `FmtArgIdx` to `FmtArgStartingIdx`. The two variables are different in that `FmtArgIdx` refers to the index of the format string and `FmtArgStartingIdx` refers to the index of the first format argument. The consequence is that the analysis will miss reporting an unsafe format string. This fix also upstreams the test catching the FN.
…174253) A downstream test recovers a false negative introduced in #173096, where it changed the use of variable `FmtArgIdx` to `FmtArgStartingIdx`. The two variables are different in that `FmtArgIdx` refers to the index of the format string and `FmtArgStartingIdx` refers to the index of the first format argument. The consequence is that the analysis will miss reporting an unsafe format string. This fix also upstreams the test catching the FN.
|
Sadly, our CI reports an assertion failing when building the re2c library (version 3.1): The Note that we're using libc++ and not libstdc++ in our LLVM builds and our Functional testing CI builds LLVM with assertions turned on. |
Are you able to provide a stack trace, and if possible the repro case that should have been produced on the crash? (also an out of bounds assertion from this PR must be a good definition of irony?) |
| const Expr *Fmt = Call->getArg(FmtArgIdx); | ||
| const Expr *Fmt = Call->getArg(FmtIdx); | ||
| unsigned FmtArgStartingIdx = | ||
| FmtArgIdx.has_value() ? static_cast<unsigned>(*FmtArgIdx) : FmtIdx + 1; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No bounds check on FmtIdx+1, could be a source of the sadness?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@pawosm-arm would you mind seeing if doing an return on an OoB FmtArgStartingIdx resolves the assertion failure?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@pawosm-arm would you mind seeing if doing an return on an OoB FmtArgStartingIdx resolves the assertion failure?
In the morning I guess.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can confirm that adding the following before Call->getArg(FmtIdx); makes the crash goes away.
if (FmtIdx >= Call->getNumArgs())
return false;
|
We also observed the same assertion failure. Currently running cvise to reduce it. |
It's closed for the night right now, but I can copy-paste the latest trace: |
Did the crash include the often helpful "while parsing X" comment? (I'm guessing not, but it might help speed up making the test case if it's not trivially obvious |
|
Here's the reduced code: repro command: |
Yay thanks! |
…lvm#173096) This commit adds support for functions annotated with `__attribute__((__format__(__printf__, ...)))` (or `__scanf__`). These functions will be treated the same way as printf/scanf functions in the standard C library by `-Wunsafe-buffer-usage` rdar://143233737
llvm#174253) A downstream test recovers a false negative introduced in llvm#173096, where it changed the use of variable `FmtArgIdx` to `FmtArgStartingIdx`. The two variables are different in that `FmtArgIdx` refers to the index of the format string and `FmtArgStartingIdx` refers to the index of the first format argument. The consequence is that the analysis will miss reporting an unsafe format string. This fix also upstreams the test catching the FN.
|
What's the plan for fixing it? If it will take a while, please revert it. |
|
Let me prod @ziqingluo-90 |
|
Thanks for the reporting, I'm on it. |
Isn't |
Yup, based on the test case I think the argument count tests might not be considering the We can trivially fix the assertion with a bounds check, but this makes me suspicious of the interaction of these attributes with member functions in general |
…canf functions llvm#173096" The previous PR llvm#173096 assumes that format attribute parameters always refer to valid indices of arguments. It is a wrong assumption in itself because the second attribute parameter could specify the index after the last named parameter for variadic functions and no actual arguments passed beyond named parameters. In addition, clang (possibly incorrectly) allows the following uses of the attribute: ``` void f(const char *) __attribute__((__format__ (__printf__, 1, 2))); // The second attribute argument 2 will not refer to any valid argument at any call of 'f' void g(const char *) __attribute__((__format__ (__printf__, 1, 99))); // Clang is even quiet on this, if assertions are disabled :( ```
…canf functions #173096" (#174683) The previous PR #173096 assumes that format attribute parameters always refer to valid indices of arguments. It is a wrong assumption in itself because the second attribute parameter could specify the index after the last named parameter for variadic functions and no actual arguments passed beyond named parameters. In addition, clang (possibly incorrectly) allows the following uses of the attribute: ``` void f(const char *) __attribute__((__format__ (__printf__, 1, 2))); // The second attribute argument 2 will not refer to any valid argument at any call of 'f' void g(const char *) __attribute__((__format__ (__printf__, 1, 99))); // Clang is even quiet on this, if assertions are disabled :( ```
This commit adds support for functions annotated with
__attribute__((__format__(__printf__, ...)))(or__scanf__). These functions will be treated the same way as printf/scanf functions in the standard C library by-Wunsafe-buffer-usagerdar://143233737