Skip to content

Conversation

@ziqingluo-90
Copy link
Contributor

This commit adds support for functions annotated with __attribute__((__format__(__printf__, ...))) (or __scanf__). These functions will be treated the same way as printf/scanf functions in the standard C library by -Wunsafe-buffer-usage

rdar://143233737

This commit adds the support for functions annotated with
`__attribute__((__format__(__printf__, ...)))` (or `__scanf__`).
These functions will be treated the same way as printf/scanf functions
in the standard C library by `-Wunsafe-buffer-usage`

rdar://143233737
@llvmbot llvmbot added clang Clang issues not falling into any other category clang:analysis labels Dec 19, 2025
@llvmbot
Copy link
Member

llvmbot commented Dec 19, 2025

@llvm/pr-subscribers-clang-analysis

@llvm/pr-subscribers-clang

Author: Ziqing Luo (ziqingluo-90)

Changes

This commit adds support for functions annotated with __attribute__((__format__(__printf__, ...))) (or __scanf__). These functions will be treated the same way as printf/scanf functions in the standard C library by -Wunsafe-buffer-usage

rdar://143233737


Full diff: https://github.com/llvm/llvm-project/pull/173096.diff

3 Files Affected:

  • (modified) clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def (+1)
  • (modified) clang/lib/Analysis/UnsafeBufferUsage.cpp (+100-15)
  • (modified) clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp (+28-4)
diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index fae5f8b8aa8e3..f9bba5d54e9c7 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -40,6 +40,7 @@ WARNING_GADGET(UnsafeBufferUsageCtorAttr)
 WARNING_GADGET(DataInvocation)
 WARNING_GADGET(UniquePtrArrayAccess)
 WARNING_OPTIONAL_GADGET(UnsafeLibcFunctionCall)
+WARNING_OPTIONAL_GADGET(UnsafeFormatAttributedFunctionCall)
 WARNING_OPTIONAL_GADGET(SpanTwoParamConstructor) // Uses of `std::span(arg0, arg1)`
 FIXABLE_GADGET(ULCArraySubscript)          // `DRE[any]` in an Unspecified Lvalue Context
 FIXABLE_GADGET(DerefSimplePtrArithFixable)
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 7ef20726d0ab9..7c21ec86af544 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -825,9 +825,11 @@ struct LibcFunNamePrefixSuffixParser {
 //
 // `UnsafeArg` is the output argument that will be set only if this function
 // returns true.
-static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
-                                  const unsigned FmtArgIdx, ASTContext &Ctx,
-                                  bool isKprintf = false) {
+// `FmtArgIdx` is insignificant if its value is negative, meaning that format
+// arguments start at `FmtIdx` + 1.
+static bool hasUnsafeFormatOrSArg(ASTContext &Ctx, const CallExpr *Call,
+                                  const Expr *&UnsafeArg, const unsigned FmtIdx,
+                                  int FmtArgIdx = -1, bool isKprintf = false) {
   class StringFormatStringHandler
       : public analyze_format_string::FormatStringHandler {
     const CallExpr *Call;
@@ -850,8 +852,8 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
       unsigned PArgIdx = -1;
 
       if (Precision.hasDataArgument())
-        PArgIdx = Precision.getPositionalArgIndex() + FmtArgIdx;
-      if (0 < PArgIdx && PArgIdx < Call->getNumArgs()) {
+        PArgIdx = Precision.getArgIndex() + FmtArgIdx;
+      if (0 <= PArgIdx && PArgIdx < Call->getNumArgs()) {
         const Expr *PArg = Call->getArg(PArgIdx);
 
         // Strip the cast if `PArg` is a cast-to-int expression:
@@ -886,9 +888,9 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
           analyze_printf::PrintfConversionSpecifier::sArg)
         return true; // continue parsing
 
-      unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx;
+      unsigned ArgIdx = FS.getArgIndex() + FmtArgIdx;
 
-      if (!(0 < ArgIdx && ArgIdx < Call->getNumArgs()))
+      if (!(0 <= ArgIdx && ArgIdx < Call->getNumArgs()))
         // If the `ArgIdx` is invalid, give up.
         return true; // continue parsing
 
@@ -921,12 +923,15 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
     bool isUnsafeArgSet() { return UnsafeArgSet; }
   };
 
-  const Expr *Fmt = Call->getArg(FmtArgIdx);
+  const Expr *Fmt = Call->getArg(FmtIdx);
+  unsigned FmtArgStartingIdx =
+      FmtArgIdx < 0 ? FmtIdx + 1 : static_cast<unsigned>(FmtArgIdx);
 
   if (auto *SL = dyn_cast<clang::StringLiteral>(Fmt->IgnoreParenImpCasts())) {
     if (SL->getCharByteWidth() == 1) {
       StringRef FmtStr = SL->getString();
-      StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg, Ctx);
+      StringFormatStringHandler Handler(Call, FmtArgStartingIdx, UnsafeArg,
+                                        Ctx);
 
       return analyze_format_string::ParsePrintfString(
                  Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(),
@@ -935,7 +940,8 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
     }
 
     if (auto FmtStr = SL->tryEvaluateString(Ctx)) {
-      StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg, Ctx);
+      StringFormatStringHandler Handler(Call, FmtArgStartingIdx, UnsafeArg,
+                                        Ctx);
       return analyze_format_string::ParsePrintfString(
                  Handler, FmtStr->data(), FmtStr->data() + FmtStr->size(),
                  Ctx.getLangOpts(), Ctx.getTargetInfo(), isKprintf) &&
@@ -946,7 +952,7 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
   // In this case, this call is considered unsafe if at least one argument
   // (including the format argument) is unsafe pointer.
   return llvm::any_of(
-      llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()),
+      llvm::make_range(Call->arg_begin() + FmtArgStartingIdx, Call->arg_end()),
       [&UnsafeArg, &Ctx](const Expr *Arg) -> bool {
         if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg, Ctx)) {
           UnsafeArg = Arg;
@@ -1161,7 +1167,7 @@ static bool hasUnsafePrintfStringArg(const CallExpr &Node, ASTContext &Ctx,
     // It is a fprintf:
     const Expr *UnsafeArg;
 
-    if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 1, Ctx, false)) {
+    if (hasUnsafeFormatOrSArg(Ctx, &Node, UnsafeArg, 1)) {
       Result.addNode(Tag, DynTypedNode::create(*UnsafeArg));
       return true;
     }
@@ -1175,7 +1181,7 @@ static bool hasUnsafePrintfStringArg(const CallExpr &Node, ASTContext &Ctx,
 
     if (auto *II = FD->getIdentifier())
       isKprintf = II->getName() == "kprintf";
-    if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 0, Ctx, isKprintf)) {
+    if (hasUnsafeFormatOrSArg(Ctx, &Node, UnsafeArg, 0, -1, isKprintf)) {
       Result.addNode(Tag, DynTypedNode::create(*UnsafeArg));
       return true;
     }
@@ -1190,7 +1196,7 @@ static bool hasUnsafePrintfStringArg(const CallExpr &Node, ASTContext &Ctx,
       // second is an integer, it is a snprintf:
       const Expr *UnsafeArg;
 
-      if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 2, Ctx, false)) {
+      if (hasUnsafeFormatOrSArg(Ctx, &Node, UnsafeArg, 2)) {
         Result.addNode(Tag, DynTypedNode::create(*UnsafeArg));
         return true;
       }
@@ -2068,6 +2074,7 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget {
   constexpr static const char *const UnsafeVaListTag =
       "UnsafeLibcFunctionCall_va_list";
 
+public:
   enum UnsafeKind {
     OTHERS = 0,  // no specific information, the callee function is unsafe
     SPRINTF = 1, // never call `-sprintf`s, call `-snprintf`s instead.
@@ -2080,7 +2087,6 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget {
                  // considered unsafe as it is not compile-time check
   } WarnedFunKind = OTHERS;
 
-public:
   UnsafeLibcFunctionCallGadget(const MatchResult &Result)
       : WarningGadget(Kind::UnsafeLibcFunctionCall),
         Call(Result.getNodeAs<CallExpr>(Tag)) {
@@ -2171,6 +2177,85 @@ class UnsafeLibcFunctionCallGadget : public WarningGadget {
   SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
 };
 
+class UnsafeFormatAttributedFunctionCallGadget : public WarningGadget {
+  const CallExpr *const Call;
+  const Expr *UnsafeArg = nullptr;
+  constexpr static const char *const Tag = "UnsafeFormatAttributedFunctionCall";
+  constexpr static const char *const UnsafeStringTag =
+      "UnsafeFormatAttributedFunctionCall_string";
+
+public:
+  UnsafeFormatAttributedFunctionCallGadget(const MatchResult &Result)
+      : WarningGadget(Kind::UnsafeLibcFunctionCall),
+        Call(Result.getNodeAs<CallExpr>(Tag)),
+        UnsafeArg(Result.getNodeAs<Expr>(UnsafeStringTag)) {}
+
+  static bool matches(const Stmt *S, ASTContext &Ctx,
+                      const UnsafeBufferUsageHandler *Handler,
+                      MatchResult &Result) {
+    if (ignoreUnsafeLibcCall(Ctx, *S, Handler))
+      return false;
+    auto *CE = dyn_cast<CallExpr>(S);
+    if (!CE || !CE->getDirectCallee())
+      return false;
+    const auto *FD = dyn_cast<FunctionDecl>(CE->getDirectCallee());
+    if (!FD)
+      return false;
+
+    const FormatAttr *Attr = nullptr;
+    bool IsPrintf = false;
+    bool AnyAttr = llvm::any_of(
+        FD->specific_attrs<FormatAttr>(),
+        [&Attr, &IsPrintf](const FormatAttr *FA) -> bool {
+          if (const auto *II = FA->getType()) {
+            if (II->getName() == "printf" || II->getName() == "scanf") {
+              Attr = FA;
+              IsPrintf = II->getName() == "printf";
+              return true;
+            }
+          }
+          return false;
+        });
+    const Expr *UnsafeArg;
+
+    if (AnyAttr && !IsPrintf &&
+        (CE->getNumArgs() >= static_cast<unsigned>(Attr->getFirstArg()))) {
+      // for scanf-like functions, any format argument is considered unsafe:
+      Result.addNode(Tag, DynTypedNode::create(*CE));
+      return true;
+    }
+    if (AnyAttr && libc_func_matchers::hasUnsafeFormatOrSArg(
+                       Ctx, CE, UnsafeArg,
+                       // FormatAttribute indexes are 1-based:
+                       Attr->getFormatIdx() - 1, Attr->getFirstArg() - 1)) {
+      Result.addNode(Tag, DynTypedNode::create(*CE));
+      Result.addNode(UnsafeStringTag, DynTypedNode::create(*UnsafeArg));
+      return true;
+    }
+    return false;
+  }
+
+  const Stmt *getBaseStmt() const { return Call; }
+
+  SourceLocation getSourceLoc() const override { return Call->getBeginLoc(); }
+
+  void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
+                             bool IsRelatedToDecl,
+                             ASTContext &Ctx) const override {
+    if (UnsafeArg)
+      Handler.handleUnsafeLibcCall(
+          Call, UnsafeLibcFunctionCallGadget::UnsafeKind::STRING, Ctx,
+          UnsafeArg);
+    else
+      Handler.handleUnsafeLibcCall(
+          Call, UnsafeLibcFunctionCallGadget::UnsafeKind::OTHERS, Ctx);
+  }
+
+  DeclUseList getClaimedVarUseSites() const override { return {}; }
+
+  SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
+};
+
 // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
 // Context (see `findStmtsInUnspecifiedLvalueContext`).
 // Note here `[]` is the built-in subscript operator.
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
index 4f1af79609223..8df65ebc2eaf0 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -1,10 +1,10 @@
-// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -Wno-gcc-compat\
 // RUN:            -verify %s
-// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage \
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage -Wno-gcc-compat\
 // RUN:            -verify %s -x objective-c++
-// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call \
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call -Wno-gcc-compat\
 // RUN:            -verify %s
-// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call \
+// RUN: %clang_cc1 -std=c++20 -Wno-all -Wunsafe-buffer-usage-in-libc-call -Wno-gcc-compat\
 // RUN:            -verify %s -DTEST_STD_NS
 
 typedef struct {} FILE;
@@ -255,3 +255,27 @@ void dontCrashForInvalidFormatString() {
   snprintf((char*)0, 0, "%");
   snprintf((char*)0, 0, "\0");
 }
+
+
+// Also warn about unsafe printf/scanf-like functions:
+void myprintf(const char *F, ...) __attribute__((__format__ (__printf__, 1, 2)));
+void myprintf_2(const char *F, int irrelevant, const char *Str) __attribute__((__format__ (__printf__, 1, 3)));
+void myscanf(const char *F, ...) __attribute__((__format__ (__scanf__, 1, 2)));
+
+void test_myprintf(char * Str, std::string StdStr) {
+  myprintf("hello", Str);
+  myprintf("hello %s", StdStr.c_str());
+  myprintf("hello %s", Str);  // expected-warning{{function 'myprintf' is unsafe}} \
+			         expected-note{{string argument is not guaranteed to be null-terminated}}
+
+  myprintf_2("hello", 0, Str);
+  myprintf_2("hello %s", 0, StdStr.c_str());
+  myprintf_2("hello %s", 0, Str);  // expected-warning{{function 'myprintf_2' is unsafe}} \
+			              expected-note{{string argument is not guaranteed to be null-terminated}}
+  myscanf("hello %s");
+  myscanf("hello %s", Str); // expected-warning{{function 'myscanf' is unsafe}}
+
+  int X;
+
+  myscanf("hello %d", &X); // expected-warning{{function 'myscanf' is unsafe}}
+}

@ziqingluo-90
Copy link
Contributor Author

CC @dtarditi

Copy link
Contributor

@ojhunt ojhunt left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are all just style nits, and a request for some additional tests



// Also warn about unsafe printf/scanf-like functions:
void myprintf(const char *F, ...) __attribute__((__format__ (__printf__, 1, 2)));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we add a few tests where the format string isn't the first arg? And also something where the format string index is equal to, or comes after, the first arg index? (Only as a "do something sane/don't crash" test, not because anyone should ever ever do this :D)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It will be a compilation error when the format string does not come before the first argument. There are existing tests for such cases:
https://github.com/llvm/llvm-project/blob/38cdadd9c74509be636e41778043e4cd270be04b/clang/test/Sema/attr-format.c#L1C1-L14C1

I will be sure to add the case where the format string is not the first argument.

@ziqingluo-90
Copy link
Contributor Author

Thank you @ojhunt for the comments! I've addressed them, please take another look.

Copy link
Contributor

@ojhunt ojhunt left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

}
if (AnyAttr && libc_func_matchers::hasUnsafeFormatOrSArg(
Ctx, CE, UnsafeArg,
// FormatAttribute indexes are 1-based:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Old man shakes fist at clouds comment, not a review comment :D)

the 1 based indexing of these attributes will never stop being infuriating to me - I assume that the intent was 0 as the return type but clearly that never seems to have ended up happening

@ziqingluo-90 ziqingluo-90 merged commit 81b4664 into llvm:main Jan 2, 2026
10 checks passed
@ziqingluo-90 ziqingluo-90 deleted the eng/PR-143233737 branch January 2, 2026 19:10
ziqingluo-90 added a commit to ziqingluo-90/apple-llvm-project that referenced this pull request Jan 2, 2026
A downstream test uncovers a false negative introduced in llvm#173096,
where it changed the use of variable `FmtArgIdx` to
`FmtArgStartingIdx`.  The two variables are different in that
`FmtArgIdx` refers to the index of the format string and
`FmtArgStartingIdx` refers to the index of the first format
argument. The consequence is that the analysis will miss reporting an
unsafe format string.

This fix also upstreams the test catching the FN.
ziqingluo-90 added a commit that referenced this pull request Jan 3, 2026
…174253)

A downstream test uncovers a false negative introduced in #173096, where
it changed the use of variable `FmtArgIdx` to `FmtArgStartingIdx`. The
two variables are different in that `FmtArgIdx` refers to the index of
the format string and `FmtArgStartingIdx` refers to the index of the
first format argument. The consequence is that the analysis will miss
reporting an unsafe format string.

This fix also upstreams the test catching the FN.
@pawosm-arm
Copy link
Contributor

pawosm-arm commented Jan 5, 2026

Sadly, our CI reports an assertion failing when building the re2c library (version 3.1):

04:23:47               const Expr *clang::CallExpr::getArg(unsigned int) const: Assertion `
04:23:47               Arg < getNumArgs() && "Arg access out of range!"' failed.

The git bisect undeniably points at this PR:

81b46646fb5eb34559ef1e31d0ee83a69c18a301 is the first bad commit
commit 81b46646fb5eb34559ef1e31d0ee83a69c18a301
Author: Ziqing Luo <ziqing_luo@apple.com>
Date:   Fri Jan 2 11:10:31 2026 -0800

    [-Wunsafe-buffer-usage] Add check for custom printf/scanf functions (#173096)

    This commit adds support for functions annotated with
    `__attribute__((__format__(__printf__, ...)))` (or `__scanf__`). These
    functions will be treated the same way as printf/scanf functions in the
    standard C library by `-Wunsafe-buffer-usage`

    rdar://143233737

 .../Analysis/Analyses/UnsafeBufferUsageGadgets.def |   1 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp           | 139 +++++++++++++++++----
 .../warn-unsafe-buffer-usage-libc-functions.cpp    |  39 +++++-
 3 files changed, 150 insertions(+), 29 deletions(-)

Note that we're using libc++ and not libstdc++ in our LLVM builds and our Functional testing CI builds LLVM with assertions turned on.

@ojhunt
Copy link
Contributor

ojhunt commented Jan 5, 2026

Sadly, our CI reports an assertion failing when building the re2c library (version 3.1):

Are you able to provide a stack trace, and if possible the repro case that should have been produced on the crash?

(also an out of bounds assertion from this PR must be a good definition of irony?)

const Expr *Fmt = Call->getArg(FmtArgIdx);
const Expr *Fmt = Call->getArg(FmtIdx);
unsigned FmtArgStartingIdx =
FmtArgIdx.has_value() ? static_cast<unsigned>(*FmtArgIdx) : FmtIdx + 1;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No bounds check on FmtIdx+1, could be a source of the sadness?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ziqingluo-90 ^

@pawosm-arm would you mind seeing if doing an early return on an out-of-bounds (OoB) FmtArgStartingIdx resolves the assertion failure?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pawosm-arm would you mind seeing if doing an early return on an out-of-bounds (OoB) FmtArgStartingIdx resolves the assertion failure?

In the morning I guess.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can confirm that adding the following before Call->getArg(FmtIdx); makes the crash go away.

  if (FmtIdx >= Call->getNumArgs())
    return false;

@ZequanWu
Copy link
Contributor

ZequanWu commented Jan 5, 2026

We also observed the same assertion failure. Currently running cvise to reduce it.

@pawosm-arm
Copy link
Contributor

Are you able to provide a stack trace, and if possible the repro case that should have been produced on the crash?

It's closed for the night right now, but I can copy-paste the latest trace:

armclang++: clang/include/clang/AST/Expr.h:3152: const Expr *clang::CallExpr::getArg(unsigned int) const: Assertion `Arg < getNumArgs() && "Arg access out of range!"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script.
Stack dump:
0.      Program arguments: arm-software/linux/build/atfl/bin/armclang++ -DHAVE_CONFIG_H -I. -I.. -std=c++11 -W -Wall -Wextra -Weffc++ -pedantic -Wformat=2 -Wredundant-decls -Wsuggest-attribute=format -Wconversion -Wsign-conversion -Wold-style-cast -Werror=return-type -O2 -Weverything -Wno-unknown-warning-option -Wno-reserved-id-macro -Wno-padded -Wno-nested-anon-types -Wno-reserved-identifier -Wno-shadow-field-in-constructor -Wno-undefined-func-template -Wno-c++98-compat -Wno-c++98-compat-pedantic -DRE2C_STDLIB_DIR=\"re2c/share/re2c/stdlib/\" -g -O2 -MT src/adfa/adfa.o -MD -MP -MF src/adfa/.deps/adfa.Tpo -c -o src/adfa/adfa.o ../src/adfa/adfa.cc
1.      <eof> parser at end of file
 #0 0x0000be115b06ac88 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (arm-software/linux/build/atfl/bin/armclang+++0x421ac88)
 #1 0x0000be115b068814 llvm::sys::RunSignalHandlers() (arm-software/linux/build/atfl/bin/armclang+++0x4218814)
 #2 0x0000be115afdc37c CrashRecoverySignalHandler(int) CrashRecoveryContext.cpp:0:0
 #3 0x000051981f5aa8f8 (linux-vdso.so.1+0x8f8)
 #4 0x000051981f8c2008 __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
 #5 0x000051981f87a83c gsignal ./signal/../sysdeps/posix/raise.c:27:6
 #6 0x000051981f867134 abort ./stdlib/abort.c:81:7
 #7 0x000051981f874114 __assert_fail_base ./assert/assert.c:91:7
 #8 0x000051981f87418c (/lib/aarch64-linux-gnu/libc.so.6+0x3418c)
 #9 0x0000be115dbd13c0 libc_func_matchers::hasUnsafeFormatOrSArg(clang::ASTContext&, clang::CallExpr const*, clang::Expr const*&, unsigned int, std::__1::optional<unsigned int const>, bool) UnsafeBufferUsage.cpp:0:0
#10 0x0000be115dbccde4 WarningGadgetMatcher::matches(clang::DynTypedNode const&, clang::ASTContext&, clang::UnsafeBufferUsageHandler const&) UnsafeBufferUsage.cpp:0:0
#11 0x0000be115dbd1ce0 MatchDescendantVisitor::TraverseStmt(clang::Stmt*) UnsafeBufferUsage.cpp:0:0
#12 0x0000be115df1c050 clang::DynamicRecursiveASTVisitorBase<false>::TraverseCompoundStmt(clang::CompoundStmt*) (arm-software/linux/build/atfl/bin/armclang+++0x70cc050)
#13 0x0000be115def3984 clang::RecursiveASTVisitor<(anonymous namespace)::Impl<false>>::TraverseStmt(clang::Stmt*, llvm::SmallVectorImpl<llvm::PointerIntPair<clang::Stmt*, 1u, bool, llvm::PointerLikeTypeTraits<clang::Stmt*>, llvm::PointerIntPairInfo<clang::Stmt*, 1u, llvm::PointerLikeTypeTraits<clang::Stmt*>>>>*) DynamicRecursiveASTVisitor.cpp:0:0
#14 0x0000be115dbd1d04 MatchDescendantVisitor::TraverseStmt(clang::Stmt*) UnsafeBufferUsage.cpp:0:0
#15 0x0000be115df1b9d8 clang::DynamicRecursiveASTVisitorBase<false>::TraverseDoStmt(clang::DoStmt*) (arm-software/linux/build/atfl/bin/armclang+++0x70cb9d8)
#16 0x0000be115def3984 clang::RecursiveASTVisitor<(anonymous namespace)::Impl<false>>::TraverseStmt(clang::Stmt*, llvm::SmallVectorImpl<llvm::PointerIntPair<clang::Stmt*, 1u, bool, llvm::PointerLikeTypeTraits<clang::Stmt*>, llvm::PointerIntPairInfo<clang::Stmt*, 1u, llvm::PointerLikeTypeTraits<clang::Stmt*>>>>*) DynamicRecursiveASTVisitor.cpp:0:0
#17 0x0000be115dbd1d04 MatchDescendantVisitor::TraverseStmt(clang::Stmt*) UnsafeBufferUsage.cpp:0:0
#18 0x0000be115df1c050 clang::DynamicRecursiveASTVisitorBase<false>::TraverseCompoundStmt(clang::CompoundStmt*) (arm-software/linux/build/atfl/bin/armclang+++0x70cc050)
#19 0x0000be115def3984 clang::RecursiveASTVisitor<(anonymous namespace)::Impl<false>>::TraverseStmt(clang::Stmt*, llvm::SmallVectorImpl<llvm::PointerIntPair<clang::Stmt*, 1u, bool, llvm::PointerLikeTypeTraits<clang::Stmt*>, llvm::PointerIntPairInfo<clang::Stmt*, 1u, llvm::PointerLikeTypeTraits<clang::Stmt*>>>>*) DynamicRecursiveASTVisitor.cpp:0:0
#20 0x0000be115dbd1d04 MatchDescendantVisitor::TraverseStmt(clang::Stmt*) UnsafeBufferUsage.cpp:0:0
#21 0x0000be115df1b624 clang::DynamicRecursiveASTVisitorBase<false>::TraverseIfStmt(clang::IfStmt*) (arm-software/linux/build/atfl/bin/armclang+++0x70cb624)
#22 0x0000be115def3984 clang::RecursiveASTVisitor<(anonymous namespace)::Impl<false>>::TraverseStmt(clang::Stmt*, llvm::SmallVectorImpl<llvm::PointerIntPair<clang::Stmt*, 1u, bool, llvm::PointerLikeTypeTraits<clang::Stmt*>, llvm::PointerIntPairInfo<clang::Stmt*, 1u, llvm::PointerLikeTypeTraits<clang::Stmt*>>>>*) DynamicRecursiveASTVisitor.cpp:0:0
#23 0x0000be115dbd1d04 MatchDescendantVisitor::TraverseStmt(clang::Stmt*) UnsafeBufferUsage.cpp:0:0
#24 0x0000be115df1c050 clang::DynamicRecursiveASTVisitorBase<false>::TraverseCompoundStmt(clang::CompoundStmt*) (arm-software/linux/build/atfl/bin/armclang+++0x70cc050)
#25 0x0000be115def3984 clang::RecursiveASTVisitor<(anonymous namespace)::Impl<false>>::TraverseStmt(clang::Stmt*, llvm::SmallVectorImpl<llvm::PointerIntPair<clang::Stmt*, 1u, bool, llvm::PointerLikeTypeTraits<clang::Stmt*>, llvm::PointerIntPairInfo<clang::Stmt*, 1u, llvm::PointerLikeTypeTraits<clang::Stmt*>>>>*) DynamicRecursiveASTVisitor.cpp:0:0
#26 0x0000be115dbc3d00 findGadgets(clang::Stmt const*, clang::ASTContext&, clang::UnsafeBufferUsageHandler const&, bool, std::__1::vector<std::__1::unique_ptr<(anonymous namespace)::FixableGadget, std::__1::default_delete<(anonymous namespace)::FixableGadget>>, std::__1::allocator<std::__1::unique_ptr<(anonymous namespace)::FixableGadget, std::__1::default_delete<(anonymous namespace)::FixableGadget>>>>&, std::__1::vector<std::__1::unique_ptr<(anonymous namespace)::WarningGadget, std::__1::default_delete<(anonymous namespace)::WarningGadget>>, std::__1::allocator<std::__1::unique_ptr<(anonymous namespace)::WarningGadget, std::__1::default_delete<(anonymous namespace)::WarningGadget>>>>&, (anonymous namespace)::DeclUseTracker&) UnsafeBufferUsage.cpp:0:0
#27 0x0000be115dbc56a8 clang::checkUnsafeBufferUsage(clang::Decl const*, clang::UnsafeBufferUsageHandler&, bool) (arm-software/linux/build/atfl/bin/armclang+++0x6d756a8)
#28 0x0000be115d26438c CallableVisitor::VisitFunctionDecl(clang::FunctionDecl*) AnalysisBasedWarnings.cpp:0:0
#29 0x0000be115def9fcc clang::DynamicRecursiveASTVisitorBase<false>::TraverseCXXMethodDecl(clang::CXXMethodDecl*) (arm-software/linux/build/atfl/bin/armclang+++0x70a9fcc)
#30 0x0000be115def2da0 clang::DynamicRecursiveASTVisitorBase<false>::TraverseDecl(clang::Decl*) (arm-software/linux/build/atfl/bin/armclang+++0x70a2da0)
#31 0x0000be115df013d8 clang::DynamicRecursiveASTVisitorBase<false>::TraverseNamespaceDecl(clang::NamespaceDecl*) (arm-software/linux/build/atfl/bin/armclang+++0x70b13d8)
#32 0x0000be115def2cc4 clang::DynamicRecursiveASTVisitorBase<false>::TraverseDecl(clang::Decl*) (arm-software/linux/build/atfl/bin/armclang+++0x70a2cc4)
#33 0x0000be115def5704 clang::DynamicRecursiveASTVisitorBase<false>::TraverseTranslationUnitDecl(clang::TranslationUnitDecl*) (arm-software/linux/build/atfl/bin/armclang+++0x70a5704)
#34 0x0000be115d25d0f8 clang::sema::AnalysisBasedWarnings::IssueWarnings(clang::TranslationUnitDecl*) (arm-software/linux/build/atfl/bin/armclang+++0x640d0f8)
#35 0x0000be115d24b5d8 clang::Sema::ActOnEndOfTranslationUnit() (arm-software/linux/build/atfl/bin/armclang+++0x63fb5d8)
#36 0x0000be115d102b04 clang::Parser::ParseTopLevelDecl(clang::OpaquePtr<clang::DeclGroupRef>&, clang::Sema::ModuleImportState&) (arm-software/linux/build/atfl/bin/armclang+++0x62b2b04)
#37 0x0000be115d0f7a88 clang::ParseAST(clang::Sema&, bool, bool) (arm-software/linux/build/atfl/bin/armclang+++0x62a7a88)
#38 0x0000be115bc2b044 clang::FrontendAction::Execute() (arm-software/linux/build/atfl/bin/armclang+++0x4ddb044)
#39 0x0000be115bbb559c clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (arm-software/linux/build/atfl/bin/armclang+++0x4d6559c)
#40 0x0000be115bd0ec44 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (arm-software/linux/build/atfl/bin/armclang+++0x4ebec44)
#41 0x0000be1159d5b040 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (arm-software/linux/build/atfl/bin/armclang+++0x2f0b040)
#42 0x0000be1159d58df4 ExecuteCC1Tool(llvm::SmallVectorImpl<char const*>&, llvm::ToolContext const&, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>) driver.cpp:0:0
#43 0x0000be1159d5a948 int llvm::function_ref<int (llvm::SmallVectorImpl<char const*>&)>::callback_fn<clang_main(int, char**, llvm::ToolContext const&)::$_0>(long, llvm::SmallVectorImpl<char const*>&) driver.cpp:0:0
#44 0x0000be115ba57540 void llvm::function_ref<void ()>::callback_fn<clang::driver::CC1Command::Execute(llvm::ArrayRef<std::__1::optional<llvm::StringRef>>, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>*, bool*) const::$_0>(long) Job.cpp:0:0
#45 0x0000be115afdbfe8 llvm::CrashRecoveryContext::RunSafely(llvm::function_ref<void ()>) (arm-software/linux/build/atfl/bin/armclang+++0x418bfe8)
#46 0x0000be115ba56920 clang::driver::CC1Command::Execute(llvm::ArrayRef<std::__1::optional<llvm::StringRef>>, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>*, bool*) const (arm-software/linux/build/atfl/bin/armclang+++0x4c06920)
#47 0x0000be115ba1e81c clang::driver::Compilation::ExecuteCommand(clang::driver::Command const&, clang::driver::Command const*&, bool) const (arm-software/linux/build/atfl/bin/armclang+++0x4bce81c)
#48 0x0000be115ba1ea38 clang::driver::Compilation::ExecuteJobs(clang::driver::JobList const&, llvm::SmallVectorImpl<std::__1::pair<int, clang::driver::Command const*>>&, bool) const (arm-software/linux/build/atfl/bin/armclang+++0x4bcea38)
#49 0x0000be115ba36f74 clang::driver::Driver::ExecuteCompilation(clang::driver::Compilation&, llvm::SmallVectorImpl<std::__1::pair<int, clang::driver::Command const*>>&) (arm-software/linux/build/atfl/bin/armclang+++0x4be6f74)
#50 0x0000be1159d58090 clang_main(int, char**, llvm::ToolContext const&) (arm-software/linux/build/atfl/bin/armclang+++0x2f08090)
#51 0x0000be1159d656ac main (arm-software/linux/build/atfl/bin/armclang+++0x2f156ac)
#52 0x000051981f867400 __libc_start_call_main ./csu/../sysdeps/nptl/libc_start_call_main.h:74:3
#53 0x000051981f8674d8 call_init ./csu/../csu/libc-start.c:128:20
#54 0x000051981f8674d8 __libc_start_main ./csu/../csu/libc-start.c:379:5
armclang++: error: clang frontend command failed with exit code 134 (use -v to see invocation)
Arm Toolchain for Linux 0.0 clang version 22.0.0 (65dbee00898417b09d6bd40d67c129ee7b0de2fc)

@ojhunt
Copy link
Contributor

ojhunt commented Jan 5, 2026

We also observed the same assertion failure. Currently running cvise to reduce it.

Did the crash include the often helpful "while parsing X" comment? (I'm guessing not, but it might help speed up making the test case if it's not trivially obvious.)

@ZequanWu
Copy link
Contributor

ZequanWu commented Jan 5, 2026

Here's the reduced code:

enum errc { not_a_directory };
namespace detail {
struct ErrorHandler {
  ErrorHandler(char *, int *, int *, int *);
  __attribute__((__format__(__printf__, 3, 4))) void report(errc, char *);
};
} // namespace detail
using detail::ErrorHandler;
int __create_directory_ec, __create_directory_p, __create_directory_attributes;
void __create_directory() {
  ErrorHandler err("", &__create_directory_ec, &__create_directory_p,
                   &__create_directory_attributes);
  err.report(not_a_directory, "");
}

repro command: clang++ -Wunsafe-buffer-usage crash.cpp

@ojhunt
Copy link
Contributor

ojhunt commented Jan 5, 2026

Here's the reduced code:

Yay thanks!

mahesh-attarde pushed a commit to mahesh-attarde/llvm-project that referenced this pull request Jan 6, 2026
…lvm#173096)

This commit adds support for functions annotated with
`__attribute__((__format__(__printf__, ...)))` (or `__scanf__`). These
functions will be treated the same way as printf/scanf functions in the
standard C library by `-Wunsafe-buffer-usage`

rdar://143233737
mahesh-attarde pushed a commit to mahesh-attarde/llvm-project that referenced this pull request Jan 6, 2026
llvm#174253)

A downstream test uncovered a false negative introduced in llvm#173096, which
changed a use of the variable `FmtArgIdx` to `FmtArgStartingIdx`. The
two variables differ in that `FmtArgIdx` refers to the index of
the format string, while `FmtArgStartingIdx` refers to the index of the
first format argument. As a consequence, the analysis fails to
report an unsafe format string.

This fix also upstreams the test catching the FN.
@ZequanWu
Copy link
Contributor

ZequanWu commented Jan 6, 2026

What's the plan for fixing it? If it will take a while, please revert it.

@ojhunt
Copy link
Contributor

ojhunt commented Jan 6, 2026

Let me prod @ziqingluo-90

@ziqingluo-90
Copy link
Contributor Author

Thanks for the report; I'm on it.

@ziqingluo-90
Copy link
Contributor Author

Here's the reduced code:

enum errc { not_a_directory };
namespace detail {
struct ErrorHandler {
  ErrorHandler(char *, int *, int *, int *);
  __attribute__((__format__(__printf__, 3, 4))) void report(errc, char *);
};
} // namespace detail
using detail::ErrorHandler;
int __create_directory_ec, __create_directory_p, __create_directory_attributes;
void __create_directory() {
  ErrorHandler err("", &__create_directory_ec, &__create_directory_p,
                   &__create_directory_attributes);
  err.report(not_a_directory, "");
}

repro command: clang++ -Wunsafe-buffer-usage crash.cpp

Isn't __attribute__((__format__(__printf__, 3, 4))) void report(errc, char *); a compilation error by default?

@ojhunt
Copy link
Contributor

ojhunt commented Jan 6, 2026

Here's the reduced code:

enum errc { not_a_directory };
namespace detail {
struct ErrorHandler {
  ErrorHandler(char *, int *, int *, int *);
  __attribute__((__format__(__printf__, 3, 4))) void report(errc, char *);
};
} // namespace detail
using detail::ErrorHandler;
int __create_directory_ec, __create_directory_p, __create_directory_attributes;
void __create_directory() {
  ErrorHandler err("", &__create_directory_ec, &__create_directory_p,
                   &__create_directory_attributes);
  err.report(not_a_directory, "");
}

repro command: clang++ -Wunsafe-buffer-usage crash.cpp

Isn't __attribute__((__format__(__printf__, 3, 4))) void report(errc, char *); a compilation error by default?

Yup. Based on the test case, I think the argument count tests might not be considering the `this` parameter.

We can trivially fix the assertion with a bounds check, but this makes me suspicious of the interaction of these attributes with member functions in general.

ziqingluo-90 added a commit to ziqingluo-90/apple-llvm-project that referenced this pull request Jan 7, 2026
…canf functions llvm#173096"

The previous PR llvm#173096 assumes that format attribute parameters
always refer to valid argument indices.  That assumption is wrong
in itself: for a variadic function, the second attribute parameter may
specify the index just past the last named parameter, yet a call may
pass no actual arguments beyond the named parameters.  In addition, clang
(possibly incorrectly) allows the following uses of the attribute:

```
void f(const char *) __attribute__((__format__ (__printf__, 1, 2))); // The second attribute argument 2 will not refer to any valid argument at any call of 'f'

void g(const char *) __attribute__((__format__ (__printf__, 1, 99))); // Clang is even quiet on this, if assertions are disabled :(
```
ziqingluo-90 added a commit that referenced this pull request Jan 8, 2026
…canf functions #173096" (#174683)

The previous PR #173096 assumes that format attribute parameters always
refer to valid argument indices. That assumption is wrong in itself:
for a variadic function, the second attribute parameter may specify the
index just past the last named parameter, yet a call may pass no actual
arguments beyond the named parameters. In addition, clang (possibly
incorrectly) allows the following uses of the attribute:

```
void f(const char *) __attribute__((__format__ (__printf__, 1, 2))); // The second attribute argument 2 will not refer to any valid argument at any call of 'f'

void g(const char *) __attribute__((__format__ (__printf__, 1, 99))); // Clang is even quiet on this, if assertions are disabled :(
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

clang:analysis clang Clang issues not falling into any other category

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants