diff --git a/.clang-tidy b/.clang-tidy index 54f04105c7..4b361e8559 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -17,6 +17,8 @@ Checks: '-*, -readability-convert-member-functions-to-static, -readability-isolate-declaration, -readability-identifier-length, + -readability-redundant-member-init, + -readability-use-anyofallof, cppcoreguidelines-*, -cppcoreguidelines-avoid-non-const-global-variables, -cppcoreguidelines-pro-bounds-array-to-pointer-decay, diff --git a/CMakeLists.txt b/CMakeLists.txt index e27c186974..68f2dc8380 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,7 +71,7 @@ set(RELEASE_CONFIGURATIONS RELWITHDEBINFO RELEASE CACHE INTERNAL "" FORCE) # https://reviews.llvm.org/D157613 string(APPEND CMAKE_CXX_FLAGS " -MP -fstack-protector-strong -ffunction-sections -fdata-sections -pipe") -string(APPEND CMAKE_CXX_FLAGS_DEBUG " -Og -fno-omit-frame-pointer") +string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer") string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -fno-omit-frame-pointer") string(APPEND CMAKE_CXX_FLAGS_RELEASE "") diff --git a/config/double-free-config.json b/config/double-free-config.json new file mode 100644 index 0000000000..043b392288 --- /dev/null +++ b/config/double-free-config.json @@ -0,0 +1,28 @@ +{ + "name": "double-free", + "version": 1.0, + "functions": [ + { + "name": "free", + "params": { + "source": [ + 0 + ], + "sink": [ + 0 + ] + } + }, + { + "name": "_ZdlPv", + "params": { + "source": [ + 0 + ], + "sink": [ + 0 + ] + } + } + ] +} diff --git a/include/phasar/ControlFlow/CFGBase.h b/include/phasar/ControlFlow/CFGBase.h index 4abfbd9833..0ec38d5dcb 100644 --- a/include/phasar/ControlFlow/CFGBase.h +++ b/include/phasar/ControlFlow/CFGBase.h @@ -136,7 +136,7 @@ template class CFGBase { return self().getAsJsonImpl(Fun); } -private: +protected: Derived &self() noexcept { return static_cast(*this); } const Derived &self() const noexcept { return static_cast(*this); diff --git a/include/phasar/ControlFlow/SparseCFGBase.h 
b/include/phasar/ControlFlow/SparseCFGBase.h new file mode 100644 index 0000000000..3d5f531c26 --- /dev/null +++ b/include/phasar/ControlFlow/SparseCFGBase.h @@ -0,0 +1,43 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_CONTROLFLOW_SPARSECFGBASE_H +#define PHASAR_CONTROLFLOW_SPARSECFGBASE_H + +#include "phasar/ControlFlow/CFGBase.h" +#include "phasar/Utils/ByRef.h" +#include "phasar/Utils/Nullable.h" + +namespace psr { +template class SparseCFGBase : public CFGBase { +public: + using typename CFGBase::n_t; + using typename CFGBase::f_t; + + /// Gets the next instruction in control-flow order, starting from + /// FromInstruction, that may use or define Val. + /// If the next user is ambiguous, returns null. + [[nodiscard]] Nullable + nextUserOrNull(ByConstRef FromInstruction) const { + return self().nextUserOrNullImpl(FromInstruction); + } + +protected: + using CFGBase::self; +}; + +template +// NOLINTNEXTLINE(readability-identifier-naming) +constexpr bool is_sparse_cfg_v = is_crtp_base_of_v + &&std::is_same_v + &&std::is_same_v; + +} // namespace psr + +#endif // PHASAR_CONTROLFLOW_SPARSECFGBASE_H diff --git a/include/phasar/ControlFlow/SparseCFGProvider.h b/include/phasar/ControlFlow/SparseCFGProvider.h new file mode 100644 index 0000000000..1ef2882f6a --- /dev/null +++ b/include/phasar/ControlFlow/SparseCFGProvider.h @@ -0,0 +1,54 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_CONTROLFLOW_SPARSECFGPROVIDER_H +#define PHASAR_CONTROLFLOW_SPARSECFGPROVIDER_H + +#include "phasar/Utils/ByRef.h" + +#include + +namespace psr { +template T valueOf(T Val) { return Val; } + +template class SparseCFGProvider { +public: + using f_t = F; + using v_t = V; + + template + [[nodiscard]] decltype(auto) getSparseCFG(ByConstRef Fun, + const D &Val) const { + using psr::valueOf; + static_assert(std::is_convertible_v); + return self().getSparseCFGImpl(Fun, valueOf(Val)); + } + +private: + Derived &self() noexcept { return static_cast(*this); } + const Derived &self() const noexcept { + return static_cast(*this); + } +}; + +template +struct has_getSparseCFG : std::false_type {}; // NOLINT +template +struct has_getSparseCFG< + T, D, + std::void_t().getSparseCFG( + std::declval(), std::declval()))>> + : std::true_type {}; + +template +// NOLINTNEXTLINE +static constexpr bool has_getSparseCFG_v = has_getSparseCFG::value; +} // namespace psr + +#endif // PHASAR_CONTROLFLOW_SPARSECFGPROVIDER_H diff --git a/include/phasar/DataFlow/IfdsIde/FlowFunctions.h b/include/phasar/DataFlow/IfdsIde/FlowFunctions.h index ad41e1e10f..4537c278e2 100644 --- a/include/phasar/DataFlow/IfdsIde/FlowFunctions.h +++ b/include/phasar/DataFlow/IfdsIde/FlowFunctions.h @@ -17,6 +17,7 @@ #ifndef PHASAR_DATAFLOW_IFDSIDE_FLOWFUNCTIONS_H #define PHASAR_DATAFLOW_IFDSIDE_FLOWFUNCTIONS_H +#include "phasar/Utils/Macros.h" #include "phasar/Utils/TypeTraits.h" #include "llvm/ADT/ArrayRef.h" @@ -131,7 +132,7 @@ Container makeContainer(Range &&Rng) { Container C; reserveIfPossible(C, Rng.size()); for (auto &&Fact : Rng) { - C.insert(std::forward(Fact)); + C.insert(PSR_FWD(Fact)); } return C; } diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h index e83086f7e8..534357f5bb 100644 --- 
a/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IDESolver.h @@ -18,6 +18,7 @@ #define PHASAR_DATAFLOW_IFDSIDE_SOLVER_IDESOLVER_H #include "phasar/Config/Configuration.h" +#include "phasar/ControlFlow/SparseCFGProvider.h" #include "phasar/DB/ProjectIRDBBase.h" #include "phasar/DataFlow/IfdsIde/EdgeFunction.h" #include "phasar/DataFlow/IfdsIde/EdgeFunctionStats.h" @@ -35,15 +36,19 @@ #include "phasar/DataFlow/IfdsIde/SolverResults.h" #include "phasar/Domain/AnalysisDomain.h" #include "phasar/Utils/Average.h" +#include "phasar/Utils/ByRef.h" #include "phasar/Utils/DOTGraph.h" #include "phasar/Utils/JoinLattice.h" #include "phasar/Utils/Logger.h" +#include "phasar/Utils/Macros.h" +#include "phasar/Utils/Nullable.h" #include "phasar/Utils/PAMMMacros.h" #include "phasar/Utils/Table.h" #include "phasar/Utils/Utilities.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/TypeName.h" #include "llvm/Support/raw_ostream.h" #include "nlohmann/json.hpp" @@ -81,14 +86,24 @@ class IDESolver using t_t = typename AnalysisDomainTy::t_t; using v_t = typename AnalysisDomainTy::v_t; + template IDESolver(IDETabulationProblem &Problem, - const i_t *ICF) - : IDEProblem(Problem), ZeroValue(Problem.getZeroValue()), ICF(ICF), + const I *ICF) + : IDEProblem(Problem), ZeroValue(Problem.getZeroValue()), + ICF(&static_cast(*ICF)), SVFG(ICF), SolverConfig(Problem.getIFDSIDESolverConfig()), CachedFlowEdgeFunctions(Problem), AllTop(Problem.allTopFunction()), JumpFn(std::make_shared>()), Seeds(Problem.initialSeeds()) { assert(ICF != nullptr); + + if constexpr (has_getSparseCFG_v) { + NextUserOrNullCB = [](const void *SVFG, ByConstRef Fun, + ByConstRef d3, ByConstRef n) { + auto &&SCFG = static_cast(SVFG)->getSparseCFG(Fun, d3); + return SCFG.nextUserOrNull(n); + }; + } } IDESolver(IDETabulationProblem *Problem, @@ -339,6 +354,15 @@ class IDESolver } protected: + Nullable getNextUserOrNull(ByConstRef Fun, 
ByConstRef d3, + ByConstRef n) { + if (!NextUserOrNullCB || IDEProblem.isZeroValue(d3)) { + return {}; + } + + return NextUserOrNullCB(SVFG, Fun, d3, n); + } + /// Lines 13-20 of the algorithm; processing a call site in the caller's /// context. /// @@ -382,6 +406,15 @@ class IDESolver bool HasNoCalleeInformation = true; + auto &&Fun = ICF->getFunctionOf(n); + auto GetNextUse = [this, &Fun, &n](n_t nPrime, ByConstRef d3) { + if (auto &&NextUser = getNextUserOrNull(Fun, d3, n)) { + return psr::unwrapNullable(PSR_FWD(NextUser)); + } + + return nPrime; + }; + // for each possible callee for (f_t SCalledProcN : Callees) { // still line 14 // check if a special summary for the called procedure exists @@ -409,7 +442,9 @@ class IDESolver "Queried Summary Edge Function: " << SumEdgFnE); PHASAR_LOG_LEVEL(DEBUG, "Compose: " << SumEdgFnE << " * " << f << '\n'); - WorkList.emplace_back(PathEdge(d1, ReturnSiteN, std::move(d3)), + + auto DestN = GetNextUse(ReturnSiteN, d3); + WorkList.emplace_back(PathEdge(d1, DestN, std::move(d3)), IDEProblem.extend(f, SumEdgFnE)); } } @@ -508,8 +543,10 @@ class IDESolver d_t d5_restoredCtx = restoreContextOnReturnedFact(n, d2, d5); // propagte the effects of the entire call PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f); + + auto DestN = GetNextUse(RetSiteN, d5_restoredCtx); WorkList.emplace_back( - PathEdge(d1, RetSiteN, std::move(d5_restoredCtx)), + PathEdge(d1, DestN, std::move(d5_restoredCtx)), IDEProblem.extend(f, fPrime)); } } @@ -545,7 +582,8 @@ class IDESolver auto fPrime = IDEProblem.extend(f, EdgeFnE); PHASAR_LOG_LEVEL(DEBUG, "Compose: " << EdgeFnE << " * " << f << " = " << fPrime); - WorkList.emplace_back(PathEdge(d1, ReturnSiteN, std::move(d3)), + auto DestN = GetNextUse(ReturnSiteN, d3); + WorkList.emplace_back(PathEdge(d1, DestN, std::move(d3)), std::move(fPrime)); } } @@ -563,6 +601,8 @@ class IDESolver EdgeFunction f = jumpFunction(Edge); auto [d1, n, d2] = Edge.consume(); + const auto &Fun = 
ICF->getFunctionOf(n); + for (const auto nPrime : ICF->getSuccsOf(n)) { FlowFunctionPtrType FlowFunc = CachedFlowEdgeFunctions.getNormalFlowFunction(n, nPrime); @@ -575,14 +615,23 @@ class IDESolver CachedFlowEdgeFunctions.getNormalEdgeFunction(n, d2, nPrime, d3); PHASAR_LOG_LEVEL(DEBUG, "Queried Normal Edge Function: " << g); EdgeFunction fPrime = IDEProblem.extend(f, g); + + auto DestN = [&, &n = n] { + if (auto &&NextUser = getNextUserOrNull(Fun, d3, n)) { + return psr::unwrapNullable(PSR_FWD(NextUser)); + } + + return nPrime; + }(); + if (SolverConfig.emitESG()) { - IntermediateEdgeFunctions[std::make_tuple(n, d2, nPrime, d3)] + IntermediateEdgeFunctions[std::make_tuple(n, d2, DestN, d3)] .push_back(g); } PHASAR_LOG_LEVEL(DEBUG, "Compose: " << g << " * " << f << " = " << fPrime); INC_COUNTER("EF Queries", 1, Full); - WorkList.emplace_back(PathEdge(d1, nPrime, std::move(d3)), + WorkList.emplace_back(PathEdge(d1, DestN, std::move(d3)), std::move(fPrime)); } } @@ -915,6 +964,7 @@ class IDESolver for (const auto &Entry : Inc) { // line 22 n_t c = Entry.first; + auto &&Fun = ICF->getFunctionOf(c); // for each return site for (n_t RetSiteC : ICF->getReturnSitesOfCallAt(c)) { // compute return-flow function @@ -968,9 +1018,19 @@ class IDESolver d_t d3 = ValAndFunc.first; d_t d5_restoredCtx = restoreContextOnReturnedFact(c, d4, d5); PHASAR_LOG_LEVEL(DEBUG, "Compose: " << fPrime << " * " << f3); - WorkList.emplace_back(PathEdge(std::move(d3), RetSiteC, - std::move(d5_restoredCtx)), - IDEProblem.extend(f3, fPrime)); + + auto DestN = [&] { + if (auto &&NextUser = + getNextUserOrNull(Fun, d5_restoredCtx, c)) { + return psr::unwrapNullable(PSR_FWD(NextUser)); + } + + return RetSiteC; + }(); + + WorkList.emplace_back( + PathEdge(std::move(d3), DestN, std::move(d5_restoredCtx)), + IDEProblem.extend(f3, fPrime)); } } } @@ -1809,7 +1869,10 @@ class IDESolver IDETabulationProblem &IDEProblem; d_t ZeroValue; const i_t *ICF; + const void *SVFG; IFDSIDESolverConfig &SolverConfig; + 
Nullable (*NextUserOrNullCB)(const void *, ByConstRef, + ByConstRef, ByConstRef) = nullptr; std::vector, EdgeFunction>> WorkList; std::vector> ValuePropWL; diff --git a/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h b/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h index 2a13cc8ead..e6423bab0e 100644 --- a/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h +++ b/include/phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h @@ -37,17 +37,17 @@ class IFDSSolver using n_t = typename AnalysisDomainTy::n_t; using i_t = typename AnalysisDomainTy::i_t; - template >> IFDSSolver(IFDSTabulationProblem &IFDSProblem, - const i_t *ICF) - : IDESolver>(&IFDSProblem, ICF) {} - template >(IFDSProblem, ICF) {} + template >> IFDSSolver(IFDSTabulationProblem *IFDSProblem, - const i_t *ICF) + const I *ICF) : IDESolver>(IFDSProblem, ICF) {} ~IFDSSolver() override = default; diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h new file mode 100644 index 0000000000..8645f5c725 --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h @@ -0,0 +1,50 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDCFG_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDCFG_H + +#include "phasar/ControlFlow/SparseCFGBase.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" + +#include "llvm/ADT/DenseMap.h" + +namespace psr { + +class SparseLLVMBasedCFG; + +template <> struct CFGTraits : CFGTraits { + using v_t = const llvm::Value *; +}; + +class SparseLLVMBasedCFG : public LLVMBasedCFG, + public SparseCFGBase { + friend struct SVFGCache; + friend SparseCFGBase; + +public: + using vgraph_t = + llvm::SmallDenseMap; + + SparseLLVMBasedCFG() noexcept = default; + SparseLLVMBasedCFG( + llvm::SmallDenseMap + &&VGraph) noexcept + : VGraph(std::move(VGraph)) {} + +private: + [[nodiscard]] n_t nextUserOrNullImpl(n_t FromInstruction) const { + return VGraph.lookup(FromInstruction); + } + + vgraph_t VGraph; +}; +} // namespace psr + +#endif // PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDCFG_H diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFGProvider.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFGProvider.h new file mode 100644 index 0000000000..30e56628d6 --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFGProvider.h @@ -0,0 +1,33 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDCFGPROVIDER_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDCFGPROVIDER_H + +#include "phasar/ControlFlow/SparseCFGProvider.h" + +namespace llvm { +class Function; +class Value; +} // namespace llvm + +namespace psr { + +template +using SparseLLVMBasedCFGProvider = + SparseCFGProvider; + +[[nodiscard]] constexpr const llvm::Value * +valueOf(const llvm::Value *V) noexcept { + return V; +} + +} // namespace psr + +#endif // PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDCFGPROVIDER_H diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h new file mode 100644 index 0000000000..2d43ae64ea --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h @@ -0,0 +1,56 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDICFG_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDICFG_H + +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFGProvider.h" + +#include + +namespace psr { +class SparseLLVMBasedCFG; +class DIBasedTypeHierarchy; +struct SVFGCache; + +class SparseLLVMBasedICFG + : public LLVMBasedICFG, + public SparseLLVMBasedCFGProvider { + friend SparseLLVMBasedCFGProvider; + +public: + explicit SparseLLVMBasedICFG(LLVMProjectIRDB *IRDB, + CallGraphAnalysisType CGType, + llvm::ArrayRef EntryPoints = {}, + DIBasedTypeHierarchy *TH = nullptr, + LLVMAliasInfoRef PT = nullptr, + Soundness S = Soundness::Soundy, + bool IncludeGlobals = true); + + /// Creates an ICFG with an already given call-graph + explicit SparseLLVMBasedICFG(CallGraph CG, LLVMProjectIRDB *IRDB, + LLVMAliasInfoRef PT); + + explicit SparseLLVMBasedICFG(LLVMProjectIRDB *IRDB, + const nlohmann::json &SerializedCG, + LLVMAliasInfoRef PT); + + ~SparseLLVMBasedICFG(); + +private: + [[nodiscard]] const SparseLLVMBasedCFG & + getSparseCFGImpl(const llvm::Function *Fun, const llvm::Value *Val) const; + + std::unique_ptr SparseCFGCache; + LLVMAliasInfoRef AliasAnalysis; +}; +} // namespace psr + +#endif // PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDICFG_H diff --git a/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h new file mode 100644 index 0000000000..927840d252 --- /dev/null +++ b/include/phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h @@ -0,0 +1,74 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. 
This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDICFGVIEW_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDICFGVIEW_H + +#include "phasar/ControlFlow/CallGraph.h" +#include "phasar/ControlFlow/ICFGBase.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFGProvider.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" +#include "phasar/PhasarLLVM/Utils/LLVMBasedContainerConfig.h" + +#include + +namespace psr { +class LLVMProjectIRDB; +class LLVMBasedICFG; +class SparseLLVMBasedCFG; +class SparseLLVMBasedICFGView; +struct SVFGCache; + +template <> +struct CFGTraits : CFGTraits {}; + +/// Similar to SparseLLVMBasedICFG; the only difference is that this one *is* no +/// LLVMBasedICFG -- it contains a pointer to an already existing one. +/// It still owns the sparse value-flow graphs +class SparseLLVMBasedICFGView + : public LLVMBasedCFG, + public ICFGBase, + public SparseLLVMBasedCFGProvider { + friend ICFGBase; + friend SparseLLVMBasedCFGProvider; + +public: + explicit SparseLLVMBasedICFGView(const LLVMBasedICFG *ICF, + LLVMAliasInfoRef PT); + + ~SparseLLVMBasedICFGView(); + + // To make the IDESolver happy... 
+ operator const LLVMBasedICFG &() const noexcept { return *ICF; } + +private: + [[nodiscard]] FunctionRange getAllFunctionsImpl() const; + [[nodiscard]] f_t getFunctionImpl(llvm::StringRef Fun) const; + + [[nodiscard]] bool isIndirectFunctionCallImpl(n_t Inst) const; + [[nodiscard]] bool isVirtualFunctionCallImpl(n_t Inst) const; + [[nodiscard]] std::vector allNonCallStartNodesImpl() const; + [[nodiscard]] llvm::SmallVector getCallsFromWithinImpl(f_t Fun) const; + [[nodiscard]] llvm::SmallVector + getReturnSitesOfCallAtImpl(n_t Inst) const; + void printImpl(llvm::raw_ostream &OS) const; + [[nodiscard, deprecated]] nlohmann::json getAsJsonImpl() const; + [[nodiscard]] const CallGraph &getCallGraphImpl() const noexcept; + + [[nodiscard]] const SparseLLVMBasedCFG & + getSparseCFGImpl(const llvm::Function *Fun, const llvm::Value *Val) const; + + const LLVMBasedICFG *ICF{}; + std::unique_ptr SparseCFGCache; + LLVMAliasInfoRef AliasAnalysis; +}; +} // namespace psr + +#endif // PHASAR_PHASARLLVM_CONTROLFLOW_SPARSELLVMBASEDICFGVIEW_H diff --git a/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def b/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def index 08cf9e9e08..564fb245b7 100644 --- a/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def +++ b/include/phasar/PhasarLLVM/Utils/DataFlowAnalysisType.def @@ -14,6 +14,7 @@ DATA_FLOW_ANALYSIS_TYPES(IFDSUninitializedVariables, "ifds-uninit", "Find usages of uninitialized variables.") DATA_FLOW_ANALYSIS_TYPES(IFDSConstAnalysis, "ifds-const", "Find variables that are actually mutated through the program") DATA_FLOW_ANALYSIS_TYPES(IFDSTaintAnalysis, "ifds-taint", "Simple, alias-aware taint-analysis. Use with --analysis-config") +DATA_FLOW_ANALYSIS_TYPES(SparseIFDSTaintAnalysis, "sparse-ifds-taint", "Simple, alias-aware taint-analysis utilizing SparseIFDS. 
Use with --analysis-config") DATA_FLOW_ANALYSIS_TYPES(IDEExtendedTaintAnalysis, "ide-xtaint", "More advanced alias-aware taint analysis that provides limited field-sensitivity. Use with --analysis-config") DATA_FLOW_ANALYSIS_TYPES(IFDSTypeAnalysis, "ifds-type", "Simple type analysis") DATA_FLOW_ANALYSIS_TYPES(IDECSTDIOTypeStateAnalysis, "ide-stdio-ts", "Find invalid usages of the libc file-io") diff --git a/include/phasar/Utils/Macros.h b/include/phasar/Utils/Macros.h index 5e072ad56b..9620018de8 100644 --- a/include/phasar/Utils/Macros.h +++ b/include/phasar/Utils/Macros.h @@ -10,6 +10,8 @@ #ifndef PHASAR_UTILS_MACROS_H #define PHASAR_UTILS_MACROS_H +#define PSR_FWD(...) ::std::forward(__VA_ARGS__) + #if __cplusplus < 202002L #define PSR_CONCEPT static constexpr bool #else diff --git a/include/phasar/Utils/Nullable.h b/include/phasar/Utils/Nullable.h index 5bb3b2a9da..829db85eab 100644 --- a/include/phasar/Utils/Nullable.h +++ b/include/phasar/Utils/Nullable.h @@ -12,12 +12,34 @@ #include #include +#include namespace psr { template using Nullable = std::conditional_t, T, std::optional>; + +template +std::enable_if_t, T &&> +unwrapNullable(T &&Val) noexcept { + return std::forward(Val); +} +template +std::enable_if_t, T> +unwrapNullable(std::optional &&Val) noexcept { + return *std::move(Val); +} +template +std::enable_if_t, const T &> +unwrapNullable(const std::optional &Val) noexcept { + return *Val; +} +template +std::enable_if_t, T &> +unwrapNullable(std::optional &Val) noexcept { + return *Val; +} } // namespace psr #endif // PHASAR_UTILS_NULLABLE_H diff --git a/lib/PhasarLLVM/ControlFlow/SVFGCache.cpp b/lib/PhasarLLVM/ControlFlow/SVFGCache.cpp new file mode 100644 index 0000000000..c7a49c7ac4 --- /dev/null +++ b/lib/PhasarLLVM/ControlFlow/SVFGCache.cpp @@ -0,0 +1,184 @@ +#include "SVFGCache.h" + +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedCFG.h" +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h" +#include 
"phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" + +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Casting.h" + +using namespace psr; + +static bool isNonPointerType(const llvm::Type *Ty) { + if (const auto *Struct = llvm::dyn_cast(Ty)) { + for (const auto *ElemTy : Struct->elements()) { + // XXX: Go into nested structs recursively + if (!ElemTy->isSingleValueType() || ElemTy->isVectorTy()) { + return false; + } + } + return true; + } + if (const auto *Vec = llvm::dyn_cast(Ty)) { + return !Vec->getElementType()->isPointerTy(); + } + return Ty->isSingleValueType(); +} + +static bool isNonAddressTakenVariable(const llvm::Value *Val) { + const auto *Alloca = llvm::dyn_cast(Val); + if (!Alloca) { + return false; + } + for (const auto &Use : Alloca->uses()) { + if (const auto *Store = llvm::dyn_cast(Use.getUser())) { + if (Use == Store->getValueOperand()) { + return false; + } + } else if (const auto *Call = + llvm::dyn_cast(Use.getUser())) { + auto ArgNo = Use.getOperandNo(); + if (Call->paramHasAttr(ArgNo, llvm::Attribute::StructRet)) { + continue; + } + if (Call->paramHasAttr(ArgNo, llvm::Attribute::NoCapture) && + isNonPointerType(Call->getType())) { + continue; + } + return false; + } + } + return true; +} + +static bool mayAlias(const llvm::Value *Ptr1, const llvm::Value *Ptr2, + LLVMAliasInfoRef AliasAnalysis) { + if (isNonAddressTakenVariable(Ptr1) || isNonAddressTakenVariable(Ptr2)) { + return false; + } + + return AliasAnalysis.alias(Ptr1, Ptr2) != AliasResult::NoAlias; +} + +static bool isFirstInBB(const llvm::Instruction *Inst) { + return !Inst->getPrevNode(); +} + +static bool isLastInBB(const llvm::Instruction *Inst, const llvm::Value *Val) { + if (Inst->getNextNode()) { + return false; + } + + if (Val->getType()->isPointerTy()) { + return true; + } + + const auto *InstBB = Inst->getParent(); + for (const auto *User : Val->users()) { + const auto *UserInst = llvm::dyn_cast(User); + if (!UserInst || UserInst->getParent() != InstBB) { + return true; + 
} + } + return llvm::succ_empty(Inst); +} + +static bool shouldKeepInst(const llvm::Instruction *Inst, + const llvm::Value *Val, + LLVMAliasInfoRef AliasAnalysis) { + if (Inst == Val || isFirstInBB(Inst) || isLastInBB(Inst, Val)) { + // First in BB always stays for now + return true; + } + + const auto *ValTy = Val->getType(); + bool ValPtr = ValTy->isPointerTy(); + + if (const auto *Call = llvm::dyn_cast(Inst)) { + if (llvm::isa(Val)) { + return true; + } + } + + for (const auto *Op : Inst->operand_values()) { + if (Op == Val) { + return true; + } + if (!ValPtr) { + continue; + } + const auto *OpTy = Op->getType(); + bool OpPtr = OpTy->isPointerTy(); + + if (!OpPtr) { + // Pointers cannot influence non-pointers + continue; + } + + if (mayAlias(Val, Op, AliasAnalysis)) { + return true; + } + } + + return false; +} + +static void buildSparseCFG(const LLVMBasedCFG &CFG, + SparseLLVMBasedCFG::vgraph_t &SCFG, + const llvm::Function *Fun, const llvm::Value *Val, + LLVMAliasInfoRef AliasAnalysis) { + llvm::SmallVector< + std::pair> + WL; + + // -- Initialization + + const auto *Entry = &Fun->getEntryBlock().front(); + if (llvm::isa(Entry)) { + Entry = Entry->getNextNonDebugInstruction(); + } + + for (const auto *Succ : CFG.getSuccsOf(Entry)) { + WL.emplace_back(Entry, Succ); + } + + // -- Fixpoint Iteration + + llvm::SmallDenseSet Handled; + + while (!WL.empty()) { + auto [From, To] = WL.pop_back_val(); + + const auto *Curr = From; + if (shouldKeepInst(To, Val, AliasAnalysis)) { + Curr = To; + auto [It, Inserted] = SCFG.try_emplace(From, To); + if (!Inserted) { + if (It->second != To) { + It->second = nullptr; + } + } + } + + if (!Handled.insert(To).second) { + continue; + } + + for (const auto *Succ : CFG.getSuccsOf(To)) { + WL.emplace_back(Curr, Succ); + } + } +} + +const SparseLLVMBasedCFG & +SVFGCache::getOrCreate(const LLVMBasedCFG &CFG, const llvm::Function *Fun, + const llvm::Value *Val, LLVMAliasInfoRef AliasAnalysis) { + // XXX: Make thread-safe + + auto [It, 
Inserted] = Cache.try_emplace(std::make_pair(Fun, Val)); + if (Inserted) { + buildSparseCFG(CFG, It->second.VGraph, Fun, Val, AliasAnalysis); + } + + return It->second; +} diff --git a/lib/PhasarLLVM/ControlFlow/SVFGCache.h b/lib/PhasarLLVM/ControlFlow/SVFGCache.h new file mode 100644 index 0000000000..c4adc6f96c --- /dev/null +++ b/lib/PhasarLLVM/ControlFlow/SVFGCache.h @@ -0,0 +1,41 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. + * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#ifndef PHASAR_PHASARLLVM_CONTROLFLOW_SVFGCACHE_H +#define PHASAR_PHASARLLVM_CONTROLFLOW_SVFGCACHE_H + +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" + +#include "llvm/IR/Function.h" +#include "llvm/Support/Compiler.h" + +#include + +namespace psr { +struct FVHasher { + auto operator()(std::pair FV) + const noexcept { + return llvm::hash_value(FV); + } +}; + +struct SVFGCache { + using f_t = const llvm::Function *; + using v_t = const llvm::Value *; + std::unordered_map, SparseLLVMBasedCFG, FVHasher> Cache{}; + + LLVM_LIBRARY_VISIBILITY const SparseLLVMBasedCFG & + getOrCreate(const LLVMBasedCFG &CFG, const llvm::Function *Fun, + const llvm::Value *Val, LLVMAliasInfoRef AliasAnalysis); +}; + +} // namespace psr + +#endif // PHASAR_PHASARLLVM_CONTROLFLOW_SVFGCACHE_H diff --git a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp new file mode 100644 index 0000000000..ccaf80b2cf --- /dev/null +++ b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.cpp @@ -0,0 +1,45 @@ +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h" + +#include "phasar/PhasarLLVM/Pointer/LLVMAliasInfo.h" 
+ +#include "SVFGCache.h" + +#include +#include + +using namespace psr; + +struct FVHasher { + auto operator()(std::pair FV) + const noexcept { + return llvm::hash_value(FV); + } +}; + +SparseLLVMBasedICFG::~SparseLLVMBasedICFG() = default; + +SparseLLVMBasedICFG::SparseLLVMBasedICFG( + LLVMProjectIRDB *IRDB, CallGraphAnalysisType CGType, + llvm::ArrayRef EntryPoints, DIBasedTypeHierarchy *TH, + LLVMAliasInfoRef PT, Soundness S, bool IncludeGlobals) + : LLVMBasedICFG(IRDB, CGType, EntryPoints, TH, PT, S, IncludeGlobals), + SparseCFGCache(new SVFGCache{}), AliasAnalysis(PT) {} + +SparseLLVMBasedICFG::SparseLLVMBasedICFG(CallGraph CG, + LLVMProjectIRDB *IRDB, + LLVMAliasInfoRef PT) + : LLVMBasedICFG(std::move(CG), IRDB), SparseCFGCache(new SVFGCache{}), + AliasAnalysis(PT) {} + +SparseLLVMBasedICFG::SparseLLVMBasedICFG(LLVMProjectIRDB *IRDB, + const nlohmann::json &SerializedCG, + LLVMAliasInfoRef PT) + : LLVMBasedICFG(IRDB, SerializedCG), SparseCFGCache(new SVFGCache{}), + AliasAnalysis(PT) {} + +const SparseLLVMBasedCFG & +SparseLLVMBasedICFG::getSparseCFGImpl(const llvm::Function *Fun, + const llvm::Value *Val) const { + assert(SparseCFGCache != nullptr); + return SparseCFGCache->getOrCreate(*this, Fun, Val, AliasAnalysis); +} diff --git a/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.cpp b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.cpp new file mode 100644 index 0000000000..a61b147cc7 --- /dev/null +++ b/lib/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.cpp @@ -0,0 +1,66 @@ +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h" + +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" + +#include "SVFGCache.h" + +using namespace psr; + +SparseLLVMBasedICFGView::SparseLLVMBasedICFGView(const LLVMBasedICFG *ICF, + LLVMAliasInfoRef PT) + : ICF(ICF), SparseCFGCache(new SVFGCache{}), AliasAnalysis(PT) {} + +SparseLLVMBasedICFGView::~SparseLLVMBasedICFGView() = default; + +FunctionRange 
SparseLLVMBasedICFGView::getAllFunctionsImpl() const { + return ICF->getAllFunctions(); +} + +auto SparseLLVMBasedICFGView::getFunctionImpl(llvm::StringRef Fun) const + -> f_t { + return ICF->getFunction(Fun); +} + +bool SparseLLVMBasedICFGView::isIndirectFunctionCallImpl(n_t Inst) const { + return ICF->isIndirectFunctionCall(Inst); +} + +bool SparseLLVMBasedICFGView::isVirtualFunctionCallImpl(n_t Inst) const { + return ICF->isVirtualFunctionCall(Inst); +} + +auto SparseLLVMBasedICFGView::allNonCallStartNodesImpl() const + -> std::vector { + return ICF->allNonCallStartNodes(); +} + +auto SparseLLVMBasedICFGView::getCallsFromWithinImpl(f_t Fun) const + -> llvm::SmallVector { + return ICF->getCallsFromWithin(Fun); +} + +auto SparseLLVMBasedICFGView::getReturnSitesOfCallAtImpl(n_t Inst) const + -> llvm::SmallVector { + return ICF->getReturnSitesOfCallAt(Inst); +} + +void SparseLLVMBasedICFGView::printImpl(llvm::raw_ostream &OS) const { + ICF->print(OS); +} + +nlohmann::json SparseLLVMBasedICFGView::getAsJsonImpl() const { + return ICF->getAsJson(); +} + +auto SparseLLVMBasedICFGView::getCallGraphImpl() const noexcept + -> const CallGraph & { + return ICF->getCallGraph(); +} + +const SparseLLVMBasedCFG & +SparseLLVMBasedICFGView::getSparseCFGImpl(const llvm::Function *Fun, + const llvm::Value *Val) const { + assert(SparseCFGCache != nullptr); + return SparseCFGCache->getOrCreate(*this, Fun, Val, AliasAnalysis); +} diff --git a/tools/phasar-cli/Controller/AnalysisController.cpp b/tools/phasar-cli/Controller/AnalysisController.cpp index fbaa858e9c..8ad63474e2 100644 --- a/tools/phasar-cli/Controller/AnalysisController.cpp +++ b/tools/phasar-cli/Controller/AnalysisController.cpp @@ -127,6 +127,9 @@ static void executeWholeProgram(AnalysisController &Data) { case DataFlowAnalysisType::IFDSTaintAnalysis: executeIFDSTaint(Data); continue; + case DataFlowAnalysisType::SparseIFDSTaintAnalysis: + executeSparseIFDSTaint(Data); + continue; case 
DataFlowAnalysisType::IDEExtendedTaintAnalysis: executeIDEXTaint(Data); continue; diff --git a/tools/phasar-cli/Controller/AnalysisControllerInternal.h b/tools/phasar-cli/Controller/AnalysisControllerInternal.h index 1246518fb6..019cc122f2 100644 --- a/tools/phasar-cli/Controller/AnalysisControllerInternal.h +++ b/tools/phasar-cli/Controller/AnalysisControllerInternal.h @@ -46,6 +46,7 @@ LLVM_LIBRARY_VISIBILITY void executeIntraMonoFullConstant(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void executeIntraMonoSolverTest(AnalysisController &Data); +LLVM_LIBRARY_VISIBILITY void executeSparseIFDSTaint(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void executeInterMonoSolverTest(AnalysisController &Data); LLVM_LIBRARY_VISIBILITY void executeInterMonoTaint(AnalysisController &Data); diff --git a/tools/phasar-cli/Controller/AnalysisControllerInternalIDE.h b/tools/phasar-cli/Controller/AnalysisControllerInternalIDE.h index 645c055a77..789157a75e 100644 --- a/tools/phasar-cli/Controller/AnalysisControllerInternalIDE.h +++ b/tools/phasar-cli/Controller/AnalysisControllerInternalIDE.h @@ -12,6 +12,8 @@ #include "phasar/DataFlow/IfdsIde/Solver/IDESolver.h" #include "phasar/DataFlow/IfdsIde/Solver/IFDSSolver.h" +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h" +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFGView.h" #include "AnalysisControllerInternal.h" @@ -23,11 +25,13 @@ static void statsEmitter(llvm::raw_ostream &OS, const IDESolver &Solver) { Solver.printEdgeFunctionStatistics(OS); } -template -static void executeIfdsIdeAnalysis(AnalysisController &Data, ArgTys &&...Args) { +template +static void executeIfdsIdeAnalysisImpl(AnalysisController &Data, + const ICFGTy &ICF, ArgTys &&...Args) { auto Problem = createAnalysisProblem(*Data.HA, std::forward(Args)...); - SolverTy Solver(Problem, &Data.HA->getICFG()); + SolverTy Solver(Problem, &ICF); { std::optional MeasureTime; if (Data.EmitterOptions & @@ -42,18 +46,47 @@ static void 
executeIfdsIdeAnalysis(AnalysisController &Data, ArgTys &&...Args) { emitRequestedDataFlowResults(Data, Solver); } +template +static void executeIfdsIdeAnalysis(AnalysisController &Data, ArgTys &&...Args) { + executeIfdsIdeAnalysisImpl( + Data, Data.HA->getICFG(), std::forward(Args)...); +} + +template +static void executeSparseIfdsIdeAnalysis(AnalysisController &Data, + ArgTys &&...Args) { + + SparseLLVMBasedICFGView SVFG(&Data.HA->getICFG(), &Data.HA->getAliasInfo()); + executeIfdsIdeAnalysisImpl( + Data, SVFG, std::forward(Args)...); +} + template static void executeIFDSAnalysis(AnalysisController &Data, ArgTys &&...Args) { executeIfdsIdeAnalysis, ProblemTy>( Data, std::forward(Args)...); } +template +static void executeSparseIFDSAnalysis(AnalysisController &Data, + ArgTys &&...Args) { + executeSparseIfdsIdeAnalysis, ProblemTy>( + Data, std::forward(Args)...); +} + template static void executeIDEAnalysis(AnalysisController &Data, ArgTys &&...Args) { executeIfdsIdeAnalysis, ProblemTy>( Data, std::forward(Args)...); } +template +static void executeSparseIDEAnalysis(AnalysisController &Data, + ArgTys &&...Args) { + executeSparseIfdsIdeAnalysis, ProblemTy>( + Data, std::forward(Args)...); +} + } // namespace psr::controller #endif // PHASAR_CONTROLLER_ANALYSISCONTROLLERINTERNALMONO_H diff --git a/tools/phasar-cli/Controller/AnalysisControllerXSparseIFDSTaint.cpp b/tools/phasar-cli/Controller/AnalysisControllerXSparseIFDSTaint.cpp new file mode 100644 index 0000000000..4d3b8623b8 --- /dev/null +++ b/tools/phasar-cli/Controller/AnalysisControllerXSparseIFDSTaint.cpp @@ -0,0 +1,19 @@ +/****************************************************************************** + * Copyright (c) 2024 Fabian Schiebel. + * All rights reserved. This program and the accompanying materials are made + * available under the terms of LICENSE.txt. 
+ * + * Contributors: + * Fabian Schiebel and others + *****************************************************************************/ + +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h" + +#include "AnalysisControllerInternalIDE.h" + +using namespace psr; + +void controller::executeSparseIFDSTaint(AnalysisController &Data) { + auto Config = makeTaintConfig(Data); + executeSparseIFDSAnalysis(Data, &Config, Data.EntryPoints); +} diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt b/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt index 7a26332282..5c191d3856 100644 --- a/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/CMakeLists.txt @@ -4,6 +4,7 @@ set(IfdsIdeSources EdgeFunctionComposerTest.cpp EdgeFunctionSingletonCacheTest.cpp InteractiveIDESolverTest.cpp + SparseIDESolverTest.cpp IterativeIDESolverTest.cpp ) diff --git a/unittests/PhasarLLVM/DataFlow/IfdsIde/SparseIDESolverTest.cpp b/unittests/PhasarLLVM/DataFlow/IfdsIde/SparseIDESolverTest.cpp new file mode 100644 index 0000000000..060f438271 --- /dev/null +++ b/unittests/PhasarLLVM/DataFlow/IfdsIde/SparseIDESolverTest.cpp @@ -0,0 +1,210 @@ +#include "phasar/ControlFlow/CallGraphAnalysisType.h" +#include "phasar/ControlFlow/SparseCFGProvider.h" +#include "phasar/DataFlow/IfdsIde/Solver/IDESolver.h" +#include "phasar/PhasarLLVM/ControlFlow/LLVMBasedICFG.h" +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedCFG.h" +#include "phasar/PhasarLLVM/ControlFlow/SparseLLVMBasedICFG.h" +#include "phasar/PhasarLLVM/DB/LLVMProjectIRDB.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IDELinearConstantAnalysis.h" +#include "phasar/PhasarLLVM/DataFlow/IfdsIde/Problems/IFDSTaintAnalysis.h" +#include "phasar/PhasarLLVM/HelperAnalyses.h" +#include "phasar/PhasarLLVM/Pointer/LLVMAliasSet.h" +#include "phasar/PhasarLLVM/SimpleAnalysisConstructor.h" +#include "phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h" +#include 
"phasar/PhasarLLVM/TypeHierarchy/DIBasedTypeHierarchy.h" +#include "phasar/PhasarLLVM/TypeHierarchy/LLVMTypeHierarchy.h" +#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h" +#include "phasar/Utils/Soundness.h" + +#include "TestConfig.h" +#include "gtest/gtest.h" + +using namespace psr; +namespace { +/* ============== TEST FIXTURE ============== */ +class LinearConstant : public ::testing::TestWithParam { +protected: + const std::vector EntryPoints = {"main"}; +}; +class DoubleFreeTA : public ::testing::TestWithParam { +protected: + const std::vector EntryPoints = {"main"}; +}; + +TEST_P(LinearConstant, SparseResultsEquivalent) { + static constexpr auto PathToLlFiles = + PHASAR_BUILD_SUBFOLDER("linear_constant/"); + LLVMProjectIRDB IRDB(PathToLlFiles + GetParam()); + DIBasedTypeHierarchy TH(IRDB); + LLVMAliasSet PT(&IRDB); + + LLVMBasedICFG ICF(&IRDB, CallGraphAnalysisType::OTF, EntryPoints, &TH, &PT); + auto HasGlobalCtor = IRDB.getFunctionDefinition( + LLVMBasedICFG::GlobalCRuntimeModelName) != nullptr; + std::vector Entry = { + HasGlobalCtor ? 
LLVMBasedICFG::GlobalCRuntimeModelName.str() : "main"}; + SparseLLVMBasedICFG SICF(&IRDB, CallGraphAnalysisType::OTF, Entry, &TH, &PT, + psr::Soundness::Soundy, false); + + static_assert(has_getSparseCFG_v); + + IDELinearConstantAnalysis LCAProblem(&IRDB, &ICF, Entry); + IDELinearConstantAnalysis SLCAProblem(&IRDB, &SICF, Entry); + + auto DenseResults = IDESolver(LCAProblem, &ICF).solve(); + auto SparseResults = IDESolver(SLCAProblem, &SICF).solve(); + + DenseResults.dumpResults(ICF, llvm::outs() << "DenseResults:"); + SparseResults.dumpResults(SICF, llvm::outs() << "SparseResults:"); + + for (auto &&Cell : SparseResults.getAllResultEntries()) { + auto DenseRes = + DenseResults.resultAt(Cell.getRowKey(), Cell.getColumnKey()); + EXPECT_EQ(DenseRes, Cell.getValue()) + << "At " << llvmIRToString(Cell.getRowKey()) + << " :: " << llvmIRToShortString(Cell.getColumnKey()); + } + // Note: Do not check for equivalence, because SparseIDE is *expected* to + // compute less (N, D) results than vanilla IDE. +} + +static LLVMTaintConfig getDoubleFreeConfig() { + auto SourceCB = [](const llvm::Instruction *Inst) { + std::set Ret; + if (const auto *Call = llvm::dyn_cast(Inst); + Call && Call->getCalledFunction() && + Call->getCalledFunction()->getName() == "free") { + Ret.insert(Call->getArgOperand(0)); + } + return Ret; + }; + + return LLVMTaintConfig(SourceCB, SourceCB); +} + +TEST_P(DoubleFreeTA, SparseLeaksEquivalent) { + static constexpr auto PathToLlFiles = + PHASAR_BUILD_SUBFOLDER("taint_analysis/"); + LLVMProjectIRDB IRDB(PathToLlFiles + GetParam()); + DIBasedTypeHierarchy TH(IRDB); + LLVMAliasSet PT(&IRDB); + + LLVMBasedICFG ICF(&IRDB, CallGraphAnalysisType::OTF, EntryPoints, &TH, &PT); + auto HasGlobalCtor = IRDB.getFunctionDefinition( + LLVMBasedICFG::GlobalCRuntimeModelName) != nullptr; + std::vector Entry = { + HasGlobalCtor ? 
LLVMBasedICFG::GlobalCRuntimeModelName.str() : "main"}; + SparseLLVMBasedICFG SICF(&IRDB, CallGraphAnalysisType::OTF, Entry, &TH, &PT, + psr::Soundness::Soundy, false); + + static_assert(has_getSparseCFG_v); + + auto Config = getDoubleFreeConfig(); + IFDSTaintAnalysis TaintProblem(&IRDB, &PT, &Config, Entry); + IFDSTaintAnalysis STaintProblem(&IRDB, &PT, &Config, Entry); + + auto DenseResults = IDESolver(TaintProblem, &ICF).solve(); + auto SparseResults = IDESolver(STaintProblem, &SICF).solve(); + + for (const auto &[LeakInst, Leaks] : TaintProblem.Leaks) { + auto LeakIt = STaintProblem.Leaks.find(LeakInst); + EXPECT_NE(LeakIt, STaintProblem.Leaks.end()) + << "SparseIDE did not find expected leak(s) at " + << llvmIRToString(LeakInst); + + if (LeakIt == STaintProblem.Leaks.end()) { + continue; + } + + const auto &SLeaks = LeakIt->second; + EXPECT_EQ(Leaks, SLeaks) + << "Leak sets at " << llvmIRToString(LeakInst) << " do not match"; + } +} + +static constexpr std::string_view LCATestFiles[] = { + "basic_01_cpp_dbg.ll", + "basic_02_cpp_dbg.ll", + "basic_03_cpp_dbg.ll", + "basic_04_cpp_dbg.ll", + "basic_05_cpp_dbg.ll", + "basic_06_cpp_dbg.ll", + "basic_07_cpp_dbg.ll", + "basic_08_cpp_dbg.ll", + "basic_09_cpp_dbg.ll", + "basic_10_cpp_dbg.ll", + "basic_11_cpp_dbg.ll", + "basic_12_cpp_dbg.ll", + + "branch_01_cpp_dbg.ll", + "branch_02_cpp_dbg.ll", + "branch_03_cpp_dbg.ll", + "branch_04_cpp_dbg.ll", + "branch_05_cpp_dbg.ll", + "branch_06_cpp_dbg.ll", + "branch_07_cpp_dbg.ll", + + "while_01_cpp_dbg.ll", + "while_02_cpp_dbg.ll", + "while_03_cpp_dbg.ll", + "while_04_cpp_dbg.ll", + "while_05_cpp_dbg.ll", + "for_01_cpp_dbg.ll", + + "call_01_cpp_dbg.ll", + "call_02_cpp_dbg.ll", + "call_03_cpp_dbg.ll", + "call_04_cpp_dbg.ll", + "call_05_cpp_dbg.ll", + "call_06_cpp_dbg.ll", + "call_07_cpp_dbg.ll", + "call_08_cpp_dbg.ll", + "call_09_cpp_dbg.ll", + "call_10_cpp_dbg.ll", + "call_11_cpp_dbg.ll", + + "recursion_01_cpp_dbg.ll", + "recursion_02_cpp_dbg.ll", + "recursion_03_cpp_dbg.ll", + 
+ "global_01_cpp_dbg.ll", + "global_02_cpp_dbg.ll", + "global_03_cpp_dbg.ll", + "global_04_cpp_dbg.ll", + "global_05_cpp_dbg.ll", + "global_06_cpp_dbg.ll", + "global_07_cpp_dbg.ll", + "global_08_cpp_dbg.ll", + "global_09_cpp_dbg.ll", + "global_10_cpp_dbg.ll", + "global_11_cpp_dbg.ll", + "global_12_cpp_dbg.ll", + "global_13_cpp_dbg.ll", + "global_14_cpp_dbg.ll", + "global_15_cpp_dbg.ll", + "global_16_cpp_dbg.ll", + + "overflow_add_cpp_dbg.ll", + "overflow_sub_cpp_dbg.ll", + "overflow_mul_cpp_dbg.ll", + "overflow_div_min_by_neg_one_cpp_dbg.ll", + + "ub_division_by_zero_cpp_dbg.ll", + "ub_modulo_by_zero_cpp_dbg.ll", +}; + +static constexpr std::string_view TaintTestFiles[] = { + "double_free_01_c.ll", + "double_free_02_c.ll", +}; + +INSTANTIATE_TEST_SUITE_P(SparseIDETest, LinearConstant, + ::testing::ValuesIn(LCATestFiles)); +INSTANTIATE_TEST_SUITE_P(SparseIDETest, DoubleFreeTA, + ::testing::ValuesIn(TaintTestFiles)); +} // namespace + +int main(int Argc, char **Argv) { + ::testing::InitGoogleTest(&Argc, Argv); + return RUN_ALL_TESTS(); +}