From f86ab4d8e0e84122c94368710d567d214596741e Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Mon, 25 Nov 2024 15:30:57 -0800 Subject: [PATCH 1/3] REL: add facility to compare cfg info --- chb/app/AppAccess.py | 11 ++- chb/app/AppCfgInfo.py | 103 ++++++++++++++++++++++++ chb/ast/ASTCPrettyPrinter.py | 3 +- chb/cmdline/AnalysisManager.py | 5 +- chb/cmdline/XComparison.py | 33 ++++++++ chb/cmdline/astcmds.py | 6 +- chb/cmdline/chkx | 34 ++++++++ chb/cmdline/commandutil.py | 4 +- chb/cmdline/relationalcmds.py | 138 +++++++++++++++++++++++++++++++++ chb/util/fileutil.py | 15 +++- 10 files changed, 346 insertions(+), 6 deletions(-) create mode 100644 chb/app/AppCfgInfo.py diff --git a/chb/app/AppAccess.py b/chb/app/AppAccess.py index 32930be6..264a5f19 100644 --- a/chb/app/AppAccess.py +++ b/chb/app/AppAccess.py @@ -6,7 +6,7 @@ # # Copyright (c) 2016-2020 Kestrel Technology LLC # Copyright (c) 2020-2021 Henny Sipma -# Copyright (c) 2021-2023 Aarno Labs LLC +# Copyright (c) 2021-2024 Aarno Labs LLC # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -49,6 +49,7 @@ from chb.api.CallTarget import CallTarget, IndirectTarget, CallbackTableTarget from chb.api.InterfaceDictionary import InterfaceDictionary +from chb.app.AppCfgInfo import AppCfgInfo from chb.app.AppResultData import AppResultData from chb.app.AppResultMetrics import AppResultMetrics from chb.app.BDictionary import BDictionary @@ -110,6 +111,7 @@ def __init__( # functions self._appresultdata: Optional[AppResultData] = None + self._appcfginfo: Optional[AppCfgInfo] = None self._functioninfos: Dict[str, FunctionInfo] = {} # callgraph @@ -256,6 +258,13 @@ def appresultdata(self) -> AppResultData: self._appresultdata = AppResultData(x) return self._appresultdata + @property + def appcfginfo(self) -> AppCfgInfo: + if self._appcfginfo is None: + x = UF.get_app_cfg_info_xnode(self.path, self.filename) + 
self._appcfginfo = AppCfgInfo(x) + return self._appcfginfo + @property def appfunction_addrs(self) -> Sequence[str]: """Return a list of all application function addresses.""" diff --git a/chb/app/AppCfgInfo.py b/chb/app/AppCfgInfo.py new file mode 100644 index 00000000..91186017 --- /dev/null +++ b/chb/app/AppCfgInfo.py @@ -0,0 +1,103 @@ +# ------------------------------------------------------------------------------ +# CodeHawk Binary Analyzer +# Author: Henny Sipma +# ------------------------------------------------------------------------------ +# The MIT License (MIT) +# +# Copyright (c) 2024 Aarno Labs LLC +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# ------------------------------------------------------------------------------ +"""List of application function addresses and cfg characteristics.""" + +import xml.etree.ElementTree as ET + +from typing import Dict, List, Optional + + +class FnCfgInfo: + + def __init__(self, xnode: ET.Element) -> None: + self.xnode = xnode + + @property + def faddr(self) -> str: + return self.xnode.get("va", "0") + + @property + def faddr_i(self) -> int: + return int(self.xnode.get("va", "0"), 16) + + @property + def basic_blocks(self) -> int: + return int(self.xnode.get("bc", "0")) + + @property + def instructions(self) -> int: + return int(self.xnode.get("ic", "0")) + + @property + def loops(self) -> int: + return int(self.xnode.get("lc", "0")) + + @property + def max_loopdepth(self) -> int: + return int(self.xnode.get("ld", "0")) + + @property + def has_error(self) -> bool: + return self.xnode.get("tr", "ok") == "x" + + @property + def name(self) -> Optional[str]: + return self.xnode.get("name") + + def __str__(self) -> str: + return ( + ("bc:" + str(self.basic_blocks)).ljust(10) + + ("; ic: " + str(self.instructions)).ljust(14) + + ("" if self.loops == 0 else ("; lc: " + str(self.loops)))) + + +class AppCfgInfo: + + def __init__(self, xnode: Optional[ET.Element]) -> None: + self.xnode = xnode + self._function_cfg_infos: Optional[Dict[str, FnCfgInfo]] = None + + @property + def function_cfg_infos(self) -> Dict[str, FnCfgInfo]: + if self._function_cfg_infos is None: + self._function_cfg_infos = {} + self._initialize_functions() + return self._function_cfg_infos + + @property + def cfg_infos(self) -> List[FnCfgInfo]: + return sorted( + self.function_cfg_infos.values(), + key = lambda c: c.faddr_i) + + def _initialize_functions(self) -> None: + self._function_cfg_infos = {} + if self.xnode is not None: + for xf in self.xnode.findall("fn"): + optva = xf.get("va") + if optva is not None: + self._function_cfg_infos[optva] = FnCfgInfo(xf) diff --git a/chb/ast/ASTCPrettyPrinter.py 
b/chb/ast/ASTCPrettyPrinter.py index 6885cd59..5d3f7c78 100644 --- a/chb/ast/ASTCPrettyPrinter.py +++ b/chb/ast/ASTCPrettyPrinter.py @@ -80,7 +80,8 @@ def __init__( localsymboltable: "ASTLocalSymbolTable", indentation: int = 2, annotations: Dict[int, List[str]] = {}, - livevars_on_exit: Dict[int, Set[str]] = {}) -> None: + livevars_on_exit: Dict[int, Set[str]] = {}, + hide_annotations: bool = False) -> None: self._indentation = indentation # indentation amount self._indent = 0 # current indentation self._localsymboltable = localsymboltable diff --git a/chb/cmdline/AnalysisManager.py b/chb/cmdline/AnalysisManager.py index ee0c1932..5677ed0a 100644 --- a/chb/cmdline/AnalysisManager.py +++ b/chb/cmdline/AnalysisManager.py @@ -220,7 +220,8 @@ def disassemble( verbose: bool = False, collectdiagnostics: bool = True, preamble_cutoff: int = 12, - save_asm: str = "yes") -> None: + save_asm: str = "yes", + save_asm_cfg_info: bool = False) -> None: cwd = os.getcwd() chklogger.logger.debug("change directory to %s", self.path) os.chdir(self.path) # temporary change in directory @@ -233,6 +234,8 @@ def disassemble( cmd.extend(["-specialization", s]) if save_asm == "yes": cmd.append("-save_asm") + if save_asm_cfg_info: + cmd.append("-save_asm_cfg_info") if collectdiagnostics: cmd.append("-diagnostics") if self.mips: diff --git a/chb/cmdline/XComparison.py b/chb/cmdline/XComparison.py index 9b1ea0d3..e7f1f189 100644 --- a/chb/cmdline/XComparison.py +++ b/chb/cmdline/XComparison.py @@ -399,5 +399,38 @@ def prepare_report(self) -> str: return "\n".join(lines) + def diffs_as_table(self) -> str: + + lines: List[str] = [] + lines.append( + "section".ljust(16) + + "virtual address".rjust(28) + + "section size (bytes)".rjust(28) + + " difference") + lines.append("-" * 82) + for (name, (optsh1, optsh2)) in self.sectionheaderpairs.items(): + if optsh1 is not None and optsh2 is not None: + if optsh1.vaddr != optsh2.vaddr or optsh1.size != optsh2.size: + if optsh1.vaddr == optsh2.vaddr: + 
vaddr = optsh1.vaddr + else: + vaddr = (optsh1.vaddr + " => " + optsh2.vaddr) + if optsh1.size == optsh2.size: + size = optsh1.size + strdiff = "" + else: + diff = int(optsh2.size, 16) - int(optsh1.size, 16) + if diff > 0: + strdiff = "(+ " + str(diff) + ")" + else: + strdiff = "(- " + str(-diff) + ")" + size = (optsh1.size + " => " + optsh2.size) + lines.append( + name.ljust(16) + + vaddr.rjust(28) + size.rjust(28) + strdiff.rjust(10)) + + return "\n".join(lines) + + def __str__(self) -> str: return self.prepare_report() diff --git a/chb/cmdline/astcmds.py b/chb/cmdline/astcmds.py index 097bbd24..e231b10d 100644 --- a/chb/cmdline/astcmds.py +++ b/chb/cmdline/astcmds.py @@ -143,6 +143,7 @@ def buildast(args: argparse.Namespace) -> NoReturn: hints: List[str] = args.hints # names of json files xpatchresultsfile = args.patch_results_file hide_globals: bool = args.hide_globals + hide_annotations: bool = args.hide_annotations remove_edges: List[str] = args.remove_edges add_edges: List[str] = args.add_edges verbose: bool = args.verbose @@ -353,8 +354,11 @@ def buildast(args: argparse.Namespace) -> NoReturn: print("\n// Lifted code for function " + faddr) print("// --------------------------------------------------") + annotations: Dict[int, List[str]] = {} + if not hide_annotations: + annotations = astinterface.annotations prettyprinter = ASTCPrettyPrinter( - localsymboltable, annotations=astinterface.annotations) + localsymboltable, annotations=annotations) print(prettyprinter.to_c(asts[0], include_globals=(not hide_globals))) functions_lifted += 1 diff --git a/chb/cmdline/chkx b/chb/cmdline/chkx index c5c6a56f..d3011309 100755 --- a/chb/cmdline/chkx +++ b/chb/cmdline/chkx @@ -379,6 +379,10 @@ def parse() -> argparse.Namespace: default="yes", choices=["yes", "no"], help='save asm listing in analysis directory') + analyzecmd.add_argument( + "--save_asm_cfg_info", + action="store_true", + help="save list of functions with cfg info in xml (may be slow)") 
analyzecmd.add_argument( "--construct_all_functions", action="store_true", @@ -674,6 +678,10 @@ def parse() -> argparse.Namespace: "--hide_globals", help="do not include global declarations and definitions in printed output", action="store_true") + buildast.add_argument( + "--hide_annotations", + help="do not include annotations in printed C code", + action="store_true") buildast.add_argument( "--remove_edges", nargs="*", @@ -1494,6 +1502,32 @@ def parse() -> argparse.Namespace: "xname2", help="name of second (patched) executable (assumed analyzed)") relationalcomparemd5s.set_defaults(func=R.relational_compare_md5s_cmd) + # --- relational_compare elfdata + relationalcompareelf = relationalcompareparsers.add_parser("elfdata") + relationalcompareelf.add_argument( + "xname1", + help="name of first (original) executable (assumed disassembled)") + relationalcompareelf.add_argument( + "xname2", + help="name of second (patched) executable (assumed disassembled)") + relationalcompareelf.set_defaults(func=R.relational_compare_elfdata) + + # --- relational_compare cfg-info + relationalcomparecfginfo = relationalcompareparsers.add_parser("cfg_info") + relationalcomparecfginfo.add_argument( + "xname1", + help="name of first (original) executable (assumed disassembled)") + relationalcomparecfginfo.add_argument( + "xname2", + help="name of second (patched) executable (assumed disassembled)"), + relationalcomparecfginfo.add_argument( + "--newfunctions", + nargs="*", + default=[], + help="list of functions that are new in xname2") + relationalcomparecfginfo.set_defaults( + func=R.relational_compare_cfg_info) + # ------------------------------------------------------ simulate subcommand parser_simulate = subparsers.add_parser("simulate") parser_simulate.add_argument("xname", help="name of executable") diff --git a/chb/cmdline/commandutil.py b/chb/cmdline/commandutil.py index e9c822a1..06722955 100644 --- a/chb/cmdline/commandutil.py +++ b/chb/cmdline/commandutil.py @@ -397,6
+397,7 @@ def analyzecmd(args: argparse.Namespace) -> NoReturn: verbose: bool = args.verbose collectdiagnostics: bool = args.collect_diagnostics save_asm: str = args.save_asm + save_asm_cfg_info: bool = args.save_asm_cfg_info thumb: List[str] = args.thumb preamble_cutoff: int = args.preamble_cutoff iterations: int = args.iterations @@ -517,7 +518,8 @@ def analyzecmd(args: argparse.Namespace) -> NoReturn: verbose=verbose, collectdiagnostics=collectdiagnostics, preamble_cutoff=preamble_cutoff, - save_asm=save_asm) + save_asm=save_asm, + save_asm_cfg_info=save_asm_cfg_info) except subprocess.CalledProcessError as e: print_error(str(e.output)) print_error(str(e)) diff --git a/chb/cmdline/relationalcmds.py b/chb/cmdline/relationalcmds.py index f8b008e5..57761a5a 100644 --- a/chb/cmdline/relationalcmds.py +++ b/chb/cmdline/relationalcmds.py @@ -948,3 +948,141 @@ def relational_compare_invs_cmd(args: argparse.Namespace) -> NoReturn: chklogger.logger.info("relational compare invariants completed") exit(0) + + +def relational_compare_elfdata(args: argparse.Namespace) -> NoReturn: + + # arguments + xname1: str = args.xname1 + xname2: str = args.xname2 + + try: + (path1, xfile1) = UC.get_path_filename(xname1) + (path2, xfile2) = UC.get_path_filename(xname2) + except UF.CHBError as e: + print(str(e.wrap())) + exit(1) + + xinfo1 = XI.XInfo() + xinfo1.load(path1, xfile1) + + xinfo2 = XI.XInfo() + xinfo2.load(path2, xfile2) + + app1 = UC.get_app(path1, xfile1, xinfo1) + app2 = UC.get_app(path2, xfile2, xinfo2) + + sectionheaders1 = app1.header.sectionheaders + sectionheaders2 = app2.header.sectionheaders + + xcomparison = XComparison( + False, + path1, + xfile1, + path2, + xfile2, + app1, + app2) + + xcomparison.compare_sections() + xcomparison.compare_segments() + + print(xcomparison.diffs_as_table()) + + exit(0) + + +def relational_compare_cfg_info(args: argparse.Namespace) -> NoReturn: + """This command helps to establish a function mapping between two + binaries. 
The assumption is that the two binaries are roughly + similar, in particular that the order in which the functions appear + is stable, but some functions may have changed in size, or some new + functions may have been inserted. + + It only requires both binaries to have been disassembled with the + option --save_asm_cfg_info; it does not require the binaries to have + been analyzed. + """ + + # arguments + xname1: str = args.xname1 + xname2: str = args.xname2 + newfunctions: List[str] = args.newfunctions + + try: + (path1, xfile1) = UC.get_path_filename(xname1) + (path2, xfile2) = UC.get_path_filename(xname2) + except UF.CHBError as e: + print(str(e.wrap())) + exit(1) + + xinfo1 = XI.XInfo() + xinfo1.load(path1, xfile1) + + xinfo2 = XI.XInfo() + xinfo2.load(path2, xfile2) + + app1 = UC.get_app(path1, xfile1, xinfo1) + app2 = UC.get_app(path2, xfile2, xinfo2) + + cfginfo1 = app1.appcfginfo + cfginfo2 = app2.appcfginfo + + if len(cfginfo1.function_cfg_infos) == 0: + UC.print_error( + "No function data found for " + xname1 + " in " + path1 + + "\n Please disassemble with option --save_asm_cfg_info") + exit(1) + + if len(cfginfo2.function_cfg_infos) == 0: + UC.print_error( + "No function data found for " + xname2 + " in " + path2 + + "\n Please disassemble with option --save_asm_cfg_info") + exit(1) + + cfginfos1 = cfginfo1.cfg_infos + cfginfos2 = cfginfo2.cfg_infos + + print("app1: " + str(len(cfginfos1))) + print("app2: " + str(len(cfginfos2))) + + cfginfos2 = [x for x in cfginfos2 if x.faddr not in newfunctions] + + cfginfos2 = cfginfos2[:len(cfginfos1)] + + diffcount = 0 + + for (ci1, ci2) in zip(cfginfos1, cfginfos2): + if ( + ci1.basic_blocks == ci2.basic_blocks + and ci1.instructions == ci2.instructions): + cfgdiff = "" + elif ci1.basic_blocks == ci2.basic_blocks: + cfgdiff = ( + "diff: " + + str(ci1.basic_blocks).rjust(8) + " " + + str(ci1.instructions) + " => " + str(ci2.instructions)) + diffcount += 1 + else: + cfgdiff = ( + "diff: " + + str(ci1.basic_blocks) + " => "
+ str(ci2.basic_blocks) + + " " + + str(ci1.instructions) + " => " + str(ci2.instructions)) + diffcount += 1 + if ci1.name is not None and ci2.name is not None and ci1.name == ci2.name: + name = ci1.name + elif ci1.name is not None: + name = ci1.name + " (original)" + elif ci2.name is not None: + name = ci2.name + " (patched)" + else: + name = "" + + print(ci1.faddr + " " + ci2.faddr + " " + + str(ci2.faddr_i - ci1.faddr_i).rjust(4) + " " + cfgdiff.ljust(24) + + name) + + print("\nNumber of functions different: " + str(diffcount)) + + exit(0) diff --git a/chb/util/fileutil.py b/chb/util/fileutil.py index cfa88543..52a1acb7 100644 --- a/chb/util/fileutil.py +++ b/chb/util/fileutil.py @@ -5,7 +5,7 @@ # The MIT License (MIT) # # Copyright (c) 2016-2020 Kestrel Technology LLC -# Copyright (c) 2021-2023 Aarno Labs LLC +# Copyright (c) 2021-2024 Aarno Labs LLC # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -818,6 +818,19 @@ def get_resultdata_xnode(path: str, xfile: str) -> ET.Element: return get_chb_xnode(filename, "application-results") +def get_app_cfg_info_filename(path: str, xfile: str) -> str: + fdir = get_analysis_dir(path, xfile) + return get_chb_filename(fdir, xfile, "functions.xml") + + +def get_app_cfg_info_xnode(path: str, xfile: str) -> Optional[ET.Element]: + filename = get_app_cfg_info_filename(path, xfile) + if os.path.isfile(filename): + return get_chb_xnode(filename, "functions") + else: + return None + + def get_md5profile_filename(path: str, xfile: str) -> str: fdir = get_results_dir(path, xfile) return get_chb_filename(fdir, xfile, "md5.json") From 84106d97678bc5d8706218188949830d641fd2bd Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Tue, 26 Nov 2024 10:31:24 -0800 Subject: [PATCH 2/3] AST: add support for BX-call --- chb/app/AppAccess.py | 3 ++- chb/app/CallbackTables.py | 8 ++++---- chb/app/InstrXData.py | 4 ++++ 
chb/arm/ARMCallOpcode.py | 29 ++++++++++++++++------------ chb/arm/ARMInstruction.py | 3 ++- chb/arm/opcodes/ARMBranchExchange.py | 15 +++++++++++++- chb/graphics/DotCallgraph.py | 2 ++ 7 files changed, 45 insertions(+), 19 deletions(-) diff --git a/chb/app/AppAccess.py b/chb/app/AppAccess.py index 264a5f19..5197ebeb 100644 --- a/chb/app/AppAccess.py +++ b/chb/app/AppAccess.py @@ -410,7 +410,8 @@ def callgraph(self) -> Callgraph: cbttgts = cbtable.tagged_fields_at_offset(cbtgt.offset) for (tag, cbfaddr) in cbttgts.items(): if self.has_function_name(cbfaddr): - cbfname = tag + ":" + self.function_name(cbfaddr) + cbfname = ( + tag + ":" + self.function_name(cbfaddr)) else: cbfname = tag + ":" + cbfaddr apptgtnode = mk_tagged_app_callgraph_node( diff --git a/chb/app/CallbackTables.py b/chb/app/CallbackTables.py index cc93bf8f..f36ba7b3 100644 --- a/chb/app/CallbackTables.py +++ b/chb/app/CallbackTables.py @@ -78,11 +78,11 @@ def tag_offset(self) -> int: return (-1) @property - def tag(self) -> str: + def tag(self) -> Optional[str]: if self.tag_offset >= 0: return self.fields[self.tag_offset][1] else: - return "?" 
+ return None def value_at_offset(self, offset: int) -> str: if offset in self.fields: @@ -131,8 +131,8 @@ def tagged_fields_at_offset(self, offset: int) -> Dict[str, str]: for r in self.records: counter += 1 tag = r.tag - if tag == "?": - tag = "unknown_" + str(counter) + if tag is None: + tag = str(counter) result[tag] = r.value_at_offset(offset) return result diff --git a/chb/app/InstrXData.py b/chb/app/InstrXData.py index 05920149..3d4c1d56 100644 --- a/chb/app/InstrXData.py +++ b/chb/app/InstrXData.py @@ -308,6 +308,10 @@ def has_call_target(self) -> bool: else: return False + @property + def is_bx_call(self) -> bool: + return "bx-call" in self.tags + def call_target_argument_count(self) -> Optional[int]: if len(self.tags) >= 3: if self.tags[1] == "call": diff --git a/chb/arm/ARMCallOpcode.py b/chb/arm/ARMCallOpcode.py index db78df66..11555499 100644 --- a/chb/arm/ARMCallOpcode.py +++ b/chb/arm/ARMCallOpcode.py @@ -195,23 +195,28 @@ def ast_call_prov( finfo = xdata.function.finfo if finfo.has_call_target_info(iaddr): ctinfo = finfo.call_target_info(iaddr) - fname = ctinfo.target_interface.name ftype = ctinfo.target_interface.bctype if ftype is not None: astfntype = ftype.convert(astree.typconverter) - if astree.globalsymboltable.has_symbol(fname): - tgtvinfo = astree.globalsymboltable.get_symbol(fname) - hl_tgt = astree.mk_vinfo_lval_expression(tgtvinfo) + + if xdata.is_bx_call: + # indirect call + hl_tgt = XU.xxpr_to_ast_def_expr(xprs[-1], xdata, iaddr, astree) else: - gaddr: int = 0 - if fname.startswith("sub_"): - gaddr = int("0x" + fname[4:], 16) + fname = ctinfo.target_interface.name + if astree.globalsymboltable.has_symbol(fname): + tgtvinfo = astree.globalsymboltable.get_symbol(fname) + hl_tgt = astree.mk_vinfo_lval_expression(tgtvinfo) else: - if tgt.is_absolute: - tgtaddr = cast(ARMAbsoluteOp, tgt.opkind) - gaddr = int(tgtaddr.address.get_hex(), 16) - hl_tgt = astree.mk_global_variable_expr( - fname, globaladdress=gaddr, vtype=astfntype) + gaddr: int 
= 0 + if fname.startswith("sub_"): + gaddr = int("0x" + fname[4:], 16) + else: + if tgt.is_absolute: + tgtaddr = cast(ARMAbsoluteOp, tgt.opkind) + gaddr = int(tgtaddr.address.get_hex(), 16) + hl_tgt = astree.mk_global_variable_expr( + fname, globaladdress=gaddr, vtype=astfntype) if ftype is not None and ftype.is_function: ftype = cast("BCTypFun", ftype) diff --git a/chb/arm/ARMInstruction.py b/chb/arm/ARMInstruction.py index e1a3684f..1dc89e18 100644 --- a/chb/arm/ARMInstruction.py +++ b/chb/arm/ARMInstruction.py @@ -236,8 +236,9 @@ def annotation(self) -> str: aggaddr = self.xdata.subsumed_by() return f"subsumed by {aggaddr}" elif self.subsumes: + ann = self.opcode.annotation(self.xdata) dependents = self.xdata.subsumes() - return "subsumes [" + ", ".join(dependents) + "]" + return ann + " (subsumes [" + ", ".join(dependents) + "])" else: return self.opcode.annotation(self.xdata) diff --git a/chb/arm/opcodes/ARMBranchExchange.py b/chb/arm/opcodes/ARMBranchExchange.py index df01cb4f..830b4686 100644 --- a/chb/arm/opcodes/ARMBranchExchange.py +++ b/chb/arm/opcodes/ARMBranchExchange.py @@ -39,7 +39,9 @@ from chb.invariants.XXpr import XXpr + import chb.util.fileutil as UF +from chb.util.loggingutil import chklogger from chb.util.IndexedTable import IndexedTableValue @@ -98,8 +100,19 @@ def call_target(self, xdata: InstrXData) -> "CallTarget": else: raise UF.CHBError("Instruction is not a call: " + str(self)) + def argument_count(self, xdata: InstrXData) -> int: + if self.is_call_instruction(xdata): + argcount = xdata.call_target_argument_count() + if argcount is not None: + return argcount + chklogger.logger.warning( + "Call instruction does not have argument count") + return 0 + else: + raise UF.CHBError("Instruction is not a call: " + str(self)) + def arguments(self, xdata: InstrXData) -> Sequence[XXpr]: - return xdata.xprs + return xdata.xprs[:self.argument_count(xdata)] def annotation(self, xdata: InstrXData) -> str: """xdata format: a:x . 
diff --git a/chb/graphics/DotCallgraph.py b/chb/graphics/DotCallgraph.py index e3a449bf..9b378b62 100644 --- a/chb/graphics/DotCallgraph.py +++ b/chb/graphics/DotCallgraph.py @@ -103,6 +103,8 @@ def to_dotgraph(self) -> DotGraph: ('[' in name or ']' in name or '?' in name) and not name.startswith('"')): name = '"' + name + '"' + if ":" in name: + continue if len(name) == 0: continue sameranknodes.append(name) From 0010ac3c90321a0f34c3face2c0a721aacc52563 Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Wed, 27 Nov 2024 10:37:08 -0800 Subject: [PATCH 3/3] REP: first version of patch candidates --- chb/app/InstrXData.py | 4 +- chb/buffer/LibraryCallCallsites.py | 16 +++++- chb/cmdline/chkx | 37 ++++++++++++- chb/cmdline/reportcmds.py | 83 +++++++++++++++++++++++++++++- chb/models/FunctionSummary.py | 4 +- 5 files changed, 137 insertions(+), 7 deletions(-) diff --git a/chb/app/InstrXData.py b/chb/app/InstrXData.py index 3d4c1d56..deaeb8d4 100644 --- a/chb/app/InstrXData.py +++ b/chb/app/InstrXData.py @@ -334,7 +334,9 @@ def has_indirect_call_target_exprs(self) -> bool: return (len(self.tags) == 2 and self.tags[1] == "u" and len(self.args) > 1) def call_target(self, ixd: "InterfaceDictionary") -> "CallTarget": - if self.has_call_target(): + if self.has_call_target() and self.is_bx_call: + return ixd.call_target(self.args[-5]) + elif self.has_call_target(): return ixd.call_target(self.args[-1]) else: raise UF.CHBError( diff --git a/chb/buffer/LibraryCallCallsites.py b/chb/buffer/LibraryCallCallsites.py index 2d9354e7..c14771ad 100644 --- a/chb/buffer/LibraryCallCallsites.py +++ b/chb/buffer/LibraryCallCallsites.py @@ -51,9 +51,11 @@ class LibraryCallSideeffect: def __init__( self, summary: "FunctionSummary", + faddr: str, instr: "Instruction", pre: "PreDerefWrite") -> None: self._summary = summary + self._faddr = faddr self._instr = instr self._pre = pre @@ -61,6 +63,10 @@ def __init__( def summary(self) -> "FunctionSummary": return self._summary + @property + def 
faddr(self) -> str: + return self._faddr + @property def instr(self) -> "Instruction": return self._instr @@ -286,7 +292,8 @@ def derefwrites(self) -> List[LibraryCallSideeffect]: for pre in self.preconditions: if pre.is_deref_write: pre = cast("PreDerefWrite", pre) - lcwrite = LibraryCallSideeffect(self.summary, self.instr, pre) + lcwrite = LibraryCallSideeffect( + self.summary, self.faddr, self.instr, pre) result.append(lcwrite) return result @@ -446,6 +453,13 @@ def patch_candidates(self) -> List["Instruction"]: result.append(c.instr) return result + def patch_callsites(self) -> List[LibraryCallSideeffect]: + result: List[LibraryCallSideeffect] = [] + for (_, ics) in self.callsites.items(): + for cs in ics.values(): + result.extend(cs.patch_candidates) + return result + def patch_candidates_distribution(self) -> Dict[str, int]: result: Dict[str, int] = {} for (_, ics) in self.callsites.items(): diff --git a/chb/cmdline/chkx b/chb/cmdline/chkx index d3011309..7ee9209a 100755 --- a/chb/cmdline/chkx +++ b/chb/cmdline/chkx @@ -1171,6 +1171,41 @@ def parse() -> argparse.Namespace: report_bufferbounds.set_defaults(func=REP.report_buffer_bounds) + # --- report patch candidates + report_patchcandidates = reportparsers.add_parser("patchcandidates") + report_patchcandidates.add_argument("xname", help="name of executable") + report_patchcandidates.add_argument( + "--output", "-o", + help="name of output file (without extension)") + report_patchcandidates.add_argument( + "--json", + action="store_true", + help="output results in json format") + report_patchcandidates.add_argument( + "--targets", + nargs="*", + default=[], + help="list of target library functions to include") + report_patchcandidates.add_argument( + "--verbose", "-v", + action="store_true", + help="print functions examined") + report_patchcandidates.add_argument( + "--loglevel", "-log", + choices=UL.LogLevel.options(), + default="NONE", + help="activate logging with the given level (default to stderr)") + 
report_patchcandidates.add_argument( + "--logfilename", + help="name of file to write log messages") + report_patchcandidates.add_argument( + "--logfilemode", + choices=["a", "w"], + default="a", + help="file mode for log file: append (a, default), or write (w)") + + report_patchcandidates.set_defaults(func=REP.report_patch_candidates) + ''' # -- report application calls -- @@ -1195,7 +1230,7 @@ def parse() -> argparse.Namespace: report_iocs.set_defaults(func=reportiocs) ''' - # -------------------------------------------------------------- summaries subcommand + # ----------------------------------------------------- summaries subcommand parser_summaries = subparsers.add_parser("summaries") parser_summaries.set_defaults(func=SC.summariescommand) diff --git a/chb/cmdline/reportcmds.py b/chb/cmdline/reportcmds.py index 4a12e6ae..541a89dd 100644 --- a/chb/cmdline/reportcmds.py +++ b/chb/cmdline/reportcmds.py @@ -988,8 +988,6 @@ def perc(v: float) -> str: exit(0) - - def report_buffer_bounds(args: argparse.Namespace) -> NoReturn: # arguments @@ -1122,3 +1120,84 @@ def write_json_result( reverse=True): print(name.ljust(24) + str(count).rjust(5)) exit(0) + + +def report_patch_candidates(args: argparse.Namespace) -> NoReturn: + + # arguments + xname = args.xname + xoutput: Optional[str] = args.output + xjson: bool = args.json + xverbose: bool = args.verbose + xtargets: List[str] = args.targets + loglevel: str = args.loglevel + logfilename: Optional[str] = args.logfilename + logfilemode: str = args.logfilemode + + try: + (path, xfile) = UC.get_path_filename(xname) + UF.check_analysis_results(path, xfile) + except UF.CHBError as e: + print(str(e.wrap())) + exit(1) + + UC.set_logging( + loglevel, + path, + logfilename=logfilename, + mode=logfilemode, + msg="report_patch_candidates invoked") + + xinfo = XI.XInfo() + xinfo.load(path, xfile) + + app = UC.get_app(path, xfile, xinfo) + + n_calls: int = 0 + libcalls = LibraryCallCallsites() + + for (faddr, blocks) in 
app.call_instructions().items(): + fn = app.function(faddr) + + for (baddr, instrs) in blocks.items(): + for instr in instrs: + n_calls += 1 + calltgt = instr.call_target + if calltgt.is_so_target and calltgt.name in xtargets: + libcalls.add_library_callsite(faddr, instr) + + print("Number of calls: " + str(n_calls)) + + patchcallsites = libcalls.patch_callsites() + + for pc in sorted(patchcallsites, key=lambda pc:pc.faddr): + instr = cast("ARMInstruction", pc.instr) + dstarg = pc.dstarg + if dstarg is None: + chklogger.logger.warning( + "No expression found for destination argument: %s", + str(instr)) + continue + dstoffset = dstarg.stack_address_offset() + fn = instr.armfunction + stackframe = fn.stackframe + stackbuffer = stackframe.get_stack_buffer(dstoffset) + if stackbuffer is None: + chklogger.logger.warning( + "No stackbuffer found for %s at offset %s", + str(instr), str(dstoffset)) + continue + buffersize = stackbuffer.size + + print(" " + pc.instr.iaddr + " " + pc.instr.annotation) + print(" - faddr: " + pc.faddr) + print(" - iaddr: " + pc.instr.iaddr) + print(" - target function: " + str(pc.summary.name)) + print(" - stack offset: " + str(dstoffset)) + print(" - length argument: " + str(pc.lenarg)) + print(" - buffersize: " + str(buffersize)) + print("") + + print("Number of patch callsites: " + str(len(patchcallsites))) + + exit(0) diff --git a/chb/models/FunctionSummary.py b/chb/models/FunctionSummary.py index 0693f0cf..8afe150e 100644 --- a/chb/models/FunctionSummary.py +++ b/chb/models/FunctionSummary.py @@ -6,7 +6,7 @@ # # Copyright (c) 2016-2020 Kestrel Technology LLC # Copyright (c) 2020 Henny Sipma -# Copyright (c) 2021-2023 Aarno Labs LLC +# Copyright (c) 2021-2024 Aarno Labs LLC # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -42,7 +42,7 @@ class FunctionSummary: - "Signature and summary semantics for a function.""" + """Signature 
and summary semantics for a function.""" def __init__(self, library: "FunctionSummaryLibrary",