From c19fcf1162b19666d11b585a8cecd193cc468b70 Mon Sep 17 00:00:00 2001
From: me
Date: Thu, 16 Feb 2023 17:23:17 -0500
Subject: [PATCH 1/2] added a history tracker to get coverage information about
 the execution of the binary

---
 qiling/extensions/coverage/formats/history.py | 146 ++++++++++++++++++
 tests/test_history.py                         |  81 ++++++++++
 2 files changed, 227 insertions(+)
 create mode 100644 qiling/extensions/coverage/formats/history.py
 create mode 100644 tests/test_history.py

diff --git a/qiling/extensions/coverage/formats/history.py b/qiling/extensions/coverage/formats/history.py
new file mode 100644
index 000000000..2edd28e2a
--- /dev/null
+++ b/qiling/extensions/coverage/formats/history.py
@@ -0,0 +1,146 @@
+import re
+from typing import List, Optional, Tuple, Union
+
+from qiling import Qiling
+from qiling.core_hooks_types import HookRet
+
+# one entry of ql.mem.get_mapinfo(): (start_addr, end_addr, perms, name, path)
+MapInfo = Tuple[int, int, str, str, str]
+
+
+class History:
+    """Records the addresses that a Qiling instance executes.
+
+    A basic block hook is installed on construction; every address the hook
+    reports is appended to ``history`` in execution order.
+    """
+
+    history_hook_handle: Optional[HookRet] = None
+    history: List[int]
+    ql: Qiling
+
+    def __init__(self, ql: Qiling) -> None:
+        self.ql = ql
+        # created per instance: a class level list would be shared (and
+        # appended to) by every History object
+        self.history = []
+        self.track_block_coverage()
+
+    def clear_history(self) -> None:
+        """Clears the current state of the history"""
+        self.history = []
+
+    def clear_hooks(self) -> None:
+        """Removes the current history hook, if there is one, from the Qiling instance"""
+        if self.history_hook_handle is not None:
+            self.ql.hook_del(self.history_hook_handle)
+            self.history_hook_handle = None
+
+    def track_block_coverage(self) -> None:
+        """Configures the history plugin to track all of the basic blocks that are executed.
+
+        Any hook installed earlier by this object is removed first.
+        """
+        self.clear_hooks()
+        self.history_hook_handle = self.ql.hook_block(self._record_address)
+
+    def track_instruction_coverage(self) -> None:
+        """Configures the history plugin to track all of the instructions that are executed.
+
+        Any hook installed earlier by this object is removed first.
+        """
+        self.clear_hooks()
+        self.history_hook_handle = self.ql.hook_code(self._record_address)
+
+    def _record_address(self, ql: Qiling, address: int, size: int) -> None:
+        """Hook callback shared by both tracking modes: remembers the reported address"""
+        self.history.append(address)
+
+    @staticmethod
+    def _in_any_map(address: int, maps: List[MapInfo]) -> bool:
+        """Tells whether address lies inside at least one of the maps
+
+        The end address of a map is treated as exclusive, so two neighbouring
+        maps never both claim the address on their shared boundary.
+        """
+        return any(start <= address < end for start, end, *_ in maps)
+
+    def get_ins_only_lib(self, libs: Union[str, List[str]]) -> List[int]:
+        """Returns the executed addresses that are inside an executable map whose name matches libs
+
+        Args:
+            libs (Union[str, List[str]]): Regex string(s) to match against the names in the memory maps
+
+        Returns:
+            List[int]: The addresses from the history that are inside a matching executable map
+
+        Examples:
+            >>> history.get_ins_only_lib([".*libc.so.*", ".*libpthread.so.*"])
+        """
+        executable_maps = self.get_regex_matching_exec_maps(libs)
+        return [x for x in self.history if self._in_any_map(x, executable_maps)]
+
+    def get_ins_exclude_lib(self, libs: Union[str, List[str]]) -> List[int]:
+        """Returns the executed addresses that are outside every executable map whose name matches libs
+
+        Args:
+            libs (Union[str, List[str]]): Regex string(s) to match against the names in the memory maps
+
+        Returns:
+            List[int]: The addresses from the history that are in none of the matching executable maps
+
+        Examples:
+            >>> history.get_ins_exclude_lib([".*libc.so.*", ".*libpthread.so.*"])
+        """
+        executable_maps = self.get_regex_matching_exec_maps(libs)
+        # being inside a single matching map is enough to drop an address, so it
+        # is kept only when none of them contains it
+        return [x for x in self.history if not self._in_any_map(x, executable_maps)]
+
+    def get_mem_map_from_addr(self, ins: int) -> Optional[MapInfo]:
+        """Returns the memory map that contains the instruction
+
+        Args:
+            ins (int): The instruction address to search for
+
+        Returns:
+            Optional[MapInfo]: The map that contains the instruction, in the format of
+            (start_addr, end_addr, perms, name, path), or None when no map contains it
+
+        Examples:
+            >>> history.get_mem_map_from_addr(0x7ffff7dd1b97)
+        """
+        for mem_map in self.ql.mem.get_mapinfo():
+            if mem_map[0] <= ins < mem_map[1]:
+                return mem_map
+
+        return None
+
+    def get_regex_matching_exec_maps(self, libs: Union[str, List[str]]) -> List[MapInfo]:
+        """Returns the current executable mmaps whose names match one of the regexes in libs
+
+        This is a wrapper around ql.mem.get_mapinfo() that keeps only the maps that are
+        executable and whose name matches. The regexes are applied with re.match, so they
+        are anchored at the start of the name: use ".*libc.so.*" rather than "libc.so" to
+        find a name such as "[mmap] libc.so.6".
+
+        Args:
+            libs (Union[str, List[str]]): Regex string(s) to match against the names in the memory maps
+
+        Returns:
+            List[MapInfo]: The maps that match a regex and are executable
+
+        Examples:
+            >>> history.get_regex_matching_exec_maps([".*libc.so.*", "ld.*"])
+            >>> history.get_regex_matching_exec_maps(".*libc.*")
+        """
+        # a lone string would otherwise be iterated character by character
+        if isinstance(libs, str):
+            libs = [libs]
+
+        regex = [re.compile(lib) for lib in libs]
+
+        return [
+            x for x in self.ql.mem.get_mapinfo()
+            if 'x' in x[2] and any(r.match(x[3]) for r in regex)
+        ]
diff --git a/tests/test_history.py b/tests/test_history.py
new file mode 100644
index 000000000..97d865e48
--- /dev/null
+++ b/tests/test_history.py
@@ -0,0 +1,81 @@
+import unittest
+from qiling import Qiling
+from qiling.const import QL_VERBOSE
+from qiling.extensions.coverage.formats.history import History
+from typing import List
+
+from IPython import embed
+
+class HistoryTest(unittest.TestCase):
+
+    @staticmethod
+    def sanitize_mmap_path(mmap: List[tuple]) -> List[tuple]:
+        '''Removes the path from the mmap tuple so that it can be compared to other mmaps
+        currently because the loader is handling loading ld and the main binary, we get the annotation of the path in element 5 of the tuple (index 4)
+        this path is going to be dependent on the users filesystem, so it doesnt quite make sense to test for it
+        '''
+        if isinstance(mmap, tuple):
+            mmap = [mmap]
+
+        return list(map(lambda x: (x[0], x[1], x[2], x[3], ''), mmap))
+
+    def test_get_regex_matching_exec_maps(self):
+        ql = Qiling(["../examples/rootfs/x8664_linux/bin/x8664_hello"], "../examples/rootfs/x8664_linux", verbose=QL_VERBOSE.OFF)
+        history = History(ql)
+        ql.run()
+
+        self.assertEqual([(140736278126592, 140736280121344, 'r-x', '[mmap] libc.so.6', '')], history.get_regex_matching_exec_maps(".*libc.so.*"))
+
+        self.assertEqual(
+            [
+                (140736278126592, 140736280121344, 'r-x', '[mmap] libc.so.6', ''),
+                (140737351864320, 140737352024064, 'r-x', 'ld-linux-x86-64.so.2', '')
+            ],
+            self.sanitize_mmap_path(history.get_regex_matching_exec_maps([".*libc.so.*", "ld.*"]))
+        )
+
+        del ql
+
+    def test_get_mem_map_from_addr(self):
+        ql = Qiling(["../examples/rootfs/x8664_linux/bin/x8664_hello"], "../examples/rootfs/x8664_linux", verbose=QL_VERBOSE.OFF)
+        history = History(ql)
+        ql.run()
+
+        self.assertEqual(
+            self.sanitize_mmap_path(history.get_mem_map_from_addr(0x7ffff7df4830))[0],
+            (
+                0x7ffff7dd5000,
+                0x7ffff7dfc000,
+                'r-x',
+                'ld-linux-x86-64.so.2',
+                ''))
+
+
+    def test_get_ins_exclude_lib(self):
+        ql = Qiling(["../examples/rootfs/x8664_linux/bin/x8664_hello"], "../examples/rootfs/x8664_linux", verbose=QL_VERBOSE.OFF)
+        history = History(ql)
+        ql.run(end=0x55555555465a)
+
+        non_libc_blocks = history.get_ins_exclude_lib(".*libc.so.*")
+
+        # this test is going to take a while but oh well
+        # also assumes that the get_mem_map_from_addr function works
+        for block in non_libc_blocks:
+            map_for_ins = history.get_mem_map_from_addr(block)
+            self.assertNotRegex(map_for_ins[3], ".*libc.so.*")
+
+    def test_get_ins_only_lib(self):
+        ql = Qiling(["../examples/rootfs/x8664_linux/bin/x8664_hello"], "../examples/rootfs/x8664_linux", verbose=QL_VERBOSE.OFF)
+        history = History(ql)
+        ql.run(end=0x55555555465a)
+
+        non_libc_blocks = history.get_ins_only_lib(".*libc.so.*")
+
+        # this test is going to take a while but oh well
+        # also assumes that the get_mem_map_from_addr function works
+        for block in non_libc_blocks:
+            map_for_ins = history.get_mem_map_from_addr(block)
+            self.assertRegex(map_for_ins[3], ".*libc.so.*")
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file

From ffc3a46c83387dfd8eacff738d8ce3788e32487a Mon Sep 17 00:00:00 2001
From: me
Date: Thu, 16 Feb 2023 17:35:04 -0500
Subject: [PATCH 2/2] updated some values to be in hex, removed some code that
 was just used for development (the ipython thing)

---
 tests/test_history.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/tests/test_history.py b/tests/test_history.py
index 97d865e48..6ad2d84d4 100644
--- a/tests/test_history.py
+++ b/tests/test_history.py
@@ -4,8 +4,6 @@
 from qiling.extensions.coverage.formats.history import History
 from typing import List
 
-from IPython import embed
-
 class HistoryTest(unittest.TestCase):
 
     @staticmethod
@@ -24,12 +22,12 @@ def test_get_regex_matching_exec_maps(self):
         history = History(ql)
         ql.run()
 
-        self.assertEqual([(140736278126592, 140736280121344, 'r-x', '[mmap] libc.so.6', '')], history.get_regex_matching_exec_maps(".*libc.so.*"))
+        self.assertEqual([(0x7fffb7dd6000, 0x7fffb7fbd000, 'r-x', '[mmap] libc.so.6', '')], history.get_regex_matching_exec_maps(".*libc.so.*"))
 
         self.assertEqual(
             [
-                (140736278126592, 140736280121344, 'r-x', '[mmap] libc.so.6', ''),
-                (140737351864320, 140737352024064, 'r-x', 'ld-linux-x86-64.so.2', '')
+                (0x7fffb7dd6000, 0x7fffb7fbd000, 'r-x', '[mmap] libc.so.6', ''),
+                (0x7ffff7dd5000, 0x7ffff7dfc000, 'r-x', 'ld-linux-x86-64.so.2', '')
             ],
             self.sanitize_mmap_path(history.get_regex_matching_exec_maps([".*libc.so.*", "ld.*"]))
         )