diff --git a/qiling/extensions/coverage/formats/history.py b/qiling/extensions/coverage/formats/history.py index 2edd28e2a..b0415b41b 100644 --- a/qiling/extensions/coverage/formats/history.py +++ b/qiling/extensions/coverage/formats/history.py @@ -1,22 +1,33 @@ -from typing import List -from qiling import Qiling -from qiling.core_hooks_types import HookRet +from __future__ import annotations +from typing import List, Tuple, TYPE_CHECKING, Union, Optional, Any + +if TYPE_CHECKING: + from qiling import Qiling + from qiling.core_hooks_types import HookRet + from capstone import Cs + import re +from capstone import CsInsn + class History: - history_hook_handle: HookRet = None - history: List[int] = [] + history_hook_handle: HookRet + history: List[CsInsn] = [] ql: Qiling + md: Cs + arm_is_thumb: bool def __init__(self, ql: Qiling) -> None: self.ql = ql + self.md = self.ql.arch.disassembler + self.arm_is_thumb = getattr(ql.arch, 'is_thumb', False) self.track_block_coverage() def clear_history(self) -> None: """Clears the current state of the history """ - self.history = [] + self.history.clear() def clear_hooks(self) -> None: """Clears the current history hook from the Qiling instance @@ -27,19 +38,38 @@ def clear_hooks(self) -> None: self.ql.hook_del(self.history_hook_handle) + def __hook_block(self, ql: Qiling, address: int, size: int) -> Any: + ''' + The unicorn block/instruction hook function for the track_block_coverage and track_instruction_coverage functions. 
This just gives us a way to append capstone objects to the history list + ''' + + # get the current state of the thumb mode, only applies to arm + # originally we were going to access the ql.arch.disassembler directly for all architectures from within this callback, but in the + # implementation for arch.arm.disassembler, the capstone instance is recreated every time (to make sure THUMB mode is properly dealt with) + if self.arm_is_thumb is not getattr(ql.arch, "is_thumb", False): + # the thumb mode has changed, so we need to update the disassembler + self.arm_is_thumb = not self.arm_is_thumb + self.md = self.ql.arch.disassembler + + # 0x10 is way more than enough bytes to grab a single instruction + ins_bytes = ql.mem.read(address, 0x10) + try: + self.history.append(next(self.md.disasm(ins_bytes, address))) + except StopIteration: + # if this ever happens, then the unicorn/qiling is going to crash because it tried to execute + # an instruction that it can't, so we are just not going to do anything + pass + def track_block_coverage(self) -> None: """Configures the history plugin to track all of the basic blocks that are executed. Removes any existing hooks Returns: None """ - if self.history_hook_handle: + if getattr(self, 'history_hook_handle', None): self.clear_hooks() - - def __hook_block(ql, address, size): - self.history.append(address) - self.history_hook_handle = self.ql.hook_block(__hook_block) + self.history_hook_handle = self.ql.hook_block(self.__hook_block) def track_instruction_coverage(self) -> None: """Configures the history plugin to track all of the instructions that are executed. 
Removes any existing hooks @@ -47,82 +77,79 @@ def track_instruction_coverage(self) -> None: Returns: None """ - if self.history_hook_handle: + if getattr(self, 'history_hook_handle', None): self.clear_hooks() - - def __hook_block(ql, address, size): - self.history.append(address) - self.history_hook_handle = self.ql.hook_code(__hook_block) + self.history_hook_handle = self.ql.hook_code(self.__hook_block) - def get_ins_only_lib(self, libs: List[str]) -> List[int]: + def get_ins_only_lib(self, libs: List[str]) -> List[CsInsn]: """Returns a list of addresses that have been executed that are only in mmaps for objects that match the regex of items in the list Args: libs (List[str]): A list of regex strings to match against the library names in the memory maps Returns: - List[int]: A list of addresses that have been executed and in the memory maps that match the regex + List[capstone.CsInsn]: A list of CsInsn that have been executed and are only in the memory maps that match the regex Examples: - >>> history.get_ins_only_lib(["libc.so", "libpthread.so"]) + >>> history.get_ins_only_lib([".*libc.so.*", ".*libpthread.so.*"]) """ executable_maps = self.get_regex_matching_exec_maps(libs) - return [x for x in self.history if any([x >= start and x <= end for start, end, _, _, _ in executable_maps])] + + return [x for x in self.history if any(start <= x.address <= end for start, end, _, _, _ in executable_maps)] - def get_ins_exclude_lib(self, libs: list) -> List: + def get_ins_exclude_lib(self, libs: List[str]) -> List[CsInsn]: '''Returns a list of history instructions that are not in the libraries that match the regex in the libs list Args: - libs (List): A list of regex strings to match against the library names in the memory maps + libs (List[str]): A list of regex strings to match against the library names in the memory maps Returns: - List: A list of addresses that have been executed and are not in the memory maps that match the regex + List[capstone.CsInsn]: A list of 
CsInsn that have been executed and are not in the memory maps that match the regex Examples: - >>> history.get_ins_exclude_lib(["libc.so", "libpthread.so"]) + >>> history.get_ins_exclude_lib([".*libc.so.*", ".*libpthread.so.*"]) ''' executable_maps = self.get_regex_matching_exec_maps(libs) - return [x for x in self.history if any([x < start or x > end for start, end, _, _, _ in executable_maps])] - - def get_mem_map_from_addr(self, ins: int) -> tuple: + return [h for h in self.history if not any(start <= h.address <= end for start, end, _, _, _ in executable_maps)] + + def get_mem_map_from_addr(self, ins: Union[int, CsInsn]) -> Optional[Tuple]: '''Returns the memory map that contains the instruction Args: - ins (int): The instruction address to search for + ins (Union[int, CsInsn]): The instruction address to search for, can be either an int or a capstone.CsInsn Returns: - tuple: A tuple that contains the memory map that contains the instruction + Optional[Tuple]: A tuple that contains the memory map that contains the instruction this tuple is in the format of (start_addr, end_addr, perms, name, path) Examples: >>> history.get_mem_map_from_addr(0x7ffff7dd1b97) ''' - #get the memory map that contains the instruction - mem_map = [x for x in self.ql.mem.get_mapinfo() if x[0] <= ins and x[1] >= ins] + if isinstance(ins, CsInsn): + ins = ins.address - if len(mem_map) == 0: - return None + assert isinstance(ins, int) - # i sure hope theres not more than one map that contains the instruction lol - return mem_map[0] + return next((x for x in self.ql.mem.get_mapinfo() if x[0] <= ins and x[1] >= ins), None) - def get_regex_matching_exec_maps(self, libs: List) -> List: + def get_regex_matching_exec_maps(self, libs: List[str]) -> List[Tuple]: '''Returns a list of tuples for current mmaps whose names match the regex of libs in the list - This is a wrapper around ql.mem.get_mapinfo() and just filters the results by the regex of the library names and also only returns maps that 
are executable + This is a wrapper around ql.mem.get_mapinfo() and just filters the results by the regex of the library names + and also only returns maps that are executable Args: - libs (List): A list of regex strings to match against the library names in the memory maps + libs (Union[str, Collection[str]]): A list of regex strings to match against the library names in the memory maps Returns: - List: A list of tuples that match the regex and are executable + List[Tuple]: A list of tuples that match the regex and are executable Examples: - >>> history.get_regex_matching_exec_maps(["libc.so", "libpthread.so"]) + >>> history.get_regex_matching_exec_maps([".*libc.so.*", ".*libpthread.so.*"]) >>> history.get_regex_matching_exec_maps(".*libc.*") ''' @@ -135,9 +162,7 @@ def get_regex_matching_exec_maps(self, libs: List) -> List: # filter the list of tuples # so that we return only the ones where the library name matches the regex - regex_matching_libs = [x for x in self.ql.mem.get_mapinfo() if any([r.match(x[3]) for r in regex])] + regex_matching_libs = [x for x in self.ql.mem.get_mapinfo() if any(r.match(x[3]) for r in regex)] # filter viable_libs for items that have the executable bit set - executable_maps = [x for x in regex_matching_libs if 'x' in x[2]] - - return executable_maps \ No newline at end of file + return [x for x in regex_matching_libs if 'x' in x[2]] diff --git a/tests/test_history.py b/tests/test_history.py index 6ad2d84d4..1aaf6dba2 100644 --- a/tests/test_history.py +++ b/tests/test_history.py @@ -62,6 +62,17 @@ def test_get_ins_exclude_lib(self): map_for_ins = history.get_mem_map_from_addr(block) self.assertNotRegex(map_for_ins[3], ".*libc.so.*") + assert len(non_libc_blocks) > 0 + + non_libc_blocks_and_ld = history.get_ins_exclude_lib([".*libc.so.*", "ld-linux.*"]) + + for block in non_libc_blocks_and_ld: + map_for_ins = history.get_mem_map_from_addr(block) + self.assertNotRegex(map_for_ins[3], ".*libc.so.*|ld-linux.*") + + assert 
len(non_libc_blocks_and_ld) > 0 + + def test_get_ins_only_lib(self): ql = Qiling(["../examples/rootfs/x8664_linux/bin/x8664_hello"], "../examples/rootfs/x8664_linux", verbose=QL_VERBOSE.OFF) history = History(ql) @@ -75,5 +86,15 @@ def test_get_ins_only_lib(self): map_for_ins = history.get_mem_map_from_addr(block) self.assertRegex(map_for_ins[3], ".*libc.so.*") + assert len(non_libc_blocks) > 0 + + non_libc_blocks_and_ld = history.get_ins_only_lib([".*libc.so.*", "ld-linux.*"]) + + for block in non_libc_blocks_and_ld : + map_for_ins = history.get_mem_map_from_addr(block) + self.assertRegex(map_for_ins[3], ".*libc.so.*|.*ld-linux.*") + + assert len(non_libc_blocks_and_ld) > 0 + if __name__ == "__main__": unittest.main() \ No newline at end of file