Skip to content
Merged
113 changes: 69 additions & 44 deletions qiling/extensions/coverage/formats/history.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,33 @@
from typing import List
from qiling import Qiling
from qiling.core_hooks_types import HookRet
from __future__ import annotations
from typing import List, Tuple, TYPE_CHECKING, Union, Optional, Any

if TYPE_CHECKING:
from qiling import Qiling
from qiling.core_hooks_types import HookRet
from capstone import Cs

import re
from capstone import CsInsn


class History:
history_hook_handle: HookRet = None
history: List[int] = []
history_hook_handle: HookRet
history: List[CsInsn] = []
ql: Qiling
md: Cs
arm_is_thumb: bool

def __init__(self, ql: Qiling) -> None:
    """Attaches a history tracker to a Qiling instance

    Caches the disassembler and the thumb state of the architecture and then
    configures basic block tracking, so recording is set up on construction.

    Args:
        ql (Qiling): The Qiling instance whose execution is recorded
    """
    self.ql = ql

    # bind a fresh list on the instance: the class-level default list is shared by
    # every History object and the hook callback appends to it in place
    self.history = []

    self.md = self.ql.arch.disassembler

    # architectures that have no 'is_thumb' attribute are treated as non-thumb
    self.arm_is_thumb = getattr(ql.arch, 'is_thumb', False)

    self.track_block_coverage()

def clear_history(self) -> None:
    """Clears the current state of the history

    A new empty list is bound to the instance instead of emptying the previous one
    in place, so a list object that was read from the history attribute earlier
    keeps its contents.
    """
    self.history = []

def clear_hooks(self) -> None:
"""Clears the current history hook from the Qiling instance
Expand All @@ -27,102 +38,118 @@ def clear_hooks(self) -> None:

self.ql.hook_del(self.history_hook_handle)

def __hook_block(self, ql: Qiling, address: int, size: int) -> Any:
    '''Records the instruction found at a hooked address

    The unicorn block/instruction hook function shared by track_block_coverage and
    track_instruction_coverage. It disassembles the instruction located at the hooked
    address and appends the resulting capstone object to the history list.

    Args:
        ql (Qiling): The Qiling instance that fired the hook
        address (int): The address the hook was fired for
        size (int): The size reported by the hook (not used)

    Returns:
        None
    '''

    # get the current state of the thumb mode, this only applies to arm.
    # the cached disassembler cannot simply be kept across a mode switch: per the original
    # author's note, arch.arm.disassembler recreates the capstone instance on every access
    # (to make sure THUMB mode is properly dealt with), so it is fetched again only when
    # the mode actually changed
    is_thumb = getattr(ql.arch, 'is_thumb', False)

    # compare by value and remember the state that was observed, rather than relying on
    # object identity and toggling the cached flag
    if is_thumb != self.arm_is_thumb:
        self.arm_is_thumb = is_thumb
        self.md = self.ql.arch.disassembler

    # 0x10 is way more than enough bytes to grab a single instruction
    ins_bytes = ql.mem.read(address, 0x10)

    # only the first decoded instruction is of interest. if nothing can be decoded, then
    # unicorn/qiling is expected to fail on that very instruction by itself, so the entry
    # is just left out of the history
    insn = next(self.md.disasm(ins_bytes, address), None)

    if insn is not None:
        self.history.append(insn)

def track_block_coverage(self) -> None:
    """Configures the history plugin to track all of the basic blocks that are executed. Removes any existing hooks

    Returns:
        None
    """

    # the handle attribute is only declared on the class, not assigned, so it may
    # not exist yet the first time tracking is configured
    if getattr(self, 'history_hook_handle', None):
        self.clear_hooks()

    self.history_hook_handle = self.ql.hook_block(self.__hook_block)

def track_instruction_coverage(self) -> None:
    """Configures the history plugin to track all of the instructions that are executed. Removes any existing hooks

    Returns:
        None
    """

    # the handle attribute is only declared on the class, not assigned, so it may
    # not exist yet the first time tracking is configured
    if getattr(self, 'history_hook_handle', None):
        self.clear_hooks()

    self.history_hook_handle = self.ql.hook_code(self.__hook_block)

def get_ins_only_lib(self, libs: List[str]) -> List[CsInsn]:
    """Returns the executed instructions that are located only in mmaps of objects that match the regex of items in the list

    Args:
        libs (List[str]): A list of regex strings to match against the library names in the memory maps

    Returns:
        List[capstone.CsInsn]: A list of CsInsn that have been executed and are only in the memory maps that match the regex

    Examples:
        >>> history.get_ins_only_lib([".*libc.so.*", ".*libpthread.so.*"])
    """

    executable_maps = self.get_regex_matching_exec_maps(libs)

    # only the bounds of each map are needed, collect them once up front
    bounds = [(start, end) for start, end, _, _, _ in executable_maps]

    # NOTE(review): both bounds are treated as inclusive here; confirm that the end
    # address reported by get_mapinfo() really belongs to the map
    return [h for h in self.history if any(start <= h.address <= end for start, end in bounds)]

def get_ins_exclude_lib(self, libs: List[str]) -> List[CsInsn]:
    '''Returns a list of history instructions that are not in the libraries that match the regex in the libs list

    Args:
        libs (List[str]): A list of regex strings to match against the library names in the memory maps

    Returns:
        List[capstone.CsInsn]: A list of CsInsn that have been executed and are not in the memory maps that match the regex

    Examples:
        >>> history.get_ins_exclude_lib([".*libc.so.*", ".*libpthread.so.*"])
    '''

    executable_maps = self.get_regex_matching_exec_maps(libs)

    # only the bounds of each map are needed, collect them once up front
    bounds = [(start, end) for start, end, _, _, _ in executable_maps]

    # an instruction is kept only if it lies outside of every matching map; testing
    # for "outside of at least one map" would let nearly everything through as soon
    # as more than one map matches
    # NOTE(review): both bounds are treated as inclusive here; confirm that the end
    # address reported by get_mapinfo() really belongs to the map
    return [h for h in self.history if not any(start <= h.address <= end for start, end in bounds)]

def get_mem_map_from_addr(self, ins: Union[int, CsInsn]) -> Optional[Tuple]:
    '''Returns the memory map that contains the instruction

    Args:
        ins (Union[int, CsInsn]): The instruction address to search for, can be either an int or a capstone.CsInsn

    Returns:
        Optional[Tuple]: A tuple that contains the memory map that contains the instruction,
        or None if no memory map contains it. If more than one map matches, the first one
        reported by ql.mem.get_mapinfo() is returned.
        this tuple is in the format of (start_addr, end_addr, perms, name, path)

    Examples:
        >>> history.get_mem_map_from_addr(0x7ffff7dd1b97)
    '''

    # a capstone instruction is reduced to its address, so that the lookup
    # below only ever deals with plain integers
    if isinstance(ins, CsInsn):
        ins = ins.address

    assert isinstance(ins, int)

    # NOTE(review): both bounds are treated as inclusive here; confirm that the end
    # address reported by get_mapinfo() really belongs to the map
    return next((x for x in self.ql.mem.get_mapinfo() if x[0] <= ins <= x[1]), None)

def get_regex_matching_exec_maps(self, libs: List) -> List:
def get_regex_matching_exec_maps(self, libs: List[str]) -> List[Tuple]:
'''Returns a list of tuples for current mmaps whose names match the regex of libs in the list

This is a wrapper around ql.mem.get_mapinfo() and just filters the results by the regex of the library names and also only returns maps that are executable
This is a wrapper around ql.mem.get_mapinfo() and just filters the results by the regex of the library names
and also only returns maps that are executable

Args:
libs (List): A list of regex strings to match against the library names in the memory maps
libs (Union[str, Collection[str]]): A list of regex strings to match against the library names in the memory maps

Returns:
List: A list of tuples that match the regex and are executable
List[Tuple]: A list of tuples that match the regex and are executable

Examples:
>>> history.get_regex_matching_exec_maps(["libc.so", "libpthread.so"])
>>> history.get_regex_matching_exec_maps([".*libc.so.*", ".*libpthread.so.*"])
>>> history.get_regex_matching_exec_maps(".*libc.*")
'''

Expand All @@ -135,9 +162,7 @@ def get_regex_matching_exec_maps(self, libs: List) -> List:

# filter the list of tuples
# so that we return only the ones where the library name matches the regex
regex_matching_libs = [x for x in self.ql.mem.get_mapinfo() if any([r.match(x[3]) for r in regex])]
regex_matching_libs = [x for x in self.ql.mem.get_mapinfo() if any(r.match(x[3]) for r in regex)]

# filter viable_libs for items that have the executable bit set
executable_maps = [x for x in regex_matching_libs if 'x' in x[2]]

return executable_maps
return [x for x in regex_matching_libs if 'x' in x[2]]
21 changes: 21 additions & 0 deletions tests/test_history.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,17 @@ def test_get_ins_exclude_lib(self):
map_for_ins = history.get_mem_map_from_addr(block)
self.assertNotRegex(map_for_ins[3], ".*libc.so.*")

assert len(non_libc_blocks) > 0

non_libc_blocks_and_ld = history.get_ins_exclude_lib([".*libc.so.*", "ld-linux.*"])

for block in non_libc_blocks_and_ld:
map_for_ins = history.get_mem_map_from_addr(block)
self.assertNotRegex(map_for_ins[3], ".*libc.so.*|ld-linux.*")

assert len(non_libc_blocks_and_ld) > 0


def test_get_ins_only_lib(self):
ql = Qiling(["../examples/rootfs/x8664_linux/bin/x8664_hello"], "../examples/rootfs/x8664_linux", verbose=QL_VERBOSE.OFF)
history = History(ql)
Expand All @@ -75,5 +86,15 @@ def test_get_ins_only_lib(self):
map_for_ins = history.get_mem_map_from_addr(block)
self.assertRegex(map_for_ins[3], ".*libc.so.*")

assert len(non_libc_blocks) > 0

non_libc_blocks_and_ld = history.get_ins_only_lib([".*libc.so.*", "ld-linux.*"])

for block in non_libc_blocks_and_ld :
map_for_ins = history.get_mem_map_from_addr(block)
self.assertRegex(map_for_ins[3], ".*libc.so.*|.*ld-linux.*")

assert len(non_libc_blocks_and_ld) > 0

if __name__ == "__main__":
unittest.main()