diff --git a/pyproject.toml b/pyproject.toml index 71a7d8ee2..9b2b7b75e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ keywords = [ [tool.poetry.dependencies] python = "^3.8" -capstone = "^4" +capstone = "^5" unicorn = "2.1.3" pefile = ">=2022.5.30" python-registry = "^1.3.1" diff --git a/qiling/debugger/__init__.py b/qiling/debugger/__init__.py index 57e0576ed..4122e4cb4 100644 --- a/qiling/debugger/__init__.py +++ b/qiling/debugger/__init__.py @@ -1,3 +1 @@ from .debugger import QlDebugger -# from .disassember import QlDisassember -# from .utils import QlReadELF diff --git a/qiling/debugger/disassember.py b/qiling/debugger/disassember.py deleted file mode 100644 index fea90563a..000000000 --- a/qiling/debugger/disassember.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python3 -# -# Cross Platform and Multi Architecture Advanced Binary Emulation Framework -# - -from elftools.elf.elffile import ELFFile - -from qiling import Qiling -from qiling.const import * -from capstone import * - - -class QlDisassember(): - def __init__(self, ql:Qiling): - self.ql = ql - - def disasm_all_lines(self): - disasm_result = [] - - if self.ql.os.type == QL_OS.LINUX: - disasm_result = self.disasm_elf() - - return disasm_result - - def disasm_elf(self, seg_name='.text'): - def disasm(ql, address, size): - md = ql.arch.disassembler - md.detail = True - - return md.disasm(ql.mem.read(address, size), address) - - disasm_result = [] - if self.ql.arch.type == QL_ARCH.X86: - BASE = int(self.ql.profile.get("OS32", "load_address"), 16) - seg_start = 0x0 - seg_end = 0x0 - - f = open(self.ql.path, 'rb') - elffile = ELFFile(f) - elf_header = elffile.header - reladyn = elffile.get_section_by_name(seg_name) - - # No PIE - if elf_header['e_type'] == 'ET_EXEC': - seg_start = reladyn.header.sh_addr - seg_end = seg_start + reladyn.data_size - # PIE - elif elf_header['e_type'] == 'ET_DYN': - seg_start = BASE + reladyn.header.sh_addr - seg_end = seg_start + reladyn.data_size - - for 
insn in disasm(ql, seg_start, seg_end-seg_start): - disasm_result.append(insn) - - return disasm_result \ No newline at end of file diff --git a/qiling/debugger/qdb/arch/__init__.py b/qiling/debugger/qdb/arch/__init__.py index 4c5b7a385..12ed30d11 100644 --- a/qiling/debugger/qdb/arch/__init__.py +++ b/qiling/debugger/qdb/arch/__init__.py @@ -3,7 +3,6 @@ # Cross Platform and Multi Architecture Advanced Binary Emulation Framework # -from .arch_x86 import ArchX86 -from .arch_mips import ArchMIPS from .arch_arm import ArchARM, ArchCORTEX_M -from .arch_x8664 import ArchX8664 \ No newline at end of file +from .arch_intel import ArchIntel, ArchX86, ArchX64 +from .arch_mips import ArchMIPS diff --git a/qiling/debugger/qdb/arch/arch.py b/qiling/debugger/qdb/arch/arch.py index cbe6489a7..bf1aa6dfe 100644 --- a/qiling/debugger/qdb/arch/arch.py +++ b/qiling/debugger/qdb/arch/arch.py @@ -3,32 +3,81 @@ # Cross Platform and Multi Architecture Advanced Binary Emulation Framework # +from typing import Collection, Dict, Mapping, Optional, TypeVar -from qiling.const import QL_ARCH -from unicorn import UC_ERR_READ_UNMAPPED -import unicorn +T = TypeVar('T') class Arch: + """Arch base class. """ - base class for arch - """ - def __init__(self): - pass + def __init__(self, regs: Collection[str], swaps: Mapping[str, str], asize: int, isize: int) -> None: + """Initialize architecture instance. + + Args: + regs : collection of registers names to include in context + asize : native address size in bytes + isize : instruction size in bytes + swaps : readable register names alternatives, may be empty + """ + + self._regs = regs + self._swaps = swaps + self._asize = asize + self._isize = isize @property - def arch_insn_size(self): - return 4 + def regs(self) -> Collection[str]: + """Collection of registers names. + """ + + return self._regs @property - def archbit(self): - return 4 + def isize(self) -> int: + """Native instruction size. 
+ """ + + return self._isize + + @property + def asize(self) -> int: + """Native pointer size. + """ + + return self._asize + + def swap_regs(self, mapping: Mapping[str, T]) -> Dict[str, T]: + """Swap default register names with their aliases. + + Args: + mapping: regsiters names mapped to their values + + Returns: a new dictionary where all swappable names were swapped with their aliases + """ + + return {self._swaps.get(k, k): v for k, v in mapping.items()} + + def unalias(self, name: str) -> str: + """Get original register name for the specified alias. + + Args: + name: aliaes register name + + Returns: original name of aliased register, or same name if not an alias + """ + + # perform a reversed lookup in swaps to find the original name for given alias + return next((org for org, alt in self._swaps.items() if name == alt), name) + + def read_insn(self, address: int) -> Optional[bytearray]: + """Read a single instruction from given address. + + Args: + address: memory address to read from - def read_insn(self, address: int): - try: - result = self.read_mem(address, self.arch_insn_size) - except unicorn.unicorn.UcError as err: - result = None + Returns: instruction bytes, or None if memory could not be read + """ - return result + return self.try_read_mem(address, self.isize) diff --git a/qiling/debugger/qdb/arch/arch_arm.py b/qiling/debugger/qdb/arch/arch_arm.py index ed2e797c4..cbf63c2ad 100644 --- a/qiling/debugger/qdb/arch/arch_arm.py +++ b/qiling/debugger/qdb/arch/arch_arm.py @@ -3,105 +3,141 @@ # Cross Platform and Multi Architecture Advanced Binary Emulation Framework # -from typing import Mapping +from typing import Dict, Optional from .arch import Arch + class ArchARM(Arch): - def __init__(self): - super().__init__() - self._regs = ( - "r0", "r1", "r2", "r3", - "r4", "r5", "r6", "r7", - "r8", "r9", "r10", "r11", - "r12", "sp", "lr", "pc", - ) + def __init__(self) -> None: + regs = ( + 'r0', 'r1', 'r2', 'r3', + 'r4', 'r5', 'r6', 'r7', + 'r8', 'r9', 'r10', 
'r11', + 'r12', 'sp', 'lr', 'pc' + ) + + aliases = { + 'r9' : 'sb', + 'r10': 'sl', + 'r12': 'ip', + 'r11': 'fp' + } + + asize = 4 + isize = 4 + + super().__init__(regs, aliases, asize, isize) + + @staticmethod + def get_flags(bits: int) -> Dict[str, bool]: + return { + 'thumb': bits & (0b1 << 5) != 0, + 'fiq': bits & (0b1 << 6) != 0, + 'irq': bits & (0b1 << 7) != 0, + 'overflow': bits & (0b1 << 28) != 0, + 'carry': bits & (0b1 << 29) != 0, + 'zero': bits & (0b1 << 30) != 0, + 'neg': bits & (0b1 << 31) != 0 + } + + @staticmethod + def get_mode(bits: int) -> str: + modes = { + 0b10000: 'User', + 0b10001: 'FIQ', + 0b10010: 'IRQ', + 0b10011: 'Supervisor', + 0b10110: 'Monitor', + 0b10111: 'Abort', + 0b11010: 'Hypervisor', + 0b11011: 'Undefined', + 0b11111: 'System' + } + + return modes.get(bits & 0b11111, '?') @property - def regs(self): - return self._regs + def is_thumb(self) -> bool: + """Query whether the processor is currently in thumb mode. + """ - @regs.setter - def regs(self, regs): - self._regs += regs + return self.ql.arch.is_thumb @property - def regs_need_swapped(self): - return { - "sl": "r10", - "ip": "r12", - "fp": "r11", - } + def isize(self) -> int: + return 2 if self.is_thumb else self._isize @staticmethod - def get_flags(bits: int) -> Mapping[str, bool]: - """ - get flags for ARM + def __is_wide_insn(data: bytes) -> bool: + """Determine whether a sequence of bytes respresents a wide thumb instruction. 
""" - def get_mode(bits: int) -> int: - """ - get operating mode for ARM - """ - return { - 0b10000: "User", - 0b10001: "FIQ", - 0b10010: "IRQ", - 0b10011: "Supervisor", - 0b10110: "Monitor", - 0b10111: "Abort", - 0b11010: "Hypervisor", - 0b11011: "Undefined", - 0b11111: "System", - }.get(bits & 0x00001f) + assert len(data) in (2, 4), f'unexpected instruction length: {len(data)}' - return { - "mode": get_mode(bits), - "thumb": bits & 0x00000020 != 0, - "fiq": bits & 0x00000040 != 0, - "irq": bits & 0x00000080 != 0, - "neg": bits & 0x80000000 != 0, - "zero": bits & 0x40000000 != 0, - "carry": bits & 0x20000000 != 0, - "overflow": bits & 0x10000000 != 0, - } + # determine whether this is a wide instruction by inspecting the 5 most + # significant bits in the first half-word + return (data[1] >> 3) & 0b11111 in (0b11101, 0b11110, 0b11111) - @property - def thumb_mode(self) -> bool: - """ - helper function for checking thumb mode + def __read_thumb_insn_fail(self, address: int) -> Optional[bytearray]: + """A failsafe method for reading thumb instructions. This method is needed for + rare cases in which a narrow instruction is on a page boundary where the next + page is unavailable. """ - return self.ql.arch.is_thumb + lo_half = self.try_read_mem(address, 2) + if lo_half is None: + return None - def read_insn(self, address: int) -> bytes: - """ - read instruction depending on current operating mode + data = lo_half + + if ArchARM.__is_wide_insn(data): + hi_half = self.try_read_mem(address + 2, 2) + + # fail if higher half-word was required but could not be read + if hi_half is None: + return None + + data.extend(hi_half) + + return data + + def __read_thumb_insn(self, address: int) -> Optional[bytearray]: + """Read one instruction in thumb mode. + + Thumb instructions may be either 2 or 4 bytes long, depending on encoding of + the first word. However, reading two chunks of two bytes each is slower. 
For + most cases reading all four bytes in advance will be safe and quicker. """ - def thumb_read(address: int) -> bytes: + data = self.try_read_mem(address, 4) - first_two = self.ql.mem.read_ptr(address, 2) - result = self.ql.pack16(first_two) + if data is None: + # there is a slight chance we could not read 4 bytes because only 2 + # are available. try the failsafe method to find out + return self.__read_thumb_insn_fail(address) - # to judge it's thumb mode or not - if any([ - first_two & 0xf000 == 0xf000, - first_two & 0xf800 == 0xf800, - first_two & 0xe800 == 0xe800, - ]): + if ArchARM.__is_wide_insn(data): + return data - latter_two = self.ql.mem.read_ptr(address+2, 2) - result += self.ql.pack16(latter_two) + return data[:2] - return result + def read_insn(self, address: int) -> Optional[bytearray]: + """Read one instruction worth of bytes. + """ - return super().read_insn(address) if not self.thumb_mode else thumb_read(address) + if self.is_thumb: + return self.__read_thumb_insn(address) + return super().read_insn(address) class ArchCORTEX_M(ArchARM): def __init__(self): super().__init__() - self.regs += ("xpsr", "control", "primask", "basepri", "faultmask") + + self._regs += ( + 'xpsr', 'control', 'primask', + 'basepri', 'faultmask' + ) diff --git a/qiling/debugger/qdb/arch/arch_intel.py b/qiling/debugger/qdb/arch/arch_intel.py new file mode 100644 index 000000000..986309e02 --- /dev/null +++ b/qiling/debugger/qdb/arch/arch_intel.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +# +# Cross Platform and Multi Architecture Advanced Binary Emulation Framework +# + +from typing import Collection, Dict + +from .arch import Arch + + +class ArchIntel(Arch): + """Arch base class for Intel architecture. 
+ """ + + def __init__(self, regs: Collection[str], asize: int) -> None: + super().__init__(regs, {}, asize, 15) + + @staticmethod + def get_flags(bits: int) -> Dict[str, bool]: + return { + 'CF' : bits & (0b1 << 0) != 0, # carry + 'PF' : bits & (0b1 << 2) != 0, # parity + 'AF' : bits & (0b1 << 4) != 0, # adjust + 'ZF' : bits & (0b1 << 6) != 0, # zero + 'SF' : bits & (0b1 << 7) != 0, # sign + 'IF' : bits & (0b1 << 9) != 0, # interrupt enable + 'DF' : bits & (0b1 << 10) != 0, # direction + 'OF' : bits & (0b1 << 11) != 0 # overflow + } + + @staticmethod + def get_iopl(bits: int) -> int: + return bits & (0b11 << 12) + + +class ArchX86(ArchIntel): + def __init__(self) -> None: + regs = ( + 'eax', 'ebx', 'ecx', 'edx', + 'ebp', 'esp', 'esi', 'edi', + 'eip', 'eflags' ,'ss', 'cs', + 'ds', 'es', 'fs', 'gs' + ) + + super().__init__(regs, 4) + + +class ArchX64(ArchIntel): + def __init__(self) -> None: + regs = ( + 'rax', 'rbx', 'rcx', 'rdx', + 'rbp', 'rsp', 'rsi', 'rdi', + 'r8', 'r9', 'r10', 'r11', + 'r12', 'r13', 'r14', 'r15', + 'rip', 'eflags', 'ss', 'cs', + 'ds', 'es', 'fs', 'gs' + ) + + super().__init__(regs, 8) diff --git a/qiling/debugger/qdb/arch/arch_mips.py b/qiling/debugger/qdb/arch/arch_mips.py index d262b0a90..52d7d8fcd 100644 --- a/qiling/debugger/qdb/arch/arch_mips.py +++ b/qiling/debugger/qdb/arch/arch_mips.py @@ -3,29 +3,27 @@ # Cross Platform and Multi Architecture Advanced Binary Emulation Framework # - - from .arch import Arch + class ArchMIPS(Arch): - def __init__(self): - super().__init__() + def __init__(self) -> None: + regs = ( + 'gp', 'at', 'v0', 'v1', + 'a0', 'a1', 'a2', 'a3', + 't0', 't1', 't2', 't3', + 't4', 't5', 't6', 't7', + 't8', 't9', 'sp', 's8', + 's0', 's1', 's2', 's3', + 's4', 's5', 's6', 's7', + 'ra', 'k0', 'k1', 'pc' + ) + + aliases = { + 's8': 'fp' + } - @property - def regs(self): - return ( - "gp", "at", "v0", "v1", - "a0", "a1", "a2", "a3", - "t0", "t1", "t2", "t3", - "t4", "t5", "t6", "t7", - "t8", "t9", "sp", "s8", - "s0", "s1", 
"s2", "s3", - "s4", "s5", "s6", "s7", - "ra", "k0", "k1", "pc", - ) + asize = 4 + isize = 4 - @property - def regs_need_swapped(self): - return { - "fp": "s8", - } + super().__init__(regs, aliases, asize, isize) diff --git a/qiling/debugger/qdb/arch/arch_x86.py b/qiling/debugger/qdb/arch/arch_x86.py deleted file mode 100644 index 10617cbd1..000000000 --- a/qiling/debugger/qdb/arch/arch_x86.py +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env python3 -# -# Cross Platform and Multi Architecture Advanced Binary Emulation Framework -# - -from typing import Mapping - -from .arch import Arch - -class ArchX86(Arch): - def __init__(self): - super().__init__() - - @property - def arch_insn_size(self): - return 15 - - @property - def regs(self): - return ( - "eax", "ebx", "ecx", "edx", - "esp", "ebp", "esi", "edi", - "eip", "ss", "cs", "ds", "es", - "fs", "gs", "eflags", - ) - - def read_insn(self, address: int) -> bytes: - # due to the variadic lengh of x86 instructions ( 1~15 ) - # always assume the maxium size for disassembler to tell - # what is it exactly. 
- - return self.read_mem(address, self.arch_insn_size) - - @staticmethod - def get_flags(bits: int) -> Mapping[str, bool]: - """ - get flags from ql.reg.eflags - """ - - return { - "CF" : bits & 0x0001 != 0, # CF, carry flag - "PF" : bits & 0x0004 != 0, # PF, parity flag - "AF" : bits & 0x0010 != 0, # AF, adjust flag - "ZF" : bits & 0x0040 != 0, # ZF, zero flag - "SF" : bits & 0x0080 != 0, # SF, sign flag - "OF" : bits & 0x0800 != 0, # OF, overflow flag - } diff --git a/qiling/debugger/qdb/arch/arch_x8664.py b/qiling/debugger/qdb/arch/arch_x8664.py deleted file mode 100644 index 686e2016e..000000000 --- a/qiling/debugger/qdb/arch/arch_x8664.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python3 -# -# Cross Platform and Multi Architecture Advanced Binary Emulation Framework -# - -from typing import Mapping - -from .arch import Arch - -class ArchX8664(Arch): - ''' - This is currently mostly just a copy of x86 - other than the size of archbits. Some of this may be wrong. - ''' - - def __init__(self): - super().__init__() - - @property - def arch_insn_size(self): - ''' - Architecture maximum instruction size. x86_64 instructions are a maximum size of 15 bytes. - - @returns bytes - ''' - - return 15 - - @property - def regs(self): - return ( - "rax", "rbx", "rcx", "rdx", - "rsp", "rbp", "rsi", "rdi", - "rip", "r8", "r9", "r10", - "r11", "r12", "r13", "r14", - "r15", "ss", "cs", "ds", "es", - "fs", "gs", "eflags" - ) - - @property - def archbit(self): - ''' - Architecture maximum register size. x86 is a maximum of 4 bytes. - - @returns bytes - ''' - - return 8 - - def read_insn(self, address: int) -> bytes: - # Due to the variadicc length of x86 instructions - # always assume the maximum size for disassembler to tell - # what it is. 
- - return self.read_mem(address, self.arch_insn_size) - - @staticmethod - def get_flags(bits: int) -> Mapping[str, bool]: - - return { - "CF" : bits & 0x0001 != 0, # CF, carry flag - "PF" : bits & 0x0004 != 0, # PF, parity flag - "AF" : bits & 0x0010 != 0, # AF, adjust flag - "ZF" : bits & 0x0040 != 0, # ZF, zero flag - "SF" : bits & 0x0080 != 0, # SF, sign flag - "OF" : bits & 0x0800 != 0, # OF, overflow flag - } diff --git a/qiling/debugger/qdb/branch_predictor/__init__.py b/qiling/debugger/qdb/branch_predictor/__init__.py index 5004ec348..670f65347 100644 --- a/qiling/debugger/qdb/branch_predictor/__init__.py +++ b/qiling/debugger/qdb/branch_predictor/__init__.py @@ -4,7 +4,13 @@ # from .branch_predictor import BranchPredictor -from .branch_predictor_x86 import BranchPredictorX86 -from .branch_predictor_mips import BranchPredictorMIPS from .branch_predictor_arm import BranchPredictorARM, BranchPredictorCORTEX_M -from .branch_predictor_x8664 import BranchPredictorX8664 +from .branch_predictor_intel import BranchPredictorX86, BranchPredictorX64 +from .branch_predictor_mips import BranchPredictorMIPS + +__all__ = [ + 'BranchPredictor', + 'BranchPredictorARM', 'BranchPredictorCORTEX_M', + 'BranchPredictorX86', 'BranchPredictorX64', + 'BranchPredictorMIPS' +] diff --git a/qiling/debugger/qdb/branch_predictor/branch_predictor.py b/qiling/debugger/qdb/branch_predictor/branch_predictor.py index 713661501..9ee1466e5 100644 --- a/qiling/debugger/qdb/branch_predictor/branch_predictor.py +++ b/qiling/debugger/qdb/branch_predictor/branch_predictor.py @@ -4,37 +4,82 @@ # from abc import abstractmethod +from typing import ClassVar, NamedTuple, Optional + +from capstone import CS_GRP_JUMP, CS_GRP_CALL, CS_GRP_RET, CS_GRP_BRANCH_RELATIVE + from ..context import Context +from ..misc import InvalidInsn -class Prophecy: +class Prophecy(NamedTuple): + """Simple container for storing prediction results. 
""" - container for storing result of the predictor - @going: indicate the certian branch will be taken or not - @where: where will it go if going is true + + going: bool + """Indicate whether the certian branch is taken or not. """ - def __init__(self): - self.going = False - self.where = None + where: Optional[int] + """Branch target in case it is taken. + Target may be `None` if it should have been read from memory, but that memory location + could not be reached. + """ - def __iter__(self): - return iter((self.going, self.where)) class BranchPredictor(Context): + """Branch predictor base class. """ - Base class for predictor + + stop: ClassVar[str] + """Instruction mnemonic that can be used to determine program's end. """ - def read_reg(self, reg_name): + def has_ended(self) -> bool: + """Determine whether the program has ended by inspecting the currnet instruction. + """ + + insn = self.disasm_lite(self.cur_addr) + + if not insn: + return False + + # (address, size, mnemonic, op_str) + return insn[2] == self.stop + + def is_branch(self) -> bool: + """Determine whether the current instruction is a branching instruction. + This does not provide indication whether the branch is going to be taken or not. """ - read specific register value + + insn = self.disasm(self.cur_addr, True) + + # invalid instruction; definitely not a branch + if isinstance(insn, InvalidInsn): + return False + + branching = ( + CS_GRP_JUMP, + CS_GRP_CALL, + CS_GRP_RET, + CS_GRP_BRANCH_RELATIVE + ) + + return any(grp in branching for grp in insn.groups) + + def is_fcall(self) -> bool: + """Determine whether the current instruction is a function call. 
""" - return self.ql.arch.regs.read(reg_name) + insn = self.disasm(self.cur_addr, True) + + # invalid instruction; definitely not a function call + if isinstance(insn, InvalidInsn): + return False + + return insn.group(CS_GRP_CALL) @abstractmethod def predict(self) -> Prophecy: - """ - Try to predict certian branch will be taken or not based on current context + """Predict whether a certian branch will be taken or not based on current context. """ diff --git a/qiling/debugger/qdb/branch_predictor/branch_predictor_arm.py b/qiling/debugger/qdb/branch_predictor/branch_predictor_arm.py index bb5cd0f61..ea5dde0ec 100644 --- a/qiling/debugger/qdb/branch_predictor/branch_predictor_arm.py +++ b/qiling/debugger/qdb/branch_predictor/branch_predictor_arm.py @@ -3,255 +3,264 @@ # Cross Platform and Multi Architecture Advanced Binary Emulation Framework # +from typing import Callable, Dict, List, Optional, Tuple +from capstone import CS_OP_IMM, CS_OP_MEM, CS_OP_REG +from capstone.arm import ArmOp, ArmOpMem +from capstone.arm_const import ( + ARM_CC_EQ, ARM_CC_NE, ARM_CC_HS, ARM_CC_LO, + ARM_CC_MI, ARM_CC_PL, ARM_CC_VS, ARM_CC_VC, + ARM_CC_HI, ARM_CC_LS, ARM_CC_GE, ARM_CC_LT, + ARM_CC_GT, ARM_CC_LE, ARM_CC_AL +) -from .branch_predictor import * -from ..arch import ArchARM -from ..misc import read_int +from unicorn.arm_const import UC_ARM_REG_PC +from .branch_predictor import BranchPredictor, Prophecy +from ..arch import ArchARM +from ..misc import InvalidInsn class BranchPredictorARM(BranchPredictor, ArchARM): + """Branch Predictor for ARM. 
""" - predictor for ARM - """ - - def __init__(self, ql): - super().__init__(ql) - ArchARM.__init__(self) - self.INST_SIZE = 4 - self.THUMB_INST_SIZE = 2 - self.CODE_END = "udf" + stop = 'udf' - def read_reg(self, reg_name): - reg_name = reg_name.replace("ip", "r12").replace("fp", "r11") - return getattr(self.ql.arch.regs, reg_name) - - def regdst_eq_pc(self, op_str): - return op_str.partition(", ")[0] == "pc" - - @staticmethod - def get_cpsr(bits: int) -> (bool, bool, bool, bool): + def get_cpsr(self) -> Tuple[bool, bool, bool, bool]: + """Get flags map of CPSR. """ - get flags from ql.reg.cpsr - """ - return ( - bits & 0x10000000 != 0, # V, overflow flag - bits & 0x20000000 != 0, # C, carry flag - bits & 0x40000000 != 0, # Z, zero flag - bits & 0x80000000 != 0, # N, sign flag - ) - - def predict(self, pref_addr=None): - prophecy = Prophecy() - cur_addr = self.cur_addr if pref_addr is None else pref_addr - line = self.disasm(cur_addr) - - if line.mnemonic == self.CODE_END: # indicates program exited - prophecy.where = True - return prophecy - - jump_table = { - # unconditional branch - "b" : (lambda *_: True), - "bl" : (lambda *_: True), - "bx" : (lambda *_: True), - "blx" : (lambda *_: True), - "b.w" : (lambda *_: True), - - # branch on equal, Z == 1 - "beq" : (lambda V, C, Z, N: Z == 1), - "bxeq" : (lambda V, C, Z, N: Z == 1), - "beq.w": (lambda V, C, Z, N: Z == 1), - - # branch on not equal, Z == 0 - "bne" : (lambda V, C, Z, N: Z == 0), - "bxne" : (lambda V, C, Z, N: Z == 0), - "bne.w": (lambda V, C, Z, N: Z == 0), - - # branch on signed greater than, Z == 0 and N == V - "bgt" : (lambda V, C, Z, N: (Z == 0 and N == V)), - "bgt.w": (lambda V, C, Z, N: (Z == 0 and N == V)), - - # branch on signed less than, N != V - "blt" : (lambda V, C, Z, N: N != V), - - # branch on signed greater than or equal, N == V - "bge" : (lambda V, C, Z, N: N == V), - - # branch on signed less than or queal - "ble" : (lambda V, C, Z, N: Z == 1 or N != V), - - # branch on unsigned higher 
or same (or carry set), C == 1 - "bhs" : (lambda V, C, Z, N: C == 1), - "bcs" : (lambda V, C, Z, N: C == 1), - - # branch on unsigned lower (or carry clear), C == 0 - "bcc" : (lambda V, C, Z, N: C == 0), - "blo" : (lambda V, C, Z, N: C == 0), - "bxlo" : (lambda V, C, Z, N: C == 0), - "blo.w": (lambda V, C, Z, N: C == 0), - - # branch on negative or minus, N == 1 - "bmi" : (lambda V, C, Z, N: N == 1), - - # branch on positive or plus, N == 0 - "bpl" : (lambda V, C, Z, N: N == 0), - - # branch on signed overflow - "bvs" : (lambda V, C, Z, N: V == 1), - - # branch on no signed overflow - "bvc" : (lambda V, C, Z, N: V == 0), - - # branch on unsigned higher - "bhi" : (lambda V, C, Z, N: (Z == 0 and C == 1)), - "bxhi" : (lambda V, C, Z, N: (Z == 0 and C == 1)), - "bhi.w": (lambda V, C, Z, N: (Z == 0 and C == 1)), - - # branch on unsigned lower - "bls" : (lambda V, C, Z, N: (C == 0 or Z == 1)), - "bls.w": (lambda V, C, Z, N: (C == 0 or Z == 1)), - } - cb_table = { - # branch on equal to zero - "cbz" : (lambda r: r == 0), + cpsr = self.read_reg('cpsr') - # branch on not equal to zero - "cbnz": (lambda r: r != 0), - } + return ( + (cpsr & (0b1 << 28)) != 0, # V, overflow flag + (cpsr & (0b1 << 29)) != 0, # C, carry flag + (cpsr & (0b1 << 30)) != 0, # Z, zero flag + (cpsr & (0b1 << 31)) != 0 # N, sign flag + ) - if line.mnemonic in jump_table: - prophecy.going = jump_table.get(line.mnemonic)(*self.get_cpsr(self.ql.arch.regs.cpsr)) + def predict(self) -> Prophecy: + insn = self.disasm(self.cur_addr, True) - elif line.mnemonic in cb_table: - prophecy.going = cb_table.get(line.mnemonic)(self.read_reg(line.op_str.split(", ")[0])) + going = False + where = 0 - if prophecy.going: - if "#" in line.op_str: - prophecy.where = read_int(line.op_str.split("#")[-1]) - else: - prophecy.where = self.read_reg(line.op_str) + # invalid instruction; nothing to predict + if isinstance(insn, InvalidInsn): + return Prophecy(going, where) - if self.regdst_eq_pc(line.op_str): - next_addr = cur_addr 
+ line.size - n2_addr = next_addr + len(self.read_insn(next_addr)) - prophecy.where += len(self.read_insn(n2_addr)) + len(self.read_insn(next_addr)) + # iname is the instruction's basename stripped from all optional suffixes. + # this greatly simplifies the case handling + iname: str = insn.insn_name() or '' + operands: List[ArmOp] = insn.operands - elif line.mnemonic.startswith("it"): - # handle IT block here + # branch instructions + branches = ('b', 'bl', 'bx', 'blx') - cond_met = { - "eq": lambda V, C, Z, N: (Z == 1), - "ne": lambda V, C, Z, N: (Z == 0), - "ge": lambda V, C, Z, N: (N == V), - "hs": lambda V, C, Z, N: (C == 1), - "lo": lambda V, C, Z, N: (C == 0), - "mi": lambda V, C, Z, N: (N == 1), - "pl": lambda V, C, Z, N: (N == 0), - "ls": lambda V, C, Z, N: (C == 0 or Z == 1), - "le": lambda V, C, Z, N: (Z == 1 or N != V), - "hi": lambda V, C, Z, N: (Z == 0 and C == 1), - }.get(line.op_str)(*self.get_cpsr(self.ql.arch.regs.cpsr)) + # reg-based conditional branches + conditional_reg: Dict[str, Callable[[int], bool]] = { + 'cbz' : lambda r: r == 0, + 'cbnz': lambda r: r != 0 + } - it_block_range = [each_char for each_char in line.mnemonic[1:]] + def __read_reg(reg: int) -> Optional[int]: + """[internal] Read register value where register is provided as a Unicorn constant. + """ - next_addr = cur_addr + self.THUMB_INST_SIZE - for each in it_block_range: - _insn = self.read_insn(next_addr) - n2_addr = self.predict(ql, next_addr) + # name will be None in case of an invalid register. 
this is expected in some cases + # and should not raise an exception, but rather silently dropped + name = insn.reg_name(reg) - if (cond_met and each == "t") or (not cond_met and each == "e"): - if n2_addr != (next_addr+len(_insn)): # branch detected - break + # pc reg value needs adjustment + adj = (2 * self.isize) if reg == UC_ARM_REG_PC else 0 - next_addr += len(_insn) + return name and self.read_reg(self.unalias(name)) + adj - prophecy.where = next_addr + def __read_mem(mem: ArmOpMem, size: int = 0, *, signed: bool = False) -> Optional[int]: + """[internal] Attempt to read memory contents. By default memory accesses are in + native size and values are unsigned. + """ - elif line.mnemonic in ("ldr",): + base = __read_reg(mem.base) or 0 + index = __read_reg(mem.index) or 0 + scale = mem.scale + disp = mem.disp - if self.regdst_eq_pc(line.op_str): - _, _, rn_offset = line.op_str.partition(", ") - r, _, imm = rn_offset.strip("[]!").partition(", #") + return self.try_read_pointer(base + index * scale + disp, size, signed=signed) - if "]" in rn_offset.split(", ")[1]: # pre-indexed immediate - prophecy.where = self.unpack32(self.read_mem(read_int(imm) + self.read_reg(r), self.INST_SIZE)) + def __parse_op(op: ArmOp, *args, **kwargs) -> Optional[int]: + """[internal] Parse an operand and return its value. Register references will be + substitued with the corresponding register value, while memory dereferences will + be substitued by the effective address they refer to. 
+ """ - else: # post-indexed immediate - # FIXME: weired behavior, immediate here does not apply - prophecy.where = self.unpack32(self.read_mem(self.read_reg(r), self.INST_SIZE)) + if op.type == CS_OP_REG: + value = __read_reg(op.reg) - elif line.mnemonic in ("addls", "addne", "add") and self.regdst_eq_pc(line.op_str): - V, C, Z, N = self.get_cpsr(self.ql.arch.regs.cpsr) - r0, r1, r2, *imm = line.op_str.split(", ") + elif op.type == CS_OP_IMM: + value = op.imm - # program counter is awalys 8 bytes ahead when it comes with pc, need to add extra 8 bytes - extra = 8 if 'pc' in (r0, r1, r2) else 0 + elif op.type == CS_OP_MEM: + value = __read_mem(op.mem, *args, **kwargs) - if imm: - expr = imm[0].split() - # TODO: should support more bit shifting and rotating operation - if expr[0] == "lsl": # logical shift left - n = read_int(expr[-1].strip("#")) * 2 + else: + # we are not expecting any other operand type, including floating point (CS_OP_FP) + raise RuntimeError(f'unexpected operand type: {op.type}') + + # LSR + if op.shift.type == 1: + value *= (1 >> op.shift.value) + + # LSL + elif op.shift.type == 2: + value *= (1 << op.shift.value) + + # ROR ? 
+ + return value + + def __is_taken(cc: int) -> Tuple[bool, Tuple[bool, ...]]: + pred = predicate[cc] + cpsr = self.get_cpsr() + + return pred(*cpsr), cpsr + + # conditions predicate selector + predicate: Dict[int, Callable[..., bool]] = { + ARM_CC_EQ: lambda V, C, Z, N: Z, + ARM_CC_NE: lambda V, C, Z, N: not Z, + ARM_CC_HS: lambda V, C, Z, N: C, + ARM_CC_LO: lambda V, C, Z, N: not C, + ARM_CC_MI: lambda V, C, Z, N: N, + ARM_CC_PL: lambda V, C, Z, N: not N, + ARM_CC_VS: lambda V, C, Z, N: V, + ARM_CC_VC: lambda V, C, Z, N: not V, + ARM_CC_HI: lambda V, C, Z, N: (not Z) and C, + ARM_CC_LS: lambda V, C, Z, N: (not C) or Z, + ARM_CC_GE: lambda V, C, Z, N: (N == V), + ARM_CC_LT: lambda V, C, Z, N: (N != V), + ARM_CC_GT: lambda V, C, Z, N: not Z and (N == V), + ARM_CC_LE: lambda V, C, Z, N: Z or (N != V), + ARM_CC_AL: lambda V, C, Z, N: True + } + + # implementation of simple binary arithmetic and bitwise operations + binop: Dict[str, Callable[[int, int, int], int]] = { + 'add': lambda a, b, _: a + b, + 'adc': lambda a, b, c: a + b + c, + 'sub': lambda a, b, _: a - b, + 'rsb': lambda a, b, _: b - a, + 'sbc': lambda a, b, c: a - b - (1 - c), + 'rsc': lambda a, b, c: b - a - (1 - c), + 'mul': lambda a, b, _: a * b, + 'and': lambda a, b, _: a & b, + 'orr': lambda a, b, _: a | b, + 'eor': lambda a, b, _: a ^ b + } + + # is this a branch? + if iname in branches: + going, _ = __is_taken(insn.cc) + + if going: + where = __parse_op(operands[0]) + + return Prophecy(going, where) + + if iname in conditional_reg: + is_taken = conditional_reg[iname] + reg = __parse_op(operands[0]) + assert reg is not None, 'unrecognized reg' + + going = is_taken(reg) + + if going: + where = __parse_op(operands[1]) + + return Prophecy(going, where) + + # instruction is not a branch; check whether pc is affected by this instruction. 
+ # + # insn.regs_write doesn't work well, so we use insn.regs_access instead + if UC_ARM_REG_PC in insn.regs_access()[1]: + + if iname == 'mov': + going = True + where = __parse_op(operands[1]) + + elif iname.startswith('ldr'): + suffix: str = insn.mnemonic[3:] + + # map possible ldr suffixes to kwargs required for the memory access. + # + # to improve readability we also address the case where ldr has no suffix + # and no special kwargs are required. all strings start with '', so it + # serves as a safe default case + msize: Dict[str, Dict] = { + 'b' : {'size': 1, 'signed': False}, + 'h' : {'size': 2, 'signed': False}, + 'sb': {'size': 1, 'signed': True}, + 'sh': {'size': 2, 'signed': True}, + '' : {} + } - if line.mnemonic == "addls" and (C == 0 or Z == 1): - prophecy.where = extra + self.read_reg(r1) + self.read_reg(r2) * n + # ldr has different variations that affect the memory access size and + # whether the value should be signed or not. + suffix = next(s for s in msize if suffix.startswith(s)) - elif line.mnemonic == "add" or (line.mnemonic == "addne" and Z == 0): - prophecy.where = extra + self.read_reg(r1) + (self.read_reg(r2) * n if imm else self.read_reg(r2)) + going, _ = __is_taken(insn.cc) - elif line.mnemonic in ("tbh", "tbb"): + if going: + where = __parse_op(operands[1], **msize[suffix]) - cur_addr += self.INST_SIZE - r0, r1, *imm = line.op_str.strip("[]").split(", ") + elif iname in binop: + going, cpsr = __is_taken(insn.cc) - if imm: - expr = imm[0].split() - if expr[0] == "lsl": # logical shift left - n = read_int(expr[-1].strip("#")) * 2 + if going: + operator = binop[iname] + op1 = __parse_op(operands[1]) + op2 = __parse_op(operands[2]) + carry = int(cpsr[1]) - if line.mnemonic == "tbh": + where = (op1 and op2) and operator(op1, op2, carry) - r1 = self.read_reg(r1) * n + elif iname == 'pop': + going, _ = __is_taken(insn.cc) - elif line.mnemonic == "tbb": + if going: + # find pc position within pop regs list + idx = next(i for i, op in 
enumerate(operands) if (op.type == CS_OP_REG) and (op.reg == UC_ARM_REG_PC)) - r1 = self.read_reg(r1) + # read the corresponding stack entry + where = self.ql.stack_read(idx * self.asize) - to_add = int.from_bytes(self.read_mem(cur_addr+r1, 2 if line.mnemonic == "tbh" else 1), byteorder="little") * n - prophecy.where = cur_addr + to_add + else: + # left here for users to provide feedback when encountered + raise RuntimeWarning(f'instruction affects pc but was not considered: {insn.mnemonic}') - elif line.mnemonic.startswith("pop") and "pc" in line.op_str: + # for some reason capstone does not consider pc to be affected by 'tbb' and 'tbh' + # so we need to test for them specifically - prophecy.where = self.ql.stack_read(line.op_str.strip("{}").split(", ").index("pc") * self.INST_SIZE) - if not { # step to next instruction if cond does not meet - "pop" : lambda *_: True, - "pop.w": lambda *_: True, - "popeq": lambda V, C, Z, N: (Z == 1), - "popne": lambda V, C, Z, N: (Z == 0), - "pophi": lambda V, C, Z, N: (C == 1), - "popge": lambda V, C, Z, N: (N == V), - "poplt": lambda V, C, Z, N: (N != V), - }.get(line.mnemonic)(*self.get_cpsr(self.ql.arch.regs.cpsr)): + # table branch byte + elif iname == 'tbb': + offset = __read_mem(operands[0].mem, 1) + pc = __read_reg(UC_ARM_REG_PC) - prophecy.where = cur_addr + self.INST_SIZE + going = True + where = (offset and pc) and (pc + offset * 2) - elif line.mnemonic == "sub" and self.regdst_eq_pc(line.op_str): - _, r, imm = line.op_str.split(", ") - prophecy.where = self.read_reg(r) - read_int(imm.strip("#")) + # table branch half-word + elif iname == 'tbh': + offset = __read_mem(operands[0].mem, 2) + pc = __read_reg(UC_ARM_REG_PC) - elif line.mnemonic == "mov" and self.regdst_eq_pc(line.op_str): - _, r = line.op_str.split(", ") - prophecy.where = self.read_reg(r) + going = True + where = (offset and pc) and (pc + offset * 2) - if prophecy.where is not None: - prophecy.where &= ~0b1 + return Prophecy(going, where) - return prophecy 
class BranchPredictorCORTEX_M(BranchPredictorARM): - def __init__(self, ql): - super().__init__(ql) + """Branch Predictor for ARM Cortex-M. + """ diff --git a/qiling/debugger/qdb/branch_predictor/branch_predictor_intel.py b/qiling/debugger/qdb/branch_predictor/branch_predictor_intel.py new file mode 100644 index 000000000..672fa0041 --- /dev/null +++ b/qiling/debugger/qdb/branch_predictor/branch_predictor_intel.py @@ -0,0 +1,181 @@ +#!/usr/bin/env python3 +# +# Cross Platform and Multi Architecture Advanced Binary Emulation Framework +# + +from typing import Callable, Dict, List, Optional, Tuple + +from capstone.x86 import X86Op +from capstone.x86_const import X86_OP_REG, X86_OP_IMM, X86_OP_MEM, X86_INS_LEA + +from .branch_predictor import Prophecy, BranchPredictor +from ..arch import ArchX86, ArchX64 +from ..misc import InvalidInsn + + +class BranchPredictorIntel(BranchPredictor): + """Branch Predictor base class for Intel architecture. + """ + + stop = 'hlt' + + def get_eflags(self) -> Tuple[int, int, int, int, int]: + eflags = self.read_reg('eflags') + + return ( + (eflags & (0b1 << 0)) != 0, # carry + (eflags & (0b1 << 2)) != 0, # parity + (eflags & (0b1 << 6)) != 0, # zero + (eflags & (0b1 << 7)) != 0, # sign + (eflags & (0b1 << 11)) != 0 # overflow + ) + + def predict(self) -> Prophecy: + insn = self.disasm(self.cur_addr, True) + + going = False + where = 0 + + # invalid instruction; nothing to predict + if isinstance(insn, InvalidInsn): + return Prophecy(going, where) + + mnem: str = insn.mnemonic + operands: List[X86Op] = insn.operands + + # unconditional branches + unconditional = ('call', 'jmp') + + # flags-based conditional branches + conditional: Dict[str, Callable[..., bool]] = { + 'jb' : lambda C, P, Z, S, O: C, + 'jc' : lambda C, P, Z, S, O: C, + 'jnae': lambda C, P, Z, S, O: C, + + 'jnb' : lambda C, P, Z, S, O: not C, + 'jnc' : lambda C, P, Z, S, O: not C, + 'jae' : lambda C, P, Z, S, O: not C, + + 'jp' : lambda C, P, Z, S, O: P, + 'jpe' : lambda C, 
class BranchPredictorIntel(BranchPredictor):
    """Branch Predictor base class for Intel architecture.

    Looks at the instruction at the current program counter and, based on the
    current registers and flags, tells whether it is about to branch and where.
    """

    stop = 'hlt'

    def get_eflags(self) -> Tuple[bool, bool, bool, bool, bool]:
        """Read the status flags conditional branches depend on.

        Returns:
            A tuple of booleans: carry, parity, zero, sign and overflow
        """

        eflags = self.read_reg('eflags')

        return (
            (eflags & (0b1 << 0)) != 0,     # carry
            (eflags & (0b1 << 2)) != 0,     # parity
            (eflags & (0b1 << 6)) != 0,     # zero
            (eflags & (0b1 << 7)) != 0,     # sign
            (eflags & (0b1 << 11)) != 0     # overflow
        )

    def predict(self) -> Prophecy:
        """Predict the outcome of the instruction at the current address.

        Returns:
            A prophecy telling whether the instruction is going to branch and
            the branch destination. If no branch is predicted (or the bytes
            could not be disassembled) the destination is left as 0
        """

        insn = self.disasm(self.cur_addr, True)

        going = False
        where = 0

        # invalid instruction; nothing to predict
        if isinstance(insn, InvalidInsn):
            return Prophecy(going, where)

        mnem: str = insn.mnemonic
        operands: List[X86Op] = insn.operands

        # unconditional branches
        unconditional = ('call', 'jmp')

        # flags-based conditional branches
        conditional: Dict[str, Callable[..., bool]] = {
            'jb'  : lambda C, P, Z, S, O: C,
            'jc'  : lambda C, P, Z, S, O: C,
            'jnae': lambda C, P, Z, S, O: C,

            'jnb' : lambda C, P, Z, S, O: not C,
            'jnc' : lambda C, P, Z, S, O: not C,
            'jae' : lambda C, P, Z, S, O: not C,

            'jp'  : lambda C, P, Z, S, O: P,
            'jpe' : lambda C, P, Z, S, O: P,

            'jnp' : lambda C, P, Z, S, O: not P,
            'jpo' : lambda C, P, Z, S, O: not P,

            'je'  : lambda C, P, Z, S, O: Z,
            'jz'  : lambda C, P, Z, S, O: Z,

            'jne' : lambda C, P, Z, S, O: not Z,
            'jnz' : lambda C, P, Z, S, O: not Z,

            'js'  : lambda C, P, Z, S, O: S,
            'jns' : lambda C, P, Z, S, O: not S,

            'jo'  : lambda C, P, Z, S, O: O,
            'jno' : lambda C, P, Z, S, O: not O,

            'jbe' : lambda C, P, Z, S, O: C or Z,
            'jna' : lambda C, P, Z, S, O: C or Z,

            'ja'  : lambda C, P, Z, S, O: (not C) and (not Z),
            'jnbe': lambda C, P, Z, S, O: (not C) and (not Z),

            'jl'  : lambda C, P, Z, S, O: S != O,
            'jnge': lambda C, P, Z, S, O: S != O,

            'jge' : lambda C, P, Z, S, O: S == O,
            'jnl' : lambda C, P, Z, S, O: S == O,

            'jle' : lambda C, P, Z, S, O: Z or (S != O),
            'jng' : lambda C, P, Z, S, O: Z or (S != O),

            # "greater" is the exact negation of "less or equal": ZF clear *and* SF equals OF
            'jg'  : lambda C, P, Z, S, O: (not Z) and (S == O),
            'jnle': lambda C, P, Z, S, O: (not Z) and (S == O)
        }

        # reg-based conditional branches: taken when the counter register is zero
        conditional_reg = {
            "jcxz"  : 'cx',
            "jecxz" : 'ecx',
            "jrcxz" : 'rcx'
        }

        def __read_reg(reg: int) -> Optional[int]:
            """Read register value where register is provided as a Capstone
            register id (as found in instruction operands).
            """

            # name will be None in case of an illegal or unknown register
            name = insn.reg_name(reg)

            return name and self.read_reg(name)

        def __parse_op(op: X86Op) -> Optional[int]:
            """Parse an operand and return its value. Memory dereferences will be
            substituted by the pointer-size value read from the effective address
            they refer to, or None if that address cannot be read.
            """

            if op.type == X86_OP_REG:
                value = __read_reg(op.reg)

            elif op.type == X86_OP_IMM:
                value = op.imm

            elif op.type == X86_OP_MEM:
                mem = op.mem

                # absent components are read as None and count as zero
                base = __read_reg(mem.base) or 0
                index = __read_reg(mem.index) or 0
                scale = mem.scale
                disp = mem.disp

                # NOTE(review): the segment register value is applied as a real-mode
                # paragraph base (value * 0x10); confirm this is what is wanted for
                # protected / long mode segment overrides
                seg = __read_reg(mem.segment) or 0
                ea = seg * 0x10 + (base + index * scale + disp)

                # lea does not really dereference memory
                value = ea if insn.id == X86_INS_LEA else self.try_read_pointer(ea)

            else:
                raise RuntimeError(f'unexpected operand type: {op.type}')

            return value

        # is this an unconditional branch?
        if mnem in unconditional:
            going = True
            where = __parse_op(operands[0])

        # is this a return from a function call?
        elif mnem == 'ret':
            going = True
            where = self.ql.arch.stack_read(0)

        # is this a flags-based branch?
        elif mnem in conditional:
            predict = conditional[mnem]
            eflags = self.get_eflags()

            going = predict(*eflags)

            if going:
                where = __parse_op(operands[0])

        # is this a counter register based branch?
        elif mnem in conditional_reg:
            going = self.read_reg(conditional_reg[mnem]) == 0

            if going:
                where = __parse_op(operands[0])

        return Prophecy(going, where)


class BranchPredictorX86(BranchPredictorIntel, ArchX86):
    """Branch Predictor for x86.
    """


class BranchPredictorX64(BranchPredictorIntel, ArchX64):
    """Branch Predictor for x86-64.
    """
+ """ diff --git a/qiling/debugger/qdb/branch_predictor/branch_predictor_mips.py b/qiling/debugger/qdb/branch_predictor/branch_predictor_mips.py index a111df8f6..e7423389b 100644 --- a/qiling/debugger/qdb/branch_predictor/branch_predictor_mips.py +++ b/qiling/debugger/qdb/branch_predictor/branch_predictor_mips.py @@ -3,88 +3,95 @@ # Cross Platform and Multi Architecture Advanced Binary Emulation Framework # +from typing import Optional +from capstone.mips import MipsOp, MIPS_OP_REG, MIPS_OP_IMM - -from .branch_predictor import * +from .branch_predictor import BranchPredictor, Prophecy from ..arch import ArchMIPS +from ..misc import InvalidInsn + class BranchPredictorMIPS(BranchPredictor, ArchMIPS): - """ - predictor for MIPS + """Branch Predictor for MIPS 32. """ - def __init__(self, ql): - super().__init__(ql) - ArchMIPS.__init__(self) - self.CODE_END = "break" - self.INST_SIZE = 4 + stop = 'break' - @staticmethod - def signed_val(val: int) -> int: - """ - signed value convertion - """ + def predict(self): + insn = self.disasm(self.cur_addr, True) + + going = False + where = 0 + + # invalid instruction; nothing to predict + if isinstance(insn, InvalidInsn): + return Prophecy(going, where) + + unconditional = ('j', 'jr', 'jal', 'jalr', 'b', 'bl', 'bal') + + conditional = { + 'beq' : lambda r0, r1: r0 == r1, # branch on equal + 'bne' : lambda r0, r1: r0 != r1, # branch on not equal + 'blt' : lambda r0, r1: r0 < r1, # branch on r0 less than r1 + 'bgt' : lambda r0, r1: r0 > r1, # branch on r0 greater than r1 + 'ble' : lambda r0, r1: r0 <= r1, # branch on r0 less than or equal to r1 + 'bge' : lambda r0, r1: r0 >= r1, # branch on r0 greater than or equal to r1 + + 'beqz' : lambda r: r == 0, # branch on equal to zero + 'bnez' : lambda r: r != 0, # branch on not equal to zero + 'bgtz' : lambda r: r > 0, # branch on greater than zero + 'bltz' : lambda r: r < 0, # branch on less than zero + 'bltzal': lambda r: r < 0, # branch on less than zero and link + 'blez' : lambda r: 
class BranchPredictorMIPS(BranchPredictor, ArchMIPS):
    """Branch Predictor for MIPS 32.
    """

    stop = 'break'

    def predict(self) -> Prophecy:
        """Predict the outcome of the instruction at the current address.

        Returns:
            A prophecy telling whether the instruction is going to branch and
            the branch destination. If no branch is predicted (or the bytes
            could not be disassembled) the destination is left as 0
        """

        insn = self.disasm(self.cur_addr, True)

        going = False
        where = 0

        # invalid instruction; nothing to predict
        if isinstance(insn, InvalidInsn):
            return Prophecy(going, where)

        unconditional = ('j', 'jr', 'jal', 'jalr', 'b', 'bl', 'bal')

        conditional = {
            'beq'   : lambda r0, r1: r0 == r1,  # branch on equal
            'bne'   : lambda r0, r1: r0 != r1,  # branch on not equal
            'blt'   : lambda r0, r1: r0 < r1,   # branch on r0 less than r1
            'bgt'   : lambda r0, r1: r0 > r1,   # branch on r0 greater than r1
            'ble'   : lambda r0, r1: r0 <= r1,  # branch on r0 less than or equal to r1
            'bge'   : lambda r0, r1: r0 >= r1,  # branch on r0 greater than or equal to r1

            'beqz'  : lambda r: r == 0,         # branch on equal to zero
            'bnez'  : lambda r: r != 0,         # branch on not equal to zero
            'bgtz'  : lambda r: r > 0,          # branch on greater than zero
            'bltz'  : lambda r: r < 0,          # branch on less than zero
            'bltzal': lambda r: r < 0,          # branch on less than zero and link
            'blez'  : lambda r: r <= 0,         # branch on less than or equal to zero
            'bgez'  : lambda r: r >= 0,         # branch on greater than or equal to zero
            'bgezal': lambda r: r >= 0          # branch on greater than or equal to zero and link
        }

        def __as_signed(val: int) -> int:
            """Get the signed integer representation of a given 32-bit value.
            """

            msb = 0b1 << 31

            return (val & ~msb) - (val & msb)

        def __read_reg(reg: int) -> Optional[int]:
            """Read a register as a signed value, where register is provided as
            a Capstone register id (as found in instruction operands).
            """

            # name will be None in case of an illegal or unknown register
            name = insn.reg_name(reg)

            return name and __as_signed(self.read_reg(self.unalias(name)))

        def __parse_op(op: MipsOp) -> Optional[int]:
            """Parse an operand and return its value.
            """

            if op.type == MIPS_OP_REG:
                value = __read_reg(op.reg)

            elif op.type == MIPS_OP_IMM:
                value = op.imm

            else:
                raise RuntimeError(f'unexpected operand type: {op.type}')

            return value

        # get operands. target address is always the rightmost one
        if insn.operands:
            *operands, target = insn.operands

            if insn.mnemonic in unconditional:
                going = True

            elif insn.mnemonic in conditional:
                predict = conditional[insn.mnemonic]

                going = predict(*(__parse_op(op) for op in operands))

            if going:
                where = __parse_op(target)

                # registers are read as signed values for the sake of the comparisons
                # above, but a branch target is an address: fold it back to its unsigned
                # 32-bit form so targets with the top bit set do not come out negative
                if where is not None:
                    where &= 0xffffffff

        return Prophecy(going, where)
(lambda C, P, A, Z, S, O: C == 0 and Z == 0), - "jnbe" : (lambda C, P, A, Z, S, O: C == 0 and Z == 0), - - "jl" : (lambda C, P, A, Z, S, O: S != O), - "jnge" : (lambda C, P, A, Z, S, O: S != O), - - "jge" : (lambda C, P, A, Z, S, O: S == O), - "jnl" : (lambda C, P, A, Z, S, O: S == O), - - "jle" : (lambda C, P, A, Z, S, O: Z == 1 or S != O), - "jng" : (lambda C, P, A, Z, S, O: Z == 1 or S != O), - - "jg" : (lambda C, P, A, Z, S, O: Z == 0 or S == O), - "jnle" : (lambda C, P, A, Z, S, O: Z == 0 or S == O), - - "jp" : (lambda C, P, A, Z, S, O: P == 1), - "jpe" : (lambda C, P, A, Z, S, O: P == 1), - - "jnp" : (lambda C, P, A, Z, S, O: P == 0), - "jpo" : (lambda C, P, A, Z, S, O: P == 0), - - # unconditional jump - - "call" : (lambda *_: True), - "jmp" : (lambda *_: True), - - } - - jump_reg_table = { - "jcxz" : (lambda cx: cx == 0), - "jecxz" : (lambda ecx: ecx == 0), - "jrcxz" : (lambda rcx: rcx == 0), - } - - if line.mnemonic in jump_table: - eflags = self.get_flags(self.ql.arch.regs.eflags).values() - prophecy.going = jump_table.get(line.mnemonic)(*eflags) - - elif line.mnemonic in jump_reg_table: - prophecy.going = jump_reg_table.get(line.mnemonic)(self.ql.arch.regs.ecx) - - if prophecy.going: - takeaway_list = ["ptr", "dword", "[", "]"] - - if len(line.op_str.split()) > 1: - new_line = line.op_str.replace(":", "+") - for each in takeaway_list: - new_line = new_line.replace(each, " ") - - new_line = " ".join(new_line.split()) - for each_reg in filter(lambda r: len(r) == 3, self.ql.arch.regs.register_mapping.keys()): - if each_reg in new_line: - new_line = re.sub(each_reg, hex(self.read_reg(each_reg)), new_line) - - for each_reg in filter(lambda r: len(r) == 2, self.ql.arch.regs.register_mapping.keys()): - if each_reg in new_line: - new_line = re.sub(each_reg, hex(self.read_reg(each_reg)), new_line) - - - prophecy.where = check_and_eval(new_line) - - elif line.op_str in self.ql.arch.regs.register_mapping: - prophecy.where = self.ql.arch.regs.read(line.op_str) - - 
else: - prophecy.where = read_int(line.op_str) - else: - prophecy.where = self.cur_addr + line.size - - return prophecy diff --git a/qiling/debugger/qdb/branch_predictor/branch_predictor_x8664.py b/qiling/debugger/qdb/branch_predictor/branch_predictor_x8664.py deleted file mode 100644 index 1350c9bb3..000000000 --- a/qiling/debugger/qdb/branch_predictor/branch_predictor_x8664.py +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/env python3 -# -# Cross Platform and Multi Architecture Advanced Binary Emulation Framework -# - - - -import re - -from .branch_predictor import * -from ..arch import ArchX8664 -from ..misc import check_and_eval - -class BranchPredictorX8664(BranchPredictor, ArchX8664): - """ - predictor for X86 - """ - - class ParseError(Exception): - """ - indicate parser error - """ - pass - - def __init__(self, ql): - super().__init__(ql) - ArchX8664.__init__(self) - - def predict(self): - prophecy = Prophecy() - line = self.disasm(self.cur_addr) - - jump_table = { - # conditional jump - - "jo" : (lambda C, P, A, Z, S, O: O == 1), - "jno" : (lambda C, P, A, Z, S, O: O == 0), - - "js" : (lambda C, P, A, Z, S, O: S == 1), - "jns" : (lambda C, P, A, Z, S, O: S == 0), - - "je" : (lambda C, P, A, Z, S, O: Z == 1), - "jz" : (lambda C, P, A, Z, S, O: Z == 1), - - "jne" : (lambda C, P, A, Z, S, O: Z == 0), - "jnz" : (lambda C, P, A, Z, S, O: Z == 0), - - "jb" : (lambda C, P, A, Z, S, O: C == 1), - "jc" : (lambda C, P, A, Z, S, O: C == 1), - "jnae" : (lambda C, P, A, Z, S, O: C == 1), - - "jnb" : (lambda C, P, A, Z, S, O: C == 0), - "jnc" : (lambda C, P, A, Z, S, O: C == 0), - "jae" : (lambda C, P, A, Z, S, O: C == 0), - - "jbe" : (lambda C, P, A, Z, S, O: C == 1 or Z == 1), - "jna" : (lambda C, P, A, Z, S, O: C == 1 or Z == 1), - - "ja" : (lambda C, P, A, Z, S, O: C == 0 and Z == 0), - "jnbe" : (lambda C, P, A, Z, S, O: C == 0 and Z == 0), - - "jl" : (lambda C, P, A, Z, S, O: S != O), - "jnge" : (lambda C, P, A, Z, S, O: S != O), - - "jge" : (lambda C, P, A, Z, S, O: 
S == O), - "jnl" : (lambda C, P, A, Z, S, O: S == O), - - "jle" : (lambda C, P, A, Z, S, O: Z == 1 or S != O), - "jng" : (lambda C, P, A, Z, S, O: Z == 1 or S != O), - - "jg" : (lambda C, P, A, Z, S, O: Z == 0 or S == O), - "jnle" : (lambda C, P, A, Z, S, O: Z == 0 or S == O), - - "jp" : (lambda C, P, A, Z, S, O: P == 1), - "jpe" : (lambda C, P, A, Z, S, O: P == 1), - - "jnp" : (lambda C, P, A, Z, S, O: P == 0), - "jpo" : (lambda C, P, A, Z, S, O: P == 0), - - # unconditional jump - - "call" : (lambda *_: True), - "jmp" : (lambda *_: True), - - } - - jump_reg_table = { - "jcxz" : (lambda cx: cx == 0), - "jecxz" : (lambda ecx: ecx == 0), - "jrcxz" : (lambda rcx: rcx == 0), - } - - if line.mnemonic in jump_table: - eflags = self.get_flags(self.ql.arch.regs.eflags).values() - prophecy.going = jump_table.get(line.mnemonic)(*eflags) - - elif line.mnemonic in jump_reg_table: - prophecy.going = jump_reg_table.get(line.mnemonic)(self.ql.arch.regs.ecx) - - if prophecy.going: - takeaway_list = ["ptr", "dword", "qword", "[", "]"] - - if len(line.op_str.split()) > 1: - new_line = line.op_str.replace(":", "+") - for each in takeaway_list: - new_line = new_line.replace(each, " ") - - new_line = " ".join(new_line.split()) - for each_reg in filter(lambda r: len(r) == 3, self.ql.arch.regs.register_mapping.keys()): - if each_reg in new_line: - new_line = re.sub(each_reg, hex(self.read_reg(each_reg)), new_line) - - for each_reg in filter(lambda r: len(r) == 2, self.ql.arch.regs.register_mapping.keys()): - if each_reg in new_line: - new_line = re.sub(each_reg, hex(self.read_reg(each_reg)), new_line) - - prophecy.where = check_and_eval(new_line) - - elif line.op_str in self.ql.arch.regs.register_mapping: - prophecy.where = self.ql.arch.regs.read(line.op_str) - - else: - prophecy.where = read_int(line.op_str) - else: - prophecy.where = self.cur_addr + line.size - - return prophecy diff --git a/qiling/debugger/qdb/const.py b/qiling/debugger/qdb/const.py index 74c72d229..d316fc263 100644 
class color:
    """ANSI escape sequences used for colorful prints.
    """

    # bright foreground colors (SGR 90-96)
    DARKGRAY = '\033[90m'
    RED = '\033[91m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
    PURPLE = '\033[95m'
    CYAN = '\033[96m'

    # NOTE(review): SGR 48 and 35 are not the usual codes for a white or a black
    # foreground; values are kept exactly as they were -- confirm they are intended
    WHITE = '\033[48m'
    BLACK = '\033[35m'
    DARKCYAN = '\033[36m'

    # text attributes
    UNDERLINE = '\033[4m'
    BOLD = '\033[1m'

    # END turns all attributes off; RESET restores only the default foreground color
    END = '\033[0m'
    RESET = '\033[39m'
""" - def __init__(self, ql): + def __init__(self, ql: Qiling): + # make sure mixin classes are properly initialized + super().__init__() + self.ql = ql self.pointersize = self.ql.arch.pointersize - self.unpack = ql.unpack - self.unpack16 = ql.unpack16 - self.unpack32 = ql.unpack32 - self.unpack64 = ql.unpack64 @property - def cur_addr(self): - """ - program counter of qiling instance + def cur_addr(self) -> int: + """Read current program counter register. """ return self.ql.arch.regs.arch_pc - def read_mem(self, address: int, size: int): + @property + def cur_sp(self) -> int: + """Read current stack pointer register. """ - read data from memory of qiling instance + + return self.ql.arch.regs.arch_sp + + def read_reg(self, reg: Union[str, int]) -> int: + """Get register value. """ - return self.ql.mem.read(address, size) + return self.ql.arch.regs.read(reg) - def disasm(self, address: int, detail: bool = False) -> Optional[CsInsn]: + def write_reg(self, reg: Union[str, int], value: int) -> None: + """Set register value. """ - helper function for disassembling + + self.ql.arch.regs.write(reg, value) + + def disasm(self, address: int, detail: bool = False) -> InsnLike: + """Helper function for disassembling. """ - md = self.ql.arch.disassembler - md.detail = detail + insn_bytes = self.read_insn(address) + insn = None + + if insn_bytes: + md = self.ql.arch.disassembler + md.detail = detail + + insn = next(md.disasm(insn_bytes, address, 1), None) - if (bytes_read := self.read_insn(address)): - return next(md.disasm(bytes_read, address), InvalidInsn(bytes_read, address)) - return InvalidInsn(bytes_read, address) + return insn or InvalidInsn(insn_bytes, address) - def try_read(self, address: int, size: int) -> Optional[bytes]: + def disasm_lite(self, address: int) -> Tuple[int, int, str, str]: + """Helper function for light disassembling, when details are not required. 
class Context:
    """Emulation context accessor.

    Thin convenience layer over a Qiling instance for reading registers,
    memory, strings and instructions. Methods prefixed with `try_` return
    None instead of raising when the access fails.
    """

    def __init__(self, ql: 'Qiling'):
        # make sure mixin classes are properly initialized
        super().__init__()

        self.ql = ql
        self.pointersize = self.ql.arch.pointersize

    @property
    def cur_addr(self) -> int:
        """Read current program counter register.
        """

        return self.ql.arch.regs.arch_pc

    @property
    def cur_sp(self) -> int:
        """Read current stack pointer register.
        """

        return self.ql.arch.regs.arch_sp

    def read_reg(self, reg: Union[str, int]) -> int:
        """Get register value.
        """

        return self.ql.arch.regs.read(reg)

    def write_reg(self, reg: Union[str, int], value: int) -> None:
        """Set register value.
        """

        self.ql.arch.regs.write(reg, value)

    def disasm(self, address: int, detail: bool = False) -> 'InsnLike':
        """Helper function for disassembling.

        Args:
            address: address of the instruction to disassemble
            detail: whether to turn on the disassembler's detail mode

        Returns:
            The disassembled instruction, or an `InvalidInsn` placeholder if
            the bytes could not be read or decoded
        """

        # read_insn is not defined here; presumably provided by the arch mixin
        insn_bytes = self.read_insn(address)
        insn = None

        if insn_bytes:
            md = self.ql.arch.disassembler
            md.detail = detail

            # decode a single instruction only
            insn = next(md.disasm(insn_bytes, address, 1), None)

        return insn or InvalidInsn(insn_bytes, address)

    def disasm_lite(self, address: int) -> Union[Tuple[int, int, str, str], Tuple[()]]:
        """Helper function for light disassembling, when details are not required.

        Returns:
            A tuple of: instruction address, size, mnemonic and operands. If the
            bytes could not be read or decoded, an empty tuple is returned
        """

        insn_bytes = self.read_insn(address)
        insn = None

        if insn_bytes:
            md = self.ql.arch.disassembler

            insn = next(md.disasm_lite(insn_bytes, address, 1), None)

        return insn or tuple()

    def read_mem(self, address: int, size: int) -> bytearray:
        """Read data of a certain size from specified memory location.
        """

        return self.ql.mem.read(address, size)

    def try_read_mem(self, address: int, size: int) -> Optional[bytearray]:
        """Attempt to read data from memory.

        Returns:
            The data read, or None if the emulator failed to read it
        """

        try:
            data = self.read_mem(address, size)
        except UcError:
            data = None

        return data

    def read_pointer(self, address: int, size: int = 0, *, signed: bool = False) -> int:
        """Read an integer from memory.

        Args:
            address: memory location to read from
            size: integer size in bytes; passed through to the memory manager,
                where 0 presumably stands for the native pointer size
            signed: whether to interpret the value as a signed one
        """

        return self.ql.mem.read_ptr(address, size, signed=signed)

    def try_read_pointer(self, address: int, size: int = 0, *, signed: bool = False) -> Optional[int]:
        """Attempt to read an integer from memory.

        Returns:
            The integer value, or None if the emulator failed to read it
        """

        try:
            value = self.read_pointer(address, size, signed=signed)
        except UcError:
            value = None

        return value

    def read_string(self, address: int) -> Optional[str]:
        """Read string from memory.
        """

        return self.ql.mem.string(address)

    def try_read_string(self, address: int) -> Optional[str]:
        """Attempt to read a string from memory.

        Returns:
            The string read, or None if the memory could not be read or its
            content could not be decoded as text
        """

        try:
            s = self.read_string(address)

        # arbitrary memory content is not necessarily valid text. this method used
        # to swallow every error; keep tolerating decoding failures so probing
        # pointers (see get_deref) does not blow up on binary data
        except UnicodeDecodeError:
            s = None

        except UcError:
            s = None

        return s

    def get_deref(self, ptr: int) -> Union[int, str, None]:
        """Get content referenced by a pointer.

        If dereferenced data is printable, a string will be returned. Otherwise
        an integer value is returned. If the specified address is not reachable
        None is returned.
        """

        val = self.try_read_string(ptr)

        # empty and non-printable strings fall back to a plain integer read
        return val if val and val.isprintable() else self.try_read_pointer(ptr)
# pre-compile the safe arithmetics and bitwise pattern
__arith_pattern = re.compile(r'^(0[xX][0-9a-fA-F]+|0[0-7]+|\d+|[\+\-\*/\(\)|&^~\s])+$')


def safe_arith(expr: str) -> int:
    """Safely evaluate an arithmetic expression. The expression may include only
    digits, arithmetic and bitwise operators, parentheses, whitespaces, hexadecimal
    and octal values.

    Args:
        expr: arithmetic expression to evaluate

    Returns: evaluation result. That would be an integer, unless the expression
    includes a true division ('/') in which case Python yields a float

    Raises:
        ValueError: if disallowed tokens are included in `expr`
        SyntaxError: in case the arithmetic expression does not make sense
    """

    if not __arith_pattern.fullmatch(expr):
        raise ValueError

    # adjust gdb-style octal values to python: 0644 -> 0o644
    #
    # the word boundaries make sure only whole literals are rewritten, leaving alone
    # zeros found in the middle of a decimal (100) or a hexadecimal (0x07) value
    expr = re.sub(r'\b0([0-7]+)\b', r'0o\1', expr)

    # safely evaluate the expression: it was validated to include nothing but numeric
    # literals and operators, so there are no names or calls for eval to resolve
    return eval(expr, {}, {})
def setup_command_helper(ql: 'Qiling'):
    """Create a command helper that fits the architecture of the emulated program.

    The helper class is combined on the fly with the arch class that matches
    `ql.arch.type`, and then instantiated.
    """

    atypes = {
        QL_ARCH.X86: ArchX86,
        QL_ARCH.X8664: ArchX64,
        QL_ARCH.MIPS: ArchMIPS,
        QL_ARCH.ARM: ArchARM,
        QL_ARCH.CORTEX_M: ArchCORTEX_M
    }

    ret = type('CommandHelper', (CommandHelper, atypes[ql.arch.type]), {})

    return ret(ql)


class CommandHelper(Context):
    """Helper for debugger commands: resolves expressions that may refer to
    registers, sets register values and examines memory.
    """

    def __init__(self, ql: 'Qiling'):
        super().__init__(ql)

        # default values for the examine ('x') command
        self.x_defaults = {
            'n': '1',   # number of units to read
            'f': 'x',   # output format
            'u': 'w'    # unit type
        }

    def sub_reg_values(self, expr: str) -> str:
        """Substitute register references ('$name') with their current values,
        formatted as hexadecimals.
        """

        def __sub_reg(m: 'Match[str]') -> str:
            reg = m.group(1).lower()

            return f'{self.read_reg(self.unalias(reg)):#x}'

        # replace reg names with their actual values
        return re.sub(r'\$(\w+)', __sub_reg, expr)

    def resolve_expr(self, expr: str) -> int:
        """Resolve an arithmetic expression that might include register names.

        Registers names will be substituted with their current value before
        proceeding to evaluate the expression.

        Args:
            expr: an expression to evaluate

        Returns:
            final evaluation result

        Raises:
            KeyError: if `expr` contains an unrecognized register name
            ValueError: if `expr` contains disallowed tokens
            SyntaxError: if `expr` contains a broken arithmetic syntax
        """

        try:
            # look for registers names and replace them with their actual values
            expr = self.sub_reg_values(expr)

        # expr contains an unrecognized register name
        except KeyError as ex:
            raise KeyError(f'unrecognized register name: {ex.args[0]}') from ex

        try:
            # expr should contain only values and arithmetic tokens by now; attempt to evaluate it
            res = safe_arith(expr)

        # expr contains a disallowed token
        except ValueError as ex:
            raise ValueError('only integers, hexadecimals, octals, arithmetic and bitwise operators are allowed') from ex

        # arithmetic syntax is broken
        except SyntaxError as ex:
            raise SyntaxError('error evaluating arithmetic expression') from ex

        return res

    def handle_set(self, line: str) -> Tuple[str, int]:
        """Set register value of current context.

        Args:
            line: an assignment of the form: $reg = expr

        Returns:
            A tuple of: name of the register that was set and the value it was set to

        Raises:
            SyntaxError: if `line` is not a well-formed assignment
            KeyError: if the register name is missing or unrecognized
            ValueError: if the assigned expression contains disallowed tokens
        """
        # set $a = b

        m = re.match(r'\s*\$(?P<reg>\w+)\s*=\s*(?P<expr>.+)', line)

        if m is None:
            raise SyntaxError('illegal command syntax')

        if not m['reg']:
            raise KeyError('error parsing input: invalid lhand expression')

        if not m['expr']:
            raise SyntaxError('error parsing input: invalid rhand expression')

        reg = self.unalias(m['reg'])
        expr = self.resolve_expr(m['expr'])

        self.write_reg(reg, expr)

        return (reg, expr)

    def handle_i(self, addr: int, count: int) -> List['InsnLike']:
        """Disassemble `count` consecutive instructions starting at `addr`.
        """

        result = []

        for _ in range(count):
            insn = self.disasm(addr)

            # advance by the actual size of the instruction just decoded
            addr += insn.size

            result.append(insn)

        return result

    def handle_examine(self, line: str) -> None:
        """Examine memory, gdb style, and print the outcome.

        Args:
            line: arguments of the examine command: an optional '/' followed by
                count, format and unit specifiers, and an optional target expression

        Raises:
            ValueError: if the c-string at target cannot be read, or the target
                expression contains disallowed tokens
            KeyError: if the target expression refers to an unrecognized register
            SyntaxError: if the target expression is not a valid arithmetic expression
        """

        # examples:
        #   x/xw address
        #   x/4xw $esp
        #   x/4xg $rsp
        #   x/i $eip - 0x10
        #   x $sp
        #   x $sp + 0xc

        m = re.match(r'(?:/(?P<n>\d+)?(?P<f>[oxdutfacis])?(?P<u>[bhwg])?)?\s*(?P<target>.+)?', line)

        # there should be always a match, at least for target, but let's be on the safe side
        if m is None:
            raise ValueError('unexpected examine command syntax')

        n = m['n'] or self.x_defaults['n']
        f = m['f'] or self.x_defaults['f']
        u = m['u'] or self.x_defaults['u']

        target = m['target']

        # if target was specified, determine its value. otherwise use the current address
        target = self.resolve_expr(target) if target else self.cur_addr

        n = int(n)

        if f == r'i':
            for insn in self.handle_i(target, n):
                print(f"{insn.address:#010x}: {insn.mnemonic:10s} {insn.op_str}")

        # handle read c-style string
        elif f == r's':
            s = self.try_read_string(target)

            if s is None:
                raise ValueError(f'error reading c-style string at {target:#010x}')

            print(f"{target:#010x}: {s}")

        else:
            def __to_size(u: str) -> int:
                """Convert a gdb unit name to its corresponding size in bytes.
                """

                sizes = {
                    'b': 1,     # byte
                    'h': 2,     # halfword
                    'w': 4,     # word
                    'g': 8      # giant
                }

                # assume u is in sizes
                return sizes[u]

            def __to_py_spec(f: str, size: int) -> Tuple[str, str, str]:
                """Convert a gdb format specifier to its corresponding python format,
                prefix and padding specifiers.
                """

                # NOTE(review): values are read as integers, so 'f' prints the integer
                # value in float notation rather than reinterpreting the bytes as a float
                specs = {
                    'o': ('o', '0', ''),                    # octal
                    'x': ('x', '0x', f'0{size * 2}'),       # hex
                    'd': ('d', '', ''),                     # decimal
                    'u': ('d', '', ''),                     # unsigned decimal; python has no 'u' presentation type
                    't': ('b', '', f'0{size * 8}'),         # binary
                    'f': ('f', '', ''),                     # float
                    'a': ('x', '0x', f'0{size * 2}'),       # address
                    'c': ('c', '', ''),                     # char
                }

                # assume f is in specs
                return specs[f]

            size = __to_size(u)
            pyfmt, prefix, pad = __to_py_spec(f, size)

            # unreadable units are kept as None and shown as '?'
            values = [self.try_read_pointer(target + (i * size), size) for i in range(n)]

            ipr = 4     # number of items to display per row

            for i in range(0, len(values), ipr):
                vset = values[i:i + ipr]

                print(f'{target + i * size:#10x}:', end='\t')

                for v in vset:
                    print('?' if v is None else f'{prefix}{v:{pad}{pyfmt}}', end='\t')

                print()
self.unpack16, - 4: self.unpack32, - 8: self.unpack64, - }.get(sz)(bs) - - def handle_i(self, addr, ct=1): - result = [] - - for offset in range(addr, addr+ct*4, 4): - if (line := self.disasm(offset)): - result.append(line) - - return result - - - def parse(self, line: str): - - # test case - # x/wx address - # x/i address - # x $sp - # x $sp +0xc - # x $sp+0xc - # x $sp + 0xc - - if line.startswith("/"): # followed by format letter and size letter - - fmt, *rest = line.strip("/").split() - - fmt = self.get_fmt(fmt) - - else: - args = line.split() - - rest = [args[0]] if len(args) == 1 else args - - fmt = self.get_default_fmt - - if len(rest) == 0: - return - - line = [] - if (regs_dict := getattr(self, "regs_need_swapped", None)): - for each in rest: - for reg in regs_dict: - if each in regs_dict: - line.append(regs_dict[each]) - else: - line.append(each) - else: - line = rest - - # for simple calculation with register and address - - line = " ".join(line) - # substitue register name with real value - for each_reg in filter(lambda r: len(r) == 3, self.ql.arch.regs.register_mapping): - reg = f"${each_reg}" - if reg in line: - line = re.sub(f"\\{reg}", hex(self.ql.arch.regs.read(each_reg)), line) - - for each_reg in filter(lambda r: len(r) == 2, self.ql.arch.regs.register_mapping): - reg = f"${each_reg}" - if reg in line: - line = re.sub(f"\\{reg}", hex(self.ql.arch.regs.read(each_reg)), line) - - - ft, sz, ct = fmt - - try: - addr = check_and_eval(line) - except: - return "something went wrong ..." 
- - if ft == "i": - output = self.handle_i(addr, ct) - for each in output: - print(f"0x{each.address:x}: {each.mnemonic}\t{each.op_str}") - - elif ft == "s": - # handle read c-style string - try: - print(f"0x{addr:08x}: {self.ql.os.utils.read_cstring(addr)}") - except: - return f"error reading c-style string at 0x{addr:08x}" - - else: - lines = 1 if ct <= 4 else math.ceil(ct / 4) - # parse command - prefix = "0x" if ft in ("x", "a") else "" - pad = '0' + str(sz*2) if ft in ('x', 'a', 't') else '' - ft = ft.lower() if ft in ("x", "o", "b", "d") else ft.lower().replace("t", "b").replace("a", "x") - - mem_read = [] - for offset in range(ct): - # append data if read successfully, otherwise return error message - if (data := self.try_read(addr+(offset*sz), sz))[0] is not None: - mem_read.append(data[0]) - - else: - return data[1] - - for line in range(lines): - offset = line * sz * 4 - print(f"0x{addr+offset:x}:\t", end="") - - idx = line * self.ql.arch.pointersize - for each in mem_read[idx:idx+self.ql.arch.pointersize]: - data = self.fmt_unpack(each, sz) - print(f"{prefix}{data:{pad}{ft}}\t", end="") - - print() - - return True diff --git a/qiling/debugger/qdb/misc.py b/qiling/debugger/qdb/misc.py index a3cf29e1a..46c06cc02 100644 --- a/qiling/debugger/qdb/misc.py +++ b/qiling/debugger/qdb/misc.py @@ -3,92 +3,68 @@ # Cross Platform and Multi Architecture Advanced Binary Emulation Framework # -from typing import AnyStr, Callable, Optional +from typing import Optional, Union from dataclasses import dataclass +from capstone import CsInsn -import ast - -def check_and_eval(line: str): - """ - This function will valid all type of nodes and evaluate it if nothing went wrong - """ - - class AST_checker(ast.NodeVisitor): - def generic_visit(self, node): - if type(node) in (ast.Module, ast.Expr, ast.BinOp, ast.Constant, ast.Add, ast.Mult, ast.Sub): - ast.NodeVisitor.generic_visit(self, node) - else: - raise ParseError("malform or invalid ast node") - - checker = AST_checker() - 
ast_tree = ast.parse(line) - checker.visit(ast_tree) - - return eval(line) @dataclass class InvalidInsn: """ class for displaying invalid instruction """ + bytes: bytes - address: bytes - mnemonic: str = 'invalid' + address: int + mnemonic: str = '(invalid)' op_str: str = '' def __post_init__(self): - self.size = len(self.bytes) + self.size = len(self.bytes) if self.bytes else 1 class Breakpoint: + """Dummy class for breakpoints. """ - dummy class for breakpoint - """ - def __init__(self, addr: int): - self.addr = addr - self.hitted = False + # monotonically increasing index counter + _counter = 0 -class TempBreakpoint(Breakpoint): - """ - dummy class for temporay breakpoint - """ - def __init__(self, addr: int): - super().__init__(addr) + def __init__(self, addr: int, temp: bool = False): + """Initialize a breakpoint object. + Args: + addr: address to break upon arrival + temp: whether this is a temporary breakpoint. temporary breakpoints + get removed after they get hit for the first time + """ -def read_int(s: str) -> int: - """ - parse unsigned integer from string - """ - return int(s, 0) + self.index = Breakpoint._counter + Breakpoint._counter += 1 + self.addr = addr + self.temp = temp + self.enabled = True -def try_read_int(s: AnyStr) -> Optional[int]: - """ - try to read string as integer is possible + +def read_int(s: str, /) -> int: + """Turn a numerical string into its integer value. """ - try: - ret = read_int(s) - except: - ret = None - return ret + return int(s, 0) -def parse_int(func: Callable) -> Callable: +def try_read_int(s: str, /) -> Optional[int]: + """Attempt to convert string to an integer value. 
""" - function dectorator for parsing argument as integer - """ - def wrap(qdb, s: str = "") -> int: - assert type(s) is str - ret = try_read_int(s) - return func(qdb, ret) - return wrap + try: + val = read_int(s) + except (ValueError, TypeError): + val = None + return val -if __name__ == "__main__": - pass +InsnLike = Union[CsInsn, InvalidInsn] diff --git a/qiling/debugger/qdb/qdb.py b/qiling/debugger/qdb/qdb.py index fe4a68d61..ae942139e 100644 --- a/qiling/debugger/qdb/qdb.py +++ b/qiling/debugger/qdb/qdb.py @@ -3,48 +3,60 @@ # Cross Platform and Multi Architecture Advanced Binary Emulation Framework # -import cmd +from __future__ import annotations -from typing import Callable, Optional, Tuple, Union, List +import sys + +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Union +from cmd import Cmd from contextlib import contextmanager -from qiling import Qiling -from qiling.const import QL_OS, QL_ARCH, QL_ENDIAN, QL_VERBOSE +from qiling.const import QL_OS, QL_ARCH, QL_VERBOSE from qiling.debugger import QlDebugger -from .utils import setup_context_render, setup_branch_predictor, setup_address_marker, SnapshotManager, run_qdb_script -from .memory import setup_memory_Manager -from .misc import parse_int, Breakpoint, TempBreakpoint, try_read_int from .const import color +from .helper import setup_command_helper +from .misc import Breakpoint, try_read_int +from .render.render import RARROW +from .utils import setup_context_render, setup_branch_predictor, Marker, SnapshotManager, QDB_MSG, qdb_print + -from .utils import QDB_MSG, qdb_print +if TYPE_CHECKING: + from qiling import Qiling -def save_reg_dump(func: Callable) -> Callable[..., None]: - """Decorator for saving registers dump. +def save_regs(func: Callable) -> Callable[..., None]: + """Save registers before running a certain functionality so we can display + the registers diff. 
""" def inner(self: 'QlQdb', *args, **kwargs) -> None: - self._saved_reg_dump = dict(filter(lambda d: isinstance(d[0], str), self.ql.arch.regs.save().items())) + self.render.prev_regs = self.render.get_regs() func(self, *args, **kwargs) return inner -def check_ql_alive(func: Callable) -> Callable[..., None]: - """Decorator for checking whether ql instance is alive. +def liveness_check(func: Callable) -> Callable[..., None]: + """Decorator for checking whether the program is alive. """ def inner(self: 'QlQdb', *args, **kwargs) -> None: if self.ql is None: - qdb_print(QDB_MSG.ERROR, "The program is not being run.") - else: - func(self, *args, **kwargs) + qdb_print(QDB_MSG.ERROR, 'no active emulation') + return + + if self.predictor.has_ended(): + qdb_print(QDB_MSG.ERROR, 'the program has ended') + return + + # proceed to functionality + func(self, *args, **kwargs) return inner -class QlQdb(cmd.Cmd, QlDebugger): +class QlQdb(Cmd, QlDebugger): """ The built-in debugger of Qiling Framework """ @@ -56,49 +68,55 @@ def __init__(self, ql: Qiling, init_hook: List[str] = [], rr: bool = False, scri """ self.ql = ql - self.prompt = f"{color.BOLD}{color.RED}Qdb> {color.END}" - self._saved_reg_dump = None + self.prompt = f"{color.RED}(qdb) {color.RESET}" self._script = script - self.bp_list = {} - self.marker = setup_address_marker() + self.last_addr: int = -1 + self.bp_list: Dict[int, Breakpoint] = {} + self.marker = Marker() self.rr = SnapshotManager(ql) if rr else None - self.mm = setup_memory_Manager(ql) + self.helper = setup_command_helper(ql) self.predictor = setup_branch_predictor(ql) self.render = setup_context_render(ql, self.predictor) super().__init__() # filter out entry_point of loader if presented - self.dbg_hook(list(filter(lambda d: int(d, 0) != self.ql.loader.entry_point, init_hook))) + self.dbg_hook([addr for addr in init_hook if int(addr, 0) != self.ql.loader.entry_point]) + + def run_qdb_script(self, filename: str) -> None: + with open(filename, 'r', 
encoding='latin') as fd: + self.cmdqueue = fd.readlines() def dbg_hook(self, init_hook: List[str]): """ initial hook to prepare everything we need """ - # self.ql.loader.entry_point # ld.so - # self.ql.loader.elf_entry # .text of binary + def __bp_handler(ql: Qiling, address: int, size: int): + if (address in self.bp_list) and (address != self.last_addr): + bp = self.bp_list[address] - def bp_handler(ql, address, size, bp_list): + if bp.enabled: + if bp.temp: + # temp breakpoint: remove once hit + self.del_breakpoint(bp) - if (bp := self.bp_list.get(address, None)): + else: + qdb_print(QDB_MSG.INFO, f'hit breakpoint at {self.cur_addr:#x}') - if isinstance(bp, TempBreakpoint): - # remove TempBreakpoint once hitted - self.del_breakpoint(bp) + # flush unicorn translation block to avoid resuming execution from next + # basic block + self.ql.arch.uc.ctl_flush_tb() - else: - if bp.hitted: - return + ql.stop() + self.do_context() - qdb_print(QDB_MSG.INFO, f"hit breakpoint at {self.cur_addr:#x}") - bp.hitted = True + # this is used to prevent breakpoints be hit more than once in a row. without + # it we would not be able to proceed after hitting a breakpoint + self.last_addr = address - ql.stop() - self.do_context() - - self.ql.hook_code(bp_handler, self.bp_list) + self.ql.hook_code(__bp_handler) if self.ql.entry_point: self.cur_addr = self.ql.entry_point @@ -107,31 +125,16 @@ def bp_handler(ql, address, size, bp_list): self.init_state = self.ql.save() - # stop emulator once interp. 
have been done emulating - if addr_elf_entry := getattr(self.ql.loader, 'elf_entry', None): - handler = self.ql.hook_address(lambda ql: ql.stop(), addr_elf_entry) - else: - handler = self.ql.hook_address(lambda ql: ql.stop(), self.ql.loader.entry_point) - - # suppress logging temporary - _verbose = self.ql.verbose - self.ql.verbose = QL_VERBOSE.DISABLED - - # init os for integrity of hooks and patches, - self.ql.os.run() - - handler.remove() - - # ignore the memory unmap error for now, due to the MIPS memory layout issue - try: - self.ql.mem.unmap_all() - except: - pass - - self.ql.restore(self.init_state) + # the interpreter has to be emulated, but this is not interesting for most of the users. + # here we start emulating from interpreter's entry point while making sure the emulator + # stops once it reaches the program entry point + entry = getattr(self.ql.loader, 'elf_entry', self.ql.loader.entry_point) & ~0b1 + self.set_breakpoint(entry, is_temp=True) - # resotre logging verbose - self.ql.verbose = _verbose + # init os for integrity of hooks and patches while temporarily suppress logging to let it + # fast-forward + with self.__set_temp(self.ql, 'verbose', QL_VERBOSE.DISABLED): + self.ql.os.run() if self.ql.os.type is QL_OS.BLOB: self.ql.loader.entry_point = self.ql.loader.load_address @@ -141,30 +144,26 @@ def bp_handler(ql, address, size, bp_list): self.do_breakpoint(each_hook) if self._script: - run_qdb_script(self, self._script) - else: - self.do_context() - self.interactive() + self.run_qdb_script(self._script) + + self.cmdloop() @property def cur_addr(self) -> int: - """ - getter for current address of qiling instance + """Get emulation's current program counter. """ return self.ql.arch.regs.arch_pc @cur_addr.setter def cur_addr(self, address: int) -> None: - """ - setter for current address of qiling instance + """Set emulation's current program counter. 
""" self.ql.arch.regs.arch_pc = address def _run(self, address: int = 0, end: int = 0, count: int = 0) -> None: - """ - internal function for emulating instruction + """Internal method for advancing emulation on different circumstences. """ if not address: @@ -176,42 +175,27 @@ def _run(self, address: int = 0, end: int = 0, count: int = 0) -> None: self.ql.emu_start(begin=address, end=end, count=count) @contextmanager - def _save(self, reg=True, mem=True, hw=False, fd=False, cpu_context=False, os=False, loader=False): + def save(self): """ helper function for fetching specific context by emulating instructions """ - saved_states = self.ql.save(reg=reg, mem=mem) + saved_states = self.ql.save(reg=True, mem=False) yield self self.ql.restore(saved_states) - def parseline(self, line: str) -> Tuple[Optional[str], Optional[str], str]: - """ - Parse the line into a command name and a string containing - the arguments. Returns a tuple containing (command, args, line). - 'command' and 'args' may be None if the line couldn't be parsed. 
- """ + def default(self, line: str): + # if this is a comment line, ignore it + if line.startswith('#'): + return - line = line.strip() - if not line: - return None, None, line - elif line[0] == '?': - line = 'help ' + line[1:] - elif line.startswith('!'): - if hasattr(self, 'do_shell'): - line = 'shell ' + line[1:] - else: - return None, None, line - i, n = 0, len(line) - while i < n and line[i] in self.identchars: i = i+1 - cmd, arg = line[:i], line[i:].strip() - return cmd, arg, line + super().default(line) - def interactive(self, *args) -> None: - """ - initial an interactive interface - """ + def emptyline(self) -> bool: + # when executing a script, ignore empty lines + if self._script: + return False - return self.cmdloop() + return super().emptyline() def run(self, *args) -> None: """ @@ -220,15 +204,7 @@ def run(self, *args) -> None: self._run() - def emptyline(self, *args) -> None: - """ - repeat last command - """ - - if (lastcmd := getattr(self, "do_" + self.lastcmd, None)): - return lastcmd() - - def do_run(self, *args) -> None: + def do_run(self, args: str) -> None: """ launch qiling instance """ @@ -236,346 +212,446 @@ def do_run(self, *args) -> None: self._run() @SnapshotManager.snapshot - @save_reg_dump - @check_ql_alive - def do_step_in(self, step: str = '', *args) -> Optional[bool]: - """ - execute one instruction at a time, will enter subroutine + @save_regs + @liveness_check + def do_step_in(self, args: str) -> None: + """Go to next instruction, stepping into function calls. 
""" - prophecy = self.predictor.predict() - if prophecy.where is True: - qdb_print(QDB_MSG.INFO, 'program exited due to code end hitted') - self.do_context() - return False + steps, *_ = args.split() if args else ('',) + steps = try_read_int(steps) + + if steps is None: + steps = 1 - step = 1 if step == '' else int(step) + qdb_print(QDB_MSG.INFO, f'stepping {steps} steps from {self.cur_addr:#x}') - # make sure follow branching - if prophecy.going is True and self.ql.arch.type == QL_ARCH.MIPS: - step += 1 + # make sure to include delay slot when branching in mips + if self.ql.arch.type is QL_ARCH.MIPS and self.predictor.is_branch(): + prophecy = self.predictor.predict() - self._run(count=step) + if prophecy.going: + steps += 1 + + self._run(count=steps) self.do_context() @SnapshotManager.snapshot - @save_reg_dump - @check_ql_alive - def do_step_over(self, *args) -> Optional[bool]: - """ - execute one instruction at a time, but WON't enter subroutine + @save_regs + @liveness_check + def do_step_over(self, args: str) -> None: + """Go to next instruction, stepping over function calls. 
""" - prophecy = self.predictor.predict() + addr, size, _, _ = self.predictor.disasm_lite(self.cur_addr) + next_insn = addr + size - if prophecy.going: - self.set_breakpoint(prophecy.where, is_temp=True) + # make sure to include delay slot when branching in mips + if self.ql.arch.type is QL_ARCH.MIPS and self.predictor.is_branch(): + next_insn += size - else: - cur_insn = self.predictor.disasm(self.cur_addr) - bp_addr = self.cur_addr + cur_insn.size - - if self.ql.arch.type is QL_ARCH.MIPS: - bp_addr += cur_insn.size - - self.set_breakpoint(bp_addr, is_temp=True) + self.set_breakpoint(next_insn, is_temp=True) self._run() @SnapshotManager.snapshot - @parse_int - def do_continue(self, address: Optional[int] = None) -> None: - """ - continue execution from current address if not specified + @save_regs + @liveness_check + def do_continue(self, args: str) -> None: + """Continue execution from specified address, or from current one if + not specified. """ + address, *_ = args.split() if args else ('',) + address = try_read_int(address) + if address is None: address = self.cur_addr - qdb_print(QDB_MSG.INFO, f"continued from 0x{address:08x}") + qdb_print(QDB_MSG.INFO, f'continuing from {address:#010x}') self._run(address) - def do_backward(self, *args) -> None: - """ - step barkward if it's possible, option rr should be enabled and previous instruction must be executed before + def do_backward(self, args: str) -> None: + """Step backwards to the previous location. 
+ + This operation requires the rr option to be enabled and having a progress + of at least one instruction """ - if self.rr: - if len(self.rr.layers) == 0 or not isinstance(self.rr.layers[-1], self.rr.DiffedState): - qdb_print(QDB_MSG.ERROR, "there is no way back !!!") + if self.rr is None: + qdb_print(QDB_MSG.ERROR, 'rr was not enabled') + return - else: - qdb_print(QDB_MSG.INFO, "step backward ~") - self.rr.restore() - self.do_context() - else: - qdb_print(QDB_MSG.ERROR, f"the option rr yet been set !!!") + if not self.rr.layers: + qdb_print(QDB_MSG.ERROR, 'there are no snapshots yet') + return + + qdb_print(QDB_MSG.INFO, 'stepping backwards') + + self.rr.restore() + self.do_context() + + # we did not really amualte anything going backwards, so we manually + # updating last address + self.last_addr = self.cur_addr def set_breakpoint(self, address: int, is_temp: bool = False) -> None: - """ - internal function for placing breakpoint + """[internal] Add or update an existing breakpoint. """ - bp = TempBreakpoint(address) if is_temp else Breakpoint(address) + self.bp_list[address] = Breakpoint(address, is_temp) - self.bp_list.update({address: bp}) + def del_breakpoint(self, bp: Union[int, Breakpoint]) -> None: + """[internal] Remove an existing breakpoint. - def del_breakpoint(self, bp: Union[Breakpoint, TempBreakpoint]) -> None: - """ - internal function for removing breakpoint + The caller is responsible to make sure the breakpoint exists. """ - self.bp_list.pop(bp.addr, None) + if isinstance(bp, int): + try: + bp = next(b for b in self.bp_list.values() if b.addr == bp) + except StopIteration: + qdb_print(QDB_MSG.ERROR, f'No breakpoint number {bp}.') + return + + del self.bp_list[bp.addr] - @parse_int - def do_breakpoint(self, address: Optional[int] = None) -> None: - """ - set breakpoint on specific address + def do_breakpoint(self, args: str) -> None: + """Set a breakpoint on a specific address, or current one if not specified. 
""" + address, *_ = args.split() if args else ('',) + address = try_read_int(address) + if address is None: address = self.cur_addr self.set_breakpoint(address) - qdb_print(QDB_MSG.INFO, f"Breakpoint at 0x{address:08x}") + qdb_print(QDB_MSG.INFO, f"breakpoint set at {address:#010x}") - @parse_int - def do_disassemble(self, address: Optional[int] = None) -> None: - """ - disassemble instructions from address specified + def do_disassemble(self, args: str) -> None: + """Disassemble a few instructions starting from specified address. """ - try: - context_asm(self.ql, address) - except: - qdb_print(QDB_MSG.ERROR) + address, *_ = args.split() if args else ('',) + address = try_read_int(address) - def do_examine(self, line: str) -> None: + if address is None: + address = self.cur_addr - """ - Examine memory: x/FMT ADDRESS. - format letter: o(octal), x(hex), d(decimal), u(unsigned decimal), t(binary), f(float), a(address), i(instruction), c(char), s(string) and z(hex, zero padded on the left) - size letter: b(byte), h(halfword), w(word), g(giant, 8 bytes) - e.g. x/4wx 0x41414141 , print 4 word size begin from address 0x41414141 in hex - """ + self.do_examine(f'x/{self.render.disasm_num * 2}i {address}') - if type(err_msg := self.mm.parse(line)) is str: - qdb_print(QDB_MSG.ERROR, err_msg) + def do_examine(self, args: str) -> None: + """Examine memory. 
+ Usage: x/nfu target (all arguments are optional) + Where: + n - number of units to read + f - format specifier + u - unit type + """ + + try: + self.helper.handle_examine(args) + except (KeyError, ValueError, SyntaxError) as ex: + qdb_print(QDB_MSG.ERROR, ex) - def do_set(self, line: str) -> None: + def do_set(self, args: str) -> None: """ set register value of current context """ # set $a = b - reg, val = line.split("=") - reg_name = reg.strip().strip("$") - reg_val = try_read_int(val.strip()) - - if reg_name in self.ql.arch.regs.save().keys(): - if reg_val is not None: - setattr(self.ql.arch.regs, reg_name, reg_val) - self.do_context() - qdb_print(QDB_MSG.INFO, f"set register {reg_name} to 0x{(reg_val & 0xfffffff):08x}") - - else: - qdb_print(QDB_MSG.ERROR, f"error parsing input: {reg_val} as integer value") - + try: + reg, value = self.helper.handle_set(args) + except (KeyError, ValueError, SyntaxError) as ex: + qdb_print(QDB_MSG.ERROR, ex) else: - qdb_print(QDB_MSG.ERROR, f"invalid register: {reg_name}") + qdb_print(QDB_MSG.INFO, f"{reg} set to {value:#010x}") - def do_start(self, *args) -> None: + def do_start(self, args: str) -> None: """ restore qiling instance context to initial state """ - if self.ql.arch != QL_ARCH.CORTEX_M: + if self.ql.arch.type is QL_ARCH.CORTEX_M: self.ql.restore(self.init_state) self.do_context() - def do_context(self, *args) -> None: + def do_context(self, *args: str) -> None: """ display context information for current location """ - self.render.context_reg(self._saved_reg_dump) + self.render.context_reg() self.render.context_stack() self.render.context_asm() - def do_jump(self, loc: str, *args) -> None: + def do_jump(self, args: str) -> None: """ seek to where ever valid location you want """ - sym = self.marker.get_symbol(loc) - addr = sym if sym is not None else try_read_int(loc) + loc, *_ = args.split() if args else ('',) + addr = self.marker.get_address(loc) + + if addr is None: + addr = try_read_int(loc) + + if addr is 
None: + qdb_print(QDB_MSG.ERROR, 'seek target should be a symbol or an address') + return # check validation of the address to be seeked - if self.ql.mem.is_mapped(addr, 4): - if sym: - qdb_print(QDB_MSG.INFO, f"seek to {loc} @ 0x{addr:08x} ...") - else: - qdb_print(QDB_MSG.INFO, f"seek to 0x{addr:08x} ...") + if not self.ql.mem.is_mapped(addr, 4): + qdb_print(QDB_MSG.ERROR, f'seek target is unreachable: {addr:#010x}') + return - self.cur_addr = addr - self.do_context() + qdb_print(QDB_MSG.INFO, f'seeking to {addr:#010x} ...') - else: - qdb_print(QDB_MSG.ERROR, f"the address to be seeked isn't mapped") + self.cur_addr = addr + self.do_context() - def do_mark(self, args=""): + def do_mark(self, args: str): """ mark a user specified address as a symbol """ - args = args.split() - if len(args) == 0: + elems = args.split() if args else [] + + if not elems: loc = self.cur_addr - sym_name = self.marker.mark_only_loc(loc) + sym = self.marker.mark(loc) - elif len(args) == 1: - if (loc := try_read_int(args[0])): - sym_name = self.marker.mark_only_loc(loc) + elif len(elems) == 1: + loc = try_read_int(elems[0]) - else: + if loc is None: loc = self.cur_addr - sym_name = args[0] - if (err := self.marker.mark(sym_name, loc)): - qdb_print(QDB_MSG.ERROR, err) + sym = elems[0] + + if not self.marker.mark(loc, sym): + qdb_print(QDB_MSG.ERROR, f"duplicated symbol name: {sym} at address: {loc:#010x}") return - elif len(args) == 2: - sym_name, addr = args - if (loc := try_read_int(addr)): - self.marker.mark(sym_name, loc) else: + sym = self.marker.mark(loc) + + elif len(elems) == 2: + sym, addr = elems + loc = try_read_int(addr) + + if loc is None: qdb_print(QDB_MSG.ERROR, f"unable to mark symbol at address: '{addr}'") return + + else: + self.marker.mark(loc, sym) + else: qdb_print(QDB_MSG.ERROR, "symbol should not be empty ...") return - qdb_print(QDB_MSG.INFO, f"mark symbol '{sym_name}' at address: 0x{loc:08x} ...") + qdb_print(QDB_MSG.INFO, f"mark symbol '{sym}' at address: 
0x{loc:08x} ...") - @parse_int - def do_show_args(self, argc: int = -1): - """ - show arguments of a function call - default argc is 2 since we don't know the function definition + @staticmethod + @contextmanager + def __set_temp(obj: object, member: str, value: Any): + """A utility context manager that temporarily sets a new value to an + object member, only to run a certain functionality. Then the change + is reverted. """ + has_member = hasattr(obj, member) + + if has_member: + orig = getattr(obj, member) + setattr(obj, member, value) + + try: + yield + finally: + if has_member: + setattr(obj, member, orig) + + def __info_args(self, args: str): + argc, *_ = args.split() if args else ('',) + argc = try_read_int(argc) + if argc is None: - argc = -1 + argc = 2 - elif argc > 16: - qdb_print(QDB_MSG.ERROR, 'Maximum argc is 16.') + if argc > 16: + qdb_print(QDB_MSG.ERROR, 'can show up to 16 arguments') return - prophecy = self.predictor.predict() - if not prophecy.going: - qdb_print(QDB_MSG.ERROR, 'Not on a braching instruction currently.') + if not self.predictor.is_fcall(): + qdb_print(QDB_MSG.ERROR, 'available only on a function call instruction') return - if argc == -1: - reg_n, stk_n = 2, 0 - else: - if argc > 4: - reg_n, stk_n = 4, argc - 4 - elif argc <= 4: - reg_n, stk_n = argc, 0 - - ptr_size = self.ql.arch.pointersize + # the cc methods were designed to access fcall arguments from within the function, + # and therefore assume a return address is on the stack (in relevant archs), so they + # skip it. when we are just about to call a function the return address is not yet + # there and the arguments, if read off the stack, get messed up. + # + # here we work around this by temporarily cheating cc to think there is no return + # address on the stack, so it does not skip it. 
- reg_args = [] - arch_type = self.ql.arch.type - if arch_type in (QL_ARCH.MIPS, QL_ARCH.ARM, QL_ARCH.CORTEX_M, QL_ARCH.X8664): + with QlQdb.__set_temp(self.ql.os.fcall.cc, '_retaddr_on_stack', False): + fargs = [self.ql.os.fcall.cc.getRawParam(i) for i in range(argc)] - reg_idx = None - if arch_type == QL_ARCH.MIPS: - slot_addr = self.cur_addr + ptr_size + # mips requires a special handling since the instruction in delay slot might + # affect one of the reg arguments values + if self.ql.arch.type is QL_ARCH.MIPS: + slot_addr = self.cur_addr + self.ql.arch.pointersize + _, _, _, op_str = self.predictor.disasm_lite(slot_addr) + operands = op_str.split(',') - op_str = self.predictor.disasm(slot_addr).op_str - # register may be changed due to dealy slot - if '$a' in op_str.split(',')[0]: - dst_reg = op_str.split(',')[0].strip('$') - reg_idx = int(dst_reg.strip('a')) + reg_args = ('$a0', '$a1', '$a2', '$a3') - # fetch real value by emulating instruction in delay slot - with self._save() as qdb: - qdb._run(slot_addr, 0, count=1) - real_val = self.ql.arch.regs.read(dst_reg) + # find out whether one of the argument registers gets modified in the dealy slot + if any(a in operands[0] for a in reg_args): + last = self.last_addr - reg_names = [f'a{d}'for d in range(reg_n)] - if reg_idx != None: - reg_names.pop(reg_idx) + dst_reg = operands[0].strip('$') + reg_idx = int(dst_reg.strip('a')) - elif arch_type in (QL_ARCH.ARM, QL_ARCH.CORTEX_M): - reg_names = [f'r{d}'for d in range(reg_n)] + # fetch real value by emulating instruction in delay slot + with self.save() as qdb: + qdb._run(slot_addr, count=1) + real_val = self.ql.arch.regs.read(dst_reg) - elif arch_type == QL_ARCH.X8664: - reg_names = ('rdi', 'rsi', 'rdx', 'rcx', 'r8', 'r9')[:reg_n] + # update argument value with the calculated one + fargs[reg_idx] = real_val - reg_args = [self.ql.arch.regs.read(reg_name) for reg_name in reg_names] - if reg_idx != None: - reg_args.insert(reg_idx, real_val) + # we don't want that to 
count as emulation, so restore last address + self.last_addr = last - reg_args = list(map(hex, reg_args)) + nibbles = self.ql.arch.pointersize * 2 - elif arch_type == QL_ARCH.X86: - stk_n = 2 if argc == -1 else argc + for i, a in enumerate(fargs): + deref = self.render.get_deref(a) - # read arguments on stack - if stk_n >= 0: - shadow_n = 0 - base_offset = self.ql.arch.regs.arch_sp + if isinstance(deref, int): + deref_str = f'{deref:#0{nibbles + 2}x}' - if arch_type in (QL_ARCH.X86, QL_ARCH.X8664): - # shadow 1 pointer size for return address - shadow_n = 1 + elif isinstance(deref, str): + deref_str = f'"{deref}"' - elif arch_type == QL_ARCH.MIPS: - # shadow 4 pointer size for mips - shadow_n = 4 + else: + deref_str = '' - base_offset = self.ql.arch.regs.arch_sp + shadow_n * ptr_size - stk_args = [self.ql.mem.read(base_offset+offset*ptr_size, ptr_size) for offset in range(stk_n)] - endian = 'little' if self.ql.arch.endian == QL_ENDIAN.EL else 'big' - stk_args = list(map(hex, map(lambda x: int.from_bytes(x, endian), stk_args))) + qdb_print(QDB_MSG.INFO, f'arg{i}: {a:#0{nibbles + 2}x}{f" {RARROW} {deref_str}" if deref_str else ""}') - args = reg_args + stk_args - qdb_print(QDB_MSG.INFO, f'args: {args}') + def __info_breakpoints(self, args: str): + if self.bp_list: + qdb_print(QDB_MSG.INFO, f'{"id":2s} {"address":10s} {"enabled"}') - def do_show(self, keyword: Optional[str] = None, *args) -> None: - """ - show some runtime information - """ + for addr, bp in self.bp_list.items(): + if not bp.temp: + qdb_print(QDB_MSG.INFO, f"{bp.index:2d} {addr:#010x} {bp.enabled}") - qdb_print(QDB_MSG.INFO, f"Entry point: {self.ql.loader.entry_point:#x}") - - if addr_elf_entry := getattr(self.ql.loader, 'elf_entry', None): - qdb_print(QDB_MSG.INFO, f"ELF entry: {addr_elf_entry:#x}") + else: + qdb_print(QDB_MSG.INFO, 'No breakpoints') + def __info_mem(self, kw: str): info_lines = iter(self.ql.mem.get_formatted_mapinfo()) # print filed name first qdb_print(QDB_MSG.INFO, 
next(info_lines)) # keyword filtering - if keyword: - lines = filter(lambda line: keyword in line, info_lines) - else: - lines = info_lines + lines = (line for line in info_lines if kw in line) if kw else info_lines for line in lines: qdb_print(QDB_MSG.INFO, line) - qdb_print(QDB_MSG.INFO, f"Breakpoints: {[hex(addr) for addr in self.bp_list.keys()]}") - qdb_print(QDB_MSG.INFO, f"Marked symbol: {[{key:hex(val)} for key,val in self.marker.mark_list]}") + def __info_marks(self, args: str): + """Show marked symbols. + """ + + if self.marker.mark_list: + qdb_print(QDB_MSG.INFO, f'{"symbol":10s} {"address":10s}') + + for key, addr in self.marker.mark_list: + qdb_print(QDB_MSG.INFO, f'{key:10s} {addr:#010x}') + + else: + qdb_print(QDB_MSG.INFO, 'No marked symbols') + + def __info_snapshot(self, args: str): if self.rr: - qdb_print(QDB_MSG.INFO, f"Snapshots: {len([st for st in self.rr.layers if isinstance(st, self.rr.DiffedState)])}") + if self.rr.layers: + recent = self.rr.layers[-1] + + # regs diff + if recent.reg: + for reg, val in recent.reg.items(): + qdb_print(QDB_MSG.INFO, f'{reg:6s}: {val:08x}') + + else: + qdb_print(QDB_MSG.INFO, 'Regs identical') + + qdb_print(QDB_MSG.INFO, '') + + # system regs diff + if recent.xreg: + for reg, val in recent.xreg.items(): + qdb_print(QDB_MSG.INFO, f'{reg:8s}: {val:08x}') + + else: + qdb_print(QDB_MSG.INFO, 'System regs identical') + + qdb_print(QDB_MSG.INFO, '') + + # ram diff + if recent.ram: + for rng, (opcode, diff) in sorted(recent.ram.items()): + lbound, ubound = rng + perms, label, data = diff + + qdb_print(QDB_MSG.INFO, f'{opcode.name} {lbound:010x} - {ubound:010x} {perms:03b} {label:24s} ~{len(data)}') + + else: + qdb_print(QDB_MSG.INFO, 'Memory identical') + + else: + qdb_print(QDB_MSG.INFO, 'No snapshots') + + else: + qdb_print(QDB_MSG.INFO, 'Snapshots were not enabled for this session') + + def __info_entry(self, args: str): + qdb_print(QDB_MSG.INFO, f'{"Entry point":16s}: {self.ql.loader.entry_point:#010x}') + + if 
hasattr(self.ql.loader, 'elf_entry'): + qdb_print(QDB_MSG.INFO, f'{"ELF entry point":16s}: {self.ql.loader.elf_entry:#010x}') + + def do_info(self, args: str) -> None: + """Provide run-time information. + """ + + subcmd, *a = args.split(maxsplit=1) if args else ('',) + + if not a: + a = [''] + + handlers = { + 'args': self.__info_args, + 'breakpoints': self.__info_breakpoints, + 'mem': self.__info_mem, + 'marks': self.__info_marks, + 'snapshot': self.__info_snapshot, + 'entry': self.__info_entry + } + + if subcmd in handlers: + handlers[subcmd](*a) + + else: + qdb_print(QDB_MSG.ERROR, f'info subcommands: {list(handlers.keys())}') def do_script(self, filename: str) -> None: """ @@ -584,42 +660,51 @@ def do_script(self, filename: str) -> None: """ if filename: - run_qdb_script(self, filename) + self._script = filename + + self.run_qdb_script(filename) else: qdb_print(QDB_MSG.ERROR, "parameter filename must be specified") - def do_shell(self, *command) -> None: + def do_shell(self, args: str) -> None: """ run python code """ + # allowing arbitrary shell commands is a huge security problem. until it gets + # removed, block shell command in scripts for security reasons + if self._script: + qdb_print(QDB_MSG.ERROR, 'shell command is not allowed on script') + return + try: - print(eval(*command)) + print(eval(args)) except: qdb_print(QDB_MSG.ERROR, "something went wrong ...") - def do_quit(self, *args) -> bool: + def do_quit(self, *args: str) -> None: """ exit Qdb and stop running qiling instance """ self.ql.stop() - if self._script: - return True - exit() - def do_EOF(self, *args) -> None: + sys.exit(0) + + def do_EOF(self, *args: str) -> None: """ handle Ctrl+D """ - if input(f"{color.RED}[!] Are you sure about saying good bye ~ ? [Y/n]{color.END} ").strip() == "Y": + prompt = f'{color.RED}[!] are you sure you want to quit? 
[Y/n]{color.END} ' + answer = input(prompt).strip() + + if not answer or answer.lower() == 'y': self.do_quit() do_r = do_run do_s = do_step_in do_n = do_step_over - do_a = do_show_args do_j = do_jump do_m = do_mark do_q = do_quit @@ -628,7 +713,3 @@ def do_EOF(self, *args) -> None: do_c = do_continue do_b = do_breakpoint do_dis = do_disassemble - - -if __name__ == "__main__": - pass diff --git a/qiling/debugger/qdb/render/__init__.py b/qiling/debugger/qdb/render/__init__.py index 1625a52ae..0b7e61807 100644 --- a/qiling/debugger/qdb/render/__init__.py +++ b/qiling/debugger/qdb/render/__init__.py @@ -4,7 +4,6 @@ # from .render import ContextRender -from .render_x86 import ContextRenderX86 +from .render_intel import ContextRenderX86, ContextRenderX64 from .render_mips import ContextRenderMIPS from .render_arm import ContextRenderARM, ContextRenderCORTEX_M -from .render_x8664 import ContextRenderX8664 diff --git a/qiling/debugger/qdb/render/render.py b/qiling/debugger/qdb/render/render.py index aa7a6022d..b1d62b85d 100644 --- a/qiling/debugger/qdb/render/render.py +++ b/qiling/debugger/qdb/render/render.py @@ -3,168 +3,184 @@ # Cross Platform and Multi Architecture Advanced Binary Emulation Framework # +"""Context Render for rendering UI +""" + + +from __future__ import annotations +import os -from capstone import CsInsn -from typing import Mapping -import os, copy +from typing import TYPE_CHECKING, Callable, Collection, Dict, Iterator, List, Mapping, Optional, Sequence, Tuple, Union from ..context import Context from ..const import color +if TYPE_CHECKING: + from qiling.core import Qiling + from ..branch_predictor.branch_predictor import BranchPredictor, Prophecy + from ..misc import InsnLike -""" - Context Render for rendering UI +COLORS = ( + color.DARKCYAN, + color.BLUE, + color.RED, + color.YELLOW, + color.GREEN, + color.PURPLE, + color.CYAN, + color.WHITE +) -""" +RARROW = '\u2192' +RULER = '\u2500' + +CURSOR = '\u25ba' # current instruction cursor +GOING_DN = 
'\u2ba6' # branching downward to a higher address +GOING_UP = '\u2ba4' # branching upward to a lower address -COLORS = (color.DARKCYAN, color.BLUE, color.RED, color.YELLOW, color.GREEN, color.PURPLE, color.CYAN, color.WHITE) class Render: + """Base class for graphical rendering functionality. + + Render objects are agnostic to current emulation state. """ - base class for rendering related functions - """ - def divider_printer(field_name, ruler="─"): + def __init__(self): + # make sure mixin classes are properly initialized + super().__init__() + + self.regs_a_row = 4 # number of regs to display per row + self.stack_num = 8 # number of stack entries to display in context + self.disasm_num = 4 # number of instructions to display in context before and after current pc + + @staticmethod + def divider_printer(header: str, footer: bool = False): """ decorator function for printing divider and field name """ - def decorator(context_dumper): + def decorator(wrapped: Callable): def wrapper(*args, **kwargs): try: width, _ = os.get_terminal_size() except OSError: width = 130 - bar = (width - len(field_name)) // 2 - 1 - print(ruler * bar, field_name, ruler * bar) - context_dumper(*args, **kwargs) - if "DISASM" in field_name: - print(ruler * width) + print(header.center(width, RULER)) + wrapped(*args, **kwargs) + + if footer: + print(RULER * width) return wrapper return decorator - def __init__(self): - self.regs_a_row = 4 - self.stack_num = 10 - self.disasm_num = 0x10 - self.color = color - - def reg_diff(self, cur_regs, saved_reg_dump): + def reg_diff(self, curr: Mapping[str, int], prev: Mapping[str, int]) -> List[str]: """ helper function for highlighting register changed during execution """ - if saved_reg_dump: - reg_dump = copy.deepcopy(saved_reg_dump) - if getattr(self, "regs_need_swapped", None): - reg_dump = self.swap_reg_name(reg_dump) + return [k for k in curr if curr[k] != prev[k]] if prev else [] - return [k for k in cur_regs if cur_regs[k] != reg_dump[k]] - - def 
render_regs_dump(self, regs, diff_reg=None): - """ - helper function for redering registers dump + def render_regs_dump(self, regs: Mapping[str, int], diff_reg: Collection[str]) -> None: + """Helper function for rendering registers dump. """ - lines = "" - for idx, r in enumerate(regs, 1): - line = "{}{}: 0x{{:08x}} {}\t".format(COLORS[(idx-1) // self.regs_a_row], r, color.END) + # find the length of the longest reg name to have all regs aligned in columns + longest = max(len(name) for name in regs) - if diff_reg and r in diff_reg: - line = f"{color.UNDERLINE}{color.BOLD}{line}" + def __render_regs_line() -> Iterator[str]: + elements = [] - if idx % self.regs_a_row == 0 and idx != 32: - line += "\n" + for idx, (name, value) in enumerate(regs.items()): + line_color = f'{COLORS[idx // self.regs_a_row]}' - lines += line + if name in diff_reg: + line_color = f'{color.UNDERLINE}{color.BOLD}{line_color}' - print(lines.format(*regs.values())) + elements.append(f'{line_color}{name:{longest}s}: {value:#010x}{color.END}') - def render_stack_dump(self, arch_sp: int) -> None: - """ - helper function for redering stack dump - """ - - # Loops over stack range (last 10 addresses) - for idx in range(self.stack_num): - addr = arch_sp + idx * self.pointersize + if (idx + 1) % self.regs_a_row == 0: + yield '\t'.join(elements) - ''' - @NOTE: Implemented new class arch_x8664 in order to bugfix issue with only dereferencing 32-bit pointers - on 64-bit emulation passes. 
- ''' - if (val := self.try_read_pointer(addr)[0]): # defined to be try_read_pointer(addr)[0] - dereferneces pointer + elements.clear() - # @TODO: Bug here where the values on the stack are being displayed in 32-bit format - print(f"SP + 0x{idx*self.pointersize:02x}│ [0x{addr:08x}] —▸ 0x{self.unpack(val):08x}", end="") + for line in __render_regs_line(): + print(line) - # try to dereference wether it's a pointer - if (buf := self.try_read_pointer(addr))[0] is not None: + def render_flags(self, flags: Mapping[str, int], before: str = ''): + def __set(f: str) -> str: + return f'{color.BLUE}{f.upper()}{color.END}' - if (addr := self.unpack(buf[0])): + def __cleared(f: str) -> str: + return f'{color.GREEN}{f.lower()}{color.END}' - # try to dereference again - if (buf := self.try_read_pointer(addr))[0] is not None: - s = self.try_read_string(addr) + s_before = f"[{before}] " if before else "" + s_flags = " ".join(__set(f) if val else __cleared(f) for f, val in flags.items()) - if s and s.isprintable(): - print(f" ◂— {self.read_string(addr)}", end="") - else: - print(f" ◂— 0x{self.unpack(buf[0]):08x}", end="") - print() + print(f'{s_before}[flags: {s_flags}]') - def render_assembly(self, lines) -> None: - """ - helper function for rendering assembly + def render_stack_dump(self, sp: int, dump: Sequence[Tuple[int, int, Union[int, str, None]]]) -> None: + """Helper function for rendering stack dump. 
""" - # assembly before current location - if (backward := lines.get("backward", None)): - for line in backward: - self.print_asm(line) + # number of hexadecimal nibbles to display per value + nibbles = self.pointersize * 2 - # assembly for current location - if (cur_insn := lines.get("current", None)): - prophecy = self.predictor.predict() - self.print_asm(cur_insn, to_jump=prophecy.going) + for address, value, deref in dump: + offset = address - sp - # assembly after current location - if (forward := lines.get("forward", None)): - for line in forward: - self.print_asm(line) + value_str = '(unreachable)' if value is None else f'{value:#0{nibbles + 2}x}' - def swap_reg_name(self, cur_regs: Mapping[str, int], extra_dict=None) -> Mapping[str, int]: - """ - swap register name with more readable register name - """ + if isinstance(deref, int): + deref_str = f'{deref:#0{nibbles + 2}x}' - target_items = extra_dict.items() if extra_dict else self.regs_need_swapped.items() + elif isinstance(deref, str): + deref_str = f'"{deref}"' - for old_reg, new_reg in target_items: - cur_regs.update({old_reg: cur_regs.pop(new_reg)}) + else: + deref_str = '' - return cur_regs + print(f'SP + {offset:#04x} │ {address:#010x} : {value_str}{f" {RARROW} {deref_str}" if deref_str else ""}') - def print_asm(self, insn: CsInsn, to_jump: bool = False) -> None: - """ - helper function for printing assembly instructions, indicates where we are and the branch prediction - provided by BranchPredictor + def render_assembly(self, listing: Sequence[InsnLike], pc: int, prediction: Prophecy) -> None: + """Helper function for rendering assembly. 
""" - opcode = "".join(f"{b:02x}" for b in insn.bytes) + def __render_asm_line(insn: InsnLike) -> str: + """Helper function for rendering assembly instructions, indicates where we are and + the branch prediction provided by branch predictor + """ + + trace_line = f"{insn.address:#010x} │ {insn.bytes.hex():18s} {insn.mnemonic:12} {insn.op_str:35s}" + + cursor = '' # current instruction cursor + brmark = '' # branching mark + + if insn.address == pc: + cursor = CURSOR + + if prediction.going: + # branch target might be None in case it should have been + # read from memory but that memory could not be reached + bmark = '?' if prediction.where is None else (GOING_DN if prediction.where > pc else GOING_UP) + + # apply some colors + brmark = f'{color.RED}{bmark}{color.RESET}' - trace_line = f"0x{insn.address:08x} │ {opcode:15s} {insn.mnemonic:10} {insn.op_str:35s}" + # + where = '?' if prediction.where is None else f'{prediction.where:#010x}' - cursor = "►" if self.cur_addr == insn.address else " " + print(f'prediction: {f"taken, {where}" if prediction.going else "not taken"}') + # - jump_sign = f"{color.RED}✓{color.END}" if to_jump else " " + return f"{brmark:1s} {cursor:1s} {color.DARKGRAY}{trace_line}{color.RESET}" - print(f"{jump_sign} {cursor} {color.DARKGRAY}{trace_line}{color.END}") + for insn in listing: + print(__render_asm_line(insn)) class ContextRender(Context, Render): @@ -172,17 +188,17 @@ class ContextRender(Context, Render): base class for context render """ - def __init__(self, ql, predictor): + def __init__(self, ql: Qiling, predictor: BranchPredictor): super().__init__(ql) - Render.__init__(self) + self.predictor = predictor + self.prev_regs: Dict[str, int] = {} - def dump_regs(self) -> Mapping[str, int]: - """ - dump all registers + def get_regs(self) -> Dict[str, int]: + """Save current registers state. 
""" - return {reg_name: self.ql.arch.regs.read(reg_name) for reg_name in self.regs} + return {reg_name: self.read_reg(reg_name) for reg_name in self.regs} @Render.divider_printer("[ STACK ]") def context_stack(self) -> None: @@ -190,50 +206,55 @@ def context_stack(self) -> None: display context stack dump """ - self.render_stack_dump(self.ql.arch.regs.arch_sp) - + sp = self.cur_sp + stack_dump = [] + + for i in range(self.stack_num): + address = sp + i * self.asize + + # attempt to read current stack entry + value = self.try_read_pointer(address) + + # treat stack entry as a pointer and attempt to dereference it + deref = None if value is None else self.get_deref(value) + + stack_dump.append((address, value, deref)) + + self.render_stack_dump(sp, stack_dump) + @Render.divider_printer("[ REGISTERS ]") - def context_reg(self, saved_states: Mapping["str", int]) -> None: - """ - display context registers dump + def context_reg(self) -> None: + """Rendering registers context. """ - return NotImplementedError + curr = self.get_regs() + prev = self.prev_regs + + curr = self.swap_regs(curr) + prev = self.swap_regs(prev) + + diff_reg = self.reg_diff(curr, prev) + self.render_regs_dump(curr, diff_reg) + self.print_mode_info() - @Render.divider_printer("[ DISASM ]") + @Render.divider_printer("[ DISASM ]", footer=True) def context_asm(self) -> None: + """Disassemble srrounding instructions. 
""" - read context assembly and render with render_assembly - """ - lines = {} - past_list = [] - from_addr = self.cur_addr - self.disasm_num - to_addr = self.cur_addr + self.disasm_num - - cur_addr = from_addr - while cur_addr <= to_addr: - insn = self.disasm(cur_addr) - cur_addr += insn.size - past_list.append(insn) - - bk_list = [] - fd_list = [] - cur_insn = None - for each in past_list: - if each.address < self.cur_addr: - bk_list.append(each) - - elif each.address > self.cur_addr: - fd_list.append(each) - - elif each.address == self.cur_addr: - cur_insn = each - - lines.update({ - "backward": bk_list, - "forward": fd_list, - "current": cur_insn, - }) - - self.render_assembly(lines) + address = self.cur_addr + prediction = self.predictor.predict() + + # assuming a single instruction is in the same size of a native pointer. + # this is not true for all architectures. + ptr = address - self.pointersize * self.disasm_num + listing = [] + + # taking disasm_num instructions before, current, and disasm_num instructions after + for _ in range(self.disasm_num * 2 + 1): + insn = self.disasm(ptr) + listing.append(insn) + + ptr += insn.size + + self.render_assembly(listing, address, prediction) diff --git a/qiling/debugger/qdb/render/render_arm.py b/qiling/debugger/qdb/render/render_arm.py index 7209be2c6..5f5adb50d 100644 --- a/qiling/debugger/qdb/render/render_arm.py +++ b/qiling/debugger/qdb/render/render_arm.py @@ -3,73 +3,66 @@ # Cross Platform and Multi Architecture Advanced Binary Emulation Framework # +from typing import Iterator, Optional - -from .render import * +from .render import Render, ContextRender from ..arch import ArchARM, ArchCORTEX_M +from ..misc import InsnLike + class ContextRenderARM(ContextRender, ArchARM): - """ - context render for ARM + """Context renderer for ARM architecture. 
""" - def __init__(self, ql, predictor): - super().__init__(ql, predictor) - ArchARM.__init__(self) - self.disasm_num = 8 + def print_mode_info(self) -> None: + cpsr = self.read_reg('cpsr') - @staticmethod - def print_mode_info(bits): - flags = ArchARM.get_flags(bits) + flags = ArchARM.get_flags(cpsr) + mode = ArchARM.get_mode(cpsr) - print(f"[{flags.pop('mode')} mode] ", end="") - for key, val in flags.items(): - if val: - print(f"{color.BLUE}{key.upper()} ", end="") - else: - print(f"{color.GREEN}{key.lower()} ", end="") + self.render_flags(flags, f'{mode} mode') - print(color.END) + def __disasm_all(self, rng: range) -> Iterator[InsnLike]: + addr = rng.start - @Render.divider_printer("[ REGISTERS ]") - def context_reg(self, saved_reg_dump): - """ - redering context registers + while addr in rng: + insn = self.disasm(addr) + yield insn + + addr += insn.size + + @Render.divider_printer("[ DISASM ]", footer=True) + def context_asm(self) -> None: + """Disassemble srrounding instructions. """ - cur_regs = self.dump_regs() - cur_regs = self.swap_reg_name(cur_regs) - diff_reg = self.reg_diff(cur_regs, saved_reg_dump) - self.render_regs_dump(cur_regs, diff_reg=diff_reg) - self.print_mode_info(self.ql.arch.regs.cpsr) + address = self.cur_addr + prediction = self.predictor.predict() + + # arm thumb may mix narrow and wide instructions so we can never know for + # sure where we need to start reading instructions from. to work around + # that we assume all instructions are wide, and then take the most recent + # ones into consideration. 
+ listing = [] + + begin = address - self.asize * self.disasm_num + end = address + + # disassemble all instructions in range, but keep only the last ones + listing.extend(self.__disasm_all(range(begin, end))) + listing = listing[-self.disasm_num:] + + begin = address + end = address + self.asize * (self.disasm_num + 1) + + # disassemble all instructions in range, but keep only the first ones + listing.extend(self.__disasm_all(range(begin, end))) + listing = listing[:self.disasm_num * 2 + 1] + + self.render_assembly(listing, address, prediction) class ContextRenderCORTEX_M(ContextRenderARM, ArchCORTEX_M): + """Context renderer for ARM Cortex-M architecture. """ - context render for cortex_m - """ - - def __init__(self, ql, predictor): - super().__init__(ql, predictor) - ArchCORTEX_M.__init__(self) - self.regs_a_row = 3 - - @Render.divider_printer("[ REGISTERS ]") - def context_reg(self, saved_reg_dump): - cur_regs = self.dump_regs() - cur_regs = self.swap_reg_name(cur_regs) - - # for re-order - extra_dict = { - "xpsr": "xpsr", - "control": "control", - "primask": "primask", - "faultmask": "faultmask", - "basepri": "basepri", - } - - cur_regs = self.swap_reg_name(cur_regs, extra_dict=extra_dict) - diff_reg = self.reg_diff(cur_regs, saved_reg_dump) - self.render_regs_dump(cur_regs, diff_reg=diff_reg) - self.print_mode_info(self.ql.arch.regs.cpsr) diff --git a/qiling/debugger/qdb/render/render_intel.py b/qiling/debugger/qdb/render/render_intel.py new file mode 100644 index 000000000..0e0b8f7e2 --- /dev/null +++ b/qiling/debugger/qdb/render/render_intel.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +# +# Cross Platform and Multi Architecture Advanced Binary Emulation Framework +# + +from typing import Optional + +from .render import Render, ContextRender +from ..arch import ArchIntel, ArchX86, ArchX64 + + +class ContextRenderIntel(ContextRender): + """Context renderer base class for Intel architecture. 
+ """ + + def print_mode_info(self) -> None: + eflags = self.read_reg('eflags') + + flags = ArchIntel.get_flags(eflags) + iopl = ArchIntel.get_iopl(eflags) + + self.render_flags(flags, f'iopl: {iopl}') + + @Render.divider_printer("[ DISASM ]", footer=True) + def context_asm(self) -> None: + """Disassemble surrounding instructions. + """ + + address = self.cur_addr + prediction = self.predictor.predict() + + ptr = address + listing = [] + + # since intel architecture has instructions with varying sizes, it is + # difficult to tell what were the preceding instructions. for that reason + # we display instructions only from current address and on. + + for _ in range(9): + insn = self.disasm(ptr) + listing.append(insn) + + ptr += insn.size + + self.render_assembly(listing, address, prediction) + + +class ContextRenderX86(ContextRenderIntel, ArchX86): + """Context renderer for x86 architecture. + """ + + +class ContextRenderX64(ContextRenderIntel, ArchX64): + """Context renderer for x86-64 architecture. + """ diff --git a/qiling/debugger/qdb/render/render_mips.py b/qiling/debugger/qdb/render/render_mips.py index ff67891d8..13f01c658 100644 --- a/qiling/debugger/qdb/render/render_mips.py +++ b/qiling/debugger/qdb/render/render_mips.py @@ -3,27 +3,13 @@ # Cross Platform and Multi Architecture Advanced Binary Emulation Framework # - - -from .render import * +from .render import ContextRender from ..arch import ArchMIPS + class ContextRenderMIPS(ContextRender, ArchMIPS): + """Context renderer for MIPS architecture. 
""" - context render for MIPS - """ - - def __init__(self, ql, predictor): - super().__init__(ql, predictor) - ArchMIPS.__init__(self) - - @Render.divider_printer("[ REGISTERS ]") - def context_reg(self, saved_reg_dump): - """ - redering context registers - """ - cur_regs = self.dump_regs() - cur_regs = self.swap_reg_name(cur_regs) - diff_reg = self.reg_diff(cur_regs, saved_reg_dump) - self.render_regs_dump(cur_regs, diff_reg=diff_reg) + def print_mode_info(self) -> None: + pass diff --git a/qiling/debugger/qdb/render/render_x86.py b/qiling/debugger/qdb/render/render_x86.py deleted file mode 100644 index c13b92fe7..000000000 --- a/qiling/debugger/qdb/render/render_x86.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env python3 -# -# Cross Platform and Multi Architecture Advanced Binary Emulation Framework -# - - - -from .render import * -from ..arch import ArchX86 - -class ContextRenderX86(ContextRender, ArchX86): - """ - context render for X86 - """ - - def __init__(self, ql, predictor): - super().__init__(ql, predictor) - ArchX86.__init__(self) - - @Render.divider_printer("[ REGISTERS ]") - def context_reg(self, saved_reg_dump): - cur_regs = self.dump_regs() - diff_reg = self.reg_diff(cur_regs, saved_reg_dump) - self.render_regs_dump(cur_regs, diff_reg=diff_reg) - - flags = self.get_flags(self.ql.arch.regs.eflags) - print("EFLAGS: ", end="") - print(color.GREEN, end="") - for key, val in flags.items(): - if val: - print(f"{color.BLUE}{key.upper()} ", end="") - else: - print(f"{color.GREEN}{key.lower()} ", end="") - - print(color.END) - - @Render.divider_printer("[ DISASM ]") - def context_asm(self): - lines = {} - past_list = [] - - cur_addr = self.cur_addr - while len(past_list) < 10: - line = self.disasm(cur_addr) - past_list.append(line) - cur_addr += line.size - - fd_list = [] - cur_insn = None - for each in past_list: - if each.address > self.cur_addr: - fd_list.append(each) - - elif each.address == self.cur_addr: - cur_insn = each - - """ - only forward and 
current instruction will be printed, - because we don't have a solid method to disasm backward instructions, - since it's x86 instruction length is variadic - """ - - lines.update({ - "current": cur_insn, - "forward": fd_list, - }) - - self.render_assembly(lines) diff --git a/qiling/debugger/qdb/render/render_x8664.py b/qiling/debugger/qdb/render/render_x8664.py deleted file mode 100644 index 22c687d49..000000000 --- a/qiling/debugger/qdb/render/render_x8664.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python3 -# -# Cross Platform and Multi Architecture Advanced Binary Emulation Framework -# - - - -from .render import * -from ..arch import ArchX8664 - -class ContextRenderX8664(ContextRender, ArchX8664): - """ - Context render for X86_64 - """ - - def __init__(self, ql, predictor): - super().__init__(ql, predictor) - ArchX8664.__init__(self) - - @Render.divider_printer("[ REGISTERS ]") - def context_reg(self, saved_reg_dump): - cur_regs = self.dump_regs() - diff_reg = self.reg_diff(cur_regs, saved_reg_dump) - self.render_regs_dump(cur_regs, diff_reg=diff_reg) - print(color.GREEN, "EFLAGS: [CF: {flags[CF]}, PF: {flags[PF]}, AF: {flags[AF]}, ZF: {flags[ZF]}, SF: {flags[SF]}, OF: {flags[OF]}]".format(flags=self.get_flags(self.ql.arch.regs.eflags)), color.END, sep="") - - @Render.divider_printer("[ DISASM ]") - def context_asm(self): - lines = {} - past_list = [] - - cur_addr = self.cur_addr - while len(past_list) < 10: - line = self.disasm(cur_addr) - past_list.append(line) - cur_addr += line.size - - fd_list = [] - cur_insn = None - for each in past_list: - if each.address > self.cur_addr: - fd_list.append(each) - - elif each.address == self.cur_addr: - cur_insn = each - - """ - only forward and current instruction will be printed, - because we don't have a solid method to disasm backward instructions, - since it's x86 instruction length is variadic - """ - - lines.update({ - "current": cur_insn, - "forward": fd_list, - }) - - self.render_assembly(lines) diff 
--git a/qiling/debugger/qdb/utils.py b/qiling/debugger/qdb/utils.py index c5f0d4456..03be0ba89 100644 --- a/qiling/debugger/qdb/utils.py +++ b/qiling/debugger/qdb/utils.py @@ -4,16 +4,16 @@ # from __future__ import annotations -from typing import TYPE_CHECKING, Callable, Dict, Mapping, Tuple, Type -from capstone import CsInsn +from enum import Enum +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Mapping, Optional, Tuple, Type, TypeVar, Union from qiling.const import QL_ARCH from .render import ( ContextRender, ContextRenderX86, - ContextRenderX8664, + ContextRenderX64, ContextRenderARM, ContextRenderCORTEX_M, ContextRenderMIPS @@ -22,7 +22,7 @@ from .branch_predictor import ( BranchPredictor, BranchPredictorX86, - BranchPredictorX8664, + BranchPredictorX64, BranchPredictorARM, BranchPredictorCORTEX_M, BranchPredictorMIPS, @@ -36,81 +36,69 @@ from .qdb import QlQdb -def qdb_print(msgtype: QDB_MSG, msg: str) -> None: - """ - color printing - """ +_K = TypeVar('_K') +_V = TypeVar('_V') - def print_error(msg): - return f"{color.RED}[!] {msg}{color.END}" - def print_info(msg): - return f"{color.CYAN}[+] {msg}{color.END}" +def qdb_print(level: QDB_MSG, msg: str) -> None: + """Log printing. 
+ """ - color_coated = { - QDB_MSG.ERROR: print_error, - QDB_MSG.INFO : print_info, - }.get(msgtype)(msg) + decorations = { + QDB_MSG.ERROR: ('!', color.RED), + QDB_MSG.INFO : ('+', color.CYAN), + } - print(color_coated) + tag, col = decorations[level] + print(f'{col}[{tag}] {msg}{color.END}') -def setup_address_marker(): - class Marker: - """provide the ability to mark an address as a more easier rememberable alias - """ +class Marker: + """provide the ability to mark an address as a more easier rememberable alias + """ - def __init__(self): - self._mark_list = {} + def __init__(self): + self._mark_list: Dict[str, int] = {} - def get_symbol(self, sym): - """ - get the mapped address to a symbol if it's in the mark_list - """ + def get_address(self, sym: str) -> Optional[int]: + """ + get the mapped address to a symbol if it's in the mark_list + """ - return self._mark_list.get(sym, None) + return self._mark_list.get(sym) - @property - def mark_list(self): - """ - get a list about what we marked - """ + @property + def mark_list(self): + """ + get a list about what we marked + """ - return self._mark_list.items() + return self._mark_list.items() - def gen_sym_name(self): - """ - generating symbol name automatically - """ + def gen_sym_name(self) -> str: + """ + generating symbol name automatically + """ - sym_name, idx = "sym0", 0 - while sym_name in self._mark_list: - idx += 1 - sym_name = f"sym{idx}" + syms = len(self._mark_list) - return sym_name + # find the next available 'sym#' + return next((f'sym{i}' for i in range(syms) if f'sym{i}' not in self._mark_list), f'sym{syms}') - def mark_only_loc(self, loc): - """ - mark when location provided only - """ + def mark(self, loc: int, sym: Optional[str] = None) -> str: + """ + mark loc as sym + """ - sym_name = self.gen_sym_name() - self.mark(sym_name, loc) - return sym_name + sym = sym or self.gen_sym_name() - def mark(self, sym: str, loc: int): - """ - mark loc as sym - """ + if sym in self._mark_list: + return '' 
- if sym not in self.mark_list: - self._mark_list.update({sym: loc}) - else: - return f"dumplicated symbol name: {sym} at address: 0x{loc:08x}" + self._mark_list[sym] = loc - return Marker() + return sym # helper functions for setting proper branch predictor and context render depending on different arch @@ -120,7 +108,7 @@ def setup_branch_predictor(ql: Qiling) -> BranchPredictor: preds: Dict[QL_ARCH, Type[BranchPredictor]] = { QL_ARCH.X86: BranchPredictorX86, - QL_ARCH.X8664: BranchPredictorX8664, + QL_ARCH.X8664: BranchPredictorX64, QL_ARCH.ARM: BranchPredictorARM, QL_ARCH.CORTEX_M: BranchPredictorCORTEX_M, QL_ARCH.MIPS: BranchPredictorMIPS @@ -136,7 +124,7 @@ def setup_context_render(ql: Qiling, predictor: BranchPredictor) -> ContextRende rends: Dict[QL_ARCH, Type[ContextRender]] = { QL_ARCH.X86: ContextRenderX86, - QL_ARCH.X8664: ContextRenderX8664, + QL_ARCH.X8664: ContextRenderX64, QL_ARCH.ARM: ContextRenderARM, QL_ARCH.CORTEX_M: ContextRenderCORTEX_M, QL_ARCH.MIPS: ContextRenderMIPS @@ -146,121 +134,137 @@ def setup_context_render(ql: Qiling, predictor: BranchPredictor) -> ContextRende return r(ql, predictor) -def run_qdb_script(qdb: QlQdb, filename: str) -> None: - with open(filename) as fd: - for line in iter(fd.readline, ""): - # skip commented and empty line - if line.startswith("#") or line == "\n": - continue +class MemDiff(Enum): + ADD = '+' + REM = '-' + MOD = '*' - cmd, arg, _ = qdb.parseline(line) - func = getattr(qdb, f"do_{cmd}") - if arg: - func(arg) - else: - func() +RamKey = Tuple[int, int] +RamVal = Tuple[int, str, bytes] + +RamDiffKey = Tuple[int, int] +RamDiffVal = Tuple[MemDiff, Tuple[int, str, Union[bytes, Tuple]]] -class SnapshotManager: - """for functioning differential snapshot - Supports Qdb features like: - 1. record/replay debugging - 2. 
memory access in gdb-style +class DiffedState: + """ + internal container for storing diffed state """ - class State: - """ - internal container for storing raw state from qiling - """ + def __init__(self, reg, xreg, ram, loader): + self.reg: Dict[str, int] = reg + self.xreg: Dict[str, int] = xreg + self.ram: Dict[RamDiffKey, RamDiffVal] = ram + self.loader: Dict[str, Any] = loader - def __init__(self, saved_state): - self.reg, self.ram, self.xreg = SnapshotManager.transform(saved_state) - class DiffedState: - """ - internal container for storing diffed state - """ +class State: + """ + internal container for storing raw state from qiling + """ + + def __init__(self, saved: Mapping[str, Mapping]): + self.reg: Dict[str, int] = saved.get("reg") or {} + self.xreg: Dict[str, int] = saved.get("cpr") or saved.get("msr") or {} + + mem = saved.get("mem") or {} + ram = mem.get("ram") or [] + + # saved ram lists might not match in order, we turn them into dicts to work around + # that. in these dicts every memory content is mapped to its memory entry's properties + self.ram: Dict[RamKey, RamVal] = {(lbound, ubound): (perms, label, data) for lbound, ubound, perms, label, data in ram} - def __init__(self, diffed_st): - self.reg, self.ram, self.xreg = diffed_st + self.loader: Dict[str, Any] = saved.get('loader') or {} @staticmethod - def transform(st): - """ - transform saved context into binary set - """ + def __dict_diff(d0: Mapping[_K, _V], d1: Mapping[_K, _V]) -> Dict[_K, _V]: + return {k: v for k, v in d0.items() if v != d1.get(k)} - reg = st.get("reg", {}) - mem = st.get("mem", []) - xreg = st.get("cpr") or st.get("msr") or {} + def _diff_reg(self, other: State) -> Dict[str, int]: + return State.__dict_diff(self.reg, other.reg) - ram = [] - for mem_seg in mem["ram"]: - lbound, ubound, perms, label, raw_bytes = mem_seg - rb_set = {(idx, val) for idx, val in enumerate(raw_bytes)} - ram.append((lbound, ubound, perms, label, rb_set)) + def _diff_xreg(self, other: State) -> 
Dict[str, int]: + return State.__dict_diff(self.xreg, other.xreg) - return (reg, ram, xreg) + def _diff_ram(self, other: State) -> Dict[RamDiffKey, RamDiffVal]: + ram0 = self.ram + ram1 = other.ram - def __init__(self, ql): - self.ql = ql - self.layers = [] + ram_diff: Dict[RamDiffKey, RamDiffVal] = {} - def _save(self) -> State: - """ - acquire current State by wrapping saved context from ql.save() - """ + removed = [rng for rng in ram0 if rng not in ram1] + added = [rng for rng in ram1 if rng not in ram0] + modified = [rng for rng in ram0 if rng in ram1 and ram0[rng] != ram1[rng]] - return self.State(self.ql.save()) + # memory regions that got removed should be re-added + for rng in removed: + ram_diff[rng] = (MemDiff.ADD, ram0[rng]) - def diff_reg(self, prev_reg, cur_reg): - """ - diff two register values - """ + # memory regions that got added should be removed + for rng in added: + _, label, _ = ram1[rng] - diffed = filter(lambda t: t[0] != t[1], zip(prev_reg.items(), cur_reg.items())) - return {prev[0]: prev[1] for prev, _ in diffed} + # though we discard data as it is not required anymore, label is still required + # to determine the method of removing the region: brk, mmap, or ordinary map + ram_diff[rng] = (MemDiff.REM, (-1, label, b'')) - def diff_ram(self, prev_ram, cur_ram): - """ - diff two ram data if needed - """ + # memory regions that got modified should be reverted back + for rng in modified: + perms0, label0, data0 = ram0[rng] + perms1, label1, data1 = ram1[rng] - if any((cur_ram is None, prev_ram is None, prev_ram == cur_ram)): - return + perms = -1 if perms0 == perms1 else perms0 - ram = [] - paired = zip(prev_ram, cur_ram) - for each in paired: - # lbound, ubound, perm, label, data - *prev_others, prev_rb_set = each[0] - *cur_others, cur_rb_set = each[1] + assert label0 == label1, 'memory region label changed unexpectedly' + assert len(data0) == len(data1), 'memory contents differ in size' - if prev_others == cur_others and cur_rb_set != 
prev_rb_set: - diff_set = prev_rb_set - cur_rb_set - else: - continue + # scan both data chunks and keep the index and byte value of the unmatched ones. + # if memory contents are identical, this will result in an empty tuple + data_diff = tuple((i, b0) for i, (b0, b1) in enumerate(zip(data0, data1)) if b0 != b1) - ram.append((*cur_others, diff_set)) + ram_diff[rng] = (MemDiff.MOD, (perms, label0, data_diff)) - return ram + # + # for rng, (opcode, diff) in sorted(ram_diff.items()): + # lbound, ubound = rng + # perms, label, data = diff + # + # print(f'{opcode.name} {lbound:010x} - {ubound:010x} {perms:03b} {label:24s} ~{len(data)}') + # + + return ram_diff - def diff(self, before_st, after_st): + def diff(self, other: State) -> DiffedState: + """Diff between previous and current state. """ - diff between previous and current state + + return DiffedState( + self._diff_reg(other), + self._diff_xreg(other), + self._diff_ram(other), + self.loader + ) + + +class SnapshotManager: + """Differential snapshot object. 
+ """ + + def __init__(self, ql: Qiling): + self.ql = ql + self.layers: List[DiffedState] = [] + + def save(self) -> State: + """ + acquire current State by wrapping saved context from ql.save() """ - # prev_st = self.layers.pop() - diffed_reg = self.diff_reg(before_st.reg, after_st.reg) - diffed_ram = self.diff_ram(before_st.ram, after_st.ram) - diffed_xreg = self.diff_reg(before_st.xreg, after_st.xreg) - # diffed_reg = self.diff_reg(prev_st.reg, cur_st.reg) - # diffed_ram = self.diff_ram(prev_st.ram, cur_st.ram) - return self.DiffedState((diffed_reg, diffed_ram, diffed_xreg)) + return State(self.ql.save(reg=True, mem=True, loader=True)) - def snapshot(func): + @staticmethod + def snapshot(func: Callable) -> Callable: """ decorator function for saving differential context on certian qdb command """ @@ -268,17 +272,16 @@ def snapshot(func): def magic(self: QlQdb, *args, **kwargs): if self.rr: # save State before execution - p_st = self.rr._save() + before = self.rr.save() # certian execution to be snapshot func(self, *args, **kwargs) # save State after execution - q_st = self.rr._save() + after = self.rr.save() # merge two saved States into a DiffedState - st = self.rr.diff(p_st, q_st) - self.rr.layers.append(st) + self.rr.layers.append(before.diff(after)) else: func(self, *args, **kwargs) @@ -289,49 +292,65 @@ def restore(self): helper function for restoring running state from an existing incremental snapshot """ - prev_st = self.layers.pop() - cur_st = self._save() + prev_st = self.layers.pop() # DiffedState + curr_st = self.save() # State, expected to be identical to 'after' State in snapshot method + + curr_st.reg.update(prev_st.reg) + curr_st.xreg.update(prev_st.xreg) + + if prev_st.ram: + diff_ram = prev_st.ram + curr_ram = curr_st.ram + + # we must begin by removing unwanted memory regions, otherwise we would not be able to + # add new ones in case they overlap. 
here we iterate over the diff dictionary but handle + # only remove opcodes + for rng, (opcode, props) in diff_ram.items(): + lbound, ubound = rng + size = ubound - lbound - for reg_name, reg_value in prev_st.reg.items(): - cur_st.reg[reg_name] = reg_value + if opcode is MemDiff.REM: + # NOTE: it doesn't seem like distinguishing between brk, mmap, mmap annonymous + # and regular maps is actually required + self.ql.mem.unmap(lbound, size) - for reg_name, reg_value in prev_st.xreg.items(): - cur_st.xreg[reg_name] = reg_value + # doind a second pass, but this time handling add and modify opcodes + for rng, (opcode, props) in diff_ram.items(): + lbound, ubound = rng + perms, label, data = props + size = ubound - lbound - to_be_restored = { - "reg": cur_st.reg, + if opcode is MemDiff.ADD: + # TODO: distinguish between brk, mmap, mmap annonymous and regular maps + + self.ql.mem.map(lbound, size, perms, label) + self.ql.mem.write(lbound, data) + + elif opcode is MemDiff.MOD: + if perms != -1: + self.ql.mem.protect(lbound, size, perms) + + # is there a diff for this memory range? + if data: + # get current memory content + _, _, curr_data = curr_ram[rng] + curr_data = bytearray(curr_data) + + # patch with existing diff + for i, b in data: + curr_data[i] = b + + # write patched data + self.ql.mem.write(lbound, bytes(curr_data)) + + self.ql.restore({ + 'reg': curr_st.reg, # though we have arch-specific context to restore, we want to keep this arch-agnostic. # one way to work around that is to include 'xreg' both as msr (intel) and cpr (arm). # only the relevant one will be picked up while the other one will be discarded - "msr": cur_st.xreg, - "cpr": cur_st.xreg - } + 'msr': curr_st.xreg, + 'cpr': curr_st.xreg, - # FIXME: not sure how this one even works. while curr_st is a fresh qiling snapshot, - # prev_st is a DiffedState which does not hold a complete state but only a diff between - # two points which seem to be unrelated here. 
- # - # this code only patches current memory content with the diff between points a and b while - # we may be already be at point c. - if getattr(prev_st, "ram", None) and prev_st.ram != cur_st.ram: - - ram = [] - # lbound, ubound, perm, label, data - for each in prev_st.ram: - *prev_others, prev_rb_set = each - for *cur_others, cur_rb_set in cur_st.ram: - if prev_others == cur_others: - cur_rb_dict = dict(cur_rb_set) - for idx, val in prev_rb_set: - cur_rb_dict[idx] = val - - bs = bytes(dict(sorted(cur_rb_dict.items())).values()) - ram.append((*cur_others, bs)) - - to_be_restored["mem"] = { - "ram": ram, - "mmio": {} - } - - self.ql.restore(to_be_restored) + 'loader': prev_st.loader + }) diff --git a/qiling/debugger/utils.py b/qiling/debugger/utils.py deleted file mode 100644 index 5fa75e330..000000000 --- a/qiling/debugger/utils.py +++ /dev/null @@ -1,344 +0,0 @@ -#!/usr/bin/env python3 -# -# Cross Platform and Multi Architecture Advanced Binary Emulation Framework -# - -from elftools.common.exceptions import ELFError -from elftools.common.py3compat import ( - ifilter, byte2int, bytes2str, itervalues, str2bytes, iterbytes) -from elftools.elf.elffile import ELFFile -from elftools.elf.dynamic import DynamicSection, DynamicSegment -from elftools.elf.enums import ENUM_D_TAG -from elftools.elf.segments import InterpSegment -from elftools.elf.sections import NoteSection, SymbolTableSection -from elftools.elf.gnuversions import ( - GNUVerSymSection, GNUVerDefSection, - GNUVerNeedSection, - ) -from elftools.elf.relocation import RelocationSection -from elftools.elf.descriptions import ( - describe_ei_class, describe_ei_data, describe_ei_version, - describe_ei_osabi, describe_e_type, describe_e_machine, - describe_e_version_numeric, describe_p_type, describe_p_flags, - describe_sh_type, describe_sh_flags, - describe_symbol_type, describe_symbol_bind, describe_symbol_visibility, - describe_symbol_shndx, describe_reloc_type, describe_dyn_tag, - describe_dt_flags, 
describe_dt_flags_1, describe_ver_flags, describe_note, - describe_attr_tag_arm - ) -from elftools.elf.constants import E_FLAGS -from elftools.elf.constants import E_FLAGS_MASKS - -from qiling import Qiling - - -class QlReadELF(object): - def __init__(self, ql:Qiling, elf_stream): - self.ql = ql - self.elffile = ELFFile(elf_stream) - self._versioninfo = None - - def elf_file_header(self): - elf_header = {} - def add_info(key, value): - elf_header[key] = value - - header = self.elffile.header - e_ident = header['e_ident'] - - add_info('Magic', ' '.join('%2.2x' % byte2int(b) - for b in self.elffile.e_ident_raw)) - add_info('Class',describe_ei_class(e_ident['EI_CLASS'])) - add_info('Data', describe_ei_data(e_ident['EI_DATA'])) - add_info('Version', e_ident['EI_VERSION']) - add_info('OS/ABI', describe_ei_osabi(e_ident['EI_OSABI'])) - add_info('ABI Version', e_ident['EI_ABIVERSION']) - add_info('Type', describe_e_type(header['e_type'])) - add_info('Machine', describe_e_machine(header['e_machine'])) - add_info('Version_e', describe_e_version_numeric(header['e_version'])) - add_info('Entry point address', self._format_hex(header['e_entry'])) - add_info('Start of program headers', header['e_phoff']) - add_info('Start of section headers', header['e_shoff']) - add_info('Flags', [self._format_hex(header['e_flags']), - self.decode_flags(header['e_flags'])]) - add_info('Size of this header', header['e_ehsize']) - add_info('Size of program headers', header['e_phentsize']) - add_info('Number of program headers', header['e_phnum']) - add_info('Size of section headers', header['e_shentsize']) - add_info('Number of section headers', header['e_shnum']) - add_info('Section header string table index', header['e_shstrndx']) - - return elf_header - - def elf_program_headers(self): - program_headers = [] - def add_info(dic): - program_headers.append(dic) - - if self.elffile.num_segments() == 0: - return None - - for segment in self.elffile.iter_segments(): - program_hdr = {} - 
program_hdr['Type'] = describe_p_type(segment['p_type']) - program_hdr['Offset'] = self._format_hex(segment['p_offset'], fieldsize=6) - program_hdr['VirtAddr'] = self._format_hex(segment['p_vaddr'], fullhex=True) - program_hdr['PhysAddr'] = self._format_hex(segment['p_paddr'], fullhex=True) - program_hdr['FileSiz'] = self._format_hex(segment['p_filesz'], fieldsize=5) - program_hdr['MemSiz'] = self._format_hex(segment['p_memsz'], fieldsize=5) - program_hdr['Flg'] = describe_p_flags(segment['p_flags']) - program_hdr['Align'] = self._format_hex(segment['p_align']) - - add_info(program_hdr) - - return program_headers - - def elf_section_headers(self): - section_headers = [] - def add_info(dic): - section_headers.append(dic) - - if self.elffile.num_sections() == 0: - return None - - for nsec, section in enumerate(self.elffile.iter_sections()): - section_hdr = {} - section_hdr['index'] = nsec - section_hdr['Name'] = section.name - section_hdr['Type'] = describe_sh_type(section['sh_type']) - section_hdr['Addr'] = self._format_hex(section['sh_addr'], fieldsize=8, lead0x=False) - section_hdr['Offset'] = self._format_hex(section['sh_offset'], fieldsize=6, lead0x=False) - section_hdr['Size'] = self._format_hex(section['sh_size'], fieldsize=6, lead0x=False) - section_hdr['ES'] = self._format_hex(section['sh_entsize'], fieldsize=2, lead0x=False) - section_hdr['Flag'] = describe_sh_flags(section['sh_flags']) - section_hdr['Lk'] = section['sh_link'] - section_hdr['Inf'] = section['sh_info'] - section_hdr['Al'] = section['sh_addralign'] - - add_info(section_hdr) - - return section_headers - - def elf_symbol_tables(self): - symbol_tables_list = [] - def add_info(dic): - symbol_tables_list.append(dic) - - self._init_versioninfo() - - symbol_tables = [s for s in self.elffile.iter_sections() - if isinstance(s, SymbolTableSection)] - - if not symbol_tables and self.elffile.num_sections() == 0: - return None - - for section in symbol_tables: - if not isinstance(section, 
SymbolTableSection): - continue - - if section['sh_entsize'] == 0: - continue - - for nsym, symbol in enumerate(section.iter_symbols()): - version_info = '' - if (section['sh_type'] == 'SHT_DYNSYM' and - self._versioninfo['type'] == 'GNU'): - version = self._symbol_version(nsym) - if (version['name'] != symbol.name and - version['index'] not in ('VER_NDX_LOCAL', - 'VER_NDX_GLOBAL')): - if version['filename']: - # external symbol - version_info = '@%(name)s (%(index)i)' % version - else: - # internal symbol - if version['hidden']: - version_info = '@%(name)s' % version - else: - version_info = '@@%(name)s' % version - - symbol_info = {} - symbol_info['index'] = nsym - symbol_info['Value'] = self._format_hex( - symbol['st_value'], fullhex=True, lead0x=False) - symbol_info['Size'] = symbol['st_size'] - symbol_info['Type'] = describe_symbol_type(symbol['st_info']['type']) - symbol_info['Bind'] = describe_symbol_bind(symbol['st_info']['bind']) - symbol_info['Vis'] = describe_symbol_visibility(symbol['st_other']['visibility']) - symbol_info['Ndx'] = describe_symbol_shndx(symbol['st_shndx']) - symbol_info['Name'] = symbol.name - symbol_info['version_info'] = version_info - add_info(symbol_info) - return symbol_tables_list - - def decode_flags(self, flags): - description = "" - if self.elffile['e_machine'] == "EM_ARM": - eabi = flags & E_FLAGS.EF_ARM_EABIMASK - flags &= ~E_FLAGS.EF_ARM_EABIMASK - - if flags & E_FLAGS.EF_ARM_RELEXEC: - description += ', relocatable executabl' - flags &= ~E_FLAGS.EF_ARM_RELEXEC - - if eabi == E_FLAGS.EF_ARM_EABI_VER5: - EF_ARM_KNOWN_FLAGS = E_FLAGS.EF_ARM_ABI_FLOAT_SOFT|E_FLAGS.EF_ARM_ABI_FLOAT_HARD|E_FLAGS.EF_ARM_LE8|E_FLAGS.EF_ARM_BE8 - description += ', Version5 EABI' - if flags & E_FLAGS.EF_ARM_ABI_FLOAT_SOFT: - description += ", soft-float ABI" - elif flags & E_FLAGS.EF_ARM_ABI_FLOAT_HARD: - description += ", hard-float ABI" - - if flags & E_FLAGS.EF_ARM_BE8: - description += ", BE8" - elif flags & E_FLAGS.EF_ARM_LE8: - description += 
", LE8" - - if flags & ~EF_ARM_KNOWN_FLAGS: - description += ', ' - else: - description += ', ' - - elif self.elffile['e_machine'] == "EM_MIPS": - if flags & E_FLAGS.EF_MIPS_NOREORDER: - description += ", noreorder" - if flags & E_FLAGS.EF_MIPS_PIC: - description += ", pic" - if flags & E_FLAGS.EF_MIPS_CPIC: - description += ", cpic" - if (flags & E_FLAGS.EF_MIPS_ABI2): - description += ", abi2" - if (flags & E_FLAGS.EF_MIPS_32BITMODE): - description += ", 32bitmode" - if (flags & E_FLAGS_MASKS.EFM_MIPS_ABI_O32): - description += ", o32" - elif (flags & E_FLAGS_MASKS.EFM_MIPS_ABI_O64): - description += ", o64" - elif (flags & E_FLAGS_MASKS.EFM_MIPS_ABI_EABI32): - description += ", eabi32" - elif (flags & E_FLAGS_MASKS.EFM_MIPS_ABI_EABI64): - description += ", eabi64" - if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_1: - description += ", mips1" - if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_2: - description += ", mips2" - if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_3: - description += ", mips3" - if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_4: - description += ", mips4" - if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_5: - description += ", mips5" - if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_32R2: - description += ", mips32r2" - if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_64R2: - description += ", mips64r2" - if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_32: - description += ", mips32" - if (flags & E_FLAGS.EF_MIPS_ARCH) == E_FLAGS.EF_MIPS_ARCH_64: - description += ", mips64" - - return description - - def _format_hex(self, addr, fieldsize=None, fullhex=False, lead0x=True, - alternate=False): - """ Format an address into a hexadecimal string. - - fieldsize: - Size of the hexadecimal field (with leading zeros to fit the - address into. For example with fieldsize=8, the format will - be %08x - If None, the minimal required field size will be used. 
- - fullhex: - If True, override fieldsize to set it to the maximal size - needed for the elfclass - - lead0x: - If True, leading 0x is added - - alternate: - If True, override lead0x to emulate the alternate - hexadecimal form specified in format string with the # - character: only non-zero values are prefixed with 0x. - This form is used by readelf. - """ - if alternate: - if addr == 0: - lead0x = False - else: - lead0x = True - fieldsize -= 2 - - s = '0x' if lead0x else '' - if fullhex: - fieldsize = 8 if self.elffile.elfclass == 32 else 16 - if fieldsize is None: - field = '%x' - else: - field = '%' + '0%sx' % fieldsize - return s + field % addr - - def _init_versioninfo(self): - """ Search and initialize informations about version related sections - and the kind of versioning used (GNU or Solaris). - """ - if self._versioninfo is not None: - return - - self._versioninfo = {'versym': None, 'verdef': None, - 'verneed': None, 'type': None} - - for section in self.elffile.iter_sections(): - if isinstance(section, GNUVerSymSection): - self._versioninfo['versym'] = section - elif isinstance(section, GNUVerDefSection): - self._versioninfo['verdef'] = section - elif isinstance(section, GNUVerNeedSection): - self._versioninfo['verneed'] = section - elif isinstance(section, DynamicSection): - for tag in section.iter_tags(): - if tag['d_tag'] == 'DT_VERSYM': - self._versioninfo['type'] = 'GNU' - break - - if not self._versioninfo['type'] and ( - self._versioninfo['verneed'] or self._versioninfo['verdef']): - self._versioninfo['type'] = 'Solaris' - - def _symbol_version(self, nsym): - """ Return a dict containing information on the - or None if no version information is available - """ - self._init_versioninfo() - - symbol_version = dict.fromkeys(('index', 'name', 'filename', 'hidden')) - - if (not self._versioninfo['versym'] or - nsym >= self._versioninfo['versym'].num_symbols()): - return None - - symbol = self._versioninfo['versym'].get_symbol(nsym) - index = 
symbol.entry['ndx'] - if not index in ('VER_NDX_LOCAL', 'VER_NDX_GLOBAL'): - index = int(index) - - if self._versioninfo['type'] == 'GNU': - # In GNU versioning mode, the highest bit is used to - # store wether the symbol is hidden or not - if index & 0x8000: - index &= ~0x8000 - symbol_version['hidden'] = True - - if (self._versioninfo['verdef'] and - index <= self._versioninfo['verdef'].num_versions()): - _, verdaux_iter = \ - self._versioninfo['verdef'].get_version(index) - symbol_version['name'] = next(verdaux_iter).name - else: - verneed, vernaux = \ - self._versioninfo['verneed'].get_version(index) - symbol_version['name'] = vernaux.name - symbol_version['filename'] = verneed.name - - symbol_version['index'] = index - return symbol_version diff --git a/qiling/loader/elf.py b/qiling/loader/elf.py index 076cb8f0b..81ea096cb 100644 --- a/qiling/loader/elf.py +++ b/qiling/loader/elf.py @@ -7,7 +7,7 @@ import os from enum import IntEnum -from typing import AnyStr, Optional, Sequence, Mapping, Tuple +from typing import Any, AnyStr, Optional, Sequence, Mapping, Tuple from elftools.common.utils import preserve_stream_pos from elftools.elf.constants import P_FLAGS, SH_FLAGS @@ -701,3 +701,15 @@ def get_elfdata_mapping(self, elffile: ELFFile) -> bytes: elfdata_mapping.extend(sec.data()) return bytes(elfdata_mapping) + + def save(self) -> Mapping[str, Any]: + saved = super().save() + + saved['brk_address'] = self.brk_address + + return saved + + def restore(self, saved_state: Mapping[str, Any]): + self.brk_address = saved_state['brk_address'] + + super().restore(saved_state) diff --git a/qiling/os/memory.py b/qiling/os/memory.py index ec643c0e4..0939fc278 100644 --- a/qiling/os/memory.py +++ b/qiling/os/memory.py @@ -57,7 +57,7 @@ def __read_string(self, addr: int) -> str: addr += 1 c = self.read(addr, 1) - return ret.decode() + return ret.decode('latin1') def __write_string(self, addr: int, s: str, encoding: str): self.write(addr, bytes(s, encoding) + b'\x00') diff 
--git a/tests/qdb_scripts/arm.qdb b/tests/qdb_scripts/arm.qdb index 5bfa261a9..1336b219e 100644 --- a/tests/qdb_scripts/arm.qdb +++ b/tests/qdb_scripts/arm.qdb @@ -1,13 +1,37 @@ -# This line is demonstrate comment in qdb script +# break on entry to main +b 0x000103fc -x/10wx 0x7ff3cee4 -x $sp -x $sp + 0x10 -x/5i 0x047ba9e0 -b 0x047ba9ec +# break on call to puts +b 0x00010414 + +# run till main +c + +# show stack entries +x/8xw $sp + +# run till puts c -s + +# show argument passed to puts +info args 1 + +# show instructions passed call till end of function +x/4i ($pc + 4) + +# step over call to puts n + +# show snapshot diff +info snapshot + +# step backwards to start of main p p + +# re-run till the end of program to test that nothing breaks +c +c + +# quit q diff --git a/tests/qdb_scripts/arm_static.qdb b/tests/qdb_scripts/arm_static.qdb new file mode 100644 index 000000000..31cd02ab6 --- /dev/null +++ b/tests/qdb_scripts/arm_static.qdb @@ -0,0 +1,37 @@ +# break on entry to main +b 0x000102e4 + +# break on call to puts +b 0x000102ee + +# run till main +c + +# show stack entries +x/8xw $sp + +# run till puts +c + +# show argument passed to puts +info args 1 + +# show instructions passed call till end of function +x/3i ($pc + 4) + +# step over call to puts +n + +# show snapshot diff +info snapshot + +# step backwards to start of main +p +p + +# re-run till the end of program to test that nothing breaks +c +c + +# quit +q diff --git a/tests/qdb_scripts/mips32el.qdb b/tests/qdb_scripts/mips32el.qdb index 0e8342baf..cf880b486 100644 --- a/tests/qdb_scripts/mips32el.qdb +++ b/tests/qdb_scripts/mips32el.qdb @@ -1,13 +1,37 @@ -# This line is demonstrate comment in qdb script +# break on entry to main +b 0x565555e0 -x/10wx 0x7ff3cec0 -x $sp -x $sp + 0x10 -x/5i 0x047bac40 -b 0x047bac50 +# break on call to puts +b 0x56555600 + +# run till main +c + +# show stack entries +x/8xw $sp + +# run till puts c -s + +# show argument passed to puts +info args 1 + +# show instructions 
passed call till end of function +x/5i ($pc + 4) + +# step over call to puts n + +# show snapshot diff +info snapshot + +# step backwards to start of main p p + +# re-run till the end of program to test that nothing breaks +c +c + +# quit q diff --git a/tests/qdb_scripts/x86.qdb b/tests/qdb_scripts/x86.qdb index d06623328..e145f2bd1 100644 --- a/tests/qdb_scripts/x86.qdb +++ b/tests/qdb_scripts/x86.qdb @@ -1,11 +1,37 @@ -# This line is demonstrate comment in qdb script +# break on entry to main +b 0x5655551d -x/4wx 0x7ff3cee0 -x $esp -x $esp + 0x4 -x/5i 0x047bac70 -s +# break on call to printf +b 0x56555542 + +# run till main +c + +# show stack entries +x/8xw $esp + +# run till printf +c + +# show argument passed to printf +info args 1 + +# show instructions passed call till end of function +x/8i ($eip + 5) + +# step over call to printf n + +# show snapshot diff +info snapshot + +# step backwards to start of main p p + +# re-run till the end of program to test that nothing breaks +c +c + +# quit q diff --git a/tests/test_qdb.py b/tests/test_qdb.py index 0a0da506c..563dd840e 100644 --- a/tests/test_qdb.py +++ b/tests/test_qdb.py @@ -1,41 +1,58 @@ #!/usr/bin/env python3 -# +# # Cross Platform and Multi Architecture Advanced Binary Emulation Framework # -import sys, unittest +import sys +import unittest sys.path.append("..") from qiling import Qiling +from qiling.const import QL_VERBOSE + class DebuggerTest(unittest.TestCase): - def test_qdb_mips32el_hello(self): - rootfs = "../examples/rootfs/mips32el_linux" - path = rootfs + "/bin/mips32el_hello" + def __test_common(self, vpath: str, rootfs: str, script: str) -> None: + """Load a common setup for all test cases. 
+ """ - ql = Qiling([path], rootfs) - ql.debugger = "qdb::rr:qdb_scripts/mips32el.qdb" - ql.run() - del ql + ql = Qiling([f'{rootfs}{vpath}'], rootfs, verbose=QL_VERBOSE.DEBUG) + ql.debugger = f'qdb::rr:{script}' - def test_qdb_arm_hello(self): - rootfs = "../examples/rootfs/arm_linux" - path = rootfs + "/bin/arm_hello" + try: + ql.run() + except SystemExit as ex: + self.assertEqual(ex.code, 0) - ql = Qiling([path], rootfs) - ql.debugger = "qdb::rr:qdb_scripts/arm.qdb" - ql.run() - del ql + def test_qdb_mips32el_hello(self): + self.__test_common( + r'/bin/mips32el_hello', + r'../examples/rootfs/mips32el_linux', + r'qdb_scripts/mips32el.qdb' + ) + + def test_qdb_arm_hello(self): + self.__test_common( + r'/bin/arm_hello', + r'../examples/rootfs/arm_linux', + r'qdb_scripts/arm.qdb' + ) + + def test_qdb_arm_hello_static(self): + self.__test_common( + r'/bin/arm_hello_static', + r'../examples/rootfs/arm_linux', + r'qdb_scripts/arm_static.qdb' + ) def test_qdb_x86_hello(self): - rootfs = "../examples/rootfs/x86_linux" - path = rootfs + "/bin/x86_hello" + self.__test_common( + r'/bin/x86_hello', + r'../examples/rootfs/x86_linux', + r'qdb_scripts/x86.qdb' + ) - ql = Qiling([path], rootfs) - ql.debugger = "qdb::rr:qdb_scripts/x86.qdb" - ql.run() - del ql -if __name__ == "__main__": +if __name__ == '__main__': unittest.main()