From 20fe40302ccc3501056b963dda96014d71186db3 Mon Sep 17 00:00:00 2001 From: Peace-Maker Date: Thu, 13 Jul 2023 19:11:01 +0200 Subject: [PATCH] Add Python bindings for WASM --- bindings/python/capstone/__init__.py | 7 ++- bindings/python/capstone/wasm.py | 71 ++++++++++++++++++++++++ bindings/python/pyx/ccapstone.pyx | 5 +- bindings/python/setup_cython.py | 2 +- bindings/python/test_basic.py | 6 ++- bindings/python/test_wasm.py | 81 ++++++++++++++++++++++++++++ 6 files changed, 167 insertions(+), 5 deletions(-) create mode 100644 bindings/python/capstone/wasm.py create mode 100755 bindings/python/test_wasm.py diff --git a/bindings/python/capstone/__init__.py b/bindings/python/capstone/__init__.py index 4ec23458d1..5bd4bc8d8f 100755 --- a/bindings/python/capstone/__init__.py +++ b/bindings/python/capstone/__init__.py @@ -35,6 +35,7 @@ 'CS_ARCH_TMS320C64X', 'CS_ARCH_M680X', 'CS_ARCH_EVM', + 'CS_ARCH_WASM', 'CS_ARCH_BPF', 'CS_ARCH_RISCV', 'CS_ARCH_MOS65XX', @@ -387,7 +388,7 @@ def copy_ctypes_list(src): return [copy_ctypes(n) for n in src] # Weird import placement because these modules are needed by the below code but need the above functions -from . import arm, arm64, m68k, mips, ppc, sparc, systemz, x86, xcore, tms320c64x, m680x, evm, mos65xx, bpf, riscv, tricore +from . import arm, arm64, m68k, mips, ppc, sparc, systemz, x86, xcore, tms320c64x, m680x, evm, mos65xx, wasm, bpf, riscv, tricore class _cs_arch(ctypes.Union): _fields_ = ( @@ -404,6 +405,7 @@ class _cs_arch(ctypes.Union): ('m680x', m680x.CsM680x), ('evm', evm.CsEvm), ('mos65xx', mos65xx.CsMOS65xx), + ('wasm', wasm.CsWasm), ('bpf', bpf.CsBPF), ('riscv', riscv.CsRISCV), ('tricore', tricore.CsTriCore), @@ -727,6 +729,8 @@ def __gen_detail(self): (self.pop, self.push, self.fee) = evm.get_arch_info(self._raw.detail.contents.arch.evm) elif arch == CS_ARCH_MOS65XX: (self.am, self.modifies_flags, self.operands) = mos65xx.get_arch_info(self._raw.detail.contents.arch.mos65xx) + elif arch == CS_ARCH_WASM: + (self.operands) = wasm.get_arch_info(self._raw.detail.contents.arch.wasm) elif arch == CS_ARCH_BPF: (self.operands) = bpf.get_arch_info(self._raw.detail.contents.arch.bpf) elif arch == CS_ARCH_RISCV: @@ -1199,6 +1203,7 @@ def debug(): "sysz": CS_ARCH_SYSZ, 'xcore': CS_ARCH_XCORE, "tms320c64x": CS_ARCH_TMS320C64X, "m680x": CS_ARCH_M680X, 'evm': CS_ARCH_EVM, 'mos65xx': CS_ARCH_MOS65XX, 'bpf': CS_ARCH_BPF, 'riscv': CS_ARCH_RISCV, 'tricore': CS_ARCH_TRICORE, + 'wasm': CS_ARCH_WASM, } all_archs = "" diff --git a/bindings/python/capstone/wasm.py b/bindings/python/capstone/wasm.py new file mode 100644 index 0000000000..23d68ca8ae --- /dev/null +++ b/bindings/python/capstone/wasm.py @@ -0,0 +1,71 @@ +# Capstone Python bindings, by Peace-Maker + +import ctypes + +from . import copy_ctypes_list +from .wasm_const import * + + +# define the API +class WASMBrTable(ctypes.Structure): + _fields_ = ( + ('length', ctypes.c_uint32), + ('address', ctypes.c_uint64), + ('default_target', ctypes.c_uint32), + ) + +class WASMOpValue(ctypes.Union): + _fields_ = ( + ('int7', ctypes.c_int8), + ('varuint32', ctypes.c_uint32), + ('varuint64', ctypes.c_uint64), + ('uint32', ctypes.c_uint32), + ('uint64', ctypes.c_uint64), + ('immediate', ctypes.c_uint32 * 2), + ('brtable', WASMBrTable), + ) + +class WASMOp(ctypes.Structure): + _fields_ = ( + ('type', ctypes.c_uint), + ('size', ctypes.c_uint32), + ('value', WASMOpValue), + ) + + @property + def int7(self): + return self.value.int7 + + @property + def varuint32(self): + return self.value.varuint32 + + @property + def varuint64(self): + return self.value.varuint64 + + @property + def uint32(self): + return self.value.uint32 + + @property + def uint64(self): + return self.value.uint64 + + @property + def immediate(self): + return self.value.immediate + + @property + def brtable(self): + return self.value.brtable + +class CsWasm(ctypes.Structure): + _fields_ = ( + ('op_count', ctypes.c_uint8), + ('operands', WASMOp * 2), + ) + +def get_arch_info(a): + return (copy_ctypes_list(a.operands[:a.op_count])) + diff --git a/bindings/python/pyx/ccapstone.pyx b/bindings/python/pyx/ccapstone.pyx index dbc92f6e55..3bf5d76c41 100644 --- a/bindings/python/pyx/ccapstone.pyx +++ b/bindings/python/pyx/ccapstone.pyx @@ -2,7 +2,7 @@ cimport pyx.ccapstone as cc import capstone, ctypes -from . import arm, x86, mips, ppc, arm64, sparc, systemz, xcore, tms320c64x, m68k, m680x, evm, mos65xx, bpf, riscv, tricore, CsError +from . import arm, x86, mips, ppc, arm64, sparc, systemz, xcore, tms320c64x, m68k, m680x, evm, mos65xx, wasm, bpf, riscv, tricore, CsError _diet = cc.cs_support(capstone.CS_SUPPORT_DIET) @@ -57,6 +57,8 @@ class CsDetail(object): (self.pop, self.push, self.fee) = evm.get_arch_info(detail.arch.evm) elif arch == capstone.CS_ARCH_MOS65XX: (self.am, self.modifies_flags, self.operands) = mos65xx.get_arch_info(detail.arch.mos65xx) + elif arch == capstone.CS_ARCH_WASM: + (self.operands) = wasm.get_arch_info(detail.arch.wasm) elif arch == capstone.CS_ARCH_BPF: (self.operands) = bpf.get_arch_info(detail.arch.bpf) elif arch == capstone.CS_ARCH_RISCV: @@ -361,6 +363,7 @@ def debug(): "sysz": capstone.CS_ARCH_SYSZ, "xcore": capstone.CS_ARCH_XCORE, \ "tms320c64x": capstone.CS_ARCH_TMS320C64X, "m680x": capstone.CS_ARCH_M680X, \ "evm": capstone.CS_ARCH_EVM, "mos65xx": capstone.CS_ARCH_MOS65XX, \ + "wasm": capstone.CS_ARCH_WASM, \ "bpf": capstone.CS_ARCH_BPF, "riscv": capstone.CS_ARCH_RISCV, \ "tricore": capstone.CS_ARCH_TRICORE } diff --git a/bindings/python/setup_cython.py b/bindings/python/setup_cython.py index 953b09a6db..6dc8e0393e 100644 --- a/bindings/python/setup_cython.py +++ b/bindings/python/setup_cython.py @@ -41,7 +41,7 @@ compile_args = ['-O3', '-fomit-frame-pointer', '-I' + HEADERS_DIR] link_args = ['-L' + LIBS_DIR] -ext_module_names = ['arm', 'arm_const', 'arm64', 'arm64_const', 'm68k', 'm68k_const', 'm680x', 'm680x_const', 'mips', 'mips_const', 'ppc', 'ppc_const', 'x86', 'x86_const', 'sparc', 'sparc_const', 'systemz', 'sysz_const', 'xcore', 'xcore_const', 'tms320c64x', 'tms320c64x_const', 'evm', 'evm_const', 'mos65xx', 'mos65xx_const', 'bpf', 'bpf_const', 'riscv', 'riscv_const', 'tricore', 'tricore_const' ] +ext_module_names = ['arm', 'arm_const', 'arm64', 'arm64_const', 'm68k', 'm68k_const', 'm680x', 'm680x_const', 'mips', 'mips_const', 'ppc', 'ppc_const', 'x86', 'x86_const', 'sparc', 'sparc_const', 'systemz', 'sysz_const', 'xcore', 'xcore_const', 'tms320c64x', 'tms320c64x_const', 'evm', 'evm_const', 'mos65xx', 'mos65xx_const', 'wasm', 'wasm_const', 'bpf', 'bpf_const', 'riscv', 'riscv_const', 'tricore', 'tricore_const' ] ext_modules = [Extension("capstone.ccapstone", ["pyx/ccapstone.pyx"], diff --git a/bindings/python/test_basic.py b/bindings/python/test_basic.py index 1f4c721a9f..55d153bfc3 100755 --- a/bindings/python/test_basic.py +++ b/bindings/python/test_basic.py @@ -34,6 +34,7 @@ M68K_CODE = b"\xd4\x40\x87\x5a\x4e\x71\x02\xb4\xc0\xde\xc0\xde\x5c\x00\x1d\x80\x71\x12\x01\x23\xf2\x3c\x44\x22\x40\x49\x0e\x56\x54\xc5\xf2\x3c\x44\x00\x44\x7a\x00\x00\xf2\x00\x0a\x28\x4E\xB9\x00\x00\x00\x12\x4E\x75" TMS320C64X_CODE = b"\x01\xac\x88\x40\x81\xac\x88\x43\x00\x00\x00\x00\x02\x90\x32\x96\x02\x80\x46\x9e\x05\x3c\x83\xe6\x0b\x0c\x8b\x24" M680X_CODE = b"\x06\x10\x19\x1a\x55\x1e\x01\x23\xe9\x31\x06\x34\x55\xa6\x81\xa7\x89\x7f\xff\xa6\x9d\x10\x00\xa7\x91\xa6\x9f\x10\x00\x11\xac\x99\x10\x00\x39" +WASM_CODE = b"\x20\x00\x20\x01\x41\x20\x10\xc9\x01\x45\x0b" RISCV_CODE32 = b"\x37\x34\x00\x00\x97\x82\x00\x00\xef\x00\x80\x00\xef\xf0\x1f\xff\xe7\x00\x45\x00\xe7\x00\xc0\xff\x63\x05\x41\x00\xe3\x9d\x61\xfe\x63\xca\x93\x00\x63\x53\xb5\x00\x63\x65\xd6\x00\x63\x76\xf7\x00\x03\x88\x18\x00\x03\x99\x49\x00\x03\xaa\x6a\x00\x03\xcb\x2b\x01\x03\xdc\x8c\x01\x23\x86\xad\x03\x23\x9a\xce\x03\x23\x8f\xef\x01\x93\x00\xe0\x00\x13\xa1\x01\x01\x13\xb2\x02\x7d\x13\xc3\x03\xdd\x13\xe4\xc4\x12\x13\xf5\x85\x0c\x13\x96\xe6\x01\x13\xd7\x97\x01\x13\xd8\xf8\x40\x33\x89\x49\x01\xb3\x0a\x7b\x41\x33\xac\xac\x01\xb3\x3d\xde\x01\x33\xd2\x62\x40\xb3\x43\x94\x00\x33\xe5\xc5\x00\xb3\x76\xf7\x00\xb3\x54\x39\x01\xb3\x50\x31\x00\x33\x9f\x0f\x00" RISCV_CODE64 = b"\x13\x04\xa8\x7a" @@ -65,8 +66,9 @@ (CS_ARCH_M68K, CS_MODE_BIG_ENDIAN | CS_MODE_M68K_040, M68K_CODE, "M68K (68040)", None), (CS_ARCH_TMS320C64X, 0, TMS320C64X_CODE, "TMS320C64x", None), (CS_ARCH_M680X, CS_MODE_M680X_6809, M680X_CODE, "M680X_M6809", None), - (CS_ARCH_RISCV, CS_MODE_RISCV32, RISCV_CODE32, "riscv32", None), - (CS_ARCH_RISCV, CS_MODE_RISCV64, RISCV_CODE64, "riscv64", None), + (CS_ARCH_WASM, 0, WASM_CODE, "WASM", None), + (CS_ARCH_RISCV, CS_MODE_RISCV32, RISCV_CODE32, "RISCV32", None), + (CS_ARCH_RISCV, CS_MODE_RISCV64, RISCV_CODE64, "RISCV64", None), ) # ## Test cs_disasm_quick() diff --git a/bindings/python/test_wasm.py b/bindings/python/test_wasm.py new file mode 100755 index 0000000000..2645711320 --- /dev/null +++ b/bindings/python/test_wasm.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python + +# Capstone Python bindings, by Peace-Maker + +from __future__ import print_function +from capstone import * +from capstone.wasm import * +from xprint import to_hex + +WASM_CODE = b"\x20\x00\x20\x01\x41\x20\x10\xc9\x01\x45\x0b" + +all_tests = ( + (CS_ARCH_WASM, 0, WASM_CODE, "WASM"), +) + + +def print_insn_detail(insn): + # print address, mnemonic and operands + print("0x%x:\t%s\t%s" % (insn.address, insn.mnemonic, insn.op_str)) + + # "data" instruction generated by SKIPDATA option has no detail + if insn.id == 0: + return + + if len(insn.groups) > 0: + print("\tGroups: ", end="") + for group in insn.groups: + print("%s " % insn.group_name(group), end="") + print() + + if len(insn.operands) > 0: + print("\tOperand count: %u" % len(insn.operands)) + c = 0 + for i in insn.operands: + if i.type == WASM_OP_INT7: + print("\t\tOperand[%u] type: int7" % c) + print("\t\tOperand[%u] value: %d" % (c, i.int7)) + elif i.type == WASM_OP_VARUINT32: + print("\t\tOperand[%u] type: varuint32" % c) + print("\t\tOperand[%u] value: %#x" % (c, i.varuint32)) + elif i.type == WASM_OP_VARUINT64: + print("\t\tOperand[%u] type: varuint64" % c) + print("\t\tOperand[%u] value: %#x" % (c, i.varuint64)) + elif i.type == WASM_OP_UINT32: + print("\t\tOperand[%u] type: uint32" % c) + print("\t\tOperand[%u] value: %#x" % (c, i.uint32)) + elif i.type == WASM_OP_UINT64: + print("\t\tOperand[%u] type: uint64" % c) + print("\t\tOperand[%u] value: %#x" % (c, i.uint64)) + elif i.type == WASM_OP_IMM: + print("\t\tOperand[%u] type: imm" % c) + print("\t\tOperand[%u] value: %#x %#x" % (c, i.immediate[0], i.immediate[1])) + elif i.type == WASM_OP_BRTABLE: + print("\t\tOperand[%u] type: brtable" % c) + print("\t\tOperand[%u] value: length=%#x, address=%#x, default_target=%#x" % (c, i.brtable.length, i.brtable.address, i.brtable.default_target)) + print("\t\tOperand[%u] size: %u" % (c, i.size)) + c += 1 + + + +# ## Test class Cs +def test_class(): + for (arch, mode, code, comment) in all_tests: + print("*" * 16) + print("Platform: %s" % comment) + print("Code: %s" % to_hex(code)) + print("Disasm:") + + try: + md = Cs(arch, mode) + md.detail = True + for insn in md.disasm(code, 0xffff): + print_insn_detail(insn) + print() + print("0x%x:\n" % (insn.address + insn.size)) + except CsError as e: + print("ERROR: %s" % e) + + +if __name__ == '__main__': + test_class()