diff --git a/.gitignore b/.gitignore index 9daef43..81c186b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ .vscode/* iconbuster.py +form_test.py +*.bak +capdisasm.py +vtest* +namecollisions.py \ No newline at end of file diff --git a/README.md b/README.md index 1b6440b..4e5c941 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,9 @@ # IDA Scripts Some random IDA scripts I wrote +### distfromfunc.py ### -### findmyfunc.py ### - -Takes a SourceMod signature and jumps you to the function it's for. If it's a bad signature, then you won't go anywhere. - +Get the offset from the cursor address and the start of a function. Useful for byte patching. ### gamedata_checker.py ### @@ -18,25 +16,23 @@ Has a few quirks with it at the moment: - Offset checking is variably difficult depending on naming conventions. If the gamedata key name is not named exactly the same as the function name, it will not be found; e.g. `OnTakeDamage` -> `CBaseEntity::OnTakeDamage` and `CTFPlayer::OnTakeDamage` -> `CBaseEntity::OnTakeDamage` but `TakeDamage` != `CBaseEntity::OnTakeDamage`. -### getfuncoffset.py ### - -Get the offset from the cursor address and the start of a function. Useful for byte patching. - - ### isgoodsig.py ### -Takes a SourceMod signature input and detects if it's unique or not. +Takes a SourceMod (or any) signature input and detects if it's unique or not. ### makesig.py ### Python translation of [makesig](https://github.com/alliedmodders/sourcemod/blob/master/tools/ida_scripts/makesig.idc). +Optionally, install pyperclip with `pip install pyperclip` to automatically copy any signatures to your clipboard when running. + ### makesigfromhere.py ### Creates a signature from the cursor offset. Useful for byte patching. + ### nameresetter.py ### Resets the name of every function in IDA's database. Does not include library or external functions. @@ -46,7 +42,10 @@ Resets the name of every function in IDA's database. 
Does not include library or Imports netprops and owner classes as structs and struct members into IDA's DB. Only works with the XML file provided by sm_dump_netprops_xml. Datatables only work most of the time. You should also use the proper netprop dump for your OS, or else you will be very confused. -You also have the option of importing vtables from the found classes into IDA. This is a bit more sane than the **vtable_structs.py** script, but only works on classes with netprops. + +### sigfind.py ### + +Takes a SourceMod (or any) signature and jumps you to the function it's for. If it's a bad signature, then you won't go anywhere. ### sigsmasher.py ### @@ -54,7 +53,7 @@ You also have the option of importing vtables from the found classes into IDA. T Makes SourceMod ready signatures for every function in IDA's database. Yes, this will take a long, long time. Requires PyYAML so you'll need to `pip install pyyaml`. You have the option of only generating signatures for typed functions so this works very well with the Symbol Smasher. -### structaligner.py ### +### structfiller.py ### Sanitizes undefined struct members as if IDA had parsed a header file. Each structure will have its undefined members replaced with a one-byte-sized member in order to prevent pseudocode from falling apart. Only makes sense to use it after running the netprop importer. @@ -69,13 +68,44 @@ If you're on a symbol library, you should run it in read mode and export it to a When on Windows or another stripped database, run the script in write mode and select the file you exported earlier. A solid amount of functions should be typed within a few seconds. -This works well with the Signature Smasher. However to save you an hour or so, I publicly host dumps of most Source games [here](https://brewcrew.tf/sigdump). +This works well with the Signature Smasher. However to save you an hour or so, I publicly host dumps of most Source games [here](http://scag.site.nfoservers.com/sigdump). 
### vtable_io.py ### -Imports and exports virtual tables. Run it through a Linux binary to export to a file, then run it through a Windows binary to import those VTables into the database. This is similar to [Asherkin's VTable Dumper](https://asherkin.github.io/vtable/) but doesn't suffer from the pitfalls of multiple inheritance. Since it doesn't have those liabilities, it's function typing will almost always be perfect. 32-bit only for now. - -Only works on libraries that have virtual thunks *after* the virtual table declaration such as in TF2. Fixing this is a TODO. +Imports and exports virtual tables. Run it through a Linux binary to export to a file, then run it through a Windows binary to import those VTables into the database. This is similar to [Asherkin's VTable Dumper](https://asherkin.github.io/vtable/) but doesn't suffer from the pitfalls of multiple inheritance. Since it doesn't have those liabilities, its function typing will almost always be perfect. + +#### Features #### +This script is slightly heavy and has features that warrant explanation. Features can be freely enabled/disabled in the popup form that opens when you run the script. Desired feature options are kept in the IDA registry and will persist. + +**Parse type strings** +- Sometimes IDA fails to properly analyze Windows RTTI Type Descriptor objects. Because of this, there won't be a reference from certain type descriptors to std::type_info, which is required for the script to work. +- If this feature is enabled, then the string names of the type descriptor will be parsed in order to discover the type descriptors that are missing this reference. This will be done alongside the normal script function. +- If you notice that there are multiple functions of the same name or classes that have virtual functions that aren't typed, consider enabling this. +- It should be harmless to keep on regardless, but it is disabled by default. +- This problem only seemed to be present in NMRiH. 
+ +**Skip vtable size mismatches** +- The script is *almost* perfect. On rare occasion, it will fail to properly prepare a Windows translation of a Linux virtual table. +- If this option is enabled, then any size mismatches will forgo function typing. +- Enabled by default. + +**Comment reused functions** +- Windows oftentimes optimizes shorter and simpler functions and reuses them across multiple virtual tables. This means that it would be redundant to rename these functions over and over again. +- If enabled, virtual table declarations instead emplace a comment on the function's reference. +- Enabled by default. + +**Export options** +- Should be self-explanatory, but the script is able to export the Linux and Windows virtual tables to a file. +- This is a .json file and is organized to be readable. +- The format of the export file is as follows: +```json +"classname" +{ + "[this-offset] vtable-offset function-name" +} +``` +- Linux offsets are denoted with `L` and Windows with `W`. If the function is not present in a certain OS, then that index is empty. +- Exporting is optional, and if it is not enabled, then the export file path option can be safely ignored. 
### vtable_structs.py ### diff --git a/distfromfunc.py b/distfromfunc.py new file mode 100644 index 0000000..15bf81d --- /dev/null +++ b/distfromfunc.py @@ -0,0 +1,22 @@ +import idc +import idaapi + +def main(): + addr = idaapi.get_screen_ea() + if addr == idc.BADADDR: + print("Make sure you are in a function!") + idaapi.beep() + return + + func = idaapi.get_func(addr) + if func is None: + print("Make sure you are in a function!") + idaapi.beep() + return + + funcname = idaapi.get_name(func.start_ea) + demangled = idaapi.demangle_name(funcname, idc.get_inf_attr(idc.INF_SHORT_DN)) + print(f"{demangled or funcname}:") + print(f"Offset from {func.start_ea:08X} to {addr:08X} = {addr - func.start_ea} ({addr - func.start_ea:#X})") + +main() \ No newline at end of file diff --git a/findmyfunc.py b/findmyfunc.py deleted file mode 100644 index 29e8af2..0000000 --- a/findmyfunc.py +++ /dev/null @@ -1,17 +0,0 @@ -import idc -import idaapi - -def getsigloc(sig): - segend = idaapi.get_segm_by_name(".text").end_ea - addr = idaapi.find_binary(0, segend, sig, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) - return addr - -sig = idaapi.ask_str("", 0, "Insert signature: ") - -oldsig = sig -sig = sig.replace(r"\x", " ").replace("2A", "?").replace("2a", "?").strip() - -def main(): - loc = getsigloc(sig) - if loc != idc.BADADDR: - idaapi.jumpto(loc) diff --git a/gamedata_checker.py b/gamedata_checker.py index a77560d..8ebb6ef 100644 --- a/gamedata_checker.py +++ b/gamedata_checker.py @@ -5,27 +5,71 @@ from sys import version_info -FUNCS_SEGEND = idaapi.get_segm_by_name(".text").end_ea +OS_Linux = 0 +OS_Win = 1 def get_os(): - # Lazy af lol - return "linux" if idaapi.get_root_filename().endswith(".so") else "windows" + ftype = idaapi.get_file_type_name() + if "PE" in ftype: + return OS_Win + elif "ELF" in ftype: + return OS_Linux + return -1 + +def osstr(os): + if os == OS_Linux: + return "linux" + elif os == OS_Win: + return "windows" + return "unknown" def checksig(sig): if sig[0] == '@': # Just 
check for existence of this mangled name return idc.get_name_ea_simple(sig[1:]) != idc.BADADDR - + sig = sig.replace(r"\x", " ").replace("2A", "?").replace("2a", "?").replace("\\", "").strip() + + # Get the first segment that is executable to use its addresses for parse_binpat_str + endea = idc.BADADDR + for segea in idautils.Segments(): + s = idaapi.getseg(segea) + if s.perm & idaapi.SEGPERM_EXEC: + segstart = segea + # Set the end ea to the end of the last executable segment + # Speed isn't as important in this script, so reading any extra X + # segments is fine + if endea == idc.BADADDR or endea < segstart + s.size(): + endea = segstart + s.size() + count = 0 addr = 0 - addr = idaapi.find_binary(addr, FUNCS_SEGEND, sig, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) - while addr != idc.BADADDR: + addr = idaapi.find_binary(addr, endea, sig, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) + while count < 2 and addr != idc.BADADDR: count = count + 1 - addr = idaapi.find_binary(addr, FUNCS_SEGEND, sig, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) + if count > 1: + break + addr = idaapi.find_binary(addr, endea, sig, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) return count == 1 + # bin_search3 breaks after 15 or so bytes or something, idk man + # binpat = idaapi.compiled_binpat_vec_t() + # idaapi.parse_binpat_str(binpat, segstart, sig, 16, idaapi.get_default_encoding_idx(idaapi.get_encoding_bpu_by_name("UTF-8"))) + + # count = 0 + # addr = 0 + # addr, _ = idaapi.bin_search3(addr, endea, binpat, idaapi.BIN_SEARCH_FORWARD) + # while addr != idc.BADADDR: + # count += 1 + # if count > 1: + # break + + # # +1 because the search finds itself + # addr, _ = idaapi.bin_search3(addr + 1, endea, binpat, idaapi.BIN_SEARCH_FORWARD) + + # return count == 1 + def get_bcompat_items(d): return d.iteritems() if version_info[0] <= 2 else d.items() @@ -33,12 +77,11 @@ def get_bcompat_items(d): # If you have multiple #default's in you first subsection or you have #default # anywhere else other than that first subsection, you're 
SOL. Sorry Silvers :c def get_gamedir(kv): - gamedir = "" - # If we've got multiple games supported, so let's just ask + # If we've got multiple games supported, let's just ask if len(kv.items()) > 1: - gamedir = idaapi.ask_str("", 0, "There are multiple supported games with this file. Which game directory is this for?") + gamedir = idaapi.ask_str("", 0, "There are multiple games supported by this file. Which game directory is this for?") # Not in the basic game shit, check for support in default - if gamedir not in kv.keys(): + if gamedir is not None and gamedir not in kv.keys(): default = kv.get("#default") # There's a default entry, check for supported if default: @@ -58,8 +101,8 @@ def get_gamedir(kv): supported = kv.get("#supported") if supported: if len(supported.items()) > 1: - gamedir = idaapi.ask_str("", 0, "There are multiple supported games with this file. Which game directory is this for?") - if gamedir in default["#supported"].values(): + gamedir = idaapi.ask_str("", 0, "There are multiple games supported by this file. 
Which game directory is this for?") + if gamedir is not None and gamedir in default["#supported"].values(): return gamedir return "" return list(supported.values())[0] @@ -69,7 +112,7 @@ def get_gamedir(kv): def get_voffs(name): os = get_os() - if os == "linux": + if os == OS_Linux: mangled = "_ZTV{}{}".format(len(name), name) offset = 8 else: @@ -85,7 +128,11 @@ def read_vtable(funcname, ea): funcs = {} offset = 0 while ea != idc.BADADDR: - offs = idc.get_wide_dword(ea) + if idc.__EA64__: + offs = idaapi.get_qword(ea) + else: + offs = idaapi.get_dword(ea) + if not idaapi.is_code(idaapi.get_full_flags(offs)): break @@ -99,7 +146,7 @@ def read_vtable(funcname, ea): funcs[demangled.lower()] = offset offset += 1 - ea = idaapi.next_not_tail(ea) + ea = idaapi.next_head(ea, idc.BADADDR) # We've got a list of function names, let's do this really shittily because idk any other way @@ -182,11 +229,21 @@ def try_get_voffset(funcname): def main(): kv = None filereq = idaapi.ask_file(0, "*.txt", "Select a gamedata file") - if filereq == None: + if filereq is None: return - with open(filereq) as f: - kv = vdf.load(f) + # Try and capture the huge exception that happens if there are multi-line comments + # Why does vdfparse print the entire file? Lol + try: + with open(filereq) as f: + kv = vdf.load(f) + except Exception as e: + idaapi.warning("Could not load file!\nSee console for details") + import traceback + traceback.print_exc(type(e), e, e.__traceback__) + if "vdf.parse: unexpected EOF" in str(e): + print("[Gamedata Checker] This is likely due to multi-line comments in the gamedata file. 
Try removing them and try again") + return if kv == None: idaapi.warning("Could not load file!") @@ -208,40 +265,57 @@ def main(): signatures = kv.get("Signatures") if signatures: for name, handle in signatures.items(): - s = handle.get(os) + s = handle.get(osstr(os)) if s: found["Signatures"][name] = checksig(s) offsets = kv.get("Offsets") if offsets:# and os != "windows": for name, handle in offsets.items(): - offset = handle.get(os, -1) + offset = handle.get(osstr(os), -1) if offset != -1: found["Offsets"][name] = [offset, try_get_voffset(name)] checkmark = u"\u2713".encode("utf8") if version_info[0] <= 2 else "✓" - if len(found["Signatures"].items()): + + # Format the output string so it's pretty + try: + maxlen = max([len(key) for key in found["Signatures"].keys()]) + except: + maxlen = 0 + if maxlen: + # Align maxlen to 4 + if maxlen % 4 != 0: + maxlen += 4 - (maxlen % 4) + print("Signatures:") for key, value in get_bcompat_items(found["Signatures"]): - print("\t{} - {}".format(key, checkmark if value else "INVALID")) - - if len(found["Offsets"].items()): - print("Offsets:") + print(f"\t{key:{maxlen}}{checkmark if value else 'INVALID'}") + + try: + maxlen = max([len(key) for key in found["Offsets"].keys()]) + except: + maxlen = 0 + if maxlen: + # Align maxlen to 4 + if maxlen % 4 != 0: + maxlen += 4 - (maxlen % 4) + + # Trial and error and trial and error and trial and + print(f"Offsets:{'Gamedata':>{maxlen + 9}}{'Current':>12}{'Status':>12}") for key, value in get_bcompat_items(found["Offsets"]): - s = "\t{} - ".format(key) + s = f"\t{key:{maxlen}}" + foundval = value[1] + status = checkmark if isinstance(value[1], list): - s += "{} == {} - {}".format(value[0], value[1], checkmark if value[0] in value[1] else "INVALID") - else: - if int(value[0]) == int(value[1]): - s += "{} == {} - {}".format(value[0], value[1], checkmark) - else: - s += "{} == {} - {}".format(value[0], value[1], "NOT FOUND" if value[1] == -1 else "INVALID") + status = checkmark if value[0] 
in value[1] else 'X' + elif int(value[0]) != int(value[1]): + status = 'X' + if value[1] == -1: + foundval = "N/A" + + s += f"{value[0]:<12} {foundval:<12} {status:<12}" print(s) -# if os == "windows" and kv.get("Offsets"): -# print("Offset checking is not supported on Windows binaries") - - idaapi.ask_form("Validated\nCheck console for output") - main() \ No newline at end of file diff --git a/getfuncoffset.py b/getfuncoffset.py deleted file mode 100644 index 132b2ff..0000000 --- a/getfuncoffset.py +++ /dev/null @@ -1,13 +0,0 @@ -import idc -import idaapi - -def main(): - addr = idc.get_screen_ea() - funcstart = idc.get_func_attr(addr, idc.FUNCATTR_START) - if addr == idc.BADADDR or funcstart == idc.BADADDR: - print("Make sure you are in a function!") - return - - print("Offset from %X to %X:\n%d (0x%X)" % (funcstart, addr, addr - funcstart, addr - funcstart)) - -main() \ No newline at end of file diff --git a/isgoodsig.py b/isgoodsig.py index 7b8cbbd..0ea2974 100644 --- a/isgoodsig.py +++ b/isgoodsig.py @@ -1,34 +1,58 @@ import idc import idaapi - -FUNCS_SEGEND = idaapi.get_segm_by_name(".text").end_ea +import idautils def main(): - sig = idaapi.ask_str("", 0, "Insert signature: ") + bytesig = idaapi.ask_str("", 0, "Insert signature: ") - # wtfwtfwtfwtf - oldsig = sig - sig = sig.replace(r"\x", " ").replace("2A", "?").replace("2a", "?").strip() -# print(sig) + sig = bytesig.replace(r"\x", " ").replace("2A", "?").replace("2a", "?").strip() count = checksig(sig) if not count: - print(r"INVALID: {}".format(oldsig)) + print(r"INVALID: {}".format(bytesig)) print("Could not find any matching signatures for input") elif count == 1: - print(r"VALID: {}".format(oldsig)) + print(r"VALID: {}".format(bytesig)) else: - print(r"INVALID: {}".format(oldsig)) + print(r"INVALID: {}".format(bytesig)) print("Found {} instances of input signature".format(count)) def checksig(sig): + # Get the first segment that is executable to use its addresses for parse_binpat_str + endea = 
idc.BADADDR + for segea in idautils.Segments(): + s = idaapi.getseg(segea) + if s.perm & idaapi.SEGPERM_EXEC: + segstart = segea + # Set the end ea to the end of the last executable segment + # Speed isn't as important in this script, so reading any extra X + # segments is fine + if endea == idc.BADADDR or endea < segstart + s.size(): + endea = segstart + s.size() + break + count = 0 addr = 0 - addr = idaapi.find_binary(addr, FUNCS_SEGEND, sig, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) + addr = idaapi.find_binary(addr, endea, sig, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) while addr != idc.BADADDR: count = count + 1 - addr = idaapi.find_binary(addr, FUNCS_SEGEND, sig, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) + addr = idaapi.find_binary(addr, endea, sig, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) return count + # bin_search3 breaks after 15 or so bytes or something, idk man + # binpat = idaapi.compiled_binpat_vec_t() + # idaapi.parse_binpat_str(binpat, segstart, sig, 16, idaapi.get_default_encoding_idx(idaapi.get_encoding_bpu_by_name("UTF-8"))) + + # count = 0 + # addr = 0 + # addr, _ = idaapi.bin_search3(addr, endea, binpat, idaapi.BIN_SEARCH_FORWARD) + # while addr != idc.BADADDR: + # count += 1 + + # # +1 because the search finds itself + # addr, _ = idaapi.bin_search3(addr + 1, endea, binpat, idaapi.BIN_SEARCH_FORWARD) + + # return count + main() \ No newline at end of file diff --git a/makesig.py b/makesig.py index 2ae6134..15b5214 100644 --- a/makesig.py +++ b/makesig.py @@ -2,101 +2,142 @@ import idautils import idaapi -FUNCS_SEGEND = idaapi.get_segm_by_name(".text").end_ea - -def get_dt_size(dtype): - return { - idaapi.dt_byte: 1, - idaapi.dt_word: 2, - idaapi.dt_dword: 4, - idaapi.dt_float: 4, - idaapi.dt_double: 8, - }.get(dtype, -1) - def print_wildcards(count): - return "? " * count + return "?" * count + +def is_good_sig(sig, mask): + search = " ".join('?' if m == '?' 
else b for b, m in zip(sig.strip().split(), mask)) + + # Get the first segment that is executable to use its addresses for parse_binpat_str + endea = idc.BADADDR + for segea in idautils.Segments(): + s = idaapi.getseg(segea) + if s.perm & idaapi.SEGPERM_EXEC: + segstart = segea + # Set the end ea to the end of the last executable segment + # Speed isn't as important in this script, so reading any extra X + # segments is fine + if endea == idc.BADADDR or endea < segstart + s.size(): + endea = segstart + s.size() -def is_good_sig(sig): count = 0 addr = 0 - addr = idaapi.find_binary(addr, FUNCS_SEGEND, sig, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) - while count < 2 and addr != idc.BADADDR: + # Ever just deprecate something and provide 0 documentation on what to use instead? + addr = idaapi.find_binary(addr, endea, search, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) + while addr != idc.BADADDR: count = count + 1 - addr = idaapi.find_binary(addr, FUNCS_SEGEND, sig, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) + if count > 1: + break + addr = idaapi.find_binary(addr, endea, search, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) return count == 1 -def makesig(): - addr = idc.get_screen_ea() - addr = idc.get_func_attr(addr, idc.FUNCATTR_START) - funcstart = addr - if addr == idc.BADADDR: - print("Make sure you are in a function!") - return + # binpat = idaapi.compiled_binpat_vec_t() + # idaapi.parse_binpat_str(binpat, segstart, search, 16, idaapi.get_encoding_bpu_by_name("UTF-8")) + + # count = 0 + # addr = 0 + # addr, _ = idaapi.bin_search3(addr, endea, binpat, idaapi.BIN_SEARCH_FORWARD) + + # while addr != idc.BADADDR: + # count += 1 + # if count > 1: + # break + + # # +1 because the search finds itself + # addr, _ = idaapi.bin_search3(addr + 1, endea, binpat, idaapi.BIN_SEARCH_FORWARD) + - name = idc.get_name(addr, idaapi.GN_VISIBLE); - funcend = idc.get_func_attr(addr, idc.FUNCATTR_END); + # return count == 1 + +def makesig(ea, sz = -1): + name = idc.get_name(ea, idaapi.GN_VISIBLE) sig = "" + mask = 
"" found = 0 done = 0 - addr = funcstart - while addr != idc.BADADDR: + addr = ea + end = ea + sz if sz != -1 else idc.BADADDR + while addr != idc.BADADDR and (sz == -1 or addr < ea + sz): info = idaapi.insn_t() if not idaapi.decode_insn(info, addr): - return None + print(f"Failed to decode instruction at {addr:#X}?") + idaapi.beep() + return + + sig += " ".join(f"{idaapi.get_byte(addr+i):02X}" for i in range(info.size)) + " " done = 0 - if info.Op1.type == idaapi.o_near or info.Op1.type == idaapi.o_far: - if (idc.get_wide_byte(addr)) == 0x0F: # Two-byte instruction - sig = sig + ("0F %02X " % idc.get_wide_byte(addr + 1)) + print_wildcards(get_dt_size(info.Op1.dtype)) - else: - sig = sig + ("%02X " % idc.get_wide_byte(addr)) + print_wildcards(get_dt_size(info.Op1.dtype)) + if info.Op1.type in (idaapi.o_near, idaapi.o_far): + insnsz = 2 if idaapi.get_byte(addr) == 0x0F else 1 + mask += f"{'x' * insnsz}{print_wildcards(info.size - insnsz)}" + done = 1 + elif info.Op1.type == idaapi.o_reg and info.Op2.type == idaapi.o_mem and info.Op2.addr != idc.BADADDR: + mask += f"{'x' * info.Op2.offb}{print_wildcards(info.size - info.Op2.offb)}" done = 1 if not done: # Unknown, just wildcard addresses i = 0 - size = idc.get_item_size(addr) - while 1: # Screw u python + while i < info.size: loc = addr + i - if ((idc.get_fixup_target_type(loc) & 0xF) == idaapi.FIXUP_OFF32): - sig = sig + print_wildcards(4) - i = i + 3 + if ((idc.get_fixup_target_type(loc) & 0x0F) == idaapi.FIXUP_OFF32): + mask += print_wildcards(4) + i += 3 + elif (idc.get_fixup_target_type(loc) & 0x0F) == idaapi.FIXUP_OFF64: + mask += print_wildcards(8) + i += 7 else: - sig = sig + ("%02X " % idc.get_wide_byte(loc)) - - i = i + 1 + mask += 'x' - if i >= size: - break + i += 1 - if is_good_sig(sig): + if is_good_sig(sig, mask): found = 1 break - addr = idc.next_head(addr, funcend) + addr = idaapi.next_head(addr, end) if found == 0: print(sig) - print("Ran out of bytes to create unique signature."); - return None - - 
l = len(sig) - 1 - smsig = r"\x" - for i in range(l): - c = sig[i] - if c == " ": - smsig = smsig + r"\x" - elif c == "?": - smsig = smsig + "2A" - else: - smsig = smsig + c - - print("Signature for %s:\n%s\n%s" % (name, sig, smsig)); - return smsig + print("Ran out of bytes to create unique signature.") + idaapi.beep() + return + + sig = sig.strip() + csig = r"\x" + sig.replace(" ", r"\x") + + align = len("Wildcarded Bytes: ") + wildcarded = f"{'Wildcarded Bytes:':<{align}} {' '.join('?' if m == '?' else b for b, m in zip(sig.split(), mask))}\n" if "?" in mask else "" + smsig = r"\x" + r"\x".join("2A" if m == "?" else b for b, m in zip(sig.split(), mask)) + + print("==================================================") + print( + f"Signature for {name}:\n" + f"{'Mask:':<{align}} {mask}\n" + f"{'Bytes:':<{align}} {sig}\n" + f"{wildcarded}" + f"{'Byte String:':<{align}} {csig}\n" + f"{'SourceMod':<{align}} {smsig}" + ) + + try: + import pyperclip + pyperclip.copy(smsig) + print(f"SourceMod signature copied to clipboard") + except: + print("'pip install pyperclip' to automatically copy the SourceMod signature to your clipboard") + return csig def main(): - makesig() + addr = idaapi.get_screen_ea() + func = idaapi.get_func(addr) + if addr == idc.BADADDR or func is None: + print("Make sure you are in a function!") + idaapi.beep() + return + + makesig(func.start_ea, func.end_ea - func.start_ea) main() \ No newline at end of file diff --git a/makesigfromhere.py b/makesigfromhere.py index a4edb9c..842961d 100644 --- a/makesigfromhere.py +++ b/makesigfromhere.py @@ -2,8 +2,6 @@ import idautils import idaapi -FUNCS_SEGEND = idaapi.get_segm_by_name(".text").end_ea - def get_dt_size(dtype): return { idaapi.dt_byte: 1, @@ -14,88 +12,144 @@ def get_dt_size(dtype): }.get(dtype, -1) def print_wildcards(count): - return "? " * count + return "?" * count + +def is_good_sig(sig, mask): + search = " ".join('?' if m == '?' 
else b for b, m in zip(sig.strip().split(), mask)) + + # Get the first segment that is executable to use its addresses for parse_binpat_str + endea = idc.BADADDR + for segea in idautils.Segments(): + s = idaapi.getseg(segea) + if s.perm & idaapi.SEGPERM_EXEC: + segstart = segea + # Set the end ea to the end of the last executable segment + # Speed isn't as important in this script, so reading any extra X + # segments is fine + if endea == idc.BADADDR or endea < segstart + s.size(): + endea = segstart + s.size() -def is_good_sig(sig): count = 0 addr = 0 - addr = idaapi.find_binary(addr, FUNCS_SEGEND, sig, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) - while count < 2 and addr != idc.BADADDR: + addr = idaapi.find_binary(addr, endea, search, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) + while addr != idc.BADADDR: count = count + 1 - addr = idaapi.find_binary(addr, FUNCS_SEGEND, sig, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) + if count > 1: + break + addr = idaapi.find_binary(addr, endea, search, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) return count == 1 -def makesig(): - addr = idc.get_screen_ea() - funcstart = idc.get_func_attr(addr, idc.FUNCATTR_START) - if addr == idc.BADADDR or funcstart == idc.BADADDR: - print("Make sure you are in a function!") - return + # binpat = idaapi.compiled_binpat_vec_t() + # idaapi.parse_binpat_str(binpat, segstart, search, 16, idaapi.get_encoding_bpu_by_name("UTF-8")) + + # count = 0 + # addr = 0 + # addr, _ = idaapi.bin_search3(addr, endea, binpat, idaapi.BIN_SEARCH_FORWARD) + + # while addr != idc.BADADDR: + # count += 1 + # if count > 1: + # break + + # # +1 because the search finds itself + # addr, _ = idaapi.bin_search3(addr + 1, endea, binpat, idaapi.BIN_SEARCH_FORWARD) + + + # return count == 1 - funcend = idc.get_func_attr(addr, idc.FUNCATTR_END); - name = idc.get_name(funcstart, idaapi.GN_VISIBLE); +def makesig(ea, sz=-1): + func = idaapi.get_func(ea) + name = idc.get_name(func.start_ea, idaapi.GN_VISIBLE) sig = "" + mask = "" found = 0 done = 0 - 
startaddr = addr - while addr != idc.BADADDR: + addr = ea + end = ea + sz if sz != -1 else idc.BADADDR + while addr != idc.BADADDR and (sz == -1 or addr < ea + sz): info = idaapi.insn_t() if not idaapi.decode_insn(info, addr): - return None + print(f"Failed to decode instruction at {addr:#X}?") + idaapi.beep() + return + + sig += " ".join(f"{idaapi.get_byte(addr+i):02X}" for i in range(info.size)) + " " done = 0 - if info.Op1.type == idaapi.o_near or info.Op1.type == idaapi.o_far: - if (idc.get_wide_byte(addr)) == 0x0F: # Two-byte instruction - sig = sig + ("0F %02X " % idc.get_wide_byte(addr + 1)) + print_wildcards(get_dt_size(info.Op1.dtype)) - else: - sig = sig + ("%02X " % idc.get_wide_byte(addr)) + print_wildcards(get_dt_size(info.Op1.dtype)) + if info.Op1.type in (idaapi.o_near, idaapi.o_far): + insnsz = 2 if idaapi.get_byte(addr) == 0x0F else 1 + mask += f"{'x' * insnsz}{print_wildcards(info.size - insnsz)}" + done = 1 + elif info.Op1.type == idaapi.o_reg and info.Op2.type == idaapi.o_mem and info.Op2.addr != idc.BADADDR: + mask += f"{'x' * info.Op2.offb}{print_wildcards(info.size - info.Op2.offb)}" done = 1 if not done: # Unknown, just wildcard addresses i = 0 - size = idc.get_item_size(addr) - while 1: # Screw u python + while i < info.size: loc = addr + i - if ((idc.get_fixup_target_type(loc) & 0xF) == idaapi.FIXUP_OFF32): - sig = sig + print_wildcards(4) - i = i + 3 + if ((idc.get_fixup_target_type(loc) & 0x0F) == idaapi.FIXUP_OFF32): + mask += print_wildcards(4) + i += 3 + elif (idc.get_fixup_target_type(loc) & 0x0F) == idaapi.FIXUP_OFF64: + mask += print_wildcards(8) + i += 7 else: - sig = sig + ("%02X " % idc.get_wide_byte(loc)) + mask += 'x' - i = i + 1 + i += 1 - if i >= size: - break - - if is_good_sig(sig): + if is_good_sig(sig, mask): found = 1 break - addr = idc.next_head(addr, funcend) + addr = idaapi.next_head(addr, end) if found == 0: print(sig) - print("Ran out of bytes to create unique signature."); - return None - - l = len(sig) - 1 - 
smsig = r"\x" - for i in range(l): - c = sig[i] - if c == " ": - smsig = smsig + r"\x" - elif c == "?": - smsig = smsig + "2A" - else: - smsig = smsig + c - - print("Signature for %s at %X (offset %d):\n%s\n%s" % (name, startaddr, startaddr - funcstart, sig, smsig)); - return smsig + print("Ran out of bytes to create unique signature.") + idaapi.beep() + return + + sig = sig.strip() + csig = r"\x" + sig.replace(" ", r"\x") + + align = len("Wildcarded Bytes: ") + wildcarded = f"{'Wildcarded Bytes:':<{align}} {' '.join('?' if m == '?' else b for b, m in zip(sig.split(), mask))}\n" if "?" in mask else "" + smsig = r"\x" + r"\x".join("2A" if m == "?" else b for b, + m in zip(sig.split(), mask)) + + print("==================================================") + print( + f"Signature for {name} + {ea - func.start_ea} ({ea - func.start_ea:#x}):\n" + f"{'Mask:':<{align}} {mask}\n" + f"{'Bytes:':<{align}} {sig}\n" + f"{wildcarded}" + f"{'Byte String:':<{align}} {csig}\n" + f"{'SourceMod':<{align}} {smsig}" + ) + + try: + import pyperclip + pyperclip.copy(smsig) + print(f"SourceMod signature copied to clipboard") + except: + print("'pip install pyperclip' to automatically copy the SourceMod signature to your clipboard") + return csig def main(): - makesig() + ea = idaapi.get_screen_ea() + func = idaapi.get_func(ea) + if ea == idc.BADADDR or func is None: + print("Make sure you are in a function!") + idaapi.beep() + return + + sz = func.end_ea - ea + + makesig(ea, sz) main() \ No newline at end of file diff --git a/nameresetter.py b/nameresetter.py index badc1a6..6b7b647 100644 --- a/nameresetter.py +++ b/nameresetter.py @@ -3,19 +3,21 @@ import idaapi def main(): - segstart = 0 - segend = None + count = 0 + for segstart in idautils.Segments(): + segend = idaapi.getseg(segstart).end_ea + for fea in idautils.Functions(segstart, segend): + flags = idaapi.get_full_flags(fea) + if not (flags & idc.FF_NAME): + continue - segm = idaapi.get_segm_by_name(".text") - if segm: - segstart 
= segm.start_ea - segend = segm.end_ea + fflags = idc.get_func_attr(fea, idc.FUNCATTR_FLAGS) + if fflags & idaapi.FUNC_LIB: + continue - for fea in idautils.Functions(segstart, segend): - flags = idc.get_func_attr(fea, idc.FUNCATTR_FLAGS) - if flags & idaapi.FUNC_LIB: - continue - - idc.set_name(fea, "") + if idc.set_name(fea, ""): + count += 1 + + print(f"Successfully renamed {count} functions") main() \ No newline at end of file diff --git a/netprop_importer.py b/netprop_importer.py index aafb1e1..284a187 100644 --- a/netprop_importer.py +++ b/netprop_importer.py @@ -1,27 +1,350 @@ import idautils import idaapi import idc +import ctypes +import time + from math import ceil -from time import time import xml.etree.ElementTree as et -IMPORT_VTABLE = 0 +from dataclasses import dataclass +from enum import Enum + +if idc.__EA64__: + ea_t = ctypes.c_uint64 + FF_PTR = idc.FF_QWORD +else: + ea_t = ctypes.c_uint32 + FF_PTR = idc.FF_DWORD + +class DataCache(object): + tablecache = {} + +class SendPropType(Enum): + DPT_Int = 0 + DPT_Float = 1 + DPT_Vector = 2 + DPT_VectorXY = 3 + DPT_String = 4 + DPT_Array = 5 + DPT_DataTable = 6 + DPT_Int64 = 7 + +class SendFlags(Enum): + UNSIGNED = 1 << 0 + COORD = 1 << 1 + NOSCALE = 1 << 2 + ROUNDDOWN = 1 << 3 + ROUNDUP = 1 << 4 + NORMAL = 1 << 5 + EXCLUDE = 1 << 6 + XYZE = 1 << 7 + INSIDEARRAY = 1 << 8 + PROXY_ALWAYS_YES = 1 << 9 + CHANGES_OFTEN = 1 << 10 + IS_A_VECTOR_ELEM = 1 << 11 + COLLAPSIBLE = 1 << 12 + COORD_MP = 1 << 13 + COORD_MP_LOWPRECISION = 1 << 14 + COORD_MP_INTEGRAL = 1 << 15 + VARINT = NORMAL + ENCODED_AGAINST_TICKCOUNT = 1 << 16 + +@dataclass(frozen=True) +class SendProp: + name: str + type: int #SendPropType + offset: int + bits: int + flags: int + table: 'SendTable' = None + + def __repr__(self): + # Use id() with table or else infinite recursion + return f"SendProp(name={self.name}, type={self.type}, offset={self.offset}, bits={self.bits}, flags={self.flags}, table={id(self.table):#x})" + + def add_to_struc(self, 
struc, offset): + # So, unfortunately, it doesn't seem to be possible to implement baseclasses + # while also keeping vtables intact. This might actually be possible as it can be done + # with IDA's header parser, but this might not be exposed to the API. + # Implementing baseclasses with seamless vtable integration is a TODO + # The framework is more or less here, so if I manage to figure that out it won't + # be that difficult to implement + # Might have to do with optinfo_t pointing to the proper vtable? Dunno + # if self.table is not None: + # baseclass = DataCache.struccache.get(self.table.classname, None) + # if baseclass is None: + # self.table.create_struc() + + # baseclass = DataCache.struccache[self.table.classname] + + if self.table is not None: + # Array + # We *could* parse these and implement them as embedded classes/arrays + # but there's no guarantee that we would get a proper size, which could + # cause some really poor results + # There's a good chance that more array data is actually in the inner table's m_pExtraData + # Mayhaps a SourceMod PR for another time + if not self.table.name.startswith("_ST_"): + # Bad hack but catches arrays + if self.table.name == self.name: + if self.offset != 0: + self.table.add_array_to_struc(struc, offset + self.offset) + return + else: + self.table.add_to_struc(struc, offset + self.offset) + + # Offset is 0 so we die + if self.offset == 0: + return + + curroffset = self.offset + offset + + currmem = idaapi.get_member(struc, curroffset) + if currmem is not None: +# print(f"Member {self.name} already exists in {idaapi.get_struc_name(struc.id)}") + return + + idaflags, sz = self.calc_sz() + tinfo = self.get_tinfo() + targetname = idaapi.validate_name(self.name, idaapi.VNT_IDENT) + + serr = idaapi.add_struc_member(struc, targetname, curroffset, idaflags, None, sz) + if serr != idaapi.STRUC_ERROR_MEMBER_OK: + # I really don't wanna deal with these silly subclasses + if serr < idaapi.STRUC_ERROR_MEMBER_OFFSET: + 
print(f"Could not add struct member {idaapi.get_struc_name(struc.id)}.{targetname} at {curroffset} ({curroffset:#x})! Error {serr}") + return + + currmem = idaapi.get_member(struc, curroffset) + if tinfo is not None: + idaapi.set_member_tinfo(struc, currmem, 0, tinfo, 0) + elif self.flags and self.flags & SendFlags.UNSIGNED.value: + currinfo = idaapi.tinfo_t() + if idaapi.get_member_tinfo(currinfo, currmem): + currinfo.change_sign(idaapi.type_unsigned) + idaapi.set_member_tinfo(struc, currmem, 0, currinfo, 0) + + def calc_sz(self): + if self.type == SendPropType.DPT_Float.value: + return idc.FF_FLOAT, 4 + elif self.type == SendPropType.DPT_Int64.value: + return idc.FF_QWORD, 8 + elif self.type == SendPropType.DPT_String.value: + return FF_PTR, ctypes.sizeof(ea_t) + elif self.type == SendPropType.DPT_Vector.value: + # Returning FF_STRUCT doesn't work because the proper opinfo_t needs to be set + # but this can be cheesed by just setting it to FF_DWORD and setting the tinfo after + return idc.FF_DWORD, 12 #idc.FF_STRUCT + + absmax = ceil(self.bits/8.0) + if absmax == 1: + flags = idc.FF_BYTE + numbytes = 1 + elif absmax == 2: + flags = idc.FF_WORD + numbytes = 2 + else: + flags = idc.FF_DWORD + numbytes = 4 + + return flags, numbytes + + def get_tinfo(self): + return { + SendPropType.DPT_Vector.value: VECTOR, +# SendPropType.DPT_Int.value: idaapi.tinfo_t(idaapi.BT_INT), + SendPropType.DPT_Float.value: idaapi.tinfo_t(idaapi.BT_FLOAT), +# SendPropType.DPT_String.value: idaapi.tinfo_t(idaapi.BT_PTR), + SendPropType.DPT_Int64.value: idaapi.tinfo_t(idaapi.BT_INT64), + }.get(self.type, None) + +@dataclass +class SendTable: + name: str + props: list[SendProp] + # For mapping to a "C"-class + # I'm gonna assume that there'll be some game that won't suffice with a "replace DT_ with C" method, + # so we have SendTable objects point to their actual class name + classname: str + + @staticmethod + def create(elem:et.Element, classname=None): + name = elem.attrib["name"] + + # 
Check if we've already cached this table, update classname if so + # because if this is true, then its classname is surely missing + if name in DataCache.tablecache: + if classname is not None: + DataCache.tablecache[name].classname = classname + return DataCache.tablecache[name] + + props = [] + for p in elem: + pname = p.attrib["name"] + + # Collect and format the fields + stype = p.find("type").text if p.find("type") != None else None + ptype = str_to_dt_type(stype) + sflags = p.find("flags").text if p.find("flags") != None else None + flags = str_to_sendflags(sflags) + offset = int(p.find("offset").text) if p.find("offset") != None else None + bits = int(p.find("bits").text) if p.find("bits") != None else None + ptable = SendTable.create(p.find("sendtable")) if p.find("sendtable") != None else None + + # Append a new prop + props.append(SendProp(pname, ptype, offset, bits, flags, ptable)) + + # Cache and return + DataCache.tablecache[name] = SendTable(name, props, classname) + return DataCache.tablecache[name] + + def create_struc(self): + struc = add_struc_ex(self.classname) + + self.add_to_struc(struc, 0) + + #DataCache.struccache[self.classname] = struc + + def add_to_struc(self, struc, offset): + for prop in self.props: + prop.add_to_struc(struc, offset) + + def add_array_to_struc(self, struc, offset): + if offset == 0: + return + + idaflags, sz = self.props[0].calc_sz() + if len(self.props) > 1: + sz = (self.props[1].offset - self.props[0].offset) + idaflags = sz_to_idaflags(sz) + + sz *= len(self.props) + + tinfo = self.props[0].get_tinfo() + targetname = idaapi.validate_name(self.name, idaapi.VNT_IDENT) + + serr = idaapi.add_struc_member(struc, targetname, offset, idaflags, None, sz) + if serr != idaapi.STRUC_ERROR_MEMBER_OK: + # I really don't wanna deal with these silly subclasses + if serr < idaapi.STRUC_ERROR_MEMBER_OFFSET: + print(f"Could not add struct member {idaapi.get_struc_name(struc.id)}.{targetname} at {offset} ({offset:#x})! 
Error {serr}") + return + + currmem = idaapi.get_member(struc, offset) + if tinfo is not None: + idaapi.set_member_tinfo(struc, currmem, 0, tinfo, 0) + elif self.props[0].flags and self.props[0].flags & SendFlags.UNSIGNED.value: + currinfo = idaapi.tinfo_t() + if idaapi.get_member_tinfo(currinfo, currmem): + currinfo.change_sign(idaapi.type_unsigned) + idaapi.set_member_tinfo(struc, currmem, 0, currinfo, 0) + +@dataclass(frozen=True) +class ServerClass: + name: str + sendtable: SendTable + + @staticmethod + def create(elem: et.Element, classname): + sendtable = elem.find("sendtable") + return ServerClass(classname, SendTable.create(sendtable, classname)) + + def create_struc(self): + self.sendtable.create_struc() + + +# Idiot proof IDA wait box +class WaitBox: + buffertime = 0.0 + shown = False + msg = "" + + @staticmethod + def _show(msg): + WaitBox.msg = msg + if WaitBox.shown: + idaapi.replace_wait_box(msg) + else: + idaapi.show_wait_box(msg) + WaitBox.shown = True + + @staticmethod + def show(msg, buffertime=0.1): + if msg == WaitBox.msg: + return + + if buffertime > 0.0: + if time.time() - WaitBox.buffertime < buffertime: + return + WaitBox.buffertime = time.time() + WaitBox._show(msg) + + @staticmethod + def hide(): + if WaitBox.shown: + idaapi.hide_wait_box() + WaitBox.shown = False + VECTOR = None +def str_to_dt_type(t): + return { + "int": SendPropType.DPT_Int.value, + "float": SendPropType.DPT_Float.value, + "vector": SendPropType.DPT_Vector.value, + "string": SendPropType.DPT_String.value, + "array": SendPropType.DPT_Array.value, + "datatable": SendPropType.DPT_DataTable.value, + "int64": SendPropType.DPT_Int64.value + }.get(t, None) + +def str_to_sendflags(s): + if not s: + return s + + splode = s.split("|") + d = { + "Unsigned": SendFlags.UNSIGNED.value, + "Coord": SendFlags.COORD.value, + "NoScale": SendFlags.NOSCALE.value, + "RoundDown": SendFlags.ROUNDDOWN.value, + "RoundUp": SendFlags.ROUNDUP.value, + "VarInt": SendFlags.NORMAL.value, + "Normal": 
SendFlags.NORMAL.value, + "Exclude": SendFlags.EXCLUDE.value, + "XYZE": SendFlags.XYZE.value, + "InsideArray": SendFlags.INSIDEARRAY.value, + "AlwaysProxy": SendFlags.PROXY_ALWAYS_YES.value, + "ChangesOften": SendFlags.CHANGES_OFTEN.value, + "VectorElem": SendFlags.IS_A_VECTOR_ELEM.value, + "Collapsible": SendFlags.COLLAPSIBLE.value, + "CoordMP": SendFlags.COORD_MP.value, + "CoordMPLowPrec": SendFlags.COORD_MP_LOWPRECISION.value, + "CoordMpIntegral": SendFlags.COORD_MP_INTEGRAL.value, + } + flags = 0 + for fl in splode: + flags |= d.get(fl, 0) + + return flags + +def sz_to_idaflags(sz): + return { + 1: idc.FF_BYTE, + 2: idc.FF_WORD, + 4: idc.FF_DWORD, + 8: idc.FF_QWORD + }.get(sz, 1) + + def add_struc_ex(name): strucid = idaapi.get_struc_id(name) if strucid == idc.BADADDR: strucid = idaapi.add_struc(idc.BADADDR, name) - return strucid - -def add_struc_ex2(name): - strucid = idaapi.get_struc_id(name) - if strucid != idc.BADADDR: - idaapi.del_struc(idaapi.get_struc(strucid)) - - return idaapi.add_struc(idc.BADADDR, name) + return idaapi.get_struc(strucid) def calcszdata(sz): absmax = ceil(sz/8.0) @@ -37,210 +360,12 @@ def calcszdata(sz): return flags, numbytes -# Doesn't exactly work with recursive sendtables -# This nutty recursion really fucking hurts my head -def get_sendtable_size(sendtable): - size = 0 - highestoffset = 0 - highestflag = idc.FF_BYTE - for c in sendtable: - add = 0 - t = c.find("type") - if t == None: - continue - - offset = c.find("offset") - offset = int(offset.text) if offset != None else None - - highestoffset = max(offset, highestoffset) - - if t.text == "datatable": - sendtable2 = c.find("sendtable") - if sendtable2 != None: - mycls = sendtable2.attrib.get("name", None) - if mycls != None: - if not mycls.startswith("DT_"): # An array with a baseclass datatable? 
Oh well - flag, add = get_sendtable_size(sendtable2) - highestflag = max(flag, highestflag) - else: - sz = c.find("bits") - sz = int(sz.text) if sz != None else None - if sz == None: - return - - flags, numbytes = calcszdata(sz) - if t.text == "float": - flags = idc.FF_FLOAT - numbytes = 4 - highestflag = max(flags, highestflag) - add = numbytes - - size = add + highestoffset - - # Round up to the nearest 4 byte multiple -# size = int(ceil(size / 4.0) * 4) - # Actually don't, some bools can get squeezed in there (e.g. CParticleSystem.m_bWeatherEffect) - return highestflag, size - -def parse(c, struc): - if c.tag == "sendtable": - name = c.attrib.get("name", None) - if name and name.startswith("DT_"): - for i in c: - parse(i, struc) - elif c.tag == "property": - classname = c.attrib.get("name", None) - if classname != None: - if classname == "baseclass": - for p in c: - parse(p, struc) - else: - t = c.find("type") - if t == None: - return - - offset = c.find("offset") - offset = int(offset.text) if offset != None else None - if offset == None or offset == 0: - return - - # Have to be a little special with datatables - if t.text == "datatable": - idaapi.add_struc_member(struc, classname, offset, idc.FF_DWORD, None, 4) - sendtable = c.find("sendtable") - if sendtable != None: - mycls = sendtable.attrib.get("name", None) - if mycls != None: - if mycls.startswith("DT_"): - mycls = mycls.replace("DT_", "C", 1) - strucid = idaapi.get_struc_id(mycls) - if strucid == idc.BADADDR: # If this struct didn't exist, parse it - strucid = idaapi.add_struc(idc.BADADDR, mycls) - parse(sendtable, idaapi.get_struc(strucid)) - ti = idaapi.tinfo_t() # Assign the sendtable type to the struct - idaapi.parse_decl(ti, None, mycls + ";", 0) - if str(ti) != "CAttributeList": # HACK; this one doesn't work and idk what else to try - idaapi.set_member_tinfo(struc, idaapi.get_member(struc, offset), 0, ti, 0) - else: # Iterate the array and update the struct member size, hackily - flag, sizemult = 
get_sendtable_size(sendtable) - if sizemult > 4: - idaapi.set_member_type(struc, offset, flag, None, sizemult) - return - - sz = c.find("bits") - sz = int(sz.text) if sz != None else None - if sz == None: - return - - flags, numbytes = calcszdata(sz) - - if t.text == "float": - flags = idc.FF_FLOAT - numbytes = 4 - - if t.text == "vector": - idaapi.add_struc_member(struc, classname, offset, idc.FF_DWORD, None, 12) - global VECTOR - idaapi.set_member_tinfo(struc, idaapi.get_member(struc, offset), 0, VECTOR, 0) - else: - returnval = idaapi.add_struc_member(struc, classname, offset, flags, None, numbytes) - if returnval: - print("Could not add struct member {}.{}! Error {}".format(idaapi.get_struc_name(struc.id), classname, returnval)) - -def get_vtable(name): - # So, to assure that we're in a vtable, we need to find the thisoffset - # So we remangle this fucker - mangledname = "_ZTV{}{}".format(len(name), name) - # Then get the address where this mangled thisoffs is stored - return idc.get_name_ea_simple(mangledname) - -def import_vtable(classname, struc): - ea = get_vtable(classname) - if ea == idc.BADADDR: - return - - # Mildly adapted from Asherkin's vtable dumper - ea = ea + 8 # Skip typeinfo and thisoffs - - funcs = [] - while ea != idc.BADADDR: - offs = idc.get_wide_dword(ea) - if not idaapi.is_code(idaapi.get_full_flags(offs)): - break - name = idc.get_name(offs, idaapi.GN_VISIBLE) - funcs.append(name) - - ea = idaapi.next_not_tail(ea) - -# print(funcs) - - if not len(funcs): - return - - strucid = add_struc_ex(classname + "_vtbl") - vstruc = idaapi.get_struc(strucid) - for i in funcs: - # Gotta do a fancy demangle, it can't have special chars - # and there can't be multiples of the same name, so let's just jazz around all of that - demangled = idc.demangle_name(i, idc.get_inf_attr(idc.INF_SHORT_DN)) - if demangled == None: - demangled = i - else: - demangled = demangled[demangled.find("::")+2:demangled.find("(")] - demangled = demangled.replace("~", 
"_").replace("<", "_").replace(">", "_") - while 1: - error = idaapi.add_struc_member(vstruc, demangled, idc.BADADDR, idc.FF_DWORD, None, 4) - - if error == 0: - break - - demangled += "_{}".format(hex(idaapi.get_struc_last_offset(vstruc) + 4)[2:]) - - # Now assign the vtable to the actual struct - ti = idaapi.tinfo_t() - idaapi.parse_decl(ti, None, classname + "_vtbl;", 0) - ti.create_ptr(ti) - idaapi.set_member_tinfo(struc, idaapi.get_member(struc, 0), 0, ti, 0) - -UPDATE_TIME = time() -def update_window(s): - global UPDATE_TIME - currtime = time() - if currtime - UPDATE_TIME > 0.2: - idaapi.replace_wait_box(s) - UPDATE_TIME = currtime - -def parse_class(c): - if c is None: - return - - if c.tag != "serverclass": - return - - classname = c.attrib["name"] - - update_window("Importing {}".format(classname)) - strucid = add_struc_ex(classname) - struc = idaapi.get_struc(strucid) - - # Add the vtable here, anywhere else and it might be slotted into a class w/o vfuncs - m = idaapi.get_member(struc, 0) - if m == None: - idaapi.add_struc_member(struc, "__vftable", 0, idc.FF_DWORD, None, 4) - - global IMPORT_VTABLE - if IMPORT_VTABLE: - import_vtable(classname, struc) - - if len(c): - parse(c[0], struc) - # Fix SM's bad xml structure def fix_xml(data): for i in range(len(data)): data[i] = data[i].replace('""', '"') - data[3] = "\n" + data[3] = "\n" data.append("\n") return data @@ -249,53 +374,74 @@ def make_basic_structs(): strucid = idaapi.get_struc_id("Vector") if strucid == idc.BADADDR: struc = idaapi.get_struc(idaapi.add_struc(idc.BADADDR, "Vector")) - idaapi.add_struc_member(struc, "x", idc.BADADDR, idc.FF_FLOAT, None, 4) - idaapi.add_struc_member(struc, "y", idc.BADADDR, idc.FF_FLOAT, None, 4) - idaapi.add_struc_member(struc, "z", idc.BADADDR, idc.FF_FLOAT, None, 4) + idaapi.add_struc_member(struc, "x", idc.BADADDR, idc.FF_FLOAT|idc.FF_DATA, None, 4) + idaapi.add_struc_member(struc, "y", idc.BADADDR, idc.FF_FLOAT|idc.FF_DATA, None, 4) + 
idaapi.add_struc_member(struc, "z", idc.BADADDR, idc.FF_FLOAT|idc.FF_DATA, None, 4) + strucid = idaapi.get_struc_id("Vector") global VECTOR VECTOR = idaapi.tinfo_t() - idaapi.parse_decl(VECTOR, None, "Vector;", 0) + if idaapi.guess_tinfo(VECTOR, strucid) == idaapi.GUESS_FUNC_FAILED: + VECTOR = None strucid = idaapi.get_struc_id("QAngle") if strucid == idc.BADADDR: struc = idaapi.get_struc(idaapi.add_struc(idc.BADADDR, "QAngle")) - idaapi.add_struc_member(struc, "x", idc.BADADDR, idc.FF_FLOAT, None, 4) - idaapi.add_struc_member(struc, "y", idc.BADADDR, idc.FF_FLOAT, None, 4) - idaapi.add_struc_member(struc, "z", idc.BADADDR, idc.FF_FLOAT, None, 4) + idaapi.add_struc_member(struc, "x", idc.BADADDR, idc.FF_FLOAT|idc.FF_DATA, None, 4) + idaapi.add_struc_member(struc, "y", idc.BADADDR, idc.FF_FLOAT|idc.FF_DATA, None, 4) + idaapi.add_struc_member(struc, "z", idc.BADADDR, idc.FF_FLOAT|idc.FF_DATA, None, 4) def main(): - idaapi.set_ida_state(idaapi.st_Work) data = None - with open(idaapi.ask_file(0, "*.xml", "Select a file to import")) as f: - data = f.readlines() - - if data is None: - idaapi.set_ida_state(idaapi.st_Ready) - return - - idaapi.show_wait_box("Importing file") - make_basic_structs() - try: - # SM 1.10 <= has bad XML, assume its correct first then try to fix it - tree = et.fromstringlist(data) + fopen = idaapi.ask_file(0, "*.xml", "Select a file to import") + if fopen is None: + return + + idaapi.set_ida_state(idaapi.st_Work) + WaitBox.show("Parsing XML") + with open(fopen) as f: + data = f.readlines() + + if data is None: + idaapi.set_ida_state(idaapi.st_Ready) + return + + make_basic_structs() + + try: + # SM 1.10 <= has bad XML, assume its correct first then try to fix it + tree = et.fromstringlist(data) + except: + fix_xml(data) + tree = et.fromstringlist(data) + + if tree is None: + idaapi.warning("Something bad happened :(") + idaapi.set_ida_state(idaapi.st_Ready) + return + + WaitBox.show("Creating ServerClasses") + classes = {} + for cls in tree: + 
classname = cls.attrib["name"] + classes[classname] = ServerClass.create(cls, classname) + + idaapi.begin_type_updating(idaapi.UTP_STRUCT) + + WaitBox.show("Adding struct members") + for classname, serverclass in classes.items(): + serverclass.create_struc() + + print("Done!") except: - fix_xml(data) - tree = et.fromstringlist(data) - - if tree is None: - idaapi.hide_wait_box() - idaapi.warning("Something bad happened :(") - idaapi.set_ida_state(idaapi.st_Ready) - return - - global IMPORT_VTABLE - IMPORT_VTABLE = idaapi.ask_yn(1, "Import virtual tables for classes? (Longer)") + import traceback + traceback.print_exc() + print("Please file a bug report with supporting information at https://github.com/Scags/IDA-Scripts/issues") + idaapi.beep() - for i in tree: - parse_class(i) - idaapi.hide_wait_box() + WaitBox.hide() + idaapi.end_type_updating(idaapi.UTP_STRUCT) idaapi.set_ida_state(idaapi.st_Ready) main() \ No newline at end of file diff --git a/sigfind.py b/sigfind.py new file mode 100644 index 0000000..1d2ba7f --- /dev/null +++ b/sigfind.py @@ -0,0 +1,51 @@ +import idc +import idaapi +import idautils + +def getsigloc(sig): + # Get the first segment that is executable to use its addresses for parse_binpat_str + endea = idc.BADADDR + for segea in idautils.Segments(): + s = idaapi.getseg(segea) + if s.perm & idaapi.SEGPERM_EXEC: + segstart = segea + # Set the end ea to the end of the last executable segment + # Speed isn't as important in this script, so reading any extra X + # segments is fine + if endea == idc.BADADDR or endea < segstart + s.size(): + endea = segstart + s.size() + break + + count = 0 + first = idaapi.find_binary(0, endea, sig, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) + addr = first + while addr != idc.BADADDR: + count = count + 1 + addr = idaapi.find_binary(addr, endea, sig, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) + + return first, count + + # binpat = idaapi.compiled_binpat_vec_t() + # # This returns false but it works? 
+ # idaapi.parse_binpat_str(binpat, segstart, sig, 16, idaapi.get_default_encoding_idx(idaapi.get_encoding_bpu_by_name("UTF-8"))) + # addr, _ = idaapi.bin_search3(0, endea, binpat, idaapi.BIN_SEARCH_FORWARD) + # return addr + + +def main(): + bytesig = idaapi.ask_str("", 0, "Insert signature: ") + if bytesig is None: + return + + sig = bytesig.replace(r"\x", " ").replace("2A", "?").replace("2a", "?").strip() + + loc, count = getsigloc(sig) + if loc != idc.BADADDR: + idaapi.jumpto(loc) + if count > 1: + print(f"Found {count} instances of signature. Jumping to first at {loc:#X}") + else: + # Beep, nothing found + idaapi.beep() + +main() \ No newline at end of file diff --git a/sigsmasher.py b/sigsmasher.py index cfe0bf7..ab19552 100644 --- a/sigsmasher.py +++ b/sigsmasher.py @@ -2,209 +2,255 @@ import idc import idaapi import yaml +import time from math import floor -from time import time, strftime, gmtime MAX_SIG_LENGTH = 512 -FUNCS_SEGSTART = 0 -FUNCS_SEGEND = None - # Change to 1 to have a very optimized makesig # Will produce useable signatures but theyll be a bit more volatile # since they rely on the position of the function in the binary # Uses the end of the function to search compared to the end of the .text segment ABSOLUTE_OPTIMIZATION = 0 -def get_dt_size(dtype): - return { - idaapi.dt_byte: 1, - idaapi.dt_word: 2, - idaapi.dt_dword: 4, - idaapi.dt_float: 4, - idaapi.dt_double: 8, - }.get(dtype, -1) - -def print_wildcards(count): - return "? 
" * count +# Write-only trie for signatures +# This is slightly faster than constantly running search_binary as +# common signature prologues will be caught early and more quickly +class Trie(object): + def __init__(self): + self.root = {} + + def add(self, data): + node = self.root + for d in data: + if d not in node: + node[d] = {} + node = node[d] + + def find(self, data): + node = self.root + for d in data: + if d not in node: + return False + node = node[d] + return True + + def __contains__(self, data): + return self.find(data) + +TRIE = Trie() + +# Idiot proof IDA wait box + + +class WaitBox: + buffertime = 0.0 + shown = False + msg = "" + + @staticmethod + def _show(msg): + WaitBox.msg = msg + if WaitBox.shown: + idaapi.replace_wait_box(msg) + else: + idaapi.show_wait_box(msg) + WaitBox.shown = True + + @staticmethod + def show(msg, buffertime=0.1): + if msg == WaitBox.msg: + return + + if buffertime > 0.0: + if time.time() - WaitBox.buffertime < buffertime: + return + WaitBox.buffertime = time.time() + WaitBox._show(msg) + + @staticmethod + def hide(): + if WaitBox.shown: + idaapi.hide_wait_box() + WaitBox.shown = False + +FUNCS_SEGEND = idc.BADADDR +def calc_sigstop(): + endea = idc.BADADDR + for segea in idautils.Segments(): + s = idaapi.getseg(segea) + if s.perm & idaapi.SEGPERM_EXEC: + segstart = segea + # Set the end ea to the end of the last executable segment + # Speed isn't as important in this script, so reading any extra X + # segments is fine + if endea == idc.BADADDR or endea < segstart + s.size(): + endea = segstart + s.size() + + return endea def is_good_sig(sig, funcend): + if sig in TRIE: + return False + + bytesig = " ".join(sig) + endea = funcend if ABSOLUTE_OPTIMIZATION else FUNCS_SEGEND count = 0 - addr = FUNCS_SEGSTART # Linux has a .LOAD section in front - # The odds of this having matching bytes are about 0 - # so let's just skip it, would save a lot of time - addr = idaapi.find_binary(addr, endea, sig, 0, 
idc.SEARCH_DOWN|idc.SEARCH_NEXT) + addr = 0 + addr = idaapi.find_binary(addr, endea, bytesig, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) while count < 2 and addr != idc.BADADDR: count = count + 1 - addr = idaapi.find_binary(addr, endea, sig, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) + addr = idaapi.find_binary(addr, endea, bytesig, 0, idc.SEARCH_DOWN|idc.SEARCH_NEXT) + + # Good sig, add it to the trie + if count == 1: + TRIE.add(sig) + return True - return count == 1 + return False -def makesig(func): - sig = "" +def makesigfast(func): + addr = func.start_ea found = 0 - funcstart = func.start_ea - funcend = func.end_ea - done = 0 - global MAX_SIG_LENGTH - addr = funcstart + sig = [] while addr != idc.BADADDR: info = idaapi.insn_t() if not idaapi.decode_insn(info, addr): return None done = 0 - if info.Op1.type == idaapi.o_near or info.Op1.type == idaapi.o_far: - if (idc.get_wide_byte(addr)) == 0x0F: # Two-byte instruction - sig = sig + ("0F %02X " % idc.get_wide_byte(addr + 1)) + print_wildcards(get_dt_size(info.Op1.dtype)) - else: - sig = sig + ("%02X " % idc.get_wide_byte(addr)) + print_wildcards(get_dt_size(info.Op1.dtype)) + if info.Op1.type in (idaapi.o_near, idaapi.o_far): + insnsz = 2 if idaapi.get_byte(addr) == 0x0F else 1 + sig += [f"{idaapi.get_byte(addr+i):02X}" for i in range(insnsz)] + ["?"] * (info.size - insnsz) + done = 1 + elif info.Op1.type == idaapi.o_reg and info.Op2.type == idaapi.o_mem and info.Op2.addr != idc.BADADDR: + sig += [f"{idaapi.get_byte(addr+i):02X}" for i in range(info.Op2.offb)] + ["?"] * (info.size - info.Op2.offb) done = 1 if not done: # Unknown, just wildcard addresses i = 0 - size = idc.get_item_size(addr) - while 1: # Screw u python + while i < info.size: loc = addr + i - if ((idc.get_fixup_target_type(loc) & 0xF) == idaapi.FIXUP_OFF32): - sig = sig + print_wildcards(4) - i = i + 3 + if ((idc.get_fixup_target_type(loc) & 0x0F) == idaapi.FIXUP_OFF32): + sig += ["?"] * 4 + i += 3 + elif (idc.get_fixup_target_type(loc) & 0x0F) == 
idaapi.FIXUP_OFF64: + sig += ["?"] * 8 + i += 7 else: - sig = sig + ("%02X " % idc.get_wide_byte(loc)) + sig += [f"{idaapi.get_byte(addr+i):02X}"] - i = i + 1 - - if i >= size: - break + i += 1 # Escape the evil functions that break everything if len(sig) > MAX_SIG_LENGTH: return "Signature is too long!" # Save milliseconds and only check for good sigs after a fewish bytes # Trust me, it matters - elif sig.count(" ") >= 5 and is_good_sig(sig, funcend): + elif len(sig) >= 5 and is_good_sig(sig, func.end_ea): found = 1 break - addr = idc.next_head(addr, funcend) + addr = idc.next_head(addr, func.end_ea) if found == 0: return "Ran out of bytes!" - l = len(sig) - 1 - smsig = r"\x" - for i in range(l): - c = sig[i] - if c == " ": - smsig = smsig + r"\x" - elif c == "?": - smsig = smsig + "2A" - else: - smsig = smsig + c - + smsig = r"\x" + r"\x".join(sig) + smsig = smsig.replace("?", "2A") return smsig -UPDATE_TIME = time() -def update_window(activity): - global UPDATE_TIME - currtime = time() - if currtime - UPDATE_TIME > 0.2: - UPDATE_TIME = currtime - idaapi.replace_wait_box(activity) - -def calc_func_segments(): - global FUNCS_SEGSTART, FUNCS_SEGEND - seg = idaapi.get_segm_by_name(".text") - if seg: - FUNCS_SEGSTART = seg.start_ea - FUNCS_SEGEND = seg.end_ea - def main(): - idaapi.set_ida_state(idaapi.st_Work) - root = {} - - count = 0 - sigcount = 0 - sigattempts = 0 - - calc_func_segments() - - funcs = list(idautils.Functions(FUNCS_SEGSTART, FUNCS_SEGEND)) - - alltime = 0.0 - avgtime = 0.0 - - f = idaapi.ask_file(1, "*.yml", "Choose a file to save to") - if not f: - return - - skip = idaapi.ask_yn(1, "Skip functions that start with \"sub_\"?") - if skip == -1: - return - - # Clean up and get rid of shitty funcs - funccpy = funcs[:] - for fea in funccpy: - funcname = idaapi.get_func_name(fea) - if funcname is None or funcname.startswith("nullsub"): - funcs.remove(fea) - continue - - if skip and funcname.startswith("sub"): - funcs.remove(fea) - continue - - flags = 
idc.get_func_attr(fea, idc.FUNCATTR_FLAGS) - if flags & idaapi.FUNC_LIB: - funcs.remove(fea) - continue - - funccount = len(funcs) - for fea in funcs: - starttime = time() - - func = idaapi.get_func(fea) - funcname = idaapi.get_func_name(fea) - if funcname != None: - unmangled = idc.demangle_name(funcname, idc.get_inf_attr(idc.INF_SHORT_DN)) + try: + root = {} + + f = idaapi.ask_file(1, "*.yml", "Choose a file to save to") + if not f: + return + + skip = idaapi.ask_yn(1, "Skip unnamed functions (e.g. ones that start with \"sub_\")?") + if skip == -1: + return + + idaapi.set_ida_state(idaapi.st_Work) + global FUNCS_SEGEND + FUNCS_SEGEND = calc_sigstop() + + funcs = list(idautils.Functions()) + siglist = [] + + for i in range(len(funcs)): + fea = funcs[i] + flags = idaapi.get_full_flags(fea) + if not idaapi.is_func(flags): + continue + + if skip and not idaapi.has_name(flags): + continue + + func = idaapi.get_func(fea) + # Thunks and lib funcs + if func.flags & (idaapi.FUNC_LIB | idaapi.FUNC_THUNK): + continue + + funcname = idaapi.get_name(fea) + unmangled = idaapi.demangle_name(funcname, idaapi.MNG_SHORT_FORM) + if unmangled is not None: + # Skip jmp stubs + if unmangled.startswith("j_"): + continue + + # Nullsub + if unmangled.startswith("nullsub"): + continue + + siglist.append(func) + + totalcount = len(siglist) + actualstarttime = time.time() + sigcount = 0 + for i, func in enumerate(siglist): + funcname = idaapi.get_name(func.start_ea) + unmangled = idaapi.demangle_name(funcname, idaapi.MNG_SHORT_FORM) if unmangled is None: unmangled = funcname - sig = makesig(func) - sigattempts += 1 + sig = makesigfast(func) root[unmangled] = {"mangled": funcname, "signature": sig} if sig: sigcount += (0 if "!" 
in sig else 1) - # Only ETA makesig() attempts, otherwise the timing is really off - # Unfortunately, sigging takes progressively longer the further along the function list - # this goes, as makesig() searches from up to down while functions are ordered from up to down - # So this isn't really accurate but w/e - - multpct = 2.0 - count / float(funccount) # Scale up a bit the lower we start at the get a halfass decent eta - alltime += time() - starttime - avgtime = alltime / sigattempts - eta = int(avgtime * (funccount - count) * multpct) - etastr = strftime("%H:%M:%S", gmtime(eta)) + # Unfortunately, sigging takes progressively longer the further along the function list + # this goes, as makesig() searches from top to bottom while functions are ordered from top to bottom + # So this isn't really accurate but w/e - count += 1 - update_window("Evaluated {} out of {} ({}%)\nETA: {}".format(count, funccount, floor(count / float(funccount) * 100.0 * 10.0) / 10.0, etastr)) + totaltime = time.time() - actualstarttime + count = i + 1 + avgtime = totaltime / count + eta = int(avgtime * (totalcount - count)) + etastr = time.strftime("%H:%M:%S", time.gmtime(eta)) - while f.count(".yml") >= 2: - f = f.replace(".yml", "", 1) - if not f.endswith(".yml"): - f += ".yml" + WaitBox.show(f"Evaluated {count} out of {totalcount} ({floor(i / float(totalcount) * 100.0 * 10.0) / 10.0}%)\nETA: {etastr}") - with open(f, "w") as f: - yaml.safe_dump(root, f, default_flow_style = False, width = 999999) + WaitBox.show("Saving to file") + with open(f, "w") as f: + yaml.safe_dump(root, f, default_flow_style=False, width=999999) - idaapi.hide_wait_box() - print("Successfully generated {} signatures from {} functions".format(sigcount, funccount)) + totaltime = time.strftime("%H:%M:%S", time.gmtime(time.time() - actualstarttime)) + print(f"Successfully generated {sigcount} signatures from {totalcount} functions in {totaltime}") + except: + import traceback + traceback.print_exc() + print("Please 
file a bug report with supporting information at https://github.com/Scags/IDA-Scripts/issues") + idaapi.beep() idaapi.set_ida_state(idaapi.st_Ready) + WaitBox.hide() -main() \ No newline at end of file +# import cProfile +# cProfile.run("main()", "sigsmasher.prof") +main() diff --git a/structaligner.py b/structaligner.py deleted file mode 100644 index 586dad6..0000000 --- a/structaligner.py +++ /dev/null @@ -1,35 +0,0 @@ -import idc -import idautils -import idaapi - -from time import time -from math import floor - -UPDATE_TIME = time() -def update_window(s): - global UPDATE_TIME - currtime = time() - if currtime - UPDATE_TIME > 0.2: - idaapi.replace_wait_box(s) - UPDATE_TIME = currtime - -def main(): - maxstructs = idaapi.get_last_struc_idx() - i = idaapi.get_first_struc_idx() - while i < maxstructs: - update_window("{}%".format(floor(i / float(maxstructs) * 100.0 * 10.0) / 10.0)) - strucid = idaapi.get_struc_by_idx(i) - struc = idaapi.get_struc(strucid) - k = 0 - struclen = idaapi.get_max_offset(struc) - while k < struclen: - member = idaapi.get_member(struc, k) - if not member: - idaapi.add_struc_member(struc, "field_{}".format(hex(k)[2:].upper()), k, idc.FF_BYTE, None, 1) - k += 1 - else: - k += idaapi.get_member_size(member) - - i += 1 - -main() \ No newline at end of file diff --git a/structfiller.py b/structfiller.py new file mode 100644 index 0000000..58ad7dd --- /dev/null +++ b/structfiller.py @@ -0,0 +1,71 @@ +import idc +import idautils +import idaapi +import time + +from math import floor + +# Idiot proof IDA wait box +class WaitBox: + buffertime = 0.0 + shown = False + msg = "" + + @staticmethod + def _show(msg): + WaitBox.msg = msg + if WaitBox.shown: + idaapi.replace_wait_box(msg) + else: + idaapi.show_wait_box(msg) + WaitBox.shown = True + + @staticmethod + def show(msg, buffertime = 0.1): + if msg == WaitBox.msg: + return + + if buffertime > 0.0: + if time.time() - WaitBox.buffertime < buffertime: + return + WaitBox.buffertime = time.time() + 
WaitBox._show(msg) + + @staticmethod + def hide(): + if WaitBox.shown: + idaapi.hide_wait_box() + WaitBox.shown = False + +def main(): + try: + idaapi.begin_type_updating(idaapi.UTP_STRUCT) + maxstructs = idaapi.get_last_struc_idx() + i = idaapi.get_first_struc_idx() + while i < maxstructs: + WaitBox.show(f"{floor(i / float(maxstructs) * 100.0 * 10.0) / 10.0}%") + strucid = idaapi.get_struc_by_idx(i) + struc = idaapi.get_struc(strucid) + k = 0 + struclen = idaapi.get_max_offset(struc) + while k < struclen: + member = idaapi.get_member(struc, k) + if not member: + idaapi.add_struc_member(struc, f"field_{k:X}", k, idc.FF_BYTE, None, 1) + k += 1 + else: + k += idaapi.get_member_size(member) + + i += 1 + + print("Done!") + except: + import traceback + traceback.print_exc() + print("Please file a bug report with supporting information at https://github.com/Scags/IDA-Scripts/issues") + idaapi.beep() + + WaitBox.hide() + idaapi.end_type_updating(idaapi.UTP_STRUCT) + +main() \ No newline at end of file diff --git a/symbolsmasher.py b/symbolsmasher.py index 7863e23..effb0e6 100644 --- a/symbolsmasher.py +++ b/symbolsmasher.py @@ -3,7 +3,7 @@ import idaapi import json -from time import time +import time from sys import version_info # Are we reading this DB or writing to it. 
Not to be confused with reading from/writing to the work file @@ -13,6 +13,38 @@ DEBUG = 0 +# Idiot proof IDA wait box +class WaitBox: + buffertime = 0.0 + shown = False + msg = "" + + @staticmethod + def _show(msg): + WaitBox.msg = msg + if WaitBox.shown: + idaapi.replace_wait_box(msg) + else: + idaapi.show_wait_box(msg) + WaitBox.shown = True + + @staticmethod + def show(msg, buffertime = 0.1): + if msg == WaitBox.msg: + return + + if buffertime > 0.0: + if time.time() - WaitBox.buffertime < buffertime: + return + WaitBox.buffertime = time.time() + WaitBox._show(msg) + + @staticmethod + def hide(): + if WaitBox.shown: + idaapi.hide_wait_box() + WaitBox.shown = False + def get_action(): return idaapi.ask_buttons("Reading from", "Writing to", "", 0, "What action are we performing on this database?") @@ -25,19 +57,6 @@ def get_file(action): # Show how many functions we've found FOUND_FUNCS = set() -# Don't update asap as that throttles script speed, split sec is fine ig -UPDATE_TIME = time() -def update_window(activity, hidefuncs = False): - global UPDATE_TIME - if not hidefuncs: - currtime = time() - if currtime - UPDATE_TIME > 0.2: - activity += "\nFunctions found: {}".format(len(FOUND_FUNCS)) - UPDATE_TIME = currtime - else: - return - - idaapi.replace_wait_box(activity) # Format: # "String Name": @@ -83,12 +102,12 @@ def build_data_dict(strdict): return funcs def read_strs(strings, file): - update_window("Reading strings", True) + WaitBox.show("Reading strings", True) # Build an organized dictionary of the string data we can get strdict = build_xref_dict(strings) # Then reorient it around functions, then dump it funcdict = build_data_dict(strdict) - update_window("Dumping to file", True) + WaitBox.show("Dumping to file", True) # Running the script in write mode will build a similar dict then compare the two through functions json.dump(funcdict, file, indent = 4, sort_keys = True) @@ -100,7 +119,7 @@ def get_bcompat_keys(d): def write_exact_comp(strdict, 
funcdict, myfuncs): global FOUND_FUNCS - update_window("Writing exact comparisons") + WaitBox.show("Writing exact comparisons") count = 0 for strippedname, strippedlist in get_bcompat_iter(strdict): @@ -127,7 +146,7 @@ def write_exact_comp(strdict, funcdict, myfuncs): count += 1 FOUND_FUNCS.add(possibilities[0]) - update_window("Writing exact comparisons") + WaitBox.show("Writing exact comparisons") elif DEBUG: print("{} is probably wrong!".format(idc.demangle_name(possibilities[0], idc.get_inf_attr(idc.INF_SHORT_DN)))) @@ -136,7 +155,7 @@ def write_exact_comp(strdict, funcdict, myfuncs): def write_simple_comp(strdict, funcdict, myfuncs, liw = True): global FOUND_FUNCS s = "symboled in stripped" if liw else "stripped in symboled" - update_window("Writing simple comparisons ({})".format(s)) + WaitBox.show("Writing simple comparisons ({})".format(s)) count = 0 for strippedname, strippedlist in get_bcompat_iter(strdict): @@ -167,7 +186,7 @@ def write_simple_comp(strdict, funcdict, myfuncs, liw = True): count += 1 FOUND_FUNCS.add(possibilities[0]) - update_window("Writing simple comparisons ({})".format(s)) + WaitBox.show("Writing simple comparisons ({})".format(s)) elif DEBUG: print("{} is probably wrong!".format(idc.demangle_name(possibilities[0], idc.get_inf_attr(idc.INF_SHORT_DN)))) @@ -191,7 +210,7 @@ def clean_data_dict(strdict): # strdict = resultant def write_symbols(strings, file): - update_window("Loading file", True) + WaitBox.show("Loading file", True) funcdict = json.load(file) if not funcdict: idaapi.warning("Could not load function data from file") @@ -226,25 +245,31 @@ def write_symbols(strings, file): return exact_count, liw, wil def main(): - action = get_action() - if action == Mode_Invalid: - return + try: + action = get_action() + if action == Mode_Invalid: + return - file = get_file(action) - if file is None: - return + file = get_file(action) + if file is None: + return -# strings = get_strs() - strings = list(idautils.Strings()) - if action == 
Mode_Read: - read_strs(strings, file) - print("Done!") - else: - c1, c2, c3 = write_symbols(strings, file) - print("Successfully typed {} functions".format(len(FOUND_FUNCS))) - print("\t- {} Exact\n\t- {} Symboled in stripped\n\t- {} Stripped in symboled".format(c1, c2, c3)) - - idaapi.hide_wait_box() + # strings = get_strs() + strings = list(idautils.Strings()) + if action == Mode_Read: + read_strs(strings, file) + print("Done!") + else: + c1, c2, c3 = write_symbols(strings, file) + print("Successfully typed {} functions".format(len(FOUND_FUNCS))) + print("\t- {} Exact\n\t- {} Symboled in stripped\n\t- {} Stripped in symboled".format(c1, c2, c3)) + except: + import traceback + traceback.print_exc() + print("Please file a bug report with supporting information at https://github.com/Scags/IDA-Scripts/issues") + idaapi.beep() + + WaitBox.hide() file.close() main() \ No newline at end of file diff --git a/vtable_io.py b/vtable_io.py index 9ba6ae9..3619424 100644 --- a/vtable_io.py +++ b/vtable_io.py @@ -1,599 +1,1235 @@ import idc import idautils import idaapi -#import yaml -import json # YAML is just too slow for this +import json +import ctypes +import time +import re + +from dataclasses import dataclass + +if idc.__EA64__: + ea_t = ctypes.c_uint64 + ptr_t = ctypes.c_int64 + get_ptr = idaapi.get_qword + FF_PTR = idc.FF_QWORD +else: + ea_t = ctypes.c_uint32 + ptr_t = ctypes.c_int32 + get_ptr = idaapi.get_dword + FF_PTR = idc.FF_DWORD + +# Calling these a lot so we'll speed up the invocations by manually implementing them here +def is_off(f): return (f & (idc.FF_0OFF|idc.FF_1OFF)) != 0 +def is_code(f): return (f & idaapi.MS_CLS) == idc.FF_CODE +def has_any_name(f): return (f & idc.FF_ANYNAME) != 0 +def is_ptr(f): return (f & idaapi.MS_CLS) == idc.FF_DATA and (f & idaapi.DT_TYPE) == FF_PTR + +# Let's go https://www.blackhat.com/presentations/bh-dc-07/Sabanal_Yason/Paper/bh-dc-07-Sabanal_Yason-WP.pdf + +_RTTICompleteObjectLocator_fields = [ + ("signature", 
ctypes.c_uint32), # signature + ("offset", ctypes.c_uint32), # offset of this vtable in complete class (from top) + ("cdOffset", ctypes.c_uint32), # offset of constructor displacement + ("pTypeDescriptor", ctypes.c_uint32), # ref TypeDescriptor + ("pClassHierarchyDescriptor", ctypes.c_uint32), # ref RTTIClassHierarchyDescriptor + ] + +if idc.__EA64__: + _RTTICompleteObjectLocator_fields.append(("pSelf", ctypes.c_uint32)) # ref to object's base + +class RTTICompleteObjectLocator(ctypes.Structure): + _fields_ = _RTTICompleteObjectLocator_fields + + +class TypeDescriptor(ctypes.Structure): + _fields_ = [ + ("pVFTable", ctypes.c_uint32), # reference to RTTI's vftable + ("spare", ctypes.c_uint32), # internal runtime reference + ("name", ctypes.c_uint8), # type descriptor name (no varstruct needed since we don't use this) + ] + + +class RTTIClassHierarchyDescriptor(ctypes.Structure): + _fields_ = [ + ("signature", ctypes.c_uint32), # signature + ("attribs", ctypes.c_uint32), # attributes + ("numBaseClasses", ctypes.c_uint32), # # of items in the array of base classes + ("pBaseClassArray", ctypes.c_uint32), # ref BaseClassArray + ] + + +class RTTIBaseClassDescriptor(ctypes.Structure): + _fields_ = [ + ("pTypeDescriptor", ctypes.c_uint32), # ref TypeDescriptor + ("numContainedBases", ctypes.c_uint32), # # of sub elements within base class array + ("mdisp", ctypes.c_uint32), # member displacement + ("pdisp", ctypes.c_uint32), # vftable displacement + ("vdisp", ctypes.c_uint32), # displacement within vftable + ("attributes", ctypes.c_uint32), # base class attributes + ("pClassDescriptor", ctypes.c_uint32), # ref RTTIClassHierarchyDescriptor + ] + + +class base_class_type_info(ctypes.Structure): + _fields_ = [ + ("basetype", ea_t), # Base class type + ("offsetflags", ea_t), # Offset and info + ] + + +class class_type_info(ctypes.Structure): + _fields_ = [ + ("pVFTable", ea_t), # reference to RTTI's vftable (__class_type_info) + ("pName", ea_t), # ref to type name + ] + +# I 
don't think this is right, but every case I found looked to be correct +# This might be a vtable? IDA sometimes says it is but not always +# Plus sometimes the flags member is 0x1, so it's not a thisoffs. Weird +class pointer_type_info(class_type_info): + _fields_ = [ + ("flags", ea_t), # Flags or something else + ("pType", ea_t), # ref to type + ] + +class si_class_type_info(class_type_info): + _fields_ = [ + ("pParent", ea_t), # ref to parent type + ] + +class vmi_class_type_info(class_type_info): + _fields_ = [ + ("flags", ctypes.c_uint32), # flags + ("basecount", ctypes.c_uint32), # # of base classes + ("pBaseArray", base_class_type_info), # array of BaseClassArray + ] + +def create_vmi_class_type_info(ea): + bytestr = idaapi.get_bytes(ea, ctypes.sizeof(vmi_class_type_info)) + tinfo = vmi_class_type_info.from_buffer_copy(bytestr) + + # Since this is a varstruct, we create a dynamic class with the proper size and type and return it instead + class vmi_class_type_info_dynamic(class_type_info): + _fields_ = [ + ("flags", ctypes.c_uint32), + ("basecount", ctypes.c_uint32), + ("pBaseArray", base_class_type_info * tinfo.basecount), + ] + + return vmi_class_type_info_dynamic + + +# Steps to retrieve vtables on Windows (MSVC): +# 1. Get RTTI's vftable (??_7type_info@@6B@) +# 2. Iterate over xrefs to, which are all TypeDescriptor objects +# a. Of course don't load up the function that uses it +# 3. At each xref load up xrefs to again +# a. There should only be at least 2, the important ones are RTTICompleteObjectLocator's AKA COL (there can be more than 1) +# b. To discern which one is which, just see if there's a label at the address +# - If there is, then that one is RTTIClassHierarchyDescriptor, so skip it +# 4. The current ea position at each xref should be at RTTICompleteObjectLocator::pTypeDescriptor, so subtract 12 to get to the beginning of the struct +# 5. Find xrefs to each. There should only be one, and it should be its vtable +# a. 
Each COL has an offset which will shows where its vtable starts, so running too far over the table will be easier to detect +# +# Steps to retrieve vtables on Linux (GCC and maybe Clang) +# 1. Get RTTI's vftable (_ZTVN10__cxxabiv117__class_type_infoE, +# _ZTVN10__cxxabiv120__si_class_type_infoE, and _ZTVN10__cxxabiv121__vmi_class_type_infoE) +# 2. First, before doing anything, shove each xref of type_info object into some sort of structure +# a. There's no easy way to cheese discerning which xref is the actual vtable, unless we want to start parsing IDA comments +# 3. Once each type_info object and their references are loaded, get the xrefs from each pVFTable +# 4. There will probably be more than one xref. +# a. To discern which one is a vtable, if the xref lies in another type_info object, then it's not a vtable +# b. The remaining xref(s) is indeed a vtable + +# Class for windows type info, helps organize things +@dataclass(frozen=True) +class WinTI(object): + typedesc: int + name: str + cols: list[int] + vtables: list[int] + +# Class for function lists (what is held in the json) +@dataclass(frozen=True) +class FuncList: + thisoffs: int + funcs: list#[VFunc] + +# Idiot proof IDA wait box +class WaitBox: + buffertime = 0.0 + shown = False + msg = "" + + @staticmethod + def _show(msg): + WaitBox.msg = msg + if WaitBox.shown: + idaapi.replace_wait_box(msg) + else: + idaapi.show_wait_box(msg) + WaitBox.shown = True + + @staticmethod + def show(msg, buffertime = 0.1): + if msg == WaitBox.msg: + return + + if buffertime > 0.0: + if time.time() - WaitBox.buffertime < buffertime: + return + WaitBox.buffertime = time.time() + WaitBox._show(msg) + + @staticmethod + def hide(): + if WaitBox.shown: + idaapi.hide_wait_box() + WaitBox.shown = False + +# Virtual class tree +class VClass(object): + def __init__(self, *args, **kwargs): + self.name = kwargs.get("name", "") + # dict[classname, VClass] + self.baseclasses = kwargs.get("baseclasses", {}) + # Same as Linux json, 
dict[thisoffs, funcs] + self.vfuncs = kwargs.get("vfuncs", {}) + # Written to when writing to Windows, dict[thisoffs, [VFunc]] + self.vfuncnames = kwargs.get("vfuncnames", {}) + # Exists solely to speed up checking for inherited functions + self.postnames = set() + + def __str__(self): + return f"{self.name} (baseclasses = {self.baseclasses}, vfuncs = {self.vfuncs})" + + def parse(self, colea, wintable): + col = get_class_from_ea(RTTICompleteObjectLocator, colea) + thisoffs = col.offset + + # Already parsed + if self.name in wintable.keys(): + if thisoffs in wintable[self.name].vfuncs.keys(): + return + + + # In 64-bit PEs, the COL references itself, remove this + xrefs = list(idautils.XrefsTo(colea)) + if idc.__EA64__: + for n in range(len(xrefs)-1, -1, -1): + if xrefs[n].frm == colea + RTTICompleteObjectLocator.pSelf.offset: + del xrefs[n] + + if len(xrefs) != 1: + print(f"[VTABLE IO] Multiple vtables point to same COL - {self.name} at {colea:#x}") + return -from sys import version_info + vtable = xrefs[0].frm + ctypes.sizeof(ea_t) + self.vfuncs[thisoffs] = parse_vtable_addresses(vtable) + +# TODO; This is created for each function in the json and for each function in each vtable +# This clearly does this for multiple of each function, so there needs to be a way to +# cache each function and reuse it for each vtable +# Possible pain point is differentiating between inheritedness +@dataclass +class VFunc: + ea: int # Address to this function + vaddr: int # Address to this function's reference in its vtable + mangledname: str + inheritid: int + name: str + postname: str + sname: str + + @staticmethod + def create(ea=idc.BADADDR, mangledname="", inheritid=-1, vaddr=idc.BADADDR): + name = "" + postname = "" + sname = "" + if mangledname: + name = idaapi.demangle_name(mangledname, idaapi.MNG_LONG_FORM) or mangledname + if name: + postname = get_func_postname(name) + sname = postname.split("(")[0] + return VFunc(ea, vaddr, mangledname, inheritid, name, postname, sname) 
+ +class VOptions(object): + StringMethod = 1 << 0 + SkipMismatches = 1 << 1 + CommentReusedFunctions = 1 << 2 + + DoNotExport = 0 + ExportNormal = 1 + ExportOnly = 2 + +# Form for script options +class VForm(idaapi.Form): + + def __init__(self): + idaapi.Form.__init__(self, r"""STARTITEM 0 +BUTTON YES* Go +BUTTON CANCEL Cancel +VTable IO +{FormChangeCb} +<#Browse#Select a file to import from :{iFileImport}> + <##Import options##Parse type strings (for hashed type info):{rStringMethod}> | <##Export options##Do not export:{rDoNotExport}> + | + {cImportOptions}> | {cExportOptions}> +<#Browse#Select a file to export to (ignored if unchecked):{iFileExport}> + """, { + "FormChangeCb": idaapi.Form.FormChangeCb(self.OnFormChange), + "iFileImport": idaapi.Form.FileInput(open=True, value=idaapi.reg_read_string("vtable_io", "iFileImport", "*.json"), swidth=50), + "cImportOptions": idaapi.Form.ChkGroupControl( + ("rStringMethod", "rSkipMismatches", "rComment"), value=idaapi.reg_read_int("vtable_io", VOptions.SkipMismatches | VOptions.CommentReusedFunctions, "cImportOptions") + ), + "cExportOptions": idaapi.Form.RadGroupControl( + ("rDoNotExport", "rExportNormal", "rExportOnly"), value=idaapi.reg_read_int("vtable_io", VOptions.DoNotExport, "cExportOptions") + ), + "iFileExport": idaapi.Form.FileInput(save=True, value=idaapi.reg_read_string("vtable_io", "iFileExport", "*.json"), swidth=50), + }) + + def OnFormChange(self, fid): + # print(fid) + return 1 + + @staticmethod + def init_options(): + f = VForm() + f, _ = f.Compile() + go = f.Execute() + if not go: + return None + + options = VOptions() + for control in f.controls.keys(): + if control != "FormChangeCb": + currval = getattr(f, control).value + setattr(options, control, currval) + if isinstance(currval, str): + idaapi.reg_write_string("vtable_io", currval, control) + elif isinstance(currval, int): + idaapi.reg_write_int("vtable_io", currval, control) + else: + print(f"Unsupported type for {control} - {type(currval)}") + 
+ f.Free() + return options OS_Linux = 0 OS_Win = 1 FUNCS = 0 +EXPORTS = 0 -# For exporting successful table builds -EXPORT = 0 -EXPORT_TABLE = {} - -Export_No = -1 -Export_YesOnly = 0 -Export_Yes = 1 - -# Change to 0 to disable weak typing. This will speed up the script but you'll have a lot of shitty repeat names -USE_WEAK_NAMES = 0 +VOPTIONS = None def get_os(): - # Lazy af lol - return OS_Linux if idaapi.get_root_filename().endswith(".so") else OS_Win + ftype = idaapi.get_file_type_name() + if "ELF" in ftype: + return OS_Linux + elif "PE" in ftype: + return OS_Win + return -1 + +# Read a ctypes class from an ea +def get_class_from_ea(classtype, ea): + bytestr = idaapi.get_bytes(ea, ctypes.sizeof(classtype)) + return classtype.from_buffer_copy(bytestr) + +def rva_to_ea(ea): + if idc.__EA64__: + return idaapi.get_imagebase() + ea + return ea -def get_bcompat_keys(d): - return d.keys() if version_info[0] >= 3 else d.iterkeys() +# Anything past Classname:: +# Thank you CTFPlayer::SOCacheUnsubscribed... 
+def get_func_postname(name): + retname = name + template = 0 + iterback = 0 + for i, c in enumerate(retname): + if c == "<": + template += 1 + elif c == ">": + template -= 1 + # Find ( and break if we're not in a template + elif c == "(" and template == 0: + iterback = i + break -def get_bcompat_items(d): - return d.items() if version_info[0] >= 3 else d.iteritems() + # Run backwards from ( until we hit a :: + for i in range(iterback, -1, -1): + if retname[i] == ":": + retname = retname[i+1:] + break -def parse_vtable(ea, typename): - os = get_os() - if os == OS_Linux: - ea += 8 + return retname + +def parse_vtable_names(ea): funcs = [] while ea != idc.BADADDR: - eatemp = ea - offs = idc.get_wide_dword(ea) -# if idaapi.is_unknown(idaapi.get_full_flags(ea)): -# break - - size = idc.get_item_size(ea) # This is bad abd abadbadbadbabdbabdad but there's no other choice here - if size != 4: - # This looks like it might be a bug with IDA - # Random points of a vtable are getting turned into unknown data - if size != 1: - break + # Using flags sped this up by a lot + # Went from 4 secs to ~1.3 + flags = idaapi.get_full_flags(ea) + if not is_off(flags) or not is_ptr(flags): + break - s = "".join(["%02x" % idc.get_wide_byte(ea + i) for i in range(3, -1, -1)])#.replace("0x", "") - if not s.lower().startswith("ffff"): - ea = idaapi.next_not_tail(ea) - continue + if idaapi.has_name(flags): + break - offs = int(s, 16) - ea += 3 + offs = get_ptr(ea) + fflags = idaapi.get_full_flags(offs) + if not idaapi.is_func(fflags): + break - name = idc.get_name(offs, idaapi.GN_VISIBLE) - if name: - if os == OS_Linux: - if not(name.startswith("_Z") or name.startswith("__cxa")) or name.startswith("_ZTV"): - break # If we've exceeded past this vtable - elif name.startswith("??"): - break - else: - if os == OS_Win: - break + name = idaapi.get_name(offs) + funcs.append(name) - # dd -offsettothis - # This is even worseworsoewewrosorooese - s = "%02x" % offs - if not s.lower().startswith("ffff"): 
- ea = idaapi.next_not_tail(ea) - continue + ea = idaapi.next_head(ea, idc.BADADDR) + return funcs - name = (1 << 32) - int(offs) - funcs.append(name) +def parse_vtable_addresses(ea): + funcs = [] - ea = idaapi.next_not_tail(ea) - return funcs, eatemp + while ea != idc.BADADDR: + flags = idaapi.get_full_flags(ea) + if not is_off(flags) or not is_ptr(flags): + break -# (funcaddr, funcname) -def get_thunks(ea, typename, funclist): - funcidx = 0 - instance = (int, long) if version_info[0] < 3 else int - for i in range(len(funclist)): - if isinstance(funclist[i], instance): - funcidx = i + offs = get_ptr(ea) + fflags = idaapi.get_full_flags(offs) + if not has_any_name(fflags): break - # No thunks here - if not funcidx: - return [], [] +# if not idaapi.is_func(fflags):# or not idaapi.has_name(fflags): + # Sometimes IDA doesn't think a function is a function + # This is all CSteamWorksGameStatsUploader's fault :( + if not is_code(fflags): + break - funcs = [] - gotthunks = False + funcs.append(VFunc.create(ea=offs, vaddr=ea)) - # Index all these thunks so they line up for when we check for an offset - # Get rid of extra destructor too - thunklist = [get_func_postname(i) for i in funclist[funcidx:] if not isinstance(i, instance) and not i.startswith("_ZTI") and not i.endswith(typename + "D1Ev")] + ea = idaapi.next_head(ea, idc.BADADDR) + return funcs - while ea != idc.BADADDR: - size = idc.get_item_size(ea) - - # CTFRocketLauncher_DirectHit has its thunks below some random ass string - # Don't know what's up with that but we'll check 2 more offsets beyond that - if size != 4: - ea = idaapi.next_not_tail(ea) - size = idc.get_item_size(ea) - if size != 4: - ea = idaapi.next_not_tail(ea) - size = idc.get_item_size(ea) - if size != 4: # This is really bad - break +def parse_si_tinfo(ea, tinfos): + for xref in idautils.XrefsTo(ea): + tinfo = get_class_from_ea(si_class_type_info, xref.frm) + tinfos[xref.frm + si_class_type_info.pParent.offset] = tinfo.pParent - offs = 
idc.get_wide_dword(ea) - name = idc.get_name(offs, idaapi.GN_VISIBLE) +def parse_pointer_tinfo(ea, tinfos): + for xref in idautils.XrefsTo(ea): + tinfo = get_class_from_ea(pointer_type_info, xref.frm) + tinfos[xref.frm + pointer_type_info.pType.offset] = tinfo.pType - if name: - if name.startswith("??_R4"): -# if typename not in name: -# break +def parse_vmi_tinfo(ea, tinfos): + for xref in idautils.XrefsTo(ea): + tinfotype = create_vmi_class_type_info(xref.frm) + tinfo = get_class_from_ea(tinfotype, xref.frm) - gotthunks = True - ea = idaapi.next_not_tail(ea) - continue - else: - s = "%02x" % offs - if not s.lower().startswith("ffff"): - ea = idaapi.next_not_tail(ea) - continue + for i in range(tinfo.basecount): + offset = vmi_class_type_info.pBaseArray.offset + i * ctypes.sizeof(base_class_type_info) + basetinfo = get_class_from_ea(base_class_type_info, xref.frm + offset) + tinfos[xref.frm + offset + base_class_type_info.basetype.offset] = basetinfo.basetype - break +def get_tinfo_vtables(ea, tinfos, vtables): + if ea == idc.BADADDR: + return - if gotthunks: - funcs.append((offs, name)) + for tinfoxref in idautils.XrefsTo(ea, idaapi.XREF_DATA): + count = 0 + mangled = idaapi.get_name(tinfoxref.frm) + demangled = idc.demangle_name(mangled, idaapi.MNG_LONG_FORM) + if demangled is None: + print(f"[VTABLE IO] Invalid name at {tinfoxref.frm:#x}") + continue - ea = idaapi.next_not_tail(ea) + classname = demangled[len("`typeinfo for'"):] + for xref in idautils.XrefsTo(tinfoxref.frm, idaapi.XREF_DATA): + if xref.frm not in tinfos.keys(): + # If address lies in a function + if idaapi.is_func(idaapi.get_full_flags(xref.frm)): + continue - return funcs, thunklist + count += 1 + vtables[classname] = vtables.get(classname, []) + [xref.frm] -def read_vtables(): +def read_vtables_linux(): f = idaapi.ask_file(1, "*.json", "Select a file to export to") if not f: return + + WaitBox.show("Parsing typeinfo") + + # Step 1 and 2, crawl xrefs and stick the inherited class type infos 
into a structure + # After this, we can run over the xrefs again and see which xrefs come from another structure + # The remaining xrefs are either vtables or weird math in a function + xreftinfos = {} + + def getparse(name, fn, quiet=False): + tinfo = idc.get_name_ea_simple(name) + if tinfo == idc.BADADDR and not quiet: + print(f"[VTABLE IO] Type info {name} not found. Skipping...") + return None + + if fn is not None: + fn(tinfo, xreftinfos) + return tinfo + + # Don't need to parse base classes + tinfo = getparse("_ZTVN10__cxxabiv117__class_type_infoE", None) + tinfo_pointer = getparse("_ZTVN10__cxxabiv119__pointer_type_infoE", parse_pointer_tinfo, True) + tinfo_si = getparse("_ZTVN10__cxxabiv120__si_class_type_infoE", parse_si_tinfo) + tinfo_vmi = getparse("_ZTVN10__cxxabiv121__vmi_class_type_infoE", parse_vmi_tinfo) + + if len(xreftinfos) == 0: + print("[VTABLE IO] No type infos found. Are you sure you're in a C++ binary?") + return + + # Step 3, crawl xrefs to again and if the xref is not in the type info structure, then it's a vtable + WaitBox.show("Discovering vtables") + vtables = {} + get_tinfo_vtables(tinfo, xreftinfos, vtables) + get_tinfo_vtables(tinfo_pointer, xreftinfos, vtables) + get_tinfo_vtables(tinfo_si, xreftinfos, vtables) + get_tinfo_vtables(tinfo_vmi, xreftinfos, vtables) + + # Now, we have a list of vtables and their respective classes + WaitBox.show("Parsing vtables") + jsondata = parse_vtables(vtables) + + WaitBox.show("Writing to file") + with open(f, "w") as f: + json.dump(jsondata, f, indent=4, sort_keys=True) + +def parse_ti(ea, tis): + typedesc = ea + flags = idaapi.get_full_flags(ea) + if is_code(flags): + return + + try: + classname = idaapi.demangle_name(idc.get_name(ea), idaapi.MNG_SHORT_FORM) + classname = classname.removeprefix("class ") + classname = classname.removeprefix("struct TypeDescriptor ") + classname = classname.removesuffix(" `RTTI Type Descriptor'") + except: + print(f"[VTABLE IO] Invalid vtable name at {ea:#x}") + 
return + + if classname in tis.keys(): + return - seg = idaapi.get_segm_by_name(".rodata") - ea = seg.start_ea - end = seg.end_ea + cols = [] + vtables = [] - idaapi.replace_wait_box("Reading vtables") - root = {} - while ea < end and ea != idc.BADADDR: - typename = is_vtable(ea) - if typename: - node, ea = parse_vtable(ea, typename) - if len(node): - root[typename] = node + # Then figure out which xref is a/the COL + for xref in idautils.XrefsTo(typedesc): + ea = xref.frm + flags = idaapi.get_full_flags(ea) + # Dynamic cast + if is_code(flags): continue - ea = idaapi.next_head(ea, end) + name = idaapi.get_name(ea) + # Class type descriptor and/or random global data + # Kind of a hack but let's assume no one will rename these + if name and (name.startswith("??_R1") or name.startswith("off_")): + continue - idaapi.replace_wait_box("Exporting to file") - with open(f, "w") as f: - json.dump(root, f, indent = 4, sort_keys = True) + ea -= 4 + name = idaapi.get_name(ea) + # Catchable types + if name and name.startswith("__CT"): + continue + + # COL + ea -= 8 + workaround = False + if idaapi.is_unknown(idaapi.get_full_flags(ea)): + print(f"[VTABLE IO] Possible COL is unknown at {ea:#x}. This may be an unreferenced vtable. Trying workaround...") + # This might be a bug with IDA, but sometimes the COL isn't analyzed + # If there's still a reference, then we can still trace back + # If there is a list of functions (or even just one), then it's probably a vtable, + # but we'll still warn the user that it might be garbage + refs = list(idautils.XrefsTo(ea)) + if len(refs) == 1: + vtable = refs[0].frm + ctypes.sizeof(ea_t) + tryfunc = get_ptr(vtable + ctypes.sizeof(ea_t)) + funcflags = idaapi.get_full_flags(tryfunc) + if idaapi.is_func(funcflags): + print(f" - Workaround successful. Please assure that {vtable:#x} is a vtable.") + workaround = True + + if not workaround: + print(" - Workaround failed. 
Skipping...") + continue -# Function name only, no params or classname -def get_func_sname(name): - unmangled = idc.demangle_name(name, idc.get_inf_attr(idc.INF_SHORT_DN)) - if unmangled is None: - return "" + name = idaapi.get_name(ea) + if not workaround and (not name or not name.startswith("??_R4")): + print(f"[VTABLE IO] Invalid name at {ea:#x}. Possible unwind info. Ignoring...") + continue - if unmangled.find("::") != -1: - unmangled = unmangled[unmangled.find("::")+2:] - if unmangled.find("(") != -1: - unmangled = unmangled.split("(")[0] - return unmangled + # In 64-bit PEs, the COL references itself, remove this + refs = list(idautils.XrefsTo(ea)) + if idc.__EA64__: + for n in range(len(refs)-1, -1, -1): + if refs[n].frm == ea + RTTICompleteObjectLocator.pSelf.offset: + del refs[n] + + # Now that we have the COL, we can use it to find the vtable that utilizes it and its thisoffs + # We need to use this later because of overloads so we cache it in a list + if len(refs) != 1: + print(f"[VTABLE IO] Multiple vtables point to same COL - {name} at {ea:#x}") + continue -# Classname -def get_func_tname(name): - unmangled = idc.demangle_name(name, idc.get_inf_attr(idc.INF_SHORT_DN)) - if unmangled is None: - return "" + cols.append(ea) + vtable = refs[0].frm + ctypes.sizeof(ea_t) + vtables.append(vtable) - if unmangled.find("::") != -1: - unmangled = unmangled[:unmangled.find("::")] + # Can have RTTI without a vtable + tis[classname] = WinTI(typedesc, classname, cols, vtables) - return unmangled -#() -def get_func_argnames(name): - unmangled = idc.demangle_name(name, idc.get_inf_attr(idc.INF_SHORT_DN)) - if unmangled is None: - return "" +def read_ti_win(): + # Step 1, get the vftable of type_info + type_info = idc.get_name_ea_simple("??_7type_info@@6B@") + if type_info is None: + print("[VTABLE IO] type_info not found. 
Are you sure you're in a C++ binary?") + return None + + tis = {} + + # Step 2, get all xrefs to type_info + # Get type descriptor + for typedesc in idautils.XrefsTo(type_info): + parse_ti(typedesc.frm, tis) + + # In some cases, the IDA either fails to reference some type descriptors with type_info + # Not exactly sure why, but it lists the ea of type_info as a "hash" when in reality it isn't + # A workaround for this is to parse type descriptor strings (".?AV*"), load up their references, and + # walk backwards to the start of what is supposed to be the type descriptor, and assure that + # its DWORD is the type_info vtable + # We also make this an optional feature because it's slow in older IDA versions and not necessarily needed + # I only found this to be a problem in NMRIH, so it appears to be rare + if VOPTIONS.cImportOptions & VOptions.StringMethod: + WaitBox.show("Performing string parsing") + string_method(type_info, tis) + + return tis + +def string_method(type_info, tis): + for string in idautils.Strings(): + sstr = str(string) + if not sstr.startswith(".?AV"): + continue - if unmangled.find("(") != -1: - unmangled = unmangled[unmangled.find("("):] + ea = string.ea + ea -= TypeDescriptor.name.offset + trytinfo = rva_to_ea(idaapi.get_wide_dword(ea)) + # This is a weird string that isn't a part of a type descriptor + if trytinfo != type_info: + continue - return unmangled + parse_ti(ea, tis) -# Anything past Classname:: -# Thank you CTFPlayer::SOCacheUnsubscribed... 
-def get_func_postname(name): - unmangled = idc.demangle_name(name, idc.get_inf_attr(idc.INF_SHORT_DN)) - if unmangled is None: - return "" - if unmangled[:unmangled.find("(")].rfind("::") != -1: - unmangled = unmangled[unmangled[:unmangled.find("(")].rfind("::")+2:] +def parse_vtables(vtables): + jsondata = {} + ptrsize = ctypes.sizeof(ea_t) + for classname, tables in vtables.items(): + # We don't *need* to do any sort of sorting in Linux and can just capture the thisoffset + # The Windows side of the script can organize later + for ea in tables: + thisoffs = get_ptr(ea - ptrsize) - return unmangled + funcs = parse_vtable_names(ea + ptrsize) + # Can be zero if there's an xref in the global offset table (.got) section + # Fortunately the parse_vtable function doesn't grab anything from there + if funcs: + classdata = jsondata.get(classname, {}) + classdata[ptr_t(thisoffs).value] = funcs + jsondata[classname] = classdata -def isinthunk(winname, thunk): - ea, name = thunk - funcstart = idc.get_func_attr(ea, idc.FUNCATTR_START) - funcend = idc.get_func_attr(ea, idc.FUNCATTR_END) + return jsondata - if funcend - funcstart > 20: # Highest I've seen is 13 opcodes but this works ig - return False +# See if the thunk is actually a thunk and jumps to +# a function in the vtable +def is_thunk(thunkfunc, targetfuncs): + ea = thunkfunc.ea + func = idaapi.get_func(ea) + funcend = func.end_ea + +# if funcend - ea > 20: # Highest I've seen is 13 opcodes but this works ig +# return False - addr = idc.next_head(funcstart, funcend) + addr = idc.next_head(ea, funcend) if addr == idc.BADADDR: return False - b = idc.get_wide_byte(addr) + b = idaapi.get_byte(addr) if b in (0xEB, 0xE9): - dis = idc.generate_disasm_line(addr, 0) - try: - funcname = dis[dis.find("jmp")+3:].strip() - if funcname.find("short") != -1: - funcname = funcname[funcname.find("short")+5:].strip() - - # When this function gets typed, a comment is added - # Remove it - if funcname.find(";") != -1: - funcname = 
funcname[:funcname.find(";")] - - if funcname == winname: - return True - except: - pass + insn = idaapi.insn_t() + idaapi.decode_insn(insn, addr) + jmpaddr = insn.Op1.addr + return any(jmpaddr == i.ea for i in targetfuncs) return False -# I GIVE UP -# I don't know if it's possible, but of the 3 quintillion hurdles I've had to -# jump through to make this script work, the most frustrating was subclass overloads -# Let's say we have A::Foo(int) and subclass B::Foo(void) -# The script says "hey these are subclasses so let's not add them together" -# That works iff B does not override Foo(int) -# SO LET'S SAY WE ARE NextBotCombatCharacter AND WE WOULD LIKE TO OVERRIDE CBaseAnimating::Ignite -# THAT'S COOL, NOW LET'S ALSO OVERLOAD THAT FUNCTION WITH NextBotCombatCharacter::Ignite(float, CBaseEntity*) -# FUCK -# So to counter that, I have officially given up and have decided to check for an arbitrary offset -# If the offset between overload A and overload B is greater than FUCK, we give up -# Change at your own demise -FUCK = 30 - -def prep_vtable(linuxtable, key, wintable, winv): - if not linuxtable.get(key): - return None +def build_export_table(linuxtables, wintables): + # Table is built mainly for readability but having one that is actually parsable would + # be a cool idea for the future + exporttable = {} + # Save Linux only tables for exporting too + winless = {k: linuxtables[k] for k in linuxtables.keys() - wintables.keys()} + global EXPORTS + for classname, wintable in wintables.items(): + linuxtable = linuxtables.get(classname, None) + if linuxtable is None: + continue - funclist = linuxtable[key] - # Compat for 2.7, strings are in unicode - if version_info[0] < 3: - funclist = [i if isinstance(i, (int, long)) else str(i) for i in linuxtable[key]] - thunks, thunklist = get_thunks(winv, key, funclist) - - # We've got the thunks, now we don't need anything beyond another typeinfo - instance = (int, long) if version_info[0] < 3 else int - for i, v in 
enumerate(funclist): - if isinstance(v, instance): - funclist = funclist[:i] # Skipping thisoffs - break + # Sort and int-ify Linux again + newlinuxtable = [(abs(int(k)), v) for k, v in linuxtable.items()] + newlinuxtable.sort(key=lambda x: x[0]) + + exportnode = [] + purecalls = [] + for currlinuxitems, currwinitems in zip(newlinuxtable, wintable.items()): + lthisoffs, ltable = currlinuxitems + wthisoffs, wtable = currwinitems + + windiscovered = set() + prepend = f"[L{lthisoffs}/W{wthisoffs}]" + for i, mangledname in enumerate(ltable): + # Save for later + if mangledname.startswith("__cxa"): + # print(f"Found purecall {classname}::{mangledname} at {i}") + purecalls.append(i) + continue - # Get rid of extra destructor for linux - for i, n in enumerate(funclist): - name = idc.demangle_name(n, idc.get_inf_attr(idc.INF_SHORT_DN)) - if name: - if "::~" in name: - del funclist[i] - break + winidx = -1 + for j, winfunc in enumerate(wtable): + if mangledname == winfunc.mangledname: + winidx = j + windiscovered.add(j) + break + + s = f"L{i}" + if winidx != -1: + s = f"{s:<8}W{winidx}" + + shortname = idaapi.demangle_name(mangledname, idaapi.MNG_SHORT_FORM) or "purecall" + newprepend = f"{prepend:<20}{s:<8}" + s = f"{newprepend:<36}{shortname}" + exportnode.append(s) + + # Purecalls are a bit special + # We can't just grab the Linux index and use it for Windows + # So we 1: do this after everything else is done, and 2: find the first + # Windows purecall after the last purecall we found for each one + # in the Linux table + # This is kinda hard to test edge cases, but we'll assume this works + lastidx = 0 + for i in purecalls: + winidx = -1 + for j, winfunc in enumerate(wtable[lastidx:]): + if winfunc.mangledname == "__cxa_pure_virtual": + winidx = j + lastidx + break + + s = f"L{i}" + if winidx != -1: + s = f"{s:<8}W{winidx}" + + shortname = idaapi.demangle_name(mangledname, idaapi.MNG_SHORT_FORM) or "purecall" + newprepend = f"{prepend:<20}{s:<8}" + s = 
f"{newprepend:<36}{shortname}" + exportnode.insert(i, s) + lastidx = winidx+1 + windiscovered.add(winidx) + + # For thunks, figure out which Windows indices were not discovered and add them + # Inherited table might be out of order but we favor Linux anyways + for j, winfunc in enumerate(wtable): + if j not in windiscovered: + dummy = "" + s = f"W{j}" + + shortname = idaapi.demangle_name(winfunc.mangledname, idaapi.MNG_SHORT_FORM) or "purecall" + newprepend = f"{prepend:<20}{dummy:<8}{s:<8}" + s = f"{newprepend:<36}{shortname}" + exportnode.append(s) + + EXPORTS += 1 + exporttable[classname] = exportnode + + # Export Linux only tables + for classname, linuxtable in winless.items(): + # Sort and int-ify Linux again + newlinuxtable = [(abs(int(k)), v) for k, v in linuxtable.items()] + newlinuxtable.sort(key=lambda x: x[0]) + exportnode = [] + for thisoffs, table in newlinuxtable: + prepend = f"[L{thisoffs}]" + for i, mangledname in enumerate(table): + shortname = idaapi.demangle_name(mangledname, idaapi.MNG_SHORT_FORM) or "purecall" + newprepend = f"{prepend:<20}L{i:<8}" + s = f"{newprepend:<36}{shortname}" + exportnode.append(s) + + EXPORTS += 1 + exporttable[classname] = exportnode + return exporttable + +def read_vtables_win(classname, ti, wintable, baseclasses): + if classname in wintable.keys(): + return - # Windows does overloads backwards, reverse them - # Also check for thunks while we're at it - i = 0 - funcoverloads = {} - while i < len(funclist):# and i < len(wintable): - n = funclist[i] - if n.startswith("__cxa"): - i += 1 - continue + vclass = wintable.get(classname, VClass(name=classname, baseclasses=baseclasses)) + for colea in ti.cols: + vclass.parse(colea, wintable) - # I shouldn't need to do this, but destructors are wonky - if i == 0: - demangled = idc.demangle_name(n, idc.get_inf_attr(idc.INF_SHORT_DN)) - if demangled and "::~" in demangled: - i += 1 - continue + wintable[classname] = vclass - overloadname = get_func_sname(n) - shortname = 
get_func_postname(n) - if not shortname: - i += 1 - continue +def read_tinfo_win(classname, ti, winti, wintable, baseclasses): + # Strange cases where there is a base class descriptor with no vtable + if classname not in winti.keys(): + return - # Windows skips the vtable function if it exists in the thunks and - # the thunk does not jmp into it (because the thunk is the function) - try: - thunkidx = thunklist.index(shortname) - delete = 1 - except: - thunkidx = -1 - delete = 0 - if i < len(wintable): - if thunkidx != -1 and thunkidx < len(thunks): - if not isinthunk(wintable[i], thunks[thunkidx]): - currname = idc.get_name(thunks[thunkidx][0], idaapi.GN_VISIBLE) - - if currname and currname != funclist[i] and EXPORT_MODE != Export_YesOnly: - nameflags = idaapi.SN_FORCE - if not currname.startswith("sub_"): - if not USE_WEAK_NAMES: - del funclist[i] - continue - - nameflags |= idaapi.SN_WEAK - elif USE_WEAK_NAMES: - global FUNCS - FUNCS += 1 - - idc.set_name(thunks[thunkidx][0], funclist[i], nameflags) - - del funclist[i] + if classname in wintable.keys(): + return + + # No COLs, but we still keep the type in the wintable + if not ti.cols: + wintable[classname] = VClass(name=classname, baseclasses=baseclasses) + return + + # So essentially we just run through each base class in the hierarchy descriptor + # and recursively parse the base classes of the base classes + # Sort of like a reverse insertion sort only not really a sort + for colea in ti.cols: + col = get_class_from_ea(RTTICompleteObjectLocator, colea) + hierarchydesc = get_class_from_ea(RTTIClassHierarchyDescriptor, rva_to_ea(col.pClassHierarchyDescriptor)) + numitems = hierarchydesc.numBaseClasses + arraystart = rva_to_ea(hierarchydesc.pBaseClassArray) + + # Go backwards because we should start parsing from the basest base class + for i in range(numitems - 1, -1, -1): + offset = arraystart + i * ctypes.sizeof(ctypes.c_uint32) + descea = rva_to_ea(idaapi.get_wide_dword(offset)) + parentname = 
idaapi.demangle_name(idaapi.get_name(descea), idaapi.MNG_SHORT_FORM) + if not parentname: + # Another undefining IDA moment +# print(f"[VTABLE IO] Invalid parent name at {offset:#x}") + typedesc = rva_to_ea(idaapi.get_wide_dword(descea)) + parentname = idaapi.demangle_name(idaapi.get_name(typedesc), idaapi.MNG_SHORT_FORM) + + # Should be impossible since this is the type descriptor + if not parentname: + print(f"[VTABLE IO] Invalid parent name at {offset:#x} - type descriptor at {typedesc:#x}") continue - else: # Class has thunks at the end of the vtable - # This doesn't change anything but it should link up the lengths of both tables - if delete: - del funclist[i] - continue - node = funcoverloads.get(overloadname, []) + parentname = parentname.removeprefix("class ") + parentname = parentname.removeprefix("struct TypeDescriptor ") + parentname = parentname.removesuffix(" `RTTI Type Descriptor'") + else: + parentname = parentname[:parentname.find("::`RTTI Base Class Descriptor")] + + # End of the line + if i == 0: + read_vtables_win(classname, winti[parentname], wintable, baseclasses) + elif parentname in winti.keys(): + read_tinfo_win(parentname, winti[parentname], winti, wintable, baseclasses) + # Once again relying on dicts being ordered + baseclasses[parentname] = wintable[parentname] + +def gen_win_tables(winti): + # So first we start looping windows typeinfos because + # we're going to go from the COL -> ClassHierarchyDescriptor -> BaseClassArray + # The reason why we're doing this is because of subclass overloads + # For a history lesson, see https://github.com/Scags/IDA-Scripts/blob/125f1877a24da48062e62efcfb7d8a63e3bd939b/vtable_io.py#L251-L263 + # We're going to fix this by writing (and thus caching the names of) the baseclasses of classes first + # This way, we'll be able to know the classname and the virtual functions contained therein, + # and thus we will know if there is an overload that exists in a subclass + # This relies on the fact that dicts are 
ordered in Python 3.7+ + # If you're running Jiang Yang, either get a job or replace wintables with an OrderedDict + + # Same format as linuxtables + # {classname: VClass(classname, {thisoffs: [vfunc...], ...}, ...}) + wintables = {} + for classname, ti in winti.items(): + read_tinfo_win(classname, ti, winti, wintables, {}) + + return wintables + +def fix_windows_classname(classname): + # Double pointers are spaced... + classnamefix = classname.replace("* *", "**") + + # References/pointers that are const are spaced... + classnamefix = classnamefix.replace("const &", "const&") + classnamefix = classnamefix.replace("const *", "const*") + + # And true/false is instead replaced with 1/0 + def replacer(m): + # Avoid replacing 1s and 0s that are a part of classnames + # Thanks ChatGPT + return re.sub(r"(?<=\W)1(?=\W)", "true", re.sub(r"(?<=\W)0(?=\W)", "false", m.group())) + classnamefix = re.sub(r"<[^>]+>", replacer, classnamefix) + + # Other quirks are inline structs and templated enums + # which are pretty much impossible to deduce + return classnamefix + +# Idk why but sometimes pointers have a mind of their own +def fix_windows_classname2(classname): + return classname.replace(" *", "*") + +def fix_win_overloads(linuxitems, winitems, vclass, functable): + for i in range(min(len(linuxitems), len(winitems))): + currfuncs = linuxitems[i].funcs + vfuncs = [] + for u in range(len(currfuncs)): + f = VFunc.create(mangledname=currfuncs[u]) + for j, baseclass in enumerate(vclass.baseclasses.values()): + if f.postname in baseclass.postnames: + f.inheritid = j + break + + # Unbelievable hack right here + # Looks like pointers are getting shoved next to their types instead of spaced sometimes + # Not entirely sure what causes this. + # CAI_BaseNPC::CanStandOn(CBaseEntity*) vs CBaseEntity::CanStandOn(CBaseEntity *) + # Maybe it's the difference in the types of the pointers and this? 
+ trystr = f.postname + breakout = False + for k in range(trystr.count(" *")): + trystr = trystr.replace(" *", "*", 1) + if trystr in baseclass.postnames: + f.inheritid = j + f.postname = trystr + breakout = True + break + + if breakout: + break + + vfuncs.append(f) - # Is this a half-ass decent overload - go = 1 - for loadnode in range(len(node)): - if not any([i - funclist.index(val) > FUCK for val in node[loadnode]]): - node[loadnode].append(n) - go = 0 + # Remove Linux's extra dtor + for u, f in enumerate(vfuncs): + if "::~" in f.name: + del vfuncs[u] break - if go: - node.append([n]) + # Windows does overloads backwards, reverse them + funcnameset = set() + u = 0 + while u < len(vfuncs): + f = vfuncs[u] - funcoverloads[overloadname] = node - i += 1 + if f.mangledname.startswith("__cxa"):# or f.mangledname.startswith("_ZThn") or f.mangledname.startswith("_ZTv"): + u += 1 + continue - for k, value in get_bcompat_items(funcoverloads): -# if len(value) <= 1: -# continue + if not f.name: + u += 1 + continue -# split = [] -# -# # Since subclass overloads shouldn't scoot up next to their baseclass brethren -# # hackily separate overloads by classname -# for mname in value: -# found = 0 -# -# name = idc.demangle_name(mname, idc.get_inf_attr(idc.INF_SHORT_DN)) -# typename = name[:name.find("::")] -# -# for i2 in range(len(split)): -# for othermname in split[i2]: -# name = idc.demangle_name(othermname, idc.get_inf_attr(idc.INF_SHORT_DN)) -# othertypename = name[:name.find("::")] -# -# if typename == othertypename: -# found = 1 -# split[i2].append(mname) -# break -# -# if found: -# break -# -# if not found: -# split.append([mname]) + # This is an overload, we take the function name here, and push it somewhere else + if f.sname in funcnameset: + # Find the first index of the overload + firstidx = -1 + for k in range(u): + if vfuncs[k].sname == f.sname: + firstidx = k + break + + if firstidx == -1: + print(f"[VTABLE IO] An impossibility has occurred. 
\"{f.sname}\" ({f.mangledname}, {f.name}) is in funcnameset but there is no possible overload.") + + overloadfunc = vfuncs[firstidx] + if overloadfunc.inheritid != f.inheritid: + # Although this function is an overload, it was created in a subclass + # So we don't move it + u += 1 + continue - for v in value: - if len(v) <= 1: + # Remove the current func from the list + del vfuncs[u] + # And insert it into the first index + vfuncs.insert(firstidx, f) + u += 1 continue - lowestidx = len(funclist) - for func in v: - temp = funclist.index(func) - if lowestidx > temp: - lowestidx = temp + funcnameset.add(f.sname) + u += 1 + + for f in vfuncs: + vclass.postnames.add(f.postname) + functable[linuxitems[i].thisoffs] = vfuncs + +def thunk_dance(winitems, vclass, functable): + # Now it's time for thunk dancing + mainltable = functable[0] + mainwtable = winitems[0].funcs + for currlinuxitems, currwinitems in zip(functable.items(), winitems): + thisoffs, ltable = currlinuxitems + wtable = currwinitems.funcs + if thisoffs == 0: + continue - count = 0 - while len(v): - k = v.pop() - funclist.insert(lowestidx + count, funclist.pop(funclist.index(k))) - count += 1 + # Remove any extra dtors from this table + dtorcount = 0 + for i, f in enumerate(ltable): + if "::~" in f.name: + dtorcount += 1 + if dtorcount > 1: + del ltable[i] + break - diff = len(funclist) - len(wintable) - if diff: - print("WARNING: {} vtable may be wrong! 
L{} - W{} = {}".format(key, len(funclist), len(wintable), diff)) + i = 0 + while i < len(mainltable): + f = mainltable[i] + if f.mangledname.startswith("__cxa"): + i += 1 + continue - return funclist + # I shouldn't need to do this, but destructors are wonky + if i == 0 and "::~" in f.name: + i += 1 + continue -def write_vtable(winv, functable, typename): - global FUNCS - ea = winv - i = 0 + if not f.postname: + i += 1 + continue - while ea != idc.BADADDR and i < len(functable): - dword = idaapi.get_wide_dword(ea) - name = idc.get_name(dword, idaapi.GN_VISIBLE) + # Windows skips the vtable function if it's implementation is in the thunks + # A way to check if this is true is to see which thunks are actually thunks + # Then we just pop its name from the main table, since it's no longer there + thunkidx = -1 + for u in range(len(ltable)): + if ltable[u].postname == f.postname: + thunkidx = u + break - if functable[i].startswith("__cxa"): + if thunkidx != -1: + try: + # We can't exactly see if the possible thunk jumps to a certain function (mainwtable[i]) because + # it's impossible to know what that function even is, so we instead check to see if + # it jumps into any function in the main vtable which is good enough + if not is_thunk(wtable[thunkidx], mainwtable): + ltable[thunkidx] = mainltable[i] + del mainltable[i] + continue + except: + print(f"[VTABLE IO] Anomalous thunk: {vclass.name}::{f.postname}, mainwtable {len(mainwtable)} wtable {len(wtable)} thunkidx {thunkidx} thisoffs {thisoffs}") + pass i += 1 - ea = idaapi.next_not_tail(ea) - continue + + # Update current linux table + functable[thisoffs] = ltable - if name == "__purecall": - i += 1 - ea = idaapi.next_not_tail(ea) - continue + # Update main table + functable[0] = mainltable - if not name or name.startswith("??"): - break +def prep_linux_vtables(linuxitems, winitems, vclass): + functable = {} - if functable[i] == name: - i += 1 - ea = idaapi.next_not_tail(ea) - continue + fix_win_overloads(linuxitems, 
winitems, vclass, functable) - nameflags = idaapi.SN_FORCE - if not name.startswith("sub_"): - if not USE_WEAK_NAMES: - i += 1 - ea = idaapi.next_not_tail(ea) + # No thunks, we are done + if min(len(linuxitems), len(winitems)) == 1: + return functable + + thunk_dance(winitems, vclass, functable) + + # Ready to write + return functable + +def merge_tables(functable, winitems): + for items in zip(functable.items(), winitems): + # Should probably make this unpacking/packing more efficient + currlitems, currwitems = items + _, ltable = currlitems + wtable = currwitems.funcs + + for i, f in enumerate(ltable): + targetname = f.mangledname + # Purecall, which should already be handled on the Windows side + if targetname.startswith("__cxa"): continue - nameflags |= idaapi.SN_WEAK - elif not USE_WEAK_NAMES: - FUNCS += 1 + # Size mismatch, skip it + try: + currfunc = wtable[i] + except: + continue + targetaddr = currfunc.ea + + flags = idaapi.get_full_flags(targetaddr) + # Already typed + if idaapi.has_name(flags): + if VOPTIONS.cImportOptions & VOptions.CommentReusedFunctions: + # If it's a Windows optimization (nullsubs, etc), + # add a comment with the actual name + # There's gotta be a way to rename the reference but not the function + currmangledname = idaapi.get_name(targetaddr) + currname = idaapi.demangle_name(currmangledname, idaapi.MNG_LONG_FORM) + if not currname or currname != f.name: + # Use short name for cmt since that's what IDA uses + shortname = idaapi.demangle_name(f.mangledname, idaapi.MNG_SHORT_FORM) + idaapi.set_cmt(currfunc.vaddr, shortname, False) + continue - idc.set_name(dword, functable[i], nameflags) - i += 1 - ea = idaapi.next_not_tail(ea) + func = idaapi.get_func(targetaddr) + # Not actually a function somehow + if not func: + continue -def build_export_table(linlist, winlist): - instance = (int, long) if version_info[0] < 3 else int - for i, v in enumerate(linlist): - if isinstance(v, instance): - linlist = linlist[:i] # Skipping thisoffs - 
break + # A library function (should already have a name) + if func.flags & idaapi.FUNC_LIB: + continue - listnode = linlist[:] + idaapi.set_name(targetaddr, targetname, idaapi.SN_FORCE) + global FUNCS + FUNCS += 1 - for i, v in enumerate(linlist): - name = str(v) - if name.startswith("__cxa"): - listnode[i] = None +def compare_tables(wintables, linuxtables): + functables = {} + for classname, vclass in wintables.items(): + if not vclass.vfuncs: continue - s = "L{:<6}".format(i) - try: - s += " W{}".format(winlist.index(name)) - except: - pass - - funcname = idc.demangle_name(name, idc.get_inf_attr(idc.INF_SHORT_DN)) - s = "{:<16} {}".format(s, funcname) - listnode[i] = s - - return [i for i in listnode if i != None] - -def parse_from_key(linuxtable, key, winv): - wintable, eatemp = parse_vtable(winv, key) - if not len(wintable): - return eatemp - - funclist = prep_vtable(linuxtable, key, wintable, winv) - if not funclist: - return eatemp - - if EXPORT_MODE in (Export_Yes, Export_YesOnly): - global EXPORT_TABLE - EXPORT_TABLE[key] = build_export_table(linuxtable[key], funclist) - - if EXPORT_MODE != Export_YesOnly: - write_vtable(winv, funclist, key) - - return eatemp - -def is_vtable(ea): - name = idc.get_name(ea) - if not name: - return "" - - name = idc.demangle_name(name, idc.get_inf_attr(idc.INF_SHORT_DN)) - if not name: - return "" - - if name.startswith("`vtable for'"): - name = name[12:] - elif name.endswith("::`vftable'"): - name = name[6:-11] - else: - return "" - - # Anonymous namespace? 
-# if "'" in name or "`" in name: -# return "" - return name - -def search_for_vtables(linuxtable): - seg = idaapi.get_segm_by_name(".rdata") - ea = seg.start_ea - end = seg.end_ea - - # Windows is better off finding the COL and deducing the vtable position from there - # This is because vtables can be referenced before they are created in position of the binary - found = set() - while ea < end and ea != idc.BADADDR: - if idaapi.get_item_size(ea) != 4 or idaapi.is_unknown(idaapi.get_full_flags(ea)): - ea = idaapi.next_head(ea, end) - continue + linuxtable = linuxtables.get(classname, {}) + if not linuxtable: + # Some weird Windows quirks + classnamefix = fix_windows_classname(classname) + linuxtable = linuxtables.get(classnamefix, {}) + if not linuxtable: + # Another very weird quirk + classnamefix = fix_windows_classname2(classnamefix) + linuxtable = linuxtables.get(classnamefix, {}) + if not linuxtable: +# print(f"[VTABLE IO] {classname}{f' (tried {classnamefix})' if classname != classnamefix else ''} not found in Linux tables. 
Skipping...") + continue - dword = idaapi.get_wide_dword(ea) - name = idc.get_name(dword, idaapi.GN_VISIBLE) + winitems = list(FuncList(x[0], x[1]) for x in vclass.vfuncs.items()) + # Sort by thisoffs, smallest first + winitems.sort(key=lambda x: x.thisoffs) - if name and name.startswith("??_R4"): - demangled = idc.demangle_name(name, idc.get_inf_attr(idc.INF_SHORT_DN)) - if not demangled or demangled in found: - ea = idaapi.next_head(ea, end) - continue + # Convert the string thisoffs to int + # Linux thisoffses are negative, abs them + linuxitems = list(FuncList(abs(int(x[0])), x[1]) for x in zip([abs(int(i)) for i in linuxtable.keys()], linuxtable.values())) + linuxitems.sort(key=lambda x: x.thisoffs) - if idaapi.get_item_size(ea + 4) == 4 and idaapi.get_wide_dword(ea + 4) != 0: - disasm = idc.generate_disasm_line(ea + 4, 0) - if disasm and disasm.strip().startswith("dd offset"): - actualname = demangled.split("::`RTTI")[0][6:] - if actualname in found: - ea = idaapi.next_head(ea, end) - continue + # If there's a size mismatch (very rare), then most likely IDA failed to analyze + # A certain vtable, so we can't continue given the high probability of catastrophich failure + if len(winitems) != len(linuxitems): + print(f"[VTABLE IO] {classname} vtable # mismatch - L{len(linuxitems)} W{len(winitems)}. Skipping...") + continue - found.add(actualname) + functable = prep_linux_vtables(linuxitems, winitems, vclass) + + skip = False + for items in zip(functable.items(), winitems): + currlinuxitems, currwinitems = items + thisoffs, ltable = currlinuxitems + if len(ltable) != len(currwinitems.funcs): + print(f"[VTABLE IO] WARNING: {vclass.name} vtable [W{currwinitems.thisoffs}/L{thisoffs}] may be wrong! L{len(ltable)} - W{len(currwinitems.funcs)} = {len(ltable) - len(currwinitems.funcs)}", end="") + if VOPTIONS.cImportOptions & VOptions.SkipMismatches: + print(". 
Skipping...") + skip = True + break + else: + print() - ea = parse_from_key(linuxtable, actualname, ea + 4) - continue + if skip: + continue + + functables[classname] = functable + + # Write! + if VOPTIONS.cExportOptions != VOptions.ExportOnly: + merge_tables(functable, winitems) - ea = idaapi.next_head(ea, end) + return functables def write_vtables(): - f = idaapi.ask_file(0, "*.json", "Select a file to import from") - if not f: + WaitBox.show("Importing file") + linuxtables = None + try: + with open(VOPTIONS.iFileImport) as f: + linuxtables = json.load(f) + except FileNotFoundError as e: + print(f"[VTABLE IO] File {VOPTIONS.iFileImport} not found.") return - global EXPORT_MODE - EXPORT_MODE = idaapi.ask_buttons("Yes", "Export only (do not type functions)", "No", -1, "Would you like to export virtual tables to a file?") + if not linuxtables: + return - if EXPORT_MODE in (Export_Yes, Export_YesOnly): - exportfile = idaapi.ask_file(1, "*.json", "Select a file to export virtual tables to") - if not exportfile: - return + WaitBox.show("Parsing Windows typeinfo") + winti = read_ti_win() + if winti is None: + return - linuxtable = None - idaapi.replace_wait_box("Importing file") - with open(f) as f: - linuxtable = json.load(f) + WaitBox.show("Generating windows vtables") + wintables = gen_win_tables(winti) - idaapi.replace_wait_box("Comparing vtables") - search_for_vtables(linuxtable) -# for key in get_bcompat_keys(linuxtable): -# parse_from_key(linuxtable, key) + WaitBox.show("Comparing vtables") + functables = compare_tables(wintables, linuxtables) + + if VOPTIONS.cExportOptions in (VOptions.ExportOnly, VOptions.ExportNormal): + if VOPTIONS.iFileExport is None or VOPTIONS.iFileExport == "*.json": + print("[VTABLE IO] No export file specified.") + return + + WaitBox.show("Writing to file") + exporttable = build_export_table(linuxtables, functables) + with open(VOPTIONS.iFileExport, "w") as f: + json.dump(exporttable, f, indent=4, sort_keys=True) - if EXPORT_MODE in 
(Export_Yes, Export_YesOnly): - with open(exportfile, "w") as f: - json.dump(EXPORT_TABLE, f, indent = 4, sort_keys = True) def main(): os = get_os() + if os == -1: + print(f"Unsupported OS?: {idaapi.get_file_type_name()}") + idaapi.beep() + return - if os == OS_Linux: - read_vtables() - print("Done!") - else: - write_vtables() - if FUNCS: - print("Successfully typed {} virtual functions".format(FUNCS)) - + try: + if os == OS_Linux: + read_vtables_linux() + print("Done!") + elif os == OS_Win: + global VOPTIONS + VOPTIONS = VForm.init_options() + if not VOPTIONS: + return + + write_vtables() + if FUNCS: + print(f"[VTABLE IO] Successfully typed {FUNCS} virtual functions") + else: + print("[VTABLE IO] No functions were typed") + + if EXPORTS: + print(f"[VTABLE IO] Successfully exported {EXPORTS} virtual tables") + + if FUNCS == 0 and EXPORTS == 0: + idaapi.beep() + except: + import traceback + traceback.print_exc() + print("Please file a bug report with supporting information at https://github.com/Scags/IDA-Scripts/issues") + idaapi.beep() + + WaitBox.hide() + +# import cProfile +# cProfile.run("main()", "vtable_io.prof") main() \ No newline at end of file diff --git a/vtable_structs.py b/vtable_structs.py index 6575157..5ce13ec 100644 --- a/vtable_structs.py +++ b/vtable_structs.py @@ -1,75 +1,219 @@ import idc import idautils import idaapi +import ctypes +import time -from time import time -from math import floor +from dataclasses import dataclass OS_Linux = 0 OS_Win = 1 -OS = None - -class OSData(object): - def __init__(self, os): - self.os = os - if os == OS_Linux: - self.segm = idaapi.get_segm_by_name(".rodata") +if idc.__EA64__: + ea_t = ctypes.c_uint64 + ptr_t = ctypes.c_int64 + get_ptr = idaapi.get_qword + FF_PTR = idc.FF_QWORD +else: + ea_t = ctypes.c_uint32 + ptr_t = ctypes.c_int32 + get_ptr = idaapi.get_dword + FF_PTR = idc.FF_DWORD + +def is_ptr(f): return (f & idaapi.MS_CLS) == idc.FF_DATA and (f & idaapi.DT_TYPE) == FF_PTR +def is_off(f): return (f & 
(idc.FF_0OFF|idc.FF_1OFF)) != 0 + + +_RTTICompleteObjectLocator_fields = [ + ("signature", ctypes.c_uint32), # signature + ("offset", ctypes.c_uint32), # offset of this vtable in complete class (from top) + ("cdOffset", ctypes.c_uint32), # offset of constructor displacement + ("pTypeDescriptor", ctypes.c_uint32), # ref TypeDescriptor + ("pClassHierarchyDescriptor", ctypes.c_uint32), # ref RTTIClassHierarchyDescriptor + ] + +if idc.__EA64__: + _RTTICompleteObjectLocator_fields.append(("pSelf", ctypes.c_uint32)) # ref to object's base + +class RTTICompleteObjectLocator(ctypes.Structure): + _fields_ = _RTTICompleteObjectLocator_fields + + +class TypeDescriptor(ctypes.Structure): + _fields_ = [ + ("pVFTable", ctypes.c_uint32), # reference to RTTI's vftable + ("spare", ctypes.c_uint32), # internal runtime reference + ("name", ctypes.c_uint8), # type descriptor name (no varstruct needed since we don't use this) + ] + + +class RTTIClassHierarchyDescriptor(ctypes.Structure): + _fields_ = [ + ("signature", ctypes.c_uint32), # signature + ("attribs", ctypes.c_uint32), # attributes + ("numBaseClasses", ctypes.c_uint32), # # of items in the array of base classes + ("pBaseClassArray", ctypes.c_uint32), # ref BaseClassArray + ] + + +class RTTIBaseClassDescriptor(ctypes.Structure): + _fields_ = [ + ("pTypeDescriptor", ctypes.c_uint32), # ref TypeDescriptor + ("numContainedBases", ctypes.c_uint32), # # of sub elements within base class array + ("mdisp", ctypes.c_uint32), # member displacement + ("pdisp", ctypes.c_uint32), # vftable displacement + ("vdisp", ctypes.c_uint32), # displacement within vftable + ("attributes", ctypes.c_uint32), # base class attributes + ("pClassDescriptor", ctypes.c_uint32), # ref RTTIClassHierarchyDescriptor + ] + + +class base_class_type_info(ctypes.Structure): + _fields_ = [ + ("basetype", ea_t), # Base class type + ("offsetflags", ea_t), # Offset and info + ] + + +class class_type_info(ctypes.Structure): + _fields_ = [ + ("pVFTable", ea_t), # 
reference to RTTI's vftable (__class_type_info) + ("pName", ea_t), # ref to type name + ] + +# I don't think this is right, but every case I found looked to be correct +# This might be a vtable? IDA sometimes says it is but not always +# Plus sometimes the flags member is 0x1, so it's not a thisoffs. Weird +class pointer_type_info(class_type_info): + _fields_ = [ + ("flags", ea_t), # Flags or something else + ("pType", ea_t), # ref to type + ] + +class si_class_type_info(class_type_info): + _fields_ = [ + ("pParent", ea_t), # ref to parent type + ] + +class vmi_class_type_info(class_type_info): + _fields_ = [ + ("flags", ctypes.c_uint32), # flags + ("basecount", ctypes.c_uint32), # # of base classes + ("pBaseArray", base_class_type_info), # array of BaseClassArray + ] + +def create_vmi_class_type_info(ea): + bytestr = idaapi.get_bytes(ea, ctypes.sizeof(vmi_class_type_info)) + tinfo = vmi_class_type_info.from_buffer_copy(bytestr) + + # Since this is a varstruct, we create a dynamic class with the proper size and type and return it instead + class vmi_class_type_info_dynamic(class_type_info): + _fields_ = [ + ("flags", ctypes.c_uint32), + ("basecount", ctypes.c_uint32), + ("pBaseArray", base_class_type_info * tinfo.basecount), + ] + + return vmi_class_type_info_dynamic + +# Idiot proof IDA wait box +class WaitBox: + buffertime = 0.0 + shown = False + msg = "" + + @staticmethod + def _show(msg): + WaitBox.msg = msg + if WaitBox.shown: + idaapi.replace_wait_box(msg) else: - self.segm = idaapi.get_segm_by_name(".rdata") - - def parse_vtable(self, ea, typename): - if self.os == OS_Linux: - ea += 8 - funcs = [] - - while ea != idc.BADADDR: - eatemp = ea - offs = idc.get_wide_dword(ea) - # if idaapi.is_unknown(idaapi.get_full_flags(ea)): - # break - - size = idc.get_item_size(ea) # This is bad abd abadbadbadbabdbabdad but there's no other choice here - if size != 4: - # This looks like it might be a bug with IDA - # Random points of a vtable are getting turned into unknown 
data - if size != 1: - break - - s = "".join(["%02x" % idc.get_wide_byte(ea + i) for i in range(3, -1, -1)]) - if not s.lower().startswith("ffff"): - ea = idaapi.next_not_tail(ea) - continue - - offs = int(s, 16) - ea += 3 - - name = idc.get_name(offs, idaapi.GN_VISIBLE) + idaapi.show_wait_box(msg) + WaitBox.shown = True + + @staticmethod + def show(msg, buffertime = 0.1): + if msg == WaitBox.msg: + return + + if buffertime > 0.0: + if time.time() - WaitBox.buffertime < buffertime: + return + WaitBox.buffertime = time.time() + WaitBox._show(msg) + + @staticmethod + def hide(): + if WaitBox.shown: + idaapi.hide_wait_box() + WaitBox.shown = False +STRUCTS = 0 + +class InfoCache(object): + tinfos = {} + vfuncs = {} + +# Class for windows type info, helps organize things +@dataclass(frozen=True) +class WinTI(object): + typedesc: int + name: str + cols: list[int] + vtables: list[int] + +@dataclass +class VFuncRef: + ea: int # Address to this function + mangledname: str + name: str + postname: str + sname: str + + @staticmethod + def create(ea=idc.BADADDR, mangledname=""): + if InfoCache.vfuncs.get(ea): + return InfoCache.vfuncs[ea] + + name = "" + postname = "" + sname = "" + if mangledname: + name = idaapi.demangle_name(mangledname, idaapi.MNG_SHORT_FORM) if name: - if self.os == OS_Linux: - if not(name.startswith("_Z") or name.startswith("__cxa")) or name.startswith("_ZTV"): - break # If we've exceeded past this vtable - elif name.startswith("??"): - break + postname = get_func_postname(name) + sname = postname.split("(")[0] else: - if self.os == OS_Win: - break - - # dd -offsettothis - # This is even worseworsoewewrosorooese - s = "%02x" % offs - if not s.lower().startswith("ffff"): - ea = idaapi.next_not_tail(ea) - continue + postname = mangledname + sname = mangledname - break - funcs.append(name) + vfunc = VFuncRef(ea, mangledname, name, postname, sname) + InfoCache.vfuncs[ea] = vfunc + return vfunc - ea = idaapi.next_not_tail(ea) +@dataclass(frozen=True) +class 
VFunc: + funcref: VFuncRef + vaddr: int # Address to this function's reference in its vtable - if len(funcs): - import_vtable(typename, funcs) - return eatemp + @staticmethod + def create(vaddr): + ea = get_ptr(vaddr) + ref = InfoCache.vfuncs.get(ea, VFuncRef.create(ea=ea, mangledname=idaapi.get_name(ea))) + return VFunc(ref, vaddr) + + +def get_os(): + ftype = idaapi.get_file_type_name() + if "ELF" in ftype: + return OS_Linux + elif "PE" in ftype: + return OS_Win + return -1 + +# Read a ctypes class from an ea +def get_class_from_ea(classtype, ea): + bytestr = idaapi.get_bytes(ea, ctypes.sizeof(classtype)) + return classtype.from_buffer_copy(bytestr) def add_struc_ex(name): strucid = idaapi.get_struc_id(name) @@ -78,113 +222,465 @@ def add_struc_ex(name): return strucid -def import_vtable(typename, funcs): - typestrucid = add_struc_ex(typename) - typestruc = idaapi.get_struc(typestrucid) - vstrucid = add_struc_ex(typename + "_vtbl") - vstruc = idaapi.get_struc(vstrucid) +# Anything past Classname:: +# Thank you CTFPlayer::SOCacheUnsubscribed... 
+def get_func_postname(name): + retname = name + template = 0 + iterback = 0 + for i, c in enumerate(retname): + if c == "<": + template += 1 + elif c == ">": + template -= 1 + # Find ( and break if we're not in a template + elif c == "(" and template == 0: + iterback = i + break + + # Run backwards from ( until we hit a :: + for i in range(iterback, -1, -1): + if retname[i] == ":": + retname = retname[i+1:] + break + + return retname + +def rva_to_ea(ea): + if idc.__EA64__: + return idaapi.get_imagebase() + ea + return ea + +def parse_si_tinfo(ea, tinfos): + for xref in idautils.XrefsTo(ea): + tinfo = get_class_from_ea(si_class_type_info, xref.frm) + tinfos[xref.frm + si_class_type_info.pParent.offset] = tinfo.pParent + + +def parse_pointer_tinfo(ea, tinfos): + for xref in idautils.XrefsTo(ea): + tinfo = get_class_from_ea(pointer_type_info, xref.frm) + tinfos[xref.frm + pointer_type_info.pType.offset] = tinfo.pType + + +def parse_vmi_tinfo(ea, tinfos): + for xref in idautils.XrefsTo(ea): + tinfotype = create_vmi_class_type_info(xref.frm) + tinfo = get_class_from_ea(tinfotype, xref.frm) + + for i in range(tinfo.basecount): + offset = vmi_class_type_info.pBaseArray.offset + i * ctypes.sizeof(base_class_type_info) + basetinfo = get_class_from_ea(base_class_type_info, xref.frm + offset) + tinfos[xref.frm + offset + base_class_type_info.basetype.offset] = basetinfo.basetype + +def get_tinfo_vtables(ea, tinfos, vtables): + if ea == idc.BADADDR: + return + + for tinfoxref in idautils.XrefsTo(ea, idaapi.XREF_DATA): + count = 0 + mangled = idaapi.get_name(tinfoxref.frm) + demangled = idc.demangle_name(mangled, idaapi.MNG_LONG_FORM) + if demangled is None: + print(f"[VTABLE STRUCTS] Invalid name at {tinfoxref.frm:#x}") + continue - loffs = idaapi.get_struc_last_offset(vstruc) - if loffs != idc.BADADDR: - idaapi.del_struc_members(vstruc, 0, loffs + 4) + classname = demangled[len("`typeinfo for'"):] + for xref in idautils.XrefsTo(tinfoxref.frm, idaapi.XREF_DATA): + if 
xref.frm not in tinfos.keys(): + # If address lies in a function + if idaapi.is_func(idaapi.get_full_flags(xref.frm)): + continue - for i in funcs: - demangled = idc.demangle_name(i, idc.get_inf_attr(idc.INF_SHORT_DN)) - if demangled == None: - demangled = i - else: - demangled = demangled[demangled.find("::")+2:demangled.find("(")] - # As per https://stackoverflow.com/questions/3411771/best-way-to-replace-multiple-characters-in-a-string - # this isn't as slow as you'd think - demangled = demangled\ - .replace("~", "_")\ - .replace("<", "_")\ - .replace(">", "_")\ - .replace(",", "_")\ - .replace("*", "_")\ - .replace(" ", "_")\ - .replace("operator==", "__eq__")\ - .replace("operator+", "__add__")\ - .replace("operator-", "__sub__")\ - .replace("operator*", "__mul__")\ - .replace("operator/", "__div__")\ - .replace("operator%", "__mod__")\ - .replace("operator<<", "__lshift__")\ - .replace("operator>>", "__rshift__")\ - .replace("operator&", "__and__")\ - .replace("operator|", "__or__")\ - .replace("operator^", "__xor__")\ - .replace("operator~", "__invert__") - while 1: - error = idaapi.add_struc_member(vstruc, demangled, idc.BADADDR, idc.FF_DWORD, None, 4) - - if error == 0: - break - - demangled += "_{}".format(hex(idaapi.get_struc_last_offset(vstruc) + 4)[2:]) + count += 1 + vtables[classname] = vtables.get(classname, []) + [xref.frm] + + +def get_tinfo_vtables(ea, tinfos, vtables): + if ea == idc.BADADDR: + return + + for tinfoxref in idautils.XrefsTo(ea, idaapi.XREF_DATA): + count = 0 + mangled = idaapi.get_name(tinfoxref.frm) + demangled = idc.demangle_name(mangled, idaapi.MNG_LONG_FORM) + if demangled is None: + print(f"[VTABLE STRUCTS] Invalid name at {tinfoxref.frm:#x}") + continue + + classname = demangled[len("`typeinfo for'"):] + for xref in idautils.XrefsTo(tinfoxref.frm, idaapi.XREF_DATA): + if xref.frm not in tinfos.keys(): + # If address lies in a function + if idaapi.is_func(idaapi.get_full_flags(xref.frm)): + continue + + count += 1 + 
vtables[classname] = vtables.get(classname, []) + [xref.frm] + + +def parse_vtables(vtables): + jsondata = {} + ptrsize = ctypes.sizeof(ea_t) + for classname, tables in vtables.items(): + # We don't *need* to do any sort of sorting in Linux and can just capture the thisoffset + # The Windows side of the script can organize later + for ea in tables: + thisoffs = get_ptr(ea - ptrsize) + + funcs = parse_vtable(ea + ptrsize) + # Can be zero if there's an xref in the global offset table (.got) section + # Fortunately the parse_vtable function doesn't grab anything from there + if funcs: + classdata = jsondata.get(classname, {}) + classdata[ptr_t(thisoffs).value] = funcs + jsondata[classname] = classdata + + return jsondata + +def parse_vtable(ea): + funcs = [] + + while ea != idc.BADADDR: + # Using flags sped this up by a lot + # Went from 4 secs to ~1.3 + flags = idaapi.get_full_flags(ea) + if not is_off(flags) or not is_ptr(flags): + break + + if get_os() == OS_Linux and idaapi.has_name(flags): + break + + offs = get_ptr(ea) + fflags = idaapi.get_full_flags(offs) + if not idaapi.is_code(fflags): + break + + if get_os() == OS_Win and not idaapi.has_any_name(fflags): + break + + vfunc = VFunc.create(ea) + # Invalid name, so this can be a "sub_", purecall, or an optimized function + # So to keep vtable_io compat, we grab the comment instead and update the names + if not vfunc.funcref.name: + cmt = idaapi.get_cmt(ea, False) + if cmt and "::" in cmt: + vfunc.funcref.mangledname = None + vfunc.funcref.name = cmt + vfunc.funcref.postname = get_func_postname(vfunc.funcref.name) + vfunc.funcref.sname = vfunc.funcref.postname.split("(")[0] + + funcs.append(vfunc) + + ea = idaapi.next_head(ea, idc.BADADDR) + return funcs + +def calc_member_tinfo(vfunc): + cached = InfoCache.tinfos.get(vfunc.funcref.ea, None) + if cached is not None: + return cached + + # Get the type info of the function if it's present + # In Windows, you can't get the actual tinfo so you can only guess + # and 
use the rudimentary type info + tinfo = idaapi.tinfo_t() + if not idaapi.get_tinfo(tinfo, vfunc.funcref.ea): + if idaapi.guess_tinfo(tinfo, vfunc.funcref.ea) == idaapi.GUESS_FUNC_FAILED: + tinfo = None + + if tinfo is not None: + tinfo.create_ptr(tinfo) + + InfoCache.tinfos[vfunc.funcref.ea] = tinfo + return tinfo + + +def create_structs(data): + # Now this is an awesome API function that we most certainly need + idaapi.begin_type_updating(idaapi.UTP_STRUCT) + + for classname, vtables in data.items(): + classstrucid = add_struc_ex(classname) + classstruc = idaapi.get_struc(classstrucid) + for thisoffs, vfuncs in vtables.items(): + thisoffs = abs(thisoffs) + postfix = f"{thisoffs:x}" if thisoffs != 0 else "" + structype = f"{classname}_vtbl{postfix}" + structype = idaapi.validate_name(structype, idaapi.VNT_TYPE, idaapi.SN_IDBENC) + + vtablestrucid = add_struc_ex(structype) + vtablestruc = idaapi.get_struc(vtablestrucid) + for i, vfunc in enumerate(vfuncs): + offs = i * ctypes.sizeof(ea_t) + targetname = vfunc.funcref.sname + + currmem = idaapi.get_member(vtablestruc, offs) + if currmem: + # memname = idaapi.get_member_name(currmem.id) + # # Can have a postfix so we use in operator + # if targetname in memname: + # if not currmem.has_ti(): + # tinfo = calc_member_tinfo(vfunc) + # if tinfo is not None: + # idaapi.set_member_tinfo(vtablestruc, currmem, 0, tinfo, 0) + # continue + + # # Sadly if you reorganize a vtable and move a function up, this will fail + # # and you'll have an unneeded postfix + # if not idaapi.set_name(currmem.id, targetname, idaapi.SN_NOCHECK): + # newname = f"{targetname}_{offs:x}" + # if not idaapi.set_name(currmem.id, newname, idaapi.SN_NOCHECK): + # print(f"Failed to set name for {classname}::{vfunc.funcref.sname} ({targetname}) at offset {offs:#x}") + # continue + + # tinfo = calc_member_tinfo(vfunc) + # if tinfo is not None: + # idaapi.set_member_tinfo(vtablestruc, currmem, 0, tinfo, 0) + continue + + else: + opinfo = idaapi.opinfo_t() + # 
I don't think this does anything + opinfo.ri.flags = idaapi.REF_OFF64 if idc.__EA64__ else idaapi.REF_OFF32 + opinfo.ri.target = vfunc.funcref.ea + opinfo.ri.base = 0 + opinfo.ri.tdelta = 0 + + serr = idaapi.add_struc_member(vtablestruc, targetname, offs, FF_PTR|idc.FF_0OFF|idc.FF_1OFF, opinfo, ctypes.sizeof(ea_t)) + # Failed, so there was either an invalid name or a name collision + if serr == idaapi.STRUC_ERROR_MEMBER_NAME: + targetname = idaapi.validate_name(targetname, idaapi.VNT_IDENT, idaapi.SN_IDBENC) + serr = idaapi.add_struc_member(vtablestruc, targetname, offs, FF_PTR|idc.FF_0OFF|idc.FF_1OFF, opinfo, ctypes.sizeof(ea_t)) + if serr == idaapi.STRUC_ERROR_MEMBER_NAME: + targetname = f"{targetname}_{offs:x}" + serr = idaapi.add_struc_member(vtablestruc, targetname, offs, FF_PTR|idc.FF_0OFF|idc.FF_1OFF, opinfo, ctypes.sizeof(ea_t)) + + if serr != idaapi.STRUC_ERROR_MEMBER_OK: + print(vtablestruc, vtablestrucid) + print(f"Failed to add member {classname}::{vfunc.funcref.sname} ({targetname}) at offset {offs:#x} -> {serr}") + continue + + tinfo = calc_member_tinfo(vfunc) + if tinfo is not None: + mem = idaapi.get_member(vtablestruc, offs) + idaapi.set_member_tinfo(vtablestruc, mem, 0, tinfo, 0) + + vmember = idaapi.get_member(classstruc, thisoffs) + if not vmember: + if idaapi.add_struc_member(classstruc, f"vftbl{postfix}", thisoffs, idc.FF_DATA|FF_PTR, None, ctypes.sizeof(ea_t)) == idaapi.STRUC_ERROR_MEMBER_OK: + global STRUCTS + STRUCTS += 1 + tinfo = idaapi.tinfo_t() + if idaapi.guess_tinfo(tinfo, vtablestrucid) != idaapi.GUESS_FUNC_FAILED: + mem = idaapi.get_member(classstruc, thisoffs) + tinfo.create_ptr(tinfo) + idaapi.set_member_tinfo(classstruc, mem, 0, tinfo, 0) + +def read_vtables_linux(): + WaitBox.show("Parsing typeinfo") + + # Step 1 and 2, crawl xrefs and stick the inherited class type infos into a structure + # After this, we can run over the xrefs again and see which xrefs come from another structure + # The remaining xrefs are either vtables or 
weird math in a function + xreftinfos = {} + + def getparse(name, fn, quiet=False): + tinfo = idc.get_name_ea_simple(name) + if tinfo == idc.BADADDR and not quiet: + print(f"[VTABLE STRUCTS] Type info {name} not found. Skipping...") + return None + + if fn is not None: + fn(tinfo, xreftinfos) + return tinfo + + # Don't need to parse base classes + tinfo = getparse("_ZTVN10__cxxabiv117__class_type_infoE", None) + tinfo_pointer = getparse("_ZTVN10__cxxabiv119__pointer_type_infoE", parse_pointer_tinfo, True) + tinfo_si = getparse("_ZTVN10__cxxabiv120__si_class_type_infoE", parse_si_tinfo) + tinfo_vmi = getparse("_ZTVN10__cxxabiv121__vmi_class_type_infoE", parse_vmi_tinfo) + + if len(xreftinfos) == 0: + print("[VTABLE STRUCTS] No type infos found. Are you sure you're in a C++ binary?") + return + + # Step 3, crawl xrefs to again and if the xref is not in the type info structure, then it's a vtable + WaitBox.show("Discovering vtables") + vtables = {} + get_tinfo_vtables(tinfo, xreftinfos, vtables) + get_tinfo_vtables(tinfo_pointer, xreftinfos, vtables) + get_tinfo_vtables(tinfo_si, xreftinfos, vtables) + get_tinfo_vtables(tinfo_vmi, xreftinfos, vtables) + + # Now, we have a list of vtables and their respective classes + WaitBox.show("Parsing vtables") + data = parse_vtables(vtables) + + WaitBox.show("Creating structs") + create_structs(data) + +def parse_ti(ea, tis): + typedesc = ea + flags = idaapi.get_full_flags(ea) + if idaapi.is_code(flags): + return try: - ti = idaapi.tinfo_t() - idaapi.parse_decl(ti, None, typename + "_vtbl;", 0) - ti.create_ptr(ti) - idaapi.add_struc_member(typestruc, "__vftable", 0, idc.FF_DWORD, None, 4) - idaapi.set_member_tinfo(typestruc, idaapi.get_member(typestruc, 0), 0, ti, 0) + classname = idaapi.demangle_name(idc.get_name(ea), idaapi.MNG_SHORT_FORM) + classname = classname.removeprefix("class ") + classname = classname.removeprefix("struct TypeDescriptor ") + classname = classname.removesuffix(" `RTTI Type Descriptor'") except: - 
print("Prevented a terrible, horrible, no good, very bad crash with {}!".format(typename)) + print(f"[VTABLE STRUCTS] Invalid vtable name at {ea:#x}") + return + + if classname in tis.keys(): + return -def is_vtable(ea): - currname = idc.get_name(ea) - if not currname: - return "" + vtables = [] - currname = idc.demangle_name(currname, idc.get_inf_attr(idc.INF_SHORT_DN)) - if not currname: - return "" + # Then figure out which xref is a/the COL + for xref in idautils.XrefsTo(typedesc): + ea = xref.frm + flags = idaapi.get_full_flags(ea) - # These break everything, so we won't support them, yet - if "(" in currname or "<" in currname: - return "" + # Dynamic cast + if idaapi.is_code(flags): + continue - if currname.startswith("`vtable for'"): - currname = currname[12:] - elif currname.endswith("::`vftable'"): - currname = currname[6:-11] - else: - return "" + name = idaapi.get_name(ea) + # Class type descriptor and/or random global data + # Kind of a hack but let's assume no one will rename these + if name and (name.startswith("??_R1") or name.startswith("off_")): + continue - # Anonymous namespace? 
- if "'" in currname or "`" in currname: - return "" - return currname + ea -= 4 + name = idaapi.get_name(ea) + # Catchable types + if name and name.startswith("__CT"): + continue -def get_os(): - return OSData(OS_Linux if idaapi.get_root_filename().endswith(".so") else OS_Win) - -UPDATE_TIME = time() -def update_window(s): - global UPDATE_TIME - currtime = time() - if currtime - UPDATE_TIME > 0.2: - idaapi.replace_wait_box(s) - UPDATE_TIME = currtime - -def search_for_vtables(): - startea = OS.segm.start_ea - ea = startea - endea = OS.segm.end_ea -# print(ea, endea) - - while ea < endea and ea != idc.BADADDR: - name = is_vtable(ea) - if name: - update_window("Importing {} | {}%".format(name, floor((ea - startea) / float(endea - startea) * 100.0 * 10.0) / 10.0)) - ea = OS.parse_vtable(ea, name) + # COL + ea -= 8 + workaround = False + if idaapi.is_unknown(idaapi.get_full_flags(ea)): + print(f"[VTABLE STRUCTS] Possible COL is unknown at {ea:#x}. This may be an unreferenced vtable. Trying workaround...") + # This might be a bug with IDA, but sometimes the COL isn't analyzed + # If there's still a reference, then we can still trace back + # If there is a list of functions (or even just one), then it's probably a vtable, + # but we'll still warn the user that it might be garbage + refs = list(idautils.XrefsTo(ea)) + if len(refs) == 1: + vtable = refs[0].frm + ctypes.sizeof(ea_t) + tryfunc = get_ptr(vtable + ctypes.sizeof(ea_t)) + funcflags = idaapi.get_full_flags(tryfunc) + if idaapi.is_func(funcflags): + print(f" - Workaround successful. Please assure that {vtable:#x} is a vtable.") + workaround = True + + if not workaround: + print(" - Workaround failed. Skipping...") + continue + + name = idaapi.get_name(ea) + if not workaround and (not name or not name.startswith("??_R4")): + print(f"[VTABLE STRUCTS] Invalid name at {ea:#x}. Possible unwind info. 
Ignoring...") + continue + + # In 64-bit PEs, the COL references itself, remove this + refs = list(idautils.XrefsTo(ea)) + if idc.__EA64__: + for n in range(len(refs)-1, -1, -1): + if refs[n].frm == ea + RTTICompleteObjectLocator.pSelf.offset: + del refs[n] + + # Now that we have the COL, we can use it to find the vtable that utilizes it and its thisoffs + if len(refs) != 1: + print(f"[VTABLE STRUCTS] Multiple vtables point to same COL - {name} at {ea:#x}") + continue + + vtable = refs[0].frm + ctypes.sizeof(ea_t) + thisoffs = idaapi.get_dword(ea + RTTICompleteObjectLocator.offset.offset) + vtables.append((thisoffs, vtable)) + + # Can have RTTI without a vtable + tis[classname] = {thisoffs: parse_vtable(vaddr) for thisoffs, vaddr in vtables} + +def string_method(type_info, vtabledata): + for string in idautils.Strings(): + sstr = str(string) + if not sstr.startswith(".?AV"): + continue + + ea = string.ea + ea -= TypeDescriptor.name.offset + trytinfo = rva_to_ea(idaapi.get_wide_dword(ea)) + # This is a weird string that isn't a part of a type descriptor + if trytinfo != type_info: continue - ea = idaapi.next_head(ea, endea) + + parse_ti(ea, vtabledata) + +def read_ti_win(): + # Step 1, get the vftable of type_info + type_info = idc.get_name_ea_simple("??_7type_info@@6B@") + if type_info is None: + print("[VTABLE STRUCTS] type_info not found. 
Are you sure you're in a C++ binary?") + return None + + vtabledata = {} + + # Step 2, get all xrefs to type_info + # Get type descriptor + for typedesc in idautils.XrefsTo(type_info): + parse_ti(typedesc.frm, vtabledata) + + # In some cases, the IDA either fails to reference some type descriptors with type_info + # Not exactly sure why, but it lists the ea of type_info as a "hash" when in reality it isn't + # A workaround for this is to parse type descriptor strings (".?AV*"), load up their references, and + # walk backwards to the start of what is supposed to be the type descriptor, and assure that + # its DWORD is the type_info vtable + # I only found this to be a problem in NMRIH, so it appears to be rare + WaitBox.show("Performing string parsing") + string_method(type_info, vtabledata) + + return vtabledata + +def read_vtables_win(): + WaitBox.show("Parsing Windows typeinfo") + data = read_ti_win() + + if data is None: + return + + WaitBox.show("Creating structs") + create_structs(data) def main(): - global OS - OS = get_os() - search_for_vtables() + os = get_os() + try: + if os == OS_Linux: + read_vtables_linux() + elif os == OS_Win: + read_vtables_win() + else: + print(f"Unsupported OS?: {idaapi.get_file_type_name()}") + idaapi.beep() + + if STRUCTS: + print(f"Successfully imported {STRUCTS} virtual structures") + else: + print("No virtual structures imported") + idaapi.beep() + except: + import traceback + traceback.print_exc() + print("Please file a bug report with supporting information at https://github.com/Scags/IDA-Scripts/issues") + idaapi.beep() + + idaapi.end_type_updating(idaapi.UTP_STRUCT) + WaitBox.hide() -main() \ No newline at end of file +# import cProfile +# cProfile.run("main()", "vtable_structs.prof") +main()