From a7c159a57759a4bcffd15b6e239c8f2b1ee3f61e Mon Sep 17 00:00:00 2001 From: nicolaas Date: Tue, 31 Mar 2026 17:46:02 -0400 Subject: [PATCH 1/5] Updates our use of open_program 1. Set the language 2. Set the image base address 3. Only perform Ghidra analysis after setting the image base address. --- .../components/pyghidra_components.py | 7 +++++-- .../ofrak_pyghidra/standalone/pyghidra_analysis.py | 13 ++++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/components/pyghidra_components.py b/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/components/pyghidra_components.py index 83ebe1611..2ca0660ba 100644 --- a/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/components/pyghidra_components.py +++ b/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/components/pyghidra_components.py @@ -33,7 +33,6 @@ from ofrak_pyghidra.standalone.pyghidra_analysis import unpack, decompile_all_functions from ofrak_type.error import NotFoundError - _GHIDRA_AUTO_LOADABLE_FORMATS = [Elf, Ihex, Pe] @@ -318,7 +317,11 @@ async def analyze(self, resource: Resource, config=None): analysis = self.analysis_store.get_analysis(program_r.get_id()) if "decompilation" not in analysis[cb_key]: program_file = analysis["metadata"]["path"] - for cb_key, decomp in decompile_all_functions(program_file, None).items(): + language = analysis["metadata"]["language"] + base_addr = analysis["metadata"]["base_address"] + for cb_key, decomp in decompile_all_functions( + program_file, language, base_addr + ).items(): analysis[cb_key]["decompilation"] = decomp self.analysis_store.store_analysis(program_r.get_id(), analysis) else: diff --git a/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/standalone/pyghidra_analysis.py b/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/standalone/pyghidra_analysis.py index 1d3c428dd..13576c7d4 100644 --- a/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/standalone/pyghidra_analysis.py +++ b/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/standalone/pyghidra_analysis.py @@ -44,7 +44,7 @@ def unpack( program_file = os.path.join(tempdir, "program") with open(program_file, "wb") as f: f.write(b"\x00") - with pyghidra.open_program(program_file, language=language) as flat_api: + with pyghidra.open_program(program_file, language=language, analyze=False) as flat_api: LOGGER.info("Analysis completed. Caching analysis to JSON") # Java packages must be imported after pyghidra.start or pyghidra.open_program from ghidra.app.decompiler import DecompInterface, DecompileOptions @@ -110,6 +110,7 @@ def unpack( ) program.setImageBase(new_base_addr, True) LOGGER.info(f"Rebased program address to {hex(base_address)}") + GhidraProject.analyze(program) main_dictionary: Dict[str, Any] = {} code_regions = _unpack_program(flat_api) @@ -117,6 +118,7 @@ def unpack( main_dictionary["metadata"]["backend"] = "ghidra" main_dictionary["metadata"]["decompiled"] = decompiled main_dictionary["metadata"]["path"] = program_file + main_dictionary["metadata"]["language"] = language if base_address is not None: main_dictionary["metadata"]["base_address"] = base_address with open(program_file, "rb") as fh: @@ -475,13 +477,18 @@ def _decompile(func, decomp_interface, task_monitor): return decomp -def decompile_all_functions(program_file, language): - with pyghidra.open_program(program_file, language=language) as flat_api: +def decompile_all_functions(program_file, language, base_addr): + with pyghidra.open_program(program_file, language=language, analyze=False) as flat_api: from ghidra.app.decompiler import DecompInterface, DecompileOptions from ghidra.util.task import TaskMonitor + from ghidra.base.project import GhidraProject decomp = DecompInterface() program = flat_api.getCurrentProgram() + address_factory = program.getAddressFactory() + new_base_addr = address_factory.getDefaultAddressSpace().getAddress(hex(base_addr)) + program.setImageBase(new_base_addr, True) + GhidraProject.analyze(program) prog_options = DecompileOptions() prog_options.grabFromProgram(program) decomp.setOptions(prog_options) From c37561f2fdb912c00ae94d807d24cad4de4472a7 Mon Sep 17 00:00:00 2001 From: nicolaas Date: Tue, 31 Mar 2026 19:29:57 -0400 Subject: [PATCH 2/5] Handle case when base address is None --- .../src/ofrak_pyghidra/components/pyghidra_components.py | 4 +++- .../src/ofrak_pyghidra/standalone/pyghidra_analysis.py | 7 ++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/components/pyghidra_components.py b/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/components/pyghidra_components.py index 2ca0660ba..d876b26c2 100644 --- a/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/components/pyghidra_components.py +++ b/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/components/pyghidra_components.py @@ -318,7 +318,9 @@ async def analyze(self, resource: Resource, config=None): if "decompilation" not in analysis[cb_key]: program_file = analysis["metadata"]["path"] language = analysis["metadata"]["language"] - base_addr = analysis["metadata"]["base_address"] + base_addr = None + if "base_address" in analysis["metadata"]: + base_addr = analysis["metadata"]["base_address"] for cb_key, decomp in decompile_all_functions( program_file, language, base_addr ).items(): diff --git a/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/standalone/pyghidra_analysis.py b/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/standalone/pyghidra_analysis.py index 13576c7d4..1114f9a79 100644 --- a/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/standalone/pyghidra_analysis.py +++ b/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/standalone/pyghidra_analysis.py @@ -485,9 +485,10 @@ def decompile_all_functions(program_file, language, base_addr): decomp = DecompInterface() program = flat_api.getCurrentProgram() - address_factory = program.getAddressFactory() - new_base_addr = address_factory.getDefaultAddressSpace().getAddress(hex(base_addr)) - program.setImageBase(new_base_addr, True) + if base_addr is not None: + address_factory = program.getAddressFactory() + new_base_addr = address_factory.getDefaultAddressSpace().getAddress(hex(base_addr)) + program.setImageBase(new_base_addr, True) GhidraProject.analyze(program) prog_options = DecompileOptions() prog_options.grabFromProgram(program) From 265d85394c1837874c12b7662223c532fbfb3c80 Mon Sep 17 00:00:00 2001 From: nicolaas Date: Wed, 1 Apr 2026 08:47:53 -0400 Subject: [PATCH 3/5] Perform analyze only once --- .../src/ofrak_pyghidra/standalone/pyghidra_analysis.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/standalone/pyghidra_analysis.py b/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/standalone/pyghidra_analysis.py index 1114f9a79..3f74232be 100644 --- a/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/standalone/pyghidra_analysis.py +++ b/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/standalone/pyghidra_analysis.py @@ -90,9 +90,6 @@ def unpack( logging.warning( f"Failed to create memory block at 0x{region['virtual_address']:x}: {e}" ) - # Analyze all - analysis_mgr = program.getOptions("Analyzers") - flat_api.analyzeAll(program) # If base_address is provided, rebase the program if base_address is not None: # Convert base_address to int if it's a string @@ -110,7 +107,7 @@ def unpack( ) program.setImageBase(new_base_addr, True) LOGGER.info(f"Rebased program address to {hex(base_address)}") - GhidraProject.analyze(program) + GhidraProject.analyze(program) main_dictionary: Dict[str, Any] = {} code_regions = _unpack_program(flat_api) From 05147b607cd43c3b83e470ccf7b196de89fc743c Mon Sep 17 00:00:00 2001 From: nicolaas Date: Wed, 1 Apr 2026 10:02:18 -0400 Subject: [PATCH 4/5] Add missing GhidraProject import --- .../src/ofrak_pyghidra/standalone/pyghidra_analysis.py | 1 + 1 file changed, 1 insertion(+) diff --git a/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/standalone/pyghidra_analysis.py b/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/standalone/pyghidra_analysis.py index 3f74232be..e055b1ac0 100644 --- a/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/standalone/pyghidra_analysis.py +++ b/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/standalone/pyghidra_analysis.py @@ -51,6 +51,7 @@ def unpack( from ghidra.util.task import TaskMonitor from ghidra.program.model.block import BasicBlockModel from ghidra.program.model.symbol import RefType + from ghidra.base.project import GhidraProject from java.math import BigInteger from java.io import ByteArrayInputStream From 181c82ddba448467bf2ba5d2f625197eacb1a6a2 Mon Sep 17 00:00:00 2001 From: nicolaas Date: Wed, 1 Apr 2026 11:27:46 -0400 Subject: [PATCH 5/5] Correct scope of program variable --- .../src/ofrak_pyghidra/standalone/pyghidra_analysis.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/standalone/pyghidra_analysis.py b/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/standalone/pyghidra_analysis.py index e055b1ac0..edf5fc48c 100644 --- a/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/standalone/pyghidra_analysis.py +++ b/disassemblers/ofrak_pyghidra/src/ofrak_pyghidra/standalone/pyghidra_analysis.py @@ -55,9 +55,9 @@ def unpack( from java.math import BigInteger from java.io import ByteArrayInputStream + program = flat_api.getCurrentProgram() # If memory_regions are provided, delete all data and create new regions: if memory_regions: - program = flat_api.getCurrentProgram() memory = program.getMemory() address_factory = program.getAddressFactory() default_space = address_factory.getDefaultAddressSpace() @@ -101,7 +101,6 @@ def unpack( base_address = int(base_address) # Rebase the program to the specified base address - program = flat_api.getCurrentProgram() address_factory = program.getAddressFactory() new_base_addr = address_factory.getDefaultAddressSpace().getAddress( hex(base_address)