From 60b7e716a59afea16853c68cf58cc2fb7b72ffce Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sat, 27 Jul 2024 08:47:31 -0700 Subject: [PATCH 1/2] Only include one case when compiling a stencil --- Tools/jit/_targets.py | 13 ++++++++++--- Tools/jit/template.c | 6 ++++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 5604c429bcf8ad..e998c817d554cb 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -182,15 +182,22 @@ async def _compile( async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text() - opnames = sorted(re.findall(r"\n {8}case (\w+): \{\n", generated_cases)) + cases_and_opnames = sorted( + re.findall( + r"\n {8}(case (\w+): \{\n.*?\n {8}\})", generated_cases, flags=re.DOTALL + ) + ) tasks = [] with tempfile.TemporaryDirectory() as tempdir: work = pathlib.Path(tempdir).resolve() async with asyncio.TaskGroup() as group: coro = self._compile("trampoline", TOOLS_JIT / "trampoline.c", work) tasks.append(group.create_task(coro, name="trampoline")) - for opname in opnames: - coro = self._compile(opname, TOOLS_JIT_TEMPLATE_C, work) + template = TOOLS_JIT_TEMPLATE_C.read_text() + for case, opname in cases_and_opnames: + c = work / f"{opname}.c" + c.write_text(template.replace("CASE", case)) + coro = self._compile(opname, c, work) tasks.append(group.create_task(coro, name=opname)) return {task.get_name(): task.result() for task in tasks} diff --git a/Tools/jit/template.c b/Tools/jit/template.c index ec7d033e89deff..6cf15085f79933 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -84,6 +84,8 @@ do { \ #undef WITHIN_STACK_BOUNDS #define WITHIN_STACK_BOUNDS() 1 +#define TIER_TWO 2 + _Py_CODEUNIT * _JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate) { @@ -107,9 +109,9 @@ _JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState OPT_STAT_INC(uops_executed); UOP_STAT_INC(uopcode, execution_count); - // The actual instruction definitions (only one will be used): switch (uopcode) { -#include "executor_cases.c.h" + // The actual instruction definition gets inserted here: + CASE default: Py_UNREACHABLE(); } From a32111d3e1262097e633408e3ab130adef0c4736 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Tue, 13 Aug 2024 20:15:16 -0700 Subject: [PATCH 2/2] Add a comment about faster compilation --- Tools/jit/_targets.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index e998c817d554cb..f67fde3ea17285 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -195,6 +195,11 @@ async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: tasks.append(group.create_task(coro, name="trampoline")) template = TOOLS_JIT_TEMPLATE_C.read_text() for case, opname in cases_and_opnames: + # Write out a copy of the template with *only* this case + # inserted. This is about twice as fast as #include'ing all + # of executor_cases.c.h each time we compile (since the C + # compiler wastes a bunch of time parsing the dead code for + # all of the other cases): c = work / f"{opname}.c" c.write_text(template.replace("CASE", case)) coro = self._compile(opname, c, work)