From 9ea219d9463014d09e79b195b86ea36500eeee00 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Thu, 16 Apr 2026 15:22:12 -0400 Subject: [PATCH 1/6] Add Cython ABI checking tool to toolshed --- toolshed/check_cython_abi.py | 167 +++++++++++++++++++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 toolshed/check_cython_abi.py diff --git a/toolshed/check_cython_abi.py b/toolshed/check_cython_abi.py new file mode 100644 index 0000000000..8909a4bfed --- /dev/null +++ b/toolshed/check_cython_abi.py @@ -0,0 +1,167 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + + +""" +Tool to check for Cython ABI changes in a given package. + +The workflow is basically: + +1) Build and install a "clean" upstream version of the package. + +2) Generate ABI files from the package by running (in the same venv in which the + package is installed), where `package_name` is the import path to the package, + e.g. `cuda.bindings`: + + python check_cython_abi.py generate + +3) Checkout a version with the changes to by tested, and build and install. + +4) Check the ABI against the previously generated files by running: + + python check_cython_abi.py check +""" + +import importlib +import json +import re +import sysconfig +from pathlib import Path + +EXT_SUFFIX = sysconfig.get_config_var("EXT_SUFFIX") +ABI_SUFFIX = ".abi.json" + + +def short_stem(name: str) -> str: + return name[: name.find(".")] + + +def get_package_path(package_name: str) -> Path: + package = importlib.import_module(package_name) + return Path(package.__file__).parent + + +def import_from_path(root_package: str, root_dir: Path, path: Path) -> object: + path = path.relative_to(root_dir) + parts = [root_package] + list(path.parts[:-1]) + [short_stem(path.name)] + return importlib.import_module(".".join(parts)) + + +def so_path_to_abi_path(so_path: Path, build_dir: Path, abi_dir: Path) -> Path: + abi_name = short_stem(so_path.name) + ABI_SUFFIX + return abi_dir / so_path.parent.relative_to(build_dir) / abi_name + + +def abi_path_to_so_path(abi_path: Path, build_dir: Path, abi_dir: Path) -> Path: + so_name = short_stem(abi_path.name) + EXT_SUFFIX + return build_dir / abi_path.parent.relative_to(abi_dir) / so_name + + +def pyx_capi_to_json(d: dict[str, object]) -> dict[str, str]: + """ + Converts the __pyx_capi__ dictionary to a JSON-serializable dictionary, + removing any memory addresses that are irrelevant for comparison. + """ + + def extract_name(v: object) -> str: + v = str(v) + match = re.match(r'', v) + assert match, f"Could not parse __pyx_capi__ entry: {v}" + return match.group(1) + + # Sort the dictionary by keys to make diffs in the JSON files smaller + return {k: extract_name(d[k]) for k in sorted(d.keys())} + + +def check_abi(expected: dict[str, str], found: dict[str, str]) -> tuple[bool, bool]: + has_errors = False + has_allowed_changes = False + for k, v in expected.items(): + if k not in found: + print(f" Missing symbol: {k}") + has_errors = True + elif found[k] != v: + print(f" Changed symbol: {k}: expected {v}, got {found[k]}") + has_errors = True + for k, v in found.items(): + if k not in expected: + print(f" Added symbol: {k}") + has_allowed_changes = True + return has_errors, has_allowed_changes + + +def check(package: str, abi_dir: Path) -> tuple[bool, bool]: + build_dir = get_package_path(package) + + has_errors = False + has_allowed_changes = False + for abi_path in Path(abi_dir).glob(f"**/*{ABI_SUFFIX}"): + so_path = abi_path_to_so_path(abi_path, build_dir, abi_dir) + if so_path.is_file(): + module = import_from_path(package, build_dir, so_path) + if hasattr(module, "__pyx_capi__"): + found_json = pyx_capi_to_json(module.__pyx_capi__) + with open(abi_path, encoding="utf-8") as f: + expected_json = json.load(f) + print(f"Checking module: {so_path.relative_to(build_dir)}") + check_errors, check_allowed_changes = check_abi(expected_json, found_json) + has_errors |= check_errors + has_allowed_changes |= check_allowed_changes + else: + print(f"Module no longer has an exposed ABI: {so_path.relative_to(build_dir)}") + has_errors = True + else: + print(f"No module found for {abi_path.relative_to(abi_dir)}") + has_errors = True + + for so_path in Path(build_dir).glob(f"**/*{EXT_SUFFIX}"): + module = import_from_path(package, build_dir, so_path) + if hasattr(module, "__pyx_capi__"): + abi_path = so_path_to_abi_path(so_path, build_dir, abi_dir) + if not abi_path.is_file(): + print(f"New module added {so_path.relative_to(build_dir)}") + has_allowed_changes = True + + if has_errors: + print("ERRORS FOUND") + elif has_allowed_changes: + print("Allowed changes found.") + + +def regenerate(package: str, abi_dir: Path) -> None: + if not abi_dir.is_dir(): + abi_dir.mkdir(parents=True, exist_ok=True) + + build_dir = get_package_path(package) + for so_path in Path(build_dir).glob(f"**/*{EXT_SUFFIX}"): + print(f"Generating ABI from {so_path.relative_to(build_dir)}") + module = import_from_path(package, build_dir, so_path) + if hasattr(module, "__pyx_capi__"): + abi_path = so_path_to_abi_path(so_path, build_dir, abi_dir) + abi_path.parent.mkdir(parents=True, exist_ok=True) + with open(abi_path, "w", encoding="utf-8") as f: + json.dump(pyx_capi_to_json(module.__pyx_capi__), f, indent=2) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + prog="check_cython_abi", description="Checks for changes in the Cython ABI of a given package" + ) + + subparsers = parser.add_subparsers() + + regen_parser = subparsers.add_parser("generate", help="Regenerate the ABI files") + regen_parser.set_defaults(func=regenerate) + regen_parser.add_argument("package", help="Python package to collect data from") + regen_parser.add_argument("dir", help="Output directory to save data to") + + check_parser = subparsers.add_parser("check", help="Check the API against existing ABI files") + check_parser.set_defaults(func=check) + check_parser.add_argument("package", help="Python package to collect data from") + check_parser.add_argument("dir", help="Input directory to read data from") + + args = parser.parse_args() + args.func(args.package, Path(args.dir)) From d3c9c2e6b748e5c3759f51e8449f33f01acbd171 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Thu, 16 Apr 2026 15:54:04 -0400 Subject: [PATCH 2/6] Update toolshed/check_cython_abi.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- toolshed/check_cython_abi.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/toolshed/check_cython_abi.py b/toolshed/check_cython_abi.py index 8909a4bfed..4a165850ff 100644 --- a/toolshed/check_cython_abi.py +++ b/toolshed/check_cython_abi.py @@ -67,7 +67,8 @@ def pyx_capi_to_json(d: dict[str, object]) -> dict[str, str]: def extract_name(v: object) -> str: v = str(v) match = re.match(r'', v) - assert match, f"Could not parse __pyx_capi__ entry: {v}" + if match is None: + raise ValueError(f"Could not parse __pyx_capi__ entry: {v}") return match.group(1) # Sort the dictionary by keys to make diffs in the JSON files smaller From c24a3219a3b192012f0e38079d55833a0f9e365c Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Thu, 16 Apr 2026 15:56:46 -0400 Subject: [PATCH 3/6] Update toolshed/check_cython_abi.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- toolshed/check_cython_abi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolshed/check_cython_abi.py b/toolshed/check_cython_abi.py index 4a165850ff..8cd2439f82 100644 --- a/toolshed/check_cython_abi.py +++ b/toolshed/check_cython_abi.py @@ -16,7 +16,7 @@ python check_cython_abi.py generate -3) Checkout a version with the changes to by tested, and build and install. +3) Checkout a version with the changes to be tested, and build and install. 4) Check the ABI against the previously generated files by running: From 245dd3a6ef6b4b6eb81698e63b987c520baaa1e6 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Thu, 16 Apr 2026 15:57:03 -0400 Subject: [PATCH 4/6] Update toolshed/check_cython_abi.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- toolshed/check_cython_abi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolshed/check_cython_abi.py b/toolshed/check_cython_abi.py index 8cd2439f82..b036bd0e51 100644 --- a/toolshed/check_cython_abi.py +++ b/toolshed/check_cython_abi.py @@ -34,7 +34,7 @@ def short_stem(name: str) -> str: - return name[: name.find(".")] + return name.split(".", 1)[0] def get_package_path(package_name: str) -> Path: From cefb6954e8bed8aa1e0a8d6ad93805514b5b38a8 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Thu, 16 Apr 2026 15:59:22 -0400 Subject: [PATCH 5/6] Address comments in PR --- toolshed/check_cython_abi.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/toolshed/check_cython_abi.py b/toolshed/check_cython_abi.py index b036bd0e51..ee73c47eb8 100644 --- a/toolshed/check_cython_abi.py +++ b/toolshed/check_cython_abi.py @@ -26,6 +26,7 @@ import importlib import json import re +import sys import sysconfig from pathlib import Path @@ -92,7 +93,7 @@ def check_abi(expected: dict[str, str], found: dict[str, str]) -> tuple[bool, bo return has_errors, has_allowed_changes -def check(package: str, abi_dir: Path) -> tuple[bool, bool]: +def check(package: str, abi_dir: Path) -> bool: build_dir = get_package_path(package) has_errors = False @@ -126,11 +127,13 @@ def check(package: str, abi_dir: Path) -> tuple[bool, bool]: if has_errors: print("ERRORS FOUND") + return True elif has_allowed_changes: print("Allowed changes found.") + return False -def regenerate(package: str, abi_dir: Path) -> None: +def regenerate(package: str, abi_dir: Path) -> bool: if not abi_dir.is_dir(): abi_dir.mkdir(parents=True, exist_ok=True) @@ -144,6 +147,8 @@ def regenerate(package: str, abi_dir: Path) -> None: with open(abi_path, "w", encoding="utf-8") as f: json.dump(pyx_capi_to_json(module.__pyx_capi__), f, indent=2) + return False + if __name__ == "__main__": import argparse @@ -165,4 +170,9 @@ def regenerate(package: str, abi_dir: Path) -> None: check_parser.add_argument("dir", help="Input directory to read data from") args = parser.parse_args() - args.func(args.package, Path(args.dir)) + if hasattr(args, "func"): + if args.func(args.package, Path(args.dir)): + sys.exit(1) + else: + parser.print_help() + sys.exit(1) From 720f89074388510eb8d932e36acb77ea435edd22 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 17 Apr 2026 09:40:12 -0400 Subject: [PATCH 6/6] Address some of the comments in the PR --- toolshed/check_cython_abi.py | 90 ++++++++++++++++++++++++++---------- 1 file changed, 65 insertions(+), 25 deletions(-) diff --git a/toolshed/check_cython_abi.py b/toolshed/check_cython_abi.py index ee73c47eb8..2199d526a3 100644 --- a/toolshed/check_cython_abi.py +++ b/toolshed/check_cython_abi.py @@ -6,6 +6,13 @@ """ Tool to check for Cython ABI changes in a given package. +There are different types of ABI changes, only one of which is covered by this tool: + +- cdef function signatures (capsule strings) — covered here +- cdef class struct size (tp_basicsize) — not covered +- cdef class vtable layout / method reordering — not covered, and this one fails as silent UB rather than an import-time error +- Fused specialization ordering — partially covered (reorders manifest as capsule-name deltas, but the mapping is non-obvious) + The workflow is basically: 1) Build and install a "clean" upstream version of the package. @@ -23,9 +30,9 @@ python check_cython_abi.py check """ +import ctypes import importlib import json -import re import sys import sysconfig from pathlib import Path @@ -34,6 +41,15 @@ ABI_SUFFIX = ".abi.json" +_pycapsule_get_name = ctypes.pythonapi.PyCapsule_GetName +_pycapsule_get_name.restype = ctypes.c_char_p +_pycapsule_get_name.argtypes = [ctypes.py_object] + + +def get_capsule_name(v: object) -> str: + return _pycapsule_get_name(v).decode("utf-8") + + def short_stem(name: str) -> str: return name.split(".", 1)[0] @@ -59,24 +75,24 @@ def abi_path_to_so_path(abi_path: Path, build_dir: Path, abi_dir: Path) -> Path: return build_dir / abi_path.parent.relative_to(abi_dir) / so_name -def pyx_capi_to_json(d: dict[str, object]) -> dict[str, str]: - """ - Converts the __pyx_capi__ dictionary to a JSON-serializable dictionary, - removing any memory addresses that are irrelevant for comparison. - """ +def is_cython_module(module: object) -> bool: + # This is kind of quick-and-dirty, but seems to work + return hasattr(module, "__pyx_capi__") - def extract_name(v: object) -> str: - v = str(v) - match = re.match(r'', v) - if match is None: - raise ValueError(f"Could not parse __pyx_capi__ entry: {v}") - return match.group(1) +def module_to_json(module: object) -> dict: + """ + Converts extracts information about a Cython-compiled .so into JSON-serializable information. + """ # Sort the dictionary by keys to make diffs in the JSON files smaller - return {k: extract_name(d[k]) for k in sorted(d.keys())} + pyx_capi = module.__pyx_capi__ + return { + "functions": {k: get_capsule_name(pyx_capi[k]) for k in sorted(pyx_capi.keys())}, + } -def check_abi(expected: dict[str, str], found: dict[str, str]) -> tuple[bool, bool]: + +def check_functions(expected: dict[str, str], found: dict[str, str]) -> tuple[bool, bool]: has_errors = False has_allowed_changes = False for k, v in expected.items(): @@ -93,6 +109,17 @@ def check_abi(expected: dict[str, str], found: dict[str, str]) -> tuple[bool, bo return has_errors, has_allowed_changes +def compare(expected: dict, found: dict) -> tuple[bool, bool]: + has_errors = False + has_allowed_changes = False + + errors, allowed_changes = check_functions(expected["functions"], found["functions"]) + has_errors |= errors + has_allowed_changes |= allowed_changes + + return has_errors, has_allowed_changes + + def check(package: str, abi_dir: Path) -> bool: build_dir = get_package_path(package) @@ -101,17 +128,22 @@ def check(package: str, abi_dir: Path) -> bool: for abi_path in Path(abi_dir).glob(f"**/*{ABI_SUFFIX}"): so_path = abi_path_to_so_path(abi_path, build_dir, abi_dir) if so_path.is_file(): - module = import_from_path(package, build_dir, so_path) - if hasattr(module, "__pyx_capi__"): - found_json = pyx_capi_to_json(module.__pyx_capi__) + try: + module = import_from_path(package, build_dir, so_path) + except ImportError: + print(f"Failed to import module for {so_path.relative_to(build_dir)}") + has_errors = True + continue + if is_cython_module(module): + found_json = module_to_json(module) with open(abi_path, encoding="utf-8") as f: expected_json = json.load(f) print(f"Checking module: {so_path.relative_to(build_dir)}") - check_errors, check_allowed_changes = check_abi(expected_json, found_json) + check_errors, check_allowed_changes = compare(expected_json, found_json) has_errors |= check_errors has_allowed_changes |= check_allowed_changes else: - print(f"Module no longer has an exposed ABI: {so_path.relative_to(build_dir)}") + print(f"Module no longer has an exposed ABI or is no longer Cython: {so_path.relative_to(build_dir)}") has_errors = True else: print(f"No module found for {abi_path.relative_to(abi_dir)}") @@ -125,27 +157,35 @@ def check(package: str, abi_dir: Path) -> bool: print(f"New module added {so_path.relative_to(build_dir)}") has_allowed_changes = True + print() if has_errors: print("ERRORS FOUND") return True elif has_allowed_changes: print("Allowed changes found.") + else: + print("No changes found.") return False def regenerate(package: str, abi_dir: Path) -> bool: - if not abi_dir.is_dir(): - abi_dir.mkdir(parents=True, exist_ok=True) + if abi_dir.is_dir(): + print(f"ABI directory {abi_dir} already exists. Please remove it before regenerating.") + return True build_dir = get_package_path(package) for so_path in Path(build_dir).glob(f"**/*{EXT_SUFFIX}"): - print(f"Generating ABI from {so_path.relative_to(build_dir)}") - module = import_from_path(package, build_dir, so_path) - if hasattr(module, "__pyx_capi__"): + try: + module = import_from_path(package, build_dir, so_path) + except ImportError: + print(f"Failed to import module: {so_path.relative_to(build_dir)}") + continue + if is_cython_module(module): + print(f"Generating ABI from {so_path.relative_to(build_dir)}") abi_path = so_path_to_abi_path(so_path, build_dir, abi_dir) abi_path.parent.mkdir(parents=True, exist_ok=True) with open(abi_path, "w", encoding="utf-8") as f: - json.dump(pyx_capi_to_json(module.__pyx_capi__), f, indent=2) + json.dump(module_to_json(module), f, indent=2) return False