From 4533a8eae6c46bf0de622c96cbb3313e01e53b98 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 13 Aug 2025 16:08:38 -0500 Subject: [PATCH 1/4] re-write generate-projects-to-versions in Python --- .pre-commit-config.yaml | 9 +- ci/customization/projects-to-versions.json | 138 ++++++++++----------- ci/generate-projects-to-versions.py | 75 +++++++++++ ci/generate-projects-to-versions.sh | 133 -------------------- 4 files changed, 148 insertions(+), 207 deletions(-) create mode 100755 ci/generate-projects-to-versions.py delete mode 100755 ci/generate-projects-to-versions.sh diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cad6c925b2a..081eb53894d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,9 +6,6 @@ ci: autoupdate_branch: "" autoupdate_commit_msg: "[pre-commit.ci] pre-commit autoupdate" autoupdate_schedule: quarterly - skip: - # requires 'jq' and 'yq', which don't come pre-installed in the pre-commit.ci image - - generate-projects-to-versions submodules: false repos: @@ -42,9 +39,11 @@ repos: hooks: - id: generate-projects-to-versions name: generate-projects-to-versions - entry: ./ci/generate-projects-to-versions.sh - language: system + entry: ./ci/generate-projects-to-versions.py + language: python pass_filenames: false + additional_dependencies: + - pyyaml - repo: https://github.com/sirosen/texthooks rev: 0.7.1 hooks: diff --git a/ci/customization/projects-to-versions.json b/ci/customization/projects-to-versions.json index 97aa6020750..cd53ce56431 100644 --- a/ci/customization/projects-to-versions.json +++ b/ci/customization/projects-to-versions.json @@ -1,112 +1,112 @@ { - "cucim": { + "cudf": { "legacy": "25.06", - "nightly": "25.10", - "stable": "25.08" + "stable": "25.08", + "nightly": "25.10" }, - "cudf": { + "dask-cudf": { "legacy": "25.06", - "nightly": "25.10", - "stable": "25.08" + "stable": "25.08", + "nightly": "25.10" }, - "cudf-java": { + "cuml": { "legacy": "25.06", - "stable": "25.08" + "stable": "25.08", + "nightly": "25.10" }, "cugraph": { "legacy": "25.06", - "nightly": "25.10", - "stable": "25.08" + "stable": "25.08", + "nightly": "25.10" }, - "cuml": { + "cuxfilter": { "legacy": "25.06", - "nightly": "25.10", - "stable": "25.08" + "stable": "25.08", + "nightly": "25.10" }, - "cuproj": { - "legacy": "25.02", - "stable": "25.04" + "cudf-java": { + "legacy": "25.06", + "stable": "25.08" }, - "cusignal": {}, - "cuspatial": { - "legacy": "25.02", - "stable": "25.04" + "cucim": { + "legacy": "25.06", + "stable": "25.08", + "nightly": "25.10" }, "cuvs": { "legacy": "25.06", - "nightly": "25.10", - "stable": "25.08" + "stable": "25.08", + "nightly": "25.10" }, - "cuxfilter": { + "kvikio": { "legacy": "25.06", - "nightly": "25.10", - "stable": "25.08" + "stable": "25.08", + "nightly": "25.10" + }, + "raft": { + "legacy": "25.06", + "stable": "25.08", + "nightly": "25.10" }, "dask-cuda": { "legacy": "25.06", - "nightly": "25.10", - "stable": "25.08" + "stable": "25.08", + "nightly": "25.10" }, - "dask-cudf": { + "rmm": { "legacy": "25.06", - "nightly": "25.10", - "stable": "25.08" + "stable": "25.08", + "nightly": "25.10" }, - "kvikio": { + "rapidsmpf": { + "stable": "25.08", + "nightly": "25.10" + }, + "librmm": { "legacy": "25.06", - "nightly": "25.10", - "stable": "25.08" + "stable": "25.08", + "nightly": "25.10" }, "libcudf": { "legacy": "25.06", - "nightly": "25.10", - "stable": "25.08" + "stable": "25.08", + "nightly": "25.10" }, "libcuml": { "legacy": "25.06", - "nightly": "25.10", - "stable": "25.08" - }, - "libcuproj": { - "legacy": "25.02", - "stable": "25.04" - }, - "libcuspatial": { - "legacy": "25.02", - "stable": "25.04" + "stable": "25.08", + "nightly": "25.10" }, "libkvikio": { "legacy": "25.06", - "nightly": "25.10", - "stable": "25.08" - }, - "librmm": { - "legacy": "25.06", - "nightly": "25.10", - "stable": "25.08" + "stable": "25.08", + "nightly": "25.10" }, "libucxx": { "legacy": "0.44", - "nightly": "0.46", - "stable": "0.45" - }, - "raft": { - "legacy": "25.06", - "nightly": "25.10", - "stable": "25.08" + "stable": "0.45", + "nightly": "0.46" }, "rapids-cmake": { "legacy": "25.06", - "nightly": "25.10", - "stable": "25.08" + "stable": "25.08", + "nightly": "25.10" }, - "rapidsmpf": { - "nightly": "25.10", - "stable": "25.08" + "cuproj": { + "legacy": "25.02", + "stable": "25.04" }, - "rmm": { - "legacy": "25.06", - "nightly": "25.10", - "stable": "25.08" + "cusignal": {}, + "cuspatial": { + "legacy": "25.02", + "stable": "25.04" + }, + "libcuproj": { + "legacy": "25.02", + "stable": "25.04" + }, + "libcuspatial": { + "legacy": "25.02", + "stable": "25.04" } -} +} \ No newline at end of file diff --git a/ci/generate-projects-to-versions.py b/ci/generate-projects-to-versions.py new file mode 100755 index 00000000000..c4c92a964ec --- /dev/null +++ b/ci/generate-projects-to-versions.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# [description] +# +# Determines: +# +# * which RAPIDS libraries to host docs for +# * what types of docs to host ('legacy', 'nightly', 'stable', etc.) +# * what versions to map to those types +# +# The libraries that should be copied are read from "_data/docs.yml". +# +# The versions that should be copied are read from a mix of sources: +# +# - active projects: "_data/releases.json" +# - inactive projects: 'version-overrides' field in entries in "_data/docs.yml" +# +# Produces a JSON mapping of the form: +# +# { +# "{project}": { +# "stable": {version_number}, +# "legacy": {version_number}, +# "nightly": {version_number} +# }, +# } +# +# With keys omitted based on configuration in _data/docs.yml. +# +# e.g. if a project has 'stable: 0' in that file, it will not have a '{project}.stable' +# key in the mapping produced by this script. +# +import json +import sys +import yaml +from collections import OrderedDict + +with open("_data/docs.yml") as f: + DOCS_YML_DICT = yaml.safe_load(f) + +with open("_data/releases.json") as f: + RELEASES_JSON_DICT = json.load(f) + +# using OrderedDict minimizes churn in the output as projects are added and removed +PROJECTS_TO_VERSIONS_DICT = OrderedDict() + +for docs_key in ["apis", "libs", "inactive-projects"]: + for project_name, project_details in DOCS_YML_DICT[docs_key].items(): + print(f"Processing: {project_name}", file=sys.stderr) + # what entry from releases.json should be used to find version numbers? + version_key = "version" + if "ucxx" in project_name: + version_key = "ucxx_version" + + # what versions should be built for this project? + versions_for_this_project = OrderedDict() + for version_name, should_include in project_details["versions"].items(): + if should_include == 1: + version_override = project_details.get("version-overrides", dict()).get(version_name, "") + if version_override: + versions_for_this_project[version_name] = version_override + else: + versions_for_this_project[version_name] = RELEASES_JSON_DICT[version_name][version_key] + else: + print(f"Skipping: {project_name} | {version_name}", file=sys.stderr) + + # update overall mapping + PROJECTS_TO_VERSIONS_DICT[project_name] = versions_for_this_project + + +with open("ci/customization/projects-to-versions.json", "w") as f: + json.dump(PROJECTS_TO_VERSIONS_DICT, f, indent=2) diff --git a/ci/generate-projects-to-versions.sh b/ci/generate-projects-to-versions.sh deleted file mode 100755 index 514cb2b85c7..00000000000 --- a/ci/generate-projects-to-versions.sh +++ /dev/null @@ -1,133 +0,0 @@ -#!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. -# All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# [description] -# -# Determines: -# -# * which RAPIDS libraries to host docs for -# * what types of docs to host ('legacy', 'nightly', 'stable', etc.) -# * what versions to map to those types -# -# The libraries that should be copied are read from "_data/docs.yml". -# -# The versions that should be copied are read from a mix of sources: -# -# - active projects: "_data/releases.json" -# - inactive projects: 'version-overrides' field in entries in "_data/docs.yml" -# -# Produces a JSON mapping of the form: -# -# { -# "{project}": { -# "stable": {version_number}, -# "legacy": {version_number}, -# "nightly": {version_number} -# }, -# } -# -# With keys omitted based on configuration in _data/docs.yml. -# -# e.g. if a project has 'stable: 0' in that file, it will not have a '{project}.stable' -# key in the mapping produced by this script. -# -# Only that mapping is written to stdout, so this is safe to use inline like this: -# -# PROJECTS_TO_VERSIONS=$(./ci/generate-projects-to-versions.sh) -# -# WARNING: no guarantees are made about the ordering of output in this mapping. -# - -set -e -E -u -o pipefail - -log-stderr() { - echo "${1}" >&2 -} - -PROJECT_MAP=$(yq '.apis + .libs' _data/docs.yml) -INACTIVE_PROJECT_MAP=$(yq '.inactive-projects' _data/docs.yml) - -VERSION_MAP=$(jq '{ - "legacy": { "version": .legacy.version, "ucxx_version": .legacy.ucxx_version }, - "stable": { "version": .stable.version, "ucxx_version": .stable.ucxx_version }, - "nightly": { "version": .nightly.version, "ucxx_version": .nightly.ucxx_version } -}' _data/releases.json) - -PROJECTS_TO_VERSIONS='{}' - -for PROJECT in $(yq -r 'keys | .[]' <<< "$PROJECT_MAP"); do - THIS_PROJECT_MAP="{\"${PROJECT}\":{}}" - for VERSION_NAME in $(jq -r 'keys | .[]' <<< "$VERSION_MAP"); do - VERSION_NUMBER=$(jq -r --arg vn "$VERSION_NAME" --arg pr "$PROJECT" ' - if ($pr | contains("ucxx")) then - .[$vn].ucxx_version - else - .[$vn].version - end' <<< "$VERSION_MAP") - PROJECT_MAP_JSON=$(yq -r -o json '.' <<< "$PROJECT_MAP") - if [ "$(jq -r --arg pr "$PROJECT" --arg vn "$VERSION_NAME" '.[$pr].versions[$vn]' <<< "$PROJECT_MAP_JSON")" == "0" ]; then - log-stderr "Skipping: $PROJECT | $VERSION_NAME | $VERSION_NUMBER" - continue - fi - THIS_PROJECT_MAP=$( - jq \ - --arg pr "${PROJECT}" \ - --arg version_name "${VERSION_NAME}" \ - --arg version_number "${VERSION_NUMBER}" \ - '.[$pr] |= . + {$version_name: $version_number}' \ - <<< "${THIS_PROJECT_MAP}" - ) - done - # add this new entry to the mapping - PROJECTS_TO_VERSIONS=$( - jq --slurp \ - 'map(to_entries) | flatten | group_by(.key) | map({key: .[0].key, value: map(.value) | add}) | from_entries' \ - <<< "${PROJECTS_TO_VERSIONS}${THIS_PROJECT_MAP}" - ) -done - -# inactive projects have specific versions hard-coded in their configuration, process those separately -for PROJECT in $(yq -r 'keys | .[]' <<< "$INACTIVE_PROJECT_MAP"); do - THIS_PROJECT_MAP="{\"${PROJECT}\":{}}" - for VERSION_NAME in $(jq -r 'keys | .[]' <<< "$VERSION_MAP"); do - # do not attempt updates for any versions where the corresponding key is '0' in docs.yml - INACTIVE_PROJECT_MAP_JSON=$(yq -r -o json '.' <<< "$INACTIVE_PROJECT_MAP") - if [ "$(jq -r --arg pr "$PROJECT" --arg vn "$VERSION_NAME" '.[$pr].versions[$vn]' <<< "$INACTIVE_PROJECT_MAP_JSON")" == "0" ]; then - log-stderr "Skipping: $PROJECT | $VERSION_NAME" - continue - fi - - # get the version from the 'version-overrides' field in docs.yml, hard-coded there - # so it doesn't change from release-to-release for inactive projects - VERSION_NUMBER=$( - jq -r \ - --arg vn "$VERSION_NAME" \ - --arg pr "${PROJECT}" \ - '.[$pr]."version-overrides"[$vn]' \ - <<< "${INACTIVE_PROJECT_MAP_JSON}" - ) - PROJECT_MAP_JSON=$(yq -r -o json '.' <<< "$PROJECT_MAP") - if [ "$(jq -r --arg pr "$PROJECT" --arg vn "$VERSION_NAME" '.[$pr].versions[$vn]' <<< "$PROJECT_MAP_JSON")" == "0" ]; then - log-stderr "Skipping: $PROJECT | $VERSION_NAME | $VERSION_NUMBER" - continue - fi - THIS_PROJECT_MAP=$( - jq \ - --arg pr "${PROJECT}" \ - --arg version_name "${VERSION_NAME}" \ - --arg version_number "${VERSION_NUMBER}" \ - '.[$pr] |= . + {$version_name: $version_number}' \ - <<< "${THIS_PROJECT_MAP}" - ) - done - # add this new entry to the mapping - PROJECTS_TO_VERSIONS=$( - jq --slurp \ - 'map(to_entries) | flatten | group_by(.key) | map({key: .[0].key, value: map(.value) | add}) | from_entries' \ - <<< "${PROJECTS_TO_VERSIONS}${THIS_PROJECT_MAP}" - ) -done - -echo "${PROJECTS_TO_VERSIONS}" > ./ci/customization/projects-to-versions.json From 4038592280cbcc02fc2f345cf1ddff2f09c8ddb0 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 13 Aug 2025 16:17:34 -0500 Subject: [PATCH 2/4] update docs --- ci/customization/README.md | 4 ++-- ci/generate-projects-to-versions.py | 10 +++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/ci/customization/README.md b/ci/customization/README.md index 5c858dece07..fdc81314180 100644 --- a/ci/customization/README.md +++ b/ci/customization/README.md @@ -40,12 +40,12 @@ Building the API docs requires answering these questions: * what version types? (stable? legacy? nightly?) * what version numbers correspond to those version types? -Logic for all of that is centralized in a script. +Logic for all of that is centralized in a script which is run by `pre-commit`. Invoke it to see what will be built. ```shell -./ci/get-projects-to-versions.sh > ./ci/customization/projects-to-versions.json +pre-commit run --all-files generate-projects-to-versions ``` ### Symlinks diff --git a/ci/generate-projects-to-versions.py b/ci/generate-projects-to-versions.py index c4c92a964ec..ff55840d0d9 100755 --- a/ci/generate-projects-to-versions.py +++ b/ci/generate-projects-to-versions.py @@ -59,14 +59,18 @@ versions_for_this_project = OrderedDict() for version_name, should_include in project_details["versions"].items(): if should_include == 1: - version_override = project_details.get("version-overrides", dict()).get(version_name, "") + version_override = project_details.get("version-overrides", dict()).get( + version_name, "" + ) if version_override: versions_for_this_project[version_name] = version_override else: - versions_for_this_project[version_name] = RELEASES_JSON_DICT[version_name][version_key] + versions_for_this_project[version_name] = RELEASES_JSON_DICT[ + version_name + ][version_key] else: print(f"Skipping: {project_name} | {version_name}", file=sys.stderr) - + # update overall mapping PROJECTS_TO_VERSIONS_DICT[project_name] = versions_for_this_project From 719df90f2128b390e9266026e17faa4afbe9bd8b Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 13 Aug 2025 16:24:13 -0500 Subject: [PATCH 3/4] convince pre-commit-hooks and this new hook to stop fighting over newlines --- ci/customization/projects-to-versions.json | 2 +- ci/generate-projects-to-versions.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/customization/projects-to-versions.json b/ci/customization/projects-to-versions.json index cd53ce56431..0152b728f56 100644 --- a/ci/customization/projects-to-versions.json +++ b/ci/customization/projects-to-versions.json @@ -109,4 +109,4 @@ "legacy": "25.02", "stable": "25.04" } -} \ No newline at end of file +} diff --git a/ci/generate-projects-to-versions.py b/ci/generate-projects-to-versions.py index ff55840d0d9..5e8366d624f 100755 --- a/ci/generate-projects-to-versions.py +++ b/ci/generate-projects-to-versions.py @@ -77,3 +77,4 @@ with open("ci/customization/projects-to-versions.json", "w") as f: json.dump(PROJECTS_TO_VERSIONS_DICT, f, indent=2) + f.write("\n") From e45610a710dfd06196d0bb5db5e266f4e1abcaf6 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 13 Aug 2025 16:32:16 -0500 Subject: [PATCH 4/4] fix one more docs reference --- ci/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ci/README.md b/ci/README.md index 28809a58a18..c497bf732d6 100644 --- a/ci/README.md +++ b/ci/README.md @@ -11,12 +11,13 @@ The steps are roughly as follows. ### Step 1: determine libraries and versions to build -`get-projects-to-versions.sh` is responsible for holding all the logic relevant to the questions "What projects' API docs should be hosted? What versions?". +`generate-projects-to-versions.py` is responsible for holding all the logic relevant to the questions "What projects' API docs should be hosted? What versions?". Run it from the root of the repo to see for yourself. ```shell -./ci/get-projects-to-versions.sh +pre-commit run --all-files generate-projects-to-versions +cat ./ci/customization/projects-to-versions.json ``` That script is reused by other automation to determine which projects and versions to build.