diff --git a/.github/workflows/dependencies.yml b/.github/workflows/dependencies.yml index fef65870b697..f32eb7d2ddf6 100644 --- a/.github/workflows/dependencies.yml +++ b/.github/workflows/dependencies.yml @@ -66,4 +66,4 @@ jobs: - name: Install cargo-machete run: cargo install cargo-machete --version ^0.9 --locked - name: Detect unused dependencies - run: cargo machete --with-metadata \ No newline at end of file + run: cargo machete --with-metadata diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 3e2c48643c36..b62055b13b8f 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -51,6 +51,12 @@ jobs: python3 -m venv venv source venv/bin/activate pip install -r docs/requirements.txt + - name: Install dependency graph tooling + run: | + set -x + sudo apt-get update + sudo apt-get install -y graphviz + cargo install cargo-depgraph --version ^1.6 --locked - name: Build docs run: | diff --git a/.github/workflows/docs_pr.yaml b/.github/workflows/docs_pr.yaml index 81eeb4039ba9..784a33d4c584 100644 --- a/.github/workflows/docs_pr.yaml +++ b/.github/workflows/docs_pr.yaml @@ -54,10 +54,15 @@ jobs: python3 -m venv venv source venv/bin/activate pip install -r docs/requirements.txt + - name: Install dependency graph tooling + run: | + set -x + sudo apt-get update + sudo apt-get install -y graphviz + cargo install cargo-depgraph --version ^1.6 --locked - name: Build docs html and check for warnings run: | set -x source venv/bin/activate cd docs ./build.sh # fails on errors - diff --git a/docs/.gitignore b/docs/.gitignore index a3adddc690ab..e73866cc0f35 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -20,3 +20,7 @@ build/ venv/ .python-version __pycache__/ + +# Generated dependency graph artifacts (produced during docs CI) +source/_static/data/deps.dot +source/_static/data/deps.svg diff --git a/docs/README.md b/docs/README.md index c3d87ee8e84a..0340a3b8bf63 100644 --- a/docs/README.md +++ b/docs/README.md @@ -40,6 
+40,11 @@ needing to create a virtual environment: uv run --with-requirements requirements.txt bash build.sh ``` +The docs build regenerates the workspace dependency graph via +`docs/scripts/generate_dependency_graph.sh`, so ensure `cargo`, `cargo-depgraph` +(`cargo install cargo-depgraph --version ^1.6 --locked`), and Graphviz `dot` +(`brew install graphviz` or `sudo apt-get install -y graphviz`) are available. + ## Build & Preview Run the provided script to build the HTML pages. diff --git a/docs/build.sh b/docs/build.sh index 9e4a118580ca..e12e3c1a5f20 100755 --- a/docs/build.sh +++ b/docs/build.sh @@ -18,7 +18,14 @@ # under the License. # -set -e +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "${SCRIPT_DIR}" + rm -rf build 2> /dev/null +# Keep the workspace dependency graph in sync with the codebase. +scripts/generate_dependency_graph.sh + make html diff --git a/docs/scripts/generate_dependency_graph.sh b/docs/scripts/generate_dependency_graph.sh new file mode 100755 index 000000000000..771f6f1932c3 --- /dev/null +++ b/docs/scripts/generate_dependency_graph.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# See `usage()` for details about this script. +# +# The key commands to generate the dependency graph SVG in this script are: +# cargo depgraph ... | dot -Tsvg > deps.svg +# See below for the exact command used. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)" +OUTPUT_DIR="${REPO_DIR}/docs/source/_static/data" +SVG_OUTPUT="${OUTPUT_DIR}/deps.svg" + +usage() { + cat <&2 + usage + exit 1 + ;; + esac + shift +done + +if ! command -v cargo >/dev/null 2>&1; then + echo "cargo is required to build the dependency graph." >&2 + exit 1 +fi + +if ! command -v cargo-depgraph > /dev/null 2>&1; then + echo "cargo-depgraph is required (install with: cargo install cargo-depgraph)." >&2 + exit 1 +fi + +if ! command -v dot >/dev/null 2>&1; then + echo "Graphviz 'dot' is required to render the SVG." >&2 + exit 1 +fi + +mkdir -p "${OUTPUT_DIR}" + +( + cd "${REPO_DIR}" + # Ignore utility crates only used by internal scripts + cargo depgraph \ + --workspace-only \ + --all-deps \ + --dedup-transitive-deps \ + --exclude gen,gen-common \ + | dot \ + -Grankdir=TB \ + -Gconcentrate=true \ + -Goverlap=false \ + -Tsvg \ + > "${SVG_OUTPUT}" +) + +echo "Wrote dependency graph SVG to ${SVG_OUTPUT}" diff --git a/docs/source/contributor-guide/architecture/dependency-graph.md b/docs/source/contributor-guide/architecture/dependency-graph.md new file mode 100644 index 000000000000..be3502f48bed --- /dev/null +++ b/docs/source/contributor-guide/architecture/dependency-graph.md @@ -0,0 +1,180 @@ + + +# Workspace Dependency Graph + +This page shows the dependency relationships between DataFusion's workspace +crates. Only internal dependencies are shown; external crates such as `Arrow` are not included. + +The dependency graph is auto-generated by `docs/scripts/generate_dependency_graph.sh` to ensure it stays up-to-date, and the script now runs automatically as part of `docs/build.sh`.
+ +## Dependency Graph for Workspace Crates + + + +```{raw} html +
+
+``` + +```{eval-rst} +.. raw:: html + :file: ../../_static/data/deps.svg +``` + +```{raw} html +
+
+ Interactive SVG (pan, zoom, search) +
+ + +
+ Open SVG ↗ +
+
+ +``` + +### Legend + +- black lines: normal dependency +- blue lines: dev-dependency +- green lines: build-dependency +- dotted lines: optional dependency (could be removed by disabling a cargo feature) + +Direct edges that duplicate a transitive dependency are intentionally omitted (`--dedup-transitive-deps`) to keep the graph readable. + +The dependency graph is generated through `cargo depgraph` by `docs/scripts/generate_dependency_graph.sh`. diff --git a/docs/source/index.rst b/docs/source/index.rst index b589c9ce4047..a07f98023b9b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -157,6 +157,7 @@ To get started, see contributor-guide/communication contributor-guide/development_environment contributor-guide/architecture + contributor-guide/architecture/dependency-graph contributor-guide/testing contributor-guide/api-health contributor-guide/howtos