From 9647ebc38533da7b4203e957308fa354d7768d64 Mon Sep 17 00:00:00 2001 From: Daniel McIlvaney Date: Wed, 6 May 2026 23:31:06 -0700 Subject: [PATCH] ci(prcheck): use 'azldev component changed' for affected-component detection Replace the bash 'git diff | grep /sources' step with structured azldev commands for lock validation, change detection, and scoped rendering. Pipeline step order: 1. Lock check -- 'azldev component update -a -q -O json', fail if any component has changed == true. Lock-update JSON published as a pipeline artifact for triage. 2. Changed-component detection -- 'azldev component changed --include-unchanged' writes the full per-component JSON to disk, published via ob_outputDirectory. The --include-unchanged flag ensures the JSON contains every known component, which is needed for the renderable-set filter in step 4. 3. Source/identity consistency tripwire -- hard-fail if any component reports sourcesChange == true with a changeType not in the allow-list {added, changed, deleted}. Prevents unauthenticated rewrites of the rendered 'sources' file under an existing component's identity. Data path is severed (subsequent steps skip); PR check remains advisory until ADO task 19179 removes job-level continueOnError. 4. Scoped render -- render set is the union of components flagged by 'azldev component changed' (inputs differ) and components whose spec tree was touched directly in the PR (git diff under specs/, mapped back to component names by compute_render_set.py). Deleted and unknown components are excluded via a renderable-set filter built from the full --include-unchanged JSON. 5. Prcheck API -- switches from --components to --changed-components-file , filtering to entries with sourcesChange == true and changeType in {added, changed} (allow-list, mirroring the consistency tripwire). Also: * Add --changed-components-file flag (mutually exclusive with --components) and _load_components_from_file() to run_prcheck.py. Uses an allow-list of changeType values for defense-in-depth. * Add compute_render_set.py for render-set computation. * Document AZLDEV_ALLOW_ROOT in ADO pipeline instructions (OneBranch containers run as root, azldev refuses by default). * Mark changedComponentsFile pipeline variable as isreadonly=true. * Switch API_BASE_URL to $(ApiBaseDirectUrl) (bypasses AFD). --- .../instructions/ado-pipeline.instructions.md | 12 ++ .../ado/templates/sources-upload-stages.yml | 111 +++++++----------- .../scripts/control-tower/compute_changed.sh | 59 ++++++++++ .../control-tower/compute_render_set.py | 86 ++++++++++++++ .../control-tower/render_and_verify.sh | 58 +++++++++ .../scripts/control-tower/run_prcheck.py | 75 +++++++++++- .../scripts/control-tower/verify_locks.sh | 54 +++++++++ 7 files changed, 381 insertions(+), 74 deletions(-) create mode 100755 .github/workflows/scripts/control-tower/compute_changed.sh create mode 100644 .github/workflows/scripts/control-tower/compute_render_set.py create mode 100755 .github/workflows/scripts/control-tower/render_and_verify.sh create mode 100755 .github/workflows/scripts/control-tower/verify_locks.sh diff --git a/.github/instructions/ado-pipeline.instructions.md b/.github/instructions/ado-pipeline.instructions.md index 32b7448db54..2ab7ef8c457 100644 --- a/.github/instructions/ado-pipeline.instructions.md +++ b/.github/instructions/ado-pipeline.instructions.md @@ -175,6 +175,18 @@ Avoid shell scripts beyond the smallest possible wiring (env exports, `##vso[... Python scripts are easier to test locally, easier to review, and avoid the foot-guns of bash quoting / globbing. +### `azldev` in OneBranch + +OneBranch runs all steps as `root`. `azldev` refuses to run many commands as root by default (a safety measure for developer workstations). To allow it in CI, set `AZLDEV_ALLOW_ROOT=1` -- but **set it inline as a prefix on each `azldev` invocation**, not at the step level. Inline scoping makes it obvious which calls actually need the override and avoids leaking the flag to unrelated commands in the same step. + +```bash +AZLDEV_ALLOW_ROOT=1 azldev component update --check-only -a -q +``` + +Symptom that you need this: `ERR Error: this command may not be run as root`. + +This is NOT safe for general use, only for disposable CI environments. + ## Security hardening Apply all of these unless there is a documented reason not to: diff --git a/.github/workflows/ado/templates/sources-upload-stages.yml b/.github/workflows/ado/templates/sources-upload-stages.yml index 3ac9432be20..48936a3e212 100644 --- a/.github/workflows/ado/templates/sources-upload-stages.yml +++ b/.github/workflows/ado/templates/sources-upload-stages.yml @@ -117,79 +117,48 @@ stages: env: AZLDEV_COMMIT: $(AzldevCommit) + # Verify lock files are current. --check-only validates without + # writing, exits nonzero if any lock would change. - script: | set -euo pipefail - - # Workaround for an ADO git config error during spec rendering. - # The config key may not be present on every agent image, so tolerate its absence. - git config --unset extensions.worktreeConfig || true - - # Full history is needed for spec rendering to work. - if [ "$(git rev-parse --is-shallow-repository)" = "true" ]; then - echo "##[group]Fetching full git history" - git fetch --unshallow - echo "##[endgroup]" - fi - - specs_dir="$(azldev config dump -q -f json | jq -r '.project.renderedSpecsDir')" - - echo "##[group]Specs rendering" - azldev component render -q -a --clean-stale - echo "##[endgroup]" - - # Check for any new or modified files under the specs directory. - if ! python3 .github/workflows/scripts/render-specs-check/check_rendered_specs.py --specs-dir "$specs_dir"; then - echo "##[group]Git diff" - git --no-pager diff -- "$specs_dir" - echo "##[endgroup]" - echo "##[error]Specs are not up to date. Run 'azldev component render -q -a --clean-stale' and try again." - exit 1 - fi - displayName: "Verify rendered specs are up to date" - # TODO: Re-enabled failing once specs in the main branch are updated and stable. - # ADO task: 19179 - continueOnError: true - + .github/workflows/scripts/control-tower/verify_locks.sh \ + --output-file "$(Build.ArtifactStagingDirectory)/lock-update.json" \ + --publish-dir "$(ob_outputDirectory)" + displayName: "Verify lock files are up to date" + + # Detect changed components. azldev hard-fails if any component has + # sourcesChange == true without a corresponding "identity change" + # -- meaning the component's input fingerprint (.comp.toml + lock) + # didn't move (changeType not in {added, changed, deleted}). That + # combination would let attacker-supplied bytes ride into the + # lookaside under an unchanged component identity, so we treat it + # as hostile and fail closed (supply-chain drift tripwire). + # --include-unchanged ensures the full component list is available + # for downstream consumers. - script: | set -euo pipefail - - cat << EOF - NOTE: It's possible more components changed between the source and target commits than displayed below. - This workflow only checks for updates in the 'sources' files and ignores all other changes. - Only components present on the source commit are reported; components that exist only on the - target branch are intentionally excluded. Pure renames are reported under their source-branch - name, since the component name is part of the cache blob name we upload. - EOF - - # Diff direction is TARGET -> SOURCE so additions/modifications represent the source-side state: - # * --no-renames decomposes a rename into an add (new path, source side) + delete (old path, - # target side); we want the source-side name because the component name is part of the cache - # blob name uploaded for the PR. It also silences git's diff.renameLimit warning on large diffs. - # * --diff-filter=d (lowercase) excludes deletions, which here correspond to paths present only - # on the target branch -- those components are not on the PR and must not be uploaded. - # Net effect: the listed '*/sources' files are exactly those present on the source commit that - # differ from target, including pure renames surfaced under the source-branch name. - sources_files="$( - git diff --no-renames --diff-filter=d "$TARGET_COMMIT" "$SOURCE_COMMIT" --name-only \ - | { grep '/sources$' || true; } \ - | sort -u - )" - - components=() - while IFS= read -r line; do - # Skip the spurious empty iteration when no '*/sources' files changed. - if [[ -n "$line" ]]; then - components+=("$(basename "$(dirname "$line")")") - fi - done <<< "$sources_files" - - components="$(IFS=, ; echo "${components[*]}")" - echo "Affected components: $components" - echo "##vso[task.setvariable variable=components]$components" - env: - SOURCE_COMMIT: $(sourceCommit) - TARGET_COMMIT: $(targetCommit) - displayName: "Determine affected components" + json_file="$(Build.ArtifactStagingDirectory)/changed-components/changed-components.json" + .github/workflows/scripts/control-tower/compute_changed.sh \ + --output-file "$json_file" \ + --publish-dir "$(ob_outputDirectory)" \ + --source-commit "$(sourceCommit)" \ + --target-commit "$(targetCommit)" + echo "##vso[task.setvariable variable=changedComponentsFile;isreadonly=true]$json_file" + displayName: "Compute changed components" + + # Render the components that need re-rendering (azldev-flagged plus + # any with hand-edited spec files), in --check-only mode: azldev + # renders to a staging area and diffs against the on-disk output + # without writing. Exits nonzero if any component's rendered output + # would change, catching both stale renders and direct hand-edits. + - script: | + set -euo pipefail + .github/workflows/scripts/control-tower/render_and_verify.sh \ + --output-dir "$(Build.ArtifactStagingDirectory)/render-check" \ + --changed-components-file "$(changedComponentsFile)" \ + --source-commit "$(sourceCommit)" \ + --target-commit "$(targetCommit)" + displayName: "Verify rendered specs are up to date" - task: AzureCLI@2 displayName: "Call Control Tower 'prcheck' API" @@ -204,14 +173,14 @@ stages: --api-audience "$API_AUDIENCE" \ --api-base-url "$API_BASE_URL" \ --build-reason "$BUILD_REASON" \ - --components "$COMPONENTS" \ + --changed-components-file "$CHANGED_COMPONENTS_FILE" \ --source-commit "$SOURCE_COMMIT" \ --repo-uri "$UPSTREAM_REPO_URL" env: API_AUDIENCE: $(ApiAudience) API_BASE_URL: $(ApiBaseDirectUrl) BUILD_REASON: $(Build.Reason) - COMPONENTS: $(components) + CHANGED_COMPONENTS_FILE: $(changedComponentsFile) SOURCE_COMMIT: $(sourceCommit) # TODO: Target commit is not used. Will be needed once we move detection of affected components to CT. # ADO task: 18816 diff --git a/.github/workflows/scripts/control-tower/compute_changed.sh b/.github/workflows/scripts/control-tower/compute_changed.sh new file mode 100755 index 00000000000..979f0dc540d --- /dev/null +++ b/.github/workflows/scripts/control-tower/compute_changed.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Compute the set of changed components between source and target commits. +# +# Writes the JSON to and copies it into /changed-components/ +# for triage artifact publication. +# +# 'azldev component changed' compares the source and target commits and emits +# one entry per component with its 'changeType' ('added', 'changed', +# 'unchanged', or 'deleted') plus a 'sourcesChange' flag indicating whether +# the rendered 'sources' file differs between commits. +# +# azldev hard-fails if any component has sourcesChange == true without a +# corresponding identity change (added/changed/deleted) -- supply-chain +# drift protection. +# +# Only components with sourcesChange == true AND changeType in {added, changed} +# are forwarded to Control Tower for upload. + +set -euo pipefail + +usage() { echo "Usage: $0 --output-file FILE --publish-dir DIR --source-commit SHA --target-commit SHA" >&2; exit 1; } + +while [[ $# -gt 0 ]]; do + case "$1" in + --output-file) output_file="$2"; shift 2 ;; + --publish-dir) publish_dir="$2"; shift 2 ;; + --source-commit) source_commit="$2"; shift 2 ;; + --target-commit) target_commit="$2"; shift 2 ;; + *) usage ;; + esac +done +[[ -z "${output_file:-}" || -z "${publish_dir:-}" || -z "${source_commit:-}" || -z "${target_commit:-}" ]] && usage + +mkdir -p "$(dirname "$output_file")" + +# Publish the changed-components JSON for post-mortem triage on EVERY exit +# path, not just success -- if azldev hard-fails on a consistency tripwire +# the partial JSON is exactly what an operator needs to investigate. +publish_artifact() { + if [[ -s "$output_file" ]]; then + mkdir -p "$publish_dir/changed-components" + cp "$output_file" "$publish_dir/changed-components/" || true + fi +} +trap publish_artifact EXIT + +echo "##[group]Computing changed components" +AZLDEV_ALLOW_ROOT=1 azldev component changed --from "$target_commit" --to "$source_commit" -a --include-unchanged -O json > "$output_file" +echo "##[endgroup]" + +echo "##[group]Changed components (non-unchanged only)" +jq '[.[] | select(.changeType != "unchanged")]' "$output_file" +echo "##[endgroup]" + +echo "##[group]Upload set (sourcesChange == true, changeType in {added, changed})" +upload_count=$(jq -r '[.[] | select(.sourcesChange == true and (.changeType | IN("added", "changed")))] | length' "$output_file") +jq -r '.[] | select(.sourcesChange == true and (.changeType | IN("added", "changed"))) | .component' "$output_file" | sort +echo "Total: $upload_count component(s) to upload." +echo "##[endgroup]" diff --git a/.github/workflows/scripts/control-tower/compute_render_set.py b/.github/workflows/scripts/control-tower/compute_render_set.py new file mode 100644 index 00000000000..3dbdb8a1841 --- /dev/null +++ b/.github/workflows/scripts/control-tower/compute_render_set.py @@ -0,0 +1,86 @@ +"""Compute the render set: components flagged by `azldev component changed` +plus components whose spec tree was touched directly in the PR. + +Emits one component name per line on stdout (azldev dedupes internally). +""" + +import argparse +import json +from pathlib import Path + + +def _load_entries(path: Path) -> list[dict]: + """Load the changed-components JSON.""" + return json.loads(path.read_text(encoding="utf-8")) + + +def _renderable_components(entries: list[dict]) -> set[str]: + """Component names that azldev can still render (everything except deleted).""" + return {e["component"] for e in entries if e.get("changeType") != "deleted"} + + +def from_changed(entries: list[dict]) -> list[str]: + """Components from `azldev component changed` JSON. + + Includes anything that is not 'deleted' and either has a non-'unchanged' + changeType or has sourcesChange=true (safety net). + """ + out = [] + for e in entries: + change_type = e.get("changeType") + sources_change = e.get("sourcesChange") is True + if change_type == "deleted": + continue + if change_type == "unchanged" and not sources_change: + continue + out.append(e["component"]) + return out + + +def from_specs_diff(path: Path, specs_dir: Path, renderable: set[str]) -> list[str]: + """Components with any modified file under specs_dir. + + Spec layout is rigid: ///... + so the component name is the second segment under specs_dir. + + Only components in ``renderable`` are included -- a deleted component's + .comp.toml is gone so ``azldev component render`` would fail, and a + path that doesn't map to any known component is noise. + """ + prefix = str(specs_dir).rstrip("/") + "/" + out = [] + for line in path.read_text(encoding="utf-8").splitlines(): + if not line.startswith(prefix): + continue + parts = line[len(prefix) :].split("/", 2) + if len(parts) >= 2 and parts[1]: + name = parts[1] + if name in renderable: + out.append(name) + return out + + +def main() -> None: + p = argparse.ArgumentParser() + p.add_argument("--changed-components-file", type=Path, required=True) + p.add_argument("--specs-diff-file", type=Path, required=True) + p.add_argument("--specs-dir", type=Path, required=True) + args = p.parse_args() + + entries = _load_entries(args.changed_components_file) + renderable = _renderable_components(entries) + + # Dedupe across both sources -- a component appearing in azldev's + # changed list AND with hand-edited specs would otherwise print twice, + # and a component with N modified spec files would print N times. + # dict.fromkeys preserves first-seen order. + names = dict.fromkeys([ + *from_changed(entries), + *from_specs_diff(args.specs_diff_file, args.specs_dir, renderable), + ]) + for name in names: + print(name) + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/scripts/control-tower/render_and_verify.sh b/.github/workflows/scripts/control-tower/render_and_verify.sh new file mode 100755 index 00000000000..6c5c1d40d33 --- /dev/null +++ b/.github/workflows/scripts/control-tower/render_and_verify.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +# Render changed specs and verify the rendered tree is clean. + +set -euo pipefail + +usage() { echo "Usage: $0 --output-dir DIR --changed-components-file FILE --source-commit SHA --target-commit SHA" >&2; exit 1; } + +while [[ $# -gt 0 ]]; do + case "$1" in + --output-dir) output_dir="$2"; shift 2 ;; + --changed-components-file) changed_components_file="$2"; shift 2 ;; + --source-commit) source_commit="$2"; shift 2 ;; + --target-commit) target_commit="$2"; shift 2 ;; + *) usage ;; + esac +done +[[ -z "${output_dir:-}" || -z "${changed_components_file:-}" || -z "${source_commit:-}" || -z "${target_commit:-}" ]] && usage + +# azldev's renderedSpecsDir is absolute. Translate to repo-relative +# so it matches git's output ('git diff --name-only' always emits +# repo-relative paths regardless of the path arg form). +specs_dir_abs="$(AZLDEV_ALLOW_ROOT=1 azldev config dump -q -f json | jq -r '.project.renderedSpecsDir')" +specs_dir="$(realpath --relative-to="$(pwd)" "$specs_dir_abs")" + +# Capture git diff under the specs tree so the render set can +# include components whose specs were edited directly (which +# azldev's input-fingerprint view of "changed" would miss). +# --no-renames prevents collapse of delete+add into a rename +# entry which would lose the old path. The Python script +# filters out deleted/unknown components using the full +# changed-components JSON. +mkdir -p "$output_dir" +specs_diff_file="$output_dir/specs-diff.txt" +git diff --no-renames --name-only "$target_commit" "$source_commit" -- "$specs_dir" > "$specs_diff_file" + +# Render set is the union of: +# - components flagged by 'azldev component changed' (inputs differ) +# - components whose spec tree was touched directly in the PR +changed=$(python3 .github/workflows/scripts/control-tower/compute_render_set.py \ + --changed-components-file "$changed_components_file" \ + --specs-diff-file "$specs_diff_file" \ + --specs-dir "$specs_dir") + +if [[ -z "$changed" ]]; then + echo "No changed components -- skipping render." +else + changed_count=$(echo "$changed" | wc -l) + echo "Rendering $changed_count component(s) (azldev dedupes internally)..." + echo "##[group]Render set" + # shellcheck disable=SC2001 + echo "$changed" | sed 's/^/ - /' + echo "##[endgroup]" + echo "##[group]Specs rendering + verification" + # --check-only renders to a staging area and diffs against on-disk specs. + # Exits nonzero if any rendered file differs from what's committed. + printf '%s\n' "$changed" | xargs -d '\n' env AZLDEV_ALLOW_ROOT=1 azldev component render --check-only -- + echo "##[endgroup]" +fi diff --git a/.github/workflows/scripts/control-tower/run_prcheck.py b/.github/workflows/scripts/control-tower/run_prcheck.py index 3bc8fb75f05..0a69eae685f 100644 --- a/.github/workflows/scripts/control-tower/run_prcheck.py +++ b/.github/workflows/scripts/control-tower/run_prcheck.py @@ -8,11 +8,20 @@ TimedOut / Unknown) or the local poll timeout elapses. 3. Exit 0 only if the terminal status is ``Completed``; otherwise surface the error details from the job status payload and exit 1. + +Component selection: + Pass either ``--components`` (comma-separated names) OR + ``--changed-components-file`` (path to the raw JSON output of + ``azldev component changed -a -O json``). With the file form, only + components whose ``sourcesChange`` is ``true`` and ``changeType`` is + in ``{added, changed}`` are forwarded -- those are the ones whose + lookaside tarballs need to be (re-)uploaded. """ import argparse import json import sys +from pathlib import Path from azure.identity import DefaultAzureCredential @@ -24,6 +33,53 @@ def _parse_components(value: str) -> list[str]: return [c.strip() for c in value.split(",") if c.strip()] +def _load_components_from_file(path: Path) -> list[str]: + """Filter the raw ``azldev component changed`` JSON down to the upload set. + + The "upload set" is every component whose rendered ``sources`` file + changed between the two refs (``sourcesChange == true``) AND whose + ``changeType`` is in the allow-list ``{added, changed}``. Using an + allow-list (rather than just excluding ``deleted``) mirrors the + pipeline's consistency tripwire and ensures any future unknown + ``changeType`` value fails closed rather than being forwarded to + Control Tower. + """ + ALLOWED_UPLOAD_TYPES = {"added", "changed"} + + try: + raw = path.read_text(encoding="utf-8") + except OSError as exc: + raise SystemExit( + f"##[error]Failed to read --changed-components-file {path!s}: {exc}" + ) from exc + + try: + entries = json.loads(raw) + except json.JSONDecodeError as exc: + raise SystemExit( + f"##[error]--changed-components-file {path!s} is not valid JSON: {exc}" + ) from exc + + if not isinstance(entries, list): + raise SystemExit( + f"##[error]--changed-components-file {path!s} top-level value " + f"must be a JSON array (got {type(entries).__name__})." + ) + + components: list[str] = [] + for entry in entries: + if not isinstance(entry, dict): + continue + if entry.get("changeType") not in ALLOWED_UPLOAD_TYPES: + continue + if entry.get("sourcesChange") is True: + name = entry.get("component") + if isinstance(name, str) and name: + components.append(name) + + return sorted(set(components)) + + def _parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( description="Call the Control Tower prcheck API and wait for the job to finish.", @@ -42,12 +98,21 @@ def _parse_args() -> argparse.Namespace: help="ADO build reason (PullRequest, IndividualCI, …)", ) - parser.add_argument( + components_group = parser.add_mutually_exclusive_group(required=True) + components_group.add_argument( "--components", - required=True, type=_parse_components, help="Comma-separated list of affected component names", ) + components_group.add_argument( + "--changed-components-file", + type=Path, + help=( + "Path to the raw JSON output of 'azldev component changed -a -O json'. " + "Forwards only entries with sourcesChange == true and " + "changeType in {added, changed}." + ), + ) parser.add_argument("--source-commit", default=None, help="Source commit SHA") parser.add_argument( @@ -87,7 +152,11 @@ def main() -> None: print("##[error]--poll-timeout-seconds must be a positive integer.") sys.exit(2) - components: list[str] = args.components + components: list[str] + if args.changed_components_file is not None: + components = _load_components_from_file(args.changed_components_file) + else: + components = args.components # Normalize the base URL to avoid accidental double slashes if it was # configured with a trailing '/'. diff --git a/.github/workflows/scripts/control-tower/verify_locks.sh b/.github/workflows/scripts/control-tower/verify_locks.sh new file mode 100755 index 00000000000..736046b49b2 --- /dev/null +++ b/.github/workflows/scripts/control-tower/verify_locks.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# Verify all lock files are up to date. +# +# Writes the lock-update JSON to and copies it into /lock-update/ +# for triage artifact publication. Exits nonzero if any lock would change. + +set -euo pipefail + +usage() { echo "Usage: $0 --output-file FILE --publish-dir DIR" >&2; exit 1; } + +while [[ $# -gt 0 ]]; do + case "$1" in + --output-file) output_file="$2"; shift 2 ;; + --publish-dir) publish_dir="$2"; shift 2 ;; + *) usage ;; + esac +done +[[ -z "${output_file:-}" || -z "${publish_dir:-}" ]] && usage + +# Workaround for an ADO git config error. +# The config key may not be present on every agent image, so tolerate its absence. +git config --unset extensions.worktreeConfig || true + +# Full history is needed for lock resolution and spec rendering. +if [ "$(git rev-parse --is-shallow-repository)" = "true" ]; then + echo "##[group]Fetching full git history" + git fetch --unshallow + echo "##[endgroup]" +fi + +mkdir -p "$(dirname "$output_file")" + +# Publish the lock-update JSON for post-mortem triage on EVERY exit path, +# not just success -- the failure case is exactly when this artifact +# matters most. +publish_artifact() { + if [[ -s "$output_file" ]]; then + mkdir -p "$publish_dir/lock-update" + cp "$output_file" "$publish_dir/lock-update/" || true + fi +} +trap publish_artifact EXIT + +echo "##[group]Verifying lock files" +# --check-only exits nonzero if any lock is stale, without modifying the tree. +if ! AZLDEV_ALLOW_ROOT=1 azldev component update --check-only -a -q -O json > "$output_file"; then + echo "##[endgroup]" + echo "##[error]Lock file(s) are not up to date. Run 'azldev component update -a' and commit the result." + echo "##[group]Drifted components" + jq -r '.[] | select(.changed == true) | .component' "$output_file" + echo "##[endgroup]" + exit 1 +fi +echo "##[endgroup]"