Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 32 additions & 7 deletions .github/scripts/cleanup-long-running-cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,42 @@ fi

# Delete all test resources in resources without proxy resource.
if kubectl get crd resources.ucp.dev >/dev/null 2>&1; then
  # Deletion policy for resources.ucp.dev entries in the radius-system namespace:
  #   1. scope.local.* / scope.aws.* are never deleted.
  #   2. With a readable skip list ($SKIP_RESOURCE_FILE), everything not listed
  #      in it is deleted.
  #   3. Without a skip list, only scope.* entries are deleted; all other
  #      entries are preserved.
  if [[ -n "$SKIP_RESOURCE_FILE" && -f "$SKIP_RESOURCE_FILE" ]]; then
    echo "delete resources in resources.ucp.dev except entries in skip-resource-list"
  else
    echo "no skip-resource-list available; delete only scope.* resources in resources.ucp.dev"
  fi

  resources=$(kubectl get resources.ucp.dev -n radius-system --no-headers -o custom-columns=":metadata.name")

  # Intentionally unquoted: the listing is split into one resource name per word.
  for r in $resources; do
    if [[ -z "$r" ]]; then
      continue
    fi

    # Always skip built-in plane scopes.
    if [[ $r == scope.local.* || $r == scope.aws.* ]]; then
      echo "skip deletion: $r"
      continue
    fi

    # If a skip-resource file is available, use it to protect system resources.
    if [[ -n "$SKIP_RESOURCE_FILE" && -f "$SKIP_RESOURCE_FILE" ]]; then
      # -F -x: the name must equal a whole line literally, so the dots in a
      # resource name are not regex wildcards and a name that is merely a
      # substring of a protected entry is not skipped by accident.
      if grep -F -x -q -- "$r" "$SKIP_RESOURCE_FILE"; then
        echo "skip deletion: $r (found in skip-resource-list $SKIP_RESOURCE_FILE)"
      else
        echo "deleting resource: $r"
        kubectl delete resources.ucp.dev "$r" -n radius-system --ignore-not-found=true --wait=false
      fi
      continue
    fi

    # No skip-resource file: only delete scope entries (test resource groups).
    # Non-scope resources (resource.*) may include system-critical entries
    # that must not be deleted without a valid skip list.
    if [[ $r == scope.* ]]; then
      echo "deleting resource: $r (no skip list, scope entry)"
      kubectl delete resources.ucp.dev "$r" -n radius-system --ignore-not-found=true --wait=false
    else
      echo "skip deletion: $r (no skip list available, preserving non-scope resource)"
    fi
  done
fi
Expand Down
72 changes: 71 additions & 1 deletion .github/scripts/manage-radius-installation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,46 @@ verify_manifests_registered() {
echo "Manifest verification complete."
}

# Actively verify that resource types are registered and the Radius API is
# able to serve requests. Unlike verify_manifests_registered (which reads
# historical pod logs), this makes a live API call.
#
# Arguments: none
# Outputs:   progress and error messages on stdout, including the captured
#            rad CLI output when a step fails
# Returns:   0 = healthy, 1 = provider missing, 2 = query failed
verify_resource_types_available() {
  local workspace_output workspace_exit_code
  local output exit_code

  echo ""
  echo "Verifying resource types are available..."

  # Ensure a workspace exists so rad CLI can reach the cluster.
  # The status is captured via `||` so a failure is reported here instead of
  # aborting the script if errexit is enabled by the caller.
  workspace_exit_code=0
  workspace_output=$(rad workspace create kubernetes --force 2>&1) || workspace_exit_code=$?

  if [[ ${workspace_exit_code} -ne 0 ]]; then
    echo "ERROR: Failed to create Radius Kubernetes workspace (exit code: ${workspace_exit_code})."
    echo "rad workspace create output: ${workspace_output}"
    return 2
  fi

  # List registered resource providers. Applications.Core must be present
  # for environment/container operations to work.
  exit_code=0
  output=$(rad resource-provider list 2>&1) || exit_code=$?

  if [[ ${exit_code} -ne 0 ]]; then
    echo "ERROR: Failed to query registered resource providers (exit code: ${exit_code})."
    echo "rad resource-provider list output: ${output}"
    return 2
  fi

  # Literal substring match done in-shell: no pipeline, so an early-exiting
  # grep cannot turn a SIGPIPE into a false "provider missing" result.
  if [[ "${output}" == *"Applications.Core"* ]]; then
    echo "Resource types are available (Applications.Core provider found)."
    return 0
  fi

  echo "ERROR: Applications.Core resource provider is NOT registered."
  echo "rad resource-provider list output: ${output}"
  return 1
}

# Save the list of Radius UCP resources to skip-delete-resources-list.txt
# This file is used by the cleanup job to avoid deleting Radius-managed resources.
save_skip_resources_list() {
Expand Down Expand Up @@ -196,7 +236,37 @@ main() {
install_radius
elif [[ "${cp_version}" == "${cli_version}" ]]; then
echo ""
echo "Radius control plane version matches CLI version (${cli_version}). No action needed."
echo "Radius control plane version matches CLI version (${cli_version}). Skipping install/upgrade."

# Verify resource types with retry for transient failures.
local check_result=0
verify_resource_types_available || check_result=$?

if [[ ${check_result} -eq 2 ]]; then
# Query failed (connectivity/auth issue). Retry once after a brief wait
# before taking destructive action.
echo ""
echo "Resource type query failed. Retrying in 30 seconds..."
sleep 30
check_result=0
verify_resource_types_available || check_result=$?
fi

if [[ ${check_result} -eq 0 ]]; then
save_skip_resources_list
elif [[ ${check_result} -eq 1 ]]; then
echo ""
echo "Resource types missing despite matching versions. Reinstalling Radius..."
if ! rad uninstall kubernetes --purge --yes; then
echo "Warning: Uninstall failed, continuing with install attempt..."
fi
install_radius
else
echo ""
echo "ERROR: Unable to verify resource types after retry."
echo "This may indicate a connectivity or authentication issue."
exit 1
fi
Comment thread
willdavsmith marked this conversation as resolved.
else
echo ""
echo "Version mismatch detected. Attempting upgrade from ${cp_version} to ${cli_version}..."
Expand Down
Loading