From 2144eb6fc8e4d029da8548019321158415b07a41 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 23 Jul 2025 07:39:53 +0000 Subject: [PATCH 1/7] chore(deps): update dependency flux (#648) Co-authored-by: rmvangun <85766511+rmvangun@users.noreply.github.com> Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- .github/workflows/ci.yaml | 7 ++- contexts/.gitignore | 2 +- .../prometheus/flux/helm-release.yaml | 2 +- terraform/cluster/talos/README.md | 5 +- terraform/cluster/talos/main.tf | 50 +++-------------- .../talos/modules/machine/.terraform.lock.hcl | 19 +++++++ .../cluster/talos/modules/machine/main.tf | 30 +++++++++++ .../talos/modules/machine/test.tftest.hcl | 14 +++-- .../talos/modules/machine/variables.tf | 11 ++++ .../cluster/talos/resources/healthcheck.ps1 | 34 ------------ .../cluster/talos/resources/healthcheck.sh | 53 ------------------- terraform/cluster/talos/test.tftest.hcl | 4 +- terraform/cluster/talos/variables.tf | 10 ---- terraform/gitops/flux/variables.tf | 4 +- 14 files changed, 88 insertions(+), 157 deletions(-) delete mode 100644 terraform/cluster/talos/resources/healthcheck.ps1 delete mode 100755 terraform/cluster/talos/resources/healthcheck.sh diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 7b5ce54e..348d4dc3 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -59,7 +59,12 @@ jobs: - name: Run shellcheck run: | sudo apt-get install -y shellcheck - find . -name "*.sh" -print0 | xargs -0 shellcheck + shell_files=$(find . -name "*.sh" -print) + if [ -n "$shell_files" ]; then + echo "$shell_files" | xargs shellcheck + else + echo "No shell scripts found to check" + fi - name: Setup Terraform uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # v3.1.2 diff --git a/contexts/.gitignore b/contexts/.gitignore index ca8d105b..4d93ab0a 100644 --- a/contexts/.gitignore +++ b/contexts/.gitignore @@ -1 +1 @@ -/_template/ +!/_template/ diff --git a/kustomize/telemetry/resources/prometheus/flux/helm-release.yaml b/kustomize/telemetry/resources/prometheus/flux/helm-release.yaml index 29a281fd..ba8cc08f 100644 --- a/kustomize/telemetry/resources/prometheus/flux/helm-release.yaml +++ b/kustomize/telemetry/resources/prometheus/flux/helm-release.yaml @@ -10,7 +10,7 @@ spec: spec: chart: flux2 # renovate: datasource=helm depName=flux package=flux2 helmRepo=https://fluxcd-community.github.io/helm-charts - version: 2.16.2 + version: 2.16.3 sourceRef: kind: HelmRepository name: fluxcd-community diff --git a/terraform/cluster/talos/README.md b/terraform/cluster/talos/README.md index 38606138..73f53b19 100644 --- a/terraform/cluster/talos/README.md +++ b/terraform/cluster/talos/README.md @@ -11,7 +11,6 @@ | Name | Version | |------|---------| | [local](#provider\_local) | 2.5.3 | -| [null](#provider\_null) | 3.2.4 | | [talos](#provider\_talos) | 0.8.1 | ## Modules @@ -28,7 +27,6 @@ |------|------| | [local_sensitive_file.kubeconfig](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/sensitive_file) | resource | | [local_sensitive_file.talosconfig](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/sensitive_file) | resource | -| [null_resource.healthcheck](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource | | [talos_cluster_kubeconfig.this](https://registry.terraform.io/providers/siderolabs/talos/0.8.1/docs/resources/cluster_kubeconfig) | resource | | [talos_machine_secrets.this](https://registry.terraform.io/providers/siderolabs/talos/0.8.1/docs/resources/machine_secrets) | resource | | [talos_client_configuration.this](https://registry.terraform.io/providers/siderolabs/talos/0.8.1/docs/data-sources/client_configuration) | data source | @@ -43,8 +41,7 @@ | [context\_path](#input\_context\_path) | The path to the context folder, where kubeconfig and talosconfig are stored | `string` | `""` | no | | [controlplane\_config\_patches](#input\_controlplane\_config\_patches) | A YAML string of controlplane config patches to apply. Can be an empty string or valid YAML. | `string` | `""` | no | | [controlplanes](#input\_controlplanes) | A list of machine configuration details for control planes. |
list(object({
hostname = optional(string)
endpoint = string
node = string
disk_selector = optional(object({
busPath = optional(string)
modalias = optional(string)
model = optional(string)
name = optional(string)
serial = optional(string)
size = optional(string)
type = optional(string)
uuid = optional(string)
wwid = optional(string)
}))
wipe_disk = optional(bool, true)
extra_kernel_args = optional(list(string), [])
config_patches = optional(string, "")
}))
| `[]` | no | -| [kubernetes\_version](#input\_kubernetes\_version) | The kubernetes version to deploy. | `string` | `"1.33.2"` | no | -| [os\_type](#input\_os\_type) | The operating system type, must be either 'unix' or 'windows' | `string` | `"unix"` | no | +| [kubernetes\_version](#input\_kubernetes\_version) | The kubernetes version to deploy. | `string` | `"1.33.3"` | no | | [talos\_version](#input\_talos\_version) | The talos version to deploy. | `string` | `"1.10.5"` | no | | [worker\_config\_patches](#input\_worker\_config\_patches) | A YAML string of worker config patches to apply. Can be an empty string or valid YAML. | `string` | `""` | no | | [workers](#input\_workers) | A list of machine configuration details |
list(object({
hostname = optional(string)
endpoint = string
node = string
disk_selector = optional(object({
busPath = optional(string)
modalias = optional(string)
model = optional(string)
name = optional(string)
serial = optional(string)
size = optional(string)
type = optional(string)
uuid = optional(string)
wwid = optional(string)
}))
wipe_disk = optional(bool, true)
extra_kernel_args = optional(list(string), [])
config_patches = optional(string, "")
}))
| `[]` | no | diff --git a/terraform/cluster/talos/main.tf b/terraform/cluster/talos/main.tf index b697ec39..98038977 100644 --- a/terraform/cluster/talos/main.tf +++ b/terraform/cluster/talos/main.tf @@ -50,6 +50,8 @@ module "controlplane_bootstrap" { machine_type = "controlplane" endpoint = var.controlplanes[0].endpoint bootstrap = true // Bootstrap the first control plane node + talosconfig_path = local.talosconfig_path + enable_health_check = true config_patches = compact(concat([ var.common_config_patches, var.controlplane_config_patches, @@ -76,6 +78,8 @@ module "controlplanes" { machine_type = "controlplane" endpoint = var.controlplanes[count.index + 1].endpoint bootstrap = false // Do not bootstrap other control plane nodes + talosconfig_path = local.talosconfig_path + enable_health_check = true config_patches = compact(concat([ var.common_config_patches, var.controlplane_config_patches, @@ -105,6 +109,8 @@ module "workers" { talos_version = var.talos_version machine_type = "worker" endpoint = var.workers[count.index].endpoint + talosconfig_path = local.talosconfig_path + enable_health_check = true config_patches = compact(concat([ var.common_config_patches, var.worker_config_patches, @@ -157,48 +163,4 @@ resource "local_sensitive_file" "talosconfig" { } } -#----------------------------------------------------------------------------------------------------------------------- -# Cluster Health -#----------------------------------------------------------------------------------------------------------------------- - -# The following workaround is required until resolution of https://github.com/siderolabs/terraform-provider-talos/issues/221 - -# data "talos_cluster_health" "this" { -# depends_on = [ -# module.controlplane_bootstrap, -# module.controlplanes, -# module.workers -# ] -# client_configuration = talos_machine_secrets.this.client_configuration -# control_plane_nodes = var.controlplanes.*.node -# worker_nodes = var.workers.*.node -# endpoints = var.controlplanes.*.endpoint -# } - -locals { - healthcheck_command = var.os_type == "unix" ? "${path.module}/resources/healthcheck.sh" : "& { & '${path.module}/resources/healthcheck.ps1' }" - healthcheck_interpreter = var.os_type == "unix" ? ["sh", "-c"] : ["powershell", "-Command"] -} - -resource "null_resource" "healthcheck" { - triggers = { - always_run = timestamp() // Ensures the resource runs every time - } - - depends_on = [ - local_sensitive_file.kubeconfig, - local_sensitive_file.talosconfig - ] - - provisioner "local-exec" { - command = local.healthcheck_command - interpreter = local.healthcheck_interpreter - environment = { - KUBECONFIG = local.kubeconfig_path - NODE_COUNT = length(var.controlplanes) + length(var.workers) - TIMEOUT = 300 # 5 minutes - INTERVAL = 5 # 5 seconds - } - } -} diff --git a/terraform/cluster/talos/modules/machine/.terraform.lock.hcl b/terraform/cluster/talos/modules/machine/.terraform.lock.hcl index 515ed004..8c436b2f 100644 --- a/terraform/cluster/talos/modules/machine/.terraform.lock.hcl +++ b/terraform/cluster/talos/modules/machine/.terraform.lock.hcl @@ -1,6 +1,25 @@ # This file is maintained automatically by "terraform init". # Manual edits may be lost in future updates. +provider "registry.terraform.io/hashicorp/null" { + version = "3.2.4" + hashes = [ + "h1:L5V05xwp/Gto1leRryuesxjMfgZwjb7oool4WS1UEFQ=", + "zh:59f6b52ab4ff35739647f9509ee6d93d7c032985d9f8c6237d1f8a59471bbbe2", + "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", + "zh:795c897119ff082133150121d39ff26cb5f89a730a2c8c26f3a9c1abf81a9c43", + "zh:7b9c7b16f118fbc2b05a983817b8ce2f86df125857966ad356353baf4bff5c0a", + "zh:85e33ab43e0e1726e5f97a874b8e24820b6565ff8076523cc2922ba671492991", + "zh:9d32ac3619cfc93eb3c4f423492a8e0f79db05fec58e449dee9b2d5873d5f69f", + "zh:9e15c3c9dd8e0d1e3731841d44c34571b6c97f5b95e8296a45318b94e5287a6e", + "zh:b4c2ab35d1b7696c30b64bf2c0f3a62329107bd1a9121ce70683dec58af19615", + "zh:c43723e8cc65bcdf5e0c92581dcbbdcbdcf18b8d2037406a5f2033b1e22de442", + "zh:ceb5495d9c31bfb299d246ab333f08c7fb0d67a4f82681fbf47f2a21c3e11ab5", + "zh:e171026b3659305c558d9804062762d168f50ba02b88b231d20ec99578a6233f", + "zh:ed0fe2acdb61330b01841fa790be00ec6beaac91d41f311fb8254f74eb6a711f", + ] +} + provider "registry.terraform.io/siderolabs/talos" { version = "0.8.0" hashes = [ diff --git a/terraform/cluster/talos/modules/machine/main.tf b/terraform/cluster/talos/modules/machine/main.tf index 1bb0a832..f8ab6323 100644 --- a/terraform/cluster/talos/modules/machine/main.tf +++ b/terraform/cluster/talos/modules/machine/main.tf @@ -8,6 +8,9 @@ terraform { talos = { source = "siderolabs/talos" } + null = { + source = "hashicorp/null" + } } } @@ -74,3 +77,30 @@ resource "talos_machine_bootstrap" "bootstrap" { endpoint = var.endpoint client_configuration = var.client_configuration } + +#----------------------------------------------------------------------------------------------------------------------- +# Node Health Check +#----------------------------------------------------------------------------------------------------------------------- + +locals { + # Use hostname if available, otherwise fall back to node address + node_name = var.hostname != null && var.hostname != "" ? var.hostname : var.node +} + +resource "null_resource" "node_healthcheck" { + triggers = { + node_id = var.node + } + + depends_on = [ + talos_machine_configuration_apply.this, + talos_machine_bootstrap.bootstrap + ] + + provisioner "local-exec" { + command = var.enable_health_check ? "windsor check node-health --nodes ${local.node_name} --timeout 5m" : "echo 'Health check disabled for testing'" + environment = var.enable_health_check ? { + TALOSCONFIG = var.talosconfig_path + } : {} + } +} diff --git a/terraform/cluster/talos/modules/machine/test.tftest.hcl b/terraform/cluster/talos/modules/machine/test.tftest.hcl index f5132728..bff71892 100644 --- a/terraform/cluster/talos/modules/machine/test.tftest.hcl +++ b/terraform/cluster/talos/modules/machine/test.tftest.hcl @@ -4,6 +4,10 @@ mock_provider "talos" { mock_resource "talos_machine_bootstrap" {} } +mock_provider "null" { + mock_resource "null_resource" {} +} + variables { machine_type = "controlplane" endpoint = "dummy" @@ -47,10 +51,12 @@ variables { token = "dummy" } } - cluster_name = "dummy" - cluster_endpoint = "https://dummy" - kubernetes_version = "dummy" - talos_version = "1.10.1" + cluster_name = "dummy" + cluster_endpoint = "https://dummy" + kubernetes_version = "dummy" + talos_version = "1.10.1" + talosconfig_path = "/tmp/dummy-talosconfig" + enable_health_check = false } run "machine_config_patch_with_disk_and_hostname" { diff --git a/terraform/cluster/talos/modules/machine/variables.tf b/terraform/cluster/talos/modules/machine/variables.tf index 384bfc92..54ae088a 100644 --- a/terraform/cluster/talos/modules/machine/variables.tf +++ b/terraform/cluster/talos/modules/machine/variables.tf @@ -112,3 +112,14 @@ variable "bootstrap" { type = bool default = false } + +variable "talosconfig_path" { + description = "Path to the talosconfig file for health checking." + type = string +} + +variable "enable_health_check" { + description = "Whether to enable health checking for this node." + type = bool + default = true +} diff --git a/terraform/cluster/talos/resources/healthcheck.ps1 b/terraform/cluster/talos/resources/healthcheck.ps1 deleted file mode 100644 index d80241d0..00000000 --- a/terraform/cluster/talos/resources/healthcheck.ps1 +++ /dev/null @@ -1,34 +0,0 @@ -# Number of nodes to check for readiness -[int]$NODE_COUNT = if ($env:NODE_COUNT -ne $null) { [int]$env:NODE_COUNT } else { (kubectl get nodes --no-headers 2>$null | Where-Object { $_.Trim() -ne "" } | Measure-Object | Select-Object -ExpandProperty Count) } -[int]$TIMEOUT = if ($env:TIMEOUT -ne $null) { [int]$env:TIMEOUT } else { 300 } # Default timeout of 300 seconds -[int]$INTERVAL = if ($env:INTERVAL -ne $null) { [int]$env:INTERVAL } else { 10 } # Default check interval of 10 seconds - -$start_time = Get-Date -$previous_ready_count = 0 - -Write-Host "Waiting for $NODE_COUNT nodes to be ready..." - -while ($true) { - $ready_nodes = kubectl get nodes --no-headers 2>$null | Where-Object { $_ -match '\sReady\s' } | ForEach-Object { $_.Split(' ')[0] } - $ready_count = $ready_nodes.Count - - if ($ready_count -ne $previous_ready_count) { - Write-Host "$ready_count / $NODE_COUNT nodes are ready" - $previous_ready_count = $ready_count - } - - if ($ready_count -eq $NODE_COUNT) { - Write-Host "All nodes are ready" - exit 0 - } - - $current_time = Get-Date - $elapsed_time = ($current_time - $start_time).TotalSeconds - - if ($elapsed_time -ge $TIMEOUT) { - Write-Host "Timeout reached: Not all nodes are ready" - exit 1 - } - - Start-Sleep -Seconds $INTERVAL -} diff --git a/terraform/cluster/talos/resources/healthcheck.sh b/terraform/cluster/talos/resources/healthcheck.sh deleted file mode 100755 index 3860cc1d..00000000 --- a/terraform/cluster/talos/resources/healthcheck.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env sh - -# Set the number of nodes to check for readiness. If not provided, default to the current number of nodes. -NODE_COUNT=${NODE_COUNT:-$(kubectl get nodes --no-headers 2>/dev/null | awk 'NF' | wc -l)} -# Set the timeout period in seconds. Default is 300 seconds (5 minutes). -TIMEOUT=${TIMEOUT:-300} -# Set the interval between readiness checks in seconds. Default is 10 seconds. -INTERVAL=${INTERVAL:-10} - -# Record the start time of the script to calculate elapsed time later. -start_time=$(date +%s) -# Initialize the previous ready count to track changes in node readiness. -previous_ready_count=0 - -# Inform the user about the number of nodes expected to be ready. -echo "Waiting for $NODE_COUNT nodes to be ready" - -# Continuously check the readiness of nodes. -while true; do - # Attempt to get the list of nodes that are in the 'Ready' state. - if ready_nodes=$(kubectl get nodes --no-headers 2>/dev/null | awk '$2 == "Ready" {print $1}'); then - # Count the number of nodes that are ready. - ready_count=$(echo "$ready_nodes" | awk 'NF' | wc -l) - else - # If the command fails, assume no nodes are ready. - ready_count=0 - fi - - # If the number of ready nodes has changed, print the current status. - if [ "$ready_count" -ne "$previous_ready_count" ]; then - echo "$ready_count / $NODE_COUNT nodes are ready" - previous_ready_count=$ready_count - fi - - # If all nodes are ready, exit the script successfully. - if [ "$ready_count" -eq "$NODE_COUNT" ]; then - echo "All nodes are ready" - exit 0 - fi - - # Calculate the elapsed time since the script started. - current_time=$(date +%s) - elapsed_time=$((current_time - start_time)) - - # If the elapsed time exceeds the timeout, exit the script with an error. - if [ "$elapsed_time" -ge "$TIMEOUT" ]; then - echo "Timeout reached: Not all nodes are ready" - exit 1 - fi - - # Wait for the specified interval before checking again. - sleep "$INTERVAL" -done diff --git a/terraform/cluster/talos/test.tftest.hcl b/terraform/cluster/talos/test.tftest.hcl index 9f7ab08f..3d5289c5 100644 --- a/terraform/cluster/talos/test.tftest.hcl +++ b/terraform/cluster/talos/test.tftest.hcl @@ -215,12 +215,11 @@ run "no_config_files" { } # Verifies that all input validation rules are enforced simultaneously, ensuring that -# invalid values for os_type, kubernetes_version, talos_version, cluster_name, +# invalid values for kubernetes_version, talos_version, cluster_name, # cluster_endpoint, and YAML configs are properly caught and reported run "multiple_invalid_inputs" { command = plan expect_failures = [ - var.os_type, var.kubernetes_version, var.talos_version, var.cluster_name, @@ -232,7 +231,6 @@ run "multiple_invalid_inputs" { var.workers, ] variables { - os_type = "macos" kubernetes_version = "v1.33" talos_version = "v1.10.1" cluster_name = "" diff --git a/terraform/cluster/talos/variables.tf b/terraform/cluster/talos/variables.tf index 9c1e5b19..c83d9f81 100644 --- a/terraform/cluster/talos/variables.tf +++ b/terraform/cluster/talos/variables.tf @@ -4,16 +4,6 @@ variable "context_path" { default = "" } -variable "os_type" { - description = "The operating system type, must be either 'unix' or 'windows'" - type = string - default = "unix" - validation { - condition = var.os_type == "unix" || var.os_type == "windows" - error_message = "The operating system type must be either 'unix' or 'windows'." - } -} - variable "kubernetes_version" { description = "The kubernetes version to deploy." type = string diff --git a/terraform/gitops/flux/variables.tf b/terraform/gitops/flux/variables.tf index 0edbbf3f..c861d908 100644 --- a/terraform/gitops/flux/variables.tf +++ b/terraform/gitops/flux/variables.tf @@ -8,7 +8,7 @@ variable "flux_helm_version" { description = "The version of Flux Helm chart to install" type = string # renovate: datasource=helm depName=flux package=flux2 helmRepo=https://fluxcd-community.github.io/helm-charts - default = "2.16.2" + default = "2.16.3" validation { condition = can(regex("^[0-9]+\\.[0-9]+\\.[0-9]+$", var.flux_helm_version)) @@ -20,7 +20,7 @@ variable "flux_version" { description = "The version of Flux to install" type = string # renovate: datasource=github-releases depName=flux package=fluxcd/flux2 - default = "2.6.3" + default = "2.6.4" validation { condition = can(regex("^[0-9]+\\.[0-9]+\\.[0-9]+$", var.flux_version)) From 990cd1b43d1367f644fa85c578d0001dbef38ac9 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 23 Jul 2025 14:56:50 +0000 Subject: [PATCH 2/7] chore(deps): update dependency aws/aws-cli to v2.27.57 (#689) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- aqua.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aqua.yaml b/aqua.yaml index 0c9eecb6..3d6431c2 100644 --- a/aqua.yaml +++ b/aqua.yaml @@ -22,7 +22,7 @@ packages: - name: lima-vm/lima@v1.2.0 - name: docker/cli@v27.4.1 - name: docker/compose@v2.38.2 - - name: aws/aws-cli@2.27.55 + - name: aws/aws-cli@2.27.57 - name: helm/helm@v3.18.4 - name: fluxcd/flux2@v2.6.4 - name: hashicorp/vault@v1.20.0 From 6216e5c141b438c18483114e3af2e161b8e10ad8 Mon Sep 17 00:00:00 2001 From: Ryan VanGundy Date: Wed, 23 Jul 2025 17:08:40 -0400 Subject: [PATCH 3/7] fix(cluster/talos): Add k8s endpoint health check The windsor command now supports including k8s api endpoints in its node health check. Leveraging this may help prevent issues when the cluster nodes are healthy but the API isn't yet accessible. In order to use this routine, the kubeconfig must have been created. It makes sense to perform the kubeconfig generation inside the machine module when `bootstrap = true`. Also sets parallelism to 1 in the template when building the talos cluster. This will be leveraged in the future for upgrades, but is appropriate to establish as part of the sequenced health checking routine. --- contexts/_template/blueprint.jsonnet | 17 +-- terraform/cluster/talos/main.tf | 29 +---- .../talos/modules/machine/.terraform.lock.hcl | 19 +++ .../cluster/talos/modules/machine/main.tf | 38 +++++- .../cluster/talos/modules/machine/output.tf | 6 + .../talos/modules/machine/test.tftest.hcl | 117 ++++++++++++++++++ .../talos/modules/machine/variables.tf | 6 + terraform/cluster/talos/test.tftest.hcl | 10 +- 8 files changed, 197 insertions(+), 45 deletions(-) diff --git a/contexts/_template/blueprint.jsonnet b/contexts/_template/blueprint.jsonnet index b2eaae0d..d5007aad 100644 --- a/contexts/_template/blueprint.jsonnet +++ b/contexts/_template/blueprint.jsonnet @@ -53,15 +53,16 @@ local terraformConfigs = { "local": [ { path: "cluster/talos", + parallelism: 1, }, { path: "gitops/flux", destroy: false, - values: { + values: if rawProvider == "local" then { git_username: "local", - git_password: "local", + git_password: "local", webhook_token: "abcdef123456", - }, + } else {}, } ] }; @@ -431,15 +432,7 @@ local blueprintMetadata = { }; // Source configuration -local sourceConfig = [ - { - name: "core", - url: "github.com/windsorcli/core", - ref: { - branch: "main", - }, - }, -]; +local sourceConfig = []; // Start of Blueprint blueprintMetadata + { diff --git a/terraform/cluster/talos/main.tf b/terraform/cluster/talos/main.tf index 98038977..8f669430 100644 --- a/terraform/cluster/talos/main.tf +++ b/terraform/cluster/talos/main.tf @@ -6,6 +6,9 @@ terraform { source = "siderolabs/talos" version = "0.8.1" } + local = { + source = "hashicorp/local" + } } } @@ -24,7 +27,6 @@ resource "talos_machine_secrets" "this" { locals { // Local variables for configuration paths and data talosconfig = data.talos_client_configuration.this.talos_config - kubeconfig = talos_cluster_kubeconfig.this.kubeconfig_raw talosconfig_path = "${var.context_path}/.talos/config" kubeconfig_path = "${var.context_path}/.kube/config" @@ -51,6 +53,7 @@ module "controlplane_bootstrap" { endpoint = var.controlplanes[0].endpoint bootstrap = true // Bootstrap the first control plane node talosconfig_path = local.talosconfig_path + kubeconfig_path = local.kubeconfig_path enable_health_check = true config_patches = compact(concat([ var.common_config_patches, @@ -79,6 +82,7 @@ module "controlplanes" { endpoint = var.controlplanes[count.index + 1].endpoint bootstrap = false // Do not bootstrap other control plane nodes talosconfig_path = local.talosconfig_path + kubeconfig_path = local.kubeconfig_path enable_health_check = true config_patches = compact(concat([ var.common_config_patches, @@ -110,6 +114,7 @@ module "workers" { machine_type = "worker" endpoint = var.workers[count.index].endpoint talosconfig_path = local.talosconfig_path + kubeconfig_path = local.kubeconfig_path enable_health_check = true config_patches = compact(concat([ var.common_config_patches, @@ -122,34 +127,12 @@ module "workers" { # Config Files #----------------------------------------------------------------------------------------------------------------------- -resource "talos_cluster_kubeconfig" "this" { - depends_on = [module.controlplane_bootstrap] - - client_configuration = talos_machine_secrets.this.client_configuration - node = var.controlplanes[0].node - endpoint = var.controlplanes[0].endpoint -} - data "talos_client_configuration" "this" { cluster_name = var.cluster_name client_configuration = talos_machine_secrets.this.client_configuration endpoints = var.controlplanes.*.endpoint } -// Write kubeconfig to a local file -resource "local_sensitive_file" "kubeconfig" { - count = trim(var.context_path, " ") != "" ? 1 : 0 // Create file only if path is specified and not empty/whitespace - depends_on = [local_sensitive_file.talosconfig] // Ensure Talos config is written first - - content = talos_cluster_kubeconfig.this.kubeconfig_raw - filename = local.kubeconfig_path - file_permission = "0600" // Set file permissions to read/write for owner only - - lifecycle { - ignore_changes = [content] // Ignore changes to content to prevent unnecessary updates - } -} - // Write Talos config to a local file resource "local_sensitive_file" "talosconfig" { count = trim(var.context_path, " ") != "" ? 1 : 0 // Create file only if path is specified and not empty/whitespace diff --git a/terraform/cluster/talos/modules/machine/.terraform.lock.hcl b/terraform/cluster/talos/modules/machine/.terraform.lock.hcl index 8c436b2f..2418f380 100644 --- a/terraform/cluster/talos/modules/machine/.terraform.lock.hcl +++ b/terraform/cluster/talos/modules/machine/.terraform.lock.hcl @@ -1,6 +1,25 @@ # This file is maintained automatically by "terraform init". # Manual edits may be lost in future updates. +provider "registry.terraform.io/hashicorp/local" { + version = "2.5.3" + hashes = [ + "h1:MCzg+hs1/ZQ32u56VzJMWP9ONRQPAAqAjuHuzbyshvI=", + "zh:284d4b5b572eacd456e605e94372f740f6de27b71b4e1fd49b63745d8ecd4927", + "zh:40d9dfc9c549e406b5aab73c023aa485633c1b6b730c933d7bcc2fa67fd1ae6e", + "zh:6243509bb208656eb9dc17d3c525c89acdd27f08def427a0dce22d5db90a4c8b", + "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", + "zh:885d85869f927853b6fe330e235cd03c337ac3b933b0d9ae827ec32fa1fdcdbf", + "zh:bab66af51039bdfcccf85b25fe562cbba2f54f6b3812202f4873ade834ec201d", + "zh:c505ff1bf9442a889ac7dca3ac05a8ee6f852e0118dd9a61796a2f6ff4837f09", + "zh:d36c0b5770841ddb6eaf0499ba3de48e5d4fc99f4829b6ab66b0fab59b1aaf4f", + "zh:ddb6a407c7f3ec63efb4dad5f948b54f7f4434ee1a2607a49680d494b1776fe1", + "zh:e0dafdd4500bec23d3ff221e3a9b60621c5273e5df867bc59ef6b7e41f5c91f6", + "zh:ece8742fd2882a8fc9d6efd20e2590010d43db386b920b2a9c220cfecc18de47", + "zh:f4c6b3eb8f39105004cf720e202f04f57e3578441cfb76ca27611139bc116a82", + ] +} + provider "registry.terraform.io/hashicorp/null" { version = "3.2.4" hashes = [ diff --git a/terraform/cluster/talos/modules/machine/main.tf b/terraform/cluster/talos/modules/machine/main.tf index f8ab6323..d6d0f030 100644 --- a/terraform/cluster/talos/modules/machine/main.tf +++ b/terraform/cluster/talos/modules/machine/main.tf @@ -11,6 +11,9 @@ terraform { null = { source = "hashicorp/null" } + local = { + source = "hashicorp/local" + } } } @@ -78,6 +81,32 @@ resource "talos_machine_bootstrap" "bootstrap" { client_configuration = var.client_configuration } +#----------------------------------------------------------------------------------------------------------------------- +# Kubeconfig Generation +#----------------------------------------------------------------------------------------------------------------------- + +resource "talos_cluster_kubeconfig" "this" { + count = var.bootstrap ? 1 : 0 + depends_on = [talos_machine_bootstrap.bootstrap] + + client_configuration = var.client_configuration + node = var.node + endpoint = var.endpoint +} + +// Write kubeconfig to a local file when bootstrap is true +resource "local_sensitive_file" "kubeconfig" { + count = var.bootstrap && trim(var.kubeconfig_path, " ") != "" ? 1 : 0 + + content = talos_cluster_kubeconfig.this[0].kubeconfig_raw + filename = var.kubeconfig_path + file_permission = "0600" // Set file permissions to read/write for owner only + + lifecycle { + ignore_changes = [content] // Ignore changes to content to prevent unnecessary updates + } +} + #----------------------------------------------------------------------------------------------------------------------- # Node Health Check #----------------------------------------------------------------------------------------------------------------------- @@ -85,6 +114,9 @@ resource "talos_machine_bootstrap" "bootstrap" { locals { # Use hostname if available, otherwise fall back to node address node_name = var.hostname != null && var.hostname != "" ? var.hostname : var.node + + # Always use Talos API; during bootstrap also check Kubernetes API + health_check_command = var.bootstrap ? "windsor check node-health --nodes ${local.node_name} --timeout 5m --k8s-endpoint" : "windsor check node-health --nodes ${local.node_name} --timeout 5m" } resource "null_resource" "node_healthcheck" { @@ -94,13 +126,15 @@ resource "null_resource" "node_healthcheck" { depends_on = [ talos_machine_configuration_apply.this, - talos_machine_bootstrap.bootstrap + talos_machine_bootstrap.bootstrap, + local_sensitive_file.kubeconfig ] provisioner "local-exec" { - command = var.enable_health_check ? "windsor check node-health --nodes ${local.node_name} --timeout 5m" : "echo 'Health check disabled for testing'" + command = var.enable_health_check ? local.health_check_command : "echo 'Health check disabled'" environment = var.enable_health_check ? { TALOSCONFIG = var.talosconfig_path + KUBECONFIG = var.bootstrap ? var.kubeconfig_path : "" } : {} } } diff --git a/terraform/cluster/talos/modules/machine/output.tf b/terraform/cluster/talos/modules/machine/output.tf index 607b7052..3434f2e9 100644 --- a/terraform/cluster/talos/modules/machine/output.tf +++ b/terraform/cluster/talos/modules/machine/output.tf @@ -5,3 +5,9 @@ output "node" { output "endpoint" { value = var.endpoint } + +output "kubeconfig" { + description = "The generated kubeconfig when bootstrap is true" + value = var.bootstrap ? talos_cluster_kubeconfig.this[0].kubeconfig_raw : null + sensitive = true +} diff --git a/terraform/cluster/talos/modules/machine/test.tftest.hcl b/terraform/cluster/talos/modules/machine/test.tftest.hcl index bff71892..8ee26e41 100644 --- a/terraform/cluster/talos/modules/machine/test.tftest.hcl +++ b/terraform/cluster/talos/modules/machine/test.tftest.hcl @@ -2,12 +2,17 @@ mock_provider "talos" { mock_resource "talos_machine_configuration" {} mock_resource "talos_machine_configuration_apply" {} mock_resource "talos_machine_bootstrap" {} + mock_resource "talos_cluster_kubeconfig" {} } mock_provider "null" { mock_resource "null_resource" {} } +mock_provider "local" { + mock_resource "local_sensitive_file" {} +} + variables { machine_type = "controlplane" endpoint = "dummy" @@ -56,6 +61,7 @@ variables { kubernetes_version = "dummy" talos_version = "1.10.1" talosconfig_path = "/tmp/dummy-talosconfig" + kubeconfig_path = "" enable_health_check = false } @@ -162,3 +168,114 @@ run "config_patches_includes_extra" { error_message = "Should include nameservers in extra patch" } } + +run "bootstrap_mode_generates_kubeconfig" { + variables { + bootstrap = true + kubeconfig_path = "/tmp/test-kubeconfig" + disk_selector = null + hostname = "test-node" + } + + assert { + condition = length(talos_cluster_kubeconfig.this) == 1 + error_message = "Should create kubeconfig resource when bootstrap is true" + } + + assert { + condition = length(local_sensitive_file.kubeconfig) == 1 + error_message = "Should create kubeconfig file when bootstrap is true and path is provided" + } + + assert { + condition = local_sensitive_file.kubeconfig[0].filename == "/tmp/test-kubeconfig" + error_message = "Should write kubeconfig to specified path" + } +} + +run "non_bootstrap_mode_no_kubeconfig" { + variables { + bootstrap = false + kubeconfig_path = "/tmp/test-kubeconfig" + disk_selector = null + hostname = "test-node" + } + + assert { + condition = length(talos_cluster_kubeconfig.this) == 0 + error_message = "Should not create kubeconfig resource when bootstrap is false" + } + + assert { + condition = length(local_sensitive_file.kubeconfig) == 0 + error_message = "Should not create kubeconfig file when bootstrap is false" + } +} + +run "bootstrap_mode_empty_kubeconfig_path" { + variables { + bootstrap = true + kubeconfig_path = "" + disk_selector = null + hostname = "test-node" + } + + assert { + condition = length(talos_cluster_kubeconfig.this) == 1 + error_message = "Should create kubeconfig resource when bootstrap is true" + } + + assert { + condition = length(local_sensitive_file.kubeconfig) == 0 + error_message = "Should not create kubeconfig file when path is empty" + } +} + +run "health_check_command_bootstrap_mode" { + variables { + bootstrap = true + hostname = "test-node" + disk_selector = null + } + + assert { + condition = !strcontains(local.health_check_command, "--k8s-endpoint") + error_message = "Should not include --k8s-endpoint flag during bootstrap" + } + + assert { + condition = strcontains(local.health_check_command, "test-node") + error_message = "Should include node name in health check command" + } +} + +run "health_check_command_non_bootstrap_mode" { + variables { + bootstrap = false + hostname = "test-node" + disk_selector = null + } + + assert { + condition = strcontains(local.health_check_command, "--k8s-endpoint") + error_message = "Should include --k8s-endpoint flag after bootstrap" + } + + assert { + condition = strcontains(local.health_check_command, "test-node") + error_message = "Should include node name in health check command" + } +} + +run "health_check_command_without_hostname" { + variables { + bootstrap = true + hostname = "" + disk_selector = null + } + + assert { + condition = strcontains(local.health_check_command, "dummy") + error_message = "Should use node address when hostname is empty" + } +} diff --git a/terraform/cluster/talos/modules/machine/variables.tf b/terraform/cluster/talos/modules/machine/variables.tf index 54ae088a..307509ab 100644 --- a/terraform/cluster/talos/modules/machine/variables.tf +++ b/terraform/cluster/talos/modules/machine/variables.tf @@ -123,3 +123,9 @@ variable "enable_health_check" { type = bool default = true } + +variable "kubeconfig_path" { + description = "Path where the kubeconfig file should be written when bootstrap is true." + type = string + default = "" +} diff --git a/terraform/cluster/talos/test.tftest.hcl b/terraform/cluster/talos/test.tftest.hcl index 3d5289c5..e8bfbf98 100644 --- a/terraform/cluster/talos/test.tftest.hcl +++ b/terraform/cluster/talos/test.tftest.hcl @@ -21,10 +21,7 @@ run "minimal_configuration" { error_message = "Talos config file should be generated" } - assert { - condition = length(local_sensitive_file.kubeconfig) == 1 - error_message = "Kubeconfig file should be generated" - } + assert { condition = module.controlplane_bootstrap.node == "192.168.1.10" @@ -208,10 +205,7 @@ run "no_config_files" { error_message = "No Talos config file should be generated without context path" } - assert { - condition = length(local_sensitive_file.kubeconfig) == 0 - error_message = "No Kubeconfig file should be generated without context path" - } + } # Verifies that all input validation rules are enforced simultaneously, ensuring that From 8827afeb5d4c9d198ac276d596cafacb9129d2b9 Mon Sep 17 00:00:00 2001 From: Ryan VanGundy Date: Wed, 23 Jul 2025 21:01:18 -0400 Subject: [PATCH 4/7] Fix test --- terraform/cluster/talos/modules/machine/test.tftest.hcl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/terraform/cluster/talos/modules/machine/test.tftest.hcl b/terraform/cluster/talos/modules/machine/test.tftest.hcl index 8ee26e41..1b108783 100644 --- a/terraform/cluster/talos/modules/machine/test.tftest.hcl +++ b/terraform/cluster/talos/modules/machine/test.tftest.hcl @@ -239,8 +239,8 @@ run "health_check_command_bootstrap_mode" { } assert { - condition = !strcontains(local.health_check_command, "--k8s-endpoint") - error_message = "Should not include --k8s-endpoint flag during bootstrap" + condition = strcontains(local.health_check_command, "--k8s-endpoint") + error_message = "Should include --k8s-endpoint flag during bootstrap" } assert { @@ -257,8 +257,8 @@ run "health_check_command_non_bootstrap_mode" { } assert { - condition = strcontains(local.health_check_command, "--k8s-endpoint") - error_message = "Should include --k8s-endpoint flag after bootstrap" + condition = !strcontains(local.health_check_command, "--k8s-endpoint") + error_message = "Should not include --k8s-endpoint flag after bootstrap" } assert { From 632a5dceb948d021145ed72030a2990695e05759 Mon Sep 17 00:00:00 2001 From: Ryan VanGundy Date: Wed, 23 Jul 2025 21:11:28 -0400 Subject: [PATCH 5/7] terraform fmt --- .../cluster/talos/modules/machine/main.tf | 4 +- .../talos/modules/machine/test.tftest.hcl | 66 +++++++++---------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/terraform/cluster/talos/modules/machine/main.tf b/terraform/cluster/talos/modules/machine/main.tf index d6d0f030..9b5ff936 100644 --- a/terraform/cluster/talos/modules/machine/main.tf +++ b/terraform/cluster/talos/modules/machine/main.tf @@ -86,7 +86,7 @@ resource "talos_machine_bootstrap" "bootstrap" { #----------------------------------------------------------------------------------------------------------------------- resource "talos_cluster_kubeconfig" "this" { - count = var.bootstrap ? 1 : 0 + count = var.bootstrap ? 1 : 0 depends_on = [talos_machine_bootstrap.bootstrap] client_configuration = var.client_configuration @@ -114,7 +114,7 @@ resource "local_sensitive_file" "kubeconfig" { locals { # Use hostname if available, otherwise fall back to node address node_name = var.hostname != null && var.hostname != "" ? var.hostname : var.node - + # Always use Talos API; during bootstrap also check Kubernetes API health_check_command = var.bootstrap ? "windsor check node-health --nodes ${local.node_name} --timeout 5m --k8s-endpoint" : "windsor check node-health --nodes ${local.node_name} --timeout 5m" } diff --git a/terraform/cluster/talos/modules/machine/test.tftest.hcl b/terraform/cluster/talos/modules/machine/test.tftest.hcl index 1b108783..1ea93769 100644 --- a/terraform/cluster/talos/modules/machine/test.tftest.hcl +++ b/terraform/cluster/talos/modules/machine/test.tftest.hcl @@ -171,22 +171,22 @@ run "config_patches_includes_extra" { run "bootstrap_mode_generates_kubeconfig" { variables { - bootstrap = true - kubeconfig_path = "/tmp/test-kubeconfig" - disk_selector = null - hostname = "test-node" + bootstrap = true + kubeconfig_path = "/tmp/test-kubeconfig" + disk_selector = null + hostname = "test-node" } - + assert { condition = length(talos_cluster_kubeconfig.this) == 1 error_message = "Should create kubeconfig resource when bootstrap is true" } - + assert { condition = length(local_sensitive_file.kubeconfig) == 1 error_message = "Should create kubeconfig file when bootstrap is true and path is provided" } - + assert { condition = local_sensitive_file.kubeconfig[0].filename == "/tmp/test-kubeconfig" error_message = "Should write kubeconfig to specified path" @@ -195,17 +195,17 @@ run "bootstrap_mode_generates_kubeconfig" { run "non_bootstrap_mode_no_kubeconfig" { variables { - bootstrap = false - kubeconfig_path = "/tmp/test-kubeconfig" - disk_selector = null - hostname = "test-node" + bootstrap = false + kubeconfig_path = "/tmp/test-kubeconfig" + disk_selector = null + hostname = "test-node" } - + assert { condition = length(talos_cluster_kubeconfig.this) == 0 error_message = "Should not create kubeconfig resource when bootstrap is false" } - + assert { condition = length(local_sensitive_file.kubeconfig) == 0 error_message = "Should not create kubeconfig file when bootstrap is false" @@ -214,17 +214,17 @@ run "non_bootstrap_mode_no_kubeconfig" { run "bootstrap_mode_empty_kubeconfig_path" { variables { - bootstrap = true - kubeconfig_path = "" - disk_selector = null - hostname = "test-node" + bootstrap = true + kubeconfig_path = "" + disk_selector = null + hostname = "test-node" } - + assert { condition = length(talos_cluster_kubeconfig.this) == 1 error_message = "Should create kubeconfig resource when bootstrap is true" } - + assert { condition = length(local_sensitive_file.kubeconfig) == 0 error_message = "Should not create kubeconfig file when path is empty" @@ -233,16 +233,16 @@ run "bootstrap_mode_empty_kubeconfig_path" { run "health_check_command_bootstrap_mode" { variables { - bootstrap = true - hostname = "test-node" - disk_selector = null + bootstrap = true + hostname = "test-node" + disk_selector = null } - + assert { condition = strcontains(local.health_check_command, "--k8s-endpoint") error_message = "Should include --k8s-endpoint flag during bootstrap" } - + assert { condition = strcontains(local.health_check_command, "test-node") error_message = "Should include node name in health check command" @@ -251,16 +251,16 @@ run "health_check_command_bootstrap_mode" { run "health_check_command_non_bootstrap_mode" { variables { - bootstrap = false - hostname = "test-node" - disk_selector = null + bootstrap = false + hostname = "test-node" + disk_selector = null } - + assert { condition = !strcontains(local.health_check_command, "--k8s-endpoint") error_message = "Should not include --k8s-endpoint flag after bootstrap" } - + assert { condition = strcontains(local.health_check_command, "test-node") error_message = "Should include node name in health check command" @@ -269,11 +269,11 @@ run "health_check_command_non_bootstrap_mode" { run "health_check_command_without_hostname" { variables { - bootstrap = true - hostname = "" - disk_selector = null + bootstrap = true + hostname = "" + disk_selector = null } - + assert { condition = strcontains(local.health_check_command, "dummy") error_message = "Should use node address when hostname is empty" From b9652957b9e3f83c4fbdaf2902d80b318b70542a Mon Sep 17 00:00:00 2001 From: Ryan VanGundy Date: Wed, 23 Jul 2025 21:11:37 -0400 Subject: [PATCH 6/7] docs --- terraform/cluster/talos/README.md | 2 -- terraform/gitops/flux/README.md | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/terraform/cluster/talos/README.md b/terraform/cluster/talos/README.md index 73f53b19..63351fb1 100644 --- a/terraform/cluster/talos/README.md +++ b/terraform/cluster/talos/README.md @@ -25,9 +25,7 @@ | Name | Type | |------|------| -| [local_sensitive_file.kubeconfig](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/sensitive_file) | resource | | [local_sensitive_file.talosconfig](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/sensitive_file) | resource | -| [talos_cluster_kubeconfig.this](https://registry.terraform.io/providers/siderolabs/talos/0.8.1/docs/resources/cluster_kubeconfig) | resource | | [talos_machine_secrets.this](https://registry.terraform.io/providers/siderolabs/talos/0.8.1/docs/resources/machine_secrets) | resource | | [talos_client_configuration.this](https://registry.terraform.io/providers/siderolabs/talos/0.8.1/docs/data-sources/client_configuration) | data source | diff --git a/terraform/gitops/flux/README.md b/terraform/gitops/flux/README.md index 1b4b9f37..e6de5560 100644 --- a/terraform/gitops/flux/README.md +++ b/terraform/gitops/flux/README.md @@ -31,9 +31,9 @@ No modules. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [flux\_helm\_version](#input\_flux\_helm\_version) | The version of Flux Helm chart to install | `string` | `"2.16.2"` | no | +| [flux\_helm\_version](#input\_flux\_helm\_version) | The version of Flux Helm chart to install | `string` | `"2.16.3"` | no | | [flux\_namespace](#input\_flux\_namespace) | The namespace in which Flux will be installed | `string` | `"system-gitops"` | no | -| [flux\_version](#input\_flux\_version) | The version of Flux to install | `string` | `"2.6.3"` | no | +| [flux\_version](#input\_flux\_version) | The version of Flux to install | `string` | `"2.6.4"` | no | | [git\_auth\_secret](#input\_git\_auth\_secret) | The name of the secret to store the git authentication details | `string` | `"flux-system"` | no | | [git\_password](#input\_git\_password) | The git password or PAT used to authenticte with the git provider | `string` | `""` | no | | [git\_username](#input\_git\_username) | The git user to use to authenticte with the git provider | `string` | `"git"` | no | From 9bee5590886a376ea8f7b3657dee5ae6cf60366c Mon Sep 17 00:00:00 2001 From: Ryan VanGundy Date: Wed, 23 Jul 2025 21:26:27 -0400 Subject: [PATCH 7/7] Fix test --- terraform/cluster/talos/modules/machine/test.tftest.hcl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/terraform/cluster/talos/modules/machine/test.tftest.hcl b/terraform/cluster/talos/modules/machine/test.tftest.hcl index 77e2fef4..1ea93769 100644 --- a/terraform/cluster/talos/modules/machine/test.tftest.hcl +++ b/terraform/cluster/talos/modules/machine/test.tftest.hcl @@ -13,10 +13,6 @@ mock_provider "local" { mock_resource "local_sensitive_file" {} } -mock_provider "null" { - mock_resource "null_resource" {} -} - variables { machine_type = "controlplane" endpoint = "dummy"