diff --git a/infrastructure/.gitignore b/infrastructure/.gitignore index b08457db..a32fd5bc 100644 --- a/infrastructure/.gitignore +++ b/infrastructure/.gitignore @@ -1,7 +1,8 @@ **/cr-secret.yaml **/customer-values.yaml **/auth - +**/.backend.hcl +**/kubeconfig.yaml **/*.lock.* auth diff --git a/infrastructure/terraform/README.md b/infrastructure/terraform/README.md index 19ccc1af..a172191a 100644 --- a/infrastructure/terraform/README.md +++ b/infrastructure/terraform/README.md @@ -92,6 +92,69 @@ terraform output -json | jq -r .cluster_name.value - **Security**: The sa_key.json file contains sensitive credentials and should never be committed to version control - **State Management**: Consider using remote state storage for team environments +## Using the bucket as a Terraform S3 backend (optional) + +If you want Terraform state to be stored in the object storage bucket, add a backend block to your root module. +Note: backend blocks cannot reference resources or input variables, so you must either hardcode the values or supply them at init time through a partial configuration file (`terraform init -backend-config=.backend.hcl`). + +### Bootstrap script (recommended) + +Note: `backend "s3" {}` is already defined in `main.tf`. The bootstrap step still works because it runs `terraform init -backend=false`, which ignores the backend block. + +Use the helper script to bootstrap the backend in two phases: +1) Run a local-only apply to create the bucket + credentials. +2) Generate `.backend.hcl` and migrate state to S3. + +```bash +./scripts/init-backend.sh +``` + +This writes `infrastructure/terraform/.backend.hcl` (contains credentials) and runs `terraform init -force-copy`. +You can re-run the script at any time; it reuses the existing backend config if present. +If you want remote state from the start, run this script before your first full `terraform apply`. 
+ +For a non-interactive bootstrap: + +```bash +BOOTSTRAP_AUTO_APPROVE=1 ./scripts/init-backend.sh +``` + +Manual phase 1 (the equivalent of the local-only apply the script runs): + +```bash +terraform init -backend=false +terraform apply \ + -target=stackit_objectstorage_bucket.tfstate \ + -target=stackit_objectstorage_credentials_group.rag_creds_group \ + -target=stackit_objectstorage_credential.rag_creds +``` + +### Manual backend block + +```hcl +terraform { + backend "s3" { + bucket = "" + key = "terraform.tfstate" + region = "eu01" + + # Use the object storage credentials created in phase 1 (the `object_storage_access_key` / `object_storage_secret_key` outputs) + access_key = "" + secret_key = "" + + endpoints = { + s3 = "https://object.storage.eu01.onstackit.cloud" + } + + # AWS-specific checks must be disabled for STACKIT + skip_credentials_validation = true + skip_region_validation = true + skip_s3_checksum = true + skip_requesting_account_id = true + } +} +``` + ## Cleanup To destroy all resources: diff --git a/infrastructure/terraform/dns.tf b/infrastructure/terraform/dns.tf index 3f2ad0ea..8702b2c8 100644 --- a/infrastructure/terraform/dns.tf +++ b/infrastructure/terraform/dns.tf @@ -1,7 +1,9 @@ resource "stackit_dns_zone" "rag_zone" { - project_id = var.project_id - name = "${var.name_prefix}-zone" - dns_name = var.dns_name + project_id = var.project_id + name = "${var.name_prefix}-zone" + dns_name = var.dns_name + contact_email = "data-ai@stackit.cloud" + type = "primary" } output "dns_nameservers" { diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index 600f2e20..60eadd95 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -1,4 +1,5 @@ terraform { + backend "s3" {} required_providers { stackit = { source = "stackitcloud/stackit" @@ -9,4 +10,5 @@ terraform { provider "stackit" { service_account_key_path = "sa_key.json" + default_region = "eu01" } diff --git a/infrastructure/terraform/model_serving.tf b/infrastructure/terraform/model_serving.tf new file mode 100644 index 
00000000..dc214c58 --- /dev/null +++ b/infrastructure/terraform/model_serving.tf @@ -0,0 +1,12 @@ +resource "stackit_modelserving_token" "rag_modelserving" { + project_id = var.project_id + name = "${var.name_prefix}-modelserving-token" + + # No ttl_duration set -> token does not expire. +} + +output "model_serving_bearer_token" { + description = "Bearer token for AI Model Serving API" + value = stackit_modelserving_token.rag_modelserving.token + sensitive = true +} diff --git a/infrastructure/terraform/object_storage.tf b/infrastructure/terraform/object_storage.tf index cb5a67e2..58735569 100644 --- a/infrastructure/terraform/object_storage.tf +++ b/infrastructure/terraform/object_storage.tf @@ -1,8 +1,19 @@ +# Rotation anchor for rag_creds: stable for 335 days, then changes, so the credential is replaced about 30 days before its 8760h (365-day) expiry instead of only after it has expired +resource "time_rotating" "key_rotation" { + rotation_days = 335 +} + resource "stackit_objectstorage_bucket" "documents" { name = "${var.name_prefix}-documents-${var.deployment_timestamp}" project_id = var.project_id } +resource "stackit_objectstorage_bucket" "tfstate" { + name = "${var.name_prefix}-tfstate-${var.deployment_timestamp}" + project_id = var.project_id + depends_on = [stackit_objectstorage_credentials_group.rag_creds_group] +} + resource "stackit_objectstorage_bucket" "langfuse" { name = "${var.name_prefix}-langfuse-${var.deployment_timestamp}" project_id = var.project_id @@ -16,7 +27,7 @@ resource "stackit_objectstorage_credentials_group" "rag_creds_group" { resource "stackit_objectstorage_credential" "rag_creds" { project_id = var.project_id credentials_group_id = stackit_objectstorage_credentials_group.rag_creds_group.credentials_group_id - expiration_timestamp = timeadd(timestamp(), "8760h") # Expires after 1 year + expiration_timestamp = timeadd(time_rotating.key_rotation.rfc3339, "8760h") # Expires 365 days after the rotation anchor } output "object_storage_access_key" { @@ -30,5 +41,5 @@ output "object_storage_secret_key" { } output "object_storage_bucket" { - value = stackit_objectstorage_bucket.documents.name + value = 
stackit_objectstorage_bucket.tfstate.name } diff --git a/infrastructure/terraform/redis.tf b/infrastructure/terraform/redis.tf new file mode 100644 index 00000000..b70d9ce0 --- /dev/null +++ b/infrastructure/terraform/redis.tf @@ -0,0 +1,18 @@ +resource "stackit_redis_instance" "rag_redis" { + project_id = var.project_id + name = "${var.name_prefix}-redis" + version = var.redis_version + plan_name = var.redis_plan_name + + parameters = { + sgw_acl = join(",", stackit_ske_cluster.rag_cluster.egress_address_ranges) # comma-separated egress ranges of the SKE cluster + enable_monitoring = false + down_after_milliseconds = 30000 + } +} + + +resource "stackit_redis_credential" "rag_redis_cred" { + project_id = var.project_id + instance_id = stackit_redis_instance.rag_redis.instance_id +} diff --git a/infrastructure/terraform/scripts/init-backend.sh b/infrastructure/terraform/scripts/init-backend.sh new file mode 100755 index 00000000..af474be6 --- /dev/null +++ b/infrastructure/terraform/scripts/init-backend.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +set -euo pipefail + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +root_dir="$(cd "${script_dir}/.." && pwd)" + +backend_config_file="${BACKEND_CONFIG_FILE:-${root_dir}/.backend.hcl}" +auto_approve="${BOOTSTRAP_AUTO_APPROVE:-0}" + +cd "${root_dir}" + +if ! command -v terraform >/dev/null 2>&1; then + echo "terraform is not installed or not in PATH." >&2 + exit 1 +fi + +if [ -f "${backend_config_file}" ]; then # backend config already exists: re-initialise against it and stop + terraform init -backend-config="${backend_config_file}" + exit 0 +fi + +echo "Bootstrapping object storage for Terraform state (local backend)." +terraform init -backend=false # NOTE(review): main.tf declares backend "s3" {}; verify the following apply does not fail with "Backend initialization required" + +if ! 
bucket="$(terraform output -raw object_storage_bucket 2>/dev/null)"; then + apply_args=( + "-target=stackit_objectstorage_bucket.tfstate" + "-target=stackit_objectstorage_credentials_group.rag_creds_group" + "-target=stackit_objectstorage_credential.rag_creds" + "-target=time_rotating.key_rotation" # rotation anchor that rag_creds' expiration_timestamp is derived from + "-target=output.object_storage_bucket" # NOTE(review): -target normally takes resource/module addresses; verify that targeting outputs has any effect + "-target=output.object_storage_access_key" # same caveat as the output target above + "-target=output.object_storage_secret_key" # same caveat as the output target above + ) + if [ "${auto_approve}" = "1" ]; then + terraform apply -auto-approve "${apply_args[@]}" + else + terraform apply "${apply_args[@]}" + fi + bucket="$(terraform output -raw object_storage_bucket)" +fi + +access_key="$(terraform output -raw object_storage_access_key)" +secret_key="$(terraform output -raw object_storage_secret_key)" + +cat > "${backend_config_file}" <> +EOF +} diff --git a/infrastructure/terraform/seed-secrets/variables.tf b/infrastructure/terraform/seed-secrets/variables.tf new file mode 100644 index 00000000..feb1cf65 --- /dev/null +++ b/infrastructure/terraform/seed-secrets/variables.tf @@ -0,0 +1,39 @@ +variable "vault_address" { + description = "Vault address (STACKIT Secrets Manager URL)." + type = string + default = "https://prod.sm.eu01.stackit.cloud" +} + +variable "vault_mount_path" { + description = "Secrets Manager instance ID (KV mount path)." + type = string +} + +variable "vault_userpass_path" { + description = "Vault userpass auth path." + type = string + default = "userpass" +} + +variable "vault_username" { + description = "Secrets Manager user name." + type = string +} + +variable "vault_password" { + description = "Secrets Manager user password." + type = string + sensitive = true +} + +variable "vault_secret_name" { + description = "KV secret name used by External Secrets." 
+ type = string + default = "rag-secrets" +} + +variable "rag_secrets" { + description = "Map of secret keys/values stored under the rag-secrets secret." + type = map(string) + sensitive = true +} diff --git a/infrastructure/terraform/ske.tf b/infrastructure/terraform/ske.tf index 9e1028e3..37427460 100644 --- a/infrastructure/terraform/ske.tf +++ b/infrastructure/terraform/ske.tf @@ -1,18 +1,18 @@ resource "stackit_ske_cluster" "rag_cluster" { project_id = var.project_id name = var.rag_cluster_name - kubernetes_version_min = "1.31" # Update to the latest available version + kubernetes_version_min = "1.34" # Update to the latest available version node_pools = [ { name = "${var.name_prefix}-node" - machine_type = "g1.4" + machine_type = "g2i.8" os_name = "flatcar" minimum = "1" maximum = "1" max_surge = "1" availability_zones = ["${var.region}-1"] # Single availability zone - volume_size = 50 + volume_size = 64 volume_type = "storage_premium_perf1" } ] @@ -28,3 +28,31 @@ resource "stackit_ske_cluster" "rag_cluster" { output "cluster_name" { value = stackit_ske_cluster.rag_cluster.name } + + +# ------------------------------------------------- +# Kubeconfig for the cluster +# ------------------------------------------------- +resource "stackit_ske_kubeconfig" "kubeconfig" { + project_id = var.project_id + cluster_name = stackit_ske_cluster.rag_cluster.name + + # 6‑month expiration (seconds) + expiration = 15552000 + + # Refresh only when the config is already expired + refresh = true +} + +output "kubeconfig" { + description = "Raw kubeconfig (YAML) for the SKE cluster" + value = stackit_ske_kubeconfig.kubeconfig.kube_config + sensitive = true +} + +resource "local_file" "kubeconfig_file" { + filename = "${path.module}/kubeconfig.yaml" + content = stackit_ske_kubeconfig.kubeconfig.kube_config + # The kubeconfig carries cluster credentials; local_file defaults to 0777, so restrict it to the owner + file_permission = "0600" +} diff --git a/infrastructure/terraform/variables.tf b/infrastructure/terraform/variables.tf index f256c8dd..b99534bd 100644 --- a/infrastructure/terraform/variables.tf +++ 
b/infrastructure/terraform/variables.tf @@ -4,7 +4,7 @@ variable "project_id" { } variable "dns_name" { - description = "DNS name for the service" + description = "DNS name for the service. Desired free sub‑domain (e.g. ends with .runs.onstackit.cloud)." type = string validation { condition = can(regex("^([a-z0-9.-]+)$", var.dns_name)) @@ -36,5 +36,29 @@ variable "region" { variable "deployment_timestamp" { description = "Static timestamp for resource naming to avoid recreation on each apply" type = string - default = "20250603" # Set this once and don't change unless you want to recreate resources + default = "20260113" # Set this once and don't change unless you want to recreate resources +} + +variable "redis_version" { + description = "Redis service version" + type = string + default = "7" +} + +variable "redis_plan_name" { + description = "Redis plan name" + type = string + default = "stackit-redis-1.2.10-replica" +} + +variable "secretsmanager_user_description" { + description = "Description for the Secrets Manager user" + type = string + default = "RAG secrets manager user" +} + +variable "secretsmanager_user_write_enabled" { + description = "Allow the Secrets Manager user to write secrets" + type = bool + default = true }