diff --git a/.terraform.lock.hcl b/.terraform.lock.hcl index 97ae20b..689168a 100644 --- a/.terraform.lock.hcl +++ b/.terraform.lock.hcl @@ -64,6 +64,26 @@ provider "registry.terraform.io/hashicorp/kubernetes" { ] } +provider "registry.terraform.io/hashicorp/null" { + version = "3.2.4" + constraints = "~> 3.0" + hashes = [ + "h1:hkf5w5B6q8e2A42ND2CjAvgvSN3puAosDmOJb3zCVQM=", + "zh:59f6b52ab4ff35739647f9509ee6d93d7c032985d9f8c6237d1f8a59471bbbe2", + "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", + "zh:795c897119ff082133150121d39ff26cb5f89a730a2c8c26f3a9c1abf81a9c43", + "zh:7b9c7b16f118fbc2b05a983817b8ce2f86df125857966ad356353baf4bff5c0a", + "zh:85e33ab43e0e1726e5f97a874b8e24820b6565ff8076523cc2922ba671492991", + "zh:9d32ac3619cfc93eb3c4f423492a8e0f79db05fec58e449dee9b2d5873d5f69f", + "zh:9e15c3c9dd8e0d1e3731841d44c34571b6c97f5b95e8296a45318b94e5287a6e", + "zh:b4c2ab35d1b7696c30b64bf2c0f3a62329107bd1a9121ce70683dec58af19615", + "zh:c43723e8cc65bcdf5e0c92581dcbbdcbdcf18b8d2037406a5f2033b1e22de442", + "zh:ceb5495d9c31bfb299d246ab333f08c7fb0d67a4f82681fbf47f2a21c3e11ab5", + "zh:e171026b3659305c558d9804062762d168f50ba02b88b231d20ec99578a6233f", + "zh:ed0fe2acdb61330b01841fa790be00ec6beaac91d41f311fb8254f74eb6a711f", + ] +} + provider "registry.terraform.io/hashicorp/time" { version = "0.13.1" constraints = "~> 0.11" diff --git a/README.md b/README.md index a907977..f87f244 100644 --- a/README.md +++ b/README.md @@ -9,10 +9,11 @@ A basic Terraform module for creating and managing Amazon EKS (Elastic Kubernete - **Fargate Profiles**: Supported via `fargate_profiles` and Fargate IAM role creation - **AutoMode**: Placeholder wiring only; no AutoMode-specific resources yet - **IRSA Support**: OIDC provider setup for IAM Roles for Service Accounts -- **EKS Capabilities**: Managed ACK, KRO, and ArgoCD capabilities (optional, default: disabled) +- **EKS Capabilities**: Managed ACK and KRO capabilities (optional, default: disabled) - **ACK**: AWS Controllers for Kubernetes - create AWS resources via Kubernetes manifests - **KRO**: Kube Resource Orchestrator - platform engineering abstractions - - **ArgoCD**: GitOps capability for continuous deployment + - **ArgoCD**: Scaffolded only (requires AWS Identity Center setup) +- **Access Management**: Automatic EKS access entry creation for cluster admins when capabilities are enabled - **Optional Addons**: - EBS CSI Driver (optional, default: disabled) - AWS Load Balancer Controller (optional, default: disabled) @@ -97,7 +98,13 @@ module "eks" { # Enable EKS Capabilities for platform engineering enable_ack_capability = true # AWS Controllers for Kubernetes enable_kro_capability = true # Kube Resource Orchestrator - enable_argocd_capability = true # ArgoCD GitOps + # enable_argocd_capability = false # Not supported yet - requires Identity Center + + # Grant cluster admin access to IAM users/roles + cluster_admin_arns = [ + "arn:aws:iam::123456789012:user/admin-user", + "arn:aws:iam::123456789012:role/admin-role" + ] tags = { Environment = "production" @@ -105,6 +112,15 @@ module "eks" { } ``` +**Note**: When capabilities are enabled, the cluster uses `API_AND_CONFIG_MAP` authentication mode. You must specify `cluster_admin_arns` to grant access to IAM users/roles for kubectl access. 
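+
+For example, to find the ARN of the identity you are currently using (to add to `cluster_admin_arns`), run the sketch below. Note that for an assumed role this prints an STS `assumed-role` ARN; access entries expect the underlying IAM role ARN (`arn:aws:iam::<account-id>:role/<role-name>`) instead.
+
+```bash
+# Print the ARN of the IAM principal your AWS CLI is currently using
+aws sts get-caller-identity --query Arn --output text
+```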
+ + tags = { + Environment = "production" + } +} + +``` + ### Fargate Example ```hcl @@ -177,7 +193,7 @@ module "eks" { - **[examples/basic](examples/basic/)** - Basic EKS cluster with EC2 node groups - **[examples/ebs-web-app](examples/ebs-web-app/)** - Web application with EBS persistent volume -- **[examples/eks-capabilities](examples/eks-capabilities/)** - Complete platform engineering example with ACK, KRO, and ArgoCD capabilities +- **[examples/eks-capabilities](examples/eks-capabilities/)** - Complete platform engineering example with ACK and KRO capabilities (ArgoCD scaffolded but not supported) ## Requirements diff --git a/access-entries.tf b/access-entries.tf new file mode 100644 index 0000000..4b9082b --- /dev/null +++ b/access-entries.tf @@ -0,0 +1,101 @@ +# ============================================================================= +# User Access Entries +# Grant cluster admin access to specified IAM users/roles +# ============================================================================= + +locals { + # Determine if user access entries should be created + # Only create when: + # 1. cluster_admin_arns is not empty, AND + # 2. Either capabilities are enabled OR authentication mode is not CONFIG_MAP + create_user_access_entries = length(var.cluster_admin_arns) > 0 && ( + var.enable_ack_capability || + var.enable_kro_capability || + var.enable_argocd_capability || + var.cluster_authentication_mode != "CONFIG_MAP" + ) +} + +resource "aws_eks_access_entry" "cluster_admins" { + for_each = local.create_user_access_entries ? toset(var.cluster_admin_arns) : [] + + cluster_name = aws_eks_cluster.this.name + principal_arn = each.value + type = "STANDARD" + + depends_on = [ + aws_eks_cluster.this + ] +} + +resource "aws_eks_access_policy_association" "cluster_admin_policy" { + for_each = local.create_user_access_entries ? toset(var.cluster_admin_arns) : [] + + cluster_name = aws_eks_cluster.this.name + principal_arn = each.value + policy_arn = "arn:aws:eks::aws:cluster-access-policy/AmazonEKSClusterAdminPolicy" + + access_scope { + type = "cluster" + } + + depends_on = [ + aws_eks_access_entry.cluster_admins + ] +} + +# ============================================================================= +# EC2 Node Access Entry +# Required when using API or API_AND_CONFIG_MAP authentication mode +# ============================================================================= + +locals { + # Determine if access entries are needed for EC2 nodes + ec2_needs_access_entry = contains(var.compute_mode, "ec2") && ( + var.enable_ack_capability || + var.enable_kro_capability || + var.enable_argocd_capability || + var.cluster_authentication_mode != "CONFIG_MAP" + ) +} + +resource "aws_eks_access_entry" "ec2_nodes" { + count = local.ec2_needs_access_entry ? 
1 : 0 + + cluster_name = aws_eks_cluster.this.name + principal_arn = aws_iam_role.eks_nodes[0].arn + type = "EC2_LINUX" + + depends_on = [ + aws_eks_cluster.this, + aws_iam_role.eks_nodes[0] + ] +} + +# ============================================================================= +# Fargate Pod Access Entry +# Required when using API or API_AND_CONFIG_MAP authentication mode +# ============================================================================= + +locals { + # Determine if access entries are needed for Fargate pods + fargate_needs_access_entry = contains(var.compute_mode, "fargate") && ( + var.enable_ack_capability || + var.enable_kro_capability || + var.enable_argocd_capability || + var.cluster_authentication_mode != "CONFIG_MAP" + ) +} + +resource "aws_eks_access_entry" "fargate_pods" { + count = local.fargate_needs_access_entry ? 1 : 0 + + cluster_name = aws_eks_cluster.this.name + principal_arn = aws_iam_role.eks_fargate[0].arn + type = "FARGATE_LINUX" + + depends_on = [ + aws_eks_cluster.this, + aws_iam_role.eks_fargate[0] + ] +} diff --git a/addons.tf b/addons.tf index 052edfa..761c306 100644 --- a/addons.tf +++ b/addons.tf @@ -128,7 +128,9 @@ resource "aws_eks_addon" "ebs_csi_driver" { service_account_role_arn = aws_iam_role.ebs_csi_driver[0].arn depends_on = [ - aws_iam_role_policy.ebs_csi_driver[0] + aws_iam_role_policy.ebs_csi_driver[0], + # Wait for nodes to be available since EBS CSI driver runs as pods + aws_eks_node_group.default, ] tags = var.tags @@ -154,7 +156,9 @@ resource "kubernetes_storage_class" "ebs_csi_default" { } depends_on = [ - aws_eks_addon.ebs_csi_driver[0] + aws_eks_addon.ebs_csi_driver[0], + # Wait for access entries to be created and propagated if they exist + aws_eks_access_policy_association.cluster_admin_policy ] } @@ -300,7 +304,9 @@ resource "kubernetes_service_account" "aws_lb_controller" { aws_eks_cluster.this, aws_iam_role_policy_attachment.aws_lb_controller[0], aws_iam_role_policy_attachment.aws_lb_controller_ec2[0], - aws_iam_role_policy.aws_lb_controller_waf[0] + aws_iam_role_policy.aws_lb_controller_waf[0], + # Wait for access entries to be created and propagated if they exist + aws_eks_access_policy_association.cluster_admin_policy ] } diff --git a/capabilities-iam.tf b/capabilities-iam.tf index e2e696a..8ada2fc 100644 --- a/capabilities-iam.tf +++ b/capabilities-iam.tf @@ -69,33 +69,40 @@ resource "aws_iam_role" "kro_capability" { # Note: KRO capability roles don't require managed policies - AWS manages permissions internally -# ArgoCD Capability Role -data "aws_iam_policy_document" "argocd_capability_assume_role" { - count = var.enable_argocd_capability ? 1 : 0 - - statement { - effect = "Allow" - - principals { - type = "Service" - identifiers = ["capabilities.eks.amazonaws.com"] - } - - actions = [ - "sts:AssumeRole", - "sts:TagSession" - ] - } -} - -resource "aws_iam_role" "argocd_capability" { - count = var.enable_argocd_capability ? 1 : 0 - - name = var.argocd_capability_role_arn != null ? null : "${var.cluster_name}-argocd-capability-role" - name_prefix = var.argocd_capability_role_arn != null ? 
null : null - assume_role_policy = data.aws_iam_policy_document.argocd_capability_assume_role[0].json - tags = var.tags -} +# ============================================================================= +# ArgoCD Capability Role (SCAFFOLDED - NOT SUPPORTED YET) +# ============================================================================= +# ArgoCD capability requires AWS Identity Center configuration +# This is scaffolded for future implementation but not currently supported +# Uncomment and configure Identity Center before enabling +# ============================================================================= -# Note: ArgoCD capability roles don't require managed policies - AWS manages permissions internally -# ArgoCD also requires configuration which should be provided via the capability resource +# data "aws_iam_policy_document" "argocd_capability_assume_role" { +# count = var.enable_argocd_capability ? 1 : 0 +# +# statement { +# effect = "Allow" +# +# principals { +# type = "Service" +# identifiers = ["capabilities.eks.amazonaws.com"] +# } +# +# actions = [ +# "sts:AssumeRole", +# "sts:TagSession" +# ] +# } +# } +# +# resource "aws_iam_role" "argocd_capability" { +# count = var.enable_argocd_capability ? 1 : 0 +# +# name = var.argocd_capability_role_arn != null ? null : "${var.cluster_name}-argocd-capability-role" +# name_prefix = var.argocd_capability_role_arn != null ? null : null +# assume_role_policy = data.aws_iam_policy_document.argocd_capability_assume_role[0].json +# tags = var.tags +# } +# +# # Note: ArgoCD capability roles don't require managed policies - AWS manages permissions internally +# # ArgoCD also requires configuration which should be provided via the capability resource diff --git a/capabilities.tf b/capabilities.tf index e187d89..6089570 100644 --- a/capabilities.tf +++ b/capabilities.tf @@ -43,27 +43,33 @@ resource "aws_eks_capability" "kro" { tags = var.tags } -resource "aws_eks_capability" "argocd" { - count = var.enable_argocd_capability ? 1 : 0 - - cluster_name = aws_eks_cluster.this.name - capability_name = "ARGOCD" - type = "ARGOCD" - - # ArgoCD requires a role ARN - # Use provided role ARN or create one automatically - role_arn = var.argocd_capability_role_arn != null ? var.argocd_capability_role_arn : aws_iam_role.argocd_capability[0].arn - - # Note: ArgoCD capability requires AWS Identity Center configuration - # This is typically done via AWS Console or requires additional setup - # For now, this resource will fail if Identity Center is not configured - # Users should configure Identity Center before enabling ArgoCD capability - - delete_propagation_policy = "RETAIN" - - depends_on = [ - aws_eks_cluster.this - ] +# ============================================================================= +# ArgoCD Capability (SCAFFOLDED - NOT SUPPORTED YET) +# ============================================================================= +# ArgoCD capability requires AWS Identity Center configuration +# This is scaffolded for future implementation but not currently supported +# Uncomment and configure Identity Center before enabling +# ============================================================================= - tags = var.tags -} +# resource "aws_eks_capability" "argocd" { +# count = var.enable_argocd_capability ? 1 : 0 +# +# cluster_name = aws_eks_cluster.this.name +# capability_name = "ARGOCD" +# type = "ARGOCD" +# +# # ArgoCD requires a role ARN +# # Use provided role ARN or create one automatically +# role_arn = var.argocd_capability_role_arn != null ? 
var.argocd_capability_role_arn : aws_iam_role.argocd_capability[0].arn +# +# # ArgoCD requires configuration parameter with Identity Center details +# configuration = var.argocd_capability_configuration != null ? var.argocd_capability_configuration : jsonencode({}) +# +# delete_propagation_policy = "RETAIN" +# +# depends_on = [ +# aws_eks_cluster.this +# ] +# +# tags = var.tags +# } diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..b5ef07f --- /dev/null +++ b/docs/README.md @@ -0,0 +1,7 @@ +# terraform-aws-eks-basic Documentation + +This directory contains detailed documentation for the module's design decisions, architecture, and operational guides. + +## Contents + +- [Authentication Modes](authentication.md) - EKS authentication modes and access entry management diff --git a/docs/authentication.md b/docs/authentication.md new file mode 100644 index 0000000..6421ee3 --- /dev/null +++ b/docs/authentication.md @@ -0,0 +1,157 @@ +# EKS Authentication Modes and Access Entry Management + +## Overview + +EKS supports three authentication modes for cluster access: + +- `CONFIG_MAP` - Legacy mode using aws-auth ConfigMap (default when no capabilities) +- `API` - API-only authentication via EKS access entries +- `API_AND_CONFIG_MAP` - Hybrid mode supporting both methods (required for capabilities) + +## Why Access Entries Are Needed + +When EKS capabilities (ACK, KRO) are enabled, the cluster automatically switches to `API_AND_CONFIG_MAP` mode. In this mode: + +1. **Capabilities** authenticate via the EKS API +2. **Nodes and pods** need explicit access entries to join the cluster +3. **Users and roles** need explicit access entries for kubectl/API access +4. The aws-auth ConfigMap alone is insufficient + +## Access Entry Types + +### Infrastructure Access Entries (Automatic) + +The module automatically creates access entries for infrastructure resources: + +- **EC2 Nodes**: `type = "EC2_LINUX"` - Allows worker nodes to join the cluster +- **Fargate Pods**: `type = "FARGATE_LINUX"` - Allows Fargate pods to schedule + +These are created automatically when capabilities are enabled. + +### User Access Entries (Manual) + +For human users and CI/CD systems to access the cluster, you must explicitly grant access: + +- **Type**: `STANDARD` - For IAM users/roles that need cluster access +- **Policy**: `AmazonEKSClusterAdminPolicy` - Grants full cluster admin permissions + +#### Example: Granting Admin Access + +```hcl +resource "aws_eks_access_entry" "cluster_admins" { + for_each = toset(var.cluster_admin_arns) + + cluster_name = module.eks.cluster_name + principal_arn = each.value + type = "STANDARD" +} + +resource "aws_eks_access_policy_association" "cluster_admin_policy" { + for_each = toset(var.cluster_admin_arns) + + cluster_name = module.eks.cluster_name + principal_arn = each.value + policy_arn = "arn:aws:eks::aws:cluster-access-policy/AmazonEKSClusterAdminPolicy" + + access_scope { + type = "cluster" + } + + depends_on = [aws_eks_access_entry.cluster_admins] +} +``` + +See the `eks-capabilities` example for a complete implementation. 
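+
+To confirm what a principal was actually granted, you can list the cluster's access entries and the policies associated with a given principal. The cluster name and ARN below are placeholders; substitute your own values.
+
+```bash
+# List every principal that has an access entry on the cluster
+aws eks list-access-entries --cluster-name <cluster-name>
+
+# Show the access policies associated with a specific principal
+aws eks list-associated-access-policies \
+  --cluster-name <cluster-name> \
+  --principal-arn arn:aws:iam::123456789012:role/admin-role
+```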
+ +## Implementation + +### Conditional Logic + +Access entries are created when: + +- Any capability is enabled (ACK or KRO), OR +- `cluster_authentication_mode` is explicitly set to `API` or `API_AND_CONFIG_MAP` + +```hcl +local.ec2_needs_access_entry = contains(var.compute_mode, "ec2") && ( + var.enable_ack_capability || + var.enable_kro_capability || + # var.enable_argocd_capability || # ArgoCD not currently supported + var.cluster_authentication_mode != "CONFIG_MAP" +) +``` + +### Resource Types + +- **EC2 Nodes**: Use `type = "EC2_LINUX"` access entry +- **Fargate Pods**: Use `type = "FARGATE_LINUX"` access entry +- **Users/Roles**: Use `type = "STANDARD"` access entry + +### Deployment Order + +1. EKS cluster created +2. IAM roles for nodes/pods created +3. Access entries created (if needed) +4. Node groups/Fargate profiles created + +This ensures nodes have the credentials to authenticate before attempting to join. + +## Backward Compatibility + +| Scenario | Authentication Mode | Access Entries | Result | +| -------- | ----------------- | -------------- | ------- | +| No capabilities | CONFIG_MAP | Not created | aws-auth ConfigMap only | +| With capabilities | API_AND_CONFIG_MAP | Created | Both methods available | +| Explicit API mode | API | Created | API-only authentication | + +## Troubleshooting + +### Nodes Fail to Join + +**Symptom**: `NodeCreationFailure: Instances failed to join the kubernetes cluster` + +**Cause**: Cluster is in `API_AND_CONFIG_MAP` mode but node access entries weren't created + +**Solution**: Verify node access entries exist: + +```bash +aws eks list-access-entries --cluster-name +``` + +### Fargate Pods Stuck Pending + +**Symptom**: Fargate pods remain in `Pending` state + +**Cause**: Missing Fargate pod access entry in API authentication mode + +**Solution**: Check Fargate access entry exists and pod execution role matches + +### User Cannot Access Cluster + +**Symptom**: `Your current IAM principal doesn't have access to Kubernetes objects on this cluster` + +**Cause**: Your IAM user/role doesn't have an EKS access entry + +**Solution**: Add your IAM ARN to the cluster admin access entries: + +```bash +# Option 1: Via AWS CLI +aws eks create-access-entry \ + --cluster-name \ + --principal-arn \ + --type STANDARD + +aws eks associate-access-policy \ + --cluster-name \ + --principal-arn \ + --policy-arn arn:aws:eks::aws:cluster-access-policy/AmazonEKSClusterAdminPolicy \ + --access-scope type=cluster + +# Option 2: Via Terraform (see eks-capabilities example) +# Add your ARN to cluster_admin_arns variable and apply +``` + +## References + +- [EKS Access Entries](https://docs.aws.amazon.com/eks/latest/userguide/access-entries.html) +- [EKS Authentication Modes](https://docs.aws.amazon.com/eks/latest/userguide/grant-k8s-access.html) diff --git a/ec2.tf b/ec2.tf index af6dea3..71d97c4 100644 --- a/ec2.tf +++ b/ec2.tf @@ -87,6 +87,6 @@ resource "aws_eks_node_group" "default" { depends_on = [ aws_iam_role_policy_attachment.eks_nodes_worker[0], aws_iam_role_policy_attachment.eks_nodes_cni[0], - aws_iam_role_policy_attachment.eks_nodes_ecr[0], + aws_iam_role_policy_attachment.eks_nodes_ecr[0] ] } diff --git a/examples/eks-capabilities/README.md b/examples/eks-capabilities/README.md index 6558d06..6720398 100644 --- a/examples/eks-capabilities/README.md +++ b/examples/eks-capabilities/README.md @@ -1,13 +1,15 @@ # EKS Capabilities Example -This example demonstrates how to use EKS Capabilities (ACK, KRO, and ArgoCD) for platform engineering. 
It shows how platform teams can create reusable abstractions and how development teams can deploy applications with AWS resources using simple Kubernetes manifests. +This example demonstrates how to use EKS Capabilities (ACK and KRO) for platform engineering. It shows how platform teams can create reusable abstractions and how development teams can deploy applications with AWS resources using simple Kubernetes manifests. + +**Note**: ArgoCD capability is scaffolded in the codebase but not currently supported as it requires AWS Identity Center configuration. ## What This Example Creates 1. **EKS Cluster** with capabilities enabled: - **ACK** (AWS Controllers for Kubernetes) - Create AWS resources via Kubernetes manifests - **KRO** (Kube Resource Orchestrator) - Platform engineering abstractions - - **ArgoCD** - GitOps capability for continuous deployment (disabled by default in this example) + - **ArgoCD** - Scaffolded only (not supported - requires AWS Identity Center) 2. **KRO Resource Graph Definition (RGD)** - Platform team abstraction template 3. **KRO Resource Group Instance** - Developer-facing application deployment @@ -16,7 +18,7 @@ This example demonstrates how to use EKS Capabilities (ACK, KRO, and ArgoCD) for ## Features Demonstrated -- ✅ EKS Capabilities enablement (ACK, KRO, optional ArgoCD) +- ✅ EKS Capabilities enablement (ACK, KRO) - ✅ Platform engineering with KRO Resource Graph Definitions - ✅ Creating AWS resources (DynamoDB, S3, IAM) via ACK as part of the WebAppStack - ✅ Creating additional ACK example resources via standalone manifests @@ -39,8 +41,16 @@ Create a `terraform.tfvars` file: ```hcl aws_region = "ap-southeast-2" cluster_name = "eks-capabilities" + +# IMPORTANT: Add your IAM user/role ARNs to access the cluster +cluster_admin_arns = [ + "arn:aws:iam::YOUR_ACCOUNT:role/your-admin-role", + "arn:aws:iam::YOUR_ACCOUNT:user/your-user" +] ``` +**Note**: The `cluster_admin_arns` variable is required for cluster access. When EKS capabilities are enabled, the cluster uses `API_AND_CONFIG_MAP` authentication mode, which requires explicit access entries. Add the ARNs of IAM users/roles that need admin access to the cluster. The module automatically creates the access entries for you. + ### Step 2: Initialize and Apply ```bash @@ -49,231 +59,275 @@ terraform plan ``` Because this example uses the Kubernetes provider (which needs a live cluster), -apply it in two stages: +apply it in stages: ```bash # 1) Create the EKS cluster first terraform apply -target=module.eks -auto-approve -# 2) Apply the rest (KRO/ACK resources, RGD, etc.) +# 2) Apply KRO RBAC and RGD first (required for WebAppStack CRD validation) +terraform apply -target='kubernetes_manifest.kro_rbac' -target='kubernetes_manifest.kro_rgd' -auto-approve + +# 3) Apply ACK resources (DynamoDB table, S3 bucket) - needed for IAM policy +terraform apply -target='kubernetes_manifest.ack_dynamodb_table' -target='kubernetes_manifest.ack_s3_bucket' -auto-approve + +# 4) Apply WebAppStack instance (creates Pod Identity Association, then Deployment) +terraform apply -target='kubernetes_manifest.kro_webappstack_instance' -auto-approve + +# 4.5) Wait for Pod Identity Association to be ready, then restart deployment +# Note: KRO's dependsOn ensures creation order but not readiness state. Pods created before +# the Pod Identity Association is fully ready won't have the required env vars. We wait for +# the association to be ready, then restart the deployment to ensure all pods get env vars. 
+kubectl wait --for=jsonpath='{.status.conditions[?(@.type=="Ready")].status}=True' \ + podidentityassociation/eks-capabilities-app --timeout=60s +kubectl rollout restart deployment/eks-capabilities-app + +# 5) Apply any remaining resources (access entries, etc.) terraform apply -auto-approve ``` Wait for the cluster and capabilities to be fully provisioned (this may take 10-15 minutes). -**Note:** The module automatically creates IAM roles for enabled capabilities (ACK, KRO, and ArgoCD if you enable it) with the appropriate managed policies. If you prefer to use existing roles, you can provide them via the `*_capability_role_arn` variables. +**Note:** The module automatically creates IAM roles for enabled capabilities and EKS access entries for `cluster_admin_arns` when you run `terraform apply`. -### Step 3: Configure kubectl +### Step 3: Configure kubectl and Verify Access ```bash -aws eks update-kubeconfig --name --region +aws eks update-kubeconfig --name eks-capabilities --region ap-southeast-2 ``` -### Step 4: Verify Kubernetes Resources - -The Terraform deployment automatically creates: - -- **KRO RBAC Configuration** - Grants the KRO capability role cluster-admin permissions -- **KRO Resource Graph Definition (RGD)** - Platform team abstraction template -- **ACK Example Resources** - DynamoDB table and S3 bucket +**Verify access entries were created:** -These are automatically deployed as part of `terraform apply`. Verify they exist: +The access entries should have been created in Step 2. Verify they exist: ```bash -# Check KRO RBAC -kubectl get clusterrolebinding eks-capabilities-kro-cluster-admin +# Check your current IAM principal +aws sts get-caller-identity --query Arn --output text -# Check KRO RGD -kubectl get resourcegraphdefinition eks-capabilities-appstack.kro.run -# Check ACK example resources -kubectl get table eks-capabilities-table -kubectl get bucket eks-capabilities-bucket +# List all access entries for the cluster +aws eks list-access-entries --cluster-name eks-capabilities --region ap-southeast-2 ``` -### Step 5: Verify Capabilities +If your ARN is not in the access entries list, run `terraform apply` to create the missing access entries. See the Troubleshooting section below for more details. -Check that capabilities are active: +### Step 4: Verify Deployment + +Verify that all resources were created successfully: ```bash -# Check available APIs +# Check capabilities are active kubectl api-resources | grep -E "(resourcegraphdefinition|webappstack|podidentityassociation|table|bucket|role.iam.services.k8s.aws|policy.iam.services.k8s.aws)" -# Verify KRO API -kubectl get resourcegraphdefinition -``` +# Check KRO resources +kubectl get resourcegraphdefinition eks-capabilities-appstack.kro.run +kubectl get webappstack eks-capabilities-app -### Step 6: Verify Resource Graph Definition +# Check ACK example resources +kubectl get table eks-capabilities-table eks-capabilities-app +kubectl get bucket eks-capabilities-bucket -The RGD was automatically created by Terraform. 
Verify it: +# Check WebAppStack AWS resources +kubectl get role.iam.services.k8s.aws eks-capabilities-app-role +kubectl get policy.iam.services.k8s.aws eks-capabilities-app-policy +kubectl get podidentityassociation eks-capabilities-app +``` + +Wait for the WebAppStack to be ready (about 1-2 minutes), then test the application: ```bash -kubectl get resourcegraphdefinition eks-capabilities-appstack.kro.run -kubectl describe resourcegraphdefinition eks-capabilities-appstack.kro.run +# Port forward for quick testing +kubectl port-forward service/eks-capabilities-app 8080:80 ``` -**Expected output:** +Then open in your browser. -- The RGD should show `STATE: Inactive` initially (KRO is processing it) -- Once active, the `WebAppStack` API will be available for use -- The `describe` command shows the full RGD definition with all resources +## Understanding the Components -### Step 7: Deploy Application (Development Team) +### KRO Resource Graph Definition (RGD) -The development team uses the abstraction to deploy their application: +The RGD template (`kubernetes/platform-team/eks-capabilities-appstack-rgd.yaml.tpl`) defines a reusable abstraction that bundles: -```bash -kubectl apply -f kubernetes/dev-team/eks-capabilities-app-instance.yaml -``` +- Kubernetes resources (Deployment, Service, ServiceAccount) +- AWS resources via ACK (DynamoDB table, IAM role/policy, Pod Identity Association) +- Conditional resources (S3 bucket, Ingress when enabled) -Watch the resources being created: +### KRO WebAppStack Instance -```bash -kubectl get webappstack eks-capabilities-app -w -``` +The instance (`kubernetes/dev-team/eks-capabilities-app-instance.yaml`) demonstrates how developers use the abstraction with a simple manifest that automatically creates all required resources. + +### Access the Application -Wait for the deployment to be ready (about 1-2 minutes), then test the application: +**Port Forward (Quick Test):** ```bash -# Port forward for quick testing kubectl port-forward service/eks-capabilities-app 8080:80 ``` -Then open in your browser. +**ALB (Production):** -### Step 8: Verify WebAppStack Resources +```bash +kubectl get ingress eks-capabilities-app-ingress -o jsonpath='{.status.loadBalancer.ingress[0].hostname}' +``` -The WebAppStack creates the AWS resources needed by the demo app. Verify they exist: +## ArgoCD Capability (Not Currently Supported) -```bash -# DynamoDB table used by the app -kubectl get table eks-capabilities-app +The ArgoCD capability is **scaffolded in the code but not supported** for the following reason: -# IAM role and policy used by Pod Identity -kubectl get role.iam.services.k8s.aws eks-capabilities-app-role -kubectl get policy.iam.services.k8s.aws eks-capabilities-app-policy +**Prerequisite**: AWS Identity Center (formerly AWS SSO) must be configured for ArgoCD authentication. Local users are not supported. -# Pod Identity Association -kubectl get podidentityassociation eks-capabilities-app +Once you have Identity Center configured, you can: -# Optional S3 bucket (only if bucket.enabled=true) -kubectl get bucket eks-capabilities-app-bucket +1. Uncomment the ArgoCD resources in `capabilities.tf` and `capabilities-iam.tf` +2. Provide Identity Center configuration via `argocd_capability_configuration` +3. 
Enable the capability with `enable_argocd_capability = true` -# Keep this name distinct from the ACK example bucket (eks-capabilities-bucket) +For more information, see [AWS EKS ArgoCD Documentation](https://docs.aws.amazon.com/eks/latest/userguide/argocd-considerations.html). -``` +## Cleanup -### Step 9: Verify ACK Example Resources +To clean up all resources, you must delete Kubernetes resources **before** destroying the cluster: -The ACK example resources are created independently of the WebAppStack: +**Important**: Delete KRO and ACK resources first (while cluster exists), then destroy Terraform infrastructure. ```bash -# DynamoDB table and S3 bucket created via ACK manifests -kubectl get table eks-capabilities-table -kubectl get bucket eks-capabilities-bucket +# Step 1: Delete KRO WebAppStack instance +# This cascades to delete all KRO-managed AWS resources (DynamoDB table, IAM role/policy, Pod Identity Association) +kubectl delete webappstack eks-capabilities-app +# Step 2: Delete WebAppStack-created AWS resources +# These are created by the RGD and may need explicit deletion +kubectl delete table eks-capabilities-app +kubectl delete bucket eks-capabilities-app-bucket 2>/dev/null || true # Only if bucket.enabled=true + +# Step 3: Delete ACK example resources +# Note: The ACK capability only supports delete_propagation_policy = "RETAIN", +# which means AWS resources are NOT automatically deleted when Kubernetes resources are deleted. +# You must manually delete AWS resources after deleting Kubernetes resources. + +# Delete Kubernetes resources +kubectl delete table eks-capabilities-table +kubectl delete bucket eks-capabilities-bucket + +# Manually delete AWS resources (required because RETAIN policy prevents automatic deletion) +aws dynamodb delete-table --table-name eks-capabilities-table --region ap-southeast-2 +aws s3 rm s3://eks-capabilities-bucket --recursive 2>/dev/null || true +aws s3api delete-bucket --bucket eks-capabilities-bucket --region ap-southeast-2 2>/dev/null || true + +# Step 4: Wait for AWS resources to be fully deleted +# Verify resources are gone: +kubectl get table,bucket,role.iam.services.k8s.aws,policy.iam.services.k8s.aws,podidentityassociation + +# Step 5: Destroy Terraform resources in phases (reverse of apply) +# Phase 1: Destroy Kubernetes resources first (while cluster exists) +# This deletes KRO RBAC and Resource Graph Definition +# Note: WebAppStack instance was already deleted in Step 1 +terraform destroy -target=kubernetes_manifest.kro_rbac \ + -target=kubernetes_manifest.capabilities_pod_identity_rbac \ + -target=kubernetes_manifest.capabilities_pod_identity_rbac_binding \ + -target=kubernetes_manifest.kro_rgd \ + -target=kubernetes_manifest.ack_dynamodb_table \ + -target=kubernetes_manifest.ack_s3_bucket + +# Phase 2: Destroy the EKS cluster and all infrastructure +# This deletes EKS Capabilities (ACK and KRO), cluster, and all infrastructure +terraform destroy ``` -## Understanding the Components - -### KRO Resource Graph Definition (RGD) +**Note**: KRO/ACK resources create AWS resources (DynamoDB, S3, IAM) that must be deleted while the cluster exists. Terraform-managed resources (RBAC, RGD) and EKS capabilities are automatically deleted during `terraform destroy`. 
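+
+If you script the cleanup, a small polling loop (a sketch; adjust the resource kinds to match what you actually created) can replace the manual "wait and verify" step before the final `terraform destroy`:
+
+```bash
+# Poll until no ACK/KRO-managed Kubernetes objects remain
+while kubectl get table,bucket,podidentityassociation 2>/dev/null | grep -q .; do
+  echo "Waiting for ACK-managed resources to finish deleting..."
+  sleep 10
+done
+```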
-The RGD template in `kubernetes/platform-team/eks-capabilities-appstack-rgd.yaml.tpl` defines: +Important: ACK Delete Propagation Policy -- **Schema**: Developer-facing API (WebAppStack) -- **Resources**: Multiple Kubernetes and AWS resources bundled together -- **Dependencies**: Automatic dependency resolution -- **Conditional Resources**: S3 bucket and Ingress only created when enabled +The ACK capability only supports `delete_propagation_policy = "RETAIN"` (this is the only valid value). This means: -### KRO Resource Group Instance +- When you delete a Kubernetes resource (e.g., `kubectl delete table`), the **AWS resource is NOT deleted** - it's retained +- The resource-level annotation `services.k8s.aws/deletion-policy: Delete` does not override the capability-level policy +- You **must manually delete AWS resources** after deleting Kubernetes resources using AWS CLI commands -The instance in `kubernetes/dev-team/eks-capabilities-app-instance.yaml` shows: +**Troubleshooting Cleanup**: -- Simple developer interface -- Single manifest deploys multiple resources -- Automatic resource creation and dependency management +If resources are stuck or won't delete: -### KRO-managed AWS Resources +1. **Check resource status:** -The WebAppStack uses ACK-backed resources under the hood: + ```bash + kubectl get webappstack eks-capabilities-app -o yaml + kubectl describe table eks-capabilities-table + kubectl describe bucket eks-capabilities-bucket + ``` -- DynamoDB table for app state -- Optional S3 bucket when enabled -- IAM role/policy for Pod Identity +2. **If table is stuck in "Terminating" state, check for finalizers:** -## Verifying the Deployment + ```bash + kubectl get table eks-capabilities-table -o jsonpath='{.metadata.finalizers}' + ``` -### Check Application Status +3. **Force delete if needed (use with caution):** -```bash -# Check the WebAppStack instance -kubectl get webappstack eks-capabilities-app -o yaml + ```bash + # Remove finalizers to force delete Kubernetes resource + # Note: With RETAIN policy, AWS resource will still exist and must be deleted manually + kubectl patch webappstack eks-capabilities-app -p '{"metadata":{"finalizers":[]}}' --type=merge + kubectl patch table eks-capabilities-table -p '{"metadata":{"finalizers":[]}}' --type=merge + kubectl patch bucket eks-capabilities-bucket -p '{"metadata":{"finalizers":[]}}' --type=merge + ``` -# Check deployment -kubectl get deployment eks-capabilities-app +4. **Manually delete AWS resources (required - RETAIN is the only supported policy):** -# Check service -kubectl get service eks-capabilities-app + ```bash + # Delete DynamoDB table directly in AWS + aws dynamodb delete-table --table-name eks-capabilities-table --region ap-southeast-2 -# Check DynamoDB table -kubectl get table eks-capabilities-app + # Delete S3 bucket (must be empty first) + aws s3 rm s3://eks-capabilities-bucket --recursive 2>/dev/null || true + aws s3api delete-bucket --bucket eks-capabilities-bucket --region ap-southeast-2 2>/dev/null || true + ``` -# Check IAM role and policy (ACK) -kubectl get role.iam.services.k8s.aws eks-capabilities-app-role -kubectl get policy.iam.services.k8s.aws eks-capabilities-app-policy -``` +5. 
**Verify AWS resources are deleted before proceeding:** -### Check Pod Identity + ```bash + # Check DynamoDB tables + aws dynamodb list-tables --region ap-southeast-2 -```bash -# Verify Pod Identity Association -kubectl get podidentityassociation eks-capabilities-app + # Check S3 buckets + aws s3 ls -# Check ServiceAccount -kubectl get serviceaccount eks-capabilities-app -o yaml -``` + # Check IAM roles + aws iam list-roles --query 'Roles[?contains(RoleName, `eks-capabilities`)].RoleName' + ``` -### Access the Application +## Troubleshooting -#### Option 1: Port Forward (Quick Test) +### Kubernetes Provider "Unauthorized" Error After Creating Access Entries -```bash -kubectl port-forward service/eks-capabilities-app 8080:80 -``` +**Symptom**: After creating access entries, Kubernetes resources fail with `Error: Unauthorized` -Then open in your browser. +**Cause**: When using `terraform apply -target`, the Kubernetes provider's auth token data source may not refresh. The token was generated before the access entries existed. -#### Option 2: ALB (Production) +**Solution**: Run `terraform apply` again without `-target` to refresh all data sources and apply remaining resources: ```bash -kubectl get ingress eks-capabilities-app-ingress -o jsonpath='{.status.loadBalancer.ingress[0].hostname}' +terraform apply ``` -## ArgoCD Capability +The module now includes dependencies to ensure Kubernetes resources wait for access entries, but you still need to refresh the provider's auth token by running apply again. -The ArgoCD capability is disabled by default in this example. If you enable it, it provides: +### kubectl Authentication Errors -- Managed ArgoCD installation -- GitOps workflow support -- Application synchronization from Git repositories +**Symptom**: `"the server has asked for the client to provide credentials"` or `"You must be logged in to the server"` -For full ArgoCD setup, refer to AWS documentation or future examples. +**Solution**: -## Cleanup - -To clean up all resources: - -```bash -# Delete the application instance (cascades to all resources) -kubectl delete webappstack eks-capabilities-app - -# Delete ACK example resources -kubectl delete -f kubernetes/ack-resources/ +1. **Check your IAM principal and access entries:** -# Destroy Terraform resources (this will also delete RBAC and RGD) -terraform destroy -``` + ```bash + aws sts get-caller-identity --query Arn --output text + aws eks list-access-entries --cluster-name eks-capabilities --region ap-southeast-2 + terraform state list | grep cluster_admins + ``` -## Troubleshooting +2. 
**If your ARN is missing:** + - Add it to `cluster_admin_arns` in `terraform.tfvars` + - Run `terraform apply` to create the access entry ### Capabilities Not Active @@ -281,9 +335,9 @@ If capabilities show as "CREATING" for a long time: ```bash # Check capability status -aws eks describe-capability --cluster-name --capability-name ACK -aws eks describe-capability --cluster-name --capability-name KRO -aws eks describe-capability --cluster-name --capability-name ARGOCD +aws eks describe-capability --cluster-name eks-capabilities --capability-name ACK +aws eks describe-capability --cluster-name eks-capabilities --capability-name KRO +# ArgoCD not currently supported ``` ### KRO Resources Not Creating @@ -344,7 +398,7 @@ aws eks describe-capability --cluster-name --capability-name ARGO ## Next Steps - Explore creating more complex RGDs with multiple AWS services -- Set up ArgoCD for GitOps workflows +- Configure AWS Identity Center for ArgoCD capability (future) - Implement namespace-specific IAM roles using IAMRoleSelector - Create additional platform abstractions for different application types diff --git a/examples/eks-capabilities/kubernetes-resources.tf b/examples/eks-capabilities/kubernetes-resources.tf index f735703..76f9156 100644 --- a/examples/eks-capabilities/kubernetes-resources.tf +++ b/examples/eks-capabilities/kubernetes-resources.tf @@ -208,3 +208,30 @@ resource "kubernetes_manifest" "ack_s3_bucket" { module.eks ] } + +# ============================================================================= +# KRO WebAppStack Instance (Application Deployment) +# ============================================================================= + +# WebAppStack instance - automatically deploys the application +resource "kubernetes_manifest" "kro_webappstack_instance" { + count = local.enable_kro ? 
1 : 0 + + manifest = yamldecode( + replace( + file("${path.module}/kubernetes/dev-team/eks-capabilities-app-instance.yaml"), + "region: ap-southeast-2", + "region: ${var.aws_region}" + ) + ) + + field_manager { + force_conflicts = true + } + + depends_on = [ + module.eks, + kubernetes_manifest.kro_rgd, + kubernetes_manifest.kro_rbac + ] +} diff --git a/examples/eks-capabilities/kubernetes/platform-team/eks-capabilities-appstack-rgd.yaml.tpl b/examples/eks-capabilities/kubernetes/platform-team/eks-capabilities-appstack-rgd.yaml.tpl index b41e89e..c0a2e7e 100644 --- a/examples/eks-capabilities/kubernetes/platform-team/eks-capabilities-appstack-rgd.yaml.tpl +++ b/examples/eks-capabilities/kubernetes/platform-team/eks-capabilities-appstack-rgd.yaml.tpl @@ -27,6 +27,9 @@ spec: resources: # Kubernetes Deployment - id: deployment + dependsOn: + - podidentity + - serviceaccount template: apiVersion: apps/v1 kind: Deployment @@ -55,7 +58,7 @@ spec: name: http env: - name: AWS_REGION - value: "__AWS_REGION__" + value: ${schema.spec.bucket.region} - name: DYNAMODB_TABLE_NAME value: ${schema.spec.name} resources: diff --git a/examples/eks-capabilities/main.tf b/examples/eks-capabilities/main.tf index 6cb9241..136c2b8 100644 --- a/examples/eks-capabilities/main.tf +++ b/examples/eks-capabilities/main.tf @@ -116,7 +116,6 @@ module "eks" { node_disk_size = var.node_disk_size # Enable EKS Capabilities - # Note: ArgoCD requires AWS Identity Center setup, so it's disabled by default enable_ack_capability = var.enable_ack_capability ack_capability_iam_policy_arns = { s3 = "arn:aws:iam::aws:policy/AmazonS3FullAccess" @@ -126,7 +125,7 @@ module "eks" { } enable_kro_capability = var.enable_kro_capability kro_capability_role_arn = var.kro_capability_role_arn - enable_argocd_capability = var.enable_argocd_capability # Requires AWS Identity Center configuration + enable_argocd_capability = var.enable_argocd_capability # Optional: Enable EBS CSI Driver for persistent volumes enable_ebs_csi_driver = var.enable_ebs_csi_driver @@ -134,5 +133,11 @@ module "eks" { # Enable Pod Identity Agent for AWS SDK credentials in pods enable_pod_identity_agent = var.enable_pod_identity_agent + # Cluster admin access entries + cluster_admin_arns = var.cluster_admin_arns + tags = var.tags + + # Explicitly depend on the VPC module to ensure all its resources are created before the EKS cluster + depends_on = [module.vpc] } diff --git a/examples/eks-capabilities/outputs.tf b/examples/eks-capabilities/outputs.tf index a77354e..3d833fe 100644 --- a/examples/eks-capabilities/outputs.tf +++ b/examples/eks-capabilities/outputs.tf @@ -63,10 +63,7 @@ output "next_steps" { 3. Verify Resource Graph Definition (automatically deployed): kubectl get resourcegraphdefinition eks-capabilities-appstack.kro.run - 4. Deploy application using KRO (dev team): - kubectl apply -f kubernetes/dev-team/eks-capabilities-app-instance.yaml - - 5. Verify ACK resources (automatically deployed): + 4. Verify ACK resources (automatically deployed): kubectl get table,bucket,role 6. 
Verify capabilities: diff --git a/examples/eks-capabilities/terraform.tfvars.example b/examples/eks-capabilities/terraform.tfvars.example index dd93d94..36f3ba8 100644 --- a/examples/eks-capabilities/terraform.tfvars.example +++ b/examples/eks-capabilities/terraform.tfvars.example @@ -4,6 +4,13 @@ aws_region = "ap-southeast-2" cluster_name = "eks-capabilities" +# Cluster admin access - IAM users/roles to grant admin access via EKS access entries +# Replace with your actual IAM ARNs +cluster_admin_arns = [ + # "arn:aws:iam::YOUR_ACCOUNT:role/your-admin-role", + # "arn:aws:iam::YOUR_ACCOUNT:user/your-user" +] + # Optional: Override default node configuration # node_instance_types = ["t3.medium"] # node_desired_size = 2 diff --git a/examples/eks-capabilities/variables.tf b/examples/eks-capabilities/variables.tf index 901072c..9b755ad 100644 --- a/examples/eks-capabilities/variables.tf +++ b/examples/eks-capabilities/variables.tf @@ -71,9 +71,9 @@ variable "enable_kro_capability" { } variable "enable_argocd_capability" { - description = "Whether to enable the ArgoCD capability" + description = "Whether to enable the ArgoCD capability. Note: ArgoCD requires a configuration parameter and AWS Identity Center setup." type = bool - default = false + default = true } variable "kro_capability_role_arn" { @@ -82,6 +82,12 @@ variable "kro_capability_role_arn" { default = null } +variable "cluster_admin_arns" { + description = "List of IAM user/role ARNs to grant cluster admin access via EKS access entries" + type = list(string) + default = [] +} + variable "tags" { description = "Map of tags to apply to all resources" type = map(string) diff --git a/fargate.tf b/fargate.tf index 4ce20dd..ffcc518 100644 --- a/fargate.tf +++ b/fargate.tf @@ -55,6 +55,6 @@ resource "aws_eks_fargate_profile" "default" { tags = merge(var.tags, each.value.tags != null ? each.value.tags : {}) depends_on = [ - aws_iam_role_policy_attachment.eks_fargate_pod_execution_role[0], + aws_iam_role_policy_attachment.eks_fargate_pod_execution_role[0] ] } diff --git a/outputs.tf b/outputs.tf index 1061855..b0ba9d7 100644 --- a/outputs.tf +++ b/outputs.tf @@ -113,6 +113,20 @@ output "kro_capability_arn" { } output "argocd_capability_arn" { - description = "ARN of the ArgoCD capability (when enabled)" - value = var.enable_argocd_capability ? aws_eks_capability.argocd[0].arn : null + description = "ARN of the ArgoCD capability (when enabled). NOTE: ArgoCD not currently supported - scaffolded for future use." + value = null # ArgoCD capability is commented out (scaffolded) +} + +# ============================================================================= +# Access Entry Outputs +# ============================================================================= + +output "ec2_access_entry_created" { + description = "Whether an access entry was created for EC2 nodes" + value = local.ec2_needs_access_entry +} + +output "fargate_access_entry_created" { + description = "Whether an access entry was created for Fargate pods" + value = local.fargate_needs_access_entry } diff --git a/test.tf b/test.tf new file mode 100644 index 0000000..e69de29 diff --git a/variables.tf b/variables.tf index 4aef633..619dc38 100644 --- a/variables.tf +++ b/variables.tf @@ -241,7 +241,7 @@ variable "enable_kro_capability" { } variable "enable_argocd_capability" { - description = "Whether to enable ArgoCD GitOps capability" + description = "Whether to enable ArgoCD GitOps capability. NOTE: Not currently supported - requires AWS Identity Center configuration. 
Scaffolded for future use." type = bool default = false } @@ -265,13 +265,13 @@ variable "kro_capability_role_arn" { } variable "argocd_capability_role_arn" { - description = "IAM role ARN for ArgoCD capability. If not provided, AWS will create a default role." + description = "IAM role ARN for ArgoCD capability. NOTE: ArgoCD not currently supported - requires AWS Identity Center. Scaffolded for future use." type = string default = null } variable "argocd_capability_configuration" { - description = "Configuration JSON for ArgoCD capability. If not provided, a minimal configuration will be used. Full ArgoCD setup requires AWS Identity Center configuration." + description = "Configuration JSON for ArgoCD capability. NOTE: ArgoCD not currently supported - requires AWS Identity Center configuration. Scaffolded for future use." type = string default = null } @@ -280,6 +280,18 @@ variable "argocd_capability_configuration" { # Common Variables # ============================================================================= +variable "cluster_admin_arns" { + description = "List of IAM user/role ARNs to grant cluster admin access via EKS access entries. Only used when capabilities are enabled or cluster_authentication_mode is not CONFIG_MAP. Defaults to empty list." + type = list(string) + default = [] +} + +variable "access_entry_wait_duration" { + description = "Duration to wait after creating EKS access entries before creating node groups/Fargate profiles. This allows AWS to propagate the access entries. Defaults to 30s." + type = string + default = "30s" +} + variable "tags" { description = "Map of tags to apply to all resources" type = map(string)