From 153709afc4d01ca7366234c3c5b1b5b098d7cf8d Mon Sep 17 00:00:00 2001 From: Harshith <104987110+Harshithraj24@users.noreply.github.com> Date: Fri, 4 Apr 2025 11:11:55 +0530 Subject: [PATCH 01/20] feat: debug permissions for karpenter and alb controller (#168) --- aws/e6data_with_existing_eks/support.tf | 5 +---- aws/e6data_with_existing_eks/terraform.tfvars | 1 + aws/e6data_with_existing_eks/variables.tf | 5 +++++ aws/e6data_with_existing_vpc/support.tf | 5 +---- aws/e6data_with_existing_vpc/terraform.tfvars | 2 ++ aws/e6data_with_existing_vpc/variables.tf | 5 +++++ aws/e6data_with_new_eks/support.tf | 6 +----- aws/e6data_with_new_eks/terraform.tfvars | 2 ++ aws/e6data_with_new_eks/variables.tf | 6 ++++++ azure/e6data_with_existing_aks/support.tf | 5 +---- azure/e6data_with_existing_aks/terraform.tfvars | 2 ++ azure/e6data_with_existing_aks/variables.tf | 6 ++++++ azure/e6data_with_existing_vnet/support.tf | 5 +---- azure/e6data_with_existing_vnet/terraform.tfvars | 2 ++ azure/e6data_with_existing_vnet/variables.tf | 6 ++++++ azure/e6data_with_new_aks/support.tf | 5 +---- azure/e6data_with_new_aks/terraform.tfvars | 2 ++ azure/e6data_with_new_aks/variables.tf | 6 ++++++ gcp/e6data_with_existing_gke/support.tf | 1 + gcp/e6data_with_existing_gke/terraform.tfvars | 2 ++ gcp/e6data_with_existing_gke/variables.tf | 6 ++++++ gcp/e6data_with_existing_vpc/support.tf | 1 + gcp/e6data_with_existing_vpc/terraform.tfvars | 2 ++ gcp/e6data_with_existing_vpc/variables.tf | 6 ++++++ gcp/e6data_with_new_gke/support.tf | 1 + gcp/e6data_with_new_gke/terraform.tfvars | 2 ++ gcp/e6data_with_new_gke/variables.tf | 6 ++++++ 27 files changed, 78 insertions(+), 25 deletions(-) diff --git a/aws/e6data_with_existing_eks/support.tf b/aws/e6data_with_existing_eks/support.tf index dcc76f5c..d8d6295b 100644 --- a/aws/e6data_with_existing_eks/support.tf +++ b/aws/e6data_with_existing_eks/support.tf @@ -14,10 +14,7 @@ locals { type = "AWS" oidc_value = aws_iam_role.e6data_engine_role.arn 
control_plane_user = ["e6data-${var.workspace_name}-user"] - } - karpenter = { - nodepool = local.e6data_nodepool_name - nodeclass = local.e6data_nodeclass_name + debug_namespaces = var.debug_namespaces } }) mapUsers = try(data.kubernetes_config_map_v1.aws_auth_read.data["mapUsers"], "") diff --git a/aws/e6data_with_existing_eks/terraform.tfvars b/aws/e6data_with_existing_eks/terraform.tfvars index de0937e9..e77110c1 100644 --- a/aws/e6data_with_existing_eks/terraform.tfvars +++ b/aws/e6data_with_existing_eks/terraform.tfvars @@ -18,6 +18,7 @@ bucket_names = ["*"] ### List of bucket names that the e6data engine queries and kubernetes_namespace = "e6data" ### Value of the Kubernetes namespace to deploy the e6data workspace. helm_chart_version = "2.1.7" ### e6data workspace Helm chart version to be used. +debug_namespaces = ["kube-system"] ### Below are the tags which will be applied to all the resources created by this Terraform script. cost_tags = { diff --git a/aws/e6data_with_existing_eks/variables.tf b/aws/e6data_with_existing_eks/variables.tf index a9de0d3e..4a1779ed 100644 --- a/aws/e6data_with_existing_eks/variables.tf +++ b/aws/e6data_with_existing_eks/variables.tf @@ -83,3 +83,8 @@ variable "nodepool_cpu_limits" { default = 100000 } +variable "debug_namespaces" { + type = list(string) + description = "kaprneter and alb controller namespaces" + default = ["kube-system"] +} \ No newline at end of file diff --git a/aws/e6data_with_existing_vpc/support.tf b/aws/e6data_with_existing_vpc/support.tf index fff610d5..5f16c0d4 100644 --- a/aws/e6data_with_existing_vpc/support.tf +++ b/aws/e6data_with_existing_vpc/support.tf @@ -17,10 +17,7 @@ locals { type = "AWS" oidc_value = aws_iam_role.e6data_engine_role.arn control_plane_user = ["e6data-${var.workspace_name}-user"] - } - karpenter = { - nodepool = local.e6data_nodepool_name - nodeclass = local.e6data_nodeclass_name + debug_namespaces = var.debug_namespaces } }) } diff --git 
a/aws/e6data_with_existing_vpc/terraform.tfvars b/aws/e6data_with_existing_vpc/terraform.tfvars index 1537ba33..d3e0936e 100644 --- a/aws/e6data_with_existing_vpc/terraform.tfvars +++ b/aws/e6data_with_existing_vpc/terraform.tfvars @@ -52,6 +52,8 @@ karpenter_namespace = "kube-system" ### Namespace to deploy the karpe karpenter_service_account_name = "karpenter" ### Service account name for the karpenter karpenter_release_version = "1.0.8" ### Version of the karpenter Helm chart +debug_namespaces = ["kube-system"] + #### Additional ingress/egress rules for the EKS Security Group # additional_ingress_rules = [ # { diff --git a/aws/e6data_with_existing_vpc/variables.tf b/aws/e6data_with_existing_vpc/variables.tf index e80748b5..4dc87f14 100644 --- a/aws/e6data_with_existing_vpc/variables.tf +++ b/aws/e6data_with_existing_vpc/variables.tf @@ -263,6 +263,11 @@ variable "additional_egress_rules" { default = [] } +variable "debug_namespaces" { + type = list(string) + description = "kaprneter and alb controller namespaces" + default = ["kube-system"] +} variable "vpc_cni_version" { description = "Version of the VPC CNI to use" type = string diff --git a/aws/e6data_with_new_eks/support.tf b/aws/e6data_with_new_eks/support.tf index 6ff25b5d..8765c581 100644 --- a/aws/e6data_with_new_eks/support.tf +++ b/aws/e6data_with_new_eks/support.tf @@ -18,13 +18,9 @@ locals { type = "AWS" oidc_value = aws_iam_role.e6data_engine_role.arn control_plane_user = ["e6data-${var.workspace_name}-user"] - } - karpenter = { - nodepool = local.e6data_nodepool_name - nodeclass = local.e6data_nodeclass_name + debug_namespaces = var.debug_namespaces } }) - } resource "random_string" "random" { diff --git a/aws/e6data_with_new_eks/terraform.tfvars b/aws/e6data_with_new_eks/terraform.tfvars index cd428029..f939df1d 100644 --- a/aws/e6data_with_new_eks/terraform.tfvars +++ b/aws/e6data_with_new_eks/terraform.tfvars @@ -52,6 +52,8 @@ karpenter_namespace = "kube-system" ### Namespace to deploy the karpe 
karpenter_service_account_name = "karpenter" ### Service account name for the karpenter karpenter_release_version = "1.0.8" ### Version of the karpenter Helm chart +debug_namespaces = ["kube-system"] + #### Additional ingress/egress rules for the EKS Security Group # additional_ingress_rules = [ # { diff --git a/aws/e6data_with_new_eks/variables.tf b/aws/e6data_with_new_eks/variables.tf index fc4e88c4..cce795c7 100644 --- a/aws/e6data_with_new_eks/variables.tf +++ b/aws/e6data_with_new_eks/variables.tf @@ -262,6 +262,12 @@ variable "additional_egress_rules" { default = [] } +variable "debug_namespaces" { + type = list(string) + description = "kaprneter and alb controller namespaces" + default = ["kube-system"] +} + variable "vpc_cni_version" { description = "Version of the VPC CNI to use" type = string diff --git a/azure/e6data_with_existing_aks/support.tf b/azure/e6data_with_existing_aks/support.tf index 83535b0c..41c16a59 100644 --- a/azure/e6data_with_existing_aks/support.tf +++ b/azure/e6data_with_existing_aks/support.tf @@ -32,10 +32,7 @@ locals { type = "AZURE" oidc_value = azurerm_user_assigned_identity.e6data_identity.client_id control_plane_user = [azurerm_user_assigned_identity.federated_identity.principal_id] - } - karpenter = { - nodepool = local.e6data_nodepool_name - nodeclass = local.e6data_nodeclass_name + debug_namespaces = var.debug_namespaces } }) diff --git a/azure/e6data_with_existing_aks/terraform.tfvars b/azure/e6data_with_existing_aks/terraform.tfvars index 8e1b8ae1..010760b4 100644 --- a/azure/e6data_with_existing_aks/terraform.tfvars +++ b/azure/e6data_with_existing_aks/terraform.tfvars @@ -38,6 +38,8 @@ key_vault_rg_name = "" # The resourc nginx_ingress_controller_namespace = "kube-system" # Namespace where the Nginx Ingress Controller will be deployed nginx_ingress_controller_version = "4.7.1" # Version of the Nginx Ingress Controller to be installed +debug_namespaces = ["kube-system"] + # Toggle to decide whether to deploy the akv2k8s 
Helm chart. # Set to true to deploy, false to skip deployment. deploy_akv2k8s = false diff --git a/azure/e6data_with_existing_aks/variables.tf b/azure/e6data_with_existing_aks/variables.tf index 7e776552..a1adc1a3 100644 --- a/azure/e6data_with_existing_aks/variables.tf +++ b/azure/e6data_with_existing_aks/variables.tf @@ -116,4 +116,10 @@ variable "deploy_nginx_ingress" { description = "Decide whether to deploy nginx ingress" type = bool default = true +} + +variable "debug_namespaces" { + type = list(string) + description = "kaprneter and alb controller namespaces" + default = ["kube-system"] } \ No newline at end of file diff --git a/azure/e6data_with_existing_vnet/support.tf b/azure/e6data_with_existing_vnet/support.tf index 00845029..1167bbb8 100644 --- a/azure/e6data_with_existing_vnet/support.tf +++ b/azure/e6data_with_existing_vnet/support.tf @@ -35,10 +35,7 @@ locals { type = "AZURE" oidc_value = azurerm_user_assigned_identity.e6data_identity.client_id control_plane_user = [azurerm_user_assigned_identity.federated_identity.principal_id] - } - karpenter = { - nodepool = local.e6data_nodepool_name - nodeclass = local.e6data_nodeclass_name + debug_namespaces = var.debug_namespaces } }) diff --git a/azure/e6data_with_existing_vnet/terraform.tfvars b/azure/e6data_with_existing_vnet/terraform.tfvars index 63f60aa7..4d03f84e 100644 --- a/azure/e6data_with_existing_vnet/terraform.tfvars +++ b/azure/e6data_with_existing_vnet/terraform.tfvars @@ -49,6 +49,8 @@ karpenter_namespace = "kube-system" # Namespace karpenter_service_account_name = "karpenter" # Service account name for Karpenter karpenter_release_version = "0.7.4" # Karpenter release version +debug_namespaces = ["kube-system"] + # Key Vault Configuration key_vault_name = "" # Please provide the Key Vault name in which the certificate for the domain is present. If left blank, a new Key Vault will be created in the AKS resource group. key_vault_rg_name = "" # The resource group for the specified Key Vault. 
If left blank, it will default to the AKS resource group. For more info : https://docs.e6data.com/product-documentation/connectivity/endpoints diff --git a/azure/e6data_with_existing_vnet/variables.tf b/azure/e6data_with_existing_vnet/variables.tf index f78d27d5..00b67a28 100644 --- a/azure/e6data_with_existing_vnet/variables.tf +++ b/azure/e6data_with_existing_vnet/variables.tf @@ -168,3 +168,9 @@ variable "identity_id" { type = string description = "Identity ID from the e6data console." } + +variable "debug_namespaces" { + type = list(string) + description = "kaprneter and alb controller namespaces" + default = ["kube-system"] +} \ No newline at end of file diff --git a/azure/e6data_with_new_aks/support.tf b/azure/e6data_with_new_aks/support.tf index 15471da1..18c6dc02 100644 --- a/azure/e6data_with_new_aks/support.tf +++ b/azure/e6data_with_new_aks/support.tf @@ -43,10 +43,7 @@ locals { type = "AZURE" oidc_value = azurerm_user_assigned_identity.e6data_identity.client_id control_plane_user = [azurerm_user_assigned_identity.federated_identity.principal_id] - } - karpenter = { - nodepool = local.e6data_nodepool_name - nodeclass = local.e6data_nodeclass_name + debug_namespaces = var.debug_namespaces } }) diff --git a/azure/e6data_with_new_aks/terraform.tfvars b/azure/e6data_with_new_aks/terraform.tfvars index da2a164a..dbdd117a 100644 --- a/azure/e6data_with_new_aks/terraform.tfvars +++ b/azure/e6data_with_new_aks/terraform.tfvars @@ -47,6 +47,8 @@ karpenter_namespace = "kube-system" # Namespace karpenter_service_account_name = "karpenter" # Service account name for Karpenter karpenter_release_version = "0.7.4" # Karpenter release version +debug_namespaces = ["kube-system"] + # Key Vault Configuration key_vault_name = "" # Please provide the Key Vault name in which the certificate for the domain is present. If left blank, a new Key Vault will be created in the AKS resource group. key_vault_rg_name = "" # The resource group for the specified Key Vault. 
If left blank, it will default to the AKS resource group. For more info : https://docs.e6data.com/product-documentation/connectivity/endpoints diff --git a/azure/e6data_with_new_aks/variables.tf b/azure/e6data_with_new_aks/variables.tf index 92a634e4..faff8400 100644 --- a/azure/e6data_with_new_aks/variables.tf +++ b/azure/e6data_with_new_aks/variables.tf @@ -161,4 +161,10 @@ variable "identity_id" { variable "nodepool_instance_arch" { type = list(string) description = "Instance arch for nodepool" +} + +variable "debug_namespaces" { + type = list(string) + description = "kaprneter and alb controller namespaces" + default = ["kube-system"] } \ No newline at end of file diff --git a/gcp/e6data_with_existing_gke/support.tf b/gcp/e6data_with_existing_gke/support.tf index ce5c02bb..56e59f2e 100644 --- a/gcp/e6data_with_existing_gke/support.tf +++ b/gcp/e6data_with_existing_gke/support.tf @@ -13,6 +13,7 @@ locals { type = "GCP" oidc_value = google_service_account.workspace_sa.email control_plane_user = var.control_plane_user + debug_namespaces = var.debug_namespaces } }) diff --git a/gcp/e6data_with_existing_gke/terraform.tfvars b/gcp/e6data_with_existing_gke/terraform.tfvars index 2c900159..f5941e37 100644 --- a/gcp/e6data_with_existing_gke/terraform.tfvars +++ b/gcp/e6data_with_existing_gke/terraform.tfvars @@ -17,6 +17,8 @@ spot_enabled = true # A boolean that represents whether the underly kubernetes_namespace = "namespace1" +debug_namespaces = ["kube-system"] + cost_labels = {} # Cost labels for tracking costs # Note: The variable cost_labels only accepts lowercase letters ([a-z]), numeric characters ([0-9]), underscores (_) and dashes (-). 
diff --git a/gcp/e6data_with_existing_gke/variables.tf b/gcp/e6data_with_existing_gke/variables.tf index f8a9cef3..cbe5a519 100644 --- a/gcp/e6data_with_existing_gke/variables.tf +++ b/gcp/e6data_with_existing_gke/variables.tf @@ -71,3 +71,9 @@ variable "spot_enabled" { type = bool description = "Enable spot instances in node pools" } + +variable "debug_namespaces" { + type = list(string) + description = "kaprneter and alb controller namespaces" + default = ["kube-system"] +} \ No newline at end of file diff --git a/gcp/e6data_with_existing_vpc/support.tf b/gcp/e6data_with_existing_vpc/support.tf index 7c269605..0c0dadcc 100644 --- a/gcp/e6data_with_existing_vpc/support.tf +++ b/gcp/e6data_with_existing_vpc/support.tf @@ -13,6 +13,7 @@ locals { type = "GCP" oidc_value = google_service_account.workspace_sa.email control_plane_user = var.control_plane_user + debug_namespaces = var.debug_namespaces } }) diff --git a/gcp/e6data_with_existing_vpc/terraform.tfvars b/gcp/e6data_with_existing_vpc/terraform.tfvars index eb81b3ae..bbe030cf 100644 --- a/gcp/e6data_with_existing_vpc/terraform.tfvars +++ b/gcp/e6data_with_existing_vpc/terraform.tfvars @@ -44,6 +44,8 @@ authorized_networks = { #External networks that can access the Kubernet # Kubernetes Namespace kubernetes_namespace = "namespace" # The namespace to use for Kubernetes resources +debug_namespaces = ["kube-system"] + # Cost Labels cost_labels = {} # Cost labels for tracking costs # Note: The variable cost_labels only accepts lowercase letters ([a-z]), numeric characters ([0-9]), underscores (_) and dashes (-). 
diff --git a/gcp/e6data_with_existing_vpc/variables.tf b/gcp/e6data_with_existing_vpc/variables.tf index 37002dcc..c966348c 100644 --- a/gcp/e6data_with_existing_vpc/variables.tf +++ b/gcp/e6data_with_existing_vpc/variables.tf @@ -159,4 +159,10 @@ variable "deletion_protection" { variable "authorized_networks" { type = map(string) description = "authorized_networks" +} + +variable "debug_namespaces" { + type = list(string) + description = "kaprneter and alb controller namespaces" + default = ["kube-system"] } \ No newline at end of file diff --git a/gcp/e6data_with_new_gke/support.tf b/gcp/e6data_with_new_gke/support.tf index ed54b908..daee2b4b 100644 --- a/gcp/e6data_with_new_gke/support.tf +++ b/gcp/e6data_with_new_gke/support.tf @@ -15,6 +15,7 @@ locals { type = "GCP" oidc_value = google_service_account.workspace_sa.email control_plane_user = var.control_plane_user + debug_namespaces = var.debug_namespaces } }) diff --git a/gcp/e6data_with_new_gke/terraform.tfvars b/gcp/e6data_with_new_gke/terraform.tfvars index 906448aa..fcdef537 100644 --- a/gcp/e6data_with_new_gke/terraform.tfvars +++ b/gcp/e6data_with_new_gke/terraform.tfvars @@ -43,6 +43,8 @@ authorized_networks = { #External networks that can access the Kubernet # Kubernetes Namespace kubernetes_namespace = "namespace" # The namespace to use for Kubernetes resources +debug_namespaces = ["kube-system"] + # Cost Labels cost_labels = {} # Cost labels for tracking costs # Note: The variable cost_labels only accepts lowercase letters ([a-z]), numeric characters ([0-9]), underscores (_) and dashes (-). 
diff --git a/gcp/e6data_with_new_gke/variables.tf b/gcp/e6data_with_new_gke/variables.tf index 25783214..ced919da 100644 --- a/gcp/e6data_with_new_gke/variables.tf +++ b/gcp/e6data_with_new_gke/variables.tf @@ -149,4 +149,10 @@ variable "deletion_protection" { variable "authorized_networks" { type = map(string) description = "authorized_networks" +} + +variable "debug_namespaces" { + type = list(string) + description = "kaprneter and alb controller namespaces" + default = ["kube-system"] } \ No newline at end of file From 127e7511c45fd32b0643dfac02e3c4777d59b17d Mon Sep 17 00:00:00 2001 From: Harshith <104987110+Harshithraj24@users.noreply.github.com> Date: Sat, 5 Apr 2025 11:55:32 +0530 Subject: [PATCH 02/20] fix: cpu manager policy options full pcpus only true (#173) --- .../karpenter-provisioner-manifests/nodeclass.yaml | 1 + .../karpenter-provisioner-manifests/nodeclass.yaml | 1 + .../karpenter-provisioner-manifests/nodeclass.yaml | 1 + 3 files changed, 3 insertions(+) diff --git a/aws/e6data_with_existing_eks/karpenter-provisioner-manifests/nodeclass.yaml b/aws/e6data_with_existing_eks/karpenter-provisioner-manifests/nodeclass.yaml index 8bb7aebd..999d0805 100644 --- a/aws/e6data_with_existing_eks/karpenter-provisioner-manifests/nodeclass.yaml +++ b/aws/e6data_with_existing_eks/karpenter-provisioner-manifests/nodeclass.yaml @@ -23,6 +23,7 @@ spec: userData: | echo "$(jq '.allowedUnsafeSysctls += ["net.core.somaxconn","net.ipv4.ip_local_port_range"]' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json echo "$(jq '.cpuManagerPolicy = "static"' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json + echo "$(jq '.cpuManagerPolicyOptions = {"full-pcpus-only": "true"}' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json mount_location="/app/tmp" mkdir -p $mount_location yum install nvme-cli -y diff --git 
a/aws/e6data_with_existing_vpc/karpenter-provisioner-manifests/nodeclass.yaml b/aws/e6data_with_existing_vpc/karpenter-provisioner-manifests/nodeclass.yaml index 398cc0e9..f105ce21 100644 --- a/aws/e6data_with_existing_vpc/karpenter-provisioner-manifests/nodeclass.yaml +++ b/aws/e6data_with_existing_vpc/karpenter-provisioner-manifests/nodeclass.yaml @@ -23,6 +23,7 @@ spec: userData: | echo "$(jq '.allowedUnsafeSysctls += ["net.core.somaxconn","net.ipv4.ip_local_port_range"]' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json echo "$(jq '.cpuManagerPolicy = "static"' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json + echo "$(jq '.cpuManagerPolicyOptions = {"full-pcpus-only": "true"}' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json mount_location="/app/tmp" mkdir -p $mount_location yum install nvme-cli -y diff --git a/aws/e6data_with_new_eks/karpenter-provisioner-manifests/nodeclass.yaml b/aws/e6data_with_new_eks/karpenter-provisioner-manifests/nodeclass.yaml index 8bb7aebd..999d0805 100644 --- a/aws/e6data_with_new_eks/karpenter-provisioner-manifests/nodeclass.yaml +++ b/aws/e6data_with_new_eks/karpenter-provisioner-manifests/nodeclass.yaml @@ -23,6 +23,7 @@ spec: userData: | echo "$(jq '.allowedUnsafeSysctls += ["net.core.somaxconn","net.ipv4.ip_local_port_range"]' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json echo "$(jq '.cpuManagerPolicy = "static"' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json + echo "$(jq '.cpuManagerPolicyOptions = {"full-pcpus-only": "true"}' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json mount_location="/app/tmp" mkdir -p $mount_location yum install nvme-cli -y From 7601a2079f66457fb8f179762300238995c2e72c Mon Sep 17 00:00:00 2001 From: Harshith 
<104987110+Harshithraj24@users.noreply.github.com> Date: Thu, 10 Apr 2025 00:50:07 +0530 Subject: [PATCH 03/20] Update nodeclass.yaml --- .../karpenter-provisioner-manifests/nodeclass.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/aws/e6data_with_new_eks/karpenter-provisioner-manifests/nodeclass.yaml b/aws/e6data_with_new_eks/karpenter-provisioner-manifests/nodeclass.yaml index 999d0805..3f645483 100644 --- a/aws/e6data_with_new_eks/karpenter-provisioner-manifests/nodeclass.yaml +++ b/aws/e6data_with_new_eks/karpenter-provisioner-manifests/nodeclass.yaml @@ -22,8 +22,6 @@ spec: volumeType: gp3 userData: | echo "$(jq '.allowedUnsafeSysctls += ["net.core.somaxconn","net.ipv4.ip_local_port_range"]' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json - echo "$(jq '.cpuManagerPolicy = "static"' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json - echo "$(jq '.cpuManagerPolicyOptions = {"full-pcpus-only": "true"}' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json mount_location="/app/tmp" mkdir -p $mount_location yum install nvme-cli -y From 93c606e0129a088f4a986fcffa018c2cb0e94cf0 Mon Sep 17 00:00:00 2001 From: Harshith <104987110+Harshithraj24@users.noreply.github.com> Date: Thu, 10 Apr 2025 00:50:20 +0530 Subject: [PATCH 04/20] Update nodeclass.yaml --- .../karpenter-provisioner-manifests/nodeclass.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/aws/e6data_with_existing_vpc/karpenter-provisioner-manifests/nodeclass.yaml b/aws/e6data_with_existing_vpc/karpenter-provisioner-manifests/nodeclass.yaml index f105ce21..3f645483 100644 --- a/aws/e6data_with_existing_vpc/karpenter-provisioner-manifests/nodeclass.yaml +++ b/aws/e6data_with_existing_vpc/karpenter-provisioner-manifests/nodeclass.yaml @@ -22,8 +22,6 @@ spec: volumeType: gp3 userData: | echo "$(jq '.allowedUnsafeSysctls += ["net.core.somaxconn","net.ipv4.ip_local_port_range"]' 
/etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json - echo "$(jq '.cpuManagerPolicy = "static"' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json - echo "$(jq '.cpuManagerPolicyOptions = {"full-pcpus-only": "true"}' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json mount_location="/app/tmp" mkdir -p $mount_location yum install nvme-cli -y From 06da8a56656fd257e1833bac7bbb2b9f487e1475 Mon Sep 17 00:00:00 2001 From: Harshith <104987110+Harshithraj24@users.noreply.github.com> Date: Thu, 10 Apr 2025 00:50:32 +0530 Subject: [PATCH 05/20] Update nodeclass.yaml --- .../karpenter-provisioner-manifests/nodeclass.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/aws/e6data_with_existing_eks/karpenter-provisioner-manifests/nodeclass.yaml b/aws/e6data_with_existing_eks/karpenter-provisioner-manifests/nodeclass.yaml index 999d0805..3f645483 100644 --- a/aws/e6data_with_existing_eks/karpenter-provisioner-manifests/nodeclass.yaml +++ b/aws/e6data_with_existing_eks/karpenter-provisioner-manifests/nodeclass.yaml @@ -22,8 +22,6 @@ spec: volumeType: gp3 userData: | echo "$(jq '.allowedUnsafeSysctls += ["net.core.somaxconn","net.ipv4.ip_local_port_range"]' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json - echo "$(jq '.cpuManagerPolicy = "static"' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json - echo "$(jq '.cpuManagerPolicyOptions = {"full-pcpus-only": "true"}' /etc/kubernetes/kubelet/kubelet-config.json)" > /etc/kubernetes/kubelet/kubelet-config.json mount_location="/app/tmp" mkdir -p $mount_location yum install nvme-cli -y From a2379a4edd412722cafa9186c427983e7084fa43 Mon Sep 17 00:00:00 2001 From: Harshith Date: Mon, 14 Apr 2025 09:05:07 +0530 Subject: [PATCH 06/20] eks upgrade fixes --- aws/e6data_with_existing_eks/support.tf | 34 ++++++++----------------- 
aws/e6data_with_existing_vpc/eks.tf | 18 +++---------- aws/e6data_with_new_eks/eks.tf | 26 ++++++++----------- 3 files changed, 25 insertions(+), 53 deletions(-) diff --git a/aws/e6data_with_existing_eks/support.tf b/aws/e6data_with_existing_eks/support.tf index d8d6295b..d8c71ddc 100644 --- a/aws/e6data_with_existing_eks/support.tf +++ b/aws/e6data_with_existing_eks/support.tf @@ -72,36 +72,24 @@ data "aws_eks_node_group" "current" { } provider "kubernetes" { - alias = "eks_e6data" - host = data.aws_eks_cluster.current.endpoint - cluster_ca_certificate = base64decode(data.aws_eks_cluster.current.certificate_authority[0].data) - exec { - api_version = "client.authentication.k8s.io/v1beta1" - args = ["eks", "get-token", "--cluster-name", var.eks_cluster_name] - command = var.aws_command_line_path - } + alias = "e6data" + host = module.eks.eks_endpoint + cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) + token = data.aws_eks_cluster_auth.target_eks_auth.token } provider "kubectl" { - host = data.aws_eks_cluster.current.endpoint - cluster_ca_certificate = base64decode(data.aws_eks_cluster.current.certificate_authority[0].data) + host = module.eks.eks_endpoint + cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) load_config_file = false - exec { - api_version = "client.authentication.k8s.io/v1beta1" - args = ["eks", "get-token", "--cluster-name", var.eks_cluster_name] - command = var.aws_command_line_path - } + token = data.aws_eks_cluster_auth.target_eks_auth.token } provider "helm" { - alias = "eks_e6data" + alias = "e6data" kubernetes { - host = data.aws_eks_cluster.current.endpoint - cluster_ca_certificate = base64decode(data.aws_eks_cluster.current.certificate_authority[0].data) - exec { - api_version = "client.authentication.k8s.io/v1beta1" - args = ["eks", "get-token", "--cluster-name", var.eks_cluster_name] - command = var.aws_command_line_path - } + host = module.eks.eks_endpoint + cluster_ca_certificate = 
base64decode(module.eks.eks_certificate_data) + token = data.aws_eks_cluster_auth.target_eks_auth.token } } \ No newline at end of file diff --git a/aws/e6data_with_existing_vpc/eks.tf b/aws/e6data_with_existing_vpc/eks.tf index 93123603..75f5dfd4 100644 --- a/aws/e6data_with_existing_vpc/eks.tf +++ b/aws/e6data_with_existing_vpc/eks.tf @@ -42,22 +42,14 @@ provider "kubernetes" { alias = "e6data" host = module.eks.eks_endpoint cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) - exec { - api_version = "client.authentication.k8s.io/v1beta1" - args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name] - command = var.aws_command_line_path - } + token = data.aws_eks_cluster_auth.target_eks_auth.token } provider "kubectl" { host = module.eks.eks_endpoint cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) load_config_file = false - exec { - api_version = "client.authentication.k8s.io/v1beta1" - args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name] - command = var.aws_command_line_path - } + token = data.aws_eks_cluster_auth.target_eks_auth.token } provider "helm" { @@ -65,11 +57,7 @@ provider "helm" { kubernetes { host = module.eks.eks_endpoint cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) - exec { - api_version = "client.authentication.k8s.io/v1beta1" - args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name] - command = var.aws_command_line_path - } + token = data.aws_eks_cluster_auth.target_eks_auth.token } } diff --git a/aws/e6data_with_new_eks/eks.tf b/aws/e6data_with_new_eks/eks.tf index 93123603..0f1b1e9a 100644 --- a/aws/e6data_with_new_eks/eks.tf +++ b/aws/e6data_with_new_eks/eks.tf @@ -38,26 +38,26 @@ resource "aws_ec2_tag" "cluster_primary_security_group" { value = "e6data" } +data "aws_eks_cluster_auth" "target_eks_auth" { + name = module.eks.cluster_name + + depends_on = [ + module.eks + ] +} + provider "kubernetes" { alias = "e6data" host = 
module.eks.eks_endpoint cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) - exec { - api_version = "client.authentication.k8s.io/v1beta1" - args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name] - command = var.aws_command_line_path - } + token = data.aws_eks_cluster_auth.target_eks_auth.token } provider "kubectl" { host = module.eks.eks_endpoint cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) load_config_file = false - exec { - api_version = "client.authentication.k8s.io/v1beta1" - args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name] - command = var.aws_command_line_path - } + token = data.aws_eks_cluster_auth.target_eks_auth.token } provider "helm" { @@ -65,11 +65,7 @@ provider "helm" { kubernetes { host = module.eks.eks_endpoint cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) - exec { - api_version = "client.authentication.k8s.io/v1beta1" - args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name] - command = var.aws_command_line_path - } + token = data.aws_eks_cluster_auth.target_eks_auth.token } } From 8ff8cba8e38fc6c456a7cd3b9a1735b298cd58c7 Mon Sep 17 00:00:00 2001 From: Harshith Date: Mon, 14 Apr 2025 12:11:26 +0530 Subject: [PATCH 07/20] eks upgrade fixes --- aws/e6data_with_existing_eks/support.tf | 4 ++-- aws/e6data_with_existing_vpc/eks.tf | 4 ++-- aws/e6data_with_new_eks/eks.tf | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/aws/e6data_with_existing_eks/support.tf b/aws/e6data_with_existing_eks/support.tf index d8c71ddc..8c19acdd 100644 --- a/aws/e6data_with_existing_eks/support.tf +++ b/aws/e6data_with_existing_eks/support.tf @@ -72,7 +72,7 @@ data "aws_eks_node_group" "current" { } provider "kubernetes" { - alias = "e6data" + alias = "eks_e6data" host = module.eks.eks_endpoint cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) token = data.aws_eks_cluster_auth.target_eks_auth.token @@ -86,7 +86,7 
@@ provider "kubectl" { } provider "helm" { - alias = "e6data" + alias = "eks_e6data" kubernetes { host = module.eks.eks_endpoint cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) diff --git a/aws/e6data_with_existing_vpc/eks.tf b/aws/e6data_with_existing_vpc/eks.tf index 75f5dfd4..55f28a86 100644 --- a/aws/e6data_with_existing_vpc/eks.tf +++ b/aws/e6data_with_existing_vpc/eks.tf @@ -39,7 +39,7 @@ resource "aws_ec2_tag" "cluster_primary_security_group" { } provider "kubernetes" { - alias = "e6data" + alias = "eks_e6data" host = module.eks.eks_endpoint cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) token = data.aws_eks_cluster_auth.target_eks_auth.token @@ -53,7 +53,7 @@ provider "kubectl" { } provider "helm" { - alias = "e6data" + alias = "eks_e6data" kubernetes { host = module.eks.eks_endpoint cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) diff --git a/aws/e6data_with_new_eks/eks.tf b/aws/e6data_with_new_eks/eks.tf index 0f1b1e9a..fc5d5589 100644 --- a/aws/e6data_with_new_eks/eks.tf +++ b/aws/e6data_with_new_eks/eks.tf @@ -47,7 +47,7 @@ data "aws_eks_cluster_auth" "target_eks_auth" { } provider "kubernetes" { - alias = "e6data" + alias = "eks_e6data" host = module.eks.eks_endpoint cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) token = data.aws_eks_cluster_auth.target_eks_auth.token @@ -61,7 +61,7 @@ provider "kubectl" { } provider "helm" { - alias = "e6data" + alias = "eks_e6data" kubernetes { host = module.eks.eks_endpoint cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) From d9097087ba68dd67adf67a81d2627f0bd3d95a7b Mon Sep 17 00:00:00 2001 From: Harshith Date: Mon, 14 Apr 2025 12:13:20 +0530 Subject: [PATCH 08/20] eks upgrade fixes --- aws/e6data_with_existing_eks/support.tf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/aws/e6data_with_existing_eks/support.tf b/aws/e6data_with_existing_eks/support.tf index 
8c19acdd..ad479c98 100644 --- a/aws/e6data_with_existing_eks/support.tf +++ b/aws/e6data_with_existing_eks/support.tf @@ -73,13 +73,13 @@ data "aws_eks_node_group" "current" { provider "kubernetes" { alias = "eks_e6data" - host = module.eks.eks_endpoint + host = data.aws_eks_cluster.current.endpoint cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) token = data.aws_eks_cluster_auth.target_eks_auth.token } provider "kubectl" { - host = module.eks.eks_endpoint + host = data.aws_eks_cluster.current.endpoint cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) load_config_file = false token = data.aws_eks_cluster_auth.target_eks_auth.token @@ -88,7 +88,7 @@ provider "kubectl" { provider "helm" { alias = "eks_e6data" kubernetes { - host = module.eks.eks_endpoint + host = data.aws_eks_cluster.current.endpoint cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) token = data.aws_eks_cluster_auth.target_eks_auth.token } From 1887bb9d1546bca28595c92af2f2e5f929f366ff Mon Sep 17 00:00:00 2001 From: Harshith Date: Mon, 14 Apr 2025 12:16:25 +0530 Subject: [PATCH 09/20] eks upgrade fixes --- aws/e6data_with_existing_eks/support.tf | 16 ++++++++++------ aws/e6data_with_existing_vpc/eks.tf | 8 ++++++++ aws/e6data_with_new_eks/eks.tf | 8 ++++++++ 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/aws/e6data_with_existing_eks/support.tf b/aws/e6data_with_existing_eks/support.tf index ad479c98..12079a43 100644 --- a/aws/e6data_with_existing_eks/support.tf +++ b/aws/e6data_with_existing_eks/support.tf @@ -71,25 +71,29 @@ data "aws_eks_node_group" "current" { node_group_name = tolist(data.aws_eks_node_groups.current.names)[0] } +data "aws_eks_cluster_auth" "target_eks_auth" { + name = module.eks.cluster_name +} + provider "kubernetes" { alias = "eks_e6data" host = data.aws_eks_cluster.current.endpoint - cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) - token = 
data.aws_eks_cluster_auth.target_eks_auth.token + cluster_ca_certificate = base64decode(data.aws_eks_cluster.current.certificate_authority[0].data) + token = data.aws_eks_cluster_auth.current.token } provider "kubectl" { host = data.aws_eks_cluster.current.endpoint - cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) + cluster_ca_certificate = base64decode(data.aws_eks_cluster.current.certificate_authority[0].data) load_config_file = false - token = data.aws_eks_cluster_auth.target_eks_auth.token + token = data.aws_eks_cluster_auth.current.token } provider "helm" { alias = "eks_e6data" kubernetes { host = data.aws_eks_cluster.current.endpoint - cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) - token = data.aws_eks_cluster_auth.target_eks_auth.token + cluster_ca_certificate = base64decode(data.aws_eks_cluster.current.certificate_authority[0].data) + token = data.aws_eks_cluster_auth.current.token } } \ No newline at end of file diff --git a/aws/e6data_with_existing_vpc/eks.tf b/aws/e6data_with_existing_vpc/eks.tf index 55f28a86..78122cc6 100644 --- a/aws/e6data_with_existing_vpc/eks.tf +++ b/aws/e6data_with_existing_vpc/eks.tf @@ -38,6 +38,14 @@ resource "aws_ec2_tag" "cluster_primary_security_group" { value = "e6data" } +data "aws_eks_cluster_auth" "target_eks_auth" { + name = module.eks.cluster_name + + depends_on = [ + module.eks + ] +} + provider "kubernetes" { alias = "eks_e6data" host = module.eks.eks_endpoint diff --git a/aws/e6data_with_new_eks/eks.tf b/aws/e6data_with_new_eks/eks.tf index fc5d5589..8da77bc7 100644 --- a/aws/e6data_with_new_eks/eks.tf +++ b/aws/e6data_with_new_eks/eks.tf @@ -46,6 +46,14 @@ data "aws_eks_cluster_auth" "target_eks_auth" { ] } +data "aws_eks_cluster_auth" "target_eks_auth" { + name = module.eks.cluster_name + + depends_on = [ + module.eks + ] +} + provider "kubernetes" { alias = "eks_e6data" host = module.eks.eks_endpoint From 5a5d3b78a72f67cef3ef901db1ea43f4341531f5 Mon Sep 17 
00:00:00 2001 From: Harshith Date: Mon, 14 Apr 2025 12:17:54 +0530 Subject: [PATCH 10/20] eks upgrade fixes --- aws/e6data_with_existing_eks/support.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws/e6data_with_existing_eks/support.tf b/aws/e6data_with_existing_eks/support.tf index 12079a43..6cabb1e9 100644 --- a/aws/e6data_with_existing_eks/support.tf +++ b/aws/e6data_with_existing_eks/support.tf @@ -72,7 +72,7 @@ data "aws_eks_node_group" "current" { } data "aws_eks_cluster_auth" "target_eks_auth" { - name = module.eks.cluster_name + name = data.aws_eks_cluster.current.name } provider "kubernetes" { From c87c83c8a3de043fec0606d773229ab7b2c79b44 Mon Sep 17 00:00:00 2001 From: Harshith <104987110+Harshithraj24@users.noreply.github.com> Date: Tue, 15 Apr 2025 19:45:44 +0530 Subject: [PATCH 11/20] Update default_nodegroup.tf --- aws/e6data_with_new_eks/default_nodegroup.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aws/e6data_with_new_eks/default_nodegroup.tf b/aws/e6data_with_new_eks/default_nodegroup.tf index 297a438d..717eb622 100644 --- a/aws/e6data_with_new_eks/default_nodegroup.tf +++ b/aws/e6data_with_new_eks/default_nodegroup.tf @@ -32,7 +32,7 @@ resource "aws_launch_template" "default_nodegroup_launch_template" { metadata_options { http_endpoint = "enabled" http_tokens = "required" - http_put_response_hop_limit = 1 + http_put_response_hop_limit = 2 instance_metadata_tags = "enabled" } @@ -99,4 +99,4 @@ resource "aws_iam_role" "eks_nodegroup_iam_role" { name = "${local.e6data_workspace_name}-${random_string.random.result}" managed_policy_arns = var.eks_nodegroup_iam_policy_arn assume_role_policy = data.aws_iam_policy_document.eks_nodegroup_iam_assume_policy.json -} \ No newline at end of file +} From 044b64a2ba85a5276ca016523121d533a187aaff Mon Sep 17 00:00:00 2001 From: Harshith Date: Tue, 22 Apr 2025 16:42:04 +0530 Subject: [PATCH 12/20] fix: provider --- aws/e6data_with_existing_vpc/eks.tf | 4 ++-- 
aws/e6data_with_new_eks/eks.tf | 12 ++---------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/aws/e6data_with_existing_vpc/eks.tf b/aws/e6data_with_existing_vpc/eks.tf index 78122cc6..bad650ca 100644 --- a/aws/e6data_with_existing_vpc/eks.tf +++ b/aws/e6data_with_existing_vpc/eks.tf @@ -47,7 +47,7 @@ data "aws_eks_cluster_auth" "target_eks_auth" { } provider "kubernetes" { - alias = "eks_e6data" + alias = "e6data" host = module.eks.eks_endpoint cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) token = data.aws_eks_cluster_auth.target_eks_auth.token @@ -61,7 +61,7 @@ provider "kubectl" { } provider "helm" { - alias = "eks_e6data" + alias = "e6data" kubernetes { host = module.eks.eks_endpoint cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) diff --git a/aws/e6data_with_new_eks/eks.tf b/aws/e6data_with_new_eks/eks.tf index 8da77bc7..0f1b1e9a 100644 --- a/aws/e6data_with_new_eks/eks.tf +++ b/aws/e6data_with_new_eks/eks.tf @@ -46,16 +46,8 @@ data "aws_eks_cluster_auth" "target_eks_auth" { ] } -data "aws_eks_cluster_auth" "target_eks_auth" { - name = module.eks.cluster_name - - depends_on = [ - module.eks - ] -} - provider "kubernetes" { - alias = "eks_e6data" + alias = "e6data" host = module.eks.eks_endpoint cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) token = data.aws_eks_cluster_auth.target_eks_auth.token @@ -69,7 +61,7 @@ provider "kubectl" { } provider "helm" { - alias = "eks_e6data" + alias = "e6data" kubernetes { host = module.eks.eks_endpoint cluster_ca_certificate = base64decode(module.eks.eks_certificate_data) From eb2a8fae83690a5cb7aa14b5293da3d75108294b Mon Sep 17 00:00:00 2001 From: Harshith <104987110+Harshithraj24@users.noreply.github.com> Date: Fri, 9 May 2025 12:57:34 +0530 Subject: [PATCH 13/20] fix: debug namespaces change detection by the helm module (#178) --- aws/e6data_with_existing_eks/helm.tf | 3 --- aws/e6data_with_existing_vpc/helm.tf | 4 ----
aws/e6data_with_new_eks/helm.tf | 4 ---- 3 files changed, 11 deletions(-) diff --git a/aws/e6data_with_existing_eks/helm.tf b/aws/e6data_with_existing_eks/helm.tf index a0e52291..cb5b7ae6 100644 --- a/aws/e6data_with_existing_eks/helm.tf +++ b/aws/e6data_with_existing_eks/helm.tf @@ -12,8 +12,5 @@ resource "helm_release" "e6data_workspace_deployment" { values = [local.helm_values_file] - lifecycle { - ignore_changes = [values] - } # depends_on = [aws_eks_access_policy_association.tf_runner_auth_policy] } \ No newline at end of file diff --git a/aws/e6data_with_existing_vpc/helm.tf b/aws/e6data_with_existing_vpc/helm.tf index 10715e55..f3bcae4e 100644 --- a/aws/e6data_with_existing_vpc/helm.tf +++ b/aws/e6data_with_existing_vpc/helm.tf @@ -12,9 +12,5 @@ resource "helm_release" "e6data_workspace_deployment" { values = [local.helm_values_file] - lifecycle { - ignore_changes = [values] - } - depends_on = [module.eks, aws_eks_node_group.default_node_group, module.e6data_authentication] } \ No newline at end of file diff --git a/aws/e6data_with_new_eks/helm.tf b/aws/e6data_with_new_eks/helm.tf index 10715e55..f3bcae4e 100644 --- a/aws/e6data_with_new_eks/helm.tf +++ b/aws/e6data_with_new_eks/helm.tf @@ -12,9 +12,5 @@ resource "helm_release" "e6data_workspace_deployment" { values = [local.helm_values_file] - lifecycle { - ignore_changes = [values] - } - depends_on = [module.eks, aws_eks_node_group.default_node_group, module.e6data_authentication] } \ No newline at end of file From 7acd6cd0a70205ca3f177caff966ddb9fdd6daf0 Mon Sep 17 00:00:00 2001 From: anurage6data Date: Wed, 14 May 2025 11:28:18 +0530 Subject: [PATCH 14/20] PLT-6597 - IAM authentication --- .../e6data_engine_iam.tf | 31 ++++++++++++++++++- aws/e6data_with_existing_eks/variables.tf | 4 +++ .../e6data_engine_iam.tf | 30 +++++++++++++++++- aws/e6data_with_existing_vpc/variables.tf | 4 +++ aws/e6data_with_new_eks/e6data_engine_iam.tf | 30 +++++++++++++++++- aws/e6data_with_new_eks/variables.tf | 4 +++ 6 files
changed, 100 insertions(+), 3 deletions(-) diff --git a/aws/e6data_with_existing_eks/e6data_engine_iam.tf b/aws/e6data_with_existing_eks/e6data_engine_iam.tf index 34e86ad3..284c65b9 100644 --- a/aws/e6data_with_existing_eks/e6data_engine_iam.tf +++ b/aws/e6data_with_existing_eks/e6data_engine_iam.tf @@ -17,6 +17,28 @@ data "aws_iam_policy_document" "oidc_assume_role_policy" { } } +data "aws_iam_policy_document" "system_tables_policy" { + statement { + sid = "AssumeRole" + effect = "Allow" + + actions = [ + "sts:AssumeRole" + ] + resources = ["arn:aws:iam::${local.cross_account_id}:role/e6-system-tables-*"] + } + + statement { + sid = "TagSession" + effect = "Allow" + + actions = [ + "sts:TagSession" + ] + resources = ["*"] + } +} + data "aws_iam_policy_document" "engine_iam_glue_s3readAccess_doc" { statement { sid = "glueReadOnlyAccess" @@ -60,9 +82,16 @@ resource "aws_iam_policy" "e6data_engine_s3_glue_policy" { policy = data.aws_iam_policy_document.engine_iam_glue_s3readAccess_doc.json } +resource "aws_iam_policy" "e6data_engine_system_tables_policy" { + name = "${local.e6data_workspace_name}-engine-system-tables-${random_string.random.result}" + description = "Allows assume the role for system tables" + policy = data.aws_iam_policy_document.system_tables_policy.json +} + + # Create an IAM role for the engine, allowing it to assume the role with specified policies attached resource "aws_iam_role" "e6data_engine_role" { name = "${local.e6data_workspace_name}-engine-role-${random_string.random.result}" assume_role_policy = data.aws_iam_policy_document.oidc_assume_role_policy.json - managed_policy_arns = [aws_iam_policy.e6data_engine_s3_glue_policy.arn, aws_iam_policy.e6data_s3_read_write_policy.arn] + managed_policy_arns = [aws_iam_policy.e6data_engine_s3_glue_policy.arn, aws_iam_policy.e6data_s3_read_write_policy.arn, aws_iam_policy.e6data_engine_system_tables_policy.arn] } \ No newline at end of file diff --git a/aws/e6data_with_existing_eks/variables.tf 
b/aws/e6data_with_existing_eks/variables.tf index b0c52014..5dc254b0 100644 --- a/aws/e6data_with_existing_eks/variables.tf +++ b/aws/e6data_with_existing_eks/variables.tf @@ -87,4 +87,8 @@ variable "debug_namespaces" { type = list(string) description = "kaprneter and alb controller namespaces" default = ["kube-system"] +} + +locals { + cross_account_id = split(":", var.e6data_cross_oidc_role_arn[0])[4] } \ No newline at end of file diff --git a/aws/e6data_with_existing_vpc/e6data_engine_iam.tf b/aws/e6data_with_existing_vpc/e6data_engine_iam.tf index 34e86ad3..b61f38d6 100644 --- a/aws/e6data_with_existing_vpc/e6data_engine_iam.tf +++ b/aws/e6data_with_existing_vpc/e6data_engine_iam.tf @@ -53,6 +53,28 @@ data "aws_iam_policy_document" "engine_iam_glue_s3readAccess_doc" { } } +data "aws_iam_policy_document" "system_tables_policy" { + statement { + sid = "AssumeRole" + effect = "Allow" + + actions = [ + "sts:AssumeRole" + ] + resources = ["arn:aws:iam::${local.cross_account_id}:role/e6-system-tables-*"] + } + + statement { + sid = "TagSession" + effect = "Allow" + + actions = [ + "sts:TagSession" + ] + resources = ["*"] + } +} + # Create an IAM policy that grants read access to S3 buckets and the Glue catalog resource "aws_iam_policy" "e6data_engine_s3_glue_policy" { name = "${local.e6data_workspace_name}-engine-s3-glue-${random_string.random.result}" @@ -60,9 +82,15 @@ resource "aws_iam_policy" "e6data_engine_s3_glue_policy" { policy = data.aws_iam_policy_document.engine_iam_glue_s3readAccess_doc.json } +resource "aws_iam_policy" "e6data_engine_system_tables_policy" { + name = "${local.e6data_workspace_name}-engine-system-tables-${random_string.random.result}" + description = "Allows assume the role for system tables" + policy = data.aws_iam_policy_document.system_tables_policy.json +} + # Create an IAM role for the engine, allowing it to assume the role with specified policies attached resource "aws_iam_role" "e6data_engine_role" { name = 
"${local.e6data_workspace_name}-engine-role-${random_string.random.result}" assume_role_policy = data.aws_iam_policy_document.oidc_assume_role_policy.json - managed_policy_arns = [aws_iam_policy.e6data_engine_s3_glue_policy.arn, aws_iam_policy.e6data_s3_read_write_policy.arn] + managed_policy_arns = [aws_iam_policy.e6data_engine_s3_glue_policy.arn, aws_iam_policy.e6data_s3_read_write_policy.arn, aws_iam_policy.e6data_engine_system_tables_policy.arn] } \ No newline at end of file diff --git a/aws/e6data_with_existing_vpc/variables.tf b/aws/e6data_with_existing_vpc/variables.tf index 4dc87f14..38229e0b 100644 --- a/aws/e6data_with_existing_vpc/variables.tf +++ b/aws/e6data_with_existing_vpc/variables.tf @@ -290,4 +290,8 @@ variable "minimum_ip_target" { description = "Minimum number of IP addresses to keep available for pod assignment." type = number default = 12 +} + +locals { + cross_account_id = split(":", var.e6data_cross_oidc_role_arn[0])[4] } \ No newline at end of file diff --git a/aws/e6data_with_new_eks/e6data_engine_iam.tf b/aws/e6data_with_new_eks/e6data_engine_iam.tf index 999d6e31..8d2e5d5f 100644 --- a/aws/e6data_with_new_eks/e6data_engine_iam.tf +++ b/aws/e6data_with_new_eks/e6data_engine_iam.tf @@ -53,6 +53,28 @@ data "aws_iam_policy_document" "engine_iam_glue_s3readAccess_doc" { } } +data "aws_iam_policy_document" "system_tables_policy" { + statement { + sid = "AssumeRole" + effect = "Allow" + + actions = [ + "sts:AssumeRole" + ] + resources = ["arn:aws:iam::${local.cross_account_id}:role/e6-system-tables-*"] + } + + statement { + sid = "TagSession" + effect = "Allow" + + actions = [ + "sts:TagSession" + ] + resources = ["*"] + } +} + # Create an IAM policy that grants read access to S3 buckets and the Glue catalog resource "aws_iam_policy" "e6data_engine_s3_glue_policy" { name = "${local.e6data_workspace_name}-engine-s3-glue-policy-${random_string.random.result}" @@ -60,9 +82,15 @@ resource "aws_iam_policy" "e6data_engine_s3_glue_policy" { policy = 
data.aws_iam_policy_document.engine_iam_glue_s3readAccess_doc.json } +resource "aws_iam_policy" "e6data_engine_system_tables_policy" { + name = "${local.e6data_workspace_name}-engine-system-tables-${random_string.random.result}" + description = "Allows assume the role for system tables" + policy = data.aws_iam_policy_document.system_tables_policy.json +} + # Create an IAM role for the engine, allowing it to assume the role with specified policies attached resource "aws_iam_role" "e6data_engine_role" { name = "${local.e6data_workspace_name}-engine-role-${random_string.random.result}" assume_role_policy = data.aws_iam_policy_document.oidc_assume_role_policy.json - managed_policy_arns = [aws_iam_policy.e6data_engine_s3_glue_policy.arn, aws_iam_policy.e6data_s3_read_write_policy.arn] + managed_policy_arns = [aws_iam_policy.e6data_engine_s3_glue_policy.arn, aws_iam_policy.e6data_s3_read_write_policy.arn, aws_iam_policy.e6data_engine_system_tables_policy.arn] } \ No newline at end of file diff --git a/aws/e6data_with_new_eks/variables.tf b/aws/e6data_with_new_eks/variables.tf index cce795c7..f8ee2618 100644 --- a/aws/e6data_with_new_eks/variables.tf +++ b/aws/e6data_with_new_eks/variables.tf @@ -290,4 +290,8 @@ variable "minimum_ip_target" { description = "Minimum number of IP addresses to keep available for pod assignment." 
type = number default = 12 +} + +locals { + cross_account_id = split(":", var.e6data_cross_oidc_role_arn[0])[4] } \ No newline at end of file From d686d4137e393cc1e70844315c300ec6b932b3ae Mon Sep 17 00:00:00 2001 From: Siddhanth03 Date: Thu, 22 May 2025 12:38:30 +0530 Subject: [PATCH 15/20] Update logging service to none --- gcp/e6data_with_new_gke/modules/gke_cluster/gke.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcp/e6data_with_new_gke/modules/gke_cluster/gke.tf b/gcp/e6data_with_new_gke/modules/gke_cluster/gke.tf index 799f18d0..87ba62d9 100644 --- a/gcp/e6data_with_new_gke/modules/gke_cluster/gke.tf +++ b/gcp/e6data_with_new_gke/modules/gke_cluster/gke.tf @@ -5,7 +5,7 @@ resource "google_container_cluster" "gke_cluster" { location = var.region min_master_version = var.gke_version monitoring_service = "monitoring.googleapis.com/kubernetes" - logging_service = "logging.googleapis.com/kubernetes" + logging_service = "none" network = var.network subnetwork = var.subnetwork initial_node_count = var.initial_node_count @@ -70,4 +70,4 @@ resource "google_container_cluster" "gke_cluster" { } -data "google_client_config" "default" {} \ No newline at end of file +data "google_client_config" "default" {} From 80cb73351c50c4df95521a4b5c657f036d27bf93 Mon Sep 17 00:00:00 2001 From: Siddhanth03 Date: Thu, 22 May 2025 12:39:27 +0530 Subject: [PATCH 16/20] Update gke.tf --- gcp/e6data_with_new_gke/modules/gke_cluster/gke.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcp/e6data_with_new_gke/modules/gke_cluster/gke.tf b/gcp/e6data_with_new_gke/modules/gke_cluster/gke.tf index 87ba62d9..093c8b08 100644 --- a/gcp/e6data_with_new_gke/modules/gke_cluster/gke.tf +++ b/gcp/e6data_with_new_gke/modules/gke_cluster/gke.tf @@ -5,7 +5,7 @@ resource "google_container_cluster" "gke_cluster" { location = var.region min_master_version = var.gke_version monitoring_service = "monitoring.googleapis.com/kubernetes" - logging_service = "none" + 
logging_service = "logging.googleapis.com/kubernetes" network = var.network subnetwork = var.subnetwork initial_node_count = var.initial_node_count From b504a5f469f5a8a70780930f1cfbb536c9cfa63a Mon Sep 17 00:00:00 2001 From: Siddhanth03 Date: Thu, 22 May 2025 12:42:12 +0530 Subject: [PATCH 17/20] Update logging service to none (#180) --- gcp/e6data_with_existing_vpc/modules/gke_cluster/gke.tf | 4 ++-- gcp/e6data_with_new_gke/modules/gke_cluster/gke.tf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gcp/e6data_with_existing_vpc/modules/gke_cluster/gke.tf b/gcp/e6data_with_existing_vpc/modules/gke_cluster/gke.tf index 799f18d0..87ba62d9 100644 --- a/gcp/e6data_with_existing_vpc/modules/gke_cluster/gke.tf +++ b/gcp/e6data_with_existing_vpc/modules/gke_cluster/gke.tf @@ -5,7 +5,7 @@ resource "google_container_cluster" "gke_cluster" { location = var.region min_master_version = var.gke_version monitoring_service = "monitoring.googleapis.com/kubernetes" - logging_service = "logging.googleapis.com/kubernetes" + logging_service = "none" network = var.network subnetwork = var.subnetwork initial_node_count = var.initial_node_count @@ -70,4 +70,4 @@ resource "google_container_cluster" "gke_cluster" { } -data "google_client_config" "default" {} \ No newline at end of file +data "google_client_config" "default" {} diff --git a/gcp/e6data_with_new_gke/modules/gke_cluster/gke.tf b/gcp/e6data_with_new_gke/modules/gke_cluster/gke.tf index 093c8b08..87ba62d9 100644 --- a/gcp/e6data_with_new_gke/modules/gke_cluster/gke.tf +++ b/gcp/e6data_with_new_gke/modules/gke_cluster/gke.tf @@ -5,7 +5,7 @@ resource "google_container_cluster" "gke_cluster" { location = var.region min_master_version = var.gke_version monitoring_service = "monitoring.googleapis.com/kubernetes" - logging_service = "logging.googleapis.com/kubernetes" + logging_service = "none" network = var.network subnetwork = var.subnetwork initial_node_count = var.initial_node_count From 
867ee358eed86ab60fa74327eb48e48390e12a70 Mon Sep 17 00:00:00 2001 From: Srinath Prabhu <72329057+srinath-prabhu@users.noreply.github.com> Date: Mon, 16 Jun 2025 15:52:16 +0530 Subject: [PATCH 18/20] Update aws/e6data_with_existing_vpc/variables.tf Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- aws/e6data_with_existing_vpc/variables.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aws/e6data_with_existing_vpc/variables.tf b/aws/e6data_with_existing_vpc/variables.tf index 38229e0b..56a5c3b0 100644 --- a/aws/e6data_with_existing_vpc/variables.tf +++ b/aws/e6data_with_existing_vpc/variables.tf @@ -265,7 +265,7 @@ variable "additional_egress_rules" { variable "debug_namespaces" { type = list(string) - description = "kaprneter and alb controller namespaces" + description = "karpenter and alb controller namespaces" default = ["kube-system"] } variable "vpc_cni_version" { From 52b540dcd70d2bd0b248635e9337fa063e4025e3 Mon Sep 17 00:00:00 2001 From: Harshith Date: Mon, 16 Jun 2025 15:54:44 +0530 Subject: [PATCH 19/20] kube version --- aws/e6data_with_existing_vpc/terraform.tfvars | 4 ++-- aws/e6data_with_new_eks/terraform.tfvars | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/aws/e6data_with_existing_vpc/terraform.tfvars b/aws/e6data_with_existing_vpc/terraform.tfvars index d3e0936e..df73de2c 100644 --- a/aws/e6data_with_existing_vpc/terraform.tfvars +++ b/aws/e6data_with_existing_vpc/terraform.tfvars @@ -10,8 +10,8 @@ workspace_name = "workspace" ### Name of the e6data workspace to be created. helm_chart_version = "2.1.7" ### e6data workspace Helm chart version to be used. # Kubernetes Variables -kube_version = "1.31" ### The Kubernetes cluster version. Version 1.24 or higher is required. -default_nodegroup_kube_version = "1.31" +kube_version = "1.32" ### The Kubernetes cluster version. Version 1.24 or higher is required. 
+default_nodegroup_kube_version = "1.32" eks_disk_size = 100 ### Disk size for the instances in the nodepool. A minimum of 100 GB is required. nodepool_instance_family = ["t3", "t4g", "t2", "c7g", "c7gd", "c6g", "c8g", "r8g", "i8g", "c6gd", "r6g", "r6gd", "r7g", "r7gd", "i3"] diff --git a/aws/e6data_with_new_eks/terraform.tfvars b/aws/e6data_with_new_eks/terraform.tfvars index f939df1d..bc6fa881 100644 --- a/aws/e6data_with_new_eks/terraform.tfvars +++ b/aws/e6data_with_new_eks/terraform.tfvars @@ -10,8 +10,8 @@ workspace_name = "workspace" ### Name of the e6data workspace to be created. helm_chart_version = "2.1.7" ### e6data workspace Helm chart version to be used. # Kubernetes Variables -kube_version = "1.31" ### The Kubernetes cluster version. Version 1.24 or higher is required. -default_nodegroup_kube_version = "1.31" +kube_version = "1.32" ### The Kubernetes cluster version. Version 1.24 or higher is required. +default_nodegroup_kube_version = "1.32" eks_disk_size = 100 ### Disk size for the instances in the nodepool. A minimum of 100 GB is required. 
nodepool_instance_family = ["t3", "t4g", "t2", "c7g", "c7gd", "c6g", "c8g", "r8g", "i8g", "c6gd", "r6g", "r6gd", "r7g", "r7gd", "i3"] From 4ed777785569f7c45add18c5e5f2def9c54c6454 Mon Sep 17 00:00:00 2001 From: Srinath Prabhu <72329057+srinath-prabhu@users.noreply.github.com> Date: Mon, 16 Jun 2025 16:11:45 +0530 Subject: [PATCH 20/20] Update support.tf --- aws/e6data_with_existing_eks/support.tf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/aws/e6data_with_existing_eks/support.tf b/aws/e6data_with_existing_eks/support.tf index 6cabb1e9..2f066013 100644 --- a/aws/e6data_with_existing_eks/support.tf +++ b/aws/e6data_with_existing_eks/support.tf @@ -79,14 +79,14 @@ provider "kubernetes" { alias = "eks_e6data" host = data.aws_eks_cluster.current.endpoint cluster_ca_certificate = base64decode(data.aws_eks_cluster.current.certificate_authority[0].data) - token = data.aws_eks_cluster_auth.current.token + token = data.aws_eks_cluster_auth.target_eks_auth.token } provider "kubectl" { host = data.aws_eks_cluster.current.endpoint cluster_ca_certificate = base64decode(data.aws_eks_cluster.current.certificate_authority[0].data) load_config_file = false - token = data.aws_eks_cluster_auth.current.token + token = data.aws_eks_cluster_auth.target_eks_auth.token } provider "helm" { @@ -94,6 +94,6 @@ provider "helm" { kubernetes { host = data.aws_eks_cluster.current.endpoint cluster_ca_certificate = base64decode(data.aws_eks_cluster.current.certificate_authority[0].data) - token = data.aws_eks_cluster_auth.current.token + token = data.aws_eks_cluster_auth.target_eks_auth.token } -} \ No newline at end of file +}