Add a Helm-managed cluster-autoscaler to the GCP e6data stacks
(e6data_with_existing_vpc and e6data_with_new_gke) and remove the GKE-native
node pool autoscaling blocks.

Review changes applied on top of the original patch (identical in both stacks):
  * modules/autoscaler/iam.tf: grant roles/compute.instanceAdmin.v1 with the
    additive google_project_iam_member instead of the authoritative
    google_project_iam_binding; keep the service account id within 30 characters.
  * modules/autoscaler/main.tf: drop the duplicated
    extraArgs.ignore-daemonsets-utilization setting; make the Helm release wait
    for the IAM grants.
  * autoscaler.tf: take the release name from var.autoscaler_helm_chart_name
    instead of a hard-coded string.
  * Formatting only: "cluster_autoscaling {", no extra blank line added in
    node_pool.tf, dns_cache_config left untouched in
    e6data_with_new_gke/modules/gke_cluster/gke.tf.

The index lines still carry the blob ids of the original patch and do not
match the edited file contents.

diff --git a/gcp/e6data_with_existing_gke/provider.tf b/gcp/e6data_with_existing_gke/provider.tf
index 599dacd3..76aa4072 100644
--- a/gcp/e6data_with_existing_gke/provider.tf
+++ b/gcp/e6data_with_existing_gke/provider.tf
@@ -12,6 +12,7 @@ provider "google" {
   project = var.gcp_project_id
   region = var.gcp_region
   default_labels = var.cost_labels
+  /* credentials = "{{GOOGLE_CLOUD_KEYFILE_JSON}}" */
   # access_token = "{{ gcp_access_token }}"
 }
 
diff --git a/gcp/e6data_with_existing_vpc/autoscaler.tf b/gcp/e6data_with_existing_vpc/autoscaler.tf
new file mode 100644
index 00000000..64410187
--- /dev/null
+++ b/gcp/e6data_with_existing_vpc/autoscaler.tf
@@ -0,0 +1,25 @@
+# Deploys the cluster-autoscaler module into the e6data GKE cluster once the default node pool exists.
+module "autoscaler_deployment" {
+  providers = {
+    kubernetes = kubernetes.gke_e6data
+    helm       = helm.gke_e6data
+  }
+
+  source = "./modules/autoscaler"
+
+  helm_chart_name    = var.autoscaler_helm_chart_name
+  helm_chart_version = var.autoscaler_helm_chart_version
+
+  namespace            = var.autoscaler_namespace
+  service_account_name = var.autoscaler_service_account_name
+
+  cluster_name  = module.gke_e6data.cluster_name
+  nodepool_name = google_container_node_pool.workspace.name
+
+  gcp_project_id = var.gcp_project_id
+
+  tolerations_key   = "e6data-workspace-name"
+  tolerations_value = var.workspace_name
+
+  depends_on = [module.gke_e6data, google_container_node_pool.default_gke_cluster_nodepool]
+}
\ No newline at end of file
diff --git a/gcp/e6data_with_existing_vpc/default_nodepool.tf b/gcp/e6data_with_existing_vpc/default_nodepool.tf
index 0be463b6..9b389f04 100644
--- a/gcp/e6data_with_existing_vpc/default_nodepool.tf
+++ b/gcp/e6data_with_existing_vpc/default_nodepool.tf
@@ -2,7 +2,7 @@ resource "google_container_node_pool" "default_gke_cluster_nodepool" {
   name_prefix = "e6data-default"
   location = local.kubernetes_cluster_location
   cluster = module.gke_e6data.cluster_name
-  node_count = 2
+  node_count = 1
   version = var.gke_version
   max_pods_per_node = 64
 
@@ -21,12 +21,6 @@ resource "google_container_node_pool" "default_gke_cluster_nodepool" {
     }
   }
 
-  autoscaling {
-    total_min_node_count = 2
-    total_max_node_count = 3
-    location_policy = "ANY"
-  }
-
   lifecycle {
     ignore_changes = [node_count, autoscaling, node_config[0].labels, version]
   }
diff --git a/gcp/e6data_with_existing_vpc/modules/autoscaler/iam.tf b/gcp/e6data_with_existing_vpc/modules/autoscaler/iam.tf
new file mode 100644
index 00000000..e29d3ba4
--- /dev/null
+++ b/gcp/e6data_with_existing_vpc/modules/autoscaler/iam.tf
@@ -0,0 +1,36 @@
+# GCP service account that the cluster-autoscaler pods act as through Workload Identity.
+resource "google_service_account" "cluster_autoscaler" {
+  # account_id is limited to 30 characters, so the cluster name is truncated to leave room for "-ca-sa".
+  account_id   = "${substr(var.cluster_name, 0, 24)}-ca-sa"
+  display_name = "Kubernetes Cluster Autoscaler Service Account"
+}
+
+# Additive grant: google_project_iam_binding is authoritative for the role and would
+# remove every other member of roles/compute.instanceAdmin.v1 in the project.
+resource "google_project_iam_member" "compute_instance_admin" {
+  project = var.gcp_project_id
+  role    = "roles/compute.instanceAdmin.v1"
+  member  = "serviceAccount:${google_service_account.cluster_autoscaler.email}"
+}
+
+# Kubernetes service account used by the Helm release, annotated with the GCP service account it maps to.
+resource "kubernetes_service_account" "cluster_autoscaler" {
+  metadata {
+    name      = var.service_account_name
+    namespace = var.namespace
+
+    annotations = {
+      "iam.gke.io/gcp-service-account" = google_service_account.cluster_autoscaler.email
+    }
+  }
+}
+
+# Allows the Kubernetes service account above to act as the GCP service account.
+resource "google_service_account_iam_binding" "workload_identity_binding" {
+  service_account_id = google_service_account.cluster_autoscaler.name
+  role               = "roles/iam.workloadIdentityUser"
+
+  members = [
+    "serviceAccount:${var.gcp_project_id}.svc.id.goog[${var.namespace}/${kubernetes_service_account.cluster_autoscaler.metadata[0].name}]"
+  ]
+}
\ No newline at end of file
diff --git a/gcp/e6data_with_existing_vpc/modules/autoscaler/main.tf b/gcp/e6data_with_existing_vpc/modules/autoscaler/main.tf
new file mode 100644
index 00000000..1fde2a65
--- /dev/null
+++ b/gcp/e6data_with_existing_vpc/modules/autoscaler/main.tf
@@ -0,0 +1,132 @@
+terraform {
+  required_providers {
+    helm = {
+      source = "hashicorp/helm"
+    }
+    kubernetes = {
+      source = "hashicorp/kubernetes"
+    }
+  }
+}
+
+# Fixed 60 second pause that the Helm release below depends on.
+resource "null_resource" "waiting" {
+  provisioner "local-exec" {
+    command = "sleep 60"
+  }
+}
+
+# Installs the cluster-autoscaler chart, running it under the service account defined in iam.tf.
+resource "helm_release" "autoscaler_deployment" {
+  name       = var.helm_chart_name
+  repository = "https://kubernetes.github.io/autoscaler"
+  chart      = "cluster-autoscaler"
+  namespace  = var.namespace
+  version    = var.helm_chart_version
+  timeout    = 600
+
+  set {
+    name  = "autoDiscovery.clusterName"
+    value = var.cluster_name
+  }
+
+  set {
+    name  = "autoscalingGroupsnamePrefix[0].name"
+    value = "gke-${substr(var.cluster_name, 0, 14)}"
+  }
+
+  set {
+    name  = "autoscalingGroupsnamePrefix[0].maxSize"
+    value = "10"
+  }
+
+  set {
+    name  = "autoscalingGroupsnamePrefix[0].minSize"
+    value = "1"
+  }
+
+  set {
+    name  = "cloudProvider"
+    value = "gce"
+  }
+
+  set {
+    name  = "extraArgs.ignore-daemonsets-utilization"
+    value = "true"
+  }
+
+  set {
+    name  = "extraArgs.scan-interval"
+    value = "10s"
+  }
+
+  set {
+    name  = "extraArgs.leader-elect"
+    value = false
+  }
+
+  set {
+    name  = "extraArgs.scale-down-unneeded-time"
+    value = "3m"
+  }
+
+  set {
+    name  = "extraArgs.scale-down-unready-time"
+    value = "3m"
+  }
+
+  set {
+    name  = "extraArgs.scale-down-utilization-threshold"
+    value = "0.2"
+  }
+
+  set {
+    name  = "extraArgs.scale-down-delay-after-add"
+    value = "3m"
+  }
+
+  set {
+    name  = "extraArgs.scale-down-delay-after-delete"
+    value = "3m"
+  }
+
+  # The service account is created by kubernetes_service_account.cluster_autoscaler, so the chart must not create one.
+  set {
+    name  = "rbac.serviceAccount.create"
+    value = false
+  }
+
+  set {
+    name  = "rbac.serviceAccount.name"
+    value = kubernetes_service_account.cluster_autoscaler.metadata[0].name
+  }
+
+  set {
+    name  = "tolerations[0].key"
+    value = var.tolerations_key
+  }
+
+  set {
+    name  = "tolerations[0].operator"
+    value = "Equal"
+  }
+
+  set {
+    name  = "tolerations[0].value"
+    value = var.tolerations_value
+  }
+
+  set {
+    name  = "tolerations[0].effect"
+    value = "NoSchedule"
+  }
+
+  # Also wait for both IAM grants so the autoscaler is not started before its GCP permissions exist.
+  depends_on = [
+    null_resource.waiting,
+    google_service_account.cluster_autoscaler,
+    google_project_iam_member.compute_instance_admin,
+    google_service_account_iam_binding.workload_identity_binding,
+    kubernetes_service_account.cluster_autoscaler,
+  ]
+}
\ No newline at end of file
diff --git a/gcp/e6data_with_existing_vpc/modules/autoscaler/variables.tf b/gcp/e6data_with_existing_vpc/modules/autoscaler/variables.tf
new file mode 100644
index 00000000..7eee5e1d
--- /dev/null
+++ b/gcp/e6data_with_existing_vpc/modules/autoscaler/variables.tf
@@ -0,0 +1,45 @@
+variable "cluster_name" {
+  type        = string
+  description = "Name of kubernetes cluster"
+}
+
+variable "helm_chart_version" {
+  type        = string
+  description = "Helm chart version to be used"
+}
+
+variable "namespace" {
+  type        = string
+  description = "Namespace to deploy in helm chart"
+}
+
+variable "helm_chart_name" {
+  type        = string
+  description = "Name of helm chart"
+}
+
+variable "service_account_name" {
+  type        = string
+  description = "Kubernetes Service Account Name for RBAC"
+}
+
+variable "tolerations_key" {
+  type        = string
+  description = "tolerations key"
+}
+
+variable "tolerations_value" {
+  type        = string
+  description = "tolerations value"
+}
+
+variable "gcp_project_id" {
+  type        = string
+  description = "GCP Project ID"
+}
+
+# NOTE(review): not referenced by iam.tf or main.tf of this module; kept because the caller passes it.
+variable "nodepool_name" {
+  type        = string
+  description = "e6data nodepool name"
+}
\ No newline at end of file
diff --git a/gcp/e6data_with_existing_vpc/modules/gke_cluster/gke.tf b/gcp/e6data_with_existing_vpc/modules/gke_cluster/gke.tf
index 02baec1a..b9e557da 100644
--- a/gcp/e6data_with_existing_vpc/modules/gke_cluster/gke.tf
+++ b/gcp/e6data_with_existing_vpc/modules/gke_cluster/gke.tf
@@ -15,6 +15,10 @@ resource "google_container_cluster" "gke_cluster" {
     enabled = false
   }
 
+  cluster_autoscaling {
+    enabled = false
+  }
+
   # Workloads are being configured to utilize the Workload Identity provider instead of directly relying on the service account of the worker node.
   # https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity?source=post_page-----d11a230f8e49--------------------------------
   workload_identity_config {
diff --git a/gcp/e6data_with_existing_vpc/node_pool.tf b/gcp/e6data_with_existing_vpc/node_pool.tf
index 6b7649f9..b6e8cb81 100644
--- a/gcp/e6data_with_existing_vpc/node_pool.tf
+++ b/gcp/e6data_with_existing_vpc/node_pool.tf
@@ -6,11 +6,6 @@ resource "google_container_node_pool" "workspace" {
   version = var.gke_version
   initial_node_count = 0
 
-  autoscaling {
-    total_min_node_count = 0
-    total_max_node_count = var.max_instances_in_nodepool
-    location_policy = "ANY"
-  }
   node_config {
     disk_size_gb = 100
     spot = var.spot_enabled
diff --git a/gcp/e6data_with_existing_vpc/terraform.tfvars b/gcp/e6data_with_existing_vpc/terraform.tfvars
index 8ded9229..9f14e631 100644
--- a/gcp/e6data_with_existing_vpc/terraform.tfvars
+++ b/gcp/e6data_with_existing_vpc/terraform.tfvars
@@ -48,4 +48,10 @@ kubernetes_namespace = "namespace" # The namespace to use for Kubernetes resourc
 cost_labels = {} # Cost labels for tracking costs
 # Note: The variable cost_labels only accepts lowercase letters ([a-z]), numeric characters ([0-9]), underscores (_) and dashes (-).
 
-buckets = ["*"] ### List of bucket names that the e6data engine queries and therefore, require read access to. Default is ["*"] which means all buckets, it is advisable to change this.
\ No newline at end of file
+buckets = ["*"] ### List of bucket names that the e6data engine queries and therefore, require read access to. Default is ["*"] which means all buckets, it is advisable to change this.
+
+# Autoscaler Variables
+autoscaler_namespace = "kube-system" ### Namespace to deploy the cluster autoscaler
+autoscaler_service_account_name = "cluster-autoscaler" ### Service account name for the cluster autoscaler
+autoscaler_helm_chart_name = "autoscaler" ### Name of the cluster autoscaler Helm chart
+autoscaler_helm_chart_version = "9.37.0" ### Version of the cluster autoscaler Helm chart
\ No newline at end of file
diff --git a/gcp/e6data_with_existing_vpc/variables.tf b/gcp/e6data_with_existing_vpc/variables.tf
index 37002dcc..1847ecef 100644
--- a/gcp/e6data_with_existing_vpc/variables.tf
+++ b/gcp/e6data_with_existing_vpc/variables.tf
@@ -159,4 +159,25 @@ variable "deletion_protection" {
 variable "authorized_networks" {
   type = map(string)
   description = "authorized_networks"
+}
+
+# Autoscaler variables
+variable "autoscaler_namespace" {
+  type = string
+  description = "Autoscaler namespace"
+}
+
+variable "autoscaler_service_account_name" {
+  type = string
+  description = "Autoscaler service account name"
+}
+
+variable "autoscaler_helm_chart_name" {
+  type = string
+  description = "Autoscaler helm chart name"
+}
+
+variable "autoscaler_helm_chart_version" {
+  type = string
+  description = "Autoscaler helm chart version"
 }
\ No newline at end of file
diff --git a/gcp/e6data_with_new_gke/autoscaler.tf b/gcp/e6data_with_new_gke/autoscaler.tf
new file mode 100644
index 00000000..64410187
--- /dev/null
+++ b/gcp/e6data_with_new_gke/autoscaler.tf
@@ -0,0 +1,25 @@
+# Deploys the cluster-autoscaler module into the e6data GKE cluster once the default node pool exists.
+module "autoscaler_deployment" {
+  providers = {
+    kubernetes = kubernetes.gke_e6data
+    helm       = helm.gke_e6data
+  }
+
+  source = "./modules/autoscaler"
+
+  helm_chart_name    = var.autoscaler_helm_chart_name
+  helm_chart_version = var.autoscaler_helm_chart_version
+
+  namespace            = var.autoscaler_namespace
+  service_account_name = var.autoscaler_service_account_name
+
+  cluster_name  = module.gke_e6data.cluster_name
+  nodepool_name = google_container_node_pool.workspace.name
+
+  gcp_project_id = var.gcp_project_id
+
+  tolerations_key   = "e6data-workspace-name"
+  tolerations_value = var.workspace_name
+
+  depends_on = [module.gke_e6data, google_container_node_pool.default_gke_cluster_nodepool]
+}
\ No newline at end of file
diff --git a/gcp/e6data_with_new_gke/default_nodepool.tf b/gcp/e6data_with_new_gke/default_nodepool.tf
index b685767a..1890ad9e 100644
--- a/gcp/e6data_with_new_gke/default_nodepool.tf
+++ b/gcp/e6data_with_new_gke/default_nodepool.tf
@@ -22,12 +22,6 @@ resource "google_container_node_pool" "default_gke_cluster_nodepool" {
     }
   }
 
-  autoscaling {
-    total_min_node_count = 1
-    total_max_node_count = 3
-    location_policy = "ANY"
-  }
-
   lifecycle {
     create_before_destroy = true
     ignore_changes = [node_count, autoscaling, node_config[0].labels]
diff --git a/gcp/e6data_with_new_gke/modules/autoscaler/iam.tf b/gcp/e6data_with_new_gke/modules/autoscaler/iam.tf
new file mode 100644
index 00000000..e29d3ba4
--- /dev/null
+++ b/gcp/e6data_with_new_gke/modules/autoscaler/iam.tf
@@ -0,0 +1,36 @@
+# GCP service account that the cluster-autoscaler pods act as through Workload Identity.
+resource "google_service_account" "cluster_autoscaler" {
+  # account_id is limited to 30 characters, so the cluster name is truncated to leave room for "-ca-sa".
+  account_id   = "${substr(var.cluster_name, 0, 24)}-ca-sa"
+  display_name = "Kubernetes Cluster Autoscaler Service Account"
+}
+
+# Additive grant: google_project_iam_binding is authoritative for the role and would
+# remove every other member of roles/compute.instanceAdmin.v1 in the project.
+resource "google_project_iam_member" "compute_instance_admin" {
+  project = var.gcp_project_id
+  role    = "roles/compute.instanceAdmin.v1"
+  member  = "serviceAccount:${google_service_account.cluster_autoscaler.email}"
+}
+
+# Kubernetes service account used by the Helm release, annotated with the GCP service account it maps to.
+resource "kubernetes_service_account" "cluster_autoscaler" {
+  metadata {
+    name      = var.service_account_name
+    namespace = var.namespace
+
+    annotations = {
+      "iam.gke.io/gcp-service-account" = google_service_account.cluster_autoscaler.email
+    }
+  }
+}
+
+# Allows the Kubernetes service account above to act as the GCP service account.
+resource "google_service_account_iam_binding" "workload_identity_binding" {
+  service_account_id = google_service_account.cluster_autoscaler.name
+  role               = "roles/iam.workloadIdentityUser"
+
+  members = [
+    "serviceAccount:${var.gcp_project_id}.svc.id.goog[${var.namespace}/${kubernetes_service_account.cluster_autoscaler.metadata[0].name}]"
+  ]
+}
\ No newline at end of file
diff --git a/gcp/e6data_with_new_gke/modules/autoscaler/main.tf b/gcp/e6data_with_new_gke/modules/autoscaler/main.tf
new file mode 100644
index 00000000..1fde2a65
--- /dev/null
+++ b/gcp/e6data_with_new_gke/modules/autoscaler/main.tf
@@ -0,0 +1,132 @@
+terraform {
+  required_providers {
+    helm = {
+      source = "hashicorp/helm"
+    }
+    kubernetes = {
+      source = "hashicorp/kubernetes"
+    }
+  }
+}
+
+# Fixed 60 second pause that the Helm release below depends on.
+resource "null_resource" "waiting" {
+  provisioner "local-exec" {
+    command = "sleep 60"
+  }
+}
+
+# Installs the cluster-autoscaler chart, running it under the service account defined in iam.tf.
+resource "helm_release" "autoscaler_deployment" {
+  name       = var.helm_chart_name
+  repository = "https://kubernetes.github.io/autoscaler"
+  chart      = "cluster-autoscaler"
+  namespace  = var.namespace
+  version    = var.helm_chart_version
+  timeout    = 600
+
+  set {
+    name  = "autoDiscovery.clusterName"
+    value = var.cluster_name
+  }
+
+  set {
+    name  = "autoscalingGroupsnamePrefix[0].name"
+    value = "gke-${substr(var.cluster_name, 0, 14)}"
+  }
+
+  set {
+    name  = "autoscalingGroupsnamePrefix[0].maxSize"
+    value = "10"
+  }
+
+  set {
+    name  = "autoscalingGroupsnamePrefix[0].minSize"
+    value = "1"
+  }
+
+  set {
+    name  = "cloudProvider"
+    value = "gce"
+  }
+
+  set {
+    name  = "extraArgs.ignore-daemonsets-utilization"
+    value = "true"
+  }
+
+  set {
+    name  = "extraArgs.scan-interval"
+    value = "10s"
+  }
+
+  set {
+    name  = "extraArgs.leader-elect"
+    value = false
+  }
+
+  set {
+    name  = "extraArgs.scale-down-unneeded-time"
+    value = "3m"
+  }
+
+  set {
+    name  = "extraArgs.scale-down-unready-time"
+    value = "3m"
+  }
+
+  set {
+    name  = "extraArgs.scale-down-utilization-threshold"
+    value = "0.2"
+  }
+
+  set {
+    name  = "extraArgs.scale-down-delay-after-add"
+    value = "3m"
+  }
+
+  set {
+    name  = "extraArgs.scale-down-delay-after-delete"
+    value = "3m"
+  }
+
+  # The service account is created by kubernetes_service_account.cluster_autoscaler, so the chart must not create one.
+  set {
+    name  = "rbac.serviceAccount.create"
+    value = false
+  }
+
+  set {
+    name  = "rbac.serviceAccount.name"
+    value = kubernetes_service_account.cluster_autoscaler.metadata[0].name
+  }
+
+  set {
+    name  = "tolerations[0].key"
+    value = var.tolerations_key
+  }
+
+  set {
+    name  = "tolerations[0].operator"
+    value = "Equal"
+  }
+
+  set {
+    name  = "tolerations[0].value"
+    value = var.tolerations_value
+  }
+
+  set {
+    name  = "tolerations[0].effect"
+    value = "NoSchedule"
+  }
+
+  # Also wait for both IAM grants so the autoscaler is not started before its GCP permissions exist.
+  depends_on = [
+    null_resource.waiting,
+    google_service_account.cluster_autoscaler,
+    google_project_iam_member.compute_instance_admin,
+    google_service_account_iam_binding.workload_identity_binding,
+    kubernetes_service_account.cluster_autoscaler,
+  ]
+}
\ No newline at end of file
diff --git a/gcp/e6data_with_new_gke/modules/autoscaler/variables.tf b/gcp/e6data_with_new_gke/modules/autoscaler/variables.tf
new file mode 100644
index 00000000..7eee5e1d
--- /dev/null
+++ b/gcp/e6data_with_new_gke/modules/autoscaler/variables.tf
@@ -0,0 +1,45 @@
+variable "cluster_name" {
+  type        = string
+  description = "Name of kubernetes cluster"
+}
+
+variable "helm_chart_version" {
+  type        = string
+  description = "Helm chart version to be used"
+}
+
+variable "namespace" {
+  type        = string
+  description = "Namespace to deploy in helm chart"
+}
+
+variable "helm_chart_name" {
+  type        = string
+  description = "Name of helm chart"
+}
+
+variable "service_account_name" {
+  type        = string
+  description = "Kubernetes Service Account Name for RBAC"
+}
+
+variable "tolerations_key" {
+  type        = string
+  description = "tolerations key"
+}
+
+variable "tolerations_value" {
+  type        = string
+  description = "tolerations value"
+}
+
+variable "gcp_project_id" {
+  type        = string
+  description = "GCP Project ID"
+}
+
+# NOTE(review): not referenced by iam.tf or main.tf of this module; kept because the caller passes it.
+variable "nodepool_name" {
+  type        = string
+  description = "e6data nodepool name"
+}
\ No newline at end of file
diff --git a/gcp/e6data_with_new_gke/modules/gke_cluster/gke.tf b/gcp/e6data_with_new_gke/modules/gke_cluster/gke.tf
index 02baec1a..a5e5a652 100644
--- a/gcp/e6data_with_new_gke/modules/gke_cluster/gke.tf
+++ b/gcp/e6data_with_new_gke/modules/gke_cluster/gke.tf
@@ -35,14 +35,18 @@ resource "google_container_cluster" "gke_cluster" {
   ip_allocation_policy {
   }
 
+  cluster_autoscaling {
+    enabled = false
+  }
+
   addons_config {
     http_load_balancing {
       disabled = false
     }
 
     dns_cache_config {
       enabled = var.dns_cache_enabled
     }
   }
 
 
diff --git a/gcp/e6data_with_new_gke/node_pool.tf b/gcp/e6data_with_new_gke/node_pool.tf
index 6b7649f9..b6e8cb81 100644
--- a/gcp/e6data_with_new_gke/node_pool.tf
+++ b/gcp/e6data_with_new_gke/node_pool.tf
@@ -6,11 +6,6 @@ resource "google_container_node_pool" "workspace" {
   version = var.gke_version
   initial_node_count = 0
 
-  autoscaling {
-    total_min_node_count = 0
-    total_max_node_count = var.max_instances_in_nodepool
-    location_policy = "ANY"
-  }
   node_config {
     disk_size_gb = 100
     spot = var.spot_enabled
diff --git a/gcp/e6data_with_new_gke/terraform.tfvars b/gcp/e6data_with_new_gke/terraform.tfvars
index 6a48cf38..8948bb30 100644
--- a/gcp/e6data_with_new_gke/terraform.tfvars
+++ b/gcp/e6data_with_new_gke/terraform.tfvars
@@ -47,4 +47,10 @@ kubernetes_namespace = "namespace" # The namespace to use for Kubernetes resourc
 cost_labels = {} # Cost labels for tracking costs
 # Note: The variable cost_labels only accepts lowercase letters ([a-z]), numeric characters ([0-9]), underscores (_) and dashes (-).
 
-buckets = ["*"] ### List of bucket names that the e6data engine queries and therefore, require read access to. Default is ["*"] which means all buckets, it is advisable to change this.
\ No newline at end of file
+buckets = ["*"] ### List of bucket names that the e6data engine queries and therefore, require read access to. Default is ["*"] which means all buckets, it is advisable to change this.
+
+# Autoscaler Variables
+autoscaler_namespace = "kube-system" ### Namespace to deploy the cluster autoscaler
+autoscaler_service_account_name = "cluster-autoscaler" ### Service account name for the cluster autoscaler
+autoscaler_helm_chart_name = "autoscaler" ### Name of the cluster autoscaler Helm chart
+autoscaler_helm_chart_version = "9.37.0" ### Version of the cluster autoscaler Helm chart
\ No newline at end of file
diff --git a/gcp/e6data_with_new_gke/variables.tf b/gcp/e6data_with_new_gke/variables.tf
index 25783214..d614bcf3 100644
--- a/gcp/e6data_with_new_gke/variables.tf
+++ b/gcp/e6data_with_new_gke/variables.tf
@@ -149,4 +149,25 @@ variable "deletion_protection" {
 variable "authorized_networks" {
   type = map(string)
   description = "authorized_networks"
+}
+
+# Autoscaler variables
+variable "autoscaler_namespace" {
+  type = string
+  description = "Autoscaler namespace"
+}
+
+variable "autoscaler_service_account_name" {
+  type = string
+  description = "Autoscaler service account name"
+}
+
+variable "autoscaler_helm_chart_name" {
+  type = string
+  description = "Autoscaler helm chart name"
+}
+
+variable "autoscaler_helm_chart_version" {
+  type = string
+  description = "Autoscaler helm chart version"
 }
\ No newline at end of file