diff --git a/ecs/example/locust/locustfile.py b/ecs/example/locust/locustfile.py
new file mode 100644
index 00000000..6b75cd98
--- /dev/null
+++ b/ecs/example/locust/locustfile.py
@@ -0,0 +1,35 @@
+"""Load-test scenario requesting endpoints that answer 2xx/3xx/4xx/5xx."""
+
+import random
+
+from locust import HttpLocust, TaskSet, task, between
+
+
+class UserBehaviour(TaskSet):
+    """Tasks weighted 5:2:2:1 across the 2xx, 3xx, 4xx and 5xx requests."""
+
+    @task(5)
+    def status_2xx(self):
+        self.client.get("/status/200")
+
+    @task(2)
+    def status_3xx(self):
+        self.client.get("/redirect-to", params=dict(
+            url="/status/200",
+            status_code=302,
+        ))
+
+    @task(2)
+    def status_4xx(self):
+        self.client.get("/status/400")
+
+    @task(1)
+    def status_5xx(self):
+        self.client.get("/status/500")
+
+
+class WebsiteUser(HttpLocust):
+    """Simulated user running UserBehaviour with a 5-10 wait between tasks."""
+
+    task_set = UserBehaviour
+    wait_time = between(5, 10)
diff --git a/ecs/example/locust/start b/ecs/example/locust/start
new file mode 100755
index 00000000..dab7b0a1
--- /dev/null
+++ b/ecs/example/locust/start
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+set -eux
+
+# Resolve the URL in its own assignment: `set -e` ignores a failed command
+# substitution that is embedded in another command's arguments.
+target_url="$(terraform output lb_url)"
+
+docker run \
+  -p 8089:8089 \
+  --volume "$PWD/locust/locustfile.py:/mnt/locust/locustfile.py" \
+  -e LOCUSTFILE_PATH=/mnt/locust/locustfile.py \
+  -e TARGET_URL="$target_url" \
+  locustio/locust
diff --git a/ecs/example/main.tf b/ecs/example/main.tf
index c0022183..a8213283 100644
--- a/ecs/example/main.tf
+++ b/ecs/example/main.tf
@@ -48,7 +48,8 @@ module "worker_task" {
   environment       = local.environment
   task              = "worker"
   image             = "kennethreitz/httpbin:latest"
-  memory_soft_limit = 128
+  memory_soft_limit = 48
+  cpu               = 128
 
   environment_variables = {
     DEBUG = "True"
@@ -72,7 +73,8 @@ module "web_task" {
   environment       = local.environment
   task              = "web"
   image             = "kennethreitz/httpbin:latest"
-  memory_soft_limit = 128
+  memory_soft_limit = 48
+  cpu               = 128
   ports             = [80]
 
   environment_variables = {
@@ -87,7 +89,7 @@ module "web" {
   name                = "web"
   cluster_arn         = module.cluster.arn
   task_definition_arn = module.web_task.arn
-  desired_count       = 1
+  desired_count       = 2
 
   vpc_id       = module.cluster.vpc_id
   listener_arn = module.cluster.http_listener_arn
@@ -96,6 +98,31 @@ module "web" {
   healthcheck_path = "/"
 }
 
+module "dashboard" {
+  source = "./../../cloudwatch/dashboard"
+
+  name = "terraform-ecs-example"
+  widgets = [
+    module.web.widgets.responses,
+    module.web.widgets.response_percentages,
+    module.web.widgets.response_time,
+    module.web.widgets.scaling,
+    module.web.widgets.cpu_utilization,
+    module.web.widgets.memory_utilization,
+    module.worker.widgets.scaling,
+    module.worker.widgets.cpu_utilization,
+    module.worker.widgets.memory_utilization,
+  ]
+}
+
 output "hosts_id" {
   value = module.hosts.id
 }
+
+output "dashboard_url" {
+  value = module.dashboard.url
+}
+
+output "lb_url" {
+  value = "http://${module.cluster.load_balancer_domain}"
+}
diff --git a/ecs/main.tf b/ecs/main.tf
index dfb64fd4..4b1bfeed 100644
--- a/ecs/main.tf
+++ b/ecs/main.tf
@@ -23,5 +23,10 @@ resource "aws_ecs_cluster" "cluster" {
   count = var.create ? 1 : 0
 
   name = "${var.project}-${var.environment}"
+
+  setting {
+    name  = "containerInsights"
+    value = "enabled"
+  }
 }
 
diff --git a/ecs/services/web/README.md b/ecs/services/web/README.md
index a09e21bf..0fd7e847 100644
--- a/ecs/services/web/README.md
+++ b/ecs/services/web/README.md
@@ -117,6 +117,10 @@ Creates an ECS service exposed to the internet using an Application Load Balance
 
     Service id
 
+* `metrics`
+
+    Cloudwatch metrics, see [metrics.tf](./metrics.tf)
+
 * `target_group_arn`
 
     Load balancer target group ARN
@@ -124,3 +128,7 @@ Creates an ECS service exposed to the internet using an Application Load Balance
 * `target_group_name`
 
     Load balancer target group name
+
+* `widgets`
+
+    Cloudwatch dashboard widgets, see [widgets.tf](./widgets.tf)
diff --git a/ecs/services/web/main.tf b/ecs/services/web/main.tf
index c32b12ca..fbef48c3 100644
--- a/ecs/services/web/main.tf
+++ b/ecs/services/web/main.tf
@@ -86,4 +86,3 @@ resource "aws_lb_listener_rule" "service" {
     values = [var.rule_path]
   }
 }
-
diff --git a/ecs/services/web/metrics.tf b/ecs/services/web/metrics.tf
new file mode 100644
index 00000000..d5cebe0a
--- /dev/null
+++ b/ecs/services/web/metrics.tf
@@ -0,0 +1,276 @@
+locals {
+  metrics = {
+    requests                       = module.metric_requests
+    status_2xx_responses           = module.metrics_response_statuses.out_map["2xx"]
+    status_2xx_response_percentage = module.metrics_response_status_percentages.out_map["2xx"]
+    status_3xx_responses           = module.metrics_response_statuses.out_map["3xx"]
+    status_3xx_response_percentage = module.metrics_response_status_percentages.out_map["3xx"]
+    status_4xx_responses           = module.metrics_response_statuses.out_map["4xx"]
+    status_4xx_response_percentage = module.metrics_response_status_percentages.out_map["4xx"]
+    status_5xx_responses           = module.metrics_response_statuses.out_map["5xx"]
+    status_5xx_response_percentage = module.metrics_response_status_percentages.out_map["5xx"]
+    connection_errors              = module.metric_connection_errors
+    connection_error_percentage    = module.metric_connection_error_percentage
+    average_response_time          = module.metrics_response_time.out_map.average
+    p50_response_time              = module.metrics_response_time.out_map.p50
+    p90_response_time              = module.metrics_response_time.out_map.p90
+    p95_response_time              = module.metrics_response_time.out_map.p95
+    p99_response_time              = module.metrics_response_time.out_map.p99
+    max_response_time              = module.metrics_response_time.out_map.max
+    desired_tasks                  = module.metrics_tasks.out_map.desired
+    pending_tasks                  = module.metrics_tasks.out_map.pending
+    running_tasks                  = module.metrics_tasks.out_map.running
+    healthy_tasks                  = module.metric_healthy_tasks
+    average_cpu_reservation        = module.metric_average_cpu_reservation
+    min_cpu_utilization            = module.metrics_cpu_utilization.out_map.min
+    average_cpu_utilization        = module.metrics_cpu_utilization.out_map.average
+    max_cpu_utilization            = module.metrics_cpu_utilization.out_map.max
+    average_memory_reservation     = module.metric_average_memory_reservation
+    min_memory_utilization         = module.metrics_memory_utilization.out_map.min
+    average_memory_utilization     = module.metrics_memory_utilization.out_map.average
+    max_memory_utilization         = module.metrics_memory_utilization.out_map.max
+  }
+}
+
+data "aws_lb_listener" "listener" {
+  count = var.create ? 1 : 0
+
+  arn = var.listener_arn
+}
+
+data "aws_lb" "lb" {
+  count = var.create ? 1 : 0
+
+  arn = data.aws_lb_listener.listener[0].load_balancer_arn
+}
+
+module "cloudwatch_consts" {
+  source = "./../../../cloudwatch/consts"
+}
+
+locals {
+  colors = module.cloudwatch_consts.colors
+}
+
+module "metric_requests" {
+  source = "./../../../cloudwatch/metric"
+
+  namespace = "AWS/ApplicationELB"
+  name      = "RequestCount"
+  label     = "Responses"
+  stat      = "Sum"
+  period    = 60
+
+  dimensions = {
+    LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : ""
+    TargetGroup  = var.create ? aws_lb_target_group.service[0].arn_suffix : ""
+  }
+}
+
+locals {
+  metrics_response_statuses = {
+    "2xx" = { color = local.colors.green }
+    "3xx" = { color = local.colors.blue }
+    "4xx" = { color = local.colors.orange }
+    "5xx" = { color = local.colors.red }
+  }
+}
+
+module "metrics_response_statuses" {
+  source = "./../../../cloudwatch/metric/many"
+
+  vars_map = { for status, variant in local.metrics_response_statuses : status => {
+    namespace = "AWS/ApplicationELB"
+    name      = "HTTPCode_Target_${upper(status)}_Count"
+    label     = "${status} responses"
+    color     = variant.color
+    stat      = "Sum"
+    period    = 60
+
+    dimensions = {
+      LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : ""
+      TargetGroup  = var.create ? aws_lb_target_group.service[0].arn_suffix : ""
+    }
+  } }
+}
+
+module "metrics_response_status_percentages" {
+  source = "./../../../cloudwatch/metric_expression/many"
+
+  vars_map = { for status, variant in local.metrics_response_statuses : status => {
+    expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metrics_response_statuses.out_map[status].id}, 0) / ${module.metric_requests.id} * 100)"
+    label      = "${status} responses"
+    color      = variant.color
+  } }
+}
+
+module "metric_connection_errors" {
+  source = "./../../../cloudwatch/metric"
+
+  namespace = "AWS/ApplicationELB"
+  name      = "TargetConnectionErrorCount"
+  label     = "Connection errors"
+  color     = local.colors.purple
+  stat      = "Sum"
+  period    = 60
+
+  dimensions = {
+    LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : ""
+    TargetGroup  = var.create ? aws_lb_target_group.service[0].arn_suffix : ""
+  }
+}
+
+module "metric_connection_error_percentage" {
+  source = "./../../../cloudwatch/metric_expression"
+
+  expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_connection_errors.id}, 0) / ${module.metric_requests.id} * 100)"
+  label      = "Connection errors"
+  color      = module.metric_connection_errors.color
+}
+
+locals {
+  metrics_response_time_variants = {
+    average = { stat = "Average", color = local.colors.red }
+    p50     = { stat = "p50", color = local.colors.red }
+    p90     = { stat = "p90", color = local.colors.orange }
+    p95     = { stat = "p95", color = local.colors.orange }
+    p99     = { stat = "p99", color = local.colors.light_red }
+    max     = { stat = "Maximum", color = local.colors.light_orange }
+  }
+}
+
+module "metrics_response_time" {
+  source = "./../../../cloudwatch/metric/many"
+
+  vars_map = { for k, variant in local.metrics_response_time_variants : k => {
+    namespace = "AWS/ApplicationELB"
+    name      = "TargetResponseTime"
+    label     = "${variant.stat} response time"
+    color     = variant.color
+    stat      = variant.stat
+    period    = 60
+
+    dimensions = {
+      LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : ""
+      TargetGroup  = var.create ? aws_lb_target_group.service[0].arn_suffix : ""
+    }
+  } }
+}
+
+locals {
+  metrics_tasks_variants = {
+    desired = { state = "Desired", color = local.colors.grey }
+    pending = { state = "Pending", color = local.colors.orange }
+    running = { state = "Running", color = local.colors.light_green }
+  }
+}
+
+module "metrics_tasks" {
+  source = "./../../../cloudwatch/metric/many"
+
+  vars_map = { for k, variant in local.metrics_tasks_variants : k => {
+    namespace = "ECS/ContainerInsights"
+    name      = "${variant.state}TaskCount"
+    label     = "${variant.state} task count"
+    color     = variant.color
+    stat      = "Average"
+    period    = 60
+
+    dimensions = {
+      ServiceName = var.name
+      ClusterName = local.cluster_name
+    }
+  } }
+}
+
+module "metric_healthy_tasks" {
+  source = "./../../../cloudwatch/metric"
+
+  namespace = "AWS/ApplicationELB"
+  name      = "HealthyHostCount"
+  label     = "Healthy task count"
+  color     = local.colors.green
+  stat      = "Average"
+  period    = 60
+
+  dimensions = {
+    LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : ""
+    TargetGroup  = var.create ? aws_lb_target_group.service[0].arn_suffix : ""
+  }
+}
+
+module "metric_average_cpu_reservation" {
+  source = "./../../../cloudwatch/metric"
+
+  namespace = "ECS/ContainerInsights"
+  name      = "CpuReserved"
+  label     = "Average CPU reserved"
+  color     = local.colors.grey
+  stat      = "Average"
+  period    = 60
+
+  dimensions = {
+    ServiceName = var.name
+    ClusterName = local.cluster_name
+  }
+}
+
+locals {
+  metrics_utilization_variants = {
+    min     = { stat = "Minimum", color = local.colors.light_orange }
+    average = { stat = "Average", color = local.colors.orange }
+    max     = { stat = "Maximum", color = local.colors.red }
+  }
+}
+
+module "metrics_cpu_utilization" {
+  source = "./../../../cloudwatch/metric/many"
+
+  vars_map = { for k, variant in local.metrics_utilization_variants : k => {
+    namespace = "ECS/ContainerInsights"
+    name      = "CpuUtilized"
+    label     = "${variant.stat} CPU utilized"
+    color     = variant.color
+    stat      = variant.stat
+    period    = 60
+
+    dimensions = {
+      ServiceName = var.name
+      ClusterName = local.cluster_name
+    }
+  } }
+}
+
+module "metric_average_memory_reservation" {
+  source = "./../../../cloudwatch/metric"
+
+  namespace = "ECS/ContainerInsights"
+  name      = "MemoryReserved"
+  label     = "Average memory reserved"
+  color     = local.colors.grey
+  stat      = "Average"
+  period    = 60
+
+  dimensions = {
+    ServiceName = var.name
+    ClusterName = local.cluster_name
+  }
+}
+
+module "metrics_memory_utilization" {
+  source = "./../../../cloudwatch/metric/many"
+
+  vars_map = { for k, variant in local.metrics_utilization_variants : k => {
+    namespace = "ECS/ContainerInsights"
+    name      = "MemoryUtilized"
+    label     = "${variant.stat} memory utilized"
+    color     = variant.color
+    stat      = variant.stat
+    period    = 60
+
+    dimensions = {
+      ServiceName = var.name
+      ClusterName = local.cluster_name
+    }
+  } }
+}
diff --git a/ecs/services/web/outputs.tf b/ecs/services/web/outputs.tf
index 2dae7430..42008b6b 100644
--- a/ecs/services/web/outputs.tf
+++ b/ecs/services/web/outputs.tf
@@ -13,3 +13,12 @@ output "target_group_arn" {
   value = var.create ? aws_lb_target_group.service[0].arn : null
 }
 
+output "metrics" {
+  description = "Cloudwatch metrics, see [metrics.tf](./metrics.tf)"
+  value       = local.metrics
+}
+
+output "widgets" {
+  description = "Cloudwatch dashboard widgets, see [widgets.tf](./widgets.tf)"
+  value       = local.widgets
+}
diff --git a/ecs/services/web/widgets.tf b/ecs/services/web/widgets.tf
new file mode 100644
index 00000000..2961171d
--- /dev/null
+++ b/ecs/services/web/widgets.tf
@@ -0,0 +1,99 @@
+locals {
+  widgets = {
+    responses            = module.widget_responses
+    response_percentages = module.widget_response_percentages
+    response_time        = module.widget_response_time
+    scaling              = module.widget_scaling
+    cpu_utilization      = module.widget_cpu_utilization
+    memory_utilization   = module.widget_memory_utilization
+  }
+}
+
+module "widget_responses" {
+  source = "./../../../cloudwatch/metric_widget"
+
+  title   = "${var.name} service responses"
+  stacked = true
+  left_metrics = [
+    local.metrics.connection_errors,
+    local.metrics.status_5xx_responses,
+    local.metrics.status_4xx_responses,
+    local.metrics.status_3xx_responses,
+    local.metrics.status_2xx_responses,
+  ]
+}
+
+module "widget_response_percentages" {
+  source = "./../../../cloudwatch/metric_widget"
+
+  title   = "${var.name} service response percentages"
+  stacked = true
+  left_metrics = [
+    local.metrics.connection_error_percentage,
+    local.metrics.status_5xx_response_percentage,
+    local.metrics.status_4xx_response_percentage,
+    local.metrics.status_3xx_response_percentage,
+    local.metrics.status_2xx_response_percentage,
+  ]
+  left_range = [0, 100]
+  hidden_metrics = [
+    local.metrics.requests,
+    local.metrics.status_2xx_responses,
+    local.metrics.status_3xx_responses,
+    local.metrics.status_4xx_responses,
+    local.metrics.status_5xx_responses,
+    local.metrics.connection_errors,
+  ]
+}
+
+module "widget_response_time" {
+  source = "./../../../cloudwatch/metric_widget"
+
+  title   = "${var.name} service response times"
+  stacked = true
+  left_metrics = [
+    local.metrics.p50_response_time,
+    local.metrics.p95_response_time,
+    local.metrics.p99_response_time,
+    local.metrics.max_response_time,
+  ]
+}
+
+module "widget_scaling" {
+  source = "./../../../cloudwatch/metric_widget"
+
+  title = "${var.name} service scaling"
+  left_metrics = [
+    local.metrics.desired_tasks,
+    local.metrics.pending_tasks,
+    local.metrics.running_tasks,
+    local.metrics.healthy_tasks,
+  ]
+  left_range = [0, null]
+}
+
+module "widget_cpu_utilization" {
+  source = "./../../../cloudwatch/metric_widget"
+
+  title = "${var.name} service CPU utilization"
+  left_metrics = [
+    local.metrics.average_cpu_reservation,
+    local.metrics.min_cpu_utilization,
+    local.metrics.average_cpu_utilization,
+    local.metrics.max_cpu_utilization,
+  ]
+  left_range = [0, null]
+}
+
+module "widget_memory_utilization" {
+  source = "./../../../cloudwatch/metric_widget"
+
+  title = "${var.name} service memory utilization"
+  left_metrics = [
+    local.metrics.average_memory_reservation,
+    local.metrics.min_memory_utilization,
+    local.metrics.average_memory_utilization,
+    local.metrics.max_memory_utilization,
+  ]
+  left_range = [0, null]
+}
diff --git a/ecs/services/worker/README.md b/ecs/services/worker/README.md
index 188b66f4..d4413e7a 100644
--- a/ecs/services/worker/README.md
+++ b/ecs/services/worker/README.md
@@ -52,3 +52,11 @@ Creates an ECS service for background workers
 * `id`
 
     Service id
+
+* `metrics`
+
+    Cloudwatch metrics, see [metrics.tf](./metrics.tf)
+
+* `widgets`
+
+    Cloudwatch dashboard widgets, see [widgets.tf](./widgets.tf)
diff --git a/ecs/services/worker/main.tf b/ecs/services/worker/main.tf
index 0a349946..0bce2830 100644
--- a/ecs/services/worker/main.tf
+++ b/ecs/services/worker/main.tf
@@ -1,3 +1,13 @@
+locals {
+  cluster_name = var.create ? substr(data.aws_arn.cluster[0].resource, length("cluster/"), -1) : ""
+}
+
+data "aws_arn" "cluster" {
+  count = var.create ? 1 : 0
+
+  arn = var.cluster_arn
+}
+
 resource "aws_ecs_service" "service" {
   count = var.create ? 1 : 0
 
diff --git a/ecs/services/worker/metrics.tf b/ecs/services/worker/metrics.tf
new file mode 100644
index 00000000..d566868d
--- /dev/null
+++ b/ecs/services/worker/metrics.tf
@@ -0,0 +1,125 @@
+locals {
+  metrics = {
+    desired_tasks              = module.metrics_tasks.out_map.desired
+    pending_tasks              = module.metrics_tasks.out_map.pending
+    running_tasks              = module.metrics_tasks.out_map.running
+    average_cpu_reservation    = module.metric_average_cpu_reservation
+    min_cpu_utilization        = module.metrics_cpu_utilization.out_map.min
+    average_cpu_utilization    = module.metrics_cpu_utilization.out_map.average
+    max_cpu_utilization        = module.metrics_cpu_utilization.out_map.max
+    average_memory_reservation = module.metric_average_memory_reservation
+    min_memory_utilization     = module.metrics_memory_utilization.out_map.min
+    average_memory_utilization = module.metrics_memory_utilization.out_map.average
+    max_memory_utilization     = module.metrics_memory_utilization.out_map.max
+  }
+}
+
+module "cloudwatch_consts" {
+  source = "./../../../cloudwatch/consts"
+}
+
+locals {
+  colors = module.cloudwatch_consts.colors
+}
+
+locals {
+  metrics_tasks_variants = {
+    desired = { state = "Desired", color = local.colors.grey }
+    pending = { state = "Pending", color = local.colors.orange }
+    running = { state = "Running", color = local.colors.green }
+  }
+}
+
+module "metrics_tasks" {
+  source = "./../../../cloudwatch/metric/many"
+
+  vars_map = { for k, variant in local.metrics_tasks_variants : k => {
+    namespace = "ECS/ContainerInsights"
+    name      = "${variant.state}TaskCount"
+    label     = "${variant.state} task count"
+    color     = variant.color
+    stat      = "Average"
+    period    = 60
+
+    dimensions = {
+      ServiceName = var.name
+      ClusterName = local.cluster_name
+    }
+  } }
+}
+
+module "metric_average_cpu_reservation" {
+  source = "./../../../cloudwatch/metric"
+
+  namespace = "ECS/ContainerInsights"
+  name      = "CpuReserved"
+  label     = "Average CPU reserved"
+  color     = local.colors.grey
+  stat      = "Average"
+  period    = 60
+
+  dimensions = {
+    ServiceName = var.name
+    ClusterName = local.cluster_name
+  }
+}
+
+locals {
+  metrics_utilization_variants = {
+    min     = { stat = "Minimum", color = local.colors.light_orange }
+    average = { stat = "Average", color = local.colors.orange }
+    max     = { stat = "Maximum", color = local.colors.red }
+  }
+}
+
+module "metrics_cpu_utilization" {
+  source = "./../../../cloudwatch/metric/many"
+
+  vars_map = { for k, variant in local.metrics_utilization_variants : k => {
+    namespace = "ECS/ContainerInsights"
+    name      = "CpuUtilized"
+    label     = "${variant.stat} CPU utilized"
+    color     = variant.color
+    stat      = variant.stat
+    period    = 60
+
+    dimensions = {
+      ServiceName = var.name
+      ClusterName = local.cluster_name
+    }
+  } }
+}
+
+module "metric_average_memory_reservation" {
+  source = "./../../../cloudwatch/metric"
+
+  namespace = "ECS/ContainerInsights"
+  name      = "MemoryReserved"
+  label     = "Average memory reserved"
+  color     = local.colors.grey
+  stat      = "Average"
+  period    = 60
+
+  dimensions = {
+    ServiceName = var.name
+    ClusterName = local.cluster_name
+  }
+}
+
+module "metrics_memory_utilization" {
+  source = "./../../../cloudwatch/metric/many"
+
+  vars_map = { for k, variant in local.metrics_utilization_variants : k => {
+    namespace = "ECS/ContainerInsights"
+    name      = "MemoryUtilized"
+    label     = "${variant.stat} memory utilized"
+    color     = variant.color
+    stat      = variant.stat
+    period    = 60
+
+    dimensions = {
+      ServiceName = var.name
+      ClusterName = local.cluster_name
+    }
+  } }
+}
diff --git a/ecs/services/worker/outputs.tf b/ecs/services/worker/outputs.tf
index 4cd50a6f..bd3b7656 100644
--- a/ecs/services/worker/outputs.tf
+++ b/ecs/services/worker/outputs.tf
@@ -3,3 +3,12 @@ output "id" {
   value = var.create ? aws_ecs_service.service[0].id : null
 }
 
+output "metrics" {
+  description = "Cloudwatch metrics, see [metrics.tf](./metrics.tf)"
+  value       = local.metrics
+}
+
+output "widgets" {
+  description = "Cloudwatch dashboard widgets, see [widgets.tf](./widgets.tf)"
+  value       = local.widgets
+}
diff --git a/ecs/services/worker/widgets.tf b/ecs/services/worker/widgets.tf
new file mode 100644
index 00000000..3ac84172
--- /dev/null
+++ b/ecs/services/worker/widgets.tf
@@ -0,0 +1,45 @@
+locals {
+  widgets = {
+    scaling            = module.widget_scaling
+    cpu_utilization    = module.widget_cpu_utilization
+    memory_utilization = module.widget_memory_utilization
+  }
+}
+
+module "widget_scaling" {
+  source = "./../../../cloudwatch/metric_widget"
+
+  title = "${var.name} service scaling"
+  left_metrics = [
+    local.metrics.desired_tasks,
+    local.metrics.pending_tasks,
+    local.metrics.running_tasks,
+  ]
+  left_range = [0, null]
+}
+
+module "widget_cpu_utilization" {
+  source = "./../../../cloudwatch/metric_widget"
+
+  title = "${var.name} service CPU utilization"
+  left_metrics = [
+    local.metrics.average_cpu_reservation,
+    local.metrics.min_cpu_utilization,
+    local.metrics.average_cpu_utilization,
+    local.metrics.max_cpu_utilization,
+  ]
+  left_range = [0, null]
+}
+
+module "widget_memory_utilization" {
+  source = "./../../../cloudwatch/metric_widget"
+
+  title = "${var.name} service memory utilization"
+  left_metrics = [
+    local.metrics.average_memory_reservation,
+    local.metrics.min_memory_utilization,
+    local.metrics.average_memory_utilization,
+    local.metrics.max_memory_utilization,
+  ]
+  left_range = [0, null]
+}