From 734eba3010cf2115809e8daaeb475dcbe7914225 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Fri, 24 Apr 2020 16:00:41 +0200 Subject: [PATCH 01/17] docs(ecs): added a simple locustfile to easily generate some traffic to example's web service --- ecs/example/locust/locustfile.py | 29 +++++++++++++++++++++++++++++ ecs/example/locust/start | 10 ++++++++++ ecs/example/main.tf | 4 ++++ 3 files changed, 43 insertions(+) create mode 100644 ecs/example/locust/locustfile.py create mode 100755 ecs/example/locust/start diff --git a/ecs/example/locust/locustfile.py b/ecs/example/locust/locustfile.py new file mode 100644 index 00000000..6b75cd98 --- /dev/null +++ b/ecs/example/locust/locustfile.py @@ -0,0 +1,29 @@ +import random + +from locust import HttpLocust, TaskSet, task, between + + +class UserBehaviour(TaskSet): + @task(5) + def status_2xx(self): + self.client.get("/status/200") + + @task(2) + def status_3xx(self): + self.client.get("/redirect-to", params=dict( + url="/status/200", + status_code=302, + )) + + @task(2) + def status_4xx(self): + self.client.get("/status/400") + + @task(1) + def status_5xx(self): + self.client.get("/status/500") + + +class WebsiteUser(HttpLocust): + task_set = UserBehaviour + wait_time = between(5, 10) diff --git a/ecs/example/locust/start b/ecs/example/locust/start new file mode 100755 index 00000000..dab7b0a1 --- /dev/null +++ b/ecs/example/locust/start @@ -0,0 +1,10 @@ +#!/bin/sh + +set -eux + +docker run \ + -p 8089:8089 \ + --volume "$PWD/locust/locustfile.py:/mnt/locust/locustfile.py" \ + -e LOCUSTFILE_PATH=/mnt/locust/locustfile.py \ + -e TARGET_URL="$(terraform output lb_url)" \ + locustio/locust diff --git a/ecs/example/main.tf b/ecs/example/main.tf index c0022183..9b90d329 100644 --- a/ecs/example/main.tf +++ b/ecs/example/main.tf @@ -99,3 +99,7 @@ module "web" { output "hosts_id" { value = module.hosts.id } + +output "lb_url" { + value = "http://${module.cluster.load_balancer_domain}" +} From b1f8435ce6afbf9d50c32147a2830554d230e529 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Fri, 24 Apr 2020 16:02:24 +0200 Subject: [PATCH 02/17] feat(ecs/services/web): added response status related cloudwatch metrics and widgets --- ecs/example/main.tf | 14 +++ ecs/services/web/README.md | 44 ++++++++++ ecs/services/web/main.tf | 168 ++++++++++++++++++++++++++++++++++++ ecs/services/web/outputs.tf | 50 +++++++++++ 4 files changed, 276 insertions(+) diff --git a/ecs/example/main.tf b/ecs/example/main.tf index 9b90d329..13d58181 100644 --- a/ecs/example/main.tf +++ b/ecs/example/main.tf @@ -96,10 +96,24 @@ module "web" { healthcheck_path = "/" } +module "dashboard" { + source = "./../../cloudwatch/dashboard" + + name = "terraform-ecs-example" + widgets = [ + module.web.widget_responses, + module.web.widget_response_ratios, + ] +} + output "hosts_id" { value = module.hosts.id } +output "dashboard_url" { + value = module.dashboard.url +} + output "lb_url" { value = "http://${module.cluster.load_balancer_domain}" } diff --git a/ecs/services/web/README.md b/ecs/services/web/README.md index a09e21bf..2244466a 100644 --- a/ecs/services/web/README.md +++ b/ecs/services/web/README.md @@ -117,6 +117,42 @@ Creates an ECS service exposed to the internet using an Application Load Balance Service id +* `metric_2xx_responses` + + Cloudwatch metric tracking the number of 2xx responses + +* `metric_2xx_responses_ratio` + + Cloudwatch metric tracking percentage of 2xx responses + +* `metric_3xx_responses` + + Cloudwatch metric tracking the number of 3xx responses + +* `metric_3xx_responses_ratio` + + Cloudwatch metric tracking percentage of 3xx responses + +* `metric_4xx_responses` + + Cloudwatch metric tracking the number of 4xx responses + +* `metric_4xx_responses_ratio` + + Cloudwatch metric tracking percentage of 4xx responses + +* `metric_5xx_responses` + + Cloudwatch metric tracking the number of 5xx responses + +* `metric_5xx_responses_ratio` + + Cloudwatch metric tracking percentage of 5xx responses + +* `metric_responses` + + Cloudwatch metric tracking total number of responses + * `target_group_arn` Load balancer target group ARN @@ -124,3 +160,11 @@ Creates an ECS service exposed to the internet using an Application Load Balance * `target_group_name` Load balancer target group name + +* `widget_response_ratios` + + Cloudwatch dashboard widget that shows a breakdown of response status code percentages + +* `widget_responses` + + Cloudwatch dashboard widget that shows a breakdown of response status codes diff --git a/ecs/services/web/main.tf b/ecs/services/web/main.tf index c32b12ca..f944b469 100644 --- a/ecs/services/web/main.tf +++ b/ecs/services/web/main.tf @@ -87,3 +87,171 @@ resource "aws_lb_listener_rule" "service" { } } +# cloudwatch metrics ---------------------------------------------------------- + +data "aws_lb_listener" "listener" { + count = var.create ? 1 : 0 + + arn = var.listener_arn +} + +data "aws_lb" "lb" { + count = var.create ? 1 : 0 + + arn = data.aws_lb_listener.listener[0].load_balancer_arn +} + +module "cloudwatch_consts" { + source = "./../../../cloudwatch/consts" +} + +locals { + colors = module.cloudwatch_consts.colors +} + +module "metric_responses" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "RequestCount" + label = "Responses" + stat = "Sum" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_2xx_responses" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "HTTPCode_Target_2XX_Count" + label = "2xx responses" + color = local.colors.green + stat = "Sum" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_2xx_responses_ratio" { + source = "./../../../cloudwatch/metric_expression" + + expression = "IF(${module.metric_responses.id} == 0, 0, FILL(${module.metric_2xx_responses.id}, 0) / ${module.metric_responses.id} * 100)" + label = "2xx response ratio" + color = module.metric_2xx_responses.color +} + +module "metric_3xx_responses" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "HTTPCode_Target_3XX_Count" + label = "3xx responses" + color = local.colors.blue + stat = "Sum" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_3xx_responses_ratio" { + source = "./../../../cloudwatch/metric_expression" + + expression = "IF(${module.metric_responses.id} == 0, 0, FILL(${module.metric_3xx_responses.id}, 0) / ${module.metric_responses.id} * 100)" + label = "3xx response ratio" + color = module.metric_3xx_responses.color +} + +module "metric_4xx_responses" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "HTTPCode_Target_4XX_Count" + label = "4xx responses" + color = local.colors.orange + stat = "Sum" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_4xx_responses_ratio" { + source = "./../../../cloudwatch/metric_expression" + + expression = "IF(${module.metric_responses.id} == 0, 0, FILL(${module.metric_4xx_responses.id}, 0) / ${module.metric_responses.id} * 100)" + label = "4xx response ratio" + color = module.metric_4xx_responses.color +} + +module "metric_5xx_responses" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "HTTPCode_Target_5XX_Count" + label = "5xx responses" + color = local.colors.red + stat = "Sum" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_5xx_responses_ratio" { + source = "./../../../cloudwatch/metric_expression" + + expression = "IF(${module.metric_responses.id} == 0, 0, FILL(${module.metric_5xx_responses.id}, 0) / ${module.metric_responses.id} * 100)" + label = "5xx response ratio" + color = module.metric_5xx_responses.color +} + +# cloudwatch dashboard widgets ------------------------------------------------ + +module "widget_responses" { + source = "./../../../cloudwatch/metric_widget" + + title = "Responses" + stacked = true + left_metrics = [ + module.metric_5xx_responses, + module.metric_4xx_responses, + module.metric_3xx_responses, + module.metric_2xx_responses, + ] +} + +module "widget_response_ratios" { + source = "./../../../cloudwatch/metric_widget" + + title = "Response ratios" + stacked = true + left_metrics = [ + module.metric_5xx_responses_ratio, + module.metric_4xx_responses_ratio, + module.metric_3xx_responses_ratio, + module.metric_2xx_responses_ratio, + ] + left_range = [0, 100] + hidden_metrics = [ + module.metric_responses, + module.metric_2xx_responses, + module.metric_3xx_responses, + module.metric_4xx_responses, + module.metric_5xx_responses, + ] +} diff --git a/ecs/services/web/outputs.tf b/ecs/services/web/outputs.tf index 2dae7430..1c03827f 100644 --- a/ecs/services/web/outputs.tf +++ b/ecs/services/web/outputs.tf @@ -13,3 +13,53 @@ output "target_group_arn" { value = var.create ? aws_lb_target_group.service[0].arn : null } +# cloudwatch metrics ---------------------------------------------------------- + +output "metric_responses" { + description = "Cloudwatch metric tracking total number of responses" + value = module.metric_responses +} +output "metric_2xx_responses" { + description = "Cloudwatch metric tracking the number of 2xx responses" + value = module.metric_2xx_responses +} +output "metric_2xx_responses_ratio" { + description = "Cloudwatch metric tracking percentage of 2xx responses" + value = module.metric_2xx_responses_ratio +} +output "metric_3xx_responses" { + description = "Cloudwatch metric tracking the number of 3xx responses" + value = module.metric_3xx_responses +} +output "metric_3xx_responses_ratio" { + description = "Cloudwatch metric tracking percentage of 3xx responses" + value = module.metric_3xx_responses_ratio +} +output "metric_4xx_responses" { + description = "Cloudwatch metric tracking the number of 4xx responses" + value = module.metric_4xx_responses +} +output "metric_4xx_responses_ratio" { + description = "Cloudwatch metric tracking percentage of 4xx responses" + value = module.metric_4xx_responses_ratio +} +output "metric_5xx_responses" { + description = "Cloudwatch metric tracking the number of 5xx responses" + value = module.metric_5xx_responses +} +output "metric_5xx_responses_ratio" { + description = "Cloudwatch metric tracking percentage of 5xx responses" + value = module.metric_5xx_responses_ratio +} + +# cloudwatch dashboard widgets ------------------------------------------------ + +output "widget_responses" { + description = "Cloudwatch dashboard widget that shows a breakdown of response status codes" + value = module.widget_responses +} + +output "widget_response_ratios" { + description = "Cloudwatch dashboard widget that shows a breakdown of response status code percentages" + value = module.widget_response_ratios +} From 53d57a26fc55a6fd3f57b7db262a3220442c2014 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Fri, 24 Apr 2020 16:31:17 +0200 Subject: [PATCH 03/17] feat(ecs/services/web): added response time cloudwatch metrics and dashboard widget --- ecs/example/main.tf | 1 + ecs/services/web/README.md | 28 ++++++++++ ecs/services/web/main.tf | 105 ++++++++++++++++++++++++++++++++++++ ecs/services/web/outputs.tf | 29 ++++++++++ 4 files changed, 163 insertions(+) diff --git a/ecs/example/main.tf b/ecs/example/main.tf index 13d58181..66d5476b 100644 --- a/ecs/example/main.tf +++ b/ecs/example/main.tf @@ -103,6 +103,7 @@ module "dashboard" { widgets = [ module.web.widget_responses, module.web.widget_response_ratios, + module.web.widget_response_time, ] } diff --git a/ecs/services/web/README.md b/ecs/services/web/README.md index 2244466a..946d3ad2 100644 --- a/ecs/services/web/README.md +++ b/ecs/services/web/README.md @@ -149,6 +149,30 @@ Creates an ECS service exposed to the internet using an Application Load Balance Cloudwatch metric tracking percentage of 5xx responses +* `metric_average_response_time` + + Cloudwatch metric tracking average response time + +* `metric_max_response_time` + + Cloudwatch metric tracking maximum response time + +* `metric_p50_response_time` + + Cloudwatch metric tracking median response time + +* `metric_p90_response_time` + + Cloudwatch metric tracking 90th percentile response time + +* `metric_p95_response_time` + + Cloudwatch metric tracking 95th percentile response time + +* `metric_p99_response_time` + + Cloudwatch metric tracking 99th percentile response time + * `metric_responses` Cloudwatch metric tracking total number of responses @@ -165,6 +189,10 @@ Creates an ECS service exposed to the internet using an Application Load Balance Cloudwatch dashboard widget that shows a breakdown of response status code percentages +* `widget_response_time` + + Cloudwatch dashboard widget that shows a breakdown of response time percentiles + * `widget_responses` Cloudwatch dashboard widget that shows a breakdown of response status codes diff --git a/ecs/services/web/main.tf b/ecs/services/web/main.tf index f944b469..9ea6cd33 100644 --- a/ecs/services/web/main.tf +++ b/ecs/services/web/main.tf @@ -220,6 +220,98 @@ module "metric_5xx_responses_ratio" { color = module.metric_5xx_responses.color } +module "metric_average_response_time" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "TargetResponseTime" + label = "Average response time" + color = local.colors.red + stat = "Average" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_p50_response_time" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "TargetResponseTime" + label = "p50 response time" + color = local.colors.red + stat = "p50" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_p90_response_time" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "TargetResponseTime" + label = "p90 response time" + stat = "p90" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_p95_response_time" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "TargetResponseTime" + label = "p95 response time" + stat = "p95" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_p99_response_time" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "TargetResponseTime" + label = "p99 response time" + stat = "p99" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_max_response_time" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "TargetResponseTime" + label = "Maximum response time" + stat = "Maximum" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + # cloudwatch dashboard widgets ------------------------------------------------ module "widget_responses" { @@ -255,3 +347,16 @@ module "widget_response_ratios" { module.metric_5xx_responses, ] } + +module "widget_response_time" { + source = "./../../../cloudwatch/metric_widget" + + title = "Response time" + stacked = true + left_metrics = [ + merge(module.metric_p50_response_time, { color = local.colors.red }), + merge(module.metric_p95_response_time, { color = local.colors.orange }), + merge(module.metric_p99_response_time, { color = local.colors.light_red }), + merge(module.metric_max_response_time, { color = local.colors.light_orange }), + ] +} diff --git a/ecs/services/web/outputs.tf b/ecs/services/web/outputs.tf index 1c03827f..89c0e8a6 100644 --- a/ecs/services/web/outputs.tf +++ b/ecs/services/web/outputs.tf @@ -51,6 +51,30 @@ output "metric_5xx_responses_ratio" { description = "Cloudwatch metric tracking percentage of 5xx responses" value = module.metric_5xx_responses_ratio } +output "metric_average_response_time" { + description = "Cloudwatch metric tracking average response time" + value = module.metric_average_response_time +} +output "metric_p50_response_time" { + description = "Cloudwatch metric tracking median response time" + value = module.metric_p50_response_time +} +output "metric_p90_response_time" { + description = "Cloudwatch metric tracking 90th percentile response time" + value = module.metric_p90_response_time +} +output "metric_p95_response_time" { + description = "Cloudwatch metric tracking 95th percentile response time" + value = module.metric_p95_response_time +} +output "metric_p99_response_time" { + description = "Cloudwatch metric tracking 99th percentile response time" + value = module.metric_p99_response_time +} +output "metric_max_response_time" { + description = "Cloudwatch metric tracking maximum response time" + value = module.metric_max_response_time +} # cloudwatch dashboard widgets ------------------------------------------------ @@ -63,3 +87,8 @@ output "widget_response_ratios" { description = "Cloudwatch dashboard widget that shows a breakdown of response status code percentages" value = module.widget_response_ratios } + +output "widget_response_time" { + description = "Cloudwatch dashboard widget that shows a breakdown of response time percentiles" + value = module.widget_response_time +} From 3b9bdf4be97fe8a469e71a30e16276a73c403224 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Mon, 27 Apr 2020 10:33:09 +0200 Subject: [PATCH 04/17] feat(ecs/services/web): added cloudwatch metrics and widgets for cpu and memory utilization --- ecs/example/main.tf | 4 + ecs/services/web/README.md | 40 +++++++++ ecs/services/web/main.tf | 173 ++++++++++++++++++++++++++++++++++++ ecs/services/web/outputs.tf | 43 +++++++++ 4 files changed, 260 insertions(+) diff --git a/ecs/example/main.tf b/ecs/example/main.tf index 66d5476b..70fbfec3 100644 --- a/ecs/example/main.tf +++ b/ecs/example/main.tf @@ -73,6 +73,7 @@ module "web_task" { task = "web" image = "kennethreitz/httpbin:latest" memory_soft_limit = 128 + cpu = 128 ports = [80] environment_variables = { @@ -104,6 +105,9 @@ module "dashboard" { module.web.widget_responses, module.web.widget_response_ratios, module.web.widget_response_time, + module.web.widget_tasks, + module.web.widget_cpu, + module.web.widget_memory, ] } diff --git a/ecs/services/web/README.md b/ecs/services/web/README.md index 946d3ad2..39704d01 100644 --- a/ecs/services/web/README.md +++ b/ecs/services/web/README.md @@ -149,14 +149,38 @@ Creates an ECS service exposed to the internet using an Application Load Balance Cloudwatch metric tracking percentage of 5xx responses +* `metric_average_cpu_utilization` + + Cloudwatch metric tracking average CPU utilization + +* `metric_average_memory_utilization` + + Cloudwatch metric tracking average memory utilization + * `metric_average_response_time` Cloudwatch metric tracking average response time +* `metric_max_cpu_utilization` + + Cloudwatch metric tracking maximum CPU utilization + +* `metric_max_memory_utilization` + + Cloudwatch metric tracking maximum memory utilization + * `metric_max_response_time` Cloudwatch metric tracking maximum response time +* `metric_min_cpu_utilization` + + Cloudwatch metric tracking minimum CPU utilization + +* `metric_min_memory_utilization` + + Cloudwatch metric tracking minimum memory utilization + * `metric_p50_response_time` Cloudwatch metric tracking median response time @@ -177,6 +201,10 @@ Creates an ECS service exposed to the internet using an Application Load Balance Cloudwatch metric tracking total number of responses +* `metric_tasks` + + Cloudwatch metric tracking tasks count + * `target_group_arn` Load balancer target group ARN @@ -185,6 +213,14 @@ Creates an ECS service exposed to the internet using an Application Load Balance Load balancer target group name +* `widget_cpu` + + Cloudwatch dashboard widget that shows CPU utilization relative to CPU reservation + +* `widget_memory` + + Cloudwatch dashboard widget that shows memory utilization relative to memory reservation + * `widget_response_ratios` Cloudwatch dashboard widget that shows a breakdown of response status code percentages @@ -196,3 +232,7 @@ Creates an ECS service exposed to the internet using an Application Load Balance * `widget_responses` Cloudwatch dashboard widget that shows a breakdown of response status codes + +* `widget_tasks` + + Cloudwatch dashboard widget that shows tasks count diff --git a/ecs/services/web/main.tf b/ecs/services/web/main.tf index 9ea6cd33..b6571576 100644 --- a/ecs/services/web/main.tf +++ b/ecs/services/web/main.tf @@ -312,6 +312,129 @@ module "metric_max_response_time" { } } +module "metric_tasks" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ECS" + name = "CPUUtilization" + label = "Task count" + stat = "SampleCount" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +data "aws_ecs_container_definition" "web" { + count = var.create ? 1 : 0 + + task_definition = var.task_definition_arn + container_name = local.container +} + +locals { + cpu_reservation = var.create ? data.aws_ecs_container_definition.web[0].cpu : 0 + memory_reservation = var.create ? data.aws_ecs_container_definition.web[0].memory_reservation : 0 +} + +module "metric_min_cpu_utilization" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ECS" + name = "CPUUtilization" + label = "Minimum CPU utilization" + color = local.colors.green + stat = "Minimum" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_average_cpu_utilization" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ECS" + name = "CPUUtilization" + label = "Average CPU utilization" + color = local.colors.orange + stat = "Average" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_max_cpu_utilization" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ECS" + name = "CPUUtilization" + label = "Maximum CPU utilization" + color = local.colors.red + stat = "Maximum" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_min_memory_utilization" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ECS" + name = "MemoryUtilization" + label = "Minimum memory utilization" + color = local.colors.green + stat = "Minimum" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_average_memory_utilization" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ECS" + name = "MemoryUtilization" + label = "Average memory utilization" + color = local.colors.orange + stat = "Average" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_max_memory_utilization" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ECS" + name = "MemoryUtilization" + label = "Maximum memory utilization" + color = local.colors.red + stat = "Maximum" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + # cloudwatch dashboard widgets ------------------------------------------------ module "widget_responses" { @@ -360,3 +483,53 @@ module "widget_response_time" { merge(module.metric_max_response_time, { color = local.colors.light_orange }), ] } + +module "widget_tasks" { + source = "./../../../cloudwatch/metric_widget" + + title = "Tasks" + left_metrics = [module.metric_tasks] + left_range = [0, null] +} + +module "annotation_cpu_reservation" { + source = "./../../../cloudwatch/annotation" + + value = 100 + label = "Reservation - ${local.cpu_reservation / 1024} vCPU" + color = local.colors.grey +} + +module "widget_cpu" { + source = "./../../../cloudwatch/metric_widget" + + title = "CPU utilization" + left_metrics = [ + module.metric_min_cpu_utilization, + module.metric_average_cpu_utilization, + module.metric_max_cpu_utilization, + ] + left_annotations = [module.annotation_cpu_reservation] + left_range = [0, null] +} + +module "annotation_memory_reservation" { + source = "./../../../cloudwatch/annotation" + + value = 100 + label = "Reservation - ${local.memory_reservation} MB" + color = local.colors.grey +} + +module "widget_memory" { + source = "./../../../cloudwatch/metric_widget" + + title = "Memory utilization" + left_metrics = [ + module.metric_min_memory_utilization, + module.metric_average_memory_utilization, + module.metric_max_memory_utilization, + ] + left_annotations = [module.annotation_memory_reservation] + left_range = [0, null] +} diff --git a/ecs/services/web/outputs.tf b/ecs/services/web/outputs.tf index 89c0e8a6..d8b23f56 100644 --- a/ecs/services/web/outputs.tf +++ b/ecs/services/web/outputs.tf @@ -75,6 +75,34 @@ output "metric_max_response_time" { description = "Cloudwatch metric tracking maximum response time" value = module.metric_max_response_time } +output "metric_tasks" { + description = "Cloudwatch metric tracking tasks count" + value = module.metric_tasks +} +output "metric_min_cpu_utilization" { + description = "Cloudwatch metric tracking minimum CPU utilization" + value = module.metric_min_cpu_utilization +} +output "metric_average_cpu_utilization" { + description = "Cloudwatch metric tracking average CPU utilization" + value = module.metric_average_cpu_utilization +} +output "metric_max_cpu_utilization" { + description = "Cloudwatch metric tracking maximum CPU utilization" + value = module.metric_max_cpu_utilization +} +output "metric_min_memory_utilization" { + description = "Cloudwatch metric tracking minimum memory utilization" + value = module.metric_min_memory_utilization +} +output "metric_average_memory_utilization" { + description = "Cloudwatch metric tracking average memory utilization" + value = module.metric_average_memory_utilization +} +output "metric_max_memory_utilization" { + description = "Cloudwatch metric tracking maximum memory utilization" + value = module.metric_max_memory_utilization +} # cloudwatch dashboard widgets ------------------------------------------------ @@ -92,3 +120,18 @@ output "widget_response_time" { description = "Cloudwatch dashboard widget that shows a breakdown of response time percentiles" value = module.widget_response_time } + +output "widget_tasks" { + description = "Cloudwatch dashboard widget that shows tasks count" + value = module.widget_tasks +} + +output "widget_cpu" { + description = "Cloudwatch dashboard widget that shows CPU utilization relative to CPU reservation" + value = module.widget_cpu +} + +output "widget_memory" { + description = "Cloudwatch dashboard widget that shows memory utilization relative to memory reservation" + value = module.widget_memory +} From ab220ff55f0cecce75f84e231fbd417d7fae6597 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Mon, 27 Apr 2020 11:10:24 +0200 Subject: [PATCH 05/17] feat(ecs/services/web): added connection errors metrics and added them to widgets --- ecs/services/web/README.md | 8 ++++++++ ecs/services/web/main.tf | 27 +++++++++++++++++++++++++++ ecs/services/web/outputs.tf | 8 ++++++++ 3 files changed, 43 insertions(+) diff --git a/ecs/services/web/README.md b/ecs/services/web/README.md index 39704d01..6e7e7e8c 100644 --- a/ecs/services/web/README.md +++ b/ecs/services/web/README.md @@ -161,6 +161,14 @@ Creates an ECS service exposed to the internet using an Application Load Balance Cloudwatch metric tracking average response time +* `metric_connection_errors` + + Cloudwatch metric tracking the number of connection errors from the load balancer + +* `metric_connection_errors_ratio` + + Cloudwatch metric tracking percentage of connection errors from the load balancer + * `metric_max_cpu_utilization` Cloudwatch metric tracking maximum CPU utilization diff --git a/ecs/services/web/main.tf b/ecs/services/web/main.tf index b6571576..99f0a49e 100644 --- a/ecs/services/web/main.tf +++ b/ecs/services/web/main.tf @@ -220,6 +220,30 @@ module "metric_5xx_responses_ratio" { color = module.metric_5xx_responses.color } +module "metric_connection_errors" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "TargetConnectionErrorCount" + label = "Connection errors" + color = local.colors.purple + stat = "Sum" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_connection_errors_ratio" { + source = "./../../../cloudwatch/metric_expression" + + expression = "IF(${module.metric_responses.id} == 0, 0, FILL(${module.metric_connection_errors.id}, 0) / ${module.metric_responses.id} * 100)" + label = "Connection errors ratio" + color = module.metric_connection_errors.color +} + module "metric_average_response_time" { source = "./../../../cloudwatch/metric" @@ -443,6 +467,7 @@ module "widget_responses" { title = "Responses" stacked = true left_metrics = [ + module.metric_connection_errors, module.metric_5xx_responses, module.metric_4xx_responses, module.metric_3xx_responses, @@ -456,6 +481,7 @@ module "widget_response_ratios" { title = "Response ratios" stacked = true left_metrics = [ + module.metric_connection_errors_ratio, module.metric_5xx_responses_ratio, module.metric_4xx_responses_ratio, module.metric_3xx_responses_ratio, @@ -468,6 +494,7 @@ module "widget_response_ratios" { module.metric_3xx_responses, module.metric_4xx_responses, module.metric_5xx_responses, + module.metric_connection_errors, ] } diff --git a/ecs/services/web/outputs.tf b/ecs/services/web/outputs.tf index d8b23f56..da60a21d 100644 --- a/ecs/services/web/outputs.tf +++ b/ecs/services/web/outputs.tf @@ -51,6 +51,14 @@ output "metric_5xx_responses_ratio" { description = "Cloudwatch metric tracking percentage of 5xx responses" value = module.metric_5xx_responses_ratio } +output "metric_connection_errors" { + description = "Cloudwatch metric tracking the number of connection errors from the load balancer" + value = module.metric_connection_errors +} +output "metric_connection_errors_ratio" { + description = "Cloudwatch metric tracking percentage of connection errors from the load balancer" + value = module.metric_connection_errors_ratio +} output "metric_average_response_time" { description = "Cloudwatch metric tracking average response time" value = module.metric_average_response_time From 5dce2872a566331640b33ab8d578bf84800d05f2 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Mon, 27 Apr 2020 11:12:27 +0200 Subject: [PATCH 06/17] refactor(ecs/services/web): renamed metric_responses to metric_requests --- ecs/services/web/README.md | 4 ++-- ecs/services/web/main.tf | 14 +++++++------- ecs/services/web/outputs.tf | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/ecs/services/web/README.md b/ecs/services/web/README.md index 6e7e7e8c..39e407dc 100644 --- a/ecs/services/web/README.md +++ b/ecs/services/web/README.md @@ -205,9 +205,9 @@ Creates an ECS service exposed to the internet using an Application Load Balance Cloudwatch metric tracking 99th percentile response time -* `metric_responses` +* `metric_requests` - Cloudwatch metric tracking total number of responses + Cloudwatch metric tracking total number of requests * `metric_tasks` diff --git a/ecs/services/web/main.tf b/ecs/services/web/main.tf index 99f0a49e..8a71aa5b 100644 --- a/ecs/services/web/main.tf +++ b/ecs/services/web/main.tf @@ -109,7 +109,7 @@ locals { colors = module.cloudwatch_consts.colors } -module "metric_responses" { +module "metric_requests" { source = "./../../../cloudwatch/metric" namespace = "AWS/ApplicationELB" @@ -143,7 +143,7 @@ module "metric_2xx_responses" { module "metric_2xx_responses_ratio" { source = "./../../../cloudwatch/metric_expression" - expression = "IF(${module.metric_responses.id} == 0, 0, FILL(${module.metric_2xx_responses.id}, 0) / ${module.metric_responses.id} * 100)" + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_2xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" label = "2xx response ratio" color = module.metric_2xx_responses.color } @@ -167,7 +167,7 @@ module "metric_3xx_responses" { module "metric_3xx_responses_ratio" { source = "./../../../cloudwatch/metric_expression" - expression = "IF(${module.metric_responses.id} == 0, 0, FILL(${module.metric_3xx_responses.id}, 0) / ${module.metric_responses.id} * 100)" + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_3xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" label = "3xx response ratio" color = module.metric_3xx_responses.color } @@ -191,7 +191,7 @@ module "metric_4xx_responses" { module "metric_4xx_responses_ratio" { source = "./../../../cloudwatch/metric_expression" - expression = "IF(${module.metric_responses.id} == 0, 0, FILL(${module.metric_4xx_responses.id}, 0) / ${module.metric_responses.id} * 100)" + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_4xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" label = "4xx response ratio" color = module.metric_4xx_responses.color } @@ -215,7 +215,7 @@ module "metric_5xx_responses" { module "metric_5xx_responses_ratio" { source = "./../../../cloudwatch/metric_expression" - expression = "IF(${module.metric_responses.id} == 0, 0, FILL(${module.metric_5xx_responses.id}, 0) / ${module.metric_responses.id} * 100)" + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_5xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" label = "5xx response ratio" color = module.metric_5xx_responses.color } @@ -239,7 +239,7 @@ module "metric_connection_errors" { module "metric_connection_errors_ratio" { source = "./../../../cloudwatch/metric_expression" - expression = "IF(${module.metric_responses.id} == 0, 0, FILL(${module.metric_connection_errors.id}, 0) / ${module.metric_responses.id} * 100)" + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_connection_errors.id}, 0) / ${module.metric_requests.id} * 100)" label = "Connection errors ratio" color = module.metric_connection_errors.color } @@ -489,7 +489,7 @@ module "widget_response_ratios" { ] left_range = [0, 100] hidden_metrics = [ - module.metric_responses, + module.metric_requests, module.metric_2xx_responses, module.metric_3xx_responses, module.metric_4xx_responses, diff --git a/ecs/services/web/outputs.tf b/ecs/services/web/outputs.tf index da60a21d..3f616642 100644 --- a/ecs/services/web/outputs.tf +++ b/ecs/services/web/outputs.tf @@ -15,9 +15,9 @@ output "target_group_arn" { # cloudwatch metrics ---------------------------------------------------------- -output "metric_responses" { - description = "Cloudwatch metric tracking total number of responses" - value = module.metric_responses +output "metric_requests" { + description = "Cloudwatch metric tracking total number of requests" + value = module.metric_requests } output "metric_2xx_responses" { description = "Cloudwatch metric tracking the number of 2xx responses" From c403014b6c2733a5dee96a96d4362ba0893d6ee3 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Mon, 27 Apr 2020 13:44:45 +0200 Subject: [PATCH 07/17] feat(ecs): enabled container insights --- ecs/main.tf | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ecs/main.tf b/ecs/main.tf index dfb64fd4..4b1bfeed 100644 --- a/ecs/main.tf +++ b/ecs/main.tf @@ -23,5 +23,10 @@ resource "aws_ecs_cluster" "cluster" { count = var.create ? 1 : 0 name = "${var.project}-${var.environment}" + + setting { + name = "containerInsights" + value = "enabled" + } } From a659e6163a0e09aeb681138ba7452071a8c22fd5 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Mon, 27 Apr 2020 13:45:47 +0200 Subject: [PATCH 08/17] docs(ecs): adjusted example tasks memory and cpu requirements --- ecs/example/main.tf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ecs/example/main.tf b/ecs/example/main.tf index 70fbfec3..39b6c19b 100644 --- a/ecs/example/main.tf +++ b/ecs/example/main.tf @@ -48,7 +48,8 @@ module "worker_task" { environment = local.environment task = "worker" image = "kennethreitz/httpbin:latest" - memory_soft_limit = 128 + memory_soft_limit = 48 + cpu = 128 environment_variables = { DEBUG = "True" @@ -72,7 +73,7 @@ module "web_task" { environment = local.environment task = "web" image = "kennethreitz/httpbin:latest" - memory_soft_limit = 128 + memory_soft_limit = 48 cpu = 128 ports = [80] From a9ff5284eb85f5567748d14bdbc68ea097bd41ea Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Mon, 27 Apr 2020 14:03:14 +0200 Subject: [PATCH 09/17] refactor(ecs/services/web): renamed widget_cpu/_memory to _cpu_reservation/_memory_reservation --- ecs/example/main.tf | 4 ++-- ecs/services/web/README.md | 4 ++-- ecs/services/web/main.tf | 4 ++-- ecs/services/web/outputs.tf | 8 ++++---- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ecs/example/main.tf b/ecs/example/main.tf index 39b6c19b..0292a8f3 100644 --- a/ecs/example/main.tf +++ b/ecs/example/main.tf @@ -107,8 +107,8 @@ module "dashboard" { module.web.widget_response_ratios, module.web.widget_response_time, module.web.widget_tasks, - module.web.widget_cpu, - module.web.widget_memory, + module.web.widget_cpu_reservation, + module.web.widget_memory_reservation, ] } diff --git a/ecs/services/web/README.md b/ecs/services/web/README.md index 39e407dc..e64fcff1 100644 --- a/ecs/services/web/README.md +++ b/ecs/services/web/README.md @@ -221,11 +221,11 @@ Creates an ECS service exposed to the internet using an Application Load Balance Load balancer target group name -* `widget_cpu` +* `widget_cpu_reservation` Cloudwatch dashboard widget that shows CPU utilization relative to CPU reservation -* `widget_memory` +* `widget_memory_reservation` Cloudwatch dashboard widget that shows memory utilization relative to memory reservation diff --git a/ecs/services/web/main.tf b/ecs/services/web/main.tf index 8a71aa5b..9118a31b 100644 --- a/ecs/services/web/main.tf +++ b/ecs/services/web/main.tf @@ -527,7 +527,7 @@ module "annotation_cpu_reservation" { color = local.colors.grey } -module "widget_cpu" { +module "widget_cpu_reservation" { source = "./../../../cloudwatch/metric_widget" title = "CPU utilization" @@ -548,7 +548,7 @@ module "annotation_memory_reservation" { color = local.colors.grey } -module "widget_memory" { +module "widget_memory_reservation" { source = "./../../../cloudwatch/metric_widget" title = "Memory utilization" diff --git a/ecs/services/web/outputs.tf b/ecs/services/web/outputs.tf index 3f616642..f7c00d5d 100644 --- a/ecs/services/web/outputs.tf +++ b/ecs/services/web/outputs.tf @@ -134,12 +134,12 @@ output "widget_tasks" { value = module.widget_tasks } -output "widget_cpu" { +output "widget_cpu_reservation" { description = "Cloudwatch dashboard widget that shows CPU utilization relative to CPU reservation" - value = module.widget_cpu + value = module.widget_cpu_reservation } -output "widget_memory" { +output "widget_memory_reservation" { description = "Cloudwatch dashboard widget that shows memory utilization relative to memory reservation" - value = module.widget_memory + value = module.widget_memory_reservation } From b9919ec5f748a6bb5f8432bb19432c6875527d00 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Mon, 27 Apr 2020 19:28:56 +0200 Subject: [PATCH 10/17] refactor(ecs/services/web): switch to container insights metrics for cpu/memory utilization --- ecs/example/main.tf | 6 +-- ecs/services/web/README.md | 16 ++++-- ecs/services/web/main.tf | 104 +++++++++++++++++++----------------- ecs/services/web/outputs.tf | 20 ++++--- 4 files changed, 83 insertions(+), 63 deletions(-) diff --git a/ecs/example/main.tf b/ecs/example/main.tf index 0292a8f3..417506d0 100644 --- a/ecs/example/main.tf +++ b/ecs/example/main.tf @@ -89,7 +89,7 @@ module "web" { name = "web" cluster_arn = module.cluster.arn task_definition_arn = module.web_task.arn - desired_count = 1 + desired_count = 2 vpc_id = module.cluster.vpc_id listener_arn = module.cluster.http_listener_arn @@ -107,8 +107,8 @@ module "dashboard" { module.web.widget_response_ratios, module.web.widget_response_time, module.web.widget_tasks, - module.web.widget_cpu_reservation, - module.web.widget_memory_reservation, + module.web.widget_cpu_utilization, + module.web.widget_memory_utilization, ] } diff --git a/ecs/services/web/README.md b/ecs/services/web/README.md index e64fcff1..b92e69af 100644 --- a/ecs/services/web/README.md +++ b/ecs/services/web/README.md @@ -149,10 +149,18 @@ Creates an ECS service exposed to the internet using an Application Load Balance Cloudwatch metric tracking percentage of 5xx responses +* `metric_average_cpu_reservation` + + Cloudwatch metric tracking average CPU reservation + * `metric_average_cpu_utilization` Cloudwatch metric tracking average CPU utilization +* `metric_average_memory_reservation` + + Cloudwatch metric tracking average memory reservation + * `metric_average_memory_utilization` Cloudwatch metric tracking average memory utilization @@ -221,13 +229,13 @@ Creates an ECS service exposed to the internet using an Application Load Balance Load balancer target group name -* `widget_cpu_reservation` +* `widget_cpu_utilization` - Cloudwatch dashboard widget that shows CPU utilization relative to CPU reservation + Cloudwatch dashboard widget that shows CPU utilization -* `widget_memory_reservation` +* `widget_memory_utilization` - Cloudwatch dashboard widget that shows memory utilization relative to memory reservation + Cloudwatch dashboard widget that shows memory utilization * `widget_response_ratios` diff --git a/ecs/services/web/main.tf b/ecs/services/web/main.tf index 9118a31b..1090da47 100644 --- a/ecs/services/web/main.tf +++ b/ecs/services/web/main.tf @@ -351,25 +351,29 @@ module "metric_tasks" { } } -data "aws_ecs_container_definition" "web" { - count = var.create ? 1 : 0 +module "metric_average_cpu_reservation" { + source = "./../../../cloudwatch/metric" - task_definition = var.task_definition_arn - container_name = local.container -} + namespace = "ECS/ContainerInsights" + name = "CpuReserved" + label = "Average CPU reserved" + color = local.colors.grey + stat = "Average" + period = 60 -locals { - cpu_reservation = var.create ? data.aws_ecs_container_definition.web[0].cpu : 0 - memory_reservation = var.create ? data.aws_ecs_container_definition.web[0].memory_reservation : 0 + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } } module "metric_min_cpu_utilization" { source = "./../../../cloudwatch/metric" - namespace = "AWS/ECS" - name = "CPUUtilization" - label = "Minimum CPU utilization" - color = local.colors.green + namespace = "ECS/ContainerInsights" + name = "CpuUtilized" + label = "Minimum CPU utilized" + color = local.colors.light_orange stat = "Minimum" period = 60 @@ -382,9 +386,9 @@ module "metric_min_cpu_utilization" { module "metric_average_cpu_utilization" { source = "./../../../cloudwatch/metric" - namespace = "AWS/ECS" - name = "CPUUtilization" - label = "Average CPU utilization" + namespace = "ECS/ContainerInsights" + name = "CpuUtilized" + label = "Average CPU utilized" color = local.colors.orange stat = "Average" period = 60 @@ -398,9 +402,9 @@ module "metric_average_cpu_utilization" { module "metric_max_cpu_utilization" { source = "./../../../cloudwatch/metric" - namespace = "AWS/ECS" - name = "CPUUtilization" - label = "Maximum CPU utilization" + namespace = "ECS/ContainerInsights" + name = "CpuUtilized" + label = "Maximum CPU utilized" color = local.colors.red stat = "Maximum" period = 60 @@ -411,13 +415,29 @@ module "metric_max_cpu_utilization" { } } +module "metric_average_memory_reservation" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "MemoryReserved" + label = "Average memory reserved" + color = local.colors.grey + stat = "Average" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + module "metric_min_memory_utilization" { source = "./../../../cloudwatch/metric" - namespace = "AWS/ECS" - name = "MemoryUtilization" - label = "Minimum memory utilization" - color = local.colors.green + namespace = "ECS/ContainerInsights" + name = "MemoryUtilized" + label = "Minimum memory utilized" + color = local.colors.light_orange stat = "Minimum" period = 60 @@ -430,9 +450,9 @@ module "metric_min_memory_utilization" { module "metric_average_memory_utilization" { source = "./../../../cloudwatch/metric" - namespace = "AWS/ECS" - name = "MemoryUtilization" - label = "Average memory utilization" + namespace = "ECS/ContainerInsights" + name = "MemoryUtilized" + label = "Average memory utilized" color = local.colors.orange stat = "Average" period = 60 @@ -446,9 +466,9 @@ module "metric_average_memory_utilization" { module "metric_max_memory_utilization" { source = "./../../../cloudwatch/metric" - namespace = "AWS/ECS" - name = "MemoryUtilization" - label = "Maximum memory utilization" + namespace = "ECS/ContainerInsights" + name = "MemoryUtilized" + label = "Maximum memory utilized" color = local.colors.red stat = "Maximum" period = 60 @@ -519,44 +539,28 @@ module "widget_tasks" { left_range = [0, null] } -module "annotation_cpu_reservation" { - source = "./../../../cloudwatch/annotation" - - value = 100 - label = "Reservation - ${local.cpu_reservation / 1024} vCPU" - color = local.colors.grey -} - -module "widget_cpu_reservation" { +module "widget_cpu_utilization" { source = "./../../../cloudwatch/metric_widget" title = "CPU utilization" left_metrics = [ + module.metric_average_cpu_reservation, module.metric_min_cpu_utilization, module.metric_average_cpu_utilization, module.metric_max_cpu_utilization, ] - left_annotations = [module.annotation_cpu_reservation] - left_range = [0, null] -} - -module "annotation_memory_reservation" { - source = "./../../../cloudwatch/annotation" - - value = 100 - label = "Reservation - ${local.memory_reservation} MB" - color = local.colors.grey + left_range = [0, null] } -module "widget_memory_reservation" { +module "widget_memory_utilization" { source = "./../../../cloudwatch/metric_widget" title = "Memory utilization" left_metrics = [ + module.metric_average_memory_reservation, module.metric_min_memory_utilization, module.metric_average_memory_utilization, module.metric_max_memory_utilization, ] - left_annotations = [module.annotation_memory_reservation] - left_range = [0, null] + left_range = [0, null] } diff --git a/ecs/services/web/outputs.tf b/ecs/services/web/outputs.tf index f7c00d5d..b4ac93c4 100644 --- a/ecs/services/web/outputs.tf +++ b/ecs/services/web/outputs.tf @@ -87,6 +87,10 @@ output "metric_tasks" { description = "Cloudwatch metric tracking tasks count" value = module.metric_tasks } +output "metric_average_cpu_reservation" { + description = "Cloudwatch metric tracking average CPU reservation" + value = module.metric_average_cpu_reservation +} output "metric_min_cpu_utilization" { description = "Cloudwatch metric tracking minimum CPU utilization" value = module.metric_min_cpu_utilization @@ -99,6 +103,10 @@ output "metric_max_cpu_utilization" { description = "Cloudwatch metric tracking maximum CPU utilization" value = module.metric_max_cpu_utilization } +output "metric_average_memory_reservation" { + description = "Cloudwatch metric tracking average memory reservation" + value = module.metric_average_memory_reservation +} output "metric_min_memory_utilization" { description = "Cloudwatch metric tracking minimum memory utilization" value = module.metric_min_memory_utilization @@ -134,12 +142,12 @@ output "widget_tasks" { value = module.widget_tasks } -output "widget_cpu_reservation" { - description = "Cloudwatch dashboard widget that shows CPU utilization relative to CPU reservation" - value = module.widget_cpu_reservation +output "widget_cpu_utilization" { + description = "Cloudwatch dashboard widget that shows CPU utilization" + value = module.widget_cpu_utilization } -output "widget_memory_reservation" { - description = "Cloudwatch dashboard widget that shows memory utilization relative to memory reservation" - value = module.widget_memory_reservation +output "widget_memory_utilization" { + description = "Cloudwatch dashboard widget that shows memory utilization" + value = module.widget_memory_utilization } From 18f23e0adc8bb7adf2b85568b2512256f8808438 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Mon, 27 Apr 2020 19:39:39 +0200 Subject: [PATCH 11/17] refactor(ecs/services/web): changed tasks widget to scaling widget using container insights metrics --- ecs/example/main.tf | 2 +- ecs/services/web/README.md | 16 ++++++++--- ecs/services/web/main.tf | 53 +++++++++++++++++++++++++++++++------ ecs/services/web/outputs.tf | 18 +++++++++---- 4 files changed, 71 insertions(+), 18 deletions(-) diff --git a/ecs/example/main.tf b/ecs/example/main.tf index 417506d0..6c376cb7 100644 --- a/ecs/example/main.tf +++ b/ecs/example/main.tf @@ -106,7 +106,7 @@ module "dashboard" { module.web.widget_responses, module.web.widget_response_ratios, module.web.widget_response_time, - module.web.widget_tasks, + module.web.widget_scaling, module.web.widget_cpu_utilization, module.web.widget_memory_utilization, ] diff --git a/ecs/services/web/README.md b/ecs/services/web/README.md index b92e69af..b71330e2 100644 --- a/ecs/services/web/README.md +++ b/ecs/services/web/README.md @@ -177,6 +177,10 @@ Creates an ECS service exposed to the internet using an Application Load Balance Cloudwatch metric tracking percentage of connection errors from the load balancer +* `metric_desired_tasks` + + Cloudwatch metric tracking desired tasks count + * `metric_max_cpu_utilization` Cloudwatch metric tracking maximum CPU utilization @@ -213,13 +217,17 @@ Creates an ECS service exposed to the internet using an Application Load Balance Cloudwatch metric tracking 99th percentile response time +* `metric_pending_tasks` + + Cloudwatch metric tracking pending tasks count + * `metric_requests` Cloudwatch metric tracking total number of requests -* `metric_tasks` +* `metric_running_tasks` - Cloudwatch metric tracking tasks count + Cloudwatch metric tracking running tasks count * `target_group_arn` @@ -249,6 +257,6 @@ Creates an ECS service exposed to the internet using an Application Load Balance Cloudwatch dashboard widget that shows a breakdown of response status codes -* `widget_tasks` +* `widget_scaling` - Cloudwatch dashboard widget that shows tasks count + Cloudwatch dashboard widget that shows scaling state diff --git a/ecs/services/web/main.tf b/ecs/services/web/main.tf index 1090da47..b706640a 100644 --- a/ecs/services/web/main.tf +++ b/ecs/services/web/main.tf @@ -336,13 +336,46 @@ module "metric_max_response_time" { } } -module "metric_tasks" { +module "metric_desired_tasks" { source = "./../../../cloudwatch/metric" - namespace = "AWS/ECS" - name = "CPUUtilization" - label = "Task count" - stat = "SampleCount" + namespace = "ECS/ContainerInsights" + name = "DesiredTaskCount" + label = "Desired task count" + color = local.colors.grey + stat = "Average" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_pending_tasks" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "PendingTaskCount" + label = "Pending task count" + color = local.colors.orange + stat = "Average" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_running_tasks" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "RunningTaskCount" + label = "Running task count" + color = local.colors.green + stat = "Average" period = 60 dimensions = { @@ -534,9 +567,13 @@ module "widget_response_time" { module "widget_tasks" { source = "./../../../cloudwatch/metric_widget" - title = "Tasks" - left_metrics = [module.metric_tasks] - left_range = [0, null] + title = "Scaling" + left_metrics = [ + module.metric_desired_tasks, + module.metric_pending_tasks, + module.metric_running_tasks, + ] + left_range = [0, null] } module "widget_cpu_utilization" { diff --git a/ecs/services/web/outputs.tf b/ecs/services/web/outputs.tf index b4ac93c4..098c2ffe 100644 --- a/ecs/services/web/outputs.tf +++ b/ecs/services/web/outputs.tf @@ -83,9 +83,17 @@ output "metric_max_response_time" { description = "Cloudwatch metric tracking maximum response time" value = module.metric_max_response_time } -output "metric_tasks" { - description = "Cloudwatch metric tracking tasks count" - value = module.metric_tasks +output "metric_desired_tasks" { + description = "Cloudwatch metric tracking desired tasks count" + value = module.metric_desired_tasks +} +output "metric_pending_tasks" { + description = "Cloudwatch metric tracking pending tasks count" + value = module.metric_pending_tasks +} +output "metric_running_tasks" { + description = "Cloudwatch metric tracking running tasks count" + value = module.metric_running_tasks } output "metric_average_cpu_reservation" { description = "Cloudwatch metric tracking average CPU reservation" @@ -137,8 +145,8 @@ output "widget_response_time" { value = module.widget_response_time } -output "widget_tasks" { - description = "Cloudwatch dashboard widget that shows tasks count" +output "widget_scaling" { + description = "Cloudwatch dashboard widget that shows scaling state" value = module.widget_tasks } From 36b3882d281df8299170318805bc4d9103b53ac6 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Mon, 27 Apr 2020 19:42:15 +0200 Subject: [PATCH 12/17] feat(ecs/services/web): prefixed widget titles with the service name --- ecs/services/web/main.tf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ecs/services/web/main.tf b/ecs/services/web/main.tf index b706640a..f73d4da3 100644 --- a/ecs/services/web/main.tf +++ b/ecs/services/web/main.tf @@ -517,7 +517,7 @@ module "metric_max_memory_utilization" { module "widget_responses" { source = "./../../../cloudwatch/metric_widget" - title = "Responses" + title = "${var.name} service responses" stacked = true left_metrics = [ module.metric_connection_errors, @@ -531,7 +531,7 @@ module "widget_responses" { module "widget_response_ratios" { source = "./../../../cloudwatch/metric_widget" - title = "Response ratios" + title = "${var.name} service response ratios" stacked = true left_metrics = [ module.metric_connection_errors_ratio, @@ -554,7 +554,7 @@ module "widget_response_ratios" { module "widget_response_time" { source = "./../../../cloudwatch/metric_widget" - title = "Response time" + title = "${var.name} service response times" stacked = true left_metrics = [ merge(module.metric_p50_response_time, { color = local.colors.red }), @@ -567,7 +567,7 @@ module "widget_response_time" { module "widget_tasks" { source = "./../../../cloudwatch/metric_widget" - title = "Scaling" + title = "${var.name} service scaling" left_metrics = [ module.metric_desired_tasks, module.metric_pending_tasks, @@ -579,7 +579,7 @@ module "widget_tasks" { module "widget_cpu_utilization" { source = "./../../../cloudwatch/metric_widget" - title = "CPU utilization" + title = "${var.name} service CPU utilization" left_metrics = [ module.metric_average_cpu_reservation, module.metric_min_cpu_utilization, @@ -592,7 +592,7 @@ module "widget_cpu_utilization" { module "widget_memory_utilization" { source = "./../../../cloudwatch/metric_widget" - title = "Memory utilization" + title = "${var.name} service memory utilization" left_metrics = [ module.metric_average_memory_reservation, module.metric_min_memory_utilization, From b48dbed7df1ed03d8120955fd32a4fc7aba88453 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Mon, 27 Apr 2020 20:01:12 +0200 Subject: [PATCH 13/17] refactor(ecs/services/web): moved metrics and widgets to separate files, replaced metrics_* and widgets_* outputs with just metrics and widgets objects --- ecs/example/main.tf | 12 +- ecs/services/web/README.md | 136 +--------- ecs/services/web/main.tf | 515 ------------------------------------ ecs/services/web/metrics.tf | 455 +++++++++++++++++++++++++++++++ ecs/services/web/outputs.tf | 149 +---------- ecs/services/web/widgets.tf | 98 +++++++ 6 files changed, 569 insertions(+), 796 deletions(-) create mode 100644 ecs/services/web/metrics.tf create mode 100644 ecs/services/web/widgets.tf diff --git a/ecs/example/main.tf b/ecs/example/main.tf index 6c376cb7..11e22805 100644 --- a/ecs/example/main.tf +++ b/ecs/example/main.tf @@ -103,12 +103,12 @@ module "dashboard" { name = "terraform-ecs-example" widgets = [ - module.web.widget_responses, - module.web.widget_response_ratios, - module.web.widget_response_time, - module.web.widget_scaling, - module.web.widget_cpu_utilization, - module.web.widget_memory_utilization, + module.web.widgets.responses, + module.web.widgets.response_percentages, + module.web.widgets.response_time, + module.web.widgets.scaling, + module.web.widgets.cpu_utilization, + module.web.widgets.memory_utilization, ] } diff --git a/ecs/services/web/README.md b/ecs/services/web/README.md index b71330e2..0fd7e847 100644 --- a/ecs/services/web/README.md +++ b/ecs/services/web/README.md @@ -117,117 +117,9 @@ Creates an ECS service exposed to the internet using an Application Load Balance Service id -* `metric_2xx_responses` +* `metrics` - Cloudwatch metric tracking the number of 2xx responses - -* `metric_2xx_responses_ratio` - - Cloudwatch metric tracking percentage of 2xx responses - -* `metric_3xx_responses` - - Cloudwatch metric tracking the number of 3xx responses - -* `metric_3xx_responses_ratio` - - Cloudwatch metric tracking percentage of 3xx responses - -* `metric_4xx_responses` - - Cloudwatch metric tracking the number of 4xx responses - -* `metric_4xx_responses_ratio` - - Cloudwatch metric tracking percentage of 4xx responses - -* `metric_5xx_responses` - - Cloudwatch metric tracking the number of 5xx responses - -* `metric_5xx_responses_ratio` - - Cloudwatch metric tracking percentage of 5xx responses - -* `metric_average_cpu_reservation` - - Cloudwatch metric tracking average CPU reservation - -* `metric_average_cpu_utilization` - - Cloudwatch metric tracking average CPU utilization - -* `metric_average_memory_reservation` - - Cloudwatch metric tracking average memory reservation - -* `metric_average_memory_utilization` - - Cloudwatch metric tracking average memory utilization - -* `metric_average_response_time` - - Cloudwatch metric tracking average response time - -* `metric_connection_errors` - - Cloudwatch metric tracking the number of connection errors from the load balancer - -* `metric_connection_errors_ratio` - - Cloudwatch metric tracking percentage of connection errors from the load balancer - -* `metric_desired_tasks` - - Cloudwatch metric tracking desired tasks count - -* `metric_max_cpu_utilization` - - Cloudwatch metric tracking maximum CPU utilization - -* `metric_max_memory_utilization` - - Cloudwatch metric tracking maximum memory utilization - -* `metric_max_response_time` - - Cloudwatch metric tracking maximum response time - -* `metric_min_cpu_utilization` - - Cloudwatch metric tracking minimum CPU utilization - -* `metric_min_memory_utilization` - - Cloudwatch metric tracking minimum memory utilization - -* `metric_p50_response_time` - - Cloudwatch metric tracking median response time - -* `metric_p90_response_time` - - Cloudwatch metric tracking 90th percentile response time - -* `metric_p95_response_time` - - Cloudwatch metric tracking 95th percentile response time - -* `metric_p99_response_time` - - Cloudwatch metric tracking 99th percentile response time - -* `metric_pending_tasks` - - Cloudwatch metric tracking pending tasks count - -* `metric_requests` - - Cloudwatch metric tracking total number of requests - -* `metric_running_tasks` - - Cloudwatch metric tracking running tasks count + Cloudwatch metrics, see [metrics.tf](./metrics.tf) * `target_group_arn` @@ -237,26 +129,6 @@ Creates an ECS service exposed to the internet using an Application Load Balance Load balancer target group name -* `widget_cpu_utilization` - - Cloudwatch dashboard widget that shows CPU utilization - -* `widget_memory_utilization` - - Cloudwatch dashboard widget that shows memory utilization - -* `widget_response_ratios` - - Cloudwatch dashboard widget that shows a breakdown of response status code percentages - -* `widget_response_time` - - Cloudwatch dashboard widget that shows a breakdown of response time percentiles - -* `widget_responses` - - Cloudwatch dashboard widget that shows a breakdown of response status codes - -* `widget_scaling` +* `widgets` - Cloudwatch dashboard widget that shows scaling state + Cloudwatch dashboard widgets, see [widgets.tf](./widgets.tf) diff --git a/ecs/services/web/main.tf b/ecs/services/web/main.tf index f73d4da3..fbef48c3 100644 --- a/ecs/services/web/main.tf +++ b/ecs/services/web/main.tf @@ -86,518 +86,3 @@ resource "aws_lb_listener_rule" "service" { values = [var.rule_path] } } - -# cloudwatch metrics ---------------------------------------------------------- - -data "aws_lb_listener" "listener" { - count = var.create ? 1 : 0 - - arn = var.listener_arn -} - -data "aws_lb" "lb" { - count = var.create ? 1 : 0 - - arn = data.aws_lb_listener.listener[0].load_balancer_arn -} - -module "cloudwatch_consts" { - source = "./../../../cloudwatch/consts" -} - -locals { - colors = module.cloudwatch_consts.colors -} - -module "metric_requests" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "RequestCount" - label = "Responses" - stat = "Sum" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" - } -} - -module "metric_2xx_responses" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "HTTPCode_Target_2XX_Count" - label = "2xx responses" - color = local.colors.green - stat = "Sum" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" - } -} - -module "metric_2xx_responses_ratio" { - source = "./../../../cloudwatch/metric_expression" - - expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_2xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" - label = "2xx response ratio" - color = module.metric_2xx_responses.color -} - -module "metric_3xx_responses" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "HTTPCode_Target_3XX_Count" - label = "3xx responses" - color = local.colors.blue - stat = "Sum" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" - } -} - -module "metric_3xx_responses_ratio" { - source = "./../../../cloudwatch/metric_expression" - - expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_3xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" - label = "3xx response ratio" - color = module.metric_3xx_responses.color -} - -module "metric_4xx_responses" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "HTTPCode_Target_4XX_Count" - label = "4xx responses" - color = local.colors.orange - stat = "Sum" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" - } -} - -module "metric_4xx_responses_ratio" { - source = "./../../../cloudwatch/metric_expression" - - expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_4xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" - label = "4xx response ratio" - color = module.metric_4xx_responses.color -} - -module "metric_5xx_responses" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "HTTPCode_Target_5XX_Count" - label = "5xx responses" - color = local.colors.red - stat = "Sum" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" - } -} - -module "metric_5xx_responses_ratio" { - source = "./../../../cloudwatch/metric_expression" - - expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_5xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" - label = "5xx response ratio" - color = module.metric_5xx_responses.color -} - -module "metric_connection_errors" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "TargetConnectionErrorCount" - label = "Connection errors" - color = local.colors.purple - stat = "Sum" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" - } -} - -module "metric_connection_errors_ratio" { - source = "./../../../cloudwatch/metric_expression" - - expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_connection_errors.id}, 0) / ${module.metric_requests.id} * 100)" - label = "Connection errors ratio" - color = module.metric_connection_errors.color -} - -module "metric_average_response_time" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "TargetResponseTime" - label = "Average response time" - color = local.colors.red - stat = "Average" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" - } -} - -module "metric_p50_response_time" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "TargetResponseTime" - label = "p50 response time" - color = local.colors.red - stat = "p50" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" - } -} - -module "metric_p90_response_time" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "TargetResponseTime" - label = "p90 response time" - stat = "p90" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" - } -} - -module "metric_p95_response_time" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "TargetResponseTime" - label = "p95 response time" - stat = "p95" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" - } -} - -module "metric_p99_response_time" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "TargetResponseTime" - label = "p99 response time" - stat = "p99" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" - } -} - -module "metric_max_response_time" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "TargetResponseTime" - label = "Maximum response time" - stat = "Maximum" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" - } -} - -module "metric_desired_tasks" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "DesiredTaskCount" - label = "Desired task count" - color = local.colors.grey - stat = "Average" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } -} - -module "metric_pending_tasks" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "PendingTaskCount" - label = "Pending task count" - color = local.colors.orange - stat = "Average" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } -} - -module "metric_running_tasks" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "RunningTaskCount" - label = "Running task count" - color = local.colors.green - stat = "Average" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } -} - -module "metric_average_cpu_reservation" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "CpuReserved" - label = "Average CPU reserved" - color = local.colors.grey - stat = "Average" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } -} - -module "metric_min_cpu_utilization" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "CpuUtilized" - label = "Minimum CPU utilized" - color = local.colors.light_orange - stat = "Minimum" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } -} - -module "metric_average_cpu_utilization" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "CpuUtilized" - label = "Average CPU utilized" - color = local.colors.orange - stat = "Average" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } -} - -module "metric_max_cpu_utilization" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "CpuUtilized" - label = "Maximum CPU utilized" - color = local.colors.red - stat = "Maximum" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } -} - -module "metric_average_memory_reservation" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "MemoryReserved" - label = "Average memory reserved" - color = local.colors.grey - stat = "Average" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } -} - -module "metric_min_memory_utilization" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "MemoryUtilized" - label = "Minimum memory utilized" - color = local.colors.light_orange - stat = "Minimum" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } -} - -module "metric_average_memory_utilization" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "MemoryUtilized" - label = "Average memory utilized" - color = local.colors.orange - stat = "Average" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } -} - -module "metric_max_memory_utilization" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "MemoryUtilized" - label = "Maximum memory utilized" - color = local.colors.red - stat = "Maximum" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } -} - -# cloudwatch dashboard widgets ------------------------------------------------ - -module "widget_responses" { - source = "./../../../cloudwatch/metric_widget" - - title = "${var.name} service responses" - stacked = true - left_metrics = [ - module.metric_connection_errors, - module.metric_5xx_responses, - module.metric_4xx_responses, - module.metric_3xx_responses, - module.metric_2xx_responses, - ] -} - -module "widget_response_ratios" { - source = "./../../../cloudwatch/metric_widget" - - title = "${var.name} service response ratios" - stacked = true - left_metrics = [ - module.metric_connection_errors_ratio, - module.metric_5xx_responses_ratio, - module.metric_4xx_responses_ratio, - module.metric_3xx_responses_ratio, - module.metric_2xx_responses_ratio, - ] - left_range = [0, 100] - hidden_metrics = [ - module.metric_requests, - module.metric_2xx_responses, - module.metric_3xx_responses, - module.metric_4xx_responses, - module.metric_5xx_responses, - module.metric_connection_errors, - ] -} - -module "widget_response_time" { - source = "./../../../cloudwatch/metric_widget" - - title = "${var.name} service response times" - stacked = true - left_metrics = [ - merge(module.metric_p50_response_time, { color = local.colors.red }), - merge(module.metric_p95_response_time, { color = local.colors.orange }), - merge(module.metric_p99_response_time, { color = local.colors.light_red }), - merge(module.metric_max_response_time, { color = local.colors.light_orange }), - ] -} - -module "widget_tasks" { - source = "./../../../cloudwatch/metric_widget" - - title = "${var.name} service scaling" - left_metrics = [ - module.metric_desired_tasks, - module.metric_pending_tasks, - module.metric_running_tasks, - ] - left_range = [0, null] -} - -module "widget_cpu_utilization" { - source = "./../../../cloudwatch/metric_widget" - - title = "${var.name} service CPU utilization" - left_metrics = [ - module.metric_average_cpu_reservation, - module.metric_min_cpu_utilization, - module.metric_average_cpu_utilization, - module.metric_max_cpu_utilization, - ] - left_range = [0, null] -} - -module "widget_memory_utilization" { - source = "./../../../cloudwatch/metric_widget" - - title = "${var.name} service memory utilization" - left_metrics = [ - module.metric_average_memory_reservation, - module.metric_min_memory_utilization, - module.metric_average_memory_utilization, - module.metric_max_memory_utilization, - ] - left_range = [0, null] -} diff --git a/ecs/services/web/metrics.tf b/ecs/services/web/metrics.tf new file mode 100644 index 00000000..16b381a5 --- /dev/null +++ b/ecs/services/web/metrics.tf @@ -0,0 +1,455 @@ +locals { + metrics = { + requests = module.metric_requests + status_2xx_responses = module.metric_2xx_responses + status_2xx_response_percentage = module.metric_2xx_response_percentage + status_3xx_responses = module.metric_3xx_responses + status_3xx_response_percentage = module.metric_3xx_response_percentage + status_4xx_responses = module.metric_4xx_responses + status_4xx_response_percentage = module.metric_4xx_response_percentage + status_5xx_responses = module.metric_5xx_responses + status_5xx_response_percentage = module.metric_5xx_response_percentage + connection_errors = module.metric_connection_errors + connection_error_percentage = module.metric_connection_error_percentage + average_response_time = module.metric_average_response_time + p50_response_time = module.metric_p50_response_time + p90_response_time = module.metric_p90_response_time + p95_response_time = module.metric_p95_response_time + p99_response_time = module.metric_p99_response_time + max_response_time = module.metric_max_response_time + desired_tasks = module.metric_desired_tasks + pending_tasks = module.metric_pending_tasks + running_tasks = module.metric_running_tasks + average_cpu_reservation = module.metric_average_cpu_reservation + min_cpu_utilization = module.metric_min_cpu_utilization + average_cpu_utilization = module.metric_average_cpu_utilization + max_cpu_utilization = module.metric_max_cpu_utilization + average_memory_reservation = module.metric_average_memory_reservation + min_memory_utilization = module.metric_min_memory_utilization + average_memory_utilization = module.metric_average_memory_utilization + max_memory_utilization = module.metric_max_memory_utilization + } +} + +data "aws_lb_listener" "listener" { + count = var.create ? 1 : 0 + + arn = var.listener_arn +} + +data "aws_lb" "lb" { + count = var.create ? 1 : 0 + + arn = data.aws_lb_listener.listener[0].load_balancer_arn +} + +module "cloudwatch_consts" { + source = "./../../../cloudwatch/consts" +} + +locals { + colors = module.cloudwatch_consts.colors +} + +module "metric_requests" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "RequestCount" + label = "Responses" + stat = "Sum" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_2xx_responses" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "HTTPCode_Target_2XX_Count" + label = "2xx responses" + color = local.colors.green + stat = "Sum" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_2xx_response_percentage" { + source = "./../../../cloudwatch/metric_expression" + + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_2xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" + label = "2xx response ratio" + color = module.metric_2xx_responses.color +} + +module "metric_3xx_responses" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "HTTPCode_Target_3XX_Count" + label = "3xx responses" + color = local.colors.blue + stat = "Sum" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_3xx_response_percentage" { + source = "./../../../cloudwatch/metric_expression" + + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_3xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" + label = "3xx response ratio" + color = module.metric_3xx_responses.color +} + +module "metric_4xx_responses" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "HTTPCode_Target_4XX_Count" + label = "4xx responses" + color = local.colors.orange + stat = "Sum" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_4xx_response_percentage" { + source = "./../../../cloudwatch/metric_expression" + + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_4xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" + label = "4xx response ratio" + color = module.metric_4xx_responses.color +} + +module "metric_5xx_responses" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "HTTPCode_Target_5XX_Count" + label = "5xx responses" + color = local.colors.red + stat = "Sum" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_5xx_response_percentage" { + source = "./../../../cloudwatch/metric_expression" + + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_5xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" + label = "5xx response ratio" + color = module.metric_5xx_responses.color +} + +module "metric_connection_errors" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "TargetConnectionErrorCount" + label = "Connection errors" + color = local.colors.purple + stat = "Sum" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_connection_error_percentage" { + source = "./../../../cloudwatch/metric_expression" + + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_connection_errors.id}, 0) / ${module.metric_requests.id} * 100)" + label = "Connection errors ratio" + color = module.metric_connection_errors.color +} + +module "metric_average_response_time" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "TargetResponseTime" + label = "Average response time" + color = local.colors.red + stat = "Average" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_p50_response_time" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "TargetResponseTime" + label = "p50 response time" + color = local.colors.red + stat = "p50" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_p90_response_time" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "TargetResponseTime" + label = "p90 response time" + stat = "p90" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_p95_response_time" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "TargetResponseTime" + label = "p95 response time" + stat = "p95" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_p99_response_time" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "TargetResponseTime" + label = "p99 response time" + stat = "p99" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_max_response_time" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "TargetResponseTime" + label = "Maximum response time" + stat = "Maximum" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + +module "metric_desired_tasks" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "DesiredTaskCount" + label = "Desired task count" + color = local.colors.grey + stat = "Average" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_pending_tasks" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "PendingTaskCount" + label = "Pending task count" + color = local.colors.orange + stat = "Average" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_running_tasks" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "RunningTaskCount" + label = "Running task count" + color = local.colors.green + stat = "Average" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_average_cpu_reservation" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "CpuReserved" + label = "Average CPU reserved" + color = local.colors.grey + stat = "Average" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_min_cpu_utilization" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "CpuUtilized" + label = "Minimum CPU utilized" + color = local.colors.light_orange + stat = "Minimum" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_average_cpu_utilization" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "CpuUtilized" + label = "Average CPU utilized" + color = local.colors.orange + stat = "Average" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_max_cpu_utilization" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "CpuUtilized" + label = "Maximum CPU utilized" + color = local.colors.red + stat = "Maximum" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_average_memory_reservation" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "MemoryReserved" + label = "Average memory reserved" + color = local.colors.grey + stat = "Average" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_min_memory_utilization" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "MemoryUtilized" + label = "Minimum memory utilized" + color = local.colors.light_orange + stat = "Minimum" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_average_memory_utilization" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "MemoryUtilized" + label = "Average memory utilized" + color = local.colors.orange + stat = "Average" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_max_memory_utilization" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "MemoryUtilized" + label = "Maximum memory utilized" + color = local.colors.red + stat = "Maximum" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} diff --git a/ecs/services/web/outputs.tf b/ecs/services/web/outputs.tf index 098c2ffe..42008b6b 100644 --- a/ecs/services/web/outputs.tf +++ b/ecs/services/web/outputs.tf @@ -13,149 +13,12 @@ output "target_group_arn" { value = var.create ? aws_lb_target_group.service[0].arn : null } -# cloudwatch metrics ---------------------------------------------------------- - -output "metric_requests" { - description = "Cloudwatch metric tracking total number of requests" - value = module.metric_requests -} -output "metric_2xx_responses" { - description = "Cloudwatch metric tracking the number of 2xx responses" - value = module.metric_2xx_responses -} -output "metric_2xx_responses_ratio" { - description = "Cloudwatch metric tracking percentage of 2xx responses" - value = module.metric_2xx_responses_ratio -} -output "metric_3xx_responses" { - description = "Cloudwatch metric tracking the number of 3xx responses" - value = module.metric_3xx_responses -} -output "metric_3xx_responses_ratio" { - description = "Cloudwatch metric tracking percentage of 3xx responses" - value = module.metric_3xx_responses_ratio -} -output "metric_4xx_responses" { - description = "Cloudwatch metric tracking the number of 4xx responses" - value = module.metric_4xx_responses -} -output "metric_4xx_responses_ratio" { - description = "Cloudwatch metric tracking percentage of 4xx responses" - value = module.metric_4xx_responses_ratio -} -output "metric_5xx_responses" { - description = "Cloudwatch metric tracking the number of 5xx responses" - value = module.metric_5xx_responses -} -output "metric_5xx_responses_ratio" { - description = "Cloudwatch metric tracking percentage of 5xx responses" - value = module.metric_5xx_responses_ratio -} -output "metric_connection_errors" { - description = "Cloudwatch metric tracking the number of connection errors from the load balancer" - value = module.metric_connection_errors -} -output "metric_connection_errors_ratio" { - description = "Cloudwatch metric tracking percentage of connection errors from the load balancer" - value = module.metric_connection_errors_ratio -} -output "metric_average_response_time" { - description = "Cloudwatch metric tracking average response time" - value = module.metric_average_response_time -} -output "metric_p50_response_time" { - description = "Cloudwatch metric tracking median response time" - value = module.metric_p50_response_time -} -output "metric_p90_response_time" { - description = "Cloudwatch metric tracking 90th percentile response time" - value = module.metric_p90_response_time -} -output "metric_p95_response_time" { - description = "Cloudwatch metric tracking 95th percentile response time" - value = module.metric_p95_response_time -} -output "metric_p99_response_time" { - description = "Cloudwatch metric tracking 99th percentile response time" - value = module.metric_p99_response_time -} -output "metric_max_response_time" { - description = "Cloudwatch metric tracking maximum response time" - value = module.metric_max_response_time -} -output "metric_desired_tasks" { - description = "Cloudwatch metric tracking desired tasks count" - value = module.metric_desired_tasks -} -output "metric_pending_tasks" { - description = "Cloudwatch metric tracking pending tasks count" - value = module.metric_pending_tasks -} -output "metric_running_tasks" { - description = "Cloudwatch metric tracking running tasks count" - value = module.metric_running_tasks -} -output "metric_average_cpu_reservation" { - description = "Cloudwatch metric tracking average CPU reservation" - value = module.metric_average_cpu_reservation -} -output "metric_min_cpu_utilization" { - description = "Cloudwatch metric tracking minimum CPU utilization" - value = module.metric_min_cpu_utilization -} -output "metric_average_cpu_utilization" { - description = "Cloudwatch metric tracking average CPU utilization" - value = module.metric_average_cpu_utilization -} -output "metric_max_cpu_utilization" { - description = "Cloudwatch metric tracking maximum CPU utilization" - value = module.metric_max_cpu_utilization -} -output "metric_average_memory_reservation" { - description = "Cloudwatch metric tracking average memory reservation" - value = module.metric_average_memory_reservation -} -output "metric_min_memory_utilization" { - description = "Cloudwatch metric tracking minimum memory utilization" - value = module.metric_min_memory_utilization -} -output "metric_average_memory_utilization" { - description = "Cloudwatch metric tracking average memory utilization" - value = module.metric_average_memory_utilization -} -output "metric_max_memory_utilization" { - description = "Cloudwatch metric tracking maximum memory utilization" - value = module.metric_max_memory_utilization -} - -# cloudwatch dashboard widgets ------------------------------------------------ - -output "widget_responses" { - description = "Cloudwatch dashboard widget that shows a breakdown of response status codes" - value = module.widget_responses -} - -output "widget_response_ratios" { - description = "Cloudwatch dashboard widget that shows a breakdown of response status code percentages" - value = module.widget_response_ratios -} - -output "widget_response_time" { - description = "Cloudwatch dashboard widget that shows a breakdown of response time percentiles" - value = module.widget_response_time -} - -output "widget_scaling" { - description = "Cloudwatch dashboard widget that shows scaling state" - value = module.widget_tasks -} - -output "widget_cpu_utilization" { - description = "Cloudwatch dashboard widget that shows CPU utilization" - value = module.widget_cpu_utilization +output "metrics" { + description = "Cloudwatch metrics, see [metrics.tf](./metrics.tf)" + value = local.metrics } -output "widget_memory_utilization" { - description = "Cloudwatch dashboard widget that shows memory utilization" - value = module.widget_memory_utilization +output "widgets" { + description = "Cloudwatch dashboard widgets, see [widgets.tf](./widgets.tf)" + value = local.widgets } diff --git a/ecs/services/web/widgets.tf b/ecs/services/web/widgets.tf new file mode 100644 index 00000000..11a6018f --- /dev/null +++ b/ecs/services/web/widgets.tf @@ -0,0 +1,98 @@ +locals { + widgets = { + responses = module.widget_responses + response_percentages = module.widget_response_percentages + response_time = module.widget_response_time + scaling = module.widget_scaling + cpu_utilization = module.widget_cpu_utilization + memory_utilization = module.widget_memory_utilization + } +} + +module "widget_responses" { + source = "./../../../cloudwatch/metric_widget" + + title = "${var.name} service responses" + stacked = true + left_metrics = [ + local.metrics.connection_errors, + local.metrics.status_5xx_responses, + local.metrics.status_4xx_responses, + local.metrics.status_3xx_responses, + local.metrics.status_2xx_responses, + ] +} + +module "widget_response_percentages" { + source = "./../../../cloudwatch/metric_widget" + + title = "${var.name} service response percentages" + stacked = true + left_metrics = [ + local.metrics.connection_error_percentage, + local.metrics.status_5xx_response_percentage, + local.metrics.status_4xx_response_percentage, + local.metrics.status_3xx_response_percentage, + local.metrics.status_2xx_response_percentage, + ] + left_range = [0, 100] + hidden_metrics = [ + local.metrics.requests, + local.metrics.status_2xx_responses, + local.metrics.status_3xx_responses, + local.metrics.status_4xx_responses, + local.metrics.status_5xx_responses, + local.metrics.connection_errors, + ] +} + +module "widget_response_time" { + source = "./../../../cloudwatch/metric_widget" + + title = "${var.name} service response times" + stacked = true + left_metrics = [ + merge(local.metrics.p50_response_time, { color = local.colors.red }), + merge(local.metrics.p95_response_time, { color = local.colors.orange }), + merge(local.metrics.p99_response_time, { color = local.colors.light_red }), + merge(local.metrics.max_response_time, { color = local.colors.light_orange }), + ] +} + +module "widget_scaling" { + source = "./../../../cloudwatch/metric_widget" + + title = "${var.name} service scaling" + left_metrics = [ + local.metrics.desired_tasks, + local.metrics.pending_tasks, + local.metrics.running_tasks, + ] + left_range = [0, null] +} + +module "widget_cpu_utilization" { + source = "./../../../cloudwatch/metric_widget" + + title = "${var.name} service CPU utilization" + left_metrics = [ + local.metrics.average_cpu_reservation, + local.metrics.min_cpu_utilization, + local.metrics.average_cpu_utilization, + local.metrics.max_cpu_utilization, + ] + left_range = [0, null] +} + +module "widget_memory_utilization" { + source = "./../../../cloudwatch/metric_widget" + + title = "${var.name} service memory utilization" + left_metrics = [ + local.metrics.average_memory_reservation, + local.metrics.min_memory_utilization, + local.metrics.average_memory_utilization, + local.metrics.max_memory_utilization, + ] + left_range = [0, null] +} From 1e9a3c6bac5ce6a0dc3af495ea7d079a12076c8b Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Mon, 27 Apr 2020 20:09:57 +0200 Subject: [PATCH 14/17] feat(ecs/services/web): added healthy_tasks metric and added it to scaling widget --- ecs/services/web/metrics.tf | 19 ++++++++++++++++++- ecs/services/web/widgets.tf | 1 + 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/ecs/services/web/metrics.tf b/ecs/services/web/metrics.tf index 16b381a5..541d1b82 100644 --- a/ecs/services/web/metrics.tf +++ b/ecs/services/web/metrics.tf @@ -20,6 +20,7 @@ locals { desired_tasks = module.metric_desired_tasks pending_tasks = module.metric_pending_tasks running_tasks = module.metric_running_tasks + healthy_tasks = module.metric_healthy_tasks average_cpu_reservation = module.metric_average_cpu_reservation min_cpu_utilization = module.metric_min_cpu_utilization average_cpu_utilization = module.metric_average_cpu_utilization @@ -316,7 +317,7 @@ module "metric_running_tasks" { namespace = "ECS/ContainerInsights" name = "RunningTaskCount" label = "Running task count" - color = local.colors.green + color = local.colors.light_green stat = "Average" period = 60 @@ -326,6 +327,22 @@ module "metric_running_tasks" { } } +module "metric_healthy_tasks" { + source = "./../../../cloudwatch/metric" + + namespace = "AWS/ApplicationELB" + name = "HealthyHostCount" + label = "Healthy task count" + color = local.colors.green + stat = "Average" + period = 60 + + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } +} + module "metric_average_cpu_reservation" { source = "./../../../cloudwatch/metric" diff --git a/ecs/services/web/widgets.tf b/ecs/services/web/widgets.tf index 11a6018f..183088cc 100644 --- a/ecs/services/web/widgets.tf +++ b/ecs/services/web/widgets.tf @@ -67,6 +67,7 @@ module "widget_scaling" { local.metrics.desired_tasks, local.metrics.pending_tasks, local.metrics.running_tasks, + local.metrics.healthy_tasks, ] left_range = [0, null] } From b7a490d4292e28a539390f5bdb28839969682fc4 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Mon, 27 Apr 2020 20:17:39 +0200 Subject: [PATCH 15/17] feat(ecs/services/worker): added cloudwatch metrics and widgets --- ecs/example/main.tf | 3 + ecs/services/worker/README.md | 8 ++ ecs/services/worker/main.tf | 10 ++ ecs/services/worker/metrics.tf | 199 +++++++++++++++++++++++++++++++++ ecs/services/worker/outputs.tf | 9 ++ ecs/services/worker/widgets.tf | 45 ++++++++ 6 files changed, 274 insertions(+) create mode 100644 ecs/services/worker/metrics.tf create mode 100644 ecs/services/worker/widgets.tf diff --git a/ecs/example/main.tf b/ecs/example/main.tf index 11e22805..a8213283 100644 --- a/ecs/example/main.tf +++ b/ecs/example/main.tf @@ -109,6 +109,9 @@ module "dashboard" { module.web.widgets.scaling, module.web.widgets.cpu_utilization, module.web.widgets.memory_utilization, + module.worker.widgets.scaling, + module.worker.widgets.cpu_utilization, + module.worker.widgets.memory_utilization, ] } diff --git a/ecs/services/worker/README.md b/ecs/services/worker/README.md index 188b66f4..d4413e7a 100644 --- a/ecs/services/worker/README.md +++ b/ecs/services/worker/README.md @@ -52,3 +52,11 @@ Creates an ECS service for background workers * `id` Service id + +* `metrics` + + Cloudwatch metrics, see [metrics.tf](./metrics.tf) + +* `widgets` + + Cloudwatch dashboard widgets, see [widgets.tf](./widgets.tf) diff --git a/ecs/services/worker/main.tf b/ecs/services/worker/main.tf index 0a349946..0bce2830 100644 --- a/ecs/services/worker/main.tf +++ b/ecs/services/worker/main.tf @@ -1,3 +1,13 @@ +locals { + cluster_name = var.create ? substr(data.aws_arn.cluster[0].resource, length("cluster/"), -1) : "" +} + +data "aws_arn" "cluster" { + count = var.create ? 1 : 0 + + arn = var.cluster_arn +} + resource "aws_ecs_service" "service" { count = var.create ? 1 : 0 diff --git a/ecs/services/worker/metrics.tf b/ecs/services/worker/metrics.tf new file mode 100644 index 00000000..d422b1d8 --- /dev/null +++ b/ecs/services/worker/metrics.tf @@ -0,0 +1,199 @@ +locals { + metrics = { + desired_tasks = module.metric_desired_tasks + pending_tasks = module.metric_pending_tasks + running_tasks = module.metric_running_tasks + average_cpu_reservation = module.metric_average_cpu_reservation + min_cpu_utilization = module.metric_min_cpu_utilization + average_cpu_utilization = module.metric_average_cpu_utilization + max_cpu_utilization = module.metric_max_cpu_utilization + average_memory_reservation = module.metric_average_memory_reservation + min_memory_utilization = module.metric_min_memory_utilization + average_memory_utilization = module.metric_average_memory_utilization + max_memory_utilization = module.metric_max_memory_utilization + } +} + +module "cloudwatch_consts" { + source = "./../../../cloudwatch/consts" +} + +locals { + colors = module.cloudwatch_consts.colors +} + +module "metric_desired_tasks" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "DesiredTaskCount" + label = "Desired task count" + color = local.colors.grey + stat = "Average" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_pending_tasks" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "PendingTaskCount" + label = "Pending task count" + color = local.colors.orange + stat = "Average" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_running_tasks" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "RunningTaskCount" + label = "Running task count" + color = local.colors.green + stat = "Average" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_average_cpu_reservation" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "CpuReserved" + label = "Average CPU reserved" + color = local.colors.grey + stat = "Average" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_min_cpu_utilization" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "CpuUtilized" + label = "Minimum CPU utilized" + color = local.colors.light_orange + stat = "Minimum" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_average_cpu_utilization" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "CpuUtilized" + label = "Average CPU utilized" + color = local.colors.orange + stat = "Average" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_max_cpu_utilization" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "CpuUtilized" + label = "Maximum CPU utilized" + color = local.colors.red + stat = "Maximum" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_average_memory_reservation" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "MemoryReserved" + label = "Average memory reserved" + color = local.colors.grey + stat = "Average" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_min_memory_utilization" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "MemoryUtilized" + label = "Minimum memory utilized" + color = local.colors.light_orange + stat = "Minimum" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_average_memory_utilization" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "MemoryUtilized" + label = "Average memory utilized" + color = local.colors.orange + stat = "Average" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} + +module "metric_max_memory_utilization" { + source = "./../../../cloudwatch/metric" + + namespace = "ECS/ContainerInsights" + name = "MemoryUtilized" + label = "Maximum memory utilized" + color = local.colors.red + stat = "Maximum" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } +} diff --git a/ecs/services/worker/outputs.tf b/ecs/services/worker/outputs.tf index 4cd50a6f..bd3b7656 100644 --- a/ecs/services/worker/outputs.tf +++ b/ecs/services/worker/outputs.tf @@ -3,3 +3,12 @@ output "id" { value = var.create ? aws_ecs_service.service[0].id : null } +output "metrics" { + description = "Cloudwatch metrics, see [metrics.tf](./metrics.tf)" + value = local.metrics +} + +output "widgets" { + description = "Cloudwatch dashboard widgets, see [widgets.tf](./widgets.tf)" + value = local.widgets +} diff --git a/ecs/services/worker/widgets.tf b/ecs/services/worker/widgets.tf new file mode 100644 index 00000000..3ac84172 --- /dev/null +++ b/ecs/services/worker/widgets.tf @@ -0,0 +1,45 @@ +locals { + widgets = { + scaling = module.widget_scaling + cpu_utilization = module.widget_cpu_utilization + memory_utilization = module.widget_memory_utilization + } +} + +module "widget_scaling" { + source = "./../../../cloudwatch/metric_widget" + + title = "${var.name} service scaling" + left_metrics = [ + local.metrics.desired_tasks, + local.metrics.pending_tasks, + local.metrics.running_tasks, + ] + left_range = [0, null] +} + +module "widget_cpu_utilization" { + source = "./../../../cloudwatch/metric_widget" + + title = "${var.name} service CPU utilization" + left_metrics = [ + local.metrics.average_cpu_reservation, + local.metrics.min_cpu_utilization, + local.metrics.average_cpu_utilization, + local.metrics.max_cpu_utilization, + ] + left_range = [0, null] +} + +module "widget_memory_utilization" { + source = "./../../../cloudwatch/metric_widget" + + title = "${var.name} service memory utilization" + left_metrics = [ + local.metrics.average_memory_reservation, + local.metrics.min_memory_utilization, + local.metrics.average_memory_utilization, + local.metrics.max_memory_utilization, + ] + left_range = [0, null] +} From 227a7295f77b51d56447a893738d9abbbd5fceb2 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Wed, 29 Apr 2020 14:39:05 +0200 Subject: [PATCH 16/17] refactor(ecs/services/worker): DRYing out the code with cloudwatch/metric/many module --- ecs/services/worker/metrics.tf | 208 +++++++++++---------------------- 1 file changed, 67 insertions(+), 141 deletions(-) diff --git a/ecs/services/worker/metrics.tf b/ecs/services/worker/metrics.tf index d422b1d8..d566868d 100644 --- a/ecs/services/worker/metrics.tf +++ b/ecs/services/worker/metrics.tf @@ -1,16 +1,16 @@ locals { metrics = { - desired_tasks = module.metric_desired_tasks - pending_tasks = module.metric_pending_tasks - running_tasks = module.metric_running_tasks + desired_tasks = module.metrics_tasks.out_map.desired + pending_tasks = module.metrics_tasks.out_map.pending + running_tasks = module.metrics_tasks.out_map.running average_cpu_reservation = module.metric_average_cpu_reservation - min_cpu_utilization = module.metric_min_cpu_utilization - average_cpu_utilization = module.metric_average_cpu_utilization - max_cpu_utilization = module.metric_max_cpu_utilization + min_cpu_utilization = module.metrics_cpu_utilization.out_map.min + average_cpu_utilization = module.metrics_cpu_utilization.out_map.average + max_cpu_utilization = module.metrics_cpu_utilization.out_map.max average_memory_reservation = module.metric_average_memory_reservation - min_memory_utilization = module.metric_min_memory_utilization - average_memory_utilization = module.metric_average_memory_utilization - max_memory_utilization = module.metric_max_memory_utilization + min_memory_utilization = module.metrics_memory_utilization.out_map.min + average_memory_utilization = module.metrics_memory_utilization.out_map.average + max_memory_utilization = module.metrics_memory_utilization.out_map.max } } @@ -22,52 +22,30 @@ locals { colors = module.cloudwatch_consts.colors } -module "metric_desired_tasks" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "DesiredTaskCount" - label = "Desired task count" - color = local.colors.grey - stat = "Average" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } -} - -module "metric_pending_tasks" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "PendingTaskCount" - label = "Pending task count" - color = local.colors.orange - stat = "Average" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name +locals { + metrics_tasks_variants = { + desired = { state = "Desired", color = local.colors.grey } + pending = { state = "Pending", color = local.colors.orange } + running = { state = "Running", color = local.colors.green } } } -module "metric_running_tasks" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "RunningTaskCount" - label = "Running task count" - color = local.colors.green - stat = "Average" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } +module "metrics_tasks" { + source = "./../../../cloudwatch/metric/many" + + vars_map = { for k, variant in local.metrics_tasks_variants : k => { + namespace = "ECS/ContainerInsights" + name = "${variant.state}TaskCount" + label = "${variant.state} task count" + color = variant.color + stat = "Average" + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } + } } } module "metric_average_cpu_reservation" { @@ -86,52 +64,30 @@ module "metric_average_cpu_reservation" { } } -module "metric_min_cpu_utilization" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "CpuUtilized" - label = "Minimum CPU utilized" - color = local.colors.light_orange - stat = "Minimum" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } -} - -module "metric_average_cpu_utilization" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "CpuUtilized" - label = "Average CPU utilized" - color = local.colors.orange - stat = "Average" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name +locals { + metrics_utilization_variants = { + min = { stat = "Minimum", color = local.colors.light_orange } + average = { stat = "Average", color = local.colors.orange } + max = { stat = "Maximum", color = local.colors.red } } } -module "metric_max_cpu_utilization" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "CpuUtilized" - label = "Maximum CPU utilized" - color = local.colors.red - stat = "Maximum" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } +module "metrics_cpu_utilization" { + source = "./../../../cloudwatch/metric/many" + + vars_map = { for k, variant in local.metrics_utilization_variants : k => { + namespace = "ECS/ContainerInsights" + name = "CpuUtilized" + label = "${variant.stat} CPU utilized" + color = variant.color + stat = variant.stat + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } + } } } module "metric_average_memory_reservation" { @@ -150,50 +106,20 @@ module "metric_average_memory_reservation" { } } -module "metric_min_memory_utilization" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "MemoryUtilized" - label = "Minimum memory utilized" - color = local.colors.light_orange - stat = "Minimum" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } -} - -module "metric_average_memory_utilization" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "MemoryUtilized" - label = "Average memory utilized" - color = local.colors.orange - stat = "Average" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } -} - -module "metric_max_memory_utilization" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "MemoryUtilized" - label = "Maximum memory utilized" - color = local.colors.red - stat = "Maximum" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } +module "metrics_memory_utilization" { + source = "./../../../cloudwatch/metric/many" + + vars_map = { for k, variant in local.metrics_utilization_variants : k => { + namespace = "ECS/ContainerInsights" + name = "MemoryUtilized" + label = "${variant.stat} memory utilized" + color = variant.color + stat = variant.stat + period = 60 + + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } + } } } From 4cd5c86cc3ba49d21840db5a43569babc380d2c1 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Wed, 29 Apr 2020 15:15:33 +0200 Subject: [PATCH 17/17] refactor(ecs/services/web): DRYing out code with cloudwatch/metric/many module --- ecs/services/web/metrics.tf | 446 ++++++++++-------------------------- ecs/services/web/widgets.tf | 8 +- 2 files changed, 129 insertions(+), 325 deletions(-) diff --git a/ecs/services/web/metrics.tf b/ecs/services/web/metrics.tf index 541d1b82..d5cebe0a 100644 --- a/ecs/services/web/metrics.tf +++ b/ecs/services/web/metrics.tf @@ -1,34 +1,34 @@ locals { metrics = { requests = module.metric_requests - status_2xx_responses = module.metric_2xx_responses - status_2xx_response_percentage = module.metric_2xx_response_percentage - status_3xx_responses = module.metric_3xx_responses - status_3xx_response_percentage = module.metric_3xx_response_percentage - status_4xx_responses = module.metric_4xx_responses - status_4xx_response_percentage = module.metric_4xx_response_percentage - status_5xx_responses = module.metric_5xx_responses - status_5xx_response_percentage = module.metric_5xx_response_percentage + status_2xx_responses = module.metrics_response_statuses.out_map["2xx"] + status_2xx_response_percentage = module.metrics_response_status_percentages.out_map["2xx"] + status_3xx_responses = module.metrics_response_statuses.out_map["3xx"] + status_3xx_response_percentage = module.metrics_response_status_percentages.out_map["3xx"] + status_4xx_responses = module.metrics_response_statuses.out_map["4xx"] + status_4xx_response_percentage = module.metrics_response_status_percentages.out_map["4xx"] + status_5xx_responses = module.metrics_response_statuses.out_map["5xx"] + status_5xx_response_percentage = module.metrics_response_status_percentages.out_map["5xx"] connection_errors = module.metric_connection_errors connection_error_percentage = module.metric_connection_error_percentage - average_response_time = module.metric_average_response_time - p50_response_time = module.metric_p50_response_time - p90_response_time = module.metric_p90_response_time - p95_response_time = module.metric_p95_response_time - p99_response_time = module.metric_p99_response_time - max_response_time = module.metric_max_response_time - desired_tasks = module.metric_desired_tasks - pending_tasks = module.metric_pending_tasks - running_tasks = module.metric_running_tasks + average_response_time = module.metrics_response_time.out_map.average + p50_response_time = module.metrics_response_time.out_map.p50 + p90_response_time = module.metrics_response_time.out_map.p90 + p95_response_time = module.metrics_response_time.out_map.p95 + p99_response_time = module.metrics_response_time.out_map.p99 + max_response_time = module.metrics_response_time.out_map.max + desired_tasks = module.metrics_tasks.out_map.desired + pending_tasks = module.metrics_tasks.out_map.pending + running_tasks = module.metrics_tasks.out_map.running healthy_tasks = module.metric_healthy_tasks average_cpu_reservation = module.metric_average_cpu_reservation - min_cpu_utilization = module.metric_min_cpu_utilization - average_cpu_utilization = module.metric_average_cpu_utilization - max_cpu_utilization = module.metric_max_cpu_utilization + min_cpu_utilization = module.metrics_cpu_utilization.out_map.min + average_cpu_utilization = module.metrics_cpu_utilization.out_map.average + max_cpu_utilization = module.metrics_cpu_utilization.out_map.max average_memory_reservation = module.metric_average_memory_reservation - min_memory_utilization = module.metric_min_memory_utilization - average_memory_utilization = module.metric_average_memory_utilization - max_memory_utilization = module.metric_max_memory_utilization + min_memory_utilization = module.metrics_memory_utilization.out_map.min + average_memory_utilization = module.metrics_memory_utilization.out_map.average + max_memory_utilization = module.metrics_memory_utilization.out_map.max } } @@ -67,100 +67,41 @@ module "metric_requests" { } } -module "metric_2xx_responses" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "HTTPCode_Target_2XX_Count" - label = "2xx responses" - color = local.colors.green - stat = "Sum" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" - } -} - -module "metric_2xx_response_percentage" { - source = "./../../../cloudwatch/metric_expression" - - expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_2xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" - label = "2xx response ratio" - color = module.metric_2xx_responses.color -} - -module "metric_3xx_responses" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "HTTPCode_Target_3XX_Count" - label = "3xx responses" - color = local.colors.blue - stat = "Sum" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" +locals { + metrics_response_statuses = { + "2xx" = { color = local.colors.green } + "3xx" = { color = local.colors.blue } + "4xx" = { color = local.colors.orange } + "5xx" = { color = local.colors.red } } } -module "metric_3xx_response_percentage" { - source = "./../../../cloudwatch/metric_expression" - - expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_3xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" - label = "3xx response ratio" - color = module.metric_3xx_responses.color -} - -module "metric_4xx_responses" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "HTTPCode_Target_4XX_Count" - label = "4xx responses" - color = local.colors.orange - stat = "Sum" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" - } -} +module "metrics_response_statuses" { + source = "./../../../cloudwatch/metric/many" -module "metric_4xx_response_percentage" { - source = "./../../../cloudwatch/metric_expression" + vars_map = { for status, variant in local.metrics_response_statuses : status => { + namespace = "AWS/ApplicationELB" + name = "HTTPCode_Target_${upper(status)}_Count" + label = "${status} responses" + color = variant.color + stat = "Sum" + period = 60 - expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_4xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" - label = "4xx response ratio" - color = module.metric_4xx_responses.color + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } + } } } -module "metric_5xx_responses" { - source = "./../../../cloudwatch/metric" +module "metrics_response_status_percentages" { + source = "./../../../cloudwatch/metric_expression/many" - namespace = "AWS/ApplicationELB" - name = "HTTPCode_Target_5XX_Count" - label = "5xx responses" - color = local.colors.red - stat = "Sum" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" - } -} - -module "metric_5xx_response_percentage" { - source = "./../../../cloudwatch/metric_expression" - - expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_5xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" - label = "5xx response ratio" - color = module.metric_5xx_responses.color + vars_map = { for status, variant in local.metrics_response_statuses : status => { + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metrics_response_statuses.out_map[status].id}, 0) / ${module.metric_requests.id} * 100)" + label = "${status} responses" + color = variant.color + } } } module "metric_connection_errors" { @@ -183,148 +124,63 @@ module "metric_connection_error_percentage" { source = "./../../../cloudwatch/metric_expression" expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_connection_errors.id}, 0) / ${module.metric_requests.id} * 100)" - label = "Connection errors ratio" + label = "Connection errors" color = module.metric_connection_errors.color } -module "metric_average_response_time" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "TargetResponseTime" - label = "Average response time" - color = local.colors.red - stat = "Average" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" - } -} - -module "metric_p50_response_time" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "TargetResponseTime" - label = "p50 response time" - color = local.colors.red - stat = "p50" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" - } -} - -module "metric_p90_response_time" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "TargetResponseTime" - label = "p90 response time" - stat = "p90" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" - } -} - -module "metric_p95_response_time" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "TargetResponseTime" - label = "p95 response time" - stat = "p95" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" - } -} - -module "metric_p99_response_time" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "TargetResponseTime" - label = "p99 response time" - stat = "p99" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" - } -} - -module "metric_max_response_time" { - source = "./../../../cloudwatch/metric" - - namespace = "AWS/ApplicationELB" - name = "TargetResponseTime" - label = "Maximum response time" - stat = "Maximum" - period = 60 - - dimensions = { - LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" - TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" +locals { + metrics_response_time_variants = { + average = { stat = "Average", color = local.colors.red } + p50 = { stat = "p50", color = local.colors.red } + p90 = { stat = "p90", color = local.colors.orange } + p95 = { stat = "p95", color = local.colors.orange } + p99 = { stat = "p99", color = local.colors.light_red } + max = { stat = "Maximum", color = local.colors.light_orange } } } -module "metric_desired_tasks" { - source = "./../../../cloudwatch/metric" +module "metrics_response_time" { + source = "./../../../cloudwatch/metric/many" - namespace = "ECS/ContainerInsights" - name = "DesiredTaskCount" - label = "Desired task count" - color = local.colors.grey - stat = "Average" - period = 60 + vars_map = { for k, variant in local.metrics_response_time_variants : k => { + namespace = "AWS/ApplicationELB" + name = "TargetResponseTime" + label = "${variant.stat} response time" + color = variant.color + stat = variant.stat + period = 60 - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } + dimensions = { + LoadBalancer = var.create ? data.aws_lb.lb[0].arn_suffix : "" + TargetGroup = var.create ? aws_lb_target_group.service[0].arn_suffix : "" + } + } } } -module "metric_pending_tasks" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "PendingTaskCount" - label = "Pending task count" - color = local.colors.orange - stat = "Average" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name +locals { + metrics_tasks_variants = { + desired = { state = "Desired", color = local.colors.grey } + pending = { state = "Pending", color = local.colors.orange } + running = { state = "Running", color = local.colors.light_green } } } -module "metric_running_tasks" { - source = "./../../../cloudwatch/metric" +module "metrics_tasks" { + source = "./../../../cloudwatch/metric/many" - namespace = "ECS/ContainerInsights" - name = "RunningTaskCount" - label = "Running task count" - color = local.colors.light_green - stat = "Average" - period = 60 + vars_map = { for k, variant in local.metrics_tasks_variants : k => { + namespace = "ECS/ContainerInsights" + name = "${variant.state}TaskCount" + label = "${variant.state} task count" + color = variant.color + stat = "Average" + period = 60 - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } + } } } module "metric_healthy_tasks" { @@ -359,52 +215,30 @@ module "metric_average_cpu_reservation" { } } -module "metric_min_cpu_utilization" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "CpuUtilized" - label = "Minimum CPU utilized" - color = local.colors.light_orange - stat = "Minimum" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } -} - -module "metric_average_cpu_utilization" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "CpuUtilized" - label = "Average CPU utilized" - color = local.colors.orange - stat = "Average" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name +locals { + metrics_utilization_variants = { + min = { stat = "Minimum", color = local.colors.light_orange } + average = { stat = "Average", color = local.colors.orange } + max = { stat = "Maximum", color = local.colors.red } } } -module "metric_max_cpu_utilization" { - source = "./../../../cloudwatch/metric" +module "metrics_cpu_utilization" { + source = "./../../../cloudwatch/metric/many" - namespace = "ECS/ContainerInsights" - name = "CpuUtilized" - label = "Maximum CPU utilized" - color = local.colors.red - stat = "Maximum" - period = 60 + vars_map = { for k, variant in local.metrics_utilization_variants : k => { + namespace = "ECS/ContainerInsights" + name = "CpuUtilized" + label = "${variant.stat} CPU utilized" + color = variant.color + stat = variant.stat + period = 60 - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } + } } } module "metric_average_memory_reservation" { @@ -423,50 +257,20 @@ module "metric_average_memory_reservation" { } } -module "metric_min_memory_utilization" { - source = "./../../../cloudwatch/metric" +module "metrics_memory_utilization" { + source = "./../../../cloudwatch/metric/many" - namespace = "ECS/ContainerInsights" - name = "MemoryUtilized" - label = "Minimum memory utilized" - color = local.colors.light_orange - stat = "Minimum" - period = 60 + vars_map = { for k, variant in local.metrics_utilization_variants : k => { + namespace = "ECS/ContainerInsights" + name = "MemoryUtilized" + label = "${variant.stat} memory utilized" + color = variant.color + stat = variant.stat + period = 60 - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } -} - -module "metric_average_memory_utilization" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "MemoryUtilized" - label = "Average memory utilized" - color = local.colors.orange - stat = "Average" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } -} - -module "metric_max_memory_utilization" { - source = "./../../../cloudwatch/metric" - - namespace = "ECS/ContainerInsights" - name = "MemoryUtilized" - label = "Maximum memory utilized" - color = local.colors.red - stat = "Maximum" - period = 60 - - dimensions = { - ServiceName = var.name - ClusterName = local.cluster_name - } + dimensions = { + ServiceName = var.name + ClusterName = local.cluster_name + } + } } } diff --git a/ecs/services/web/widgets.tf b/ecs/services/web/widgets.tf index 183088cc..2961171d 100644 --- a/ecs/services/web/widgets.tf +++ b/ecs/services/web/widgets.tf @@ -52,10 +52,10 @@ module "widget_response_time" { title = "${var.name} service response times" stacked = true left_metrics = [ - merge(local.metrics.p50_response_time, { color = local.colors.red }), - merge(local.metrics.p95_response_time, { color = local.colors.orange }), - merge(local.metrics.p99_response_time, { color = local.colors.light_red }), - merge(local.metrics.max_response_time, { color = local.colors.light_orange }), + local.metrics.p50_response_time, + local.metrics.p95_response_time, + local.metrics.p99_response_time, + local.metrics.max_response_time, ] }