From c2a2fca82fc1e13ca11953f50d94bd14555cee06 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Tue, 28 Apr 2020 17:36:02 +0200 Subject: [PATCH 01/17] feat(ecs/network): metrics and widgets scaffold --- ecs/network/README.md | 8 ++++++++ ecs/network/metrics.tf | 5 +++++ ecs/network/outputs.tf | 10 ++++++++++ ecs/network/widgets.tf | 5 +++++ 4 files changed, 28 insertions(+) create mode 100644 ecs/network/metrics.tf create mode 100644 ecs/network/widgets.tf diff --git a/ecs/network/README.md b/ecs/network/README.md index eda763a7..1023ea9e 100644 --- a/ecs/network/README.md +++ b/ecs/network/README.md @@ -108,6 +108,10 @@ Creates networking resources needed for a standard ECS cluster setup: The canonical hosted zone ID of the Application Load Balancer (to be used in a Route 53 Alias record) +* `metrics` + + Cloudwatch metrics, see [metrics.tf](./metrics.tf) + * `private_blocks` The CIDR blocks of private subnets @@ -135,3 +139,7 @@ Creates networking resources needed for a standard ECS cluster setup: * `vpc_id` The ID of the VPC + +* `widgets` + + Cloudwatch dashboard widgets, see [widgets.tf](./widgets.tf) diff --git a/ecs/network/metrics.tf b/ecs/network/metrics.tf new file mode 100644 index 00000000..c4db1a29 --- /dev/null +++ b/ecs/network/metrics.tf @@ -0,0 +1,5 @@ +locals { + metrics = { + + } +} diff --git a/ecs/network/outputs.tf b/ecs/network/outputs.tf index a768beb5..880f147b 100644 --- a/ecs/network/outputs.tf +++ b/ecs/network/outputs.tf @@ -92,3 +92,13 @@ output "hosts_security_group_arn" { value = var.create ? aws_security_group.hosts[0].arn : null description = "The ARN of the Security Group which should be used by host instances" } + +output "metrics" { + description = "Cloudwatch metrics, see [metrics.tf](./metrics.tf)" + value = local.metrics +} + +output "widgets" { + description = "Cloudwatch dashboard widgets, see [widgets.tf](./widgets.tf)" + value = local.widgets +} diff --git a/ecs/network/widgets.tf b/ecs/network/widgets.tf new file mode 100644 index 00000000..f1ee0bc0 --- /dev/null +++ b/ecs/network/widgets.tf @@ -0,0 +1,5 @@ +locals { + widgets = { + + } +} From 6107ca822f15ee45fec76e87934808a13f69e2d8 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Tue, 28 Apr 2020 19:54:32 +0200 Subject: [PATCH 02/17] feat(ecs/network): ALB response metrics and dashboard widgets --- ecs/README.md | 8 + ecs/example/main.tf | 15 ++ ecs/network/README.md | 16 +- ecs/network/metrics.tf | 339 ++++++++++++++++++++++++++++++++++++++++- ecs/network/outputs.tf | 12 +- ecs/network/widgets.tf | 73 ++++++++- ecs/outputs.tf | 10 ++ 7 files changed, 456 insertions(+), 17 deletions(-) diff --git a/ecs/README.md b/ecs/README.md index 0d48c3e4..a5b376f7 100644 --- a/ecs/README.md +++ b/ecs/README.md @@ -129,6 +129,14 @@ Based on [AWS reference architecture](https://github.com/aws-samples/ecs-refarch The ID of the Internet Gateway +* `lb_metrics` + + Load balancer related Cloudwatch metrics, see [network/metrics.tf](./network/metrics.tf) + +* `lb_widgets` + + Load balancer related Cloudwatch dashboard widgets, see [network/widgets.tf](./network/widgets.tf) + * `load_balancer_arn` The ARN of the Application Load Balancer diff --git a/ecs/example/main.tf b/ecs/example/main.tf index c0022183..e0b1ce29 100644 --- a/ecs/example/main.tf +++ b/ecs/example/main.tf @@ -96,6 +96,21 @@ module "web" { healthcheck_path = "/" } +module "dashboard" { + source = "./../../cloudwatch/dashboard" + + name = "terraform-ecs-example" + widgets = [ + module.cluster.lb_widgets.responses, + module.cluster.lb_widgets.response_percentages, + module.cluster.lb_widgets.response_time, + ] +} + output "hosts_id" { value = module.hosts.id } + +output "dashboard_url" { + value = module.dashboard.url +} diff --git a/ecs/network/README.md b/ecs/network/README.md index 1023ea9e..babc407a 100644 --- a/ecs/network/README.md +++ b/ecs/network/README.md @@ -84,6 +84,14 @@ Creates networking resources needed for a standard ECS cluster setup: The ID of the Internet Gateway +* `lb_metrics` + + Load balancer related Cloudwatch metrics, see [metrics.tf](./metrics.tf) + +* `lb_widgets` + + Load balancer related Cloudwatch dashboard widgets, see [widgets.tf](./widgets.tf) + * `load_balancer_arn` The ARN of the Application Load Balancer @@ -108,10 +116,6 @@ Creates networking resources needed for a standard ECS cluster setup: The canonical hosted zone ID of the Application Load Balancer (to be used in a Route 53 Alias record) -* `metrics` - - Cloudwatch metrics, see [metrics.tf](./metrics.tf) - * `private_blocks` The CIDR blocks of private subnets @@ -139,7 +143,3 @@ Creates networking resources needed for a standard ECS cluster setup: * `vpc_id` The ID of the VPC - -* `widgets` - - Cloudwatch dashboard widgets, see [widgets.tf](./widgets.tf) diff --git a/ecs/network/metrics.tf b/ecs/network/metrics.tf index c4db1a29..12596fa0 100644 --- a/ecs/network/metrics.tf +++ b/ecs/network/metrics.tf @@ -1,5 +1,342 @@ locals { - metrics = { + lb_metrics = { + requests = module.metric_requests + lb_responses = module.metric_lb_responses + lb_fixed_responses = module.metric_lb_fixed_responses + lb_fixed_response_percentage = module.metric_lb_fixed_response_percentage + lb_redirects = module.metric_lb_redirects + lb_redirect_percentage = module.metric_lb_redirect_percentage + lb_4xx_responses = module.metric_lb_4xx_responses + lb_4xx_response_percentage = module.metric_lb_4xx_response_percentage + lb_5xx_responses = module.metric_lb_5xx_responses + lb_5xx_response_percentage = module.metric_lb_5xx_response_percentage + lb_tls_negotiation_errors = module.metric_lb_tls_negotiation_errors + lb_tls_negotiation_error_percentage = module.metric_lb_tls_negotiation_error_percentage + target_requests = module.metric_target_requests + target_2xx_responses = module.metric_target_2xx_responses + target_2xx_response_percentage = module.metric_target_2xx_response_percentage + target_3xx_responses = module.metric_target_3xx_responses + target_3xx_response_percentage = module.metric_target_3xx_response_percentage + target_4xx_responses = module.metric_target_4xx_responses + target_4xx_response_percentage = module.metric_target_4xx_response_percentage + target_5xx_responses = module.metric_target_5xx_responses + target_5xx_response_percentage = module.metric_target_5xx_response_percentage + target_connection_errors = module.metric_target_connection_errors + target_connection_error_percentage = module.metric_target_connection_error_percentage + target_average_response_time = module.metric_target_average_response_time + target_p50_response_time = module.metric_target_p50_response_time + target_p90_response_time = module.metric_target_p90_response_time + target_p95_response_time = module.metric_target_p95_response_time + target_p99_response_time = module.metric_target_p99_response_time + target_max_response_time = module.metric_target_max_response_time + } +} + +module "cloudwatch_consts" { + source = "./../../cloudwatch/consts" +} +locals { + colors = module.cloudwatch_consts.colors + lb_namespace = "AWS/ApplicationELB" + lb_dimensions = { + LoadBalancer = var.create ? aws_lb.lb[0].arn_suffix : "" } } + +module "metric_requests" { + source = "./../../cloudwatch/metric_expression" + expression = "${module.metric_target_requests.id} + ${module.metric_lb_responses.id}" + label = "Requests" +} + +module "metric_target_requests" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "RequestCount" + label = "Target requests" + stat = "Sum" + period = 60 +} + +module "metric_lb_responses" { + source = "./../../cloudwatch/metric_expression" + expression = join(" + ", [ + module.metric_lb_fixed_responses.id, + module.metric_lb_redirects.id, + module.metric_lb_4xx_responses.id, + module.metric_lb_5xx_responses.id, + module.metric_lb_tls_negotiation_errors.id, + ]) + label = "ALB responses" +} + +module "metric_lb_fixed_responses" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "HTTP_Fixed_Response_Count" + label = "ALB fixed responses" + color = local.colors.light_green + stat = "Sum" + period = 60 +} + +module "metric_lb_fixed_response_percentage" { + source = "./../../cloudwatch/metric_expression" + + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_lb_fixed_responses.id}, 0) / ${module.metric_requests.id} * 100)" + label = "ALB fixed response percentage" + color = module.metric_lb_fixed_responses.color +} + +module "metric_lb_redirects" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "HTTP_Redirect_Count" + label = "ALB redirects" + color = local.colors.light_blue + stat = "Sum" + period = 60 +} + +module "metric_lb_redirect_percentage" { + source = "./../../cloudwatch/metric_expression" + + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_lb_redirects.id}, 0) / ${module.metric_requests.id} * 100)" + label = "ALB redirect percentage" + color = module.metric_lb_redirects.color +} + +module "metric_lb_4xx_responses" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "HTTPCode_ELB_2XX_Count" + label = "ALB 4xx responses" + color = local.colors.light_orange + stat = "Sum" + period = 60 +} + +module "metric_lb_4xx_response_percentage" { + source = "./../../cloudwatch/metric_expression" + + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_lb_4xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" + label = "ALB 4xx response percentage" + color = module.metric_lb_4xx_responses.color +} + +module "metric_lb_5xx_responses" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "HTTPCode_ELB_2XX_Count" + label = "ALB 5xx responses" + color = local.colors.light_red + stat = "Sum" + period = 60 +} + +module "metric_lb_5xx_response_percentage" { + source = "./../../cloudwatch/metric_expression" + + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_lb_5xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" + label = "ALB 5xx response percentage" + color = module.metric_lb_5xx_responses.color +} + +module "metric_lb_tls_negotiation_errors" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "ClientTLSNegotiationErrorCount" + label = "ALB TLS negotiation errors" + color = local.colors.light_purple + stat = "Sum" + period = 60 +} + +module "metric_lb_tls_negotiation_error_percentage" { + source = "./../../cloudwatch/metric_expression" + + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_lb_tls_negotiation_errors.id}, 0) / ${module.metric_requests.id} * 100)" + label = "ALB tls_negotiation_error percentage" + color = module.metric_lb_tls_negotiation_errors.color +} + +module "metric_target_2xx_responses" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "HTTPCode_Target_2XX_Count" + label = "Target 2xx responses" + color = local.colors.green + stat = "Sum" + period = 60 +} + +module "metric_target_2xx_response_percentage" { + source = "./../../cloudwatch/metric_expression" + + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_target_2xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" + label = "Target 2xx response percentage" + color = module.metric_target_2xx_responses.color +} + +module "metric_target_3xx_responses" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "HTTPCode_Target_3XX_Count" + label = "Target 3xx responses" + color = local.colors.blue + stat = "Sum" + period = 60 +} + +module "metric_target_3xx_response_percentage" { + source = "./../../cloudwatch/metric_expression" + + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_target_3xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" + label = "Target 3xx response percentage" + color = module.metric_target_3xx_responses.color +} + +module "metric_target_4xx_responses" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "HTTPCode_Target_4XX_Count" + label = "Target 4xx responses" + color = local.colors.orange + stat = "Sum" + period = 60 +} + +module "metric_target_4xx_response_percentage" { + source = "./../../cloudwatch/metric_expression" + + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_target_4xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" + label = "Target 4xx response percentage" + color = module.metric_target_4xx_responses.color +} + +module "metric_target_5xx_responses" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "HTTPCode_Target_5XX_Count" + label = "Target 5xx responses" + color = local.colors.red + stat = "Sum" + period = 60 +} + +module "metric_target_5xx_response_percentage" { + source = "./../../cloudwatch/metric_expression" + + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_target_5xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" + label = "Target 5xx response percentage" + color = module.metric_target_5xx_responses.color +} + +module "metric_target_connection_errors" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "TargetConnectionErrorCount" + label = "Target connection errors" + color = local.colors.purple + stat = "Sum" + period = 60 +} + +module "metric_target_connection_error_percentage" { + source = "./../../cloudwatch/metric_expression" + + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_target_connection_errors.id}, 0) / ${module.metric_requests.id} * 100)" + label = "Target connection errors percentage" + color = module.metric_target_connection_errors.color +} + +module "metric_target_average_response_time" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "TargetResponseTime" + label = "Average target response time" + color = local.colors.red + stat = "Average" + period = 60 +} + +module "metric_target_p50_response_time" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "TargetResponseTime" + label = "p50 target response time" + color = local.colors.red + stat = "p50" + period = 60 +} + +module "metric_target_p90_response_time" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "TargetResponseTime" + label = "p90 target response time" + stat = "p90" + period = 60 +} + +module "metric_target_p95_response_time" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "TargetResponseTime" + label = "p95 target response time" + stat = "p95" + period = 60 +} + +module "metric_target_p99_response_time" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "TargetResponseTime" + label = "p99 target response time" + stat = "p99" + period = 60 +} + +module "metric_target_max_response_time" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "TargetResponseTime" + label = "Maximum target response time" + stat = "Maximum" + period = 60 +} diff --git a/ecs/network/outputs.tf b/ecs/network/outputs.tf index 880f147b..678f9fec 100644 --- a/ecs/network/outputs.tf +++ b/ecs/network/outputs.tf @@ -93,12 +93,12 @@ output "hosts_security_group_arn" { description = "The ARN of the Security Group which should be used by host instances" } -output "metrics" { - description = "Cloudwatch metrics, see [metrics.tf](./metrics.tf)" - value = local.metrics +output "lb_metrics" { + description = "Load balancer related Cloudwatch metrics, see [metrics.tf](./metrics.tf)" + value = local.lb_metrics } -output "widgets" { - description = "Cloudwatch dashboard widgets, see [widgets.tf](./widgets.tf)" - value = local.widgets +output "lb_widgets" { + description = "Load balancer related Cloudwatch dashboard widgets, see [widgets.tf](./widgets.tf)" + value = local.lb_widgets } diff --git a/ecs/network/widgets.tf b/ecs/network/widgets.tf index f1ee0bc0..df2aef40 100644 --- a/ecs/network/widgets.tf +++ b/ecs/network/widgets.tf @@ -1,5 +1,74 @@ locals { - widgets = { - + lb_widgets = { + responses = module.widget_responses + response_percentages = module.widget_response_percentages + response_time = module.widget_response_time } } + +module "widget_responses" { + source = "./../../cloudwatch/metric_widget" + + title = "ALB responses" + stacked = true + left_metrics = [ + local.lb_metrics.lb_tls_negotiation_errors, + local.lb_metrics.target_connection_errors, + local.lb_metrics.lb_5xx_responses, + local.lb_metrics.target_5xx_responses, + local.lb_metrics.lb_4xx_responses, + local.lb_metrics.target_4xx_responses, + local.lb_metrics.lb_redirects, + local.lb_metrics.target_3xx_responses, + local.lb_metrics.lb_fixed_responses, + local.lb_metrics.target_2xx_responses, + ] +} + +module "widget_response_percentages" { + source = "./../../cloudwatch/metric_widget" + + title = "ALB response percentages" + stacked = true + left_metrics = [ + local.lb_metrics.lb_tls_negotiation_error_percentage, + local.lb_metrics.target_connection_error_percentage, + local.lb_metrics.lb_5xx_response_percentage, + local.lb_metrics.target_5xx_response_percentage, + local.lb_metrics.lb_4xx_response_percentage, + local.lb_metrics.target_4xx_response_percentage, + local.lb_metrics.lb_redirect_percentage, + local.lb_metrics.target_3xx_response_percentage, + local.lb_metrics.lb_fixed_response_percentage, + local.lb_metrics.target_2xx_response_percentage, + ] + left_range = [0, 100] + hidden_metrics = [ + local.lb_metrics.requests, + local.lb_metrics.lb_responses, + local.lb_metrics.target_requests, + local.lb_metrics.lb_tls_negotiation_errors, + local.lb_metrics.target_connection_errors, + local.lb_metrics.lb_5xx_responses, + local.lb_metrics.target_5xx_responses, + local.lb_metrics.lb_4xx_responses, + local.lb_metrics.target_4xx_responses, + local.lb_metrics.lb_redirects, + local.lb_metrics.target_3xx_responses, + local.lb_metrics.lb_fixed_responses, + local.lb_metrics.target_2xx_responses, + ] +} + +module "widget_response_time" { + source = "./../../cloudwatch/metric_widget" + + title = "ALB target response times" + stacked = true + left_metrics = [ + merge(local.lb_metrics.target_p50_response_time, { color = local.colors.red }), + merge(local.lb_metrics.target_p95_response_time, { color = local.colors.orange }), + merge(local.lb_metrics.target_p99_response_time, { color = local.colors.light_red }), + merge(local.lb_metrics.target_max_response_time, { color = local.colors.light_orange }), + ] +} diff --git a/ecs/outputs.tf b/ecs/outputs.tf index dd7e8346..0891473a 100644 --- a/ecs/outputs.tf +++ b/ecs/outputs.tf @@ -105,6 +105,16 @@ output "hosts_security_group_arn" { description = "The ARN of the Security Group which should be used by host instances" } +output "lb_metrics" { + value = module.network.lb_metrics + description = "Load balancer related Cloudwatch metrics, see [network/metrics.tf](./network/metrics.tf)" +} + +output "lb_widgets" { + value = module.network.lb_widgets + description = "Load balancer related Cloudwatch dashboard widgets, see [network/widgets.tf](./network/widgets.tf)" +} + # access outputs output "host_role_name" { From e1e679fb08e1936666113983245e0a6214d46544 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Wed, 29 Apr 2020 12:10:46 +0200 Subject: [PATCH 03/17] refactor(ecs/network): ALB widget metrics reorder --- ecs/example/main.tf | 2 +- ecs/network/widgets.tf | 40 ++++++++++++++++++++-------------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/ecs/example/main.tf b/ecs/example/main.tf index e0b1ce29..efbad103 100644 --- a/ecs/example/main.tf +++ b/ecs/example/main.tf @@ -103,7 +103,7 @@ module "dashboard" { widgets = [ module.cluster.lb_widgets.responses, module.cluster.lb_widgets.response_percentages, - module.cluster.lb_widgets.response_time, + module.cluster.lb_widgets.target_response_time, ] } diff --git a/ecs/network/widgets.tf b/ecs/network/widgets.tf index df2aef40..bd000919 100644 --- a/ecs/network/widgets.tf +++ b/ecs/network/widgets.tf @@ -2,25 +2,25 @@ locals { lb_widgets = { responses = module.widget_responses response_percentages = module.widget_response_percentages - response_time = module.widget_response_time + target_response_time = module.widget_target_response_time } } module "widget_responses" { source = "./../../cloudwatch/metric_widget" - title = "ALB responses" + title = "ALB and target responses" stacked = true left_metrics = [ local.lb_metrics.lb_tls_negotiation_errors, - local.lb_metrics.target_connection_errors, local.lb_metrics.lb_5xx_responses, - local.lb_metrics.target_5xx_responses, local.lb_metrics.lb_4xx_responses, - local.lb_metrics.target_4xx_responses, local.lb_metrics.lb_redirects, - local.lb_metrics.target_3xx_responses, local.lb_metrics.lb_fixed_responses, + local.lb_metrics.target_connection_errors, + local.lb_metrics.target_5xx_responses, + local.lb_metrics.target_4xx_responses, + local.lb_metrics.target_3xx_responses, local.lb_metrics.target_2xx_responses, ] } @@ -28,18 +28,18 @@ module "widget_responses" { module "widget_response_percentages" { source = "./../../cloudwatch/metric_widget" - title = "ALB response percentages" + title = "ALB and target response percentages" stacked = true left_metrics = [ local.lb_metrics.lb_tls_negotiation_error_percentage, - local.lb_metrics.target_connection_error_percentage, local.lb_metrics.lb_5xx_response_percentage, - local.lb_metrics.target_5xx_response_percentage, local.lb_metrics.lb_4xx_response_percentage, - local.lb_metrics.target_4xx_response_percentage, local.lb_metrics.lb_redirect_percentage, - local.lb_metrics.target_3xx_response_percentage, local.lb_metrics.lb_fixed_response_percentage, + local.lb_metrics.target_connection_error_percentage, + local.lb_metrics.target_5xx_response_percentage, + local.lb_metrics.target_4xx_response_percentage, + local.lb_metrics.target_3xx_response_percentage, local.lb_metrics.target_2xx_response_percentage, ] left_range = [0, 100] @@ -47,20 +47,20 @@ module "widget_response_percentages" { local.lb_metrics.requests, local.lb_metrics.lb_responses, local.lb_metrics.target_requests, - local.lb_metrics.lb_tls_negotiation_errors, - local.lb_metrics.target_connection_errors, - local.lb_metrics.lb_5xx_responses, - local.lb_metrics.target_5xx_responses, - local.lb_metrics.lb_4xx_responses, - local.lb_metrics.target_4xx_responses, - local.lb_metrics.lb_redirects, - local.lb_metrics.target_3xx_responses, local.lb_metrics.lb_fixed_responses, + local.lb_metrics.lb_redirects, + local.lb_metrics.lb_4xx_responses, + local.lb_metrics.lb_5xx_responses, + local.lb_metrics.lb_tls_negotiation_errors, local.lb_metrics.target_2xx_responses, + local.lb_metrics.target_3xx_responses, + local.lb_metrics.target_4xx_responses, + local.lb_metrics.target_5xx_responses, + local.lb_metrics.target_connection_errors, ] } -module "widget_response_time" { +module "widget_target_response_time" { source = "./../../cloudwatch/metric_widget" title = "ALB target response times" From ceebd2fc2956f62b7390b2b22a97e43c90f5c840 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Wed, 29 Apr 2020 12:46:48 +0200 Subject: [PATCH 04/17] feat(ecs/network): more ALB metrics and widgets --- ecs/example/main.tf | 3 +++ ecs/network/metrics.tf | 50 ++++++++++++++++++++++++++++++++++++++++++ ecs/network/widgets.tf | 33 ++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+) diff --git a/ecs/example/main.tf b/ecs/example/main.tf index efbad103..522d0eda 100644 --- a/ecs/example/main.tf +++ b/ecs/example/main.tf @@ -104,6 +104,9 @@ module "dashboard" { module.cluster.lb_widgets.responses, module.cluster.lb_widgets.response_percentages, module.cluster.lb_widgets.target_response_time, + module.cluster.lb_widgets.connections, + module.cluster.lb_widgets.lcus, + module.cluster.lb_widgets.traffic, ] } diff --git a/ecs/network/metrics.tf b/ecs/network/metrics.tf index 12596fa0..4c6d6129 100644 --- a/ecs/network/metrics.tf +++ b/ecs/network/metrics.tf @@ -29,6 +29,10 @@ locals { target_p95_response_time = module.metric_target_p95_response_time target_p99_response_time = module.metric_target_p99_response_time target_max_response_time = module.metric_target_max_response_time + consumed_lcus = module.metric_consumed_lcus + active_connections = module.metric_lb_active_connections + new_connections = module.metric_lb_new_connections + traffic = module.metric_lb_traffic } } @@ -340,3 +344,49 @@ module "metric_target_max_response_time" { stat = "Maximum" period = 60 } + +module "metric_consumed_lcus" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "ConsumedLCUs" + label = "Consumed LCUs" + stat = "Sum" + period = 60 +} + +module "metric_lb_active_connections" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "ActiveConnectionCount" + label = "Active connections" + color = local.colors.blue + stat = "Sum" + period = 60 +} + +module "metric_lb_new_connections" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "NewConnectionCount" + label = "New connections" + color = local.colors.green + stat = "Sum" + period = 60 +} + +module "metric_lb_traffic" { + source = "./../../cloudwatch/metric" + + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "ProcessedBytes" + label = "Traffic" + stat = "Sum" + period = 60 +} diff --git a/ecs/network/widgets.tf b/ecs/network/widgets.tf index bd000919..d10bc6d9 100644 --- a/ecs/network/widgets.tf +++ b/ecs/network/widgets.tf @@ -3,6 +3,9 @@ locals { responses = module.widget_responses response_percentages = module.widget_response_percentages target_response_time = module.widget_target_response_time + connections = module.widget_lb_connections + lcus = module.widget_lb_lcus + traffic = module.widget_lb_traffic } } @@ -71,4 +74,34 @@ module "widget_target_response_time" { merge(local.lb_metrics.target_p99_response_time, { color = local.colors.light_red }), merge(local.lb_metrics.target_max_response_time, { color = local.colors.light_orange }), ] + left_range = [0, null] +} + +module "widget_lb_connections" { + source = "./../../cloudwatch/metric_widget" + + title = "ALB connections" + stacked = true + left_metrics = [ + local.lb_metrics.active_connections, + local.lb_metrics.new_connections, + ] + left_range = [0, null] +} + +module "widget_lb_lcus" { + source = "./../../cloudwatch/metric_widget" + + title = "ALB consumed LCUs" + stacked = true + left_metrics = [local.lb_metrics.consumed_lcus] +} + +module "widget_lb_traffic" { + source = "./../../cloudwatch/metric_widget" + + title = "ALB traffic" + stacked = true + left_metrics = [local.lb_metrics.traffic] + left_range = [0, null] } From 706a0f08ccb5911e1f503f60007cfdad9bb3a2b2 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Wed, 29 Apr 2020 18:19:23 +0200 Subject: [PATCH 05/17] refactor(ecs/network): DRYing up request counting metrics with cloudwatch/metric/many module --- ecs/network/metrics.tf | 365 ++++++++++++++--------------------------- 1 file changed, 123 insertions(+), 242 deletions(-) diff --git a/ecs/network/metrics.tf b/ecs/network/metrics.tf index 4c6d6129..b6c628ed 100644 --- a/ecs/network/metrics.tf +++ b/ecs/network/metrics.tf @@ -1,38 +1,43 @@ locals { lb_metrics = { + # ALB request counts requests = module.metric_requests lb_responses = module.metric_lb_responses - lb_fixed_responses = module.metric_lb_fixed_responses - lb_fixed_response_percentage = module.metric_lb_fixed_response_percentage - lb_redirects = module.metric_lb_redirects - lb_redirect_percentage = module.metric_lb_redirect_percentage - lb_4xx_responses = module.metric_lb_4xx_responses - lb_4xx_response_percentage = module.metric_lb_4xx_response_percentage - lb_5xx_responses = module.metric_lb_5xx_responses - lb_5xx_response_percentage = module.metric_lb_5xx_response_percentage - lb_tls_negotiation_errors = module.metric_lb_tls_negotiation_errors - lb_tls_negotiation_error_percentage = module.metric_lb_tls_negotiation_error_percentage - target_requests = module.metric_target_requests - target_2xx_responses = module.metric_target_2xx_responses - target_2xx_response_percentage = module.metric_target_2xx_response_percentage - target_3xx_responses = module.metric_target_3xx_responses - target_3xx_response_percentage = module.metric_target_3xx_response_percentage - target_4xx_responses = module.metric_target_4xx_responses - target_4xx_response_percentage = module.metric_target_4xx_response_percentage - target_5xx_responses = module.metric_target_5xx_responses - target_5xx_response_percentage = module.metric_target_5xx_response_percentage - target_connection_errors = module.metric_target_connection_errors - target_connection_error_percentage = module.metric_target_connection_error_percentage - target_average_response_time = module.metric_target_average_response_time - target_p50_response_time = module.metric_target_p50_response_time - target_p90_response_time = module.metric_target_p90_response_time - target_p95_response_time = module.metric_target_p95_response_time - target_p99_response_time = module.metric_target_p99_response_time - target_max_response_time = module.metric_target_max_response_time - consumed_lcus = module.metric_consumed_lcus - active_connections = module.metric_lb_active_connections - new_connections = module.metric_lb_new_connections - traffic = module.metric_lb_traffic + lb_fixed_responses = module.metrics_response_count.out_map.lb_fixed + lb_fixed_response_percentage = module.metrics_response_percentage.out_map.lb_fixed + lb_redirects = module.metrics_response_count.out_map.lb_redirect + lb_redirect_percentage = module.metrics_response_percentage.out_map.lb_redirect + lb_4xx_responses = module.metrics_response_count.out_map.lb_4xx + lb_4xx_response_percentage = module.metrics_response_percentage.out_map.lb_4xx + lb_5xx_responses = module.metrics_response_count.out_map.lb_5xx + lb_5xx_response_percentage = module.metrics_response_percentage.out_map.lb_5xx + lb_tls_negotiation_errors = module.metrics_response_count.out_map.lb_tls_negotiation_error + lb_tls_negotiation_error_percentage = module.metrics_response_percentage.out_map.lb_tls_negotiation_error + target_requests = module.metrics_response_count.out_map.target + target_2xx_responses = module.metrics_response_count.out_map.target_2xx + target_2xx_response_percentage = module.metrics_response_percentage.out_map.target_2xx + target_3xx_responses = module.metrics_response_count.out_map.target_3xx + target_3xx_response_percentage = module.metrics_response_percentage.out_map.target_3xx + target_4xx_responses = module.metrics_response_count.out_map.target_4xx + target_4xx_response_percentage = module.metrics_response_percentage.out_map.target_4xx + target_5xx_responses = module.metrics_response_count.out_map.target_5xx + target_5xx_response_percentage = module.metrics_response_percentage.out_map.target_5xx + target_connection_errors = module.metrics_response_count.out_map.target_connection_error + target_connection_error_percentage = module.metrics_response_percentage.out_map.target_connection_error + + # ALB response times + target_average_response_time = module.metric_target_average_response_time + target_p50_response_time = module.metric_target_p50_response_time + target_p90_response_time = module.metric_target_p90_response_time + target_p95_response_time = module.metric_target_p95_response_time + target_p99_response_time = module.metric_target_p99_response_time + target_max_response_time = module.metric_target_max_response_time + + # ALB other + consumed_lcus = module.metric_consumed_lcus + active_connections = module.metric_lb_active_connections + new_connections = module.metric_lb_new_connections + traffic = module.metric_lb_traffic } } @@ -48,233 +53,109 @@ locals { } } -module "metric_requests" { - source = "./../../cloudwatch/metric_expression" - expression = "${module.metric_target_requests.id} + ${module.metric_lb_responses.id}" - label = "Requests" -} - -module "metric_target_requests" { - source = "./../../cloudwatch/metric" - - namespace = local.lb_namespace - dimensions = local.lb_dimensions - name = "RequestCount" - label = "Target requests" - stat = "Sum" - period = 60 +locals { + metrics_response_count = { + lb_fixed = { + name = "HTTP_Fixed_Response_Count" + label = "ALB fixed responses" + color = local.colors.light_green + } + lb_redirect = { + name = "HTTP_Redirect_Count" + label = "ALB redirects" + color = local.colors.light_blue + } + lb_4xx = { + name = "HTTPCode_ELB_4XX_Count" + label = "ALB 4xx responses" + color = local.colors.light_orange + } + lb_5xx = { + name = "HTTPCode_ELB_5XX_Count" + label = "ALB 5xx responses" + color = local.colors.light_red + } + lb_tls_negotiation_error = { + name = "ClientTLSNegotiationErrorCount" + label = "ALB TLS negotiation errors" + color = local.colors.light_purple + } + target = { + name = "RequestCount" + label = "Target requests" + color = null + } + target_2xx = { + name = "HTTPCode_Target_2XX_Count" + label = "Target 2xx responses" + color = local.colors.green + } + target_3xx = { + name = "HTTPCode_Target_3XX_Count" + label = "Target 3xx responses" + color = local.colors.blue + } + target_4xx = { + name = "HTTPCode_Target_4XX_Count" + label = "Target 4xx responses" + color = local.colors.orange + } + target_5xx = { + name = "HTTPCode_Target_5XX_Count" + label = "Target 5xx responses" + color = local.colors.red + } + target_connection_error = { + name = "TargetConnectionErrorCount" + label = "Target connection errors" + color = local.colors.purple + } + } } module "metric_lb_responses" { source = "./../../cloudwatch/metric_expression" expression = join(" + ", [ - module.metric_lb_fixed_responses.id, - module.metric_lb_redirects.id, - module.metric_lb_4xx_responses.id, - module.metric_lb_5xx_responses.id, - module.metric_lb_tls_negotiation_errors.id, + module.metrics_response_count.out_map.lb_fixed.id, + module.metrics_response_count.out_map.lb_redirect.id, + module.metrics_response_count.out_map.lb_4xx.id, + module.metrics_response_count.out_map.lb_5xx.id, + module.metrics_response_count.out_map.lb_tls_negotiation_error.id, ]) label = "ALB responses" } -module "metric_lb_fixed_responses" { - source = "./../../cloudwatch/metric" - - namespace = local.lb_namespace - dimensions = local.lb_dimensions - name = "HTTP_Fixed_Response_Count" - label = "ALB fixed responses" - color = local.colors.light_green - stat = "Sum" - period = 60 -} - -module "metric_lb_fixed_response_percentage" { - source = "./../../cloudwatch/metric_expression" - - expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_lb_fixed_responses.id}, 0) / ${module.metric_requests.id} * 100)" - label = "ALB fixed response percentage" - color = module.metric_lb_fixed_responses.color -} - -module "metric_lb_redirects" { - source = "./../../cloudwatch/metric" - - namespace = local.lb_namespace - dimensions = local.lb_dimensions - name = "HTTP_Redirect_Count" - label = "ALB redirects" - color = local.colors.light_blue - stat = "Sum" - period = 60 -} - -module "metric_lb_redirect_percentage" { - source = "./../../cloudwatch/metric_expression" - - expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_lb_redirects.id}, 0) / ${module.metric_requests.id} * 100)" - label = "ALB redirect percentage" - color = module.metric_lb_redirects.color -} - -module "metric_lb_4xx_responses" { - source = "./../../cloudwatch/metric" - - namespace = local.lb_namespace - dimensions = local.lb_dimensions - name = "HTTPCode_ELB_2XX_Count" - label = "ALB 4xx responses" - color = local.colors.light_orange - stat = "Sum" - period = 60 -} - -module "metric_lb_4xx_response_percentage" { - source = "./../../cloudwatch/metric_expression" - - expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_lb_4xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" - label = "ALB 4xx response percentage" - color = module.metric_lb_4xx_responses.color -} - -module "metric_lb_5xx_responses" { - source = "./../../cloudwatch/metric" - - namespace = local.lb_namespace - dimensions = local.lb_dimensions - name = "HTTPCode_ELB_2XX_Count" - label = "ALB 5xx responses" - color = local.colors.light_red - stat = "Sum" - period = 60 -} - -module "metric_lb_5xx_response_percentage" { - source = "./../../cloudwatch/metric_expression" - - expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_lb_5xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" - label = "ALB 5xx response percentage" - color = module.metric_lb_5xx_responses.color -} - -module "metric_lb_tls_negotiation_errors" { - source = "./../../cloudwatch/metric" - - namespace = local.lb_namespace - dimensions = local.lb_dimensions - name = "ClientTLSNegotiationErrorCount" - label = "ALB TLS negotiation errors" - color = local.colors.light_purple - stat = "Sum" - period = 60 -} - -module "metric_lb_tls_negotiation_error_percentage" { - source = "./../../cloudwatch/metric_expression" - - expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_lb_tls_negotiation_errors.id}, 0) / ${module.metric_requests.id} * 100)" - label = "ALB tls_negotiation_error percentage" - color = module.metric_lb_tls_negotiation_errors.color -} - -module "metric_target_2xx_responses" { - source = "./../../cloudwatch/metric" - - namespace = local.lb_namespace - dimensions = local.lb_dimensions - name = "HTTPCode_Target_2XX_Count" - label = "Target 2xx responses" - color = local.colors.green - stat = "Sum" - period = 60 -} - -module "metric_target_2xx_response_percentage" { - source = "./../../cloudwatch/metric_expression" - - expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_target_2xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" - label = "Target 2xx response percentage" - color = module.metric_target_2xx_responses.color -} - -module "metric_target_3xx_responses" { - source = "./../../cloudwatch/metric" - - namespace = local.lb_namespace - dimensions = local.lb_dimensions - name = "HTTPCode_Target_3XX_Count" - label = "Target 3xx responses" - color = local.colors.blue - stat = "Sum" - period = 60 -} - -module "metric_target_3xx_response_percentage" { - source = "./../../cloudwatch/metric_expression" - - expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_target_3xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" - label = "Target 3xx response percentage" - color = module.metric_target_3xx_responses.color -} - -module "metric_target_4xx_responses" { - source = "./../../cloudwatch/metric" - - namespace = local.lb_namespace - dimensions = local.lb_dimensions - name = "HTTPCode_Target_4XX_Count" - label = "Target 4xx responses" - color = local.colors.orange - stat = "Sum" - period = 60 -} - -module "metric_target_4xx_response_percentage" { - source = "./../../cloudwatch/metric_expression" - - expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_target_4xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" - label = "Target 4xx response percentage" - color = module.metric_target_4xx_responses.color -} - -module "metric_target_5xx_responses" { - source = "./../../cloudwatch/metric" - - namespace = local.lb_namespace - dimensions = local.lb_dimensions - name = "HTTPCode_Target_5XX_Count" - label = "Target 5xx responses" - color = local.colors.red - stat = "Sum" - period = 60 -} - -module "metric_target_5xx_response_percentage" { +module "metric_requests" { source = "./../../cloudwatch/metric_expression" - - expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_target_5xx_responses.id}, 0) / ${module.metric_requests.id} * 100)" - label = "Target 5xx response percentage" - color = module.metric_target_5xx_responses.color + expression = join(" + ", [ + module.metric_lb_responses.id, + module.metrics_response_count.out_map.target.id, + ]) + label = "Requests" } -module "metric_target_connection_errors" { - source = "./../../cloudwatch/metric" +module "metrics_response_count" { + source = "./../../cloudwatch/metric/many" - namespace = local.lb_namespace - dimensions = local.lb_dimensions - name = "TargetConnectionErrorCount" - label = "Target connection errors" - color = local.colors.purple - stat = "Sum" - period = 60 + vars_map = { for k, variant in local.metrics_response_count : k => { + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = variant.name + label = variant.label + color = variant.color + stat = "Sum" + period = 60 + } } } -module "metric_target_connection_error_percentage" { - source = "./../../cloudwatch/metric_expression" +module "metrics_response_percentage" { + source = "./../../cloudwatch/metric_expression/many" - expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metric_target_connection_errors.id}, 0) / ${module.metric_requests.id} * 100)" - label = "Target connection errors percentage" - color = module.metric_target_connection_errors.color + vars_map = { for k, variant in local.metrics_response_count : k => { + expression = "IF(${module.metric_requests.id} == 0, 0, FILL(${module.metrics_response_count.out_map[k].id}, 0) / ${module.metric_requests.id} * 100)" + label = variant.label + color = variant.color + } } } module "metric_target_average_response_time" { From e23783da9bfb0ea890d11c83ffcb142b0f4dea3c Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Wed, 29 Apr 2020 18:25:53 +0200 Subject: [PATCH 06/17] refactor(ecs/network): DRYing up response time metrics with cloudwatch/metric/many --- ecs/network/metrics.tf | 95 ++++++++++++------------------------------ ecs/network/widgets.tf | 8 ++-- 2 files changed, 30 insertions(+), 73 deletions(-) diff --git a/ecs/network/metrics.tf b/ecs/network/metrics.tf index b6c628ed..b3e8871d 100644 --- a/ecs/network/metrics.tf +++ b/ecs/network/metrics.tf @@ -26,12 +26,12 @@ locals { target_connection_error_percentage = module.metrics_response_percentage.out_map.target_connection_error # ALB response times - target_average_response_time = module.metric_target_average_response_time - target_p50_response_time = module.metric_target_p50_response_time - target_p90_response_time = module.metric_target_p90_response_time - target_p95_response_time = module.metric_target_p95_response_time - target_p99_response_time = module.metric_target_p99_response_time - target_max_response_time = module.metric_target_max_response_time + target_average_response_time = module.metrics_target_response_time.out_map.average + target_p50_response_time = module.metrics_target_response_time.out_map.p50 + target_p90_response_time = module.metrics_target_response_time.out_map.p90 + target_p95_response_time = module.metrics_target_response_time.out_map.p95 + target_p99_response_time = module.metrics_target_response_time.out_map.p99 + target_max_response_time = module.metrics_target_response_time.out_map.max # ALB other consumed_lcus = module.metric_consumed_lcus @@ -158,72 +158,29 @@ module "metrics_response_percentage" { } } } -module "metric_target_average_response_time" { - source = "./../../cloudwatch/metric" - - namespace = local.lb_namespace - dimensions = local.lb_dimensions - name = "TargetResponseTime" - label = "Average target response time" - color = local.colors.red - stat = "Average" - period = 60 -} - -module "metric_target_p50_response_time" { - source = "./../../cloudwatch/metric" - - namespace = local.lb_namespace - dimensions = local.lb_dimensions - name = "TargetResponseTime" - label = "p50 target response time" - color = local.colors.red - stat = "p50" - period = 60 -} - -module "metric_target_p90_response_time" { - source = "./../../cloudwatch/metric" - - namespace = local.lb_namespace - dimensions = local.lb_dimensions - name = "TargetResponseTime" - label = "p90 target response time" - stat = "p90" - period = 60 -} - -module "metric_target_p95_response_time" { - source = "./../../cloudwatch/metric" - - namespace = local.lb_namespace - dimensions = local.lb_dimensions - name = "TargetResponseTime" - label = "p95 target response time" - stat = "p95" - period = 60 -} - -module "metric_target_p99_response_time" { - source = "./../../cloudwatch/metric" - - namespace = local.lb_namespace - dimensions = local.lb_dimensions - name = "TargetResponseTime" - label = "p99 target response time" - stat = "p99" - period = 60 +locals { + metrics_response_time_variants = { + average = { stat = "Average", color = local.colors.red } + p50 = { stat = "p50", color = local.colors.red } + p90 = { stat = "p90", color = local.colors.orange } + p95 = { stat = "p95", color = local.colors.orange } + p99 = { stat = "p99", color = local.colors.light_red } + max = { stat = "Maximum", color = local.colors.light_orange } + } } -module "metric_target_max_response_time" { - source = "./../../cloudwatch/metric" +module "metrics_target_response_time" { + source = "./../../cloudwatch/metric/many" - namespace = local.lb_namespace - dimensions = local.lb_dimensions - name = "TargetResponseTime" - label = "Maximum target response time" - stat = "Maximum" - period = 60 + vars_map = { for k, variant in local.metrics_response_time_variants : k => { + namespace = local.lb_namespace + dimensions = local.lb_dimensions + name = "TargetResponseTime" + label = "${variant.stat} target response time" + color = variant.color + stat = variant.stat + period = 60 + } } } module "metric_consumed_lcus" { diff --git a/ecs/network/widgets.tf b/ecs/network/widgets.tf index d10bc6d9..a72cbfa4 100644 --- a/ecs/network/widgets.tf +++ b/ecs/network/widgets.tf @@ -69,10 +69,10 @@ module "widget_target_response_time" { title = "ALB target response times" stacked = true left_metrics = [ - merge(local.lb_metrics.target_p50_response_time, { color = local.colors.red }), - merge(local.lb_metrics.target_p95_response_time, { color = local.colors.orange }), - merge(local.lb_metrics.target_p99_response_time, { color = local.colors.light_red }), - merge(local.lb_metrics.target_max_response_time, { color = local.colors.light_orange }), + local.lb_metrics.target_p50_response_time, + local.lb_metrics.target_p95_response_time, + local.lb_metrics.target_p99_response_time, + local.lb_metrics.target_max_response_time, ] left_range = [0, null] } From d549420cd2c16fdb2805d362b05158278c3c2962 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Thu, 30 Apr 2020 12:19:43 +0200 Subject: [PATCH 07/17] feat(ecs/network): nat instance cpu metrics and widgets --- ecs/README.md | 8 ++++ ecs/example/main.tf | 3 ++ ecs/network/README.md | 8 ++++ ecs/network/metrics.tf | 87 ++++++++++++++++++++++++++++++++++++++++++ ecs/network/outputs.tf | 10 +++++ ecs/network/widgets.tf | 38 ++++++++++++++++++ ecs/outputs.tf | 10 +++++ 7 files changed, 164 insertions(+) diff --git a/ecs/README.md b/ecs/README.md index a5b376f7..ff09e3aa 100644 --- a/ecs/README.md +++ b/ecs/README.md @@ -165,6 +165,14 @@ Based on [AWS reference architecture](https://github.com/aws-samples/ecs-refarch Cluster name +* `nat_instance_metrics` + + NAT instance related Cloudwatch metrics, see [network/metrics.tf](./network/metrics.tf) + +* `nat_instance_widgets` + + NAT instance related Cloudwatch dashboard widgets, see [network/widgets.tf](./network/widgets.tf) + * `private_blocks` The CIDR blocks of private subnets diff --git a/ecs/example/main.tf b/ecs/example/main.tf index 522d0eda..9810cfa1 100644 --- a/ecs/example/main.tf +++ b/ecs/example/main.tf @@ -107,6 +107,9 @@ module "dashboard" { module.cluster.lb_widgets.connections, module.cluster.lb_widgets.lcus, module.cluster.lb_widgets.traffic, + module.cluster.nat_instance_widgets.cpu_utilization, + module.cluster.nat_instance_widgets.cpu_credit_balance, + module.cluster.nat_instance_widgets.cpu_credit_usage, ] } diff --git a/ecs/network/README.md b/ecs/network/README.md index babc407a..77b0af1b 100644 --- a/ecs/network/README.md +++ b/ecs/network/README.md @@ -116,6 +116,14 @@ Creates networking resources needed for a standard ECS cluster setup: The canonical hosted zone ID of the Application Load Balancer (to be used in a Route 53 Alias record) +* `nat_instance_metrics` + + NAT instance Cloudwatch metrics, see [metrics.tf](./metrics.tf) + +* `nat_instance_widgets` + + NAT instance Cloudwatch dashboard widgets, see [widgets.tf](./widgets.tf) + * `private_blocks` The CIDR blocks of private subnets diff --git a/ecs/network/metrics.tf b/ecs/network/metrics.tf index b3e8871d..ee3f6307 100644 --- a/ecs/network/metrics.tf +++ b/ecs/network/metrics.tf @@ -39,6 +39,17 @@ locals { new_connections = module.metric_lb_new_connections traffic = module.metric_lb_traffic } + + nat_instance_metrics = { + min_cpu_utilization = module.metric_nat_instance_min_cpu_utilization + average_cpu_utilization = module.metric_nat_instance_average_cpu_utilization + max_cpu_utilization = module.metric_nat_instance_max_cpu_utilization + + cpu_credit_usage = module.metric_nat_instance_cpu_credit_usage + cpu_credit_balance = module.metrics_nat_instance_cpu_credit_balance.out_map.balance + cpu_surplus_credit_balance = module.metrics_nat_instance_cpu_credit_balance.out_map.surplus + cpu_surplus_credits_charged = module.metrics_nat_instance_cpu_credit_balance.out_map.surplus_charged + } } module "cloudwatch_consts" { @@ -228,3 +239,79 @@ module "metric_lb_traffic" { stat = "Sum" period = 60 } + +locals { + search_nat_instance_dimensions = join(" OR ", [ + for id in aws_instance.nat.*.id : "InstanceId=${jsonencode(id)}" + ]) + search_nat_instance = "Namespace=\"AWS/EC2\" (${local.search_nat_instance_dimensions})" +} + +module "metric_nat_instance_min_cpu_utilization" { + source = "./../../cloudwatch/metric_expression" + + expression = "MIN(SEARCH('${local.search_nat_instance} MetricName=\"CPUUtilization\"', 'Minimum', 60))" + label = "Minimum CPU utilization" + color = local.colors.light_orange +} + +module "metric_nat_instance_average_cpu_utilization" { + source = "./../../cloudwatch/metric_expression" + + expression = < { + expression = < Date: Thu, 30 Apr 2020 14:28:10 +0200 Subject: [PATCH 08/17] fix(ecs/network): NAT instance average cpu utilization metric label and color --- ecs/network/metrics.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ecs/network/metrics.tf b/ecs/network/metrics.tf index ee3f6307..16b4d4d4 100644 --- a/ecs/network/metrics.tf +++ b/ecs/network/metrics.tf @@ -263,8 +263,8 @@ module "metric_nat_instance_average_cpu_utilization" { SUM(SEARCH('${local.search_nat_instance} MetricName="CPUUtilization"', 'SampleCount', 60)) EOF - label = "Minimum CPU utilization" - color = local.colors.light_orange + label = "Average CPU utilization" + color = local.colors.orange } module "metric_nat_instance_max_cpu_utilization" { From ba9161c6778b9faa27880c2724a21148644f0202 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Thu, 30 Apr 2020 14:37:28 +0200 Subject: [PATCH 09/17] feat(ecs/network): NAT instance network traffic metrics and widgets --- ecs/example/main.tf | 2 ++ ecs/network/metrics.tf | 64 ++++++++++++++++++++++++++++++++++++++++++ ecs/network/widgets.tf | 26 +++++++++++++++++ 3 files changed, 92 insertions(+) diff --git a/ecs/example/main.tf b/ecs/example/main.tf index 9810cfa1..9c058c04 100644 --- a/ecs/example/main.tf +++ b/ecs/example/main.tf @@ -110,6 +110,8 @@ module "dashboard" { module.cluster.nat_instance_widgets.cpu_utilization, module.cluster.nat_instance_widgets.cpu_credit_balance, module.cluster.nat_instance_widgets.cpu_credit_usage, + module.cluster.nat_instance_widgets.network_bytes, + module.cluster.nat_instance_widgets.network_packets, ] } diff --git a/ecs/network/metrics.tf b/ecs/network/metrics.tf index 16b4d4d4..eb24355c 100644 --- a/ecs/network/metrics.tf +++ b/ecs/network/metrics.tf @@ -49,6 +49,15 @@ locals { cpu_credit_balance = module.metrics_nat_instance_cpu_credit_balance.out_map.balance cpu_surplus_credit_balance = module.metrics_nat_instance_cpu_credit_balance.out_map.surplus cpu_surplus_credits_charged = module.metrics_nat_instance_cpu_credit_balance.out_map.surplus_charged + + bytes_received = module.metrics_nat_instance_io_sum.out_map.bytes_received + packets_received = module.metrics_nat_instance_io_sum.out_map.packets_received + bytes_sent = module.metrics_nat_instance_io_sum.out_map.bytes_sent + packets_sent = module.metrics_nat_instance_io_sum.out_map.packets_sent + average_bytes_received = module.metrics_nat_instance_io_average.out_map.bytes_received + average_packets_received = module.metrics_nat_instance_io_average.out_map.packets_received + average_bytes_sent = module.metrics_nat_instance_io_average.out_map.bytes_sent + average_packets_sent = module.metrics_nat_instance_io_average.out_map.packets_sent } } @@ -315,3 +324,58 @@ module "metrics_nat_instance_cpu_credit_balance" { color = variant.color } } } + +locals { + io_colors = { + read = local.colors.green + write = local.colors.orange + } + + metrics_nat_instance_io = { + bytes_received = { + name = "NetworkIn" + label = "Received bytes" + color = local.io_colors.read + } + packets_received = { + name = "NetworkPacketsIn" + label = "Received packets" + color = local.io_colors.read + } + bytes_sent = { + name = "NetworkOut" + label = "Sent bytes" + color = local.io_colors.write + } + packets_sent = { + name = "NetworkPacketsOut" + label = "Sent packets" + color = local.io_colors.write + } + } +} + +module "metrics_nat_instance_io_sum" { + source = "./../../cloudwatch/metric_expression/many" + + vars_map = { for k, variant in local.metrics_nat_instance_io : k => { + expression = < { + expression = < Date: Thu, 30 Apr 2020 14:52:59 +0200 Subject: [PATCH 10/17] docs(ecs): added a simple locustfile to easily generate some traffic to example cluster --- ecs/example/locust/locustfile.py | 29 +++++++++++++++++++++++++++++ ecs/example/locust/start | 10 ++++++++++ ecs/example/main.tf | 6 +++++- 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 ecs/example/locust/locustfile.py create mode 100755 ecs/example/locust/start diff --git a/ecs/example/locust/locustfile.py b/ecs/example/locust/locustfile.py new file mode 100644 index 00000000..6b75cd98 --- /dev/null +++ b/ecs/example/locust/locustfile.py @@ -0,0 +1,29 @@ +import random + +from locust import HttpLocust, TaskSet, task, between + + +class UserBehaviour(TaskSet): + @task(5) + def status_2xx(self): + self.client.get("/status/200") + + @task(2) + def status_3xx(self): + self.client.get("/redirect-to", params=dict( + url="/status/200", + status_code=302, + )) + + @task(2) + def status_4xx(self): + self.client.get("/status/400") + + @task(1) + def status_5xx(self): + self.client.get("/status/500") + + +class WebsiteUser(HttpLocust): + task_set = UserBehaviour + wait_time = between(5, 10) diff --git a/ecs/example/locust/start b/ecs/example/locust/start new file mode 100755 index 00000000..dab7b0a1 --- /dev/null +++ b/ecs/example/locust/start @@ -0,0 +1,10 @@ +#!/bin/sh + +set -eux + +docker run \ + -p 8089:8089 \ + --volume "$PWD/locust/locustfile.py:/mnt/locust/locustfile.py" \ + -e LOCUSTFILE_PATH=/mnt/locust/locustfile.py \ + -e TARGET_URL="$(terraform output lb_url)" \ + locustio/locust diff --git a/ecs/example/main.tf b/ecs/example/main.tf index 9c058c04..9a2a1bc0 100644 --- a/ecs/example/main.tf +++ b/ecs/example/main.tf @@ -17,7 +17,7 @@ module "cluster" { project_index = local.project_index environment = local.environment availability_zones_count = 1 - nat_instance = true + nat_instance = false } module "hosts" { @@ -122,3 +122,7 @@ output "hosts_id" { output "dashboard_url" { value = module.dashboard.url } + +output "lb_url" { + value = "http://${module.cluster.load_balancer_domain}" +} From b6678e4b431f6e29c464a9723e2f200e167b5845 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Thu, 30 Apr 2020 15:38:55 +0200 Subject: [PATCH 11/17] feat(ecs/network): removed NAT instance average IO metrics --- ecs/network/metrics.tf | 27 +++++---------------------- ecs/network/widgets.tf | 8 ++++---- 2 files changed, 9 insertions(+), 26 deletions(-) diff --git a/ecs/network/metrics.tf b/ecs/network/metrics.tf index eb24355c..a8649d34 100644 --- a/ecs/network/metrics.tf +++ b/ecs/network/metrics.tf @@ -50,14 +50,10 @@ locals { cpu_surplus_credit_balance = module.metrics_nat_instance_cpu_credit_balance.out_map.surplus cpu_surplus_credits_charged = module.metrics_nat_instance_cpu_credit_balance.out_map.surplus_charged - bytes_received = module.metrics_nat_instance_io_sum.out_map.bytes_received - packets_received = module.metrics_nat_instance_io_sum.out_map.packets_received - bytes_sent = module.metrics_nat_instance_io_sum.out_map.bytes_sent - packets_sent = module.metrics_nat_instance_io_sum.out_map.packets_sent - average_bytes_received = module.metrics_nat_instance_io_average.out_map.bytes_received - average_packets_received = module.metrics_nat_instance_io_average.out_map.packets_received - average_bytes_sent = module.metrics_nat_instance_io_average.out_map.bytes_sent - average_packets_sent = module.metrics_nat_instance_io_average.out_map.packets_sent + bytes_received = module.metrics_nat_instance_io.out_map.bytes_received + packets_received = module.metrics_nat_instance_io.out_map.packets_received + bytes_sent = module.metrics_nat_instance_io.out_map.bytes_sent + packets_sent = module.metrics_nat_instance_io.out_map.packets_sent } } @@ -355,7 +351,7 @@ locals { } } -module "metrics_nat_instance_io_sum" { +module "metrics_nat_instance_io" { source = "./../../cloudwatch/metric_expression/many" vars_map = { for k, variant in local.metrics_nat_instance_io : k => { @@ -366,16 +362,3 @@ module "metrics_nat_instance_io_sum" { color = variant.color } } } - -module "metrics_nat_instance_io_average" { - source = "./../../cloudwatch/metric_expression/many" - - vars_map = { for k, variant in local.metrics_nat_instance_io : k => { - expression = < Date: Thu, 30 Apr 2020 15:47:33 +0200 Subject: [PATCH 12/17] feat(ecs/network): NAT gateway IO metrics and widgets --- ecs/README.md | 8 +++++ ecs/example/main.tf | 2 ++ ecs/network/README.md | 8 +++++ ecs/network/metrics.tf | 71 ++++++++++++++++++++++++++++++++++++++++++ ecs/network/outputs.tf | 10 ++++++ ecs/network/widgets.tf | 33 ++++++++++++++++++++ ecs/outputs.tf | 10 ++++++ 7 files changed, 142 insertions(+) diff --git a/ecs/README.md b/ecs/README.md index ff09e3aa..b4afe1d2 100644 --- a/ecs/README.md +++ b/ecs/README.md @@ -165,6 +165,14 @@ Based on [AWS reference architecture](https://github.com/aws-samples/ecs-refarch Cluster name +* `nat_gateway_metrics` + + NAT gateway related Cloudwatch metrics, see [network/metrics.tf](./network/metrics.tf) + +* `nat_gateway_widgets` + + NAT gateway related Cloudwatch dashboard widgets, see [network/widgets.tf](./network/widgets.tf) + * `nat_instance_metrics` NAT instance related Cloudwatch metrics, see [network/metrics.tf](./network/metrics.tf) diff --git a/ecs/example/main.tf b/ecs/example/main.tf index 9a2a1bc0..b6752bfd 100644 --- a/ecs/example/main.tf +++ b/ecs/example/main.tf @@ -112,6 +112,8 @@ module "dashboard" { module.cluster.nat_instance_widgets.cpu_credit_usage, module.cluster.nat_instance_widgets.network_bytes, module.cluster.nat_instance_widgets.network_packets, + module.cluster.nat_gateway_widgets.network_bytes, + module.cluster.nat_gateway_widgets.network_packets, ] } diff --git a/ecs/network/README.md b/ecs/network/README.md index 77b0af1b..06845535 100644 --- a/ecs/network/README.md +++ b/ecs/network/README.md @@ -116,6 +116,14 @@ Creates networking resources needed for a standard ECS cluster setup: The canonical hosted zone ID of the Application Load Balancer (to be used in a Route 53 Alias record) +* `nat_gateway_metrics` + + NAT gateway Cloudwatch metrics, see [metrics.tf](./metrics.tf) + +* `nat_gateway_widgets` + + NAT gateway Cloudwatch dashboard widgets, see [widgets.tf](./widgets.tf) + * `nat_instance_metrics` NAT instance Cloudwatch metrics, see [metrics.tf](./metrics.tf) diff --git a/ecs/network/metrics.tf b/ecs/network/metrics.tf index a8649d34..935a6516 100644 --- a/ecs/network/metrics.tf +++ b/ecs/network/metrics.tf @@ -55,6 +55,18 @@ locals { bytes_sent = module.metrics_nat_instance_io.out_map.bytes_sent packets_sent = module.metrics_nat_instance_io.out_map.packets_sent } + + nat_gateway_metrics = { + bytes_received_in = module.metrics_nat_gateway_io_bytes.out_map.received_in + bytes_received_out = module.metrics_nat_gateway_io_bytes.out_map.received_out + bytes_sent_in = module.metrics_nat_gateway_io_bytes.out_map.sent_in + bytes_sent_out = module.metrics_nat_gateway_io_bytes.out_map.sent_out + + packets_received_in = module.metrics_nat_gateway_io_packets.out_map.received_in + packets_received_out = module.metrics_nat_gateway_io_packets.out_map.received_out + packets_sent_in = module.metrics_nat_gateway_io_packets.out_map.sent_in + packets_sent_out = module.metrics_nat_gateway_io_packets.out_map.sent_out + } } module "cloudwatch_consts" { @@ -325,6 +337,9 @@ locals { io_colors = { read = local.colors.green write = local.colors.orange + + light_read = local.colors.light_green + light_write = local.colors.light_orange } metrics_nat_instance_io = { @@ -362,3 +377,59 @@ module "metrics_nat_instance_io" { color = variant.color } } } + +locals { + nat_gateway_dimensions = { + NatGatewayId = var.create && ! var.nat_instance ? aws_nat_gateway.public[0].id : "" + } + metrics_nat_gateway_io = { + sent_in = { + name_suffix = "OutToSource" + label_suffix = "sent to VPC" + color = local.io_colors.light_read + } + sent_out = { + name_suffix = "OutToDestination" + label_suffix = "sent to WAN" + color = local.io_colors.write + } + received_in = { + name_suffix = "InFromSource" + label_suffix = "received from VPC" + color = local.io_colors.light_write + } + received_out = { + name_suffix = "InFromDestination" + label_suffix = "received from WAN" + color = local.io_colors.read + } + } +} + +module "metrics_nat_gateway_io_bytes" { + source = "./../../cloudwatch/metric/many" + + vars_map = { for k, v in local.metrics_nat_gateway_io : k => { + namespace = "AWS/NATGateway" + dimensions = local.nat_gateway_dimensions + name = "Bytes${v.name_suffix}" + label = "Bytes ${v.label_suffix}" + color = v.color + stat = "Sum" + period = 60 + } } +} + +module "metrics_nat_gateway_io_packets" { + source = "./../../cloudwatch/metric/many" + + vars_map = { for k, v in local.metrics_nat_gateway_io : k => { + namespace = "AWS/NATGateway" + dimensions = local.nat_gateway_dimensions + name = "Packets${v.name_suffix}" + label = "Packets ${v.label_suffix}" + color = v.color + stat = "Sum" + period = 60 + } } +} diff --git a/ecs/network/outputs.tf b/ecs/network/outputs.tf index c7f81b34..15f07ce4 100644 --- a/ecs/network/outputs.tf +++ b/ecs/network/outputs.tf @@ -112,3 +112,13 @@ output "nat_instance_widgets" { description = "NAT instance Cloudwatch dashboard widgets, see [widgets.tf](./widgets.tf)" value = local.nat_instance_widgets } + +output "nat_gateway_metrics" { + description = "NAT gateway Cloudwatch metrics, see [metrics.tf](./metrics.tf)" + value = local.nat_gateway_metrics +} + +output "nat_gateway_widgets" { + description = "NAT gateway Cloudwatch dashboard widgets, see [widgets.tf](./widgets.tf)" + value = local.nat_gateway_widgets +} diff --git a/ecs/network/widgets.tf b/ecs/network/widgets.tf index c5351e89..5d86536d 100644 --- a/ecs/network/widgets.tf +++ b/ecs/network/widgets.tf @@ -14,6 +14,11 @@ locals { network_bytes = module.widget_nat_instance_network_bytes network_packets = module.widget_nat_instance_network_packets } + + nat_gateway_widgets = { + network_bytes = module.widget_nat_gateway_network_bytes + network_packets = module.widget_nat_gateway_network_packets + } } module "widget_responses" { @@ -169,3 +174,31 @@ module "widget_nat_instance_network_packets" { ] left_range = [0, null] } + +module "widget_nat_gateway_network_bytes" { + source = "./../../cloudwatch/metric_widget" + + title = "NAT network traffic" + stacked = true + left_metrics = [ + local.nat_gateway_metrics.bytes_received_out, + local.nat_gateway_metrics.bytes_sent_in, + local.nat_gateway_metrics.bytes_received_in, + local.nat_gateway_metrics.bytes_sent_out, + ] + left_range = [0, null] +} + +module "widget_nat_gateway_network_packets" { + source = "./../../cloudwatch/metric_widget" + + title = "NAT network traffic" + stacked = true + left_metrics = [ + local.nat_gateway_metrics.packets_received_out, + local.nat_gateway_metrics.packets_sent_in, + local.nat_gateway_metrics.packets_received_in, + local.nat_gateway_metrics.packets_sent_out, + ] + left_range = [0, null] +} diff --git a/ecs/outputs.tf b/ecs/outputs.tf index 60fdd98c..d9b13b66 100644 --- a/ecs/outputs.tf +++ b/ecs/outputs.tf @@ -125,6 +125,16 @@ output "nat_instance_widgets" { description = "NAT instance related Cloudwatch dashboard widgets, see [network/widgets.tf](./network/widgets.tf)" } +output "nat_gateway_metrics" { + value = module.network.nat_gateway_metrics + description = "NAT gateway related Cloudwatch metrics, see [network/metrics.tf](./network/metrics.tf)" +} + +output "nat_gateway_widgets" { + value = module.network.nat_gateway_widgets + description = "NAT gateway related Cloudwatch dashboard widgets, see [network/widgets.tf](./network/widgets.tf)" +} + # access outputs output "host_role_name" { From 3aa3859dae4f67e94f732438f6c371e1c09b9672 Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Thu, 30 Apr 2020 15:52:48 +0200 Subject: [PATCH 13/17] feat(ecs/network): NAT gateway dropped packets metric --- ecs/network/metrics.tf | 35 ++++++++++++++++++++++++----------- ecs/network/widgets.tf | 1 + 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/ecs/network/metrics.tf b/ecs/network/metrics.tf index 935a6516..8438e7e4 100644 --- a/ecs/network/metrics.tf +++ b/ecs/network/metrics.tf @@ -57,15 +57,16 @@ locals { } nat_gateway_metrics = { - bytes_received_in = module.metrics_nat_gateway_io_bytes.out_map.received_in - bytes_received_out = module.metrics_nat_gateway_io_bytes.out_map.received_out - bytes_sent_in = module.metrics_nat_gateway_io_bytes.out_map.sent_in - bytes_sent_out = module.metrics_nat_gateway_io_bytes.out_map.sent_out - - packets_received_in = module.metrics_nat_gateway_io_packets.out_map.received_in - packets_received_out = module.metrics_nat_gateway_io_packets.out_map.received_out - packets_sent_in = module.metrics_nat_gateway_io_packets.out_map.sent_in - packets_sent_out = module.metrics_nat_gateway_io_packets.out_map.sent_out + bytes_received_in = module.metrics_nat_gateway_bytes.out_map.received_in + bytes_received_out = module.metrics_nat_gateway_bytes.out_map.received_out + bytes_sent_in = module.metrics_nat_gateway_bytes.out_map.sent_in + bytes_sent_out = module.metrics_nat_gateway_bytes.out_map.sent_out + + packets_dropped = module.metrics_nat_gateway_packets_dropped + packets_received_in = module.metrics_nat_gateway_packets.out_map.received_in + packets_received_out = module.metrics_nat_gateway_packets.out_map.received_out + packets_sent_in = module.metrics_nat_gateway_packets.out_map.sent_in + packets_sent_out = module.metrics_nat_gateway_packets.out_map.sent_out } } @@ -406,7 +407,7 @@ locals { } } -module "metrics_nat_gateway_io_bytes" { +module "metrics_nat_gateway_bytes" { source = "./../../cloudwatch/metric/many" vars_map = { for k, v in local.metrics_nat_gateway_io : k => { @@ -420,7 +421,7 @@ module "metrics_nat_gateway_io_bytes" { } } } -module "metrics_nat_gateway_io_packets" { +module "metrics_nat_gateway_packets" { source = "./../../cloudwatch/metric/many" vars_map = { for k, v in local.metrics_nat_gateway_io : k => { @@ -433,3 +434,15 @@ module "metrics_nat_gateway_io_packets" { period = 60 } } } + +module "metrics_nat_gateway_packets_dropped" { + source = "./../../cloudwatch/metric" + + namespace = "AWS/NATGateway" + dimensions = local.nat_gateway_dimensions + name = "PacketsDropCount" + label = "Packets dropped" + color = local.colors.red + stat = "Sum" + period = 60 +} diff --git a/ecs/network/widgets.tf b/ecs/network/widgets.tf index 5d86536d..3783e410 100644 --- a/ecs/network/widgets.tf +++ b/ecs/network/widgets.tf @@ -195,6 +195,7 @@ module "widget_nat_gateway_network_packets" { title = "NAT network traffic" stacked = true left_metrics = [ + local.nat_gateway_metrics.packets_dropped, local.nat_gateway_metrics.packets_received_out, local.nat_gateway_metrics.packets_sent_in, local.nat_gateway_metrics.packets_received_in, From 2cac0225ceaaac4ddbe8034980f9dd60d1214b7b Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Thu, 30 Apr 2020 15:58:55 +0200 Subject: [PATCH 14/17] refactor(ecs/network): renamed NAT gateway widgets --- ecs/network/widgets.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ecs/network/widgets.tf b/ecs/network/widgets.tf index 3783e410..642c25a9 100644 --- a/ecs/network/widgets.tf +++ b/ecs/network/widgets.tf @@ -178,7 +178,7 @@ module "widget_nat_instance_network_packets" { module "widget_nat_gateway_network_bytes" { source = "./../../cloudwatch/metric_widget" - title = "NAT network traffic" + title = "NAT gateway network traffic" stacked = true left_metrics = [ local.nat_gateway_metrics.bytes_received_out, @@ -192,7 +192,7 @@ module "widget_nat_gateway_network_bytes" { module "widget_nat_gateway_network_packets" { source = "./../../cloudwatch/metric_widget" - title = "NAT network traffic" + title = "NAT gateway network traffic" stacked = true left_metrics = [ local.nat_gateway_metrics.packets_dropped, From c5bd5d0bd895eb36cce589026c000f352130658f Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Thu, 30 Apr 2020 18:26:53 +0200 Subject: [PATCH 15/17] feat(ecs/network): NAT gateway connection metrics and widgets --- ecs/example/main.tf | 2 ++ ecs/network/metrics.tf | 44 ++++++++++++++++++++++++++++++++++++++++++ ecs/network/widgets.tf | 28 +++++++++++++++++++++++++-- 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/ecs/example/main.tf b/ecs/example/main.tf index b6752bfd..7c4ce0c8 100644 --- a/ecs/example/main.tf +++ b/ecs/example/main.tf @@ -114,6 +114,8 @@ module "dashboard" { module.cluster.nat_instance_widgets.network_packets, module.cluster.nat_gateway_widgets.network_bytes, module.cluster.nat_gateway_widgets.network_packets, + module.cluster.nat_gateway_widgets.active_connections, + module.cluster.nat_gateway_widgets.connection_attempts, ] } diff --git a/ecs/network/metrics.tf b/ecs/network/metrics.tf index 8438e7e4..5308ff49 100644 --- a/ecs/network/metrics.tf +++ b/ecs/network/metrics.tf @@ -67,6 +67,11 @@ locals { packets_received_out = module.metrics_nat_gateway_packets.out_map.received_out packets_sent_in = module.metrics_nat_gateway_packets.out_map.sent_in packets_sent_out = module.metrics_nat_gateway_packets.out_map.sent_out + + active_connections = module.metrics_nat_gateway_connections.out_map.active + connection_attempts = module.metrics_nat_gateway_connections.out_map.attempt + established_connections = module.metrics_nat_gateway_connections.out_map.established + port_allocation_errors = module.metrics_nat_gateway_connections.out_map.port_allocation_error } } @@ -446,3 +451,42 @@ module "metrics_nat_gateway_packets_dropped" { stat = "Sum" period = 60 } + +locals { + metrics_nat_gateway_connections = { + active = { + name = "ActiveConnectionCount" + label = "Active connections" + color = null + } + attempt = { + name = "ConnectionAttemptCount" + label = "Connection attempts" + color = local.colors.blue + } + established = { + name = "ConnectionAttemptCount" + label = "Connections established" + color = local.colors.green + } + port_allocation_error = { + name = "ErrorPortAllocation" + label = "Port allocation errors" + color = local.colors.red + } + } +} + +module "metrics_nat_gateway_connections" { + source = "./../../cloudwatch/metric/many" + + vars_map = { for k, v in local.metrics_nat_gateway_connections : k => { + namespace = "AWS/NATGateway" + dimensions = local.nat_gateway_dimensions + name = v.name + label = v.label + color = v.color + stat = "Sum" + period = 60 + } } +} diff --git a/ecs/network/widgets.tf b/ecs/network/widgets.tf index 642c25a9..2a9fcc4b 100644 --- a/ecs/network/widgets.tf +++ b/ecs/network/widgets.tf @@ -7,6 +7,7 @@ locals { lcus = module.widget_lb_lcus traffic = module.widget_lb_traffic } + nat_instance_widgets = { cpu_utilization = module.widget_nat_instance_cpu_utilization cpu_credit_balance = module.widget_nat_instance_cpu_credit_balance @@ -16,8 +17,10 @@ locals { } nat_gateway_widgets = { - network_bytes = module.widget_nat_gateway_network_bytes - network_packets = module.widget_nat_gateway_network_packets + network_bytes = module.widget_nat_gateway_network_bytes + network_packets = module.widget_nat_gateway_network_packets + active_connections = module.widget_nat_gateway_active_connections + connection_attempts = module.widget_nat_gateway_connection_attempts } } @@ -203,3 +206,24 @@ module "widget_nat_gateway_network_packets" { ] left_range = [0, null] } + +module "widget_nat_gateway_active_connections" { + source = "./../../cloudwatch/metric_widget" + + title = "NAT gateway active connections" + stacked = true + left_metrics = [local.nat_gateway_metrics.active_connections] + left_range = [0, null] +} + +module "widget_nat_gateway_connection_attempts" { + source = "./../../cloudwatch/metric_widget" + + title = "NAT gateway connection attempts" + left_metrics = [ + local.nat_gateway_metrics.connection_attempts, + local.nat_gateway_metrics.established_connections, + local.nat_gateway_metrics.port_allocation_errors, + ] + left_range = [0, null] +} From e2e9c7f8b11716736b0b51317205b2a38fb7f65b Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Thu, 30 Apr 2020 18:27:42 +0200 Subject: [PATCH 16/17] fix(ecs/network): NAT gateway connections established metric showing attempts instead --- ecs/network/metrics.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ecs/network/metrics.tf b/ecs/network/metrics.tf index 5308ff49..cc5467b7 100644 --- a/ecs/network/metrics.tf +++ b/ecs/network/metrics.tf @@ -465,7 +465,7 @@ locals { color = local.colors.blue } established = { - name = "ConnectionAttemptCount" + name = "ConnectionEstablishedCount" label = "Connections established" color = local.colors.green } From 693d7a42d9576a8d79ebd856e4aeeb19837a936e Mon Sep 17 00:00:00 2001 From: Marek Skrajnowski Date: Thu, 30 Apr 2020 18:31:37 +0200 Subject: [PATCH 17/17] docs(ecs): revert to using NAT instances in the example --- ecs/example/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ecs/example/main.tf b/ecs/example/main.tf index 7c4ce0c8..e4d579a7 100644 --- a/ecs/example/main.tf +++ b/ecs/example/main.tf @@ -17,7 +17,7 @@ module "cluster" { project_index = local.project_index environment = local.environment availability_zones_count = 1 - nat_instance = false + nat_instance = true } module "hosts" {