diff --git a/.gitallowed b/.gitallowed new file mode 100644 index 000000000..06c40afd2 --- /dev/null +++ b/.gitallowed @@ -0,0 +1 @@ +SOFTWARE\\Microsoft\\PowerShell\\1\\ShellIds\\Microsoft\.PowerShell diff --git a/environment/metadata.go b/environment/metadata.go index 4049302e9..43f861964 100644 --- a/environment/metadata.go +++ b/environment/metadata.go @@ -69,6 +69,7 @@ type MetaData struct { PerformanceMetricMapName string PerformanceTestName string IPFamily string + IamRoleName string } type MetaDataStrings struct { @@ -116,6 +117,7 @@ type MetaDataStrings struct { PerformanceMetricMapName string PerformanceTestName string IPFamily string + IamRoleName string } func registerComputeType(dataString *MetaDataStrings) { @@ -317,6 +319,10 @@ func registerAccountId(dataString *MetaDataStrings) { flag.StringVar(&(dataString.AccountId), "accountId", "", "AWS account Id") } +func registerIamRoleName(dataString *MetaDataStrings) { + flag.StringVar(&(dataString.IamRoleName), "iamRoleName", "", "IAM role name for the EC2 instance") +} + func RegisterEnvironmentMetaDataFlags() *MetaDataStrings { registerComputeType(registeredMetaDataStrings) registerECSData(registeredMetaDataStrings) @@ -336,6 +342,7 @@ func RegisterEnvironmentMetaDataFlags() *MetaDataStrings { registerAgentStartCommand(registeredMetaDataStrings) registerAmpWorkspaceId(registeredMetaDataStrings) registerAccountId(registeredMetaDataStrings) + registerIamRoleName(registeredMetaDataStrings) return registeredMetaDataStrings } @@ -382,6 +389,7 @@ func GetEnvironmentMetaData() *MetaData { metaDataStorage.SampleApp = registeredMetaDataStrings.SampleApp metaDataStorage.AccountId = registeredMetaDataStrings.AccountId metaDataStorage.IPFamily = registeredMetaDataStrings.IPFamily + metaDataStorage.IamRoleName = registeredMetaDataStrings.IamRoleName fillEKSInstallationType(metaDataStorage, registeredMetaDataStrings) return metaDataStorage diff --git a/generator/test_case_generator.go b/generator/test_case_generator.go index 5c25a37df..6ea49c8ea 100644 --- a/generator/test_case_generator.go +++ b/generator/test_case_generator.go @@ -89,6 +89,10 @@ var testTypeToTestConfig = map[string][]testConfig{ testTypeKeyEc2Linux: { {testDir: "./test/ca_bundle"}, {testDir: "./test/cloudwatchlogs"}, + { + testDir: "./test/cloudwatchlogs_concurrency", + terraformDir: "terraform/ec2/cloudwatchlogs_concurrency", + }, { testDir: "./test/log_state/logfile", targets: map[string]map[string]struct{}{"os": {"al2": {}}}, @@ -192,6 +196,10 @@ var testTypeToTestConfig = map[string][]testConfig{ testTypeKeyEc2SELinux: { {testDir: "./test/ca_bundle"}, {testDir: "./test/cloudwatchlogs"}, + { + testDir: "./test/cloudwatchlogs_concurrency", + terraformDir: "terraform/ec2/cloudwatchlogs_concurrency", + }, { testDir: "./test/metrics_number_dimension", targets: map[string]map[string]struct{}{"os": {"al2": {}}}, @@ -430,6 +438,8 @@ type partition struct { configName string tests []string ami []string + // excludedTestDirs allows excluding specific test directories from this partition + excludedTestDirs map[string]struct{} // testConfigOverrides allows partition-specific test configurations // key is testDir, value is the override config testConfigOverrides map[string]testConfig @@ -445,6 +455,9 @@ var partitionTests = map[string]partition{ configName: "_itar", tests: []string{testTypeKeyEc2Linux}, ami: []string{"cloudwatch-agent-integration-test-aarch64-al2023*"}, + excludedTestDirs: map[string]struct{}{ + "./test/cloudwatchlogs_concurrency": {}, // IAM deny policy not deployed to ITAR account + }, testConfigOverrides: map[string]testConfig{ "./test/metric_value_benchmark": { // Exclude DiskIOInstanceStore and DiskIOEBS tests - custom AMI doesn't support NVMe instance store metrics @@ -460,6 +473,9 @@ var partitionTests = map[string]partition{ configName: "_china", tests: []string{testTypeKeyEc2Linux}, ami: []string{"cloudwatch-agent-integration-test-aarch64-al2023*"}, + excludedTestDirs: map[string]struct{}{ + "./test/cloudwatchlogs_concurrency": {}, // IAM deny policy not deployed to China account + }, testConfigOverrides: map[string]testConfig{ "./test/metric_value_benchmark": { // Exclude DiskIOInstanceStore and DiskIOEBS tests - custom AMI doesn't support NVMe instance store metrics @@ -487,7 +503,7 @@ func main() { if len(partition.tests) != 0 && !slices.Contains(partition.tests, testType) { continue } - testMatrix := genMatrix(testType, testConfigs, partition.ami, partition.testConfigOverrides) + testMatrix := genMatrix(testType, testConfigs, partition.ami, partition.testConfigOverrides, partition.excludedTestDirs) writeTestMatrixFile(testType+partition.configName, testMatrix) } } @@ -517,7 +533,7 @@ func generateTestName(testType string, test_directory string) string { return strings.Join(cleaned, "_") } -func genMatrix(testType string, testConfigs []testConfig, ami []string, overrides map[string]testConfig) []matrixRow { +func genMatrix(testType string, testConfigs []testConfig, ami []string, overrides map[string]testConfig, excludedTestDirs map[string]struct{}) []matrixRow { openTestMatrix, err := os.Open(fmt.Sprintf("generator/resources/%v_test_matrix.json", testType)) if err != nil { @@ -537,6 +553,13 @@ func genMatrix(testType string, testConfigs []testConfig, ami []string, override testMatrixComplete := make([]matrixRow, 0, len(testMatrix)) for _, test := range testMatrix { for _, testConfig := range testConfigs { + // Skip excluded test directories + if excludedTestDirs != nil { + if _, excluded := excludedTestDirs[testConfig.testDir]; excluded { + continue + } + } + // Apply partition-specific overrides if available if overrides != nil { if override, ok := overrides[testConfig.testDir]; ok { diff --git a/go.mod b/go.mod index 8d4988107..d6f631dca 100644 --- a/go.mod +++ b/go.mod @@ -20,8 +20,10 @@ require ( github.com/aws/aws-sdk-go-v2/service/dynamodb v1.26.3 github.com/aws/aws-sdk-go-v2/service/ec2 v1.138.2 github.com/aws/aws-sdk-go-v2/service/ecs v1.35.2 + github.com/aws/aws-sdk-go-v2/service/iam v1.27.4 github.com/aws/aws-sdk-go-v2/service/s3 v1.47.2 github.com/aws/aws-sdk-go-v2/service/ssm v1.44.2 + github.com/aws/aws-sdk-go-v2/service/sts v1.26.2 github.com/aws/aws-sdk-go-v2/service/xray v1.23.2 github.com/aws/aws-xray-sdk-go v1.8.3 github.com/cenkalti/backoff/v4 v4.2.1 @@ -63,7 +65,6 @@ require ( github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.16.8 // indirect github.com/aws/aws-sdk-go-v2/service/sso v1.18.2 // indirect github.com/aws/aws-sdk-go-v2/service/ssooidc v1.21.2 // indirect - github.com/aws/aws-sdk-go-v2/service/sts v1.26.2 // indirect github.com/aws/smithy-go v1.18.1 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/go-logr/logr v1.3.0 // indirect diff --git a/go.sum b/go.sum index af09a3b5d..2df148c09 100644 --- a/go.sum +++ b/go.sum @@ -94,6 +94,8 @@ github.com/aws/aws-sdk-go-v2/service/ec2 v1.138.2 h1:e3Imv1oXz+W3Tfclflkh72t5TUP github.com/aws/aws-sdk-go-v2/service/ec2 v1.138.2/go.mod h1:d1hAqgLDOPaSO1Piy/0bBmj6oAplFwv6p0cquHntNHM= github.com/aws/aws-sdk-go-v2/service/ecs v1.35.2 h1:yIr1T8uPhZT2cKCBeO39utfzG/RKJn3SxbuBOdj18Nc= github.com/aws/aws-sdk-go-v2/service/ecs v1.35.2/go.mod h1:MvDz+yXfa2sSEfHB57rdf83deKJIeKEopqHFhVmaRlk= +github.com/aws/aws-sdk-go-v2/service/iam v1.27.4 h1:W7aZ6WYk/R3kGhBbD6tAVwzYav8k0JQCGhEE+kXKl+k= +github.com/aws/aws-sdk-go-v2/service/iam v1.27.4/go.mod h1:LklzfZoa7bL/NdhOzoaRtqSLGhu5j+GqE/9WoOQGFKY= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.3 h1:e3PCNeEaev/ZF01cQyNZgmYE9oYYePIMJs2mWSKG514= github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.10.3/go.mod h1:gIeeNyaL8tIEqZrzAnTeyhHcE0yysCtcaP+N9kxLZ+E= github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.2.8 h1:xyfOAYV/ujzZOo01H9+OnyeiRKmTEp6EsITTsmq332Q= diff --git a/terraform/ec2/cloudwatchlogs_concurrency/main.tf b/terraform/ec2/cloudwatchlogs_concurrency/main.tf new file mode 100644 index 000000000..cea071f0d --- /dev/null +++ b/terraform/ec2/cloudwatchlogs_concurrency/main.tf @@ -0,0 +1,202 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +module "common" { + source = "../../common" +} + +module "basic_components" { + source = "../../basic_components" + + region = var.region +} + +locals { + iam_role_name = "cwa-concurrency-${module.common.testing_id}" +} + +##################################################################### +# Generate EC2 Key Pair for log in access to EC2 +##################################################################### + +resource "tls_private_key" "ssh_key" { + count = var.ssh_key_name == "" ? 1 : 0 + algorithm = "RSA" + rsa_bits = 4096 +} + +resource "aws_key_pair" "aws_ssh_key" { + count = var.ssh_key_name == "" ? 1 : 0 + key_name = "ec2-key-pair-${module.common.testing_id}" + public_key = tls_private_key.ssh_key[0].public_key_openssh +} + +locals { + ssh_key_name = var.ssh_key_name != "" ? var.ssh_key_name : aws_key_pair.aws_ssh_key[0].key_name + private_key_content = var.ssh_key_name != "" ? var.ssh_key_value : tls_private_key.ssh_key[0].private_key_pem + binary_uri = var.is_canary ? "${var.s3_bucket}/release/amazon_linux/${var.arc}/latest/${var.binary_name}" : "${var.s3_bucket}/integration-test/binary/${var.cwa_github_sha}/linux/${var.arc}/${var.binary_name}" +} + +##################################################################### +# Per-test IAM Role with self-modify permissions +##################################################################### + +resource "aws_iam_role" "cwagent_role" { + name = local.iam_role_name + + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Effect = "Allow" + Principal = { Service = "ec2.amazonaws.com" } + Action = "sts:AssumeRole" + }] + }) +} + +resource "aws_iam_role_policy" "cwagent_policy" { + name = "${local.iam_role_name}-policy" + role = aws_iam_role.cwagent_role.id + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Sid = "DenyRestrictedLogGroups" + Effect = "Deny" + Action = ["logs:CreateLogGroup", "logs:CreateLogStream", "logs:PutLogEvents"] + Resource = "arn:aws:logs:*:*:log-group:aws-restricted-log-group-name-*" + }, + { + Effect = "Allow" + Action = [ + "cloudwatch:PutMetricData", + "cloudwatch:GetMetricData", + "cloudwatch:ListMetrics", + "logs:PutLogEvents", + "logs:DescribeLogStreams", + "logs:DescribeLogGroups", + "logs:CreateLogStream", + "logs:CreateLogGroup", + "logs:DeleteLogGroup", + "logs:DeleteLogStream", + "logs:PutRetentionPolicy", + "logs:GetLogEvents", + "ec2:DescribeVolumes", + "ec2:DescribeTags", + "ec2:DescribeInstances", + "ssm:GetParameter", + "ssm:Describe*", + "ssm:Get*", + "ssm:List*", + "s3:GetObject", + "s3:GetObjectAcl", + "s3:ListBucket", + ] + Resource = "*" + }, + { + Effect = "Allow" + Action = ["iam:PutRolePolicy", "iam:DeleteRolePolicy"] + Resource = aws_iam_role.cwagent_role.arn + } + ] + }) +} + +resource "aws_iam_instance_profile" "cwagent_instance_profile" { + name = "${local.iam_role_name}-profile" + role = aws_iam_role.cwagent_role.name +} + +##################################################################### +# Generate EC2 Instance +##################################################################### + +resource "aws_instance" "cwagent" { + ami = data.aws_ami.latest.id + instance_type = var.ec2_instance_type + key_name = local.ssh_key_name + iam_instance_profile = aws_iam_instance_profile.cwagent_instance_profile.name + vpc_security_group_ids = [module.basic_components.security_group] + associate_public_ip_address = true + instance_initiated_shutdown_behavior = "terminate" + + metadata_options { + http_endpoint = "enabled" + http_tokens = "required" + } + + tags = { + Name = "cwagent-integ-test-ec2-${var.test_name}-${module.common.testing_id}" + } +} + +data "aws_ami" "latest" { + most_recent = true + owners = ["self", "amazon"] + + filter { + name = "name" + values = [var.ami] + } +} + +##################################################################### +# Test Setup and Execution +##################################################################### + +resource "null_resource" "integration_test_setup" { + connection { + type = "ssh" + user = var.user + private_key = local.private_key_content + host = aws_instance.cwagent.public_ip + } + + provisioner "remote-exec" { + inline = [ + "echo sha ${var.cwa_github_sha}", + "sudo cloud-init status --wait", + "echo clone ${var.github_test_repo} branch ${var.github_test_repo_branch} and install agent", + "if [ ! -d amazon-cloudwatch-agent-test/vendor ]; then", + "echo 'Vendor directory not found, cloning...'", + "sudo rm -rf amazon-cloudwatch-agent-test", + "git clone --branch ${var.github_test_repo_branch} ${var.github_test_repo} -q", + "fi", + "cd amazon-cloudwatch-agent-test", + "git rev-parse --short HEAD", + "aws s3 cp --no-progress s3://${local.binary_uri} .", + "export PATH=$PATH:/snap/bin:/usr/local/go/bin", + var.install_agent, + ] + } + + depends_on = [ + aws_iam_role.cwagent_role, + aws_iam_role_policy.cwagent_policy + ] +} + +resource "null_resource" "integration_test_run" { + connection { + type = "ssh" + user = var.user + private_key = local.private_key_content + host = aws_instance.cwagent.public_ip + } + + provisioner "remote-exec" { + inline = [ + "echo Preparing environment...", + "nohup bash -c 'while true; do sudo shutdown -c; sleep 30; done' >/dev/null 2>&1 &", + "export AWS_REGION=${var.region}", + "export PATH=$PATH:/snap/bin:/usr/local/go/bin", + "cd ~/amazon-cloudwatch-agent-test", + "echo run sanity test && go test ./test/sanity -p 1 -v", + "go test ${var.test_dir} -p 1 -timeout 1h -computeType=EC2 -bucket=${var.s3_bucket} -cwaCommitSha=${var.cwa_github_sha} -instanceId=${aws_instance.cwagent.id} -iamRoleName=${local.iam_role_name} -v" + ] + } + + depends_on = [null_resource.integration_test_setup] +} diff --git a/terraform/ec2/cloudwatchlogs_concurrency/variables.tf b/terraform/ec2/cloudwatchlogs_concurrency/variables.tf new file mode 100644 index 000000000..f922aa925 --- /dev/null +++ b/terraform/ec2/cloudwatchlogs_concurrency/variables.tf @@ -0,0 +1,130 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +variable "region" { + type = string + default = "us-west-2" +} + +variable "is_selinux_test" { + type = bool + default = false +} + +variable "selinux_branch" { + type = string + default = "main" +} + +variable "ec2_instance_type" { + type = string + default = "t3a.medium" +} + +variable "ssh_key_name" { + type = string + default = "" +} + +variable "ami" { + type = string + default = "cloudwatch-agent-integration-test-ubuntu*" +} + +variable "ssh_key_value" { + type = string + default = "" +} + +variable "user" { + type = string + default = "" +} + +variable "install_agent" { + description = "go run ./install/install_agent.go deb or go run ./install/install_agent.go rpm" + type = string + default = "go run ./install/install_agent.go rpm" +} + +variable "ca_cert_path" { + type = string + default = "" +} + +variable "arc" { + type = string + default = "amd64" + + validation { + condition = contains(["amd64", "arm64"], var.arc) + error_message = "Valid values for arc are (amd64, arm64)." + } +} + +variable "binary_name" { + type = string + default = "" +} + +variable "local_stack_host_name" { + type = string + default = "localhost.localstack.cloud" +} + +variable "s3_bucket" { + type = string + default = "" +} + +variable "test_name" { + type = string + default = "" +} + +variable "test_dir" { + type = string + default = "" +} + +variable "cwa_github_sha" { + type = string + default = "" +} + +variable "github_test_repo" { + type = string + default = "https://github.com/aws/amazon-cloudwatch-agent-test.git" +} + +variable "github_test_repo_branch" { + type = string + default = "main" +} + +variable "is_canary" { + type = bool + default = false +} + +variable "excluded_tests" { + type = string + default = "" +} + +variable "plugin_tests" { + type = string + default = "" +} + +variable "agent_start" { + description = "default command should be for ec2 with linux" + type = string + default = "sudo /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -s -c " +} + +variable "is_onprem" { + description = "Whether to run in on-premises mode instead of EC2 mode" + type = bool + default = false +} diff --git a/terraform/setup/iam.tf b/terraform/setup/iam.tf index 6501293c3..3e4c22665 100644 --- a/terraform/setup/iam.tf +++ b/terraform/setup/iam.tf @@ -88,9 +88,20 @@ data "aws_iam_policy_document" "user-managed-policy-document" { "s3:GetObject", "s3:ListBucket", "s3:PutObject", + "iam:PutRolePolicy", + "iam:DeleteRolePolicy", ] resources = ["*"] } + + statement { + effect = "Deny" + actions = [ + "logs:PutLogEvents", + "logs:CreateLogStream" + ] + resources = ["arn:aws:logs:*:*:log-group:aws-restricted-log-group-name-*:*"] + } } resource "aws_iam_policy" "cwagent_iam_policy" { diff --git a/test/cloudwatchlogs_concurrency/concurrency_test.go b/test/cloudwatchlogs_concurrency/concurrency_test.go new file mode 100644 index 000000000..78708e626 --- /dev/null +++ b/test/cloudwatchlogs_concurrency/concurrency_test.go @@ -0,0 +1,129 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +//go:build !windows + +package cloudwatchlogs_concurrency + +import ( + "fmt" + "os" + "testing" + "time" + + "github.com/stretchr/testify/assert" + + "github.com/aws/amazon-cloudwatch-agent-test/environment" + "github.com/aws/amazon-cloudwatch-agent-test/util/awsservice" + "github.com/aws/amazon-cloudwatch-agent-test/util/common" +) + +const ( + configOutputPath = "/opt/aws/amazon-cloudwatch-agent/bin/config.json" + logLineId1 = "foo" + logLineId2 = "bar" + sleepForFlush = 30 * time.Second +) + +var logLineIds = []string{logLineId1, logLineId2} + +func init() { + environment.RegisterEnvironmentMetaDataFlags() +} + +// TestConcurrencyPoisonPill validates that when retry heap size equals concurrency +// and is smaller than the number of failing log groups, the allowed log group +// continues to publish logs despite multiple denied log groups. +func TestConcurrencyPoisonPill(t *testing.T) { + env := environment.GetEnvironmentMetaData() + instanceId := env.InstanceId + if instanceId == "" { + instanceId = awsservice.GetInstanceId() + } + + accessGrantedLogGroup := fmt.Sprintf("access-granted-%s", instanceId) + accessGrantedLogFile := "/tmp/access_granted.log" + + // Create 10 denied log groups + deniedLogGroups := make([]string, 10) + deniedLogFiles := make([]string, 10) + for i := 0; i < 10; i++ { + deniedLogGroups[i] = fmt.Sprintf("aws-restricted-log-group-name-%d-%s", i, instanceId) + deniedLogFiles[i] = fmt.Sprintf("/tmp/access_denied_%d.log", i) + } + + defer awsservice.DeleteLogGroupAndStream(accessGrantedLogGroup, instanceId) + for _, lg := range deniedLogGroups { + defer awsservice.DeleteLogGroupAndStream(lg, instanceId) + } + + // Create log files + grantedFile, err := os.Create(accessGrantedLogFile) + assert.NoError(t, err) + defer grantedFile.Close() + defer os.Remove(accessGrantedLogFile) + + deniedFiles := make([]*os.File, 10) + for i := 0; i < 10; i++ { + deniedFiles[i], err = os.Create(deniedLogFiles[i]) + assert.NoError(t, err) + defer deniedFiles[i].Close() + defer os.Remove(deniedLogFiles[i]) + } + + common.DeleteFile(common.AgentLogFile) + common.TouchFile(common.AgentLogFile) + start := time.Now() + + common.CopyFile("resources/config_concurrency.json", configOutputPath) + common.StartAgent(configOutputPath, true, false) + + time.Sleep(sleepForFlush) + + // Write logs to all files + writeLogLines(t, grantedFile, 10) + for i := 0; i < 10; i++ { + writeLogLines(t, deniedFiles[i], 10) + } + + time.Sleep(sleepForFlush) + common.StopAgent() + end := time.Now() + + // Validate access granted log group has logs + err = awsservice.ValidateLogs( + accessGrantedLogGroup, + instanceId, + &start, + &end, + awsservice.AssertLogsCount(20), // 10 iterations * 2 logLineIds + ) + assert.NoError(t, err, "Access granted log group should have logs despite denied log groups") + + // Validate denied log groups don't exist (ResourceNotFoundException due to AccessDenied on CreateLogGroup) + for _, lg := range deniedLogGroups { + err = awsservice.ValidateLogs( + lg, + instanceId, + &start, + &end, + awsservice.AssertLogsCount(0), + ) + assert.Error(t, err, "Denied log group should not exist") + assert.Contains(t, err.Error(), "ResourceNotFoundException", "Expected ResourceNotFoundException for denied log group") + } + +} + +func writeLogLines(t *testing.T, f *os.File, iterations int) { + for i := 0; i < iterations; i++ { + ts := time.Now() + for _, id := range logLineIds { + _, err := f.WriteString(fmt.Sprintf("%s - [%s] #%d This is a log line.\n", ts.Format(time.StampMilli), id, i)) + if err != nil { + t.Logf("Error occurred writing log line: %v", err) + } + } + time.Sleep(1 * time.Millisecond) + } +} diff --git a/test/cloudwatchlogs_concurrency/recovery_test.go b/test/cloudwatchlogs_concurrency/recovery_test.go new file mode 100644 index 000000000..7dd514dd9 --- /dev/null +++ b/test/cloudwatchlogs_concurrency/recovery_test.go @@ -0,0 +1,132 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +//go:build !windows + +package cloudwatchlogs_concurrency + +import ( + "fmt" + "os" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/aws/amazon-cloudwatch-agent-test/environment" + "github.com/aws/amazon-cloudwatch-agent-test/util/awsservice" + "github.com/aws/amazon-cloudwatch-agent-test/util/common" +) + +const ( + recoveryPolicyPrefix = "cwagent-recovery-deny-" + iamPropagationWait = 30 * time.Second +) + +// TestConcurrencyRecovery validates that the agent recovers and publishes logs +// after IAM deny permissions are removed mid-test. +func TestConcurrencyRecovery(t *testing.T) { + env := environment.GetEnvironmentMetaData() + instanceId := env.InstanceId + if instanceId == "" { + instanceId = awsservice.GetInstanceId() + } + + iamRoleName := env.IamRoleName + if iamRoleName == "" { + t.Skip("Skipping TestConcurrencyRecovery: -iamRoleName not provided (requires per-test IAM role with self-modify permissions)") + } + + allowedLogGroup := fmt.Sprintf("recovery-allowed-%s", instanceId) + recoveryLogGroup := fmt.Sprintf("recovery-test-target-%s", instanceId) + policyName := recoveryPolicyPrefix + instanceId + logGroupArn := fmt.Sprintf("arn:aws:logs:*:*:log-group:%s:*", recoveryLogGroup) + + err := awsservice.PutRoleDenyPolicy(iamRoleName, policyName, logGroupArn) + require.NoError(t, err, "Failed to create deny policy") + + policyCreated := true + defer func() { + if policyCreated { + if cleanupErr := awsservice.DeleteRoleInlinePolicy(iamRoleName, policyName); cleanupErr != nil { + t.Logf("Warning: failed to cleanup deny policy: %v", cleanupErr) + } + } + }() + + time.Sleep(iamPropagationWait) + + defer awsservice.DeleteLogGroupAndStream(allowedLogGroup, instanceId) + defer awsservice.DeleteLogGroupAndStream(recoveryLogGroup, instanceId) + + allowedFile, err := os.Create("/tmp/recovery_allowed.log") + require.NoError(t, err) + defer allowedFile.Close() + defer os.Remove("/tmp/recovery_allowed.log") + + recoveryFile, err := os.Create("/tmp/recovery_target.log") + require.NoError(t, err) + defer recoveryFile.Close() + defer os.Remove("/tmp/recovery_target.log") + + common.DeleteFile(common.AgentLogFile) + common.TouchFile(common.AgentLogFile) + + common.CopyFile("resources/config_recovery.json", configOutputPath) + common.StartAgent(configOutputPath, true, false) + defer common.StopAgent() + + time.Sleep(sleepForFlush) + + writeLogLines(t, allowedFile, 10) + writeLogLines(t, recoveryFile, 10) + time.Sleep(sleepForFlush) + + err = awsservice.ValidateLogs(allowedLogGroup, instanceId, nil, nil, + awsservice.AssertLogsCount(20)) + assert.NoError(t, err, "Allowed log group should have logs") + + err = awsservice.ValidateLogs(recoveryLogGroup, instanceId, nil, nil, + awsservice.AssertLogsCount(0)) + assert.Error(t, err, "Recovery log group should not exist while denied") + assert.Contains(t, err.Error(), "ResourceNotFoundException") + + err = awsservice.DeleteRoleInlinePolicy(iamRoleName, policyName) + assert.NoError(t, err, "Failed to delete deny policy") + policyCreated = false + + t.Logf("Deny policy removed, waiting %v for IAM propagation...", iamPropagationWait) + time.Sleep(iamPropagationWait) + + writeLogLines(t, recoveryFile, 10) + time.Sleep(sleepForFlush) + + common.StopAgent() + + err = awsservice.ValidateLogs(recoveryLogGroup, instanceId, nil, nil, + awsservice.AssertLogsCount(40)) + if err != nil { + printAgentLogs(t) + } + assert.NoError(t, err, "Recovery log group should have logs after permissions restored") +} + +func printAgentLogs(t *testing.T) { + t.Log("=== CloudWatch Agent Logs (last 100 lines) ===") + content, err := os.ReadFile(common.AgentLogFile) + if err != nil { + t.Logf("Failed to read agent log: %v", err) + return + } + lines := strings.Split(string(content), "\n") + start := 0 + if len(lines) > 100 { + start = len(lines) - 100 + } + for i := start; i < len(lines); i++ { + t.Log(lines[i]) + } + t.Log("=== End Agent Logs ===") +} diff --git a/test/cloudwatchlogs_concurrency/resources/config_concurrency.json b/test/cloudwatchlogs_concurrency/resources/config_concurrency.json new file mode 100644 index 000000000..cd37bbc26 --- /dev/null +++ b/test/cloudwatchlogs_concurrency/resources/config_concurrency.json @@ -0,0 +1,93 @@ +{ + "agent": { + "run_as_user": "root", + "debug": true + }, + "logs": { + "logs_collected": { + "files": { + "collect_list": [ + { + "file_path": "/tmp/access_granted.log", + "log_group_name": "access-granted-{instance_id}", + "log_stream_name": "{instance_id}", + "timezone": "UTC", + "retention_in_days": 7 + }, + { + "file_path": "/tmp/access_denied_0.log", + "log_group_name": "aws-restricted-log-group-name-0-{instance_id}", + "log_stream_name": "{instance_id}", + "timezone": "UTC", + "retention_in_days": 7 + }, + { + "file_path": "/tmp/access_denied_1.log", + "log_group_name": "aws-restricted-log-group-name-1-{instance_id}", + "log_stream_name": "{instance_id}", + "timezone": "UTC", + "retention_in_days": 7 + }, + { + "file_path": "/tmp/access_denied_2.log", + "log_group_name": "aws-restricted-log-group-name-2-{instance_id}", + "log_stream_name": "{instance_id}", + "timezone": "UTC", + "retention_in_days": 7 + }, + { + "file_path": "/tmp/access_denied_3.log", + "log_group_name": "aws-restricted-log-group-name-3-{instance_id}", + "log_stream_name": "{instance_id}", + "timezone": "UTC", + "retention_in_days": 7 + }, + { + "file_path": "/tmp/access_denied_4.log", + "log_group_name": "aws-restricted-log-group-name-4-{instance_id}", + "log_stream_name": "{instance_id}", + "timezone": "UTC", + "retention_in_days": 7 + }, + { + "file_path": "/tmp/access_denied_5.log", + "log_group_name": "aws-restricted-log-group-name-5-{instance_id}", + "log_stream_name": "{instance_id}", + "timezone": "UTC", + "retention_in_days": 7 + }, + { + "file_path": "/tmp/access_denied_6.log", + "log_group_name": "aws-restricted-log-group-name-6-{instance_id}", + "log_stream_name": "{instance_id}", + "timezone": "UTC", + "retention_in_days": 7 + }, + { + "file_path": "/tmp/access_denied_7.log", + "log_group_name": "aws-restricted-log-group-name-7-{instance_id}", + "log_stream_name": "{instance_id}", + "timezone": "UTC", + "retention_in_days": 7 + }, + { + "file_path": "/tmp/access_denied_8.log", + "log_group_name": "aws-restricted-log-group-name-8-{instance_id}", + "log_stream_name": "{instance_id}", + "timezone": "UTC", + "retention_in_days": 7 + }, + { + "file_path": "/tmp/access_denied_9.log", + "log_group_name": "aws-restricted-log-group-name-9-{instance_id}", + "log_stream_name": "{instance_id}", + "timezone": "UTC", + "retention_in_days": 7 + } + ] + } + }, + "force_flush_interval": 5, + "concurrency": 2 + } +} diff --git a/test/cloudwatchlogs_concurrency/resources/config_recovery.json b/test/cloudwatchlogs_concurrency/resources/config_recovery.json new file mode 100644 index 000000000..d90454607 --- /dev/null +++ b/test/cloudwatchlogs_concurrency/resources/config_recovery.json @@ -0,0 +1,30 @@ +{ + "agent": { + "run_as_user": "root", + "debug": true + }, + "logs": { + "logs_collected": { + "files": { + "collect_list": [ + { + "file_path": "/tmp/recovery_allowed.log", + "log_group_name": "recovery-allowed-{instance_id}", + "log_stream_name": "{instance_id}", + "timezone": "UTC", + "retention_in_days": 7 + }, + { + "file_path": "/tmp/recovery_target.log", + "log_group_name": "recovery-test-target-{instance_id}", + "log_stream_name": "{instance_id}", + "timezone": "UTC", + "retention_in_days": 7 + } + ] + } + }, + "force_flush_interval": 5, + "concurrency": 2 + } +} diff --git a/util/awsservice/constant.go b/util/awsservice/constant.go index 772044357..db84e6a89 100644 --- a/util/awsservice/constant.go +++ b/util/awsservice/constant.go @@ -18,6 +18,7 @@ import ( "github.com/aws/aws-sdk-go-v2/service/dynamodb" "github.com/aws/aws-sdk-go-v2/service/ec2" "github.com/aws/aws-sdk-go-v2/service/ecs" + "github.com/aws/aws-sdk-go-v2/service/iam" "github.com/aws/aws-sdk-go-v2/service/s3" "github.com/aws/aws-sdk-go-v2/service/ssm" "github.com/aws/aws-sdk-go-v2/service/sts" @@ -57,6 +58,7 @@ var ( S3Client *s3.Client CloudformationClient *cloudformation.Client XrayClient *xray.Client + IamClient *iam.Client ) func init() { @@ -99,6 +101,7 @@ func ConfigureAWSClients(region string) error { S3Client = s3.NewFromConfig(awsCfg) CloudformationClient = cloudformation.NewFromConfig(awsCfg) XrayClient = xray.NewFromConfig(awsCfg) + IamClient = iam.NewFromConfig(awsCfg) return nil } diff --git a/util/awsservice/iam.go b/util/awsservice/iam.go new file mode 100644 index 000000000..426e05c47 --- /dev/null +++ b/util/awsservice/iam.go @@ -0,0 +1,53 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +package awsservice + +import ( + "encoding/json" + "fmt" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/iam" +) + +// PutRoleDenyPolicy creates an inline deny policy on a role for logs:CreateLogGroup, +// logs:PutLogEvents, and logs:CreateLogStream on the given log group ARN pattern. +func PutRoleDenyPolicy(roleName, policyName, logGroupPattern string) error { + policy := map[string]interface{}{ + "Version": "2012-10-17", + "Statement": []map[string]interface{}{ + { + "Effect": "Deny", + "Action": []string{"logs:CreateLogGroup", "logs:PutLogEvents", "logs:CreateLogStream"}, + "Resource": logGroupPattern, + }, + }, + } + policyJSON, err := json.Marshal(policy) + if err != nil { + return fmt.Errorf("failed to marshal policy: %w", err) + } + + _, err = IamClient.PutRolePolicy(ctx, &iam.PutRolePolicyInput{ + RoleName: aws.String(roleName), + PolicyName: aws.String(policyName), + PolicyDocument: aws.String(string(policyJSON)), + }) + if err != nil { + return fmt.Errorf("failed to put role policy: %w", err) + } + return nil +} + +// DeleteRoleInlinePolicy deletes an inline policy from a role. +func DeleteRoleInlinePolicy(roleName, policyName string) error { + _, err := IamClient.DeleteRolePolicy(ctx, &iam.DeleteRolePolicyInput{ + RoleName: aws.String(roleName), + PolicyName: aws.String(policyName), + }) + if err != nil { + return fmt.Errorf("failed to delete role policy: %w", err) + } + return nil +}