From 8f509c281e035df19d8b4cf24bb3dd052a1280a2 Mon Sep 17 00:00:00 2001 From: Adithya Kolla Date: Wed, 9 Aug 2023 17:34:00 -0700 Subject: [PATCH 01/15] change exit codes of svc/env/job deploy --- internal/pkg/cli/svc_deploy.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/internal/pkg/cli/svc_deploy.go b/internal/pkg/cli/svc_deploy.go index ffa8cb4349f..84adf95a63e 100644 --- a/internal/pkg/cli/svc_deploy.go +++ b/internal/pkg/cli/svc_deploy.go @@ -668,6 +668,18 @@ func (e *errHasDiff) ExitCode() int { return 1 } +type errNoInfrastructureChanges struct { + parentErr error +} + +func (e *errNoInfrastructureChanges) Error() string { + return e.parentErr.Error() +} + +func (e *errNoInfrastructureChanges) ExitCode() int { + return 0 +} + func diff(differ templateDiffer, tmpl string, writer io.Writer) error { if out, err := differ.DeployDiff(tmpl); err != nil { return err From 8e5b01328d3a78ae68c35600a3400036ff09cb38 Mon Sep 17 00:00:00 2001 From: Adithya Kolla Date: Thu, 10 Aug 2023 14:23:15 -0700 Subject: [PATCH 02/15] move errstruct to errors.go --- internal/pkg/cli/svc_deploy.go | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/internal/pkg/cli/svc_deploy.go b/internal/pkg/cli/svc_deploy.go index 84adf95a63e..ffa8cb4349f 100644 --- a/internal/pkg/cli/svc_deploy.go +++ b/internal/pkg/cli/svc_deploy.go @@ -668,18 +668,6 @@ func (e *errHasDiff) ExitCode() int { return 1 } -type errNoInfrastructureChanges struct { - parentErr error -} - -func (e *errNoInfrastructureChanges) Error() string { - return e.parentErr.Error() -} - -func (e *errNoInfrastructureChanges) ExitCode() int { - return 0 -} - func diff(differ templateDiffer, tmpl string, writer io.Writer) error { if out, err := differ.DeployDiff(tmpl); err != nil { return err From f9f67d4fbe29f9ad5655e119e30eadd7e878ff2d Mon Sep 17 00:00:00 2001 From: Adithya Kolla Date: Tue, 29 Aug 2023 03:00:26 -0700 Subject: [PATCH 03/15] stopped task info in the progress tracker --- .../mocks/mock_cloudformation.go | 15 ++++ internal/pkg/stream/ecs.go | 37 +++++++++ internal/pkg/stream/ecs_test.go | 82 ++++++++++++++++++- internal/pkg/term/progress/ecs.go | 72 +++++++++++++++- internal/pkg/term/progress/ecs_test.go | 47 +++++++++-- 5 files changed, 239 insertions(+), 14 deletions(-) diff --git a/internal/pkg/deploy/cloudformation/mocks/mock_cloudformation.go b/internal/pkg/deploy/cloudformation/mocks/mock_cloudformation.go index 8dddc27ce5b..40ef169c9ff 100644 --- a/internal/pkg/deploy/cloudformation/mocks/mock_cloudformation.go +++ b/internal/pkg/deploy/cloudformation/mocks/mock_cloudformation.go @@ -189,6 +189,21 @@ func (mr *MockecsClientMockRecorder) Service(clusterName, serviceName interface{ return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Service", reflect.TypeOf((*MockecsClient)(nil).Service), clusterName, serviceName) } +// StoppedServiceTasks mocks base method. +func (m *MockecsClient) StoppedServiceTasks(cluster, service string) ([]*ecs.Task, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "StoppedServiceTasks", cluster, service) + ret0, _ := ret[0].([]*ecs.Task) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// StoppedServiceTasks indicates an expected call of StoppedServiceTasks. +func (mr *MockecsClientMockRecorder) StoppedServiceTasks(cluster, service interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "StoppedServiceTasks", reflect.TypeOf((*MockecsClient)(nil).StoppedServiceTasks), cluster, service) +} + // MockcwClient is a mock of cwClient interface. type MockcwClient struct { ctrl *gomock.Controller diff --git a/internal/pkg/stream/ecs.go b/internal/pkg/stream/ecs.go index 52861640208..81291ac08a8 100644 --- a/internal/pkg/stream/ecs.go +++ b/internal/pkg/stream/ecs.go @@ -6,6 +6,7 @@ package stream import ( "fmt" "math/rand" + "sort" "strings" "sync" "time" @@ -24,11 +25,16 @@ const ( rollOutEmpty = "" ) +const ( + ecsScalingActivity = "Scaling activity initiated by" +) + var ecsEventFailureKeywords = []string{"fail", "unhealthy", "error", "throttle", "unable", "missing", "alarm detected", "rolling back"} // ECSServiceDescriber is the interface to describe an ECS service. type ECSServiceDescriber interface { Service(clusterName, serviceName string) (*ecs.Service, error) + StoppedServiceTasks(cluster, service string) ([]*ecs.Task, error) } // CloudWatchDescriber is the interface to describe CW alarms. @@ -69,6 +75,7 @@ type ECSService struct { Deployments []ECSDeployment LatestFailureEvents []string Alarms []cloudwatch.AlarmStatus + StoppedTasks []ecs.Task } // ECSDeploymentStreamer is a Streamer for ECSService descriptions until the deployment is completed. @@ -154,6 +161,35 @@ func (s *ECSDeploymentStreamer) Fetch() (next time.Time, done bool, err error) { done = true } } + + stoppedSvcTasks, err := s.client.StoppedServiceTasks(s.cluster, s.service) + if err != nil { + if request.IsErrorThrottle(err) { + s.ecsRetries += 1 + return nextFetchDate(s.clock, s.rand, s.ecsRetries), false, nil + } + return next, false, fmt.Errorf("fetch stopped tasks: %w", err) + } + s.ecsRetries = 0 + + var stoppedTasks []ecs.Task + for _, st := range stoppedSvcTasks { + if stoppingAt := aws.TimeValue(st.StoppingAt); stoppingAt.Before(s.deploymentCreationTime) || + (strings.Contains(aws.StringValue(st.StoppedReason), ecsScalingActivity)) { + continue + } + stoppedTasks = append(stoppedTasks, ecs.Task{ + TaskArn: st.TaskArn, + DesiredStatus: st.DesiredStatus, + LastStatus: st.LastStatus, + StoppedReason: st.StoppedReason, + StoppingAt: st.StoppingAt, + }) + } + sort.SliceStable(stoppedTasks, func(i, j int) bool { + return aws.TimeValue(stoppedTasks[i].StoppingAt).After(aws.TimeValue(stoppedTasks[j].StoppingAt)) + }) + var failureMsgs []string for _, event := range out.Events { if createdAt := aws.TimeValue(event.CreatedAt); createdAt.Before(s.deploymentCreationTime) { @@ -187,6 +223,7 @@ func (s *ECSDeploymentStreamer) Fetch() (next time.Time, done bool, err error) { Deployments: deployments, LatestFailureEvents: failureMsgs, Alarms: alarms, + StoppedTasks: stoppedTasks, }) return nextFetchDate(s.clock, s.rand, 0), done, nil } diff --git a/internal/pkg/stream/ecs_test.go b/internal/pkg/stream/ecs_test.go index 017bf73bf93..dbe90e0fa68 100644 --- a/internal/pkg/stream/ecs_test.go +++ b/internal/pkg/stream/ecs_test.go @@ -5,10 +5,11 @@ package stream import ( "errors" - "github.com/aws/copilot-cli/internal/pkg/aws/cloudwatch" "testing" "time" + "github.com/aws/copilot-cli/internal/pkg/aws/cloudwatch" + "github.com/aws/aws-sdk-go/aws" awsecs "github.com/aws/aws-sdk-go/service/ecs" "github.com/aws/copilot-cli/internal/pkg/aws/ecs" @@ -16,8 +17,10 @@ import ( ) type mockECS struct { - out *ecs.Service - err error + out *ecs.Service + tasks []*ecs.Task + err error + taskError error } type mockCW struct { @@ -28,6 +31,9 @@ type mockCW struct { func (m mockECS) Service(clusterName, serviceName string) (*ecs.Service, error) { return m.out, m.err } +func (m mockECS) StoppedServiceTasks(clusterName, serviceName string) ([]*ecs.Task, error) { + return m.tasks, m.taskError +} func (m mockCW) AlarmStatuses(opts ...cloudwatch.DescribeAlarmOpts) ([]cloudwatch.AlarmStatus, error) { return m.out, m.err @@ -97,6 +103,37 @@ func TestECSDeploymentStreamer_Fetch(t *testing.T) { // THEN require.EqualError(t, err, "retrieve alarm statuses: some error") }) + t.Run("returns a wrapped error on stopped tasks call failure", func(t *testing.T) { + // GIVEN + m := mockECS{ + out: &ecs.Service{ + DeploymentConfiguration: &awsecs.DeploymentConfiguration{ + Alarms: &awsecs.DeploymentAlarms{ + AlarmNames: []*string{aws.String("alarm1"), aws.String("alarm2")}, + Enable: aws.Bool(true), + Rollback: aws.Bool(true), + }, + }, + }, + tasks: []*ecs.Task{ + { + TaskArn: aws.String("arn:aws:ecs:us-east-2:197732814171:task/testbugbash-testenv-Cluster-qrvEB"), + DesiredStatus: aws.String("Stopped"), + LastStatus: aws.String("Deprovisioning"), + StoppedReason: aws.String("unable to pull secrets"), + }, + }, + taskError: errors.New("some error"), + } + cw := mockCW{} + streamer := NewECSDeploymentStreamer(m, cw, "my-cluster", "my-svc", time.Now()) + + // WHEN + _, _, err := streamer.Fetch() + + // THEN + require.EqualError(t, err, "fetch stopped tasks: some error") + }) t.Run("stores events, alarms, and failures until deployment is done", func(t *testing.T) { // GIVEN oldStartDate := time.Date(2020, time.November, 23, 17, 0, 0, 0, time.UTC) @@ -145,6 +182,29 @@ func TestECSDeploymentStreamer_Fetch(t *testing.T) { }, }, }, + tasks: []*ecs.Task{ + { + TaskArn: aws.String("arn:aws:ecs:us-east-2:197732814171:task/bugbash-test-Cluster-qrvEB"), + DesiredStatus: aws.String("Stopped"), + LastStatus: aws.String("Deprovisioning"), + StoppedReason: aws.String("unable to pull secrets"), + StoppingAt: aws.Time(startDate.Add(10 * time.Second)), + }, + { + TaskArn: aws.String("arn:aws:ecs:us-east-2:197732814171:task/bugbash-test-Cluster-qrvEBt"), + DesiredStatus: aws.String("Stopped"), + LastStatus: aws.String("Stopped"), + StoppedReason: aws.String("unable to pull secrets"), + StoppingAt: aws.Time(oldStartDate), + }, + { + TaskArn: aws.String("arn:aws:ecs:us-east-2:197732814171:task/bugbash-test-Cluster-qrvEBs"), + DesiredStatus: aws.String("Stopped"), + LastStatus: aws.String("Deprovisioning"), + StoppedReason: aws.String("ELB healthcheck failed"), + StoppingAt: aws.Time(startDate.Add(20 * time.Second)), + }, + }, } cw := mockCW{ out: []cloudwatch.AlarmStatus{ @@ -200,6 +260,22 @@ func TestECSDeploymentStreamer_Fetch(t *testing.T) { }, }, LatestFailureEvents: []string{"deployment failed: alarm detected", "rolling back to deployment X"}, + StoppedTasks: []ecs.Task{ + { + TaskArn: aws.String("arn:aws:ecs:us-east-2:197732814171:task/bugbash-test-Cluster-qrvEBs"), + DesiredStatus: aws.String("Stopped"), + LastStatus: aws.String("Deprovisioning"), + StoppedReason: aws.String("ELB healthcheck failed"), + StoppingAt: aws.Time(startDate.Add(20 * time.Second)), + }, + { + TaskArn: aws.String("arn:aws:ecs:us-east-2:197732814171:task/bugbash-test-Cluster-qrvEB"), + DesiredStatus: aws.String("Stopped"), + LastStatus: aws.String("Deprovisioning"), + StoppedReason: aws.String("unable to pull secrets"), + StoppingAt: aws.Time(startDate.Add(10 * time.Second)), + }, + }, }, }, streamer.eventsToFlush) require.True(t, done, "there should be no more work to do since the deployment is completed") diff --git a/internal/pkg/term/progress/ecs.go b/internal/pkg/term/progress/ecs.go index be2c9447c6f..9430215dd84 100644 --- a/internal/pkg/term/progress/ecs.go +++ b/internal/pkg/term/progress/ecs.go @@ -10,7 +10,9 @@ import ( "strconv" "sync" + "github.com/aws/aws-sdk-go/aws" "github.com/aws/copilot-cli/internal/pkg/aws/cloudwatch" + "github.com/dustin/go-humanize/english" "github.com/aws/copilot-cli/internal/pkg/aws/ecs" "github.com/aws/copilot-cli/internal/pkg/stream" @@ -21,6 +23,10 @@ const ( maxServiceEventsToDisplay = 5 // Total number of events we want to display at most for ECS service events. ) +const ( + maxStoppedTasksToDisplay = 2 +) + // ECSServiceSubscriber is the interface to subscribe channels to ECS service descriptions. type ECSServiceSubscriber interface { Subscribe() <-chan stream.ECSService @@ -40,9 +46,10 @@ func ListeningRollingUpdateRenderer(streamer ECSServiceSubscriber, opts RenderOp type rollingUpdateComponent struct { // Data to render. - deployments []stream.ECSDeployment - failureMsgs []string - alarms []cloudwatch.AlarmStatus + deployments []stream.ECSDeployment + failureMsgs []string + alarms []cloudwatch.AlarmStatus + stoppedTasks []ecs.Task // Style configuration for the component. padding int @@ -58,6 +65,10 @@ func (c *rollingUpdateComponent) Listen() { for ev := range c.stream { c.mu.Lock() c.deployments = ev.Deployments + c.stoppedTasks = ev.StoppedTasks + if len(c.stoppedTasks) > maxStoppedTasksToDisplay { + c.stoppedTasks = c.stoppedTasks[:maxStoppedTasksToDisplay] + } c.failureMsgs = append(c.failureMsgs, ev.LatestFailureEvents...) if len(c.failureMsgs) > c.maxLenFailureMsgs { c.failureMsgs = c.failureMsgs[len(c.failureMsgs)-c.maxLenFailureMsgs:] @@ -80,6 +91,12 @@ func (c *rollingUpdateComponent) Render(out io.Writer) (numLines int, err error) } numLines += nl + nl, err = c.renderStoppedTasks(buf) + if err != nil { + return 0, err + } + numLines += nl + nl, err = c.renderFailureMsgs(buf) if err != nil { return 0, err @@ -180,6 +197,55 @@ func (c *rollingUpdateComponent) renderAlarms(out io.Writer) (numLines int, err return renderComponents(out, components) } +func (c *rollingUpdateComponent) renderStoppedTasks(out io.Writer) (numLines int, err error) { + if len(c.stoppedTasks) == 0 { + return 0, nil + } + header := []string{"TaskId", "CurrentStatus", "DesiredStatus"} + var rows [][]string + title := fmt.Sprintf("Latest %d %s stopped reason", len(c.stoppedTasks), english.PluralWord(len(c.stoppedTasks), "task", "tasks")) + title = fmt.Sprintf("%s%s", color.DullRed.Sprintf("✘ "), color.Faint.Sprintf(title)) + childComponents := []Renderer{ + &singleLineComponent{}, // Add an empty line before rendering task stopped events. + &singleLineComponent{ + Text: title, + Padding: c.padding, + }, + } + for _, st := range c.stoppedTasks { + id, err := ecs.TaskID(aws.StringValue(st.TaskArn)) + if err != nil { + return 0, err + } + rows = append(rows, []string{ + id, + aws.StringValue(st.LastStatus), + aws.StringValue(st.DesiredStatus), + }) + for i, truncatedReason := range splitByLength(fmt.Sprintf("%s: %s", id, aws.StringValue(st.StoppedReason)), maxCellLength) { + pretty := fmt.Sprintf(" %s", truncatedReason) + if i == 0 { + pretty = fmt.Sprintf("- %s", truncatedReason) + } + childComponents = append(childComponents, &singleLineComponent{ + Text: pretty, + Padding: c.padding + nestedComponentPadding, + }) + } + } + table := newTableComponent(color.Faint.Sprintf("Latest %d stopped %s", len(c.stoppedTasks), english.PluralWord(len(c.stoppedTasks), "task", "tasks")), header, rows) + table.Padding = c.padding + treeComponent := treeComponent{ + Root: table, + Children: childComponents, + } + nl, err := treeComponent.Render(out) + if err != nil { + return 0, fmt.Errorf("render deployments table: %w", err) + } + return nl, err +} + func reverseStrings(arr []string) []string { reversed := make([]string, len(arr)) copy(reversed, arr) diff --git a/internal/pkg/term/progress/ecs_test.go b/internal/pkg/term/progress/ecs_test.go index a3cc7ea9dbf..a0168780758 100644 --- a/internal/pkg/term/progress/ecs_test.go +++ b/internal/pkg/term/progress/ecs_test.go @@ -4,11 +4,14 @@ package progress import ( + "strings" + "testing" + + "github.com/aws/aws-sdk-go/aws" "github.com/aws/copilot-cli/internal/pkg/aws/cloudwatch" + "github.com/aws/copilot-cli/internal/pkg/aws/ecs" "github.com/aws/copilot-cli/internal/pkg/stream" "github.com/stretchr/testify/require" - "strings" - "testing" ) func TestRollingUpdateComponent_Listen(t *testing.T) { @@ -77,9 +80,10 @@ func TestRollingUpdateComponent_Listen(t *testing.T) { func TestRollingUpdateComponent_Render(t *testing.T) { testCases := map[string]struct { - inDeployments []stream.ECSDeployment - inFailureMsgs []string - inAlarms []cloudwatch.AlarmStatus + inDeployments []stream.ECSDeployment + inFailureMsgs []string + inAlarms []cloudwatch.AlarmStatus + inStoppedTasks []ecs.Task wantedNumLines int wantedOut string @@ -151,6 +155,32 @@ Alarms Name State alarm1 [OK] alarm2 [ALARM] +`, + }, + "should render stopped tasks and their statuses": { + inStoppedTasks: []ecs.Task{ + { + TaskArn: aws.String("arn:aws:ecs:us-east-2:197732814171:task/bugbash-test-Cluster-qrvEBaBlImsZ/21479dca3393490a9d95f27353186bf6"), + DesiredStatus: aws.String("STOPPED"), + LastStatus: aws.String("DEPROVISIONING"), + StoppedReason: aws.String("ELB healthcheck failed"), + }, + { + TaskArn: aws.String("arn:aws:ecs:us-east-2:197732814171:task/bugbash-test-Cluster-qrvEBaBlImsZ/2243bac3ca1d4b3a8c66888348cba2e1"), + DesiredStatus: aws.String("STOPPED"), + LastStatus: aws.String("STOPPING"), + StoppedReason: aws.String("unable to pull secrets"), + }, + }, + wantedNumLines: 8, + wantedOut: `Latest 2 stopped tasks + TaskId CurrentStatus DesiredStatus + 21479dca3393490a9d95f27353186bf6 DEPROVISIONING STOPPED + 2243bac3ca1d4b3a8c66888348cba2e1 STOPPING STOPPED + +✘ Latest 2 tasks stopped reason + - 21479dca3393490a9d95f27353186bf6: ELB healthcheck failed + - 2243bac3ca1d4b3a8c66888348cba2e1: unable to pull secrets `, }, } @@ -160,9 +190,10 @@ Alarms // GIVEN buf := new(strings.Builder) c := &rollingUpdateComponent{ - deployments: tc.inDeployments, - failureMsgs: tc.inFailureMsgs, - alarms: tc.inAlarms, + deployments: tc.inDeployments, + failureMsgs: tc.inFailureMsgs, + alarms: tc.inAlarms, + stoppedTasks: tc.inStoppedTasks, } // WHEN From 08b4a93dd6b2e32a43f4afc087a168da67a20f5c Mon Sep 17 00:00:00 2001 From: Adithya Kolla Date: Tue, 29 Aug 2023 09:24:25 -0700 Subject: [PATCH 04/15] add tests that ignores scaling activity --- internal/pkg/stream/ecs_test.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/internal/pkg/stream/ecs_test.go b/internal/pkg/stream/ecs_test.go index dbe90e0fa68..0de053f3744 100644 --- a/internal/pkg/stream/ecs_test.go +++ b/internal/pkg/stream/ecs_test.go @@ -204,6 +204,13 @@ func TestECSDeploymentStreamer_Fetch(t *testing.T) { StoppedReason: aws.String("ELB healthcheck failed"), StoppingAt: aws.Time(startDate.Add(20 * time.Second)), }, + { + TaskArn: aws.String("arn:aws:ecs:us-east-2:197732814171:task/bugbash-test-Cluster-qrvEBu"), + DesiredStatus: aws.String("Stopped"), + LastStatus: aws.String("Deprovisioning"), + StoppedReason: aws.String("Scaling activity initiated by deployment ecs-svc/mocktaskid"), + StoppingAt: aws.Time(startDate.Add(30 * time.Second)), + }, }, } cw := mockCW{ From 54fd468cb46e8e1380b720f30e9dd549290ad940 Mon Sep 17 00:00:00 2001 From: Adithya Kolla Date: Tue, 29 Aug 2023 09:49:49 -0700 Subject: [PATCH 05/15] fix tests --- internal/pkg/deploy/cloudformation/cloudformation_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/pkg/deploy/cloudformation/cloudformation_test.go b/internal/pkg/deploy/cloudformation/cloudformation_test.go index 71c5ac03bd4..81f012f66c3 100644 --- a/internal/pkg/deploy/cloudformation/cloudformation_test.go +++ b/internal/pkg/deploy/cloudformation/cloudformation_test.go @@ -359,6 +359,7 @@ Resources: }, }, }, nil) + mockECS.EXPECT().StoppedServiceTasks("cluster", "service").Return(nil, nil) mockCFN.EXPECT().Describe(stackName).Return(&cloudformation.StackDescription{ StackStatus: aws.String("CREATE_COMPLETE"), }, nil) From fcb8b68f42d52ec26d0f0c2b30532ceb21a1f0ae Mon Sep 17 00:00:00 2001 From: Adithya Kolla Date: Wed, 30 Aug 2023 15:52:16 -0700 Subject: [PATCH 06/15] add testcase incase of split stopped reasons --- internal/pkg/term/progress/ecs_test.go | 29 ++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/internal/pkg/term/progress/ecs_test.go b/internal/pkg/term/progress/ecs_test.go index a0168780758..99c83cf1c85 100644 --- a/internal/pkg/term/progress/ecs_test.go +++ b/internal/pkg/term/progress/ecs_test.go @@ -181,6 +181,35 @@ Alarms ✘ Latest 2 tasks stopped reason - 21479dca3393490a9d95f27353186bf6: ELB healthcheck failed - 2243bac3ca1d4b3a8c66888348cba2e1: unable to pull secrets +`, + }, + "should render stopped tasks and split long stopped reasons": { + inStoppedTasks: []ecs.Task{ + { + TaskArn: aws.String("arn:aws:ecs:us-east-2:197732814171:task/bugbash-test-Cluster-qrvEBaBlImsZ/21479dca3393490a9d95f27353186bf6"), + DesiredStatus: aws.String("STOPPED"), + LastStatus: aws.String("DEPROVISIONING"), + StoppedReason: aws.String("ELB healthcheck failed"), + }, + { + TaskArn: aws.String("arn:aws:ecs:us-east-2:197732814171:task/bugbash-test-Cluster-qrvEBaBlImsZ/2243bac3ca1d4b3a8c66888348cba2e1"), + DesiredStatus: aws.String("STOPPED"), + LastStatus: aws.String("STOPPING"), + StoppedReason: aws.String("ResourceInitializationError: unable to pull secrets or registry auth: execution resource retrieval failed: unable to retrieve secrets from ssm: service call has been retried 1 time(s)"), + }, + }, + wantedNumLines: 11, + wantedOut: `Latest 2 stopped tasks + TaskId CurrentStatus DesiredStatus + 21479dca3393490a9d95f27353186bf6 DEPROVISIONING STOPPED + 2243bac3ca1d4b3a8c66888348cba2e1 STOPPING STOPPED + +✘ Latest 2 tasks stopped reason + - 21479dca3393490a9d95f27353186bf6: ELB healthcheck failed + - 2243bac3ca1d4b3a8c66888348cba2e1: ResourceInitializationError: unable + to pull secrets or registry auth: execution resource retrieval failed: + unable to retrieve secrets from ssm: service call has been retried 1 + time(s) `, }, } From 435747c6345e2a1e0a891350dde094bc9916fdc3 Mon Sep 17 00:00:00 2001 From: Adithya Kolla Date: Tue, 5 Sep 2023 16:20:59 -0700 Subject: [PATCH 07/15] use shorttask id and use deployment id --- internal/pkg/aws/ecs/task.go | 4 ++-- internal/pkg/aws/ecs/task_test.go | 25 ++++++++++++++++++++++++ internal/pkg/stream/ecs.go | 9 +++++++-- internal/pkg/stream/ecs_test.go | 8 ++++++++ internal/pkg/term/progress/ecs.go | 4 ++-- internal/pkg/term/progress/ecs_test.go | 27 +++++++++++++------------- 6 files changed, 57 insertions(+), 20 deletions(-) diff --git a/internal/pkg/aws/ecs/task.go b/internal/pkg/aws/ecs/task.go index b2b763037e7..29de2592883 100644 --- a/internal/pkg/aws/ecs/task.go +++ b/internal/pkg/aws/ecs/task.go @@ -55,7 +55,7 @@ type Task ecs.Task // becomes "4082490e (sample-fargate:2)" func (t Task) String() string { taskID, _ := TaskID(aws.StringValue(t.TaskArn)) - taskID = shortTaskID(taskID) + taskID = ShortTaskID(taskID) taskDefName, _ := taskDefinitionName(aws.StringValue(t.TaskDefinitionArn)) return fmt.Sprintf("%s (%s)", taskID, taskDefName) } @@ -291,7 +291,7 @@ func TaskDefinitionVersion(taskDefARN string) (int, error) { return version, nil } -func shortTaskID(id string) string { +func ShortTaskID(id string) string { if len(id) >= shortTaskIDLength { return id[:shortTaskIDLength] } diff --git a/internal/pkg/aws/ecs/task_test.go b/internal/pkg/aws/ecs/task_test.go index cd23e0fa6a6..ab743879ab4 100644 --- a/internal/pkg/aws/ecs/task_test.go +++ b/internal/pkg/aws/ecs/task_test.go @@ -628,6 +628,31 @@ func TestTaskDefinition_EntryPoint(t *testing.T) { } } +func TestShortTaskID(t *testing.T) { + testCases := map[string]struct { + inTaskId string + wantedTaskId string + }{ + "should return short task id": { + inTaskId: "37930fffc2104a1db455aef109b5d122", + wantedTaskId: "37930fff", + }, + } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + // GIVEN + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + got := ShortTaskID(tc.inTaskId) + + require.Equal(t, tc.wantedTaskId, got) + }) + + } +} + func TestFilterRunningTasks(t *testing.T) { testCases := map[string]struct { inTasks []*Task diff --git a/internal/pkg/stream/ecs.go b/internal/pkg/stream/ecs.go index 81291ac08a8..e3b3f9652e6 100644 --- a/internal/pkg/stream/ecs.go +++ b/internal/pkg/stream/ecs.go @@ -53,6 +53,7 @@ type ECSDeployment struct { RolloutState string CreatedAt time.Time UpdatedAt time.Time + Id string } func (d ECSDeployment) isPrimary() bool { @@ -142,6 +143,7 @@ func (s *ECSDeploymentStreamer) Fetch() (next time.Time, done bool, err error) { s.ecsRetries = 0 var deployments []ECSDeployment + var primaryDeploymentId string for _, deployment := range out.Deployments { status := aws.StringValue(deployment.Status) desiredCount, runningCount := aws.Int64Value(deployment.DesiredCount), aws.Int64Value(deployment.RunningCount) @@ -155,13 +157,16 @@ func (s *ECSDeploymentStreamer) Fetch() (next time.Time, done bool, err error) { RolloutState: aws.StringValue(deployment.RolloutState), CreatedAt: aws.TimeValue(deployment.CreatedAt), UpdatedAt: aws.TimeValue(deployment.UpdatedAt), + Id: aws.StringValue(deployment.Id), } deployments = append(deployments, rollingDeploy) if isDeploymentDone(rollingDeploy, s.deploymentCreationTime) { done = true } + if rollingDeploy.isPrimary() { + primaryDeploymentId = rollingDeploy.Id + } } - stoppedSvcTasks, err := s.client.StoppedServiceTasks(s.cluster, s.service) if err != nil { if request.IsErrorThrottle(err) { @@ -174,7 +179,7 @@ func (s *ECSDeploymentStreamer) Fetch() (next time.Time, done bool, err error) { var stoppedTasks []ecs.Task for _, st := range stoppedSvcTasks { - if stoppingAt := aws.TimeValue(st.StoppingAt); stoppingAt.Before(s.deploymentCreationTime) || + if stoppingAt := aws.TimeValue(st.StoppingAt); aws.StringValue(st.StartedBy) != primaryDeploymentId || stoppingAt.Before(s.deploymentCreationTime) || (strings.Contains(aws.StringValue(st.StoppedReason), ecsScalingActivity)) { continue } diff --git a/internal/pkg/stream/ecs_test.go b/internal/pkg/stream/ecs_test.go index 0de053f3744..571a348f733 100644 --- a/internal/pkg/stream/ecs_test.go +++ b/internal/pkg/stream/ecs_test.go @@ -150,6 +150,7 @@ func TestECSDeploymentStreamer_Fetch(t *testing.T) { Status: aws.String("PRIMARY"), TaskDefinition: aws.String("arn:aws:ecs:us-west-2:1111:task-definition/myapp-test-mysvc:2"), UpdatedAt: aws.Time(startDate), + Id: aws.String("ecs-svc/123"), }, { DesiredCount: aws.Int64(10), @@ -160,6 +161,7 @@ func TestECSDeploymentStreamer_Fetch(t *testing.T) { Status: aws.String("ACTIVE"), TaskDefinition: aws.String("arn:aws:ecs:us-west-2:1111:task-definition/myapp-test-mysvc:1"), UpdatedAt: aws.Time(oldStartDate), + Id: aws.String("ecs-svc/456"), }, }, DeploymentConfiguration: &awsecs.DeploymentConfiguration{ @@ -189,6 +191,7 @@ func TestECSDeploymentStreamer_Fetch(t *testing.T) { LastStatus: aws.String("Deprovisioning"), StoppedReason: aws.String("unable to pull secrets"), StoppingAt: aws.Time(startDate.Add(10 * time.Second)), + StartedBy: aws.String("ecs-svc/123"), }, { TaskArn: aws.String("arn:aws:ecs:us-east-2:197732814171:task/bugbash-test-Cluster-qrvEBt"), @@ -196,6 +199,7 @@ func TestECSDeploymentStreamer_Fetch(t *testing.T) { LastStatus: aws.String("Stopped"), StoppedReason: aws.String("unable to pull secrets"), StoppingAt: aws.Time(oldStartDate), + StartedBy: aws.String("ecs-svc/123"), }, { TaskArn: aws.String("arn:aws:ecs:us-east-2:197732814171:task/bugbash-test-Cluster-qrvEBs"), @@ -203,6 +207,7 @@ func TestECSDeploymentStreamer_Fetch(t *testing.T) { LastStatus: aws.String("Deprovisioning"), StoppedReason: aws.String("ELB healthcheck failed"), StoppingAt: aws.Time(startDate.Add(20 * time.Second)), + StartedBy: aws.String("ecs-svc/123"), }, { TaskArn: aws.String("arn:aws:ecs:us-east-2:197732814171:task/bugbash-test-Cluster-qrvEBu"), @@ -210,6 +215,7 @@ func TestECSDeploymentStreamer_Fetch(t *testing.T) { LastStatus: aws.String("Deprovisioning"), StoppedReason: aws.String("Scaling activity initiated by deployment ecs-svc/mocktaskid"), StoppingAt: aws.Time(startDate.Add(30 * time.Second)), + StartedBy: aws.String("ecs-svc/123"), }, }, } @@ -244,6 +250,7 @@ func TestECSDeploymentStreamer_Fetch(t *testing.T) { PendingCount: 0, RolloutState: "COMPLETED", UpdatedAt: startDate, + Id: "ecs-svc/123", }, { Status: "ACTIVE", @@ -254,6 +261,7 @@ func TestECSDeploymentStreamer_Fetch(t *testing.T) { PendingCount: 0, RolloutState: "FAILED", UpdatedAt: oldStartDate, + Id: "ecs-svc/456", }, }, Alarms: []cloudwatch.AlarmStatus{ diff --git a/internal/pkg/term/progress/ecs.go b/internal/pkg/term/progress/ecs.go index 9430215dd84..a4384ad025b 100644 --- a/internal/pkg/term/progress/ecs.go +++ b/internal/pkg/term/progress/ecs.go @@ -218,11 +218,11 @@ func (c *rollingUpdateComponent) renderStoppedTasks(out io.Writer) (numLines int return 0, err } rows = append(rows, []string{ - id, + ecs.ShortTaskID(id), aws.StringValue(st.LastStatus), aws.StringValue(st.DesiredStatus), }) - for i, truncatedReason := range splitByLength(fmt.Sprintf("%s: %s", id, aws.StringValue(st.StoppedReason)), maxCellLength) { + for i, truncatedReason := range splitByLength(fmt.Sprintf("%s: %s", ecs.ShortTaskID(id), aws.StringValue(st.StoppedReason)), maxCellLength) { pretty := fmt.Sprintf(" %s", truncatedReason) if i == 0 { pretty = fmt.Sprintf("- %s", truncatedReason) diff --git a/internal/pkg/term/progress/ecs_test.go b/internal/pkg/term/progress/ecs_test.go index 99c83cf1c85..dab2d638fcc 100644 --- a/internal/pkg/term/progress/ecs_test.go +++ b/internal/pkg/term/progress/ecs_test.go @@ -174,13 +174,13 @@ Alarms }, wantedNumLines: 8, wantedOut: `Latest 2 stopped tasks - TaskId CurrentStatus DesiredStatus - 21479dca3393490a9d95f27353186bf6 DEPROVISIONING STOPPED - 2243bac3ca1d4b3a8c66888348cba2e1 STOPPING STOPPED + TaskId CurrentStatus DesiredStatus + 21479dca DEPROVISIONING STOPPED + 2243bac3 STOPPING STOPPED ✘ Latest 2 tasks stopped reason - - 21479dca3393490a9d95f27353186bf6: ELB healthcheck failed - - 2243bac3ca1d4b3a8c66888348cba2e1: unable to pull secrets + - 21479dca: ELB healthcheck failed + - 2243bac3: unable to pull secrets `, }, "should render stopped tasks and split long stopped reasons": { @@ -198,18 +198,17 @@ Alarms StoppedReason: aws.String("ResourceInitializationError: unable to pull secrets or registry auth: execution resource retrieval failed: unable to retrieve secrets from ssm: service call has been retried 1 time(s)"), }, }, - wantedNumLines: 11, + wantedNumLines: 10, wantedOut: `Latest 2 stopped tasks - TaskId CurrentStatus DesiredStatus - 21479dca3393490a9d95f27353186bf6 DEPROVISIONING STOPPED - 2243bac3ca1d4b3a8c66888348cba2e1 STOPPING STOPPED + TaskId CurrentStatus DesiredStatus + 21479dca DEPROVISIONING STOPPED + 2243bac3 STOPPING STOPPED ✘ Latest 2 tasks stopped reason - - 21479dca3393490a9d95f27353186bf6: ELB healthcheck failed - - 2243bac3ca1d4b3a8c66888348cba2e1: ResourceInitializationError: unable - to pull secrets or registry auth: execution resource retrieval failed: - unable to retrieve secrets from ssm: service call has been retried 1 - time(s) + - 21479dca: ELB healthcheck failed + - 2243bac3: ResourceInitializationError: unable to pull secrets or regis + try auth: execution resource retrieval failed: unable to retrieve secr + ets from ssm: service call has been retried 1 time(s) `, }, } From 967bc6c24f3d548d95d537b5b17a652e3483e1db Mon Sep 17 00:00:00 2001 From: Adithya Kolla Date: Tue, 5 Sep 2023 16:29:30 -0700 Subject: [PATCH 08/15] add doc comment --- internal/pkg/aws/ecs/task.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/pkg/aws/ecs/task.go b/internal/pkg/aws/ecs/task.go index 29de2592883..3f0d6ab76e2 100644 --- a/internal/pkg/aws/ecs/task.go +++ b/internal/pkg/aws/ecs/task.go @@ -291,6 +291,8 @@ func TaskDefinitionVersion(taskDefARN string) (int, error) { return version, nil } +// // ShortTaskID truncates a string to a maximum length of shortTaskIDLength. +// If the input is shorter, it remains unchanged. func ShortTaskID(id string) string { if len(id) >= shortTaskIDLength { return id[:shortTaskIDLength] From beae8214e6de13e4404a3ffcc93673a3562d303c Mon Sep 17 00:00:00 2001 From: Adithya Kolla Date: Tue, 5 Sep 2023 16:34:04 -0700 Subject: [PATCH 09/15] fix doc comment --- internal/pkg/aws/ecs/task.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/pkg/aws/ecs/task.go b/internal/pkg/aws/ecs/task.go index 3f0d6ab76e2..4a982cf315f 100644 --- a/internal/pkg/aws/ecs/task.go +++ b/internal/pkg/aws/ecs/task.go @@ -291,7 +291,7 @@ func TaskDefinitionVersion(taskDefARN string) (int, error) { return version, nil } -// // ShortTaskID truncates a string to a maximum length of shortTaskIDLength. +// ShortTaskID truncates a string to a maximum length of shortTaskIDLength. // If the input is shorter, it remains unchanged. func ShortTaskID(id string) string { if len(id) >= shortTaskIDLength { From 9109bffa6843940c14812bb73a4af1a20a93f052 Mon Sep 17 00:00:00 2001 From: Adithya Kolla Date: Tue, 5 Sep 2023 19:21:12 -0700 Subject: [PATCH 10/15] collapse taskids incase of same reason --- internal/pkg/term/progress/ecs.go | 31 ++++++++++++++++++++- internal/pkg/term/progress/ecs_test.go | 37 +++++++++++++++++++++----- 2 files changed, 61 insertions(+), 7 deletions(-) diff --git a/internal/pkg/term/progress/ecs.go b/internal/pkg/term/progress/ecs.go index a4384ad025b..1ac213e2947 100644 --- a/internal/pkg/term/progress/ecs.go +++ b/internal/pkg/term/progress/ecs.go @@ -8,6 +8,7 @@ import ( "fmt" "io" "strconv" + "strings" "sync" "github.com/aws/aws-sdk-go/aws" @@ -197,6 +198,11 @@ func (c *rollingUpdateComponent) renderAlarms(out io.Writer) (numLines int, err return renderComponents(out, components) } +type stoppedTasksInfo struct { + ids []string + reason string +} + func (c *rollingUpdateComponent) renderStoppedTasks(out io.Writer) (numLines int, err error) { if len(c.stoppedTasks) == 0 { return 0, nil @@ -212,17 +218,40 @@ func (c *rollingUpdateComponent) renderStoppedTasks(out io.Writer) (numLines int Padding: c.padding, }, } + + taskInfoSlice := make([]stoppedTasksInfo, 0, len(c.stoppedTasks)) for _, st := range c.stoppedTasks { id, err := ecs.TaskID(aws.StringValue(st.TaskArn)) if err != nil { return 0, err } + // Check if there is already an entry with the same task stopped reason. + var found bool + for i, taskInfo := range taskInfoSlice { + if taskInfo.reason == aws.StringValue(st.StoppedReason) { + taskInfoSlice[i].ids = append(taskInfoSlice[i].ids, ecs.ShortTaskID(id)) + found = true + break + } + } + + // If not found, create a new entry + if !found { + stInfo := stoppedTasksInfo{ + reason: aws.StringValue(st.StoppedReason), + ids: []string{ecs.ShortTaskID(id)}, + } + taskInfoSlice = append(taskInfoSlice, stInfo) + } + rows = append(rows, []string{ ecs.ShortTaskID(id), aws.StringValue(st.LastStatus), aws.StringValue(st.DesiredStatus), }) - for i, truncatedReason := range splitByLength(fmt.Sprintf("%s: %s", ecs.ShortTaskID(id), aws.StringValue(st.StoppedReason)), maxCellLength) { + } + for _, info := range taskInfoSlice { + for i, truncatedReason := range splitByLength(fmt.Sprintf("[%s]: %s", strings.Join(info.ids, ","), info.reason), maxCellLength) { pretty := fmt.Sprintf(" %s", truncatedReason) if i == 0 { pretty = fmt.Sprintf("- %s", truncatedReason) diff --git a/internal/pkg/term/progress/ecs_test.go b/internal/pkg/term/progress/ecs_test.go index dab2d638fcc..9a0e730a629 100644 --- a/internal/pkg/term/progress/ecs_test.go +++ b/internal/pkg/term/progress/ecs_test.go @@ -179,8 +179,33 @@ Alarms 2243bac3 STOPPING STOPPED ✘ Latest 2 tasks stopped reason - - 21479dca: ELB healthcheck failed - - 2243bac3: unable to pull secrets + - [21479dca]: ELB healthcheck failed + - [2243bac3]: unable to pull secrets +`, + }, + "render collapse if task reasons are same": { + inStoppedTasks: []ecs.Task{ + { + TaskArn: aws.String("arn:aws:ecs:us-east-2:197732814171:task/bugbash-test-Cluster-qrvEBaBlImsZ/21479dca3393490a9d95f27353186bf6"), + DesiredStatus: aws.String("STOPPED"), + LastStatus: aws.String("DEPROVISIONING"), + StoppedReason: aws.String("Essential container in the task exited"), + }, + { + TaskArn: aws.String("arn:aws:ecs:us-east-2:197732814171:task/bugbash-test-Cluster-qrvEBaBlImsZ/2243bac3ca1d4b3a8c66888348cba2e1"), + DesiredStatus: aws.String("STOPPED"), + LastStatus: aws.String("STOPPING"), + StoppedReason: aws.String("Essential container in the task exited"), + }, + }, + wantedNumLines: 7, + wantedOut: `Latest 2 stopped tasks + TaskId CurrentStatus DesiredStatus + 21479dca DEPROVISIONING STOPPED + 2243bac3 STOPPING STOPPED + +✘ Latest 2 tasks stopped reason + - [21479dca,2243bac3]: Essential container in the task exited `, }, "should render stopped tasks and split long stopped reasons": { @@ -205,10 +230,10 @@ Alarms 2243bac3 STOPPING STOPPED ✘ Latest 2 tasks stopped reason - - 21479dca: ELB healthcheck failed - - 2243bac3: ResourceInitializationError: unable to pull secrets or regis - try auth: execution resource retrieval failed: unable to retrieve secr - ets from ssm: service call has been retried 1 time(s) + - [21479dca]: ELB healthcheck failed + - [2243bac3]: ResourceInitializationError: unable to pull secrets or reg + istry auth: execution resource retrieval failed: unable to retrieve se + crets from ssm: service call has been retried 1 time(s) `, }, } From 59523b1189b103b357e23858b1d6f4e6d4018660 Mon Sep 17 00:00:00 2001 From: Adithya Kolla Date: Wed, 6 Sep 2023 12:14:19 -0700 Subject: [PATCH 11/15] add recommended actions --- internal/pkg/term/progress/ecs.go | 14 +++++++++++++ internal/pkg/term/progress/ecs_test.go | 29 ++++++++++++++++++-------- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/internal/pkg/term/progress/ecs.go b/internal/pkg/term/progress/ecs.go index 1ac213e2947..283826f1a13 100644 --- a/internal/pkg/term/progress/ecs.go +++ b/internal/pkg/term/progress/ecs.go @@ -264,6 +264,12 @@ func (c *rollingUpdateComponent) renderStoppedTasks(out io.Writer) (numLines int } table := newTableComponent(color.Faint.Sprintf("Latest %d stopped %s", len(c.stoppedTasks), english.PluralWord(len(c.stoppedTasks), "task", "tasks")), header, rows) table.Padding = c.padding + var debug debugStoppedTaskReason + childComponents = append(childComponents, + &singleLineComponent{ + Text: debug.RecommendActions(), + Padding: c.padding + nestedComponentPadding, + }) treeComponent := treeComponent{ Root: table, Children: childComponents, @@ -292,3 +298,11 @@ func parseServiceARN(arn string) (cluster, service string) { // Errors can't happen on valid ARNs. return parsed.ClusterName(), parsed.ServiceName() } + +type debugStoppedTaskReason struct{} + +func (d *debugStoppedTaskReason) RecommendActions() string { + return fmt.Sprintf(`To troubleshoot the task stopped reason: + 1. You can run %s to see the logs of the last Stopped Task. + 2. You can follow this article https://repost.aws/knowledge-center/ecs-task-stopped.`, color.HighlightCode("copilot svc logs --previous")) +} diff --git a/internal/pkg/term/progress/ecs_test.go b/internal/pkg/term/progress/ecs_test.go index 9a0e730a629..b1f85db8394 100644 --- a/internal/pkg/term/progress/ecs_test.go +++ b/internal/pkg/term/progress/ecs_test.go @@ -4,6 +4,7 @@ package progress import ( + "fmt" "strings" "testing" @@ -11,6 +12,7 @@ import ( "github.com/aws/copilot-cli/internal/pkg/aws/cloudwatch" "github.com/aws/copilot-cli/internal/pkg/aws/ecs" "github.com/aws/copilot-cli/internal/pkg/stream" + "github.com/aws/copilot-cli/internal/pkg/term/color" "github.com/stretchr/testify/require" ) @@ -172,8 +174,8 @@ Alarms StoppedReason: aws.String("unable to pull secrets"), }, }, - wantedNumLines: 8, - wantedOut: `Latest 2 stopped tasks + wantedNumLines: 9, + wantedOut: fmt.Sprintf(`Latest 2 stopped tasks TaskId CurrentStatus DesiredStatus 21479dca DEPROVISIONING STOPPED 2243bac3 STOPPING STOPPED @@ -181,7 +183,10 @@ Alarms ✘ Latest 2 tasks stopped reason - [21479dca]: ELB healthcheck failed - [2243bac3]: unable to pull secrets -`, + To troubleshoot the task stopped reason: + 1. You can run %s to see the logs of the last Stopped Task. + 2. You can follow this article https://repost.aws/knowledge-center/ecs-task-stopped. +`, color.HighlightCode("copilot svc logs --previous")), }, "render collapse if task reasons are same": { inStoppedTasks: []ecs.Task{ @@ -198,15 +203,18 @@ Alarms StoppedReason: aws.String("Essential container in the task exited"), }, }, - wantedNumLines: 7, - wantedOut: `Latest 2 stopped tasks + wantedNumLines: 8, + wantedOut: fmt.Sprintf(`Latest 2 stopped tasks TaskId CurrentStatus DesiredStatus 21479dca DEPROVISIONING STOPPED 2243bac3 STOPPING STOPPED ✘ Latest 2 tasks stopped reason - [21479dca,2243bac3]: Essential container in the task exited -`, + To troubleshoot the task stopped reason: + 1. You can run %s to see the logs of the last Stopped Task. + 2. You can follow this article https://repost.aws/knowledge-center/ecs-task-stopped. +`, color.HighlightCode("copilot svc logs --previous")), }, "should render stopped tasks and split long stopped reasons": { inStoppedTasks: []ecs.Task{ @@ -223,8 +231,8 @@ Alarms StoppedReason: aws.String("ResourceInitializationError: unable to pull secrets or registry auth: execution resource retrieval failed: unable to retrieve secrets from ssm: service call has been retried 1 time(s)"), }, }, - wantedNumLines: 10, - wantedOut: `Latest 2 stopped tasks + wantedNumLines: 11, + wantedOut: fmt.Sprintf(`Latest 2 stopped tasks TaskId CurrentStatus DesiredStatus 21479dca DEPROVISIONING STOPPED 2243bac3 STOPPING STOPPED @@ -234,7 +242,10 @@ Alarms - [2243bac3]: ResourceInitializationError: unable to pull secrets or reg istry auth: execution resource retrieval failed: unable to retrieve se crets from ssm: service call has been retried 1 time(s) -`, + To troubleshoot the task stopped reason: + 1. You can run %s to see the logs of the last Stopped Task. + 2. You can follow this article https://repost.aws/knowledge-center/ecs-task-stopped. +`, color.HighlightCode("copilot svc logs --previous")), }, } From 8af9adc29412199851f51e30638fca641eff7818 Mon Sep 17 00:00:00 2001 From: Adithya Kolla Date: Wed, 6 Sep 2023 18:55:48 -0700 Subject: [PATCH 12/15] remove bebug struct and use single comp --- internal/pkg/term/progress/ecs.go | 21 +++++++++++---------- internal/pkg/term/progress/ecs_test.go | 15 +++++++++------ 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/internal/pkg/term/progress/ecs.go b/internal/pkg/term/progress/ecs.go index 283826f1a13..5c0a15d7f81 100644 --- a/internal/pkg/term/progress/ecs.go +++ b/internal/pkg/term/progress/ecs.go @@ -264,10 +264,19 @@ func (c *rollingUpdateComponent) renderStoppedTasks(out io.Writer) (numLines int } table := newTableComponent(color.Faint.Sprintf("Latest %d stopped %s", len(c.stoppedTasks), english.PluralWord(len(c.stoppedTasks), "task", "tasks")), header, rows) table.Padding = c.padding - var debug debugStoppedTaskReason childComponents = append(childComponents, + &singleLineComponent{}, &singleLineComponent{ - Text: debug.RecommendActions(), + Text: color.Faint.Sprintf("Troubleshoot task stopped reason"), + Padding: c.padding, + }, + &singleLineComponent{ + Text: fmt.Sprintf("1. You can run %s to see the logs of the last Stopped Task.", + color.HighlightCode("copilot svc logs --previous")), + Padding: c.padding + nestedComponentPadding, + }, + &singleLineComponent{ + Text: fmt.Sprintf("2. You can follow this article %s.", color.Emphasize("https://repost.aws/knowledge-center/ecs-task-stopped")), Padding: c.padding + nestedComponentPadding, }) treeComponent := treeComponent{ @@ -298,11 +307,3 @@ func parseServiceARN(arn string) (cluster, service string) { // Errors can't happen on valid ARNs. return parsed.ClusterName(), parsed.ServiceName() } - -type debugStoppedTaskReason struct{} - -func (d *debugStoppedTaskReason) RecommendActions() string { - return fmt.Sprintf(`To troubleshoot the task stopped reason: - 1. You can run %s to see the logs of the last Stopped Task. - 2. You can follow this article https://repost.aws/knowledge-center/ecs-task-stopped.`, color.HighlightCode("copilot svc logs --previous")) -} diff --git a/internal/pkg/term/progress/ecs_test.go b/internal/pkg/term/progress/ecs_test.go index b1f85db8394..3768be43945 100644 --- a/internal/pkg/term/progress/ecs_test.go +++ b/internal/pkg/term/progress/ecs_test.go @@ -174,7 +174,7 @@ Alarms StoppedReason: aws.String("unable to pull secrets"), }, }, - wantedNumLines: 9, + wantedNumLines: 12, wantedOut: fmt.Sprintf(`Latest 2 stopped tasks TaskId CurrentStatus DesiredStatus 21479dca DEPROVISIONING STOPPED @@ -183,7 +183,8 @@ Alarms ✘ Latest 2 tasks stopped reason - [21479dca]: ELB healthcheck failed - [2243bac3]: unable to pull secrets - To troubleshoot the task stopped reason: + +Troubleshoot task stopped reason 1. You can run %s to see the logs of the last Stopped Task. 2. You can follow this article https://repost.aws/knowledge-center/ecs-task-stopped. `, color.HighlightCode("copilot svc logs --previous")), @@ -203,7 +204,7 @@ Alarms StoppedReason: aws.String("Essential container in the task exited"), }, }, - wantedNumLines: 8, + wantedNumLines: 11, wantedOut: fmt.Sprintf(`Latest 2 stopped tasks TaskId CurrentStatus DesiredStatus 21479dca DEPROVISIONING STOPPED @@ -211,7 +212,8 @@ Alarms ✘ Latest 2 tasks stopped reason - [21479dca,2243bac3]: Essential container in the task exited - To troubleshoot the task stopped reason: + +Troubleshoot task stopped reason 1. You can run %s to see the logs of the last Stopped Task. 2. You can follow this article https://repost.aws/knowledge-center/ecs-task-stopped. `, color.HighlightCode("copilot svc logs --previous")), @@ -231,7 +233,7 @@ Alarms StoppedReason: aws.String("ResourceInitializationError: unable to pull secrets or registry auth: execution resource retrieval failed: unable to retrieve secrets from ssm: service call has been retried 1 time(s)"), }, }, - wantedNumLines: 11, + wantedNumLines: 14, wantedOut: fmt.Sprintf(`Latest 2 stopped tasks TaskId CurrentStatus DesiredStatus 21479dca DEPROVISIONING STOPPED @@ -242,7 +244,8 @@ Alarms - [2243bac3]: ResourceInitializationError: unable to pull secrets or reg istry auth: execution resource retrieval failed: unable to retrieve se crets from ssm: service call has been retried 1 time(s) - To troubleshoot the task stopped reason: + +Troubleshoot task stopped reason 1. You can run %s to see the logs of the last Stopped Task. 2. You can follow this article https://repost.aws/knowledge-center/ecs-task-stopped. `, color.HighlightCode("copilot svc logs --previous")), From 9212a5180454298638b743a29797c7671e4dc90e Mon Sep 17 00:00:00 2001 From: Adithya Kolla Date: Wed, 6 Sep 2023 22:55:59 -0700 Subject: [PATCH 13/15] addr fb: use map and sort the map --- internal/pkg/term/progress/ecs.go | 50 +++++++++++++------------- internal/pkg/term/progress/ecs_test.go | 22 ++++++++---- 2 files changed, 41 insertions(+), 31 deletions(-) diff --git a/internal/pkg/term/progress/ecs.go b/internal/pkg/term/progress/ecs.go index 5c0a15d7f81..9bb583e458a 100644 --- a/internal/pkg/term/progress/ecs.go +++ b/internal/pkg/term/progress/ecs.go @@ -7,9 +7,11 @@ import ( "bytes" "fmt" "io" + "sort" "strconv" "strings" "sync" + "time" "github.com/aws/aws-sdk-go/aws" "github.com/aws/copilot-cli/internal/pkg/aws/cloudwatch" @@ -199,8 +201,8 @@ func (c *rollingUpdateComponent) renderAlarms(out io.Writer) (numLines int, err } type stoppedTasksInfo struct { - ids []string - reason string + ids []string + latestStoppingAt time.Time } func (c *rollingUpdateComponent) renderStoppedTasks(out io.Writer) (numLines int, err error) { @@ -219,39 +221,38 @@ func (c *rollingUpdateComponent) renderStoppedTasks(out io.Writer) (numLines int }, } - taskInfoSlice := make([]stoppedTasksInfo, 0, len(c.stoppedTasks)) + taskInfoMap := make(map[string]stoppedTasksInfo, len(c.stoppedTasks)) for _, st := range c.stoppedTasks { id, err := ecs.TaskID(aws.StringValue(st.TaskArn)) if err != nil { return 0, err } - // Check if there is already an entry with the same task stopped reason. - var found bool - for i, taskInfo := range taskInfoSlice { - if taskInfo.reason == aws.StringValue(st.StoppedReason) { - taskInfoSlice[i].ids = append(taskInfoSlice[i].ids, ecs.ShortTaskID(id)) - found = true - break + existingTaskInfo, ok := taskInfoMap[aws.StringValue(st.StoppedReason)] + if ok { + existingTaskInfo.ids = append(existingTaskInfo.ids, ecs.ShortTaskID(id)) + existingTaskInfo.latestStoppingAt = aws.TimeValue(st.StoppingAt) + taskInfoMap[aws.StringValue(st.StoppedReason)] = existingTaskInfo + } else { + taskInfoMap[aws.StringValue(st.StoppedReason)] = stoppedTasksInfo{ + ids: []string{ecs.ShortTaskID(id)}, + latestStoppingAt: aws.TimeValue(st.StoppingAt), } } - - // If not found, create a new entry - if !found { - stInfo := stoppedTasksInfo{ - reason: aws.StringValue(st.StoppedReason), - ids: []string{ecs.ShortTaskID(id)}, - } - taskInfoSlice = append(taskInfoSlice, stInfo) - } - rows = append(rows, []string{ ecs.ShortTaskID(id), aws.StringValue(st.LastStatus), aws.StringValue(st.DesiredStatus), }) } - for _, info := range taskInfoSlice { - for i, truncatedReason := range splitByLength(fmt.Sprintf("[%s]: %s", strings.Join(info.ids, ","), info.reason), maxCellLength) { + var sortReasons []string + for reason := range taskInfoMap { + sortReasons = append(sortReasons, reason) + } + sort.SliceStable(sortReasons, func(i, j int) bool { + return taskInfoMap[sortReasons[i]].latestStoppingAt.After(taskInfoMap[sortReasons[j]].latestStoppingAt) + }) + for _, reason := range sortReasons { + for i, truncatedReason := range splitByLength(fmt.Sprintf("[%s]: %s", strings.Join(taskInfoMap[reason].ids, ","), reason), maxCellLength) { pretty := fmt.Sprintf(" %s", truncatedReason) if i == 0 { pretty = fmt.Sprintf("- %s", truncatedReason) @@ -271,12 +272,13 @@ func (c *rollingUpdateComponent) renderStoppedTasks(out io.Writer) (numLines int Padding: c.padding, }, &singleLineComponent{ - Text: fmt.Sprintf("1. You can run %s to see the logs of the last Stopped Task.", + Text: fmt.Sprintf("1. You can run %s to see the logs of the last stopped task.", color.HighlightCode("copilot svc logs --previous")), Padding: c.padding + nestedComponentPadding, }, &singleLineComponent{ - Text: fmt.Sprintf("2. You can follow this article %s.", color.Emphasize("https://repost.aws/knowledge-center/ecs-task-stopped")), + Text: fmt.Sprintf("2. You can visit this article %s.", + color.Emphasize("https://repost.aws/knowledge-center/ecs-task-stopped")), Padding: c.padding + nestedComponentPadding, }) treeComponent := treeComponent{ diff --git a/internal/pkg/term/progress/ecs_test.go b/internal/pkg/term/progress/ecs_test.go index 3768be43945..fdb253e7e3a 100644 --- a/internal/pkg/term/progress/ecs_test.go +++ b/internal/pkg/term/progress/ecs_test.go @@ -7,6 +7,7 @@ import ( "fmt" "strings" "testing" + "time" "github.com/aws/aws-sdk-go/aws" "github.com/aws/copilot-cli/internal/pkg/aws/cloudwatch" @@ -81,6 +82,7 @@ func TestRollingUpdateComponent_Listen(t *testing.T) { } func TestRollingUpdateComponent_Render(t *testing.T) { + startDate := time.Date(2020, time.November, 23, 18, 0, 0, 0, time.UTC) testCases := map[string]struct { inDeployments []stream.ECSDeployment inFailureMsgs []string @@ -166,12 +168,14 @@ Alarms DesiredStatus: aws.String("STOPPED"), LastStatus: aws.String("DEPROVISIONING"), StoppedReason: aws.String("ELB healthcheck failed"), + StoppingAt: aws.Time(startDate.Add(20 * time.Second)), }, { TaskArn: aws.String("arn:aws:ecs:us-east-2:197732814171:task/bugbash-test-Cluster-qrvEBaBlImsZ/2243bac3ca1d4b3a8c66888348cba2e1"), DesiredStatus: aws.String("STOPPED"), LastStatus: aws.String("STOPPING"), StoppedReason: aws.String("unable to pull secrets"), + StoppingAt: aws.Time(startDate.Add(10 * time.Second)), }, }, wantedNumLines: 12, @@ -185,23 +189,25 @@ Alarms - [2243bac3]: unable to pull secrets Troubleshoot task stopped reason - 1. You can run %s to see the logs of the last Stopped Task. - 2. You can follow this article https://repost.aws/knowledge-center/ecs-task-stopped. + 1. You can run %s to see the logs of the last stopped task. + 2. You can visit this article https://repost.aws/knowledge-center/ecs-task-stopped. `, color.HighlightCode("copilot svc logs --previous")), }, - "render collapse if task reasons are same": { + "render collapse taskids if task reasons are same": { inStoppedTasks: []ecs.Task{ { TaskArn: aws.String("arn:aws:ecs:us-east-2:197732814171:task/bugbash-test-Cluster-qrvEBaBlImsZ/21479dca3393490a9d95f27353186bf6"), DesiredStatus: aws.String("STOPPED"), LastStatus: aws.String("DEPROVISIONING"), StoppedReason: aws.String("Essential container in the task exited"), + StoppingAt: aws.Time(startDate.Add(20 * time.Second)), }, { TaskArn: aws.String("arn:aws:ecs:us-east-2:197732814171:task/bugbash-test-Cluster-qrvEBaBlImsZ/2243bac3ca1d4b3a8c66888348cba2e1"), DesiredStatus: aws.String("STOPPED"), LastStatus: aws.String("STOPPING"), StoppedReason: aws.String("Essential container in the task exited"), + StoppingAt: aws.Time(startDate.Add(10 * time.Second)), }, }, wantedNumLines: 11, @@ -214,8 +220,8 @@ Troubleshoot task stopped reason - [21479dca,2243bac3]: Essential container in the task exited Troubleshoot task stopped reason - 1. You can run %s to see the logs of the last Stopped Task. - 2. You can follow this article https://repost.aws/knowledge-center/ecs-task-stopped. + 1. You can run %s to see the logs of the last stopped task. + 2. You can visit this article https://repost.aws/knowledge-center/ecs-task-stopped. `, color.HighlightCode("copilot svc logs --previous")), }, "should render stopped tasks and split long stopped reasons": { @@ -225,12 +231,14 @@ Troubleshoot task stopped reason DesiredStatus: aws.String("STOPPED"), LastStatus: aws.String("DEPROVISIONING"), StoppedReason: aws.String("ELB healthcheck failed"), + StoppingAt: aws.Time(startDate.Add(20 * time.Second)), }, { TaskArn: aws.String("arn:aws:ecs:us-east-2:197732814171:task/bugbash-test-Cluster-qrvEBaBlImsZ/2243bac3ca1d4b3a8c66888348cba2e1"), DesiredStatus: aws.String("STOPPED"), LastStatus: aws.String("STOPPING"), StoppedReason: aws.String("ResourceInitializationError: unable to pull secrets or registry auth: execution resource retrieval failed: unable to retrieve secrets from ssm: service call has been retried 1 time(s)"), + StoppingAt: aws.Time(startDate.Add(10 * time.Second)), }, }, wantedNumLines: 14, @@ -246,8 +254,8 @@ Troubleshoot task stopped reason crets from ssm: service call has been retried 1 time(s) Troubleshoot task stopped reason - 1. You can run %s to see the logs of the last Stopped Task. - 2. You can follow this article https://repost.aws/knowledge-center/ecs-task-stopped. + 1. You can run %s to see the logs of the last stopped task. + 2. You can visit this article https://repost.aws/knowledge-center/ecs-task-stopped. `, color.HighlightCode("copilot svc logs --previous")), }, } From 7d7078acab269030712b7654173f03cb990304ea Mon Sep 17 00:00:00 2001 From: Adithya Kolla Date: Thu, 7 Sep 2023 09:22:16 -0700 Subject: [PATCH 14/15] add testcase --- internal/pkg/aws/ecs/task_test.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/internal/pkg/aws/ecs/task_test.go b/internal/pkg/aws/ecs/task_test.go index ab743879ab4..3bbd75e46f2 100644 --- a/internal/pkg/aws/ecs/task_test.go +++ b/internal/pkg/aws/ecs/task_test.go @@ -633,10 +633,14 @@ func TestShortTaskID(t *testing.T) { inTaskId string wantedTaskId string }{ - "should return short task id": { + "return truncated short task id": { inTaskId: "37930fffc2104a1db455aef109b5d122", wantedTaskId: "37930fff", }, + "return given short taskid": { + inTaskId: "37930fff", + wantedTaskId: "37930fff", + }, } for name, tc := range testCases { @@ -644,9 +648,9 @@ func TestShortTaskID(t *testing.T) { // GIVEN ctrl := gomock.NewController(t) defer ctrl.Finish() - + // WHEN got := ShortTaskID(tc.inTaskId) - + // THEN require.Equal(t, tc.wantedTaskId, got) }) From e287829ec6865d1260b84bc63ae766a3611172e5 Mon Sep 17 00:00:00 2001 From: Adithya Kolla Date: Fri, 8 Sep 2023 14:30:12 -0700 Subject: [PATCH 15/15] addr @Lou1415926 fb --- internal/pkg/term/progress/ecs.go | 36 +++++++++++++++---------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/internal/pkg/term/progress/ecs.go b/internal/pkg/term/progress/ecs.go index 9bb583e458a..ea51f30bb5e 100644 --- a/internal/pkg/term/progress/ecs.go +++ b/internal/pkg/term/progress/ecs.go @@ -200,11 +200,6 @@ func (c *rollingUpdateComponent) renderAlarms(out io.Writer) (numLines int, err return renderComponents(out, components) } -type stoppedTasksInfo struct { - ids []string - latestStoppingAt time.Time -} - func (c *rollingUpdateComponent) renderStoppedTasks(out io.Writer) (numLines int, err error) { if len(c.stoppedTasks) == 0 { return 0, nil @@ -220,20 +215,23 @@ func (c *rollingUpdateComponent) renderStoppedTasks(out io.Writer) (numLines int Padding: c.padding, }, } - - taskInfoMap := make(map[string]stoppedTasksInfo, len(c.stoppedTasks)) + type stoppedTasksInfo struct { + ids []string + latestStoppingAt time.Time + } + stopReason2Tasks := make(map[string]*stoppedTasksInfo, len(c.stoppedTasks)) for _, st := range c.stoppedTasks { id, err := ecs.TaskID(aws.StringValue(st.TaskArn)) if err != nil { return 0, err } - existingTaskInfo, ok := taskInfoMap[aws.StringValue(st.StoppedReason)] + tasks, ok := stopReason2Tasks[aws.StringValue(st.StoppedReason)] if ok { - existingTaskInfo.ids = append(existingTaskInfo.ids, ecs.ShortTaskID(id)) - existingTaskInfo.latestStoppingAt = aws.TimeValue(st.StoppingAt) - taskInfoMap[aws.StringValue(st.StoppedReason)] = existingTaskInfo + tasks.ids = append(tasks.ids, ecs.ShortTaskID(id)) + tasks.latestStoppingAt = aws.TimeValue(st.StoppingAt) + stopReason2Tasks[aws.StringValue(st.StoppedReason)] = tasks } else { - taskInfoMap[aws.StringValue(st.StoppedReason)] = stoppedTasksInfo{ + stopReason2Tasks[aws.StringValue(st.StoppedReason)] = &stoppedTasksInfo{ ids: []string{ecs.ShortTaskID(id)}, latestStoppingAt: aws.TimeValue(st.StoppingAt), } @@ -244,15 +242,15 @@ func (c *rollingUpdateComponent) renderStoppedTasks(out io.Writer) (numLines int aws.StringValue(st.DesiredStatus), }) } - var sortReasons []string - for reason := range taskInfoMap { - sortReasons = append(sortReasons, reason) + var sortedReasons []string + for reason := range stopReason2Tasks { + sortedReasons = append(sortedReasons, reason) } - sort.SliceStable(sortReasons, func(i, j int) bool { - return taskInfoMap[sortReasons[i]].latestStoppingAt.After(taskInfoMap[sortReasons[j]].latestStoppingAt) + sort.SliceStable(sortedReasons, func(i, j int) bool { + return stopReason2Tasks[sortedReasons[i]].latestStoppingAt.After(stopReason2Tasks[sortedReasons[j]].latestStoppingAt) }) - for _, reason := range sortReasons { - for i, truncatedReason := range splitByLength(fmt.Sprintf("[%s]: %s", strings.Join(taskInfoMap[reason].ids, ","), reason), maxCellLength) { + for _, reason := range sortedReasons { + for i, truncatedReason := range splitByLength(fmt.Sprintf("[%s]: %s", strings.Join(stopReason2Tasks[reason].ids, ","), reason), maxCellLength) { pretty := fmt.Sprintf(" %s", truncatedReason) if i == 0 { pretty = fmt.Sprintf("- %s", truncatedReason)