Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ func (a *Agent) UpdateTaskStatus(ctx context.Context, taskID string, status *api
err = nil // dispatcher no longer cares about this task.
} else {
log.G(ctx).WithError(err).Error("closing session after fatal error")
session.close()
session.errs <- err
}
} else {
log.G(ctx).Debug("task status reported")
Expand Down
26 changes: 24 additions & 2 deletions agent/reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@ package agent
import (
"reflect"
"sync"
"time"

"github.com/docker/swarmkit/api"
"github.com/docker/swarmkit/log"
"golang.org/x/net/context"
)

// updateDelay is how long the status reporter's run loop waits before waking
// itself up to retry status updates that are still pending, when no new
// update is signaled in the meantime.
const updateDelay = 4 * time.Second

// StatusReporter receives updates to task status. Method may be called
// concurrently, so implementations should be goroutine-safe.
type StatusReporter interface {
Expand Down Expand Up @@ -92,10 +95,26 @@ func (sr *statusReporter) run(ctx context.Context) {
}()

for {
if len(sr.statuses) == 0 {
sr.cond.Wait()
exitCh := make(chan struct{})

if len(sr.statuses) != 0 {
// If this is a retry (statuses are still pending), wait for a while unless
// new updates are reported; this keeps the retry loop from spinning tightly on update failures.
go func() {
after := time.NewTimer(updateDelay)
defer after.Stop()
select {
case <-after.C:
sr.cond.Signal()
case <-exitCh:
}
}()
}

sr.cond.Wait()
// exit timer goroutine if there was one
close(exitCh)

if sr.closed {
// TODO(stevvooe): Add support here for waiting until all
// statuses are flushed before shutting down.
Expand Down Expand Up @@ -123,6 +142,9 @@ func (sr *statusReporter) run(ctx context.Context) {
if _, ok := sr.statuses[taskID]; !ok {
sr.statuses[taskID] = status
}
// If a task update failed, give the agent some time to recover from the
// possible error before trying to update other tasks.
break
}
}
}
Expand Down