Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 47 additions & 1 deletion cmd/claw-api/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ type dispatchOptions struct {
ignoreDegraded bool
}

// Exec budgets applied to a scheduled wake command, selected per adapter by
// wakeExecTimeout.
const (
	// defaultWakeExecTimeout is the budget for every adapter without a
	// dedicated entry.
	defaultWakeExecTimeout = 30 * time.Second
	// openclawWakeExecTimeout is the longer budget used for "openclaw-exec"
	// wakes.
	openclawWakeExecTimeout = 2 * time.Minute
)

func newScheduler(manifest *schedulepkg.Manifest, docker *client.Client, state *scheduleStateStore, log io.Writer) (*scheduler, error) {
if manifest == nil || len(manifest.Invocations) == 0 {
return nil, nil
Expand Down Expand Up @@ -229,7 +232,15 @@ func (s *scheduler) dispatchWithOptions(ctx context.Context, entry *scheduledInv
return result
}

execCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
if detail, skip := s.deferWakeForHealth(ctx, target.ID, entry.manifest.Wake.Adapter); skip {
result.status = "skipped"
result.detail = detail
result.skipped = true
s.logf("schedule %s: skipped (%s)", entry.manifest.ID, result.detail)
return result
}

execCtx, cancel := context.WithTimeout(ctx, wakeExecTimeout(entry.manifest.Wake.Adapter))
defer cancel()
stdout, stderr, exitCode, err := shared.ExecInContainer(execCtx, s.docker, target.ID, entry.manifest.Wake.Command)
if err != nil {
Expand Down Expand Up @@ -275,6 +286,41 @@ func (s *scheduler) dispatchWithOptions(ctx context.Context, entry *scheduledInv
return result
}

// wakeExecTimeout returns the exec budget for a wake issued through adapter.
// The adapter name is compared after trimming surrounding whitespace:
// "openclaw-exec" gets openclawWakeExecTimeout, anything else gets
// defaultWakeExecTimeout.
func wakeExecTimeout(adapter string) time.Duration {
	if name := strings.TrimSpace(adapter); name == "openclaw-exec" {
		return openclawWakeExecTimeout
	}
	return defaultWakeExecTimeout
}

// deferWakeForHealthStatus decides whether a scheduled wake should be deferred
// based on the health reported in the target container's state.
//
// Only the "openclaw-exec" adapter (compared after trimming whitespace) is
// health-gated; any other adapter, a nil state, or a state without health data
// proceeds. The health status is trimmed and lower-cased before matching:
//   - "", "none" and "healthy" proceed. "none" is the value Docker uses to
//     signal that no healthcheck is configured, so it must not block wakes.
//   - every other status (for example "starting" or "unhealthy") defers.
//
// It returns the skip detail "target-health-<status>" and true when the wake
// should be deferred, or "" and false when it should proceed.
func deferWakeForHealthStatus(adapter string, state *types.ContainerState) (string, bool) {
	if strings.TrimSpace(adapter) != "openclaw-exec" || state == nil || state.Health == nil {
		return "", false
	}
	switch status := strings.ToLower(strings.TrimSpace(state.Health.Status)); status {
	case "", "none", "healthy":
		return "", false
	default:
		// Covers "starting", "unhealthy" and any unrecognized status.
		return "target-health-" + status, true
	}
}

// deferWakeForHealth inspects the target container and reports whether a wake
// issued through adapter should be deferred because of the container's health.
//
// It returns ("", false), meaning proceed, when the scheduler or its Docker
// client is nil, when the adapter is not "openclaw-exec", or when the inspect
// call fails. Otherwise the decision is delegated to deferWakeForHealthStatus
// using the inspected container state.
func (s *scheduler) deferWakeForHealth(ctx context.Context, containerID, adapter string) (string, bool) {
	if s == nil || s.docker == nil {
		return "", false
	}
	// Only openclaw-exec wakes are health-gated, so skip the Docker round trip
	// for every other adapter instead of inspecting and discarding the result.
	if strings.TrimSpace(adapter) != "openclaw-exec" {
		return "", false
	}
	info, err := s.docker.ContainerInspect(ctx, containerID)
	if err != nil {
		// Fail open: an inspect error alone never defers a wake.
		return "", false
	}
	return deferWakeForHealthStatus(adapter, info.State)
}

func (s *scheduler) lookupTargetContainer(ctx context.Context, target string) (types.Container, error) {
if s == nil || s.docker == nil {
return types.Container{}, fmt.Errorf("docker client unavailable")
Expand Down
47 changes: 47 additions & 0 deletions cmd/claw-api/scheduler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package main
import (
"testing"
"time"

"github.com/docker/docker/api/types"
)

func TestNextSchedulerDelayAlignsToMinuteBoundary(t *testing.T) {
Expand Down Expand Up @@ -32,3 +34,48 @@ func TestShouldAttemptDegradedThrottlesToRoughlyTenPercent(t *testing.T) {
t.Fatalf("expected roughly 10%% allowed, got %d/%d", allowed, total)
}
}

func TestWakeExecTimeoutUsesOpenClawBudget(t *testing.T) {
if got := wakeExecTimeout("openclaw-exec"); got != openclawWakeExecTimeout {
t.Fatalf("expected openclaw wake timeout %v, got %v", openclawWakeExecTimeout, got)
}
if got := wakeExecTimeout("hermes-exec"); got != defaultWakeExecTimeout {
t.Fatalf("expected default wake timeout %v, got %v", defaultWakeExecTimeout, got)
}
}

func TestDeferWakeForHealthStatusRequiresHealthyOpenClawTarget(t *testing.T) {
t.Run("healthy openclaw proceeds", func(t *testing.T) {
if detail, skip := deferWakeForHealthStatus("openclaw-exec", &types.ContainerState{
Health: &types.Health{Status: "healthy"},
}); skip || detail != "" {
t.Fatalf("expected healthy openclaw target to proceed, got detail=%q skip=%v", detail, skip)
}
})

t.Run("starting openclaw defers", func(t *testing.T) {
detail, skip := deferWakeForHealthStatus("openclaw-exec", &types.ContainerState{
Health: &types.Health{Status: "starting"},
})
if !skip || detail != "target-health-starting" {
t.Fatalf("expected starting openclaw target to defer, got detail=%q skip=%v", detail, skip)
}
})

t.Run("unhealthy openclaw defers", func(t *testing.T) {
detail, skip := deferWakeForHealthStatus("openclaw-exec", &types.ContainerState{
Health: &types.Health{Status: "unhealthy"},
})
if !skip || detail != "target-health-unhealthy" {
t.Fatalf("expected unhealthy openclaw target to defer, got detail=%q skip=%v", detail, skip)
}
})

t.Run("non-openclaw adapter ignores health", func(t *testing.T) {
if detail, skip := deferWakeForHealthStatus("hermes-exec", &types.ContainerState{
Health: &types.Health{Status: "starting"},
}); skip || detail != "" {
t.Fatalf("expected non-openclaw adapter to ignore health deferral, got detail=%q skip=%v", detail, skip)
}
})
}
1 change: 1 addition & 0 deletions site/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ outline: deep
## Unreleased

- **Fix: OpenClaw scheduled jobs are materialized under the canonical cron store again** ([#159](https://github.com/mostlydev/clawdapus/issues/159)) — the OpenClaw driver now mounts a writable `~/.openclaw/cron/` directory and writes `jobs.json` there instead of under the config directory. Current OpenClaw builds resolve cron definitions from `~/.openclaw/cron/jobs.json`, so the previous layout left `openclaw cron list` empty and `openclaw cron run <id>` failed against jobs Clawdapus thought it had compiled. `claw up` now emits the native store where OpenClaw actually reads it, preserves the dedicated cron directory mount, and keeps pod-origin wakes targeting the runner-native `openclaw cron run <id>` contract.
- **Fix: OpenClaw scheduler wakes no longer burn failures during startup lag** ([#160](https://github.com/mostlydev/clawdapus/issues/160)) — `claw-api` now treats OpenClaw wakes as adapter-aware operations instead of generic 30-second execs. The scheduler defers `openclaw-exec` wakes while Docker health is still `starting` or `unhealthy`, so boot lag stops being recorded as a failed fire, and OpenClaw wakes now get a longer two-minute exec budget before being marked as timed out. This reduces false degradation on desks where the runner is still coming up even though the schedule is otherwise valid.

## v0.8.11 <Badge type="tip" text="Latest" /> {#v0-8-11}

Expand Down
Loading