From 5fb60750e6c1d9a928f5e552259dd0e4ca1bc2e8 Mon Sep 17 00:00:00 2001 From: Ivan Petrukhin Date: Mon, 22 Dec 2025 18:40:28 +0300 Subject: [PATCH 1/5] feat: hard time limit for submissions --- Exesh/internal/runtime/docker/runtime.go | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/Exesh/internal/runtime/docker/runtime.go b/Exesh/internal/runtime/docker/runtime.go index 4ee237d4..a4244991 100644 --- a/Exesh/internal/runtime/docker/runtime.go +++ b/Exesh/internal/runtime/docker/runtime.go @@ -4,14 +4,14 @@ import ( "archive/tar" "bytes" "context" + "errors" + "exesh/internal/runtime" "fmt" "io" "os" "path/filepath" "time" - "exesh/internal/runtime" - "github.com/docker/docker/api/types/container" "github.com/docker/docker/api/types/network" "github.com/docker/docker/client" @@ -144,7 +144,7 @@ func (dr *Runtime) Execute(ctx context.Context, cmd []string, params runtime.Exe if params.Stdin != nil { go func(r io.Reader) { _, _ = io.Copy(hjr.Conn, params.Stdin) - // BUG: why the fuck does this "CloseWrite" cause stdout to be empty + // BUG: why does this "CloseWrite" cause stdout to be empty // hjr.CloseWrite() }(params.Stdin) } @@ -154,9 +154,13 @@ func (dr *Runtime) Execute(ctx context.Context, cmd []string, params runtime.Exe return fmt.Errorf("start container: %w", err) } + // force larger deadline because the submission may just hang waiting for input + ctxTimeout, cancel := context.WithTimeout(ctx, 10*time.Duration(params.Limits.Time)) + defer cancel() + var insp container.InspectResponse for { - insp, err = dr.client.ContainerInspect(ctx, cr.ID) + insp, err = dr.client.ContainerInspect(ctxTimeout, cr.ID) if err != nil { return fmt.Errorf("inspect container: %w", err) } @@ -164,6 +168,15 @@ func (dr *Runtime) Execute(ctx context.Context, cmd []string, params runtime.Exe if !insp.State.Running { break } + + select { + case <-ctxTimeout.Done(): + if errors.Is(ctxTimeout.Err(), context.DeadlineExceeded) { + return runtime.ErrTimeout + } + return ctx.Err() + case <-time.After(1 * time.Second): + } } if insp.State.ExitCode == 137 { From c4cf45ba800d57e9b776c745524ffcdecf36815c Mon Sep 17 00:00:00 2001 From: Ivan Petrukhin Date: Mon, 22 Dec 2025 18:49:47 +0300 Subject: [PATCH 2/5] fix: hard limit for no time limit, and fix example --- Exesh/example/.gitignore | 3 +++ Exesh/example/main.go | 5 ++++- Exesh/internal/runtime/docker/runtime.go | 6 +++++- 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 Exesh/example/.gitignore diff --git a/Exesh/example/.gitignore b/Exesh/example/.gitignore new file mode 100644 index 00000000..ff966fb8 --- /dev/null +++ b/Exesh/example/.gitignore @@ -0,0 +1,3 @@ +a.checker.out +a.out +out.txt diff --git a/Exesh/example/main.go b/Exesh/example/main.go index 90cac8cc..f4123871 100644 --- a/Exesh/example/main.go +++ b/Exesh/example/main.go @@ -65,6 +65,10 @@ func (dp *dummyOutputProvider) Locate(ctx context.Context, out execution.Output) return out.GetFile(), func() {}, nil } +func (dp *dummyOutputProvider) Reserve(ctx context.Context, out execution.Output) (path string, unlock func() error, smth func() error, err error) { + return out.GetFile(), func() error { return nil }, func() error { return nil }, nil +} + func (dp *dummyOutputProvider) Read(ctx context.Context, out execution.Output) (r io.Reader, unlock func(), err error) { unlock = func() {} f, err := os.OpenFile(out.GetFile(), os.O_RDONLY, 0o755) @@ -124,7 +128,6 @@ func main() { runJobId, inputs.NewArtifactInput("a.checker.out", checkJobId, workerID), inputs.NewArtifactInput("correct.txt", checkJobId, workerID), inputs.NewArtifactInput("out.txt", checkJobId, workerID), - outputs.NewArtifactOutput("verdict.txt", checkJobId), ))) fmt.Printf("check: %#v\n", checkResult) } diff --git a/Exesh/internal/runtime/docker/runtime.go b/Exesh/internal/runtime/docker/runtime.go index a4244991..9aa56422 100644 --- a/Exesh/internal/runtime/docker/runtime.go +++ b/Exesh/internal/runtime/docker/runtime.go @@ -155,7 +155,11 @@ func (dr *Runtime) Execute(ctx context.Context, cmd []string, params runtime.Exe } // force larger deadline because the submission may just hang waiting for input - ctxTimeout, cancel := context.WithTimeout(ctx, 10*time.Duration(params.Limits.Time)) + timeout := 30 * time.Second + if params.Limits.Time != 0 { + timeout = 10 * time.Duration(params.Limits.Time) + } + ctxTimeout, cancel := context.WithTimeout(ctx, timeout) defer cancel() var insp container.InspectResponse From 819511b738d04bc6abeda46693f7357e3a68762f Mon Sep 17 00:00:00 2001 From: Ivan Petrukhin Date: Mon, 22 Dec 2025 19:49:44 +0300 Subject: [PATCH 3/5] fix: close after writing stdin to container --- Exesh/example/in.txt | 1 - Exesh/internal/runtime/docker/runtime.go | 9 +++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Exesh/example/in.txt b/Exesh/example/in.txt index ccb07bfe..8d04f961 100644 --- a/Exesh/example/in.txt +++ b/Exesh/example/in.txt @@ -1,2 +1 @@ 1 2 - diff --git a/Exesh/internal/runtime/docker/runtime.go b/Exesh/internal/runtime/docker/runtime.go index 9aa56422..7ab4f639 100644 --- a/Exesh/internal/runtime/docker/runtime.go +++ b/Exesh/internal/runtime/docker/runtime.go @@ -5,13 +5,14 @@ import ( "bytes" "context" "errors" - "exesh/internal/runtime" "fmt" "io" "os" "path/filepath" "time" + "exesh/internal/runtime" + "github.com/docker/docker/api/types/container" "github.com/docker/docker/api/types/network" "github.com/docker/docker/client" @@ -76,8 +77,9 @@ func (dr *Runtime) Execute(ctx context.Context, cmd []string, params runtime.Exe cpuPolicy(int64(params.Limits.Time) / int64(time.Second))(hostConfig) memoryPolicy(int64(params.Limits.Memory))(hostConfig) + // we do not know why, but without StdinOnce, without CloseWrite stdin is not closed, and with it - stdout is empty cr, err := dr.client.ContainerCreate(ctx, - &container.Config{Image: dr.baseImage, Cmd: cmd, OpenStdin: true}, + &container.Config{Image: dr.baseImage, Cmd: cmd, OpenStdin: true, StdinOnce: true}, hostConfig, &network.NetworkingConfig{}, &v1.Platform{OS: "linux", Architecture: "amd64"}, @@ -144,8 +146,7 @@ func (dr *Runtime) Execute(ctx context.Context, cmd []string, params runtime.Exe if params.Stdin != nil { go func(r io.Reader) { _, _ = io.Copy(hjr.Conn, params.Stdin) - // BUG: why does this "CloseWrite" cause stdout to be empty - // hjr.CloseWrite() + hjr.CloseWrite() }(params.Stdin) } From 3164862d4d1725d4fdd1316b5d4fc790c37e19f1 Mon Sep 17 00:00:00 2001 From: Ivan Petrukhin Date: Tue, 23 Dec 2025 23:47:34 +0300 Subject: [PATCH 4/5] chore: set different timeouts --- Exesh/internal/runtime/docker/runtime.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Exesh/internal/runtime/docker/runtime.go b/Exesh/internal/runtime/docker/runtime.go index 7ab4f639..d41e2577 100644 --- a/Exesh/internal/runtime/docker/runtime.go +++ b/Exesh/internal/runtime/docker/runtime.go @@ -5,14 +5,13 @@ import ( "bytes" "context" "errors" + "exesh/internal/runtime" "fmt" "io" "os" "path/filepath" "time" - "exesh/internal/runtime" - "github.com/docker/docker/api/types/container" "github.com/docker/docker/api/types/network" "github.com/docker/docker/client" @@ -156,9 +155,9 @@ func (dr *Runtime) Execute(ctx context.Context, cmd []string, params runtime.Exe } // force larger deadline because the submission may just hang waiting for input - timeout := 30 * time.Second + timeout := 10 * time.Second if params.Limits.Time != 0 { - timeout = 10 * time.Duration(params.Limits.Time) + timeout = 5 * time.Duration(params.Limits.Time) } ctxTimeout, cancel := context.WithTimeout(ctx, timeout) defer cancel() From f8cc617cac17e3548acb1642cfcb142872f1248b Mon Sep 17 00:00:00 2001 From: Ivan Petrukhin Date: Tue, 23 Dec 2025 23:50:26 +0300 Subject: [PATCH 5/5] chore: bump execution_retry_after --- Exesh/config/coordinator/dev.yml | 4 ++-- Exesh/config/coordinator/docker.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Exesh/config/coordinator/dev.yml b/Exesh/config/coordinator/dev.yml index 3b6a2e06..4b5db66f 100644 --- a/Exesh/config/coordinator/dev.yml +++ b/Exesh/config/coordinator/dev.yml @@ -22,7 +22,7 @@ job_factory: execution_scheduler: executions_interval: 5s max_concurrency: 10 - execution_retry_after: 15s + execution_retry_after: 300s worker_pool: worker_die_after: 10s artifact_registry: @@ -30,4 +30,4 @@ artifact_registry: sender: brokers: - 0.0.0.0:29092 - topic: exesh.step-updates \ No newline at end of file + topic: exesh.step-updates diff --git a/Exesh/config/coordinator/docker.yml b/Exesh/config/coordinator/docker.yml index 4f7eed68..6ab4802d 100644 --- a/Exesh/config/coordinator/docker.yml +++ b/Exesh/config/coordinator/docker.yml @@ -23,7 +23,7 @@ job_factory: execution_scheduler: executions_interval: 3s max_concurrency: 10 - execution_retry_after: 15s + execution_retry_after: 300s worker_pool: worker_die_after: 10s artifact_registry: