Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/scriptworker/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,7 @@ async def run_task(context, to_cancellable_process):
to_cancellable_process (types.Callable): tracks the process so that it can be stopped if the worker is shut down

Returns:
int: 1 on failure, 0 on success
int: >= 1 on failure, 0 on success

"""
env = deepcopy(os.environ)
Expand Down Expand Up @@ -668,6 +668,8 @@ async def run_task(context, to_cancellable_process):
status_line = "exit code: {}".format(exitcode)
if exitcode < 0:
status_line = "Automation Error: python exited with signal {}".format(exitcode)
# we must return a value > 0 to signal an error
exitcode = 1
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I can tell this bug was introduced by #480, where we started returning exitcode directly instead of 0/1. Looking at that change it seems the intent might be to return 256 + exitcode instead, maybe, so that the signal number can be looked up in reversed_statuses (with 241 and 245 for SIGTERM and SIGSEGV)?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After some more digging, the context in bug 1654250 is that we wanted to propagate "some tool (i.e. subprocess) called by iscript segfaults" into "task status is intermittent-failure". I'm not convinced that ever worked, but in any case it should be fine, when the task script itself (not a command it ran) is killed by a signal, to return 1 (failure).

log.info(status_line)
print(status_line, file=log_filehandle)
stopped_due_to_worker_shutdown = context.proc.stopped_due_to_worker_shutdown
Expand Down
1 change: 1 addition & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
}

TIMEOUT_SCRIPT = os.path.join(os.path.dirname(__file__), "data", "long_running.py")
KILLED_SCRIPT = os.path.join(os.path.dirname(__file__), "data", "killed.py")
AT_LEAST_PY38 = sys.version_info >= (3, 8)


Expand Down
6 changes: 6 additions & 0 deletions tests/data/killed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/python3
"""Test helper: terminate immediately via an uncatchable signal.

This script is pointed at by ``task_script`` in tests (it is executed as a
subprocess, never imported).  Sending SIGKILL to pid 0 delivers the signal
to every process in the caller's process group, so the script dies without
running any Python-level cleanup — letting tests exercise the worker's
handling of a task script that is killed by a signal (negative exit code).
"""

import os
import signal

# pid 0 == "our whole process group"; SIGKILL cannot be caught or ignored.
os.kill(0, signal.SIGKILL)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious, wouldn't this kill the process on import? How does this work?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not imported, it's executed by the test (we point task_script at it).

23 changes: 22 additions & 1 deletion tests/test_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from scriptworker.exceptions import ScriptWorkerException, WorkerShutdownDuringTask
from scriptworker.worker import RunTasks, do_run_task

from . import AT_LEAST_PY38, TIMEOUT_SCRIPT, create_async, create_finished_future, create_slow_async, create_sync, noop_async, noop_sync
from . import AT_LEAST_PY38, KILLED_SCRIPT, TIMEOUT_SCRIPT, create_async, create_finished_future, create_slow_async, create_sync, noop_async, noop_sync


# constants helpers and fixtures {{{1
Expand Down Expand Up @@ -266,6 +266,27 @@ async def claim_work(*args, **kwargs):
assert status == context.config["task_max_timeout_status"]


@pytest.mark.asyncio
async def test_run_tasks_killed(context, successful_queue, mocker):
    """A task script killed by a signal must map to the generic failure status.

    Runs KILLED_SCRIPT (which SIGKILLs itself) as the task script and checks
    that run_tasks reports ``STATUSES["failure"]`` rather than propagating a
    negative exit code.
    """
    work_dir = os.path.join(context.config["work_dir"], "killed")
    context.config["task_script"] = (sys.executable, KILLED_SCRIPT, work_dir)
    context.config["task_max_timeout"] = 1
    context.queue = successful_queue
    task = {"foo": "bar", "credentials": {"a": "b"}, "task": {"task_defn": True}}

    async def claim_work(*args, **kwargs):
        return {"tasks": [task]}

    # Stub out everything around run_task so only task execution is exercised.
    mocker.patch.object(worker, "claim_work", new=claim_work)
    for name in ("generate_cot", "prepare_to_run_task"):
        mocker.patch.object(worker, name, new=noop_sync)
    for name in ("reclaim_task", "upload_artifacts", "complete_task"):
        mocker.patch.object(worker, name, new=noop_async)

    status = await worker.run_tasks(context)
    assert status == STATUSES["failure"]


_MOCK_CLAIM_WORK_RETURN = {
"tasks": [
{
Expand Down