-
-
Notifications
You must be signed in to change notification settings - Fork 748
Queue up non-rootish tasks if they break priority ordering #7526
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
69a3dac
What happens if rootish is static?
fjetter 4681348
move dynamic logic into decide_worker_rootish_queuing_disabled
fjetter ca99694
Add an objective function for queuing
fjetter e65cc87
Introduce objective function again
fjetter ee4c4c4
Use fan-out rootish definition
fjetter b21ccdc
Queue up non-rootish tasks if they break priority ordering
fjetter 7f6e80c
remove assert that is no longer valid
fjetter File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1096,6 +1096,29 @@ def _to_dict_no_nest(self, *, exclude: Container[str] = ()) -> dict[str, Any]: | |
| """ | ||
| return recursive_to_dict(self, exclude=exclude, members=True) | ||
|
|
||
| @property | ||
| def rootish(self): | ||
| """ | ||
| Whether this ``TaskGroup`` is root or root-like. | ||
|
|
||
| Root-ish tasks are part of a group that's typically considered to be at | ||
| the root or near the root of the graph and we expect it to be | ||
| responsible for the majority of data production. | ||
|
|
||
| Similar fan-out like patterns can also be found in intermediate graph | ||
| layers. | ||
|
|
||
| Most scheduler heuristics should be using | ||
| `Scheduler.is_rootish_no_restrictions` if they need to guarantee that a | ||
| task doesn't have any restrictions and can be run anywhere | ||
| """ | ||
| return ( | ||
| len(self.dependencies) < 5 | ||
| and (ndeps := sum(map(len, self.dependencies))) < 5 | ||
| # Fan-out | ||
| and (len(self) / ndeps > 2 if ndeps else True) | ||
| ) | ||
|
|
||
|
|
||
| class TaskState: | ||
| """A simple object holding information about a task. | ||
|
|
@@ -2039,6 +2062,7 @@ def decide_worker_rootish_queuing_disabled( | |
| """ | ||
| if self.validate: | ||
| # See root-ish-ness note below in `decide_worker_rootish_queuing_enabled` | ||
| assert self._is_rootish_no_restrictions(ts) | ||
| assert math.isinf(self.WORKER_SATURATION) | ||
|
|
||
| pool = self.idle.values() if self.idle else self.running | ||
|
|
@@ -2052,6 +2076,7 @@ def decide_worker_rootish_queuing_disabled( | |
| and tg.last_worker_tasks_left | ||
| and lws.status == Status.running | ||
| and self.workers.get(lws.address) is lws | ||
| and len(tg) > self.total_nthreads * 2 | ||
| ): | ||
| ws = lws | ||
| else: | ||
|
|
@@ -2074,7 +2099,16 @@ def decide_worker_rootish_queuing_disabled( | |
|
|
||
| return ws | ||
|
|
||
| def decide_worker_rootish_queuing_enabled(self) -> WorkerState | None: | ||
| def worker_objective_rootish_queuing(self, ws, ts): | ||
| # FIXME: This is basically the ordinary worker_objective but with task | ||
| # counts instead of occupancy. | ||
| comm_bytes = sum( | ||
| dts.get_nbytes() for dts in ts.dependencies if ws not in dts.who_has | ||
| ) | ||
| # See test_nbytes_determines_worker | ||
| return (len(ws.processing) / ws.nthreads, comm_bytes, ws.nbytes) | ||
|
|
||
| def decide_worker_rootish_queuing_enabled(self, ts) -> WorkerState | None: | ||
| """Pick a worker for a runnable root-ish task, if not all are busy. | ||
|
|
||
| Picks the least-busy worker out of the ``idle`` workers (idle workers have fewer | ||
|
|
@@ -2114,7 +2148,7 @@ def decide_worker_rootish_queuing_enabled(self) -> WorkerState | None: | |
| # NOTE: this will lead to worst-case scheduling with regards to co-assignment. | ||
| ws = min( | ||
| self.idle_task_count, | ||
| key=lambda ws: len(ws.processing) / ws.nthreads, | ||
| key=partial(self.worker_objective_rootish_queuing, ts=ts), | ||
| ) | ||
| if self.validate: | ||
| assert not _worker_full(ws, self.WORKER_SATURATION), ( | ||
|
|
@@ -2206,7 +2240,7 @@ def transition_waiting_processing(self, key: str, stimulus_id: str) -> RecsMsgs: | |
| """ | ||
| ts = self.tasks[key] | ||
|
|
||
| if self.is_rootish(ts): | ||
| if self._is_rootish_no_restrictions(ts): | ||
| # NOTE: having two root-ish methods is temporary. When the feature flag is | ||
| # removed, there should only be one, which combines co-assignment and | ||
| # queuing. Eventually, special-casing root tasks might be removed entirely, | ||
|
|
@@ -2215,9 +2249,25 @@ def transition_waiting_processing(self, key: str, stimulus_id: str) -> RecsMsgs: | |
| if not (ws := self.decide_worker_rootish_queuing_disabled(ts)): | ||
| return {ts.key: "no-worker"}, {}, {} | ||
| else: | ||
| if not (ws := self.decide_worker_rootish_queuing_enabled()): | ||
| if not (ws := self.decide_worker_rootish_queuing_enabled(ts)): | ||
| return {ts.key: "queued"}, {}, {} | ||
| else: | ||
| if not math.isinf(self.WORKER_SATURATION): | ||
| # Queuing can break priority ordering, e.g. when there are | ||
| # worker restrictions. | ||
| # We need to check here if there is a more important queued task | ||
| # and send the currently transitioning task back in the line to | ||
| # avoid breadth first search | ||
| # See also https://github.com/dask/distributed/issues/7496 | ||
| slots_available = sum( | ||
| _task_slots_available(ws, self.WORKER_SATURATION) | ||
| for ws in self.idle_task_count | ||
| ) | ||
|
|
||
| for qts in self.queued.peekn(slots_available): | ||
| if qts.priority < ts.priority: | ||
| return {ts.key: "queued"}, {}, {} | ||
|
|
||
| if not (ws := self.decide_worker_non_rootish(ts)): | ||
| return {ts.key: "no-worker"}, {}, {} | ||
|
|
||
|
|
@@ -2647,7 +2697,8 @@ def transition_waiting_queued(self, key: str, stimulus_id: str) -> RecsMsgs: | |
| ts = self.tasks[key] | ||
|
|
||
| if self.validate: | ||
| assert not self.idle_task_count, (ts, self.idle_task_count) | ||
| if self._is_rootish_no_restrictions(ts): | ||
| assert not self.idle_task_count, (ts, self.idle_task_count) | ||
| self._validate_ready(ts) | ||
|
Comment on lines
+2700
to
2702
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In an earlier version I asserted that if a task is not rootish, it would have restrictions. This caused CI to fail very hard. It turns out that running non-rootish tasks with lower priority than queued tasks is not impossible |
||
|
|
||
| ts.state = "queued" | ||
|
|
@@ -2688,10 +2739,14 @@ def transition_queued_processing(self, key: str, stimulus_id: str) -> RecsMsgs: | |
| assert not ts.actor, f"Actors can't be queued: {ts}" | ||
| assert ts in self.queued | ||
|
|
||
| if ws := self.decide_worker_rootish_queuing_enabled(): | ||
| self.queued.discard(ts) | ||
| worker_msgs = self._add_to_processing(ts, ws) | ||
| # If no worker, task just stays `queued` | ||
| if self._is_rootish_no_restrictions(ts): | ||
| if not (ws := self.decide_worker_rootish_queuing_enabled(ts)): | ||
| return {}, {}, {} | ||
| self.queued.discard(ts) | ||
| if not (ws := self.decide_worker_non_rootish(ts)): | ||
| return {ts.key: "no-worker"}, {}, {} | ||
| worker_msgs = self._add_to_processing(ts, ws) | ||
|
|
||
| return recommendations, {}, worker_msgs | ||
|
|
||
|
|
@@ -2812,22 +2867,16 @@ def story(self, *keys_or_tasks_or_stimuli: str | TaskState) -> list[Transition]: | |
| # Assigning Tasks to Workers # | ||
| ############################## | ||
|
|
||
| def is_rootish(self, ts: TaskState) -> bool: | ||
| """ | ||
| Whether ``ts`` is a root or root-like task. | ||
|
|
||
| Root-ish tasks are part of a group that's much larger than the cluster, | ||
| and have few or no dependencies. | ||
| """ | ||
| if ts.resource_restrictions or ts.worker_restrictions or ts.host_restrictions: | ||
| def _is_rootish_no_restrictions(self, ts: TaskState) -> bool: | ||
| """See also ``TaskGroup.rootish``""" | ||
| if ( | ||
| ts.resource_restrictions | ||
| or ts.worker_restrictions | ||
| or ts.host_restrictions | ||
| or ts.actor | ||
| ): | ||
| return False | ||
| tg = ts.group | ||
| # TODO short-circuit to True if `not ts.dependencies`? | ||
| return ( | ||
| len(tg) > self.total_nthreads * 2 | ||
| and len(tg.dependencies) < 5 | ||
| and sum(map(len, tg.dependencies)) < 5 | ||
| ) | ||
| return ts.group.rootish | ||
|
|
||
| def check_idle_saturated(self, ws: WorkerState, occ: float = -1.0) -> None: | ||
| """Update the status of the idle and saturated state | ||
|
|
@@ -5009,7 +5058,7 @@ def validate_queued(self, key): | |
| assert not ts.processing_on | ||
| assert not ( | ||
| ts.worker_restrictions or ts.host_restrictions or ts.resource_restrictions | ||
| ) | ||
| ) or not self.is_rootish(ts) | ||
| for dts in ts.dependencies: | ||
| assert dts.who_has | ||
| assert ts in dts.waiters | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.