-
-
Notifications
You must be signed in to change notification settings - Fork 748
Closed
Labels
flaky test — Intermittent failures on CI.
Description
____________________________ test_separate_key_cert ____________________________
addr = 'tls://127.0.0.1:50325', timeout = 5, deserialize = True
handshake_overrides = None
connection_args = {'extra_conn_args': {}, 'require_encryption': False, 'ssl_context': <ssl.SSLContext object at 0x7fee73cae840>}
scheme = 'tls', loc = '127.0.0.1:50325'
backend = <distributed.comm.tcp.TLSBackend object at 0x7fee6b7b1b80>
connector = <distributed.comm.tcp.TLSConnector object at 0x7fee730d5460>
comm = None, time_left = <function connect.<locals>.time_left at 0x7fee73cba9d0>
backoff_base = 0.01
async def connect(
addr, timeout=None, deserialize=True, handshake_overrides=None, **connection_args
):
"""
Connect to the given address (a URI such as ``tcp://127.0.0.1:1234``)
and yield a ``Comm`` object. If the connection attempt fails, it is
retried until the *timeout* is expired.
"""
if timeout is None:
timeout = dask.config.get("distributed.comm.timeouts.connect")
timeout = parse_timedelta(timeout, default="seconds")
scheme, loc = parse_address(addr)
backend = registry.get_backend(scheme)
connector = backend.get_connector()
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.46419.052374' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
comm = None
data._warn(str(exc))
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.38336.418797' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
start = time()
data._warn(str(exc))
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.32195.531225' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
data._warn(str(exc))
def time_left():
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.34667.922357' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
deadline = start + timeout
data._warn(str(exc))
return max(0, deadline - time())
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.36271.217147' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
data._warn(str(exc))
backoff_base = 0.01
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.31040.622054' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
attempt = 0
data._warn(str(exc))
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.47276.049279' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
# Prefer multiple small attempts than one long attempt. This should protect
data._warn(str(exc))
# primarily from DNS race conditions
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.32411.218607' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
# gh3104, gh4176, gh4167
data._warn(str(exc))
intermediate_cap = timeout / 5
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.45899.113954' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
active_exception = None
data._warn(str(exc))
while time_left() > 0:
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.33873.585300' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
try:
data._warn(str(exc))
> comm = await asyncio.wait_for(
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Couldn't use data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.26311.985299': database disk image is malformed
connector.connect(loc, deserialize=deserialize, **connection_args),
data._warn(str(exc))
timeout=min(intermediate_cap, time_left()),
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Couldn't use data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.44948.574294': database disk image is malformed
)
data._warn(str(exc))
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.38896.980031' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
distributed/comm/core.py:291:
data._warn(str(exc))
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.45139.661214' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
data._warn(str(exc))
fut = <Task cancelled name='Task-2305' coro=<BaseTCPConnector.connect() done, defined at /Users/runner/work/distributed/distributed/distributed/comm/tcp.py:443>>
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.46369.603291' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
timeout = 0.07133984565734863
data._warn(str(exc))
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Couldn't use data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.36069.799882': database disk image is malformed
async def wait_for(fut, timeout, *, loop=None):
data._warn(str(exc))
"""Wait for the single Future or coroutine to complete, with timeout.
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.32412.158306' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
data._warn(str(exc))
Coroutine will be wrapped in Task.
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.31970.568814' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
data._warn(str(exc))
Returns result of the Future or coroutine. When a timeout occurs,
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.29199.006899' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
it cancels the task and raises TimeoutError. To avoid the task
data._warn(str(exc))
cancellation, wrap it in shield().
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.32369.451799' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
data._warn(str(exc))
If the wait is cancelled, the task is also cancelled.
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.38527.986223' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
data._warn(str(exc))
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.32153.150096' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
data._warn(str(exc))
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.38743.383876' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
data._warn(str(exc))
/Users/runner/miniconda3/envs/dask-distributed/lib/python3.8/site-packages/coverage/data.py:130: CoverageWarning: Data file '/Users/runner/work/distributed/distributed/.coverage.Mac-1657864285758.local.32753.456315' doesn't seem to be a coverage data file: cannot unpack non-iterable NoneType object
data._warn(str(exc))
This function is a coroutine.
"""
if loop is None:
loop = events.get_running_loop()
else:
warnings.warn("The loop argument is deprecated since Python 3.8, "
"and scheduled for removal in Python 3.10.",
DeprecationWarning, stacklevel=2)
if timeout is None:
return await fut
if timeout <= 0:
fut = ensure_future(fut, loop=loop)
if fut.done():
return fut.result()
await _cancel_and_wait(fut, loop=loop)
try:
fut.result()
except exceptions.CancelledError as exc:
raise exceptions.TimeoutError() from exc
else:
raise exceptions.TimeoutError()
waiter = loop.create_future()
timeout_handle = loop.call_later(timeout, _release_waiter, waiter)
cb = functools.partial(_release_waiter, waiter)
fut = ensure_future(fut, loop=loop)
fut.add_done_callback(cb)
try:
# wait until the future completes or the timeout
try:
await waiter
except exceptions.CancelledError:
if fut.done():
return fut.result()
else:
fut.remove_done_callback(cb)
# We must ensure that the task is not running
# after wait_for() returns.
# See https://bugs.python.org/issue32751
await _cancel_and_wait(fut, loop=loop)
raise
if fut.done():
return fut.result()
else:
fut.remove_done_callback(cb)
# We must ensure that the task is not running
# after wait_for() returns.
# See https://bugs.python.org/issue32751
await _cancel_and_wait(fut, loop=loop)
> raise exceptions.TimeoutError()
E asyncio.exceptions.TimeoutError
../../../miniconda3/envs/dask-distributed/lib/python3.8/asyncio/tasks.py:501: TimeoutError
The above exception was the direct cause of the following exception:
loop = <tornado.platform.asyncio.AsyncIOLoop object at 0x7fee73aa2ee0>
def test_separate_key_cert(loop):
port = open_port()
with popen(
[
"dask-scheduler",
"--no-dashboard",
f"--port={port}",
]
+ tls_args_2
) as s:
with popen(
["dask-worker", "--no-dashboard", f"tls://127.0.0.1:{port}"] + tls_args_2
) as w:
> with Client(
f"tls://127.0.0.1:{port}", loop=loop, security=tls_security()
) as c:
distributed/cli/tests/test_tls_cli.py:77:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
distributed/client.py:940: in __init__
self.start(timeout=timeout)
distributed/client.py:1098: in start
sync(self.loop, self._start, **kwargs)
distributed/utils.py:405: in sync
raise exc.with_traceback(tb)
distributed/utils.py:378: in f
result = yield future
../../../miniconda3/envs/dask-distributed/lib/python3.8/site-packages/tornado/gen.py:762: in run
value = future.result()
distributed/client.py:1178: in _start
await self._ensure_connected(timeout=timeout)
distributed/client.py:1241: in _ensure_connected
comm = await connect(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
addr = 'tls://127.0.0.1:50325', timeout = 5, deserialize = True
handshake_overrides = None
connection_args = {'extra_conn_args': {}, 'require_encryption': False, 'ssl_context': <ssl.SSLContext object at 0x7fee73cae840>}
scheme = 'tls', loc = '127.0.0.1:50325'
backend = <distributed.comm.tcp.TLSBackend object at 0x7fee6b7b1b80>
connector = <distributed.comm.tcp.TLSConnector object at 0x7fee730d5460>
comm = None, time_left = <function connect.<locals>.time_left at 0x7fee73cba9d0>
backoff_base = 0.01
async def connect(
addr, timeout=None, deserialize=True, handshake_overrides=None, **connection_args
):
"""
Connect to the given address (a URI such as ``tcp://127.0.0.1:1234``)
and yield a ``Comm`` object. If the connection attempt fails, it is
retried until the *timeout* is expired.
"""
if timeout is None:
timeout = dask.config.get("distributed.comm.timeouts.connect")
timeout = parse_timedelta(timeout, default="seconds")
scheme, loc = parse_address(addr)
backend = registry.get_backend(scheme)
connector = backend.get_connector()
comm = None
start = time()
def time_left():
deadline = start + timeout
return max(0, deadline - time())
backoff_base = 0.01
attempt = 0
# Prefer multiple small attempts than one long attempt. This should protect
# primarily from DNS race conditions
# gh3104, gh4176, gh4167
intermediate_cap = timeout / 5
active_exception = None
while time_left() > 0:
try:
comm = await asyncio.wait_for(
connector.connect(loc, deserialize=deserialize, **connection_args),
timeout=min(intermediate_cap, time_left()),
)
break
except FatalCommClosedError:
raise
# Note: CommClosed inherits from OSError
except (asyncio.TimeoutError, OSError) as exc:
active_exception = exc
# As described above, the intermediate timeout is used to distribute
# initial, bulk connect attempts homogeneously. In particular with
# the jitter upon retries we should not be worried about overloading
# any more DNS servers
intermediate_cap = timeout
# FullJitter see https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
upper_cap = min(time_left(), backoff_base * (2**attempt))
backoff = random.uniform(0, upper_cap)
attempt += 1
logger.debug(
"Could not connect to %s, waiting for %s before retrying", loc, backoff
)
await asyncio.sleep(backoff)
else:
> raise OSError(
f"Timed out trying to connect to {addr} after {timeout} s"
) from active_exception
E OSError: Timed out trying to connect to tls://127.0.0.1:50325 after 5 s
distributed/comm/core.py:317: OSError
----------------------------- Captured stderr call -----------------------------
2022-07-15 06:11:30,989 - distributed.scheduler - INFO - -----------------------------------------------
2022-07-15 06:11:31,004 - distributed.http.proxy - INFO - To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy
2022-07-15 06:11:31,014 - distributed.scheduler - INFO - State start
2022-07-15 06:11:31,026 - distributed.scheduler - INFO - -----------------------------------------------
2022-07-15 06:11:31,027 - distributed.scheduler - INFO - Clear task state
2022-07-15 06:11:31,028 - distributed.scheduler - INFO - Scheduler at: tls://10.213.5.31:50325
2022-07-15 06:11:31,028 - distributed.scheduler - INFO - dashboard at: :8787
2022-07-15 06:11:31,148 - distributed.nanny - INFO - Start Nanny at: 'tls://127.0.0.1:50338'
2022-07-15 06:11:32,729 - distributed.worker - INFO - Start worker at: tls://127.0.0.1:50343
2022-07-15 06:11:32,729 - distributed.worker - INFO - Listening to: tls://127.0.0.1:50343
2022-07-15 06:11:32,729 - distributed.worker - INFO - dashboard at: 127.0.0.1:50344
2022-07-15 06:11:32,729 - distributed.worker - INFO - Waiting to connect to: tls://127.0.0.1:50325
2022-07-15 06:11:32,730 - distributed.worker - INFO - -------------------------------------------------
2022-07-15 06:11:32,730 - distributed.worker - INFO - Threads: 3
2022-07-15 06:11:32,730 - distributed.worker - INFO - Memory: 14.00 GiB
2022-07-15 06:11:32,730 - distributed.worker - INFO - Local Directory: /var/folders/24/8k48jl6d249_n_qfxwsl6xvm0000gn/T/dask-worker-space/worker-ayht_31h
2022-07-15 06:11:32,730 - distributed.worker - INFO - -------------------------------------------------
2022-07-15 06:11:33,600 - distributed.scheduler - INFO - Register worker <WorkerState 'tls://127.0.0.1:50343', status: init, memory: 0, processing: 0>
2022-07-15 06:11:34,214 - distributed._signals - INFO - Received signal SIGINT (2)
2022-07-15 06:11:34,215 - distributed.nanny - INFO - Closing Nanny at 'tls://127.0.0.1:50338'.
2022-07-15 06:11:34,217 - distributed.nanny - INFO - Nanny asking worker to close
2022-07-15 06:11:34,219 - distributed.worker - INFO - Stopping worker at tls://127.0.0.1:50343
2022-07-15 06:11:34,219 - distributed.worker - INFO - Closed worker has not yet started: Status.init
2022-07-15 06:11:34,223 - distributed.scheduler - INFO - Starting worker compute stream, tls://127.0.0.1:50343
2022-07-15 06:11:34,223 - distributed.core - INFO - Starting established connection
2022-07-15 06:11:34,481 - distributed.scheduler - INFO - Remove worker <WorkerState 'tls://127.0.0.1:50343', status: init, memory: 0, processing: 0>
2022-07-15 06:11:34,482 - distributed.core - INFO - Removing comms to tls://127.0.0.1:50343
2022-07-15 06:11:34,482 - distributed.scheduler - INFO - Lost all workers
2022-07-15 06:11:34,603 - distributed.dask_worker - INFO - End worker
2022-07-15 06:11:34,779 - distributed._signals - INFO - Received signal SIGINT (2)
2022-07-15 06:11:34,779 - distributed.scheduler - INFO - Scheduler closing...
2022-07-15 06:11:34,780 - distributed.scheduler - INFO - Scheduler closing all comms
2022-07-15 06:11:34,781 - distributed.scheduler - INFO - Stopped scheduler at 'tls://10.213.5.31:50325'
2022-07-15 06:11:34,782 - distributed.scheduler - INFO - End scheduler
https://github.com/dask/distributed/runs/7352829162?check_suite_focus=true#step:11:1847
Metadata
Metadata
Assignees
Labels
flaky test — Intermittent failures on CI.