Skip to content
79 changes: 77 additions & 2 deletions cloudinit/distros/networking.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import abc
import logging
import os

from cloudinit import net
from cloudinit import net, util


LOG = logging.getLogger(__name__)


# Type aliases (https://docs.python.org/3/library/typing.html#type-aliases),
Expand Down Expand Up @@ -102,10 +106,72 @@ def is_vlan(self, devname: DeviceName) -> bool:
def master_is_bridge_or_bond(self, devname: DeviceName) -> bool:
    """Return the result of `net.master_is_bridge_or_bond` for `devname`."""
    return net.master_is_bridge_or_bond(devname)

@abc.abstractmethod
def settle(self, *, exists=None) -> None:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have all the context regarding the creation of that abstraction, but my concern about it is that since this is an abstract method, we cannot ensure that the classes that implement it will follow the exists logic detailed in the description here.

Maybe we can enforce that by turning settle into this concrete implementation:

def settle(self, *, exists=None) -> None:
    if exists is not None:
        exists = self.find_device_path(exists)
    self.settle_device(exists)

Then we would need to make both find_device_path and settle_device abstract. However, I am basing that only on what I saw in the LinuxNetworking implementation, so I don't know if that is a pattern we really want to enforce.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So in the Linux case, we're calling udevadm settle via util.udevadm_settle:

cloud-init/cloudinit/util.py

Lines 2628 to 2639 in 2b72791

def udevadm_settle(exists=None, timeout=None):
"""Invoke udevadm settle with optional exists and timeout parameters"""
settle_cmd = ["udevadm", "settle"]
if exists:
# skip the settle if the requested path already exists
if os.path.exists(exists):
return
settle_cmd.extend(['--exit-if-exists=%s' % exists])
if timeout:
settle_cmd.extend(['--timeout=%s' % timeout])
return subp.subp(settle_cmd)

As you can see, exists is essentially an optimisation: it allows us to return sooner than we might otherwise, but it doesn't materially change the action being performed. I've updated the docstring to try and reflect this more accurately. Is that a reasonable resolution?

(As an aside: BSDNetworking implements this as a pass because there is no equivalent on BSD, so this is really Linux-specific behaviour we're talking about: this implementation will only vary if we have a distro that doesn't use udev, which I infer we don't[0] because this code is run unconditionally today.)

[0] Currently; I wonder if Alpine Linux uses udev.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@OddBloke Yes, agreed

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alpine uses eudev, which is udev-compatible.

"""Wait for device population in the system to complete.

:param exists:
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My reading of this parameter name made me think initially this was a boolean. This is probably just a poorly named parameter (could have been device_name).

For this PR, can we please add type hints to this exists parameter definition, and maybe make the docstring for :param exists mention first that it's an Optional string of a specific DeviceName? If given, only perform....

Would you be opposed to me providing a separate PR to rename that param in udevadm_settle and settle from exists to device_name? There is only one callsite that uses the "exists" param in DataSourceAltCloud.py.

An optional optimisation. If given, only perform as much of the
settle process as is required for the given DeviceName to be
present in the system. (This may include skipping the settle
process entirely, if the device already exists.)
:type exists: Optional[DeviceName]
"""
pass

def wait_for_physdevs(
self, netcfg: NetworkConfig, *, strict: bool = True
) -> None:
return net.wait_for_physdevs(netcfg, strict=strict)
"""Wait for all the physical devices in `netcfg` to exist on the system

Specifically, this will call `self.settle` 5 times, and check after
each one if the physical devices are now present in the system.

:param netcfg:
The NetworkConfig from which to extract physical devices to wait
for.
:param strict:
Raise a `RuntimeError` if any physical devices are not present
after waiting.
"""
physdevs = self.extract_physdevs(netcfg)

# set of expected iface names and mac addrs
expected_ifaces = dict([(iface[0], iface[1]) for iface in physdevs])
expected_macs = set(expected_ifaces.keys())

# set of current macs
present_macs = self.get_interfaces_by_mac().keys()

# compare the set of expected mac address values to
# the current macs present; we only check MAC as cloud-init
# has not yet renamed interfaces and the netcfg may include
# such renames.
for _ in range(0, 5):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know if we want this behavior, but should we parametrize the number of times to retry calling settle? That would allow us to have a named variable here in the for loop and maybe modify that behavior for slower detection of physical devices.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO, it's easier to read if the value is hard-coded; you don't have to go and find the callers to check if they are passing in something other than the default. Until we have a compelling case to parameterise it (i.e. we want different behaviour for different callers), I'd prefer to leave it as-is. Is that reasonable?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, makes sense

if expected_macs.issubset(present_macs):
LOG.debug("net: all expected physical devices present")
return

missing = expected_macs.difference(present_macs)
LOG.debug("net: waiting for expected net devices: %s", missing)
for mac in missing:
# trigger a settle, unless this interface exists
devname = expected_ifaces[mac]
msg = "Waiting for settle or {} exists".format(devname)
util.log_time(
LOG.debug,
msg,
func=self.settle,
kwargs={"exists": devname},
)

# update present_macs after settles
present_macs = self.get_interfaces_by_mac().keys()

msg = "Not all expected physical devices present: %s" % missing
LOG.warning(msg)
if strict:
raise RuntimeError(msg)


class BSDNetworking(Networking):
Expand All @@ -114,6 +180,10 @@ class BSDNetworking(Networking):
def is_physical(self, devname: DeviceName) -> bool:
    # No implementation is provided for this class here; calling this
    # method always raises NotImplementedError.
    raise NotImplementedError()

def settle(self, *, exists=None) -> None:
    """BSD has no equivalent to `udevadm settle`; noop.

    :param exists:
        Accepted for interface compatibility only; it is ignored.
    """
    return None


class LinuxNetworking(Networking):
"""Implementation of networking functionality common to Linux distros."""
Expand All @@ -138,3 +208,8 @@ def is_netfail_standby(self, devname: DeviceName) -> bool:

def is_physical(self, devname: DeviceName) -> bool:
    """Report whether `net.sys_dev_path(devname, "device")` exists on disk."""
    device_path = net.sys_dev_path(devname, "device")
    return os.path.exists(device_path)

def settle(self, *, exists=None) -> None:
    """Call `util.udevadm_settle`, optionally targeting a single device.

    :param exists:
        If not None, a device name; it is converted with
        `net.sys_dev_path` and the resulting path is passed on as the
        `exists` argument. If None, `exists=None` is passed through.
    """
    settle_target = None if exists is None else net.sys_dev_path(exists)
    util.udevadm_settle(exists=settle_target)
152 changes: 151 additions & 1 deletion cloudinit/distros/tests/test_networking.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,39 @@

import pytest

from cloudinit.distros.networking import BSDNetworking, LinuxNetworking
from cloudinit import net
from cloudinit.distros.networking import (
BSDNetworking,
LinuxNetworking,
Networking,
)

# See https://docs.pytest.org/en/stable/example
# /parametrize.html#parametrizing-conditional-raising
from contextlib import ExitStack as does_not_raise


@pytest.fixture
def generic_networking_cls():
    """Yield a direct ``Networking`` subclass which errors on /sys usage.

    This enables the direct testing of functionality only present on the
    ``Networking`` super-class, and provides a check on accidentally using /sys
    in that context.

    ``pytest.fixture`` is used rather than the deprecated
    ``pytest.yield_fixture``; plain fixtures support ``yield`` directly.
    """

    class TestNetworking(Networking):
        # Both abstract methods are stubbed out so the class can be
        # instantiated; tests are expected to patch what they need.
        def is_physical(self, *args, **kwargs):
            raise NotImplementedError

        def settle(self, *args, **kwargs):
            raise NotImplementedError

    error = AssertionError("Unexpectedly used /sys in generic networking code")
    # The patch stays active for as long as the fixture is in use, so any
    # call to get_sys_class_path during the test raises `error`.
    with mock.patch(
        "cloudinit.net.get_sys_class_path", side_effect=error,
    ):
        yield TestNetworking


@pytest.yield_fixture
Expand Down Expand Up @@ -40,3 +72,121 @@ def test_returns_true_if_device_is_physical(self, sys_class_net):
device_dir.join("device").write("")

assert LinuxNetworking().is_physical(devname)


class TestBSDNetworkingSettle:
    """`BSDNetworking.settle` must be callable without raising."""

    def test_settle_doesnt_error(self):
        networking = BSDNetworking()
        # This also implicitly tests that it doesn't use subp.subp
        networking.settle()


@pytest.mark.usefixtures("sys_class_net")
@mock.patch("cloudinit.distros.networking.util.udevadm_settle", autospec=True)
class TestLinuxNetworkingSettle:
    """Check the arguments `LinuxNetworking.settle` hands to udevadm_settle."""

    def test_no_arguments(self, m_udevadm_settle):
        networking = LinuxNetworking()
        networking.settle()

        # Without a device name, `exists=None` is passed straight through.
        expected_calls = [mock.call(exists=None)]
        assert expected_calls == m_udevadm_settle.call_args_list

    def test_exists_argument(self, m_udevadm_settle):
        devname = "ens3"
        LinuxNetworking().settle(exists=devname)

        # The device name should have been converted via net.sys_dev_path.
        expected_calls = [mock.call(exists=net.sys_dev_path(devname))]
        assert expected_calls == m_udevadm_settle.call_args_list


class TestNetworkingWaitForPhysDevs:
    """Exercise `Networking.wait_for_physdevs` with `settle` mocked out."""

    @pytest.fixture
    def wait_for_physdevs_netcfg(self):
        """This config is shared across all the tests in this class."""

        def ethernet(mac, name, driver=None, device_id=None):
            # Build one v2 "ethernets" entry; optional match keys are only
            # added when a truthy value is supplied.
            match = {"macaddress": mac}
            if driver:
                match["driver"] = driver
            if device_id:
                match["device_id"] = device_id
            return {"set-name": name, "match": match}

        physdevs = [
            ["aa:bb:cc:dd:ee:ff", "eth0", "virtio", "0x1000"],
            ["00:11:22:33:44:55", "ens3", "e1000", "0x1643"],
        ]
        ethernets = {}
        for mac, name, driver, device_id in physdevs:
            ethernets[name] = ethernet(mac, name, driver, device_id)
        return {"version": 2, "ethernets": ethernets}

    def test_skips_settle_if_all_present(
        self, generic_networking_cls, wait_for_physdevs_netcfg,
    ):
        networking = generic_networking_cls()
        all_present = {
            "aa:bb:cc:dd:ee:ff": "eth0",
            "00:11:22:33:44:55": "ens3",
        }
        with mock.patch.object(
            networking,
            "get_interfaces_by_mac",
            side_effect=iter([all_present]),
        ), mock.patch.object(
            networking, "settle", autospec=True
        ) as m_settle:
            networking.wait_for_physdevs(wait_for_physdevs_netcfg)
            assert 0 == m_settle.call_count

    def test_calls_udev_settle_on_missing(
        self, generic_networking_cls, wait_for_physdevs_netcfg,
    ):
        networking = generic_networking_cls()
        # first call ens3 is missing
        only_eth0 = {"aa:bb:cc:dd:ee:ff": "eth0"}
        # second call has both
        both_present = {
            "aa:bb:cc:dd:ee:ff": "eth0",
            "00:11:22:33:44:55": "ens3",
        }
        with mock.patch.object(
            networking,
            "get_interfaces_by_mac",
            side_effect=iter([only_eth0, both_present]),
        ), mock.patch.object(
            networking, "settle", autospec=True
        ) as m_settle:
            networking.wait_for_physdevs(wait_for_physdevs_netcfg)
            m_settle.assert_called_with(exists="ens3")

    @pytest.mark.parametrize(
        "strict,expectation",
        [(True, pytest.raises(RuntimeError)), (False, does_not_raise())],
    )
    def test_retrying_and_strict_behaviour(
        self,
        strict,
        expectation,
        generic_networking_cls,
        wait_for_physdevs_netcfg,
    ):
        networking = generic_networking_cls()
        # No interfaces are ever reported, so every retry round must settle
        # once for each expected device.
        with mock.patch.object(
            networking, "get_interfaces_by_mac", return_value={}
        ), mock.patch.object(
            networking, "settle", autospec=True
        ) as m_settle:
            with expectation:
                networking.wait_for_physdevs(
                    wait_for_physdevs_netcfg, strict=strict
                )

        expected_settles = 5 * len(wait_for_physdevs_netcfg["ethernets"])
        assert expected_settles == m_settle.call_count
38 changes: 0 additions & 38 deletions cloudinit/net/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import logging
import os
import re
from functools import partial

from cloudinit import subp
from cloudinit import util
Expand Down Expand Up @@ -494,43 +493,6 @@ def _version_2(netcfg):
raise RuntimeError('Unknown network config version: %s' % version)


def wait_for_physdevs(netcfg, strict=True):
    """Wait for the physical devices named in `netcfg` to be present.

    Up to 5 rounds of settling are performed. In each round one
    `util.udevadm_settle` call is made per missing device, after which the
    set of present MAC addresses is re-read.

    :param netcfg:
        The network config handed to `extract_physdevs` to obtain the
        expected physical devices.
    :param strict:
        If true, raise a `RuntimeError` when devices are still missing after
        waiting; otherwise only a warning is logged.
    :raises RuntimeError:
        If `strict` is true and not all expected devices are present.
    """
    physdevs = extract_physdevs(netcfg)

    # map of expected mac addr -> iface name
    expected_ifaces = {iface[0]: iface[1] for iface in physdevs}
    expected_macs = set(expected_ifaces)

    # set of current macs
    present_macs = get_interfaces_by_mac().keys()

    # compare the set of expected mac address values to
    # the current macs present; we only check MAC as cloud-init
    # has not yet renamed interfaces and the netcfg may include
    # such renames.
    for _ in range(0, 5):
        if expected_macs.issubset(present_macs):
            LOG.debug('net: all expected physical devices present')
            return

        missing = expected_macs.difference(present_macs)
        LOG.debug('net: waiting for expected net devices: %s', missing)
        for mac in missing:
            # trigger a settle, unless this interface exists
            syspath = sys_dev_path(expected_ifaces[mac])
            settle = partial(util.udevadm_settle, exists=syspath)
            msg = 'Waiting for udev events to settle or %s exists' % syspath
            util.log_time(LOG.debug, msg, func=settle)

        # update present_macs after settles
        present_macs = get_interfaces_by_mac().keys()

    # The last round refreshed present_macs without checking it; re-evaluate
    # here so devices which appeared during the final settle are not
    # reported as missing from stale data.
    missing = expected_macs.difference(present_macs)
    if not missing:
        LOG.debug('net: all expected physical devices present')
        return

    msg = 'Not all expected physical devices present: %s' % missing
    LOG.warning(msg)
    if strict:
        raise RuntimeError(msg)


def apply_network_config_names(netcfg, strict_present=True, strict_busy=True):
"""read the network config and rename devices accordingly.
if strict_present is false, then do not raise exception if no devices
Expand Down
Loading