Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 16 additions & 16 deletions pycloudlib/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def __init__(self, key_pair):
self.port = '22'
self.username = 'ubuntu'
self.connect_timeout = 60
self.boot_id = ''

@property
@abstractmethod
Expand Down Expand Up @@ -99,7 +100,7 @@ def wait(self, *, raise_on_cloudinit_failure: bool = True):
complete successfully.
"""
self._wait_for_instance_start()
self._wait_for_execute()
self._wait_for_boot_id()
self._wait_for_cloudinit(raise_on_failure=raise_on_cloudinit_failure)

@abstractmethod
Expand Down Expand Up @@ -370,29 +371,28 @@ def _tmpfile(self):
self._tmp_count += 1
return path

def _wait_for_execute(self):
def _wait_for_boot_id(self):
"""Wait until we can execute a command in the instance."""
self._log.debug('_wait_for_execute to complete')
self._log.debug('_wait_for_boot_id to complete')

test_instance_command = "whoami"
result = self.execute(test_instance_command)
if result.failed:
retries = 10
while retries:
result = self.execute(test_instance_command)

if result.ok:
boot_id_command = "cat /proc/sys/kernel/random/boot_id"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thinking about this further, part of the reason whoami is used here is that it's present on pretty much any Unix system: failures to run it almost certainly mean that we don't have access to the system. This replacement will never succeed on non-Linux systems, such as *BSD, where we'll block for 5 minutes and then fail.

We also won't have self.boot_id set on instances which aren't wait'd for at all, and it will be stale on subsequent boots of instances which are rebooted by means other than pycloudlib: cc_power_state or even a simple .execute("sudo reboot"). This will make using it tricky: it won't necessarily be present when we restart, so we'd have to determine it at that point regardless. I wonder if this indicates that instead of storing it at the instance-level we should contain it to the restart path, something like: determine current boot ID, trigger a restart, then call self.wait in a loop which we break out of once the post-wait boot ID doesn't match?

(As an aside, we should consider if whatever we do here allows us to drop (or substantially simplify) the reboot detection code in test_power_state_change in cloud-init; if we can't readily use it there, then that may mean we have not found the right abstraction.)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought it might be possibly useful in other ways of checking boot too, but you're right in that it'd be a lot more work to keep the boot id current. So I agree, let's restrict it to restart.

Your parenthetical is right too. The boot detecting code was initially added there because of this, so a good test of this should be to remove that code.

retries = 300
while retries:
result = self.execute(boot_id_command)
if result.ok:
boot_id = result.stdout.strip()
if boot_id != self.boot_id:
self.boot_id = boot_id
break

retries -= 1
time.sleep(10)

if result.failed:
retries -= 1
time.sleep(1)
else:
raise OSError(
"{}\n{}".format(
"Instance can't be reached",
"Failed to execute {} command".format(
test_instance_command)
boot_id_command)
)
)

Expand Down
4 changes: 2 additions & 2 deletions pycloudlib/tests/test_instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def test_wait(self, raise_on_cloudinit_failure, concrete_instance_cls):
with mock.patch.multiple(
instance,
_wait_for_instance_start=mock.DEFAULT,
_wait_for_execute=mock.DEFAULT,
_wait_for_boot_id=mock.DEFAULT,
_wait_for_cloudinit=mock.DEFAULT,
) as mocks:
if raise_on_cloudinit_failure is not None:
Expand All @@ -76,7 +76,7 @@ def test_wait(self, raise_on_cloudinit_failure, concrete_instance_cls):
instance.wait()

assert 1 == mocks["_wait_for_instance_start"].call_count
assert 1 == mocks["_wait_for_execute"].call_count
assert 1 == mocks["_wait_for_boot_id"].call_count
assert 1 == mocks["_wait_for_cloudinit"].call_count
kwargs = mocks["_wait_for_cloudinit"].call_args[1]
if raise_on_cloudinit_failure is None:
Expand Down