Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 45 additions & 24 deletions cloudinit/sources/DataSourceAzure.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,10 @@ def crawl_metadata(self):
candidates = [self.seed_dir]
if os.path.isfile(REPROVISION_MARKER_FILE):
candidates.insert(0, "IMDS")
msg = ('Reprovision marker file already exists '
'before crawl of Azure metadata')
LOG.warning(msg)
report_diagnostic_event(msg)
Comment on lines +410 to +411
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like we're logging alongside almost every `report_diagnostic_event` call; I wonder if it might make more sense to change the signature of the function to `report_diagnostic_event(msg, log_func)` and then have, for example, this call look like the suggestion below?

Suggested change
LOG.warning(msg)
report_diagnostic_event(msg)
report_diagnostic_event(msg, LOG.warning)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a very good suggestion @OddBloke! I'll implement that in the separate PR that will focus on increased logging/diagnostic reports.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the suggestion on how to split up the PR. I'll split it into two PRs: one that is goal-state related and another that is logging/telemetry related.

candidates.extend(list_possible_azure_ds_devs())
if ddir:
candidates.append(ddir)
Expand All @@ -426,17 +430,19 @@ def crawl_metadata(self):
ret = load_azure_ds_dir(cdev)

except NonAzureDataSource:
report_diagnostic_event(
"Did not find Azure data source in %s" % cdev)
msg = "Did not find Azure data source in %s" % cdev
LOG.debug(msg)
report_diagnostic_event(msg)
continue
except BrokenAzureDataSource as exc:
msg = 'BrokenAzureDataSource: %s' % exc
LOG.error(msg)
report_diagnostic_event(msg)
raise sources.InvalidMetaDataException(msg)
except util.MountFailedError:
msg = '%s was not mountable' % cdev
report_diagnostic_event(msg)
LOG.warning(msg)
report_diagnostic_event(msg)
continue

perform_reprovision = reprovision or self._should_reprovision(ret)
Expand All @@ -459,16 +465,24 @@ def crawl_metadata(self):
'userdata_raw': userdata_raw})
found = cdev

LOG.debug("found datasource in %s", cdev)
if perform_reprovision:
msg = "found datasource in IMDS"
else:
msg = "found datasource in %s" % cdev
LOG.debug(msg)
report_diagnostic_event(msg)
break

if not found:
msg = 'No Azure metadata found'
LOG.error(msg)
report_diagnostic_event(msg)
raise sources.InvalidMetaDataException(msg)

if found == ddir:
LOG.debug("using files cached in %s", ddir)
msg = "using files cached in %s" % ddir
LOG.debug(msg)
report_diagnostic_event(msg)

seed = _get_random_seed()
if seed:
Expand All @@ -488,8 +502,9 @@ def crawl_metadata(self):
azure_ds_reporter) as lease:
self._report_ready(lease=lease)
except Exception as e:
report_diagnostic_event(
"exception while reporting ready: %s" % e)
msg = "exception while reporting ready: %s" % e
LOG.error(msg)
report_diagnostic_event(msg)
raise
return crawled_data

Expand Down Expand Up @@ -617,10 +632,11 @@ def exc_cb(msg, exception):
else:
# If we get an exception while trying to call IMDS, we call
# DHCP and setup the ephemeral network to acquire a new IP.
report_diagnostic_event("poll IMDS with %s failed. "
"Exception: %s and code: %s" %
(msg, exception.cause,
exception.code))
evt_msg = ("poll IMDS with %s failed. "
"Exception: %s and code: %s" %
(msg, exception.cause, exception.code))
LOG.warning(evt_msg)
report_diagnostic_event(evt_msg)
return False

LOG.debug("poll IMDS failed with an unexpected exception: %s",
Expand All @@ -644,8 +660,8 @@ def exc_cb(msg, exception):
try:
nl_sock = netlink.create_bound_netlink_socket()
except netlink.NetlinkCreateSocketError as e:
report_diagnostic_event(e)
LOG.warning(e)
report_diagnostic_event(e)
self._ephemeral_dhcp_ctx.clean_network()
break

Expand All @@ -665,8 +681,8 @@ def exc_cb(msg, exception):
netlink.wait_for_media_disconnect_connect(
nl_sock, lease['interface'])
except AssertionError as error:
report_diagnostic_event(error)
LOG.error(error)
report_diagnostic_event(error)
break

vnet_switched = True
Expand All @@ -692,10 +708,12 @@ def exc_cb(msg, exception):
nl_sock.close()

if vnet_switched:
report_diagnostic_event("attempted dhcp %d times after reuse" %
dhcp_attempts)
report_diagnostic_event("polled imds %d times after reuse" %
self.imds_poll_counter)
msg = "attempted dhcp %d times after reuse" % dhcp_attempts
LOG.debug(msg)
report_diagnostic_event(msg)
msg = "polled imds %d times after reuse" % self.imds_poll_counter
LOG.debug(msg)
report_diagnostic_event(msg)

return return_val

Expand Down Expand Up @@ -768,12 +786,12 @@ def _negotiate(self):
try:
fabric_data = metadata_func()
except Exception as e:
LOG.error(
"Error communicating with Azure fabric; You may experience "
"connectivity issues.", exc_info=True)
report_diagnostic_event(
"Error communicating with Azure fabric; You may experience "
"connectivity issues: %s" % e)
LOG.warning(
"Error communicating with Azure fabric; You may experience "
"connectivity issues.", exc_info=True)
return False

util.del_file(REPORTED_READY_MARKER_FILE)
Expand Down Expand Up @@ -1132,6 +1150,7 @@ def read_azure_ovf(contents):
dom = minidom.parseString(contents)
except Exception as e:
error_str = "Invalid ovf-env.xml: %s" % e
LOG.error(error_str)
report_diagnostic_event(error_str)
raise BrokenAzureDataSource(error_str)

Expand Down Expand Up @@ -1414,7 +1433,9 @@ def get_metadata_from_imds(fallback_nic, retries):
azure_ds_reporter, fallback_nic):
return util.log_time(**kwargs)
except Exception as e:
report_diagnostic_event("exception while getting metadata: %s" % e)
msg = "exception while getting metadata: %s" % e
LOG.error(msg)
report_diagnostic_event(msg)
raise


Expand All @@ -1429,15 +1450,15 @@ def _get_metadata_from_imds(retries):
retries=retries, exception_cb=retry_on_url_exc)
except Exception as e:
msg = 'Ignoring IMDS instance metadata: %s' % e
LOG.error(msg)
report_diagnostic_event(msg)
LOG.debug(msg)
return {}
try:
return util.load_json(str(response))
except json.decoder.JSONDecodeError as e:
report_diagnostic_event('non-json imds response' % e)
LOG.warning(
LOG.error(
'Ignoring non-json IMDS instance metadata: %s', str(response))
report_diagnostic_event('non-json imds response' % e)
return {}


Expand Down
Loading