Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 21 additions & 26 deletions cloudinit/sources/DataSourceVultr.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from cloudinit import log as log
from cloudinit import sources
from cloudinit import util
from cloudinit import version

import cloudinit.sources.helpers.vultr as vultr

Expand All @@ -16,7 +17,11 @@
'url': 'http://169.254.169.254',
'retries': 30,
'timeout': 2,
'wait': 2
'wait': 2,
'user-agent': 'Cloud-Init/%s - OS: %s Variant: %s' %
(version.version_string(),
util.system_info()['system'],
util.system_info()['variant'])
}


Expand All @@ -40,21 +45,18 @@ def _get_data(self):
LOG.debug("Machine is a Vultr instance")

# Fetch metadata
md = self.get_metadata()

self.metadata_full = md
self.metadata['instanceid'] = md['instanceid']
self.metadata['local-hostname'] = md['hostname']
self.metadata['public-keys'] = md["public-keys"]
self.userdata_raw = md["user-data"]
self.metadata = self.get_metadata()
self.metadata['instance-id'] = self.metadata['instanceid']
self.metadata['local-hostname'] = self.metadata['hostname']
self.userdata_raw = self.metadata["user-data"]

# Generate config and process data
self.get_datasource_data(md)
self.get_datasource_data(self.metadata)

# Dump some data so diagnosing failures is manageable
LOG.debug("Vultr Vendor Config:")
LOG.debug(md['vendor-data']['config'])
LOG.debug("SUBID: %s", self.metadata['instanceid'])
LOG.debug(util.json_dumps(self.metadata['vendor-data']))
LOG.debug("SUBID: %s", self.metadata['instance-id'])
LOG.debug("Hostname: %s", self.metadata['local-hostname'])
if self.userdata_raw is not None:
LOG.debug("User-Data:")
Expand All @@ -64,14 +66,11 @@ def _get_data(self):

# Process metadata
def get_datasource_data(self, md):
# Grab config
config = md['vendor-data']['config']

# Generate network config
self.netcfg = vultr.generate_network_config(md['interfaces'])

# This requires info generated in the vendor config
user_scripts = vultr.generate_user_scripts(md, self.netcfg['config'])
# Grab vendordata
self.vendordata_raw = md['vendor-data']

# Default hostname is "guest" for whitelabel
if self.metadata['local-hostname'] == "":
Expand All @@ -81,18 +80,13 @@ def get_datasource_data(self, md):
if self.userdata_raw == "":
self.userdata_raw = None

# Assemble vendor-data
# This adds provided scripts and the config
self.vendordata_raw = []
self.vendordata_raw.extend(user_scripts)
self.vendordata_raw.append("#cloud-config\n%s" % config)

# Get the metadata by flag
def get_metadata(self):
return vultr.get_metadata(self.ds_cfg['url'],
self.ds_cfg['timeout'],
self.ds_cfg['retries'],
self.ds_cfg['wait'])
self.ds_cfg['wait'],
self.ds_cfg['user-agent'])

# Compare subid as instance id
def check_instance_id(self, sys_cfg):
Expand Down Expand Up @@ -137,11 +131,12 @@ def get_datasource_list(depends):
md = vultr.get_metadata(BUILTIN_DS_CONFIG['url'],
BUILTIN_DS_CONFIG['timeout'],
BUILTIN_DS_CONFIG['retries'],
BUILTIN_DS_CONFIG['wait'])
config = md['vendor-data']['config']
BUILTIN_DS_CONFIG['wait'],
BUILTIN_DS_CONFIG['user-agent'])
config = md['vendor-data']
sysinfo = vultr.get_sysinfo()

print(util.json_dumps(sysinfo))
print(config)
print(util.json_dumps(config))

# vi: ts=4 expandtab
68 changes: 17 additions & 51 deletions cloudinit/sources/helpers/vultr.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,17 @@


@lru_cache()
def get_metadata(url, timeout, retries, sec_between):
def get_metadata(url, timeout, retries, sec_between, agent):
# Bring up interface
try:
with EphemeralDHCPv4(connectivity_url_data={"url": url}):
# Fetch the metadata
v1 = read_metadata(url, timeout, retries, sec_between)
v1 = read_metadata(url, timeout, retries, sec_between, agent)
except (NoDHCPLeaseError) as exc:
LOG.error("Bailing, DHCP Exception: %s", exc)
raise

v1_json = json.loads(v1)
metadata = v1_json

return metadata
return json.loads(v1)


# Read the system information from SMBIOS
Expand Down Expand Up @@ -64,12 +61,20 @@ def is_vultr():


# Read Metadata endpoint
def read_metadata(url, timeout, retries, sec_between):
def read_metadata(url, timeout, retries, sec_between, agent):
url = "%s/v1.json" % url

# Announce os details so we can handle non Vultr origin
# images and provide correct vendordata generation.
headers = {
'Metadata-Token': 'cloudinit',
'User-Agent': agent
}

response = url_helper.readurl(url,
timeout=timeout,
retries=retries,
headers={'Metadata-Token': 'vultr'},
headers=headers,
sec_between=sec_between)

if not response.ok():
Expand Down Expand Up @@ -114,9 +119,9 @@ def generate_network_config(interfaces):
public = generate_public_network_interface(interfaces[0])
network['config'].append(public)

# Prepare interface 1, private
if len(interfaces) > 1:
private = generate_private_network_interface(interfaces[1])
# Prepare additional interfaces, private
for i in range(1, len(interfaces)):
private = generate_private_network_interface(interfaces[i])
network['config'].append(private)

return network
Expand All @@ -141,7 +146,7 @@ def generate_public_network_interface(interface):
"control": "auto"
},
{
"type": "dhcp6",
"type": "ipv6_slaac",
"control": "auto"
},
]
Expand Down Expand Up @@ -187,7 +192,6 @@ def generate_private_network_interface(interface):
"name": interface_name,
"type": "physical",
"mac_address": interface['mac'],
"accept-ra": 1,
"subnets": [
{
"type": "static",
Expand All @@ -201,42 +205,4 @@ def generate_private_network_interface(interface):
return netcfg


# This is for the vendor and startup scripts
def generate_user_scripts(md, network_config):
user_scripts = []

# Raid 1 script
if md['vendor-data']['raid1-script']:
user_scripts.append(md['vendor-data']['raid1-script'])

# Enable multi-queue on linux
if util.is_Linux() and md['vendor-data']['ethtool-script']:
ethtool_script = md['vendor-data']['ethtool-script']

# Tool location
tool = "/opt/vultr/ethtool"

# Go through the interfaces
for netcfg in network_config:
# If the interface has a mac and is physical
if "mac_address" in netcfg and netcfg['type'] == "physical":
# Set its multi-queue to num of cores as per RHEL Docs
name = netcfg['name']
command = "%s -L %s combined $(nproc --all)" % (tool, name)
ethtool_script = '%s\n%s' % (ethtool_script, command)

user_scripts.append(ethtool_script)

# This is for vendor scripts
if md['vendor-data']['vendor-script']:
user_scripts.append(md['vendor-data']['vendor-script'])

# Startup script
script = md['startup-script']
if script and script != "echo No configured startup script":
user_scripts.append(script)

return user_scripts


# vi: ts=4 expandtab
30 changes: 12 additions & 18 deletions tests/unittests/test_datasource/test_vultr.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,8 @@
'raid1-script': '',
'user-data': [
],
'vendor-data': {
'vendor-script': '',
'ethtool-script': '',
'config': {
'vendor-data': [
{
'package_upgrade': 'true',
'disable_root': 0,
'ssh_pwauth': 1,
Expand All @@ -83,7 +81,7 @@
}
}
}
}
]
}

VULTR_V1_2 = {
Expand Down Expand Up @@ -155,11 +153,8 @@
'user-data': [
],

'vendor-data': {
'vendor-script': '',
'ethtool-script': '',
'raid1-script': '',
'config': {
'vendor-data': [
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change in types from dict -> list makes me a little leery in this unittest as cloud-init handles those vendordata types differently depending on list vs dict processing in convert_vendordata. I'm not certain if this is just a unittest change for you or if what the Vultr platform actually provides as vendordata to the VM has changed type. Please provide sudo cloud-init query vendordata to confirm for us what structure of data is really surfaced.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So basically before we were providing a json dict with defined endpoints for each script since we were doing little modifications here and there. With the new format we moved to just providing a json array of strings each being a specific script or config.

So how we provide it would be an array of strings; here I left it a dict to be easily modifiable if things change or need to be adjusted, and convert it to the expected format below. If you would prefer I just leave it in the format that is fed to cloud-init I can go ahead and make those changes.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the clarification here. I think this is ok, but I really need to see the output of both sudo cloud-init query vendordata and possibly sudo cloud-init query merged_cfg. Normally vendordata in datasources is structured such that it merges on top of (and overrides) config defaults that are originally present in /etc/cloud/cloud.cfg and /etc/cloud/cloud.cfg.d/*.cfg. Your format seems to diverge from this typical behavior for Datasources. Your first iteration of DatasourceVultr also seems to diverge from this format anyway, so I don't think you are regressing anything. But, I think we might have to consider how you are using vendordata because this seems a bit different than how almost all other platforms treat vendordata. The docs remind us that vendordata can be disabled by user-data and that vendordata shouldn't be "required" to make a system operational. I don't know if that necessarily applies to your case, but it's a tension we should be aware of here.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

root@t2:~# cloud-init query vendordata
[
 "#cloud-config\n{\"package_upgrade\":\"true\",\"disable_root\":0,\"ssh_pwauth\":1,\"manage_etc_hosts\":\"true\",\"chpasswd\":{\"expire\":false,\"list\":[\"root:$6$snip\"]},\"system_info\":{\"default_user\":{\"name\":\"root\"}}}",
 "#!/bin/bash\n\n#\n# Functions\n#\n\nfunction check_command_exists () {\n    OUT=\"0\"\n    if ! [ -z \"$(which $1)\" ]; then\n        OUT=\"1\"\n    fi\n    echo \"${OUT}\"\n}\n\nfunction print () {\n    echo \"${@}\" >> /var/log/cloudinit_networking.log\n    echo \"${@}\"\n}\n\nfunction get_interfaces () {\n    if [ -z \"${INTERFACES}\" ]; then\n        INTERFACES=($(ls -l /sys/class/net/ | grep \"/net/e\" | awk -F' ' '{print $9}'))\n    fi\n}\n\n#\n# Start script\n#\n\n# Get the interface list\nget_interfaces\n\nif [ \"$(check_command_exists ethtool)\" == \"1\" ]; then\n    for int in \"${INTERFACES[@]}\"\n    do\n        ethtool -L ${int} combined $(nproc --all)\n    done\nelse\n    print \"Failed to find ethtool, cannot configure multi-queue!\"\nfi\n\necho \"\" > /usr/lib/sysctl.d/90-vultr.conf\necho \"# Accept IPv6 advertisements when forwarding is enabled\" >> /usr/lib/sysctl.d/90-vultr.conf\n\nfor int in \"${INTERFACES[@]}\"\ndo\n    echo \"net.ipv6.conf.${int}.accept_ra = 2\" >> /usr/lib/sysctl.d/90-vultr.conf\ndone\n\necho 'net.core.default_qdisc=fq' >> /usr/lib/sysctl.d/90-vultr.conf\necho 'net.ipv4.tcp_congestion_control=bbr' >> /usr/lib/sysctl.d/90-vultr.conf\necho \"\" >> /usr/lib/sysctl.d/90-vultr.conf"
]
root@t2:~# cloud-init query merged_cfg
{
 "_doc": "Merged cloud-init system config from /etc/cloud/cloud.cfg and /etc/cloud/cloud.cfg.d/",
 "_log": [
  "[loggers]\nkeys=root,cloudinit\n\n[handlers]\nkeys=consoleHandler,cloudLogHandler\n\n[formatters]\nkeys=simpleFormatter,arg0Formatter\n\n[logger_root]\nlevel=DEBUG\nhandlers=consoleHandler,cloudLogHandler\n\n[logger_cloudinit]\nlevel=DEBUG\nqualname=cloudinit\nhandlers=\npropagate=1\n\n[handler_consoleHandler]\nclass=StreamHandler\nlevel=WARNING\nformatter=arg0Formatter\nargs=(sys.stderr,)\n\n[formatter_arg0Formatter]\nformat=%(asctime)s - %(filename)s[%(levelname)s]: %(message)s\n\n[formatter_simpleFormatter]\nformat=[CLOUDINIT] %(filename)s[%(levelname)s]: %(message)s\n",
  "[handler_cloudLogHandler]\nclass=FileHandler\nlevel=DEBUG\nformatter=arg0Formatter\nargs=('/var/log/cloud-init.log', 'a', 'UTF-8')\n",
  "[handler_cloudLogHandler]\nclass=handlers.SysLogHandler\nlevel=DEBUG\nformatter=simpleFormatter\nargs=(\"/dev/log\", handlers.SysLogHandler.LOG_USER)\n"
 ],
 "cloud_config_modules": [
  "emit_upstart",
  "snap",
  "ssh-import-id",
  "locale",
  "set-passwords",
  "grub-dpkg",
  "apt-pipelining",
  "apt-configure",
  "ntp",
  "timezone",
  "disable-ec2-metadata",
  "runcmd",
  "byobu"
 ],
 "cloud_final_modules": [
  "package-update-upgrade-install",
  "fan",
  "landscape",
  "lxd",
  "puppet",
  "chef",
  "mcollective",
  "salt-minion",
  "reset_rmc",
  "refresh_rmc_and_interface",
  "rightscale_userdata",
  "scripts-vendor",
  "scripts-per-once",
  "scripts-per-boot",
  "scripts-per-instance",
  "scripts-user",
  "ssh-authkey-fingerprints",
  "keys-to-console",
  "phone-home",
  "final-message",
  "power-state-change"
 ],
 "cloud_init_modules": [
  "migrator",
  "seed_random",
  "bootcmd",
  "write-files",
  "growpart",
  "resizefs",
  "disk_setup",
  "mounts",
  "set_hostname",
  "update_hostname",
  "update_etc_hosts",
  "ca-certs",
  "rsyslog",
  "users-groups",
  "ssh"
 ],
 "datasource_list": [
  "Vultr",
  "None"
 ],
 "def_log_file": "/var/log/cloud-init.log",
 "disable_root": true,
 "log_cfgs": [
  [
   "[loggers]\nkeys=root,cloudinit\n\n[handlers]\nkeys=consoleHandler,cloudLogHandler\n\n[formatters]\nkeys=simpleFormatter,arg0Formatter\n\n[logger_root]\nlevel=DEBUG\nhandlers=consoleHandler,cloudLogHandler\n\n[logger_cloudinit]\nlevel=DEBUG\nqualname=cloudinit\nhandlers=\npropagate=1\n\n[handler_consoleHandler]\nclass=StreamHandler\nlevel=WARNING\nformatter=arg0Formatter\nargs=(sys.stderr,)\n\n[formatter_arg0Formatter]\nformat=%(asctime)s - %(filename)s[%(levelname)s]: %(message)s\n\n[formatter_simpleFormatter]\nformat=[CLOUDINIT] %(filename)s[%(levelname)s]: %(message)s\n",
   "[handler_cloudLogHandler]\nclass=FileHandler\nlevel=DEBUG\nformatter=arg0Formatter\nargs=('/var/log/cloud-init.log', 'a', 'UTF-8')\n"
  ]
 ],
 "output": {
  "all": "| tee -a /var/log/cloud-init-output.log"
 },
 "preserve_hostname": false,
 "syslog_fix_perms": [
  "syslog:adm",
  "root:adm",
  "root:wheel",
  "root:root"
 ],
 "users": [
  "default"
 ],
 "vendor_data": {
  "enabled": true,
  "prefix": []
 },
 "vendor_data2": {
  "enabled": true,
  "prefix": []
 }
}

So, in terms of how we are using vendordata. We are using it to pass preferred settings (updates, packages, basic settings) and settings that the user chose for deployment. Outside of the root password, nothing is actually required for the system to be operational.

We do add scripts in afterwards to handle specific tasks at boot such as configuring second drives, network optimizations that do not exist as options in the config, and misc other things. The reason we do this is because we offer a marketplace and custom ISO features that allow users to end up with an image not made by us that we have no insight into but we would like the same tweaks and customizations/features we have available for our own images to still be available in these cases with the least user action required. Cloud-init vendordata looked like a suitable way to provide the startup scripts we have traditionally used to users own images with no modification required.

I am aware that user-data overwrites this. I wasn't sure if it was merged in or if the presence of user-data overrode the existing vendordata altogether, and if this applied only to that actual cloud-init config or the entire array, including scripts. If I could get clarification on that it would greatly help! Regardless, the vendordata only results in an optimal system at this current moment and the system should be functional whether or not it runs (if not inaccessible, barring the user accounted for a password in user-data).

We were also looking at this as an avenue to using the widely available cloud images without having to worry about building them with our requirements and having a way to make changes for compatibility purposes to OS's that need it without needing to directly modify the image.

Let me know if you have any suggestions or comment on how we intended to use this here.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@blackboxsw was wondering if I could get a follow up to this portion.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, in terms of how we are using vendordata. We are using it pass preferred settings (updates, packages, basic settings) and settings that the user chose for deployment. Outside of the root password, nothing is actually required for the system to be operational.

This is a perfectly applicable use of vendor data. Thanks for the context.

We were also looking at this as an avenue to using the widely available cloud images without having to worry about building them with our requirements and having a way to make changes for compatibility purposes to OS's that need it without needing to directly modify the image.

Thank you. This too makes sense. I just set up a Vultr account to poke around and see that the images you are hosting don't yet expose cloud-init to the end-user in the compute VM launched. :/ At some point if there is access to a Vultr instance under test with access to your metadata service I'd ❤️ to poke around for a few minutes to get my bearings, but that doesn't need to happen for this PR to land.

I also didn't grok initially that you were already packing a list in as vendordata_raw in the first iteration of Vultr datasource. So I got a bit prickly thinking that list wouldn't work for the vendordata handling, but I was wrong in that regard.

I am aware that user-data overwrites this. I wasn't sure if it was merged in or if the presence of user-data overrided the existing vendordata altogether, and if this applied only to that actual cloud-init config or the entire array, including scripts.

Correct, user-data provided #cloud-config values will overwrite/override specific #cloud-config values from vendor-data or meta-data, it will not blow away the whole of the vendordata list of "parts".
But user-data can also completely disable honoring any vendordata at all by providing the following:

#cloud-config
vendor_data:
  enabled: false

If this is the case on a VM, you'll see the log below (and none of your vendor scripts would be run, I believe).
2021-09-23 04:06:19,234 - stages.py[DEBUG]: vendordata consumption is disabled.

If not set enabled: false by a user, I'd expect to see
2021-09-23 04:13:26,355 - stages.py[DEBUG]: vendordata will be consumed. disabled_handlers=None

As long as you aren't getting WARNING logs in /var/log/cloud-init.log or /var/log/cloud-init-output.log for failures to handle the parts of your vendordata in the "init-network/consume-vendor-data" section of logs, you should be in good shape here with providing both scripts and #cloud-config items in your vendordata_raw list.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great, I am not too worried about being able to disable it. I am aware of that and enjoy the idea the user always has an option to turn it off completely!

As for the account, I'll get you set up first thing. Just do me a favor and open a ticket from your account and request it be forwarded to the Marketplace queue.

{
'package_upgrade': 'true',
'disable_root': 0,
'ssh_pwauth': 1,
Expand All @@ -175,7 +170,7 @@
}
}
}
}
]
}

SSH_KEYS_1 = [
Expand Down Expand Up @@ -217,7 +212,7 @@
'accept-ra': 1,
'subnets': [
{'type': 'dhcp', 'control': 'auto'},
{'type': 'dhcp6', 'control': 'auto'}
{'type': 'ipv6_slaac', 'control': 'auto'}
],
}
]
Expand All @@ -237,14 +232,13 @@
'accept-ra': 1,
'subnets': [
{'type': 'dhcp', 'control': 'auto'},
{'type': 'dhcp6', 'control': 'auto'}
{'type': 'ipv6_slaac', 'control': 'auto'}
],
},
{
'name': 'eth1',
'type': 'physical',
'mac_address': '5a:00:03:1b:4e:ca',
'accept-ra': 1,
'subnets': [
{
"type": "static",
Expand All @@ -270,12 +264,12 @@ def setUp(self):
super(TestDataSourceVultr, self).setUp()

# Stored as a dict to make it easier to maintain
raw1 = json.dumps(VULTR_V1_1['vendor-data']['config'])
raw2 = json.dumps(VULTR_V1_2['vendor-data']['config'])
raw1 = json.dumps(VULTR_V1_1['vendor-data'][0])
raw2 = json.dumps(VULTR_V1_2['vendor-data'][0])

# Make expected format
VULTR_V1_1['vendor-data']['config'] = raw1
VULTR_V1_2['vendor-data']['config'] = raw2
VULTR_V1_1['vendor-data'] = [raw1]
VULTR_V1_2['vendor-data'] = [raw2]

self.tmp = self.tmp_dir()

Expand Down