summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnh Vo <anhvo@microsoft.com>2019-04-03 18:23:18 +0000
committerServer Team CI Bot <josh.powers+server-team-bot@canonical.com>2019-04-03 18:23:18 +0000
commit0d8c88393b51db6454491a379dcc2e691551217a (patch)
tree73b22343f430ef46bdf40f4b6163b45a5dcded8c
parent47c53002ea7a661c674c3e409357db7e8a00297a (diff)
downloadcloud-init-git-0d8c88393b51db6454491a379dcc2e691551217a.tar.gz
DatasourceAzure: add additional logging for azure datasource
Create an Azure logging decorator and use additional ReportEventStack context managers to provide additional logging details.
-rwxr-xr-x[-rw-r--r--]cloudinit/sources/DataSourceAzure.py231
-rwxr-xr-x[-rw-r--r--]cloudinit/sources/helpers/azure.py31
2 files changed, 179 insertions, 83 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index b4e3f061..d4230b3c 100644..100755
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -21,10 +21,14 @@ from cloudinit import net
from cloudinit.event import EventType
from cloudinit.net.dhcp import EphemeralDHCPv4
from cloudinit import sources
-from cloudinit.sources.helpers.azure import get_metadata_from_fabric
from cloudinit.sources.helpers import netlink
from cloudinit.url_helper import UrlError, readurl, retry_on_url_exc
from cloudinit import util
+from cloudinit.reporting import events
+
+from cloudinit.sources.helpers.azure import (azure_ds_reporter,
+ azure_ds_telemetry_reporter,
+ get_metadata_from_fabric)
LOG = logging.getLogger(__name__)
@@ -244,6 +248,7 @@ def set_hostname(hostname, hostname_command='hostname'):
util.subp([hostname_command, hostname])
+@azure_ds_telemetry_reporter
@contextlib.contextmanager
def temporary_hostname(temp_hostname, cfg, hostname_command='hostname'):
"""
@@ -290,6 +295,7 @@ class DataSourceAzure(sources.DataSource):
root = sources.DataSource.__str__(self)
return "%s [seed=%s]" % (root, self.seed)
+ @azure_ds_telemetry_reporter
def bounce_network_with_azure_hostname(self):
# When using cloud-init to provision, we have to set the hostname from
# the metadata and "bounce" the network to force DDNS to update via
@@ -315,6 +321,7 @@ class DataSourceAzure(sources.DataSource):
util.logexc(LOG, "handling set_hostname failed")
return False
+ @azure_ds_telemetry_reporter
def get_metadata_from_agent(self):
temp_hostname = self.metadata.get('local-hostname')
agent_cmd = self.ds_cfg['agent_command']
@@ -344,15 +351,18 @@ class DataSourceAzure(sources.DataSource):
LOG.debug("ssh authentication: "
"using fingerprint from fabirc")
- # wait very long for public SSH keys to arrive
- # https://bugs.launchpad.net/cloud-init/+bug/1717611
- missing = util.log_time(logfunc=LOG.debug,
- msg="waiting for SSH public key files",
- func=util.wait_for_files,
- args=(fp_files, 900))
-
- if len(missing):
- LOG.warning("Did not find files, but going on: %s", missing)
+ with events.ReportEventStack(
+ name="waiting-for-ssh-public-key",
+ description="wait for agents to retrieve ssh keys",
+ parent=azure_ds_reporter):
+ # wait very long for public SSH keys to arrive
+ # https://bugs.launchpad.net/cloud-init/+bug/1717611
+ missing = util.log_time(logfunc=LOG.debug,
+ msg="waiting for SSH public key files",
+ func=util.wait_for_files,
+ args=(fp_files, 900))
+ if len(missing):
+ LOG.warning("Did not find files, but going on: %s", missing)
metadata = {}
metadata['public-keys'] = key_value or pubkeys_from_crt_files(fp_files)
@@ -366,6 +376,7 @@ class DataSourceAzure(sources.DataSource):
subplatform_type = 'seed-dir'
return '%s (%s)' % (subplatform_type, self.seed)
+ @azure_ds_telemetry_reporter
def crawl_metadata(self):
"""Walk all instance metadata sources returning a dict on success.
@@ -467,6 +478,7 @@ class DataSourceAzure(sources.DataSource):
super(DataSourceAzure, self).clear_cached_attrs(attr_defaults)
self._metadata_imds = sources.UNSET
+ @azure_ds_telemetry_reporter
def _get_data(self):
"""Crawl and process datasource metadata caching metadata as attrs.
@@ -513,6 +525,7 @@ class DataSourceAzure(sources.DataSource):
# quickly (local check only) if self.instance_id is still valid
return sources.instance_id_matches_system_uuid(self.get_instance_id())
+ @azure_ds_telemetry_reporter
def setup(self, is_new_instance):
if self._negotiated is False:
LOG.debug("negotiating for %s (new_instance=%s)",
@@ -580,6 +593,7 @@ class DataSourceAzure(sources.DataSource):
if nl_sock:
nl_sock.close()
+ @azure_ds_telemetry_reporter
def _report_ready(self, lease):
"""Tells the fabric provisioning has completed """
try:
@@ -617,9 +631,14 @@ class DataSourceAzure(sources.DataSource):
def _reprovision(self):
"""Initiate the reprovisioning workflow."""
contents = self._poll_imds()
- md, ud, cfg = read_azure_ovf(contents)
- return (md, ud, cfg, {'ovf-env.xml': contents})
-
+ with events.ReportEventStack(
+ name="reprovisioning-read-azure-ovf",
+ description="read azure ovf during reprovisioning",
+ parent=azure_ds_reporter):
+ md, ud, cfg = read_azure_ovf(contents)
+ return (md, ud, cfg, {'ovf-env.xml': contents})
+
+ @azure_ds_telemetry_reporter
def _negotiate(self):
"""Negotiate with fabric and return data from it.
@@ -652,6 +671,7 @@ class DataSourceAzure(sources.DataSource):
util.del_file(REPROVISION_MARKER_FILE)
return fabric_data
+ @azure_ds_telemetry_reporter
def activate(self, cfg, is_new_instance):
address_ephemeral_resize(is_new_instance=is_new_instance,
preserve_ntfs=self.ds_cfg.get(
@@ -690,12 +710,14 @@ def _partitions_on_device(devpath, maxnum=16):
return []
+@azure_ds_telemetry_reporter
def _has_ntfs_filesystem(devpath):
ntfs_devices = util.find_devs_with("TYPE=ntfs", no_cache=True)
LOG.debug('ntfs_devices found = %s', ntfs_devices)
return os.path.realpath(devpath) in ntfs_devices
+@azure_ds_telemetry_reporter
def can_dev_be_reformatted(devpath, preserve_ntfs):
"""Determine if the ephemeral drive at devpath should be reformatted.
@@ -744,43 +766,59 @@ def can_dev_be_reformatted(devpath, preserve_ntfs):
(cand_part, cand_path, devpath))
return False, msg
+ @azure_ds_telemetry_reporter
def count_files(mp):
ignored = set(['dataloss_warning_readme.txt'])
return len([f for f in os.listdir(mp) if f.lower() not in ignored])
bmsg = ('partition %s (%s) on device %s was ntfs formatted' %
(cand_part, cand_path, devpath))
- try:
- file_count = util.mount_cb(cand_path, count_files, mtype="ntfs",
- update_env_for_mount={'LANG': 'C'})
- except util.MountFailedError as e:
- if "unknown filesystem type 'ntfs'" in str(e):
- return True, (bmsg + ' but this system cannot mount NTFS,'
- ' assuming there are no important files.'
- ' Formatting allowed.')
- return False, bmsg + ' but mount of %s failed: %s' % (cand_part, e)
-
- if file_count != 0:
- LOG.warning("it looks like you're using NTFS on the ephemeral disk, "
- 'to ensure that filesystem does not get wiped, set '
- '%s.%s in config', '.'.join(DS_CFG_PATH),
- DS_CFG_KEY_PRESERVE_NTFS)
- return False, bmsg + ' but had %d files on it.' % file_count
+
+ with events.ReportEventStack(
+ name="mount-ntfs-and-count",
+ description="mount-ntfs-and-count",
+ parent=azure_ds_reporter) as evt:
+ try:
+ file_count = util.mount_cb(cand_path, count_files, mtype="ntfs",
+ update_env_for_mount={'LANG': 'C'})
+ except util.MountFailedError as e:
+ evt.description = "cannot mount ntfs"
+ if "unknown filesystem type 'ntfs'" in str(e):
+ return True, (bmsg + ' but this system cannot mount NTFS,'
+ ' assuming there are no important files.'
+ ' Formatting allowed.')
+ return False, bmsg + ' but mount of %s failed: %s' % (cand_part, e)
+
+ if file_count != 0:
+ evt.description = "mounted and counted %d files" % file_count
+ LOG.warning("it looks like you're using NTFS on the ephemeral"
+ " disk, to ensure that filesystem does not get wiped,"
+ " set %s.%s in config", '.'.join(DS_CFG_PATH),
+ DS_CFG_KEY_PRESERVE_NTFS)
+ return False, bmsg + ' but had %d files on it.' % file_count
return True, bmsg + ' and had no important files. Safe for reformatting.'
+@azure_ds_telemetry_reporter
def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120,
is_new_instance=False, preserve_ntfs=False):
# wait for ephemeral disk to come up
naplen = .2
- missing = util.wait_for_files([devpath], maxwait=maxwait, naplen=naplen,
- log_pre="Azure ephemeral disk: ")
-
- if missing:
- LOG.warning("ephemeral device '%s' did not appear after %d seconds.",
- devpath, maxwait)
- return
+ with events.ReportEventStack(
+ name="wait-for-ephemeral-disk",
+ description="wait for ephemeral disk",
+ parent=azure_ds_reporter):
+ missing = util.wait_for_files([devpath],
+ maxwait=maxwait,
+ naplen=naplen,
+ log_pre="Azure ephemeral disk: ")
+
+ if missing:
+ LOG.warning("ephemeral device '%s' did"
+ " not appear after %d seconds.",
+ devpath, maxwait)
+ return
result = False
msg = None
@@ -808,6 +846,7 @@ def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120,
return
+@azure_ds_telemetry_reporter
def perform_hostname_bounce(hostname, cfg, prev_hostname):
# set the hostname to 'hostname' if it is not already set to that.
# then, if policy is not off, bounce the interface using command
@@ -843,6 +882,7 @@ def perform_hostname_bounce(hostname, cfg, prev_hostname):
return True
+@azure_ds_telemetry_reporter
def crtfile_to_pubkey(fname, data=None):
pipeline = ('openssl x509 -noout -pubkey < "$0" |'
'ssh-keygen -i -m PKCS8 -f /dev/stdin')
@@ -851,6 +891,7 @@ def crtfile_to_pubkey(fname, data=None):
return out.rstrip()
+@azure_ds_telemetry_reporter
def pubkeys_from_crt_files(flist):
pubkeys = []
errors = []
@@ -866,6 +907,7 @@ def pubkeys_from_crt_files(flist):
return pubkeys
+@azure_ds_telemetry_reporter
def write_files(datadir, files, dirmode=None):
def _redact_password(cnt, fname):
@@ -893,6 +935,7 @@ def write_files(datadir, files, dirmode=None):
util.write_file(filename=fname, content=content, mode=0o600)
+@azure_ds_telemetry_reporter
def invoke_agent(cmd):
# this is a function itself to simplify patching it for test
if cmd:
@@ -912,6 +955,7 @@ def find_child(node, filter_func):
return ret
+@azure_ds_telemetry_reporter
def load_azure_ovf_pubkeys(sshnode):
# This parses a 'SSH' node formatted like below, and returns
# an array of dicts.
@@ -964,6 +1008,7 @@ def load_azure_ovf_pubkeys(sshnode):
return found
+@azure_ds_telemetry_reporter
def read_azure_ovf(contents):
try:
dom = minidom.parseString(contents)
@@ -1064,6 +1109,7 @@ def read_azure_ovf(contents):
return (md, ud, cfg)
+@azure_ds_telemetry_reporter
def _extract_preprovisioned_vm_setting(dom):
"""Read the preprovision flag from the ovf. It should not
exist unless true."""
@@ -1092,6 +1138,7 @@ def encrypt_pass(password, salt_id="$6$"):
return crypt.crypt(password, salt_id + util.rand_str(strlen=16))
+@azure_ds_telemetry_reporter
def _check_freebsd_cdrom(cdrom_dev):
"""Return boolean indicating path to cdrom device has content."""
try:
@@ -1103,6 +1150,7 @@ def _check_freebsd_cdrom(cdrom_dev):
return False
+@azure_ds_telemetry_reporter
def _get_random_seed(source=PLATFORM_ENTROPY_SOURCE):
"""Return content random seed file if available, otherwise,
return None."""
@@ -1126,6 +1174,7 @@ def _get_random_seed(source=PLATFORM_ENTROPY_SOURCE):
return seed
+@azure_ds_telemetry_reporter
def list_possible_azure_ds_devs():
devlist = []
if util.is_FreeBSD():
@@ -1140,6 +1189,7 @@ def list_possible_azure_ds_devs():
return devlist
+@azure_ds_telemetry_reporter
def load_azure_ds_dir(source_dir):
ovf_file = os.path.join(source_dir, "ovf-env.xml")
@@ -1162,47 +1212,54 @@ def parse_network_config(imds_metadata):
@param: imds_metadata: Dict of content read from IMDS network service.
@return: Dictionary containing network version 2 standard configuration.
"""
- if imds_metadata != sources.UNSET and imds_metadata:
- netconfig = {'version': 2, 'ethernets': {}}
- LOG.debug('Azure: generating network configuration from IMDS')
- network_metadata = imds_metadata['network']
- for idx, intf in enumerate(network_metadata['interface']):
- nicname = 'eth{idx}'.format(idx=idx)
- dev_config = {}
- for addr4 in intf['ipv4']['ipAddress']:
- privateIpv4 = addr4['privateIpAddress']
- if privateIpv4:
- if dev_config.get('dhcp4', False):
- # Append static address config for nic > 1
- netPrefix = intf['ipv4']['subnet'][0].get(
- 'prefix', '24')
- if not dev_config.get('addresses'):
- dev_config['addresses'] = []
- dev_config['addresses'].append(
- '{ip}/{prefix}'.format(
- ip=privateIpv4, prefix=netPrefix))
- else:
- dev_config['dhcp4'] = True
- for addr6 in intf['ipv6']['ipAddress']:
- privateIpv6 = addr6['privateIpAddress']
- if privateIpv6:
- dev_config['dhcp6'] = True
- break
- if dev_config:
- mac = ':'.join(re.findall(r'..', intf['macAddress']))
- dev_config.update(
- {'match': {'macaddress': mac.lower()},
- 'set-name': nicname})
- netconfig['ethernets'][nicname] = dev_config
- else:
- blacklist = ['mlx4_core']
- LOG.debug('Azure: generating fallback configuration')
- # generate a network config, blacklist picking mlx4_core devs
- netconfig = net.generate_fallback_config(
- blacklist_drivers=blacklist, config_driver=True)
- return netconfig
+ with events.ReportEventStack(
+ name="parse_network_config",
+ description="",
+ parent=azure_ds_reporter) as evt:
+ if imds_metadata != sources.UNSET and imds_metadata:
+ netconfig = {'version': 2, 'ethernets': {}}
+ LOG.debug('Azure: generating network configuration from IMDS')
+ network_metadata = imds_metadata['network']
+ for idx, intf in enumerate(network_metadata['interface']):
+ nicname = 'eth{idx}'.format(idx=idx)
+ dev_config = {}
+ for addr4 in intf['ipv4']['ipAddress']:
+ privateIpv4 = addr4['privateIpAddress']
+ if privateIpv4:
+ if dev_config.get('dhcp4', False):
+ # Append static address config for nic > 1
+ netPrefix = intf['ipv4']['subnet'][0].get(
+ 'prefix', '24')
+ if not dev_config.get('addresses'):
+ dev_config['addresses'] = []
+ dev_config['addresses'].append(
+ '{ip}/{prefix}'.format(
+ ip=privateIpv4, prefix=netPrefix))
+ else:
+ dev_config['dhcp4'] = True
+ for addr6 in intf['ipv6']['ipAddress']:
+ privateIpv6 = addr6['privateIpAddress']
+ if privateIpv6:
+ dev_config['dhcp6'] = True
+ break
+ if dev_config:
+ mac = ':'.join(re.findall(r'..', intf['macAddress']))
+ dev_config.update(
+ {'match': {'macaddress': mac.lower()},
+ 'set-name': nicname})
+ netconfig['ethernets'][nicname] = dev_config
+ evt.description = "network config from imds"
+ else:
+ blacklist = ['mlx4_core']
+ LOG.debug('Azure: generating fallback configuration')
+ # generate a network config, blacklist picking mlx4_core devs
+ netconfig = net.generate_fallback_config(
+ blacklist_drivers=blacklist, config_driver=True)
+ evt.description = "network config from fallback"
+ return netconfig
+@azure_ds_telemetry_reporter
def get_metadata_from_imds(fallback_nic, retries):
"""Query Azure's network metadata service, returning a dictionary.
@@ -1227,6 +1284,7 @@ def get_metadata_from_imds(fallback_nic, retries):
return util.log_time(**kwargs)
+@azure_ds_telemetry_reporter
def _get_metadata_from_imds(retries):
url = IMDS_URL + "instance?api-version=2017-12-01"
@@ -1246,6 +1304,7 @@ def _get_metadata_from_imds(retries):
return {}
+@azure_ds_telemetry_reporter
def maybe_remove_ubuntu_network_config_scripts(paths=None):
"""Remove Azure-specific ubuntu network config for non-primary nics.
@@ -1283,14 +1342,20 @@ def maybe_remove_ubuntu_network_config_scripts(paths=None):
def _is_platform_viable(seed_dir):
- """Check platform environment to report if this datasource may run."""
- asset_tag = util.read_dmi_data('chassis-asset-tag')
- if asset_tag == AZURE_CHASSIS_ASSET_TAG:
- return True
- LOG.debug("Non-Azure DMI asset tag '%s' discovered.", asset_tag)
- if os.path.exists(os.path.join(seed_dir, 'ovf-env.xml')):
- return True
- return False
+ with events.ReportEventStack(
+ name="check-platform-viability",
+ description="found azure asset tag",
+ parent=azure_ds_reporter) as evt:
+
+ """Check platform environment to report if this datasource may run."""
+ asset_tag = util.read_dmi_data('chassis-asset-tag')
+ if asset_tag == AZURE_CHASSIS_ASSET_TAG:
+ return True
+ LOG.debug("Non-Azure DMI asset tag '%s' discovered.", asset_tag)
+ evt.description = "Non-Azure DMI asset tag '%s' discovered.", asset_tag
+ if os.path.exists(os.path.join(seed_dir, 'ovf-env.xml')):
+ return True
+ return False
class BrokenAzureDataSource(Exception):
diff --git a/cloudinit/sources/helpers/azure.py b/cloudinit/sources/helpers/azure.py
index 2829dd20..d3af05ee 100644..100755
--- a/cloudinit/sources/helpers/azure.py
+++ b/cloudinit/sources/helpers/azure.py
@@ -16,10 +16,27 @@ from xml.etree import ElementTree
from cloudinit import url_helper
from cloudinit import util
+from cloudinit.reporting import events
LOG = logging.getLogger(__name__)
+azure_ds_reporter = events.ReportEventStack(
+ name="azure-ds",
+ description="initialize reporter for azure ds",
+ reporting_enabled=True)
+
+
+def azure_ds_telemetry_reporter(func):
+ def impl(*args, **kwargs):
+ with events.ReportEventStack(
+ name=func.__name__,
+ description=func.__name__,
+ parent=azure_ds_reporter):
+ return func(*args, **kwargs)
+ return impl
+
+
@contextmanager
def cd(newdir):
prevdir = os.getcwd()
@@ -119,6 +136,7 @@ class OpenSSLManager(object):
def clean_up(self):
util.del_dir(self.tmpdir)
+ @azure_ds_telemetry_reporter
def generate_certificate(self):
LOG.debug('Generating certificate for communication with fabric...')
if self.certificate is not None:
@@ -139,17 +157,20 @@ class OpenSSLManager(object):
LOG.debug('New certificate generated.')
@staticmethod
+ @azure_ds_telemetry_reporter
def _run_x509_action(action, cert):
cmd = ['openssl', 'x509', '-noout', action]
result, _ = util.subp(cmd, data=cert)
return result
+ @azure_ds_telemetry_reporter
def _get_ssh_key_from_cert(self, certificate):
pub_key = self._run_x509_action('-pubkey', certificate)
keygen_cmd = ['ssh-keygen', '-i', '-m', 'PKCS8', '-f', '/dev/stdin']
ssh_key, _ = util.subp(keygen_cmd, data=pub_key)
return ssh_key
+ @azure_ds_telemetry_reporter
def _get_fingerprint_from_cert(self, certificate):
"""openssl x509 formats fingerprints as so:
'SHA1 Fingerprint=07:3E:19:D1:4D:1C:79:92:24:C6:A0:FD:8D:DA:\
@@ -163,6 +184,7 @@ class OpenSSLManager(object):
octets = raw_fp[eq+1:-1].split(':')
return ''.join(octets)
+ @azure_ds_telemetry_reporter
def _decrypt_certs_from_xml(self, certificates_xml):
"""Decrypt the certificates XML document using the our private key;
return the list of certs and private keys contained in the doc.
@@ -185,6 +207,7 @@ class OpenSSLManager(object):
shell=True, data=b'\n'.join(lines))
return out
+ @azure_ds_telemetry_reporter
def parse_certificates(self, certificates_xml):
"""Given the Certificates XML document, return a dictionary of
fingerprints and associated SSH keys derived from the certs."""
@@ -265,11 +288,13 @@ class WALinuxAgentShim(object):
return socket.inet_ntoa(packed_bytes)
@staticmethod
+ @azure_ds_telemetry_reporter
def _networkd_get_value_from_leases(leases_d=None):
return dhcp.networkd_get_option_from_leases(
'OPTION_245', leases_d=leases_d)
@staticmethod
+ @azure_ds_telemetry_reporter
def _get_value_from_leases_file(fallback_lease_file):
leases = []
content = util.load_file(fallback_lease_file)
@@ -287,6 +312,7 @@ class WALinuxAgentShim(object):
return leases[-1]
@staticmethod
+ @azure_ds_telemetry_reporter
def _load_dhclient_json():
dhcp_options = {}
hooks_dir = WALinuxAgentShim._get_hooks_dir()
@@ -305,6 +331,7 @@ class WALinuxAgentShim(object):
return dhcp_options
@staticmethod
+ @azure_ds_telemetry_reporter
def _get_value_from_dhcpoptions(dhcp_options):
if dhcp_options is None:
return None
@@ -318,6 +345,7 @@ class WALinuxAgentShim(object):
return _value
@staticmethod
+ @azure_ds_telemetry_reporter
def find_endpoint(fallback_lease_file=None, dhcp245=None):
value = None
if dhcp245 is not None:
@@ -352,6 +380,7 @@ class WALinuxAgentShim(object):
LOG.debug('Azure endpoint found at %s', endpoint_ip_address)
return endpoint_ip_address
+ @azure_ds_telemetry_reporter
def register_with_azure_and_fetch_data(self, pubkey_info=None):
if self.openssl_manager is None:
self.openssl_manager = OpenSSLManager()
@@ -404,6 +433,7 @@ class WALinuxAgentShim(object):
return keys
+ @azure_ds_telemetry_reporter
def _report_ready(self, goal_state, http_client):
LOG.debug('Reporting ready to Azure fabric.')
document = self.REPORT_READY_XML_TEMPLATE.format(
@@ -419,6 +449,7 @@ class WALinuxAgentShim(object):
LOG.info('Reported ready to Azure fabric.')
+@azure_ds_telemetry_reporter
def get_metadata_from_fabric(fallback_lease_file=None, dhcp_opts=None,
pubkey_info=None):
shim = WALinuxAgentShim(fallback_lease_file=fallback_lease_file,