author     Chad Smith <chad.smith@canonical.com>  2018-09-25 21:59:16 +0000
committer  Server Team CI Bot <josh.powers+server-team-bot@canonical.com>  2018-09-25 21:59:16 +0000
commit     fc4b966ba928b30b1c586407e752e0b51b1031e8 (patch)
tree       4a23ee46076d56c14396f40c2a1abb828e630aa5
parent     0b0378dd07f16d45c16e5750b6815b22a771860d (diff)
download   cloud-init-git-fc4b966ba928b30b1c586407e752e0b51b1031e8.tar.gz
cli: add cloud-init query subcommand to query instance metadata
Cloud-init caches any cloud metadata crawled during boot in the file
/run/cloud-init/instance-data.json. Cloud-init also standardizes some of that
metadata across all clouds. The command 'cloud-init query' surfaces a simple
CLI to query or format any cached instance metadata so that scripts or
end-users do not have to write tools to crawl the metadata themselves.

Since 'cloud-init query' is runnable by non-root users, redact any sensitive
data from instance-data.json and provide a root-readable unredacted
instance-data-sensitive.json. Datasources can now define a
sensitive_metadata_keys tuple which will redact any matching keys which could
contain passwords or credentials from instance-data.json.

Also add the following standardized 'v1' instance-data.json keys:

  - user_data: The base64-encoded user-data provided at instance launch
  - vendor_data: Any vendor-data provided to the instance at launch
  - underscore_delimited versions of existing hyphenated keys:
    instance_id, local_hostname, availability_zone, cloud_name
-rw-r--r--  bash_completion/cloud-init               4
-rwxr-xr-x  cloudinit/cmd/devel/render.py            7
-rw-r--r--  cloudinit/cmd/main.py                   10
-rw-r--r--  cloudinit/cmd/query.py                 155
-rw-r--r--  cloudinit/cmd/tests/test_query.py      193
-rw-r--r--  cloudinit/helpers.py                     4
-rw-r--r--  cloudinit/sources/__init__.py           76
-rw-r--r--  cloudinit/sources/tests/test_init.py   130
-rw-r--r--  doc/rtd/index.rst                        1
-rw-r--r--  doc/rtd/topics/capabilities.rst        105
-rw-r--r--  doc/rtd/topics/datasources.rst         148
-rw-r--r--  doc/rtd/topics/instancedata.rst        297
-rw-r--r--  integration-requirements.txt             3
-rw-r--r--  tests/cloud_tests/testcases/base.py     52
14 files changed, 952 insertions, 233 deletions
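
The behavior described in the commit message can be approximated outside of
cloud-init. Below is a minimal sketch, assuming a readable
/run/cloud-init/instance-data.json, of the dot-delimited lookup that
'cloud-init query v1.cloud_name' performs; the real implementation in
cloudinit/cmd/query.py below adds redaction, --list-keys and jinja formatting
on top of this:

    #!/usr/bin/env python3
    """Minimal sketch of the lookup behind 'cloud-init query v1.cloud_name'.

    Assumes /run/cloud-init/instance-data.json exists and is readable by the
    current user; this is not the cloud-init implementation itself.
    """
    import json

    INSTANCE_DATA = '/run/cloud-init/instance-data.json'


    def query(varname):
        with open(INSTANCE_DATA) as stream:
            data = json.load(stream)
        # Walk the dot-delimited path: 'v1.cloud_name' -> data['v1']['cloud_name']
        for part in varname.split('.'):
            data = data[part]
        return data


    if __name__ == '__main__':
        print(query('v1.cloud_name'))
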
diff --git a/bash_completion/cloud-init b/bash_completion/cloud-init
index 6d01bf3a..8c25032f 100644
--- a/bash_completion/cloud-init
+++ b/bash_completion/cloud-init
@@ -10,7 +10,7 @@ _cloudinit_complete()
cur_word="${COMP_WORDS[COMP_CWORD]}"
prev_word="${COMP_WORDS[COMP_CWORD-1]}"
- subcmds="analyze clean collect-logs devel dhclient-hook features init modules single status"
+ subcmds="analyze clean collect-logs devel dhclient-hook features init modules query single status"
base_params="--help --file --version --debug --force"
case ${COMP_CWORD} in
1)
@@ -40,6 +40,8 @@ _cloudinit_complete()
COMPREPLY=($(compgen -W "--help --mode" -- $cur_word))
;;
+ query)
+ COMPREPLY=($(compgen -W "--all --help --instance-data --list-keys --user-data --vendor-data --debug" -- $cur_word));;
single)
COMPREPLY=($(compgen -W "--help --name --frequency --report" -- $cur_word))
;;
diff --git a/cloudinit/cmd/devel/render.py b/cloudinit/cmd/devel/render.py
index e85933db..2ba6b681 100755
--- a/cloudinit/cmd/devel/render.py
+++ b/cloudinit/cmd/devel/render.py
@@ -9,7 +9,6 @@ import sys
from cloudinit.handlers.jinja_template import render_jinja_payload_from_file
from cloudinit import log
from cloudinit.sources import INSTANCE_JSON_FILE
-from cloudinit import util
from . import addLogHandlerCLI, read_cfg_paths
NAME = 'render'
@@ -54,11 +53,7 @@ def handle_args(name, args):
paths.run_dir, INSTANCE_JSON_FILE)
else:
instance_data_fn = args.instance_data
- try:
- with open(instance_data_fn) as stream:
- instance_data = stream.read()
- instance_data = util.load_json(instance_data)
- except IOError:
+ if not os.path.exists(instance_data_fn):
LOG.error('Missing instance-data.json file: %s', instance_data_fn)
return 1
try:
diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py
index 0eee583c..5a437020 100644
--- a/cloudinit/cmd/main.py
+++ b/cloudinit/cmd/main.py
@@ -791,6 +791,10 @@ def main(sysv_args=None):
' pass to this module'))
parser_single.set_defaults(action=('single', main_single))
+ parser_query = subparsers.add_parser(
+ 'query',
+ help='Query standardized instance metadata from the command line.')
+
parser_dhclient = subparsers.add_parser('dhclient-hook',
help=('run the dhclient hook'
'to record network info'))
@@ -842,6 +846,12 @@ def main(sysv_args=None):
clean_parser(parser_clean)
parser_clean.set_defaults(
action=('clean', handle_clean_args))
+ elif sysv_args[0] == 'query':
+ from cloudinit.cmd.query import (
+ get_parser as query_parser, handle_args as handle_query_args)
+ query_parser(parser_query)
+ parser_query.set_defaults(
+ action=('render', handle_query_args))
elif sysv_args[0] == 'status':
from cloudinit.cmd.status import (
get_parser as status_parser, handle_status_args)
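
The main.py hunk above follows the existing lazy-import pattern for
subcommands: the 'query' subparser is registered up front, but
cloudinit.cmd.query is only imported once that subcommand is actually
selected. A simplified, self-contained sketch of the set_defaults dispatch;
the handler and help text here are illustrative stand-ins, not cloud-init
code:

    import argparse


    def handle_query_args(name, args):
        # Stand-in for cloudinit.cmd.query.handle_args; returns an exit code.
        print('handling subcommand %s with %s' % (name, args))
        return 0


    parser = argparse.ArgumentParser(prog='cloud-init')
    subparsers = parser.add_subparsers(title='Subcommands', dest='subcommand')
    parser_query = subparsers.add_parser(
        'query', help='Query standardized instance metadata.')
    # Each subcommand stores a (name, handler) tuple; main() later unpacks it.
    parser_query.set_defaults(action=('query', handle_query_args))

    args = parser.parse_args(['query'])
    subcommand, handler = args.action
    exit_code = handler(subcommand, args)
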
diff --git a/cloudinit/cmd/query.py b/cloudinit/cmd/query.py
new file mode 100644
index 00000000..7d2d4fe4
--- /dev/null
+++ b/cloudinit/cmd/query.py
@@ -0,0 +1,155 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+"""Query standardized instance metadata from the command line."""
+
+import argparse
+import os
+import six
+import sys
+
+from cloudinit.handlers.jinja_template import (
+ convert_jinja_instance_data, render_jinja_payload)
+from cloudinit.cmd.devel import addLogHandlerCLI, read_cfg_paths
+from cloudinit import log
+from cloudinit.sources import (
+ INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, REDACT_SENSITIVE_VALUE)
+from cloudinit import util
+
+NAME = 'query'
+LOG = log.getLogger(NAME)
+
+
+def get_parser(parser=None):
+ """Build or extend an arg parser for query utility.
+
+ @param parser: Optional existing ArgumentParser instance representing the
+ query subcommand which will be extended to support the args of
+ this utility.
+
+ @returns: ArgumentParser with proper argument configuration.
+ """
+ if not parser:
+ parser = argparse.ArgumentParser(
+ prog=NAME, description='Query cloud-init instance data')
+ parser.add_argument(
+ '-d', '--debug', action='store_true', default=False,
+ help='Add verbose messages during template render')
+ parser.add_argument(
+ '-i', '--instance-data', type=str,
+ help=('Path to instance-data.json file. Default is /run/cloud-init/%s'
+ % INSTANCE_JSON_FILE))
+ parser.add_argument(
+ '-l', '--list-keys', action='store_true', default=False,
+ help=('List query keys available at the provided instance-data'
+ ' <varname>.'))
+ parser.add_argument(
+ '-u', '--user-data', type=str,
+ help=('Path to user-data file. Default is'
+ ' /var/lib/cloud/instance/user-data.txt'))
+ parser.add_argument(
+ '-v', '--vendor-data', type=str,
+ help=('Path to vendor-data file. Default is'
+ ' /var/lib/cloud/instance/vendor-data.txt'))
+ parser.add_argument(
+ 'varname', type=str, nargs='?',
+ help=('A dot-delimited instance data variable to query from'
+ ' instance-data query. For example: v2.local_hostname'))
+ parser.add_argument(
+ '-a', '--all', action='store_true', default=False, dest='dump_all',
+ help='Dump all available instance-data')
+ parser.add_argument(
+ '-f', '--format', type=str, dest='format',
+ help=('Optionally specify a custom output format string. Any'
+ ' instance-data variable can be specified between double-curly'
+ ' braces. For example -f "{{ v2.cloud_name }}"'))
+ return parser
+
+
+def handle_args(name, args):
+ """Handle calls to 'cloud-init query' as a subcommand."""
+ paths = None
+ addLogHandlerCLI(LOG, log.DEBUG if args.debug else log.WARNING)
+ if not any([args.list_keys, args.varname, args.format, args.dump_all]):
+ LOG.error(
+ 'Expected one of the options: --all, --format,'
+ ' --list-keys or varname')
+ get_parser().print_help()
+ return 1
+
+ uid = os.getuid()
+ if not all([args.instance_data, args.user_data, args.vendor_data]):
+ paths = read_cfg_paths()
+ if not args.instance_data:
+ if uid == 0:
+ default_json_fn = INSTANCE_JSON_SENSITIVE_FILE
+ else:
+ default_json_fn = INSTANCE_JSON_FILE # World readable
+ instance_data_fn = os.path.join(paths.run_dir, default_json_fn)
+ else:
+ instance_data_fn = args.instance_data
+ if not args.user_data:
+ user_data_fn = os.path.join(paths.instance_link, 'user-data.txt')
+ else:
+ user_data_fn = args.user_data
+ if not args.vendor_data:
+ vendor_data_fn = os.path.join(paths.instance_link, 'vendor-data.txt')
+ else:
+ vendor_data_fn = args.vendor_data
+
+ try:
+ instance_json = util.load_file(instance_data_fn)
+ except IOError:
+ LOG.error('Missing instance-data.json file: %s', instance_data_fn)
+ return 1
+
+ instance_data = util.load_json(instance_json)
+ if uid != 0:
+ instance_data['userdata'] = (
+ '<%s> file:%s' % (REDACT_SENSITIVE_VALUE, user_data_fn))
+ instance_data['vendordata'] = (
+ '<%s> file:%s' % (REDACT_SENSITIVE_VALUE, vendor_data_fn))
+ else:
+ instance_data['userdata'] = util.load_file(user_data_fn)
+ instance_data['vendordata'] = util.load_file(vendor_data_fn)
+ if args.format:
+ payload = '## template: jinja\n{fmt}'.format(fmt=args.format)
+ rendered_payload = render_jinja_payload(
+ payload=payload, payload_fn='query commandline',
+ instance_data=instance_data,
+ debug=True if args.debug else False)
+ if rendered_payload:
+ print(rendered_payload)
+ return 0
+ return 1
+
+ response = convert_jinja_instance_data(instance_data)
+ if args.varname:
+ try:
+ for var in args.varname.split('.'):
+ response = response[var]
+ except KeyError:
+ LOG.error('Undefined instance-data key %s', args.varname)
+ return 1
+ if args.list_keys:
+ if not isinstance(response, dict):
+ LOG.error("--list-keys provided but '%s' is not a dict", var)
+ return 1
+ response = '\n'.join(sorted(response.keys()))
+ elif args.list_keys:
+ response = '\n'.join(sorted(response.keys()))
+ if not isinstance(response, six.string_types):
+ response = util.json_dumps(response)
+ print(response)
+ return 0
+
+
+def main():
+ """Tool to query specific instance-data values."""
+ parser = get_parser()
+ sys.exit(handle_args(NAME, parser.parse_args()))
+
+
+if __name__ == '__main__':
+ main()
+
+# vi: ts=4 expandtab
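
A short usage sketch for the module above, assuming this branch of cloud-init
is installed and instance-data.json has already been written during boot;
get_parser() and handle_args() are the same entry points main.py wires up:

    from cloudinit.cmd import query

    parser = query.get_parser()
    # Equivalent to running 'cloud-init query --list-keys' from a shell.
    args = parser.parse_args(['--list-keys'])
    # handle_args prints the result and returns 0 on success, 1 on error,
    # mirroring the CLI exit code.
    exit_code = query.handle_args(query.NAME, args)
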
diff --git a/cloudinit/cmd/tests/test_query.py b/cloudinit/cmd/tests/test_query.py
new file mode 100644
index 00000000..fb87c6ab
--- /dev/null
+++ b/cloudinit/cmd/tests/test_query.py
@@ -0,0 +1,193 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+from six import StringIO
+from textwrap import dedent
+import os
+
+from collections import namedtuple
+from cloudinit.cmd import query
+from cloudinit.helpers import Paths
+from cloudinit.sources import REDACT_SENSITIVE_VALUE, INSTANCE_JSON_FILE
+from cloudinit.tests.helpers import CiTestCase, mock
+from cloudinit.util import ensure_dir, write_file
+
+
+class TestQuery(CiTestCase):
+
+ with_logs = True
+
+ args = namedtuple(
+ 'queryargs',
+ ('debug dump_all format instance_data list_keys user_data vendor_data'
+ ' varname'))
+
+ def setUp(self):
+ super(TestQuery, self).setUp()
+ self.tmp = self.tmp_dir()
+ self.instance_data = self.tmp_path('instance-data', dir=self.tmp)
+
+ def test_handle_args_error_on_missing_param(self):
+ """Error when missing required parameters and print usage."""
+ args = self.args(
+ debug=False, dump_all=False, format=None, instance_data=None,
+ list_keys=False, user_data=None, vendor_data=None, varname=None)
+ with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr:
+ with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout:
+ self.assertEqual(1, query.handle_args('anyname', args))
+ expected_error = (
+ 'ERROR: Expected one of the options: --all, --format, --list-keys'
+ ' or varname\n')
+ self.assertIn(expected_error, self.logs.getvalue())
+ self.assertIn('usage: query', m_stdout.getvalue())
+ self.assertIn(expected_error, m_stderr.getvalue())
+
+ def test_handle_args_error_on_missing_instance_data(self):
+ """When instance_data file path does not exist, log an error."""
+ absent_fn = self.tmp_path('absent', dir=self.tmp)
+ args = self.args(
+ debug=False, dump_all=True, format=None, instance_data=absent_fn,
+ list_keys=False, user_data='ud', vendor_data='vd', varname=None)
+ with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr:
+ self.assertEqual(1, query.handle_args('anyname', args))
+ self.assertIn(
+ 'ERROR: Missing instance-data.json file: %s' % absent_fn,
+ self.logs.getvalue())
+ self.assertIn(
+ 'ERROR: Missing instance-data.json file: %s' % absent_fn,
+ m_stderr.getvalue())
+
+ def test_handle_args_defaults_instance_data(self):
+ """When no instance_data argument, default to configured run_dir."""
+ args = self.args(
+ debug=False, dump_all=True, format=None, instance_data=None,
+ list_keys=False, user_data=None, vendor_data=None, varname=None)
+ run_dir = self.tmp_path('run_dir', dir=self.tmp)
+ ensure_dir(run_dir)
+ paths = Paths({'run_dir': run_dir})
+ self.add_patch('cloudinit.cmd.query.read_cfg_paths', 'm_paths')
+ self.m_paths.return_value = paths
+ with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr:
+ self.assertEqual(1, query.handle_args('anyname', args))
+ json_file = os.path.join(run_dir, INSTANCE_JSON_FILE)
+ self.assertIn(
+ 'ERROR: Missing instance-data.json file: %s' % json_file,
+ self.logs.getvalue())
+ self.assertIn(
+ 'ERROR: Missing instance-data.json file: %s' % json_file,
+ m_stderr.getvalue())
+
+ def test_handle_args_dumps_all_instance_data(self):
+ """When --all is specified query will dump all instance data vars."""
+ write_file(self.instance_data, '{"my-var": "it worked"}')
+ args = self.args(
+ debug=False, dump_all=True, format=None,
+ instance_data=self.instance_data, list_keys=False,
+ user_data='ud', vendor_data='vd', varname=None)
+ with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout:
+ self.assertEqual(0, query.handle_args('anyname', args))
+ self.assertEqual(
+ '{\n "my_var": "it worked",\n "userdata": "<%s> file:ud",\n'
+ ' "vendordata": "<%s> file:vd"\n}\n' % (
+ REDACT_SENSITIVE_VALUE, REDACT_SENSITIVE_VALUE),
+ m_stdout.getvalue())
+
+ def test_handle_args_returns_top_level_varname(self):
+ """When the argument varname is passed, report its value."""
+ write_file(self.instance_data, '{"my-var": "it worked"}')
+ args = self.args(
+ debug=False, dump_all=True, format=None,
+ instance_data=self.instance_data, list_keys=False,
+ user_data='ud', vendor_data='vd', varname='my_var')
+ with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout:
+ self.assertEqual(0, query.handle_args('anyname', args))
+ self.assertEqual('it worked\n', m_stdout.getvalue())
+
+ def test_handle_args_returns_nested_varname(self):
+ """If user_data file is a jinja template render instance-data vars."""
+ write_file(self.instance_data,
+ '{"v1": {"key-2": "value-2"}, "my-var": "it worked"}')
+ args = self.args(
+ debug=False, dump_all=False, format=None,
+ instance_data=self.instance_data, user_data='ud', vendor_data='vd',
+ list_keys=False, varname='v1.key_2')
+ with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout:
+ self.assertEqual(0, query.handle_args('anyname', args))
+ self.assertEqual('value-2\n', m_stdout.getvalue())
+
+ def test_handle_args_returns_standardized_vars_to_top_level_aliases(self):
+ """Any standardized vars under v# are promoted as top-level aliases."""
+ write_file(
+ self.instance_data,
+ '{"v1": {"v1_1": "val1.1"}, "v2": {"v2_2": "val2.2"},'
+ ' "top": "gun"}')
+ expected = dedent("""\
+ {
+ "top": "gun",
+ "userdata": "<redacted for non-root user> file:ud",
+ "v1": {
+ "v1_1": "val1.1"
+ },
+ "v1_1": "val1.1",
+ "v2": {
+ "v2_2": "val2.2"
+ },
+ "v2_2": "val2.2",
+ "vendordata": "<redacted for non-root user> file:vd"
+ }
+ """)
+ args = self.args(
+ debug=False, dump_all=True, format=None,
+ instance_data=self.instance_data, user_data='ud', vendor_data='vd',
+ list_keys=False, varname=None)
+ with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout:
+ self.assertEqual(0, query.handle_args('anyname', args))
+ self.assertEqual(expected, m_stdout.getvalue())
+
+ def test_handle_args_list_keys_sorts_top_level_keys_when_no_varname(self):
+ """Sort all top-level keys when only --list-keys provided."""
+ write_file(
+ self.instance_data,
+ '{"v1": {"v1_1": "val1.1"}, "v2": {"v2_2": "val2.2"},'
+ ' "top": "gun"}')
+ expected = 'top\nuserdata\nv1\nv1_1\nv2\nv2_2\nvendordata\n'
+ args = self.args(
+ debug=False, dump_all=False, format=None,
+ instance_data=self.instance_data, list_keys=True, user_data='ud',
+ vendor_data='vd', varname=None)
+ with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout:
+ self.assertEqual(0, query.handle_args('anyname', args))
+ self.assertEqual(expected, m_stdout.getvalue())
+
+ def test_handle_args_list_keys_sorts_nested_keys_when_varname(self):
+ """Sort all nested keys of varname object when --list-keys provided."""
+ write_file(
+ self.instance_data,
+ '{"v1": {"v1_1": "val1.1", "v1_2": "val1.2"}, "v2":' +
+ ' {"v2_2": "val2.2"}, "top": "gun"}')
+ expected = 'v1_1\nv1_2\n'
+ args = self.args(
+ debug=False, dump_all=False, format=None,
+ instance_data=self.instance_data, list_keys=True,
+ user_data='ud', vendor_data='vd', varname='v1')
+ with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout:
+ self.assertEqual(0, query.handle_args('anyname', args))
+ self.assertEqual(expected, m_stdout.getvalue())
+
+ def test_handle_args_list_keys_errors_when_varname_is_not_a_dict(self):
+ """Raise an error when --list-keys and varname specify a non-list."""
+ write_file(
+ self.instance_data,
+ '{"v1": {"v1_1": "val1.1", "v1_2": "val1.2"}, "v2": ' +
+ '{"v2_2": "val2.2"}, "top": "gun"}')
+ expected_error = "ERROR: --list-keys provided but 'top' is not a dict"
+ args = self.args(
+ debug=False, dump_all=False, format=None,
+ instance_data=self.instance_data, list_keys=True, user_data='ud',
+ vendor_data='vd', varname='top')
+ with mock.patch('sys.stderr', new_callable=StringIO) as m_stderr:
+ with mock.patch('sys.stdout', new_callable=StringIO) as m_stdout:
+ self.assertEqual(1, query.handle_args('anyname', args))
+ self.assertEqual('', m_stdout.getvalue())
+ self.assertIn(expected_error, m_stderr.getvalue())
+
+# vi: ts=4 expandtab
diff --git a/cloudinit/helpers.py b/cloudinit/helpers.py
index 3cc1fb19..dcd2645e 100644
--- a/cloudinit/helpers.py
+++ b/cloudinit/helpers.py
@@ -239,6 +239,10 @@ class ConfigMerger(object):
if cc_fn and os.path.isfile(cc_fn):
try:
i_cfgs.append(util.read_conf(cc_fn))
+ except PermissionError:
+ LOG.debug(
+ 'Skipped loading cloud-config from %s due to'
+ ' non-root.', cc_fn)
except Exception:
util.logexc(LOG, 'Failed loading of cloud-config from %s',
cc_fn)
diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py
index a775f1a8..730e8174 100644
--- a/cloudinit/sources/__init__.py
+++ b/cloudinit/sources/__init__.py
@@ -38,8 +38,12 @@ DEP_FILESYSTEM = "FILESYSTEM"
DEP_NETWORK = "NETWORK"
DS_PREFIX = 'DataSource'
-# File in which instance meta-data, user-data and vendor-data is written
+# File in which publicly available instance meta-data is written
+# security-sensitive key values are redacted from this world-readable file
INSTANCE_JSON_FILE = 'instance-data.json'
+# security-sensitive key values are present in this root-readable file
+INSTANCE_JSON_SENSITIVE_FILE = 'instance-data-sensitive.json'
+REDACT_SENSITIVE_VALUE = 'redacted for non-root user'
# Key which can be provide a cloud's official product name to cloud-init
METADATA_CLOUD_NAME_KEY = 'cloud-name'
@@ -58,7 +62,7 @@ class InvalidMetaDataException(Exception):
pass
-def process_instance_metadata(metadata, key_path=''):
+def process_instance_metadata(metadata, key_path='', sensitive_keys=()):
"""Process all instance metadata cleaning it up for persisting as json.
Strip ci-b64 prefix and catalog any 'base64_encoded_keys' as a list
@@ -67,22 +71,46 @@ def process_instance_metadata(metadata, key_path=''):
"""
md_copy = copy.deepcopy(metadata)
md_copy['base64_encoded_keys'] = []
+ md_copy['sensitive_keys'] = []
for key, val in metadata.items():
if key_path:
sub_key_path = key_path + '/' + key
else:
sub_key_path = key
+ if key in sensitive_keys or sub_key_path in sensitive_keys:
+ md_copy['sensitive_keys'].append(sub_key_path)
if isinstance(val, str) and val.startswith('ci-b64:'):
md_copy['base64_encoded_keys'].append(sub_key_path)
md_copy[key] = val.replace('ci-b64:', '')
if isinstance(val, dict):
- return_val = process_instance_metadata(val, sub_key_path)
+ return_val = process_instance_metadata(
+ val, sub_key_path, sensitive_keys)
md_copy['base64_encoded_keys'].extend(
return_val.pop('base64_encoded_keys'))
+ md_copy['sensitive_keys'].extend(
+ return_val.pop('sensitive_keys'))
md_copy[key] = return_val
return md_copy
+def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE):
+ """Redact any sensitive keys from to provided metadata dictionary.
+
+ Replace the values of any keys listed in 'sensitive_keys' with redact_value.
+ """
+ if not metadata.get('sensitive_keys', []):
+ return metadata
+ md_copy = copy.deepcopy(metadata)
+ for key_path in metadata.get('sensitive_keys'):
+ path_parts = key_path.split('/')
+ obj = md_copy
+ for path in path_parts:
+ if isinstance(obj[path], dict) and path != path_parts[-1]:
+ obj = obj[path]
+ obj[path] = redact_value
+ return md_copy
+
+
URLParams = namedtuple(
'URLParms', ['max_wait_seconds', 'timeout_seconds', 'num_retries'])
@@ -127,6 +155,10 @@ class DataSource(object):
_dirty_cache = False
+ # N-tuple of keypaths or keynames to redact from instance-data.json for
+ # non-root users
+ sensitive_metadata_keys = ('security-credentials',)
+
def __init__(self, sys_cfg, distro, paths, ud_proc=None):
self.sys_cfg = sys_cfg
self.distro = distro
@@ -152,12 +184,24 @@ class DataSource(object):
def _get_standardized_metadata(self):
"""Return a dictionary of standardized metadata keys."""
- return {'v1': {
- 'local-hostname': self.get_hostname(),
- 'instance-id': self.get_instance_id(),
- 'cloud-name': self.cloud_name,
- 'region': self.region,
- 'availability-zone': self.availability_zone}}
+ local_hostname = self.get_hostname()
+ instance_id = self.get_instance_id()
+ availability_zone = self.availability_zone
+ cloud_name = self.cloud_name
+ # When adding new standard keys prefer underscore-delimited instead
+ # of hyphen-delimited to support simple variable references in jinja
+ # templates.
+ return {
+ 'v1': {
+ 'availability-zone': availability_zone,
+ 'availability_zone': availability_zone,
+ 'cloud-name': cloud_name,
+ 'cloud_name': cloud_name,
+ 'instance-id': instance_id,
+ 'instance_id': instance_id,
+ 'local-hostname': local_hostname,
+ 'local_hostname': local_hostname,
+ 'region': self.region}}
def clear_cached_attrs(self, attr_defaults=()):
"""Reset any cached metadata attributes to datasource defaults.
@@ -200,9 +244,7 @@ class DataSource(object):
"""
instance_data = {
'ds': {
- 'meta_data': self.metadata,
- 'user_data': self.get_userdata_raw(),
- 'vendor_data': self.get_vendordata_raw()}}
+ 'meta_data': self.metadata}}
if hasattr(self, 'network_json'):
network_json = getattr(self, 'network_json')
if network_json != UNSET:
@@ -217,7 +259,9 @@ class DataSource(object):
# Process content base64encoding unserializable values
content = util.json_dumps(instance_data)
# Strip base64: prefix and set base64_encoded_keys list.
- processed_data = process_instance_metadata(json.loads(content))
+ processed_data = process_instance_metadata(
+ json.loads(content),
+ sensitive_keys=self.sensitive_metadata_keys)
except TypeError as e:
LOG.warning('Error persisting instance-data.json: %s', str(e))
return False
@@ -225,7 +269,11 @@ class DataSource(object):
LOG.warning('Error persisting instance-data.json: %s', str(e))
return False
json_file = os.path.join(self.paths.run_dir, INSTANCE_JSON_FILE)
- write_json(json_file, processed_data, mode=0o600)
+ write_json(json_file, processed_data) # World readable
+ json_sensitive_file = os.path.join(self.paths.run_dir,
+ INSTANCE_JSON_SENSITIVE_FILE)
+ write_json(json_sensitive_file,
+ redact_sensitive_keys(processed_data), mode=0o600)
return True
def _get_data(self):
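
A sketch of how the two functions added above compose, using an illustrative
metadata dict; 'security-credentials' matches the default
DataSource.sensitive_metadata_keys tuple, and the credential values are made
up:

    from cloudinit.sources import (
        process_instance_metadata, redact_sensitive_keys)

    metadata = {
        'ds': {'meta_data': {'some': {'security-credentials': 'sekret'}}},
        'v1': {'cloud_name': 'aws'},
    }
    # Catalog matching key paths under 'sensitive_keys' while copying metadata.
    processed = process_instance_metadata(
        metadata, sensitive_keys=('security-credentials',))
    assert processed['sensitive_keys'] == [
        'ds/meta_data/some/security-credentials']

    # Produce the copy with sensitive values replaced by the redaction marker.
    redacted = redact_sensitive_keys(processed)
    assert (redacted['ds']['meta_data']['some']['security-credentials'] ==
            'redacted for non-root user')
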
diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py
index 8299af23..6b965750 100644
--- a/cloudinit/sources/tests/test_init.py
+++ b/cloudinit/sources/tests/test_init.py
@@ -1,5 +1,6 @@
# This file is part of cloud-init. See LICENSE file for license information.
+import copy
import inspect
import os
import six
@@ -9,7 +10,8 @@ from cloudinit.event import EventType
from cloudinit.helpers import Paths
from cloudinit import importer
from cloudinit.sources import (
- INSTANCE_JSON_FILE, DataSource, UNSET)
+ INSTANCE_JSON_FILE, INSTANCE_JSON_SENSITIVE_FILE, REDACT_SENSITIVE_VALUE,
+ UNSET, DataSource, redact_sensitive_keys)
from cloudinit.tests.helpers import CiTestCase, skipIf, mock
from cloudinit.user_data import UserDataProcessor
from cloudinit import util
@@ -20,20 +22,24 @@ class DataSourceTestSubclassNet(DataSource):
dsname = 'MyTestSubclass'
url_max_wait = 55
- def __init__(self, sys_cfg, distro, paths, custom_userdata=None,
- get_data_retval=True):
+ def __init__(self, sys_cfg, distro, paths, custom_metadata=None,
+ custom_userdata=None, get_data_retval=True):
super(DataSourceTestSubclassNet, self).__init__(
sys_cfg, distro, paths)
self._custom_userdata = custom_userdata
+ self._custom_metadata = custom_metadata
self._get_data_retval = get_data_retval
def _get_cloud_name(self):
return 'SubclassCloudName'
def _get_data(self):
- self.metadata = {'availability_zone': 'myaz',
- 'local-hostname': 'test-subclass-hostname',
- 'region': 'myregion'}
+ if self._custom_metadata:
+ self.metadata = self._custom_metadata
+ else:
+ self.metadata = {'availability_zone': 'myaz',
+ 'local-hostname': 'test-subclass-hostname',
+ 'region': 'myregion'}
if self._custom_userdata:
self.userdata_raw = self._custom_userdata
else:
@@ -278,7 +284,7 @@ class TestDataSource(CiTestCase):
os.path.exists(json_file), 'Found unexpected file %s' % json_file)
def test_get_data_writes_json_instance_data_on_success(self):
- """get_data writes INSTANCE_JSON_FILE to run_dir as readonly root."""
+ """get_data writes INSTANCE_JSON_FILE to run_dir as world readable."""
tmp = self.tmp_dir()
datasource = DataSourceTestSubclassNet(
self.sys_cfg, self.distro, Paths({'run_dir': tmp}))
@@ -287,40 +293,90 @@ class TestDataSource(CiTestCase):
content = util.load_file(json_file)
expected = {
'base64_encoded_keys': [],
+ 'sensitive_keys': [],
'v1': {
'availability-zone': 'myaz',
+ 'availability_zone': 'myaz',
'cloud-name': 'subclasscloudname',
+ 'cloud_name': 'subclasscloudname',
'instance-id': 'iid-datasource',
+ 'instance_id': 'iid-datasource',
'local-hostname': 'test-subclass-hostname',
+ 'local_hostname': 'test-subclass-hostname',
'region': 'myregion'},
'ds': {
'meta_data': {'availability_zone': 'myaz',
'local-hostname': 'test-subclass-hostname',
- 'region': 'myregion'},
- 'user_data': 'userdata_raw',
- 'vendor_data': 'vendordata_raw'}}
- self.maxDiff = None
+ 'region': 'myregion'}}}
self.assertEqual(expected, util.load_json(content))
file_stat = os.stat(json_file)
+ self.assertEqual(0o644, stat.S_IMODE(file_stat.st_mode))
+ self.assertEqual(expected, util.load_json(content))
+
+ def test_get_data_writes_json_instance_data_sensitive(self):
+ """get_data writes INSTANCE_JSON_SENSITIVE_FILE as readonly root."""
+ tmp = self.tmp_dir()
+ datasource = DataSourceTestSubclassNet(
+ self.sys_cfg, self.distro, Paths({'run_dir': tmp}),
+ custom_metadata={
+ 'availability_zone': 'myaz',
+ 'local-hostname': 'test-subclass-hostname',
+ 'region': 'myregion',
+ 'some': {'security-credentials': {
+ 'cred1': 'sekret', 'cred2': 'othersekret'}}})
+ self.assertEqual(
+ ('security-credentials',), datasource.sensitive_metadata_keys)
+ datasource.get_data()
+ json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
+ sensitive_json_file = self.tmp_path(INSTANCE_JSON_SENSITIVE_FILE, tmp)
+ redacted = util.load_json(util.load_file(json_file))
+ self.assertEqual(
+ {'cred1': 'sekret', 'cred2': 'othersekret'},
+ redacted['ds']['meta_data']['some']['security-credentials'])
+ content = util.load_file(sensitive_json_file)
+ expected = {
+ 'base64_encoded_keys': [],
+ 'sensitive_keys': ['ds/meta_data/some/security-credentials'],
+ 'v1': {
+ 'availability-zone': 'myaz',
+ 'availability_zone': 'myaz',
+ 'cloud-name': 'subclasscloudname',
+ 'cloud_name': 'subclasscloudname',
+ 'instance-id': 'iid-datasource',
+ 'instance_id': 'iid-datasource',
+ 'local-hostname': 'test-subclass-hostname',
+ 'local_hostname': 'test-subclass-hostname',
+ 'region': 'myregion'},
+ 'ds': {
+ 'meta_data': {
+ 'availability_zone': 'myaz',
+ 'local-hostname': 'test-subclass-hostname',
+ 'region': 'myregion',
+ 'some': {'security-credentials': REDACT_SENSITIVE_VALUE}}}
+ }
+ self.maxDiff = None
+ self.assertEqual(expected, util.load_json(content))
+ file_stat = os.stat(sensitive_json_file)
self.assertEqual(0o600, stat.S_IMODE(file_stat.st_mode))
+ self.assertEqual(expected, util.load_json(content))
def test_get_data_handles_redacted_unserializable_content(self):
"""get_data warns unserializable content in INSTANCE_JSON_FILE."""
tmp = self.tmp_dir()
datasource = DataSourceTestSubclassNet(
self.sys_cfg, self.distro, Paths({'run_dir': tmp}),
- custom_userdata={'key1': 'val1', 'key2': {'key2.1': self.paths}})
+ custom_metadata={'key1': 'val1', 'key2': {'key2.1': self.paths}})
datasource.get_data()
json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
content = util.load_file(json_file)
- expected_userdata = {
+ expected_metadata = {
'key1': 'val1',
'key2': {
'key2.1': "Warning: redacted unserializable type <class"
" 'cloudinit.helpers.Paths'>"}}
instance_json = util.load_json(content)
self.assertEqual(
- expected_userdata, instance_json['ds']['user_data'])
+ expected_metadata, instance_json['ds']['meta_data'])
def test_persist_instance_data_writes_ec2_metadata_when_set(self):
"""When ec2_metadata class attribute is set, persist to json."""
@@ -361,17 +417,17 @@ class TestDataSource(CiTestCase):
tmp = self.tmp_dir()
datasource = DataSourceTestSubclassNet(
self.sys_cfg, self.distro, Paths({'run_dir': tmp}),
- custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}})
+ custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}})
self.assertTrue(datasource.get_data())
json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
content = util.load_file(json_file)
instance_json = util.load_json(content)
- self.assertEqual(
- ['ds/user_data/key2/key2.1'],
+ self.assertItemsEqual(
+ ['ds/meta_data/key2/key2.1'],
instance_json['base64_encoded_keys'])
self.assertEqual(
{'key1': 'val1', 'key2': {'key2.1': 'EjM='}},
- instance_json['ds']['user_data'])
+ instance_json['ds']['meta_data'])
@skipIf(not six.PY2, "json serialization on <= py2.7 handles bytes")
def test_get_data_handles_bytes_values(self):
@@ -379,7 +435,7 @@ class TestDataSource(CiTestCase):
tmp = self.tmp_dir()
datasource = DataSourceTestSubclassNet(
self.sys_cfg, self.distro, Paths({'run_dir': tmp}),
- custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}})
+ custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'\x123'}})
self.assertTrue(datasource.get_data())
json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
content = util.load_file(json_file)
@@ -387,7 +443,7 @@ class TestDataSource(CiTestCase):
self.assertEqual([], instance_json['base64_encoded_keys'])
self.assertEqual(
{'key1': 'val1', 'key2': {'key2.1': '\x123'}},
- instance_json['ds']['user_data'])
+ instance_json['ds']['meta_data'])
@skipIf(not six.PY2, "Only python2 hits UnicodeDecodeErrors on non-utf8")
def test_non_utf8_encoding_logs_warning(self):
@@ -395,7 +451,7 @@ class TestDataSource(CiTestCase):
tmp = self.tmp_dir()
datasource = DataSourceTestSubclassNet(
self.sys_cfg, self.distro, Paths({'run_dir': tmp}),
- custom_userdata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}})
+ custom_metadata={'key1': 'val1', 'key2': {'key2.1': b'ab\xaadef'}})
self.assertTrue(datasource.get_data())
json_file = self.tmp_path(INSTANCE_JSON_FILE, tmp)
self.assertFalse(os.path.exists(json_file))
@@ -509,4 +565,36 @@ class TestDataSource(CiTestCase):
self.logs.getvalue())
+class TestRedactSensitiveData(CiTestCase):
+
+ def test_redact_sensitive_data_noop_when_no_sensitive_keys_present(self):
+ """When sensitive_keys is absent or empty from metadata do nothing."""
+ md = {'my': 'data'}
+ self.assertEqual(
+ md, redact_sensitive_keys(md, redact_value='redacted'))
+ md['sensitive_keys'] = []
+ self.assertEqual(
+ md, redact_sensitive_keys(md, redact_value='redacted'))
+
+ def test_redact_sensitive_data_redacts_exact_match_name(self):
+ """Only exact matched sensitive_keys are redacted from metadata."""
+ md = {'sensitive_keys': ['md/secure'],
+ 'md': {'secure': 's3kr1t', 'insecure': 'publik'}}
+ secure_md = copy.deepcopy(md)
+ secure_md['md']['secure'] = 'redacted'
+ self.assertEqual(
+ secure_md,
+ redact_sensitive_keys(md, redact_value='redacted'))
+
+ def test_redact_sensitive_data_does_redacts_with_default_string(self):
+ """When redact_value is absent, REDACT_SENSITIVE_VALUE is used."""
+ md = {'sensitive_keys': ['md/secure'],
+ 'md': {'secure': 's3kr1t', 'insecure': 'publik'}}
+ secure_md = copy.deepcopy(md)
+ secure_md['md']['secure'] = 'redacted for non-root user'
+ self.assertEqual(
+ secure_md,
+ redact_sensitive_keys(md))
+
+
# vi: ts=4 expandtab
diff --git a/doc/rtd/index.rst b/doc/rtd/index.rst
index de67f361..20a99a30 100644
--- a/doc/rtd/index.rst
+++ b/doc/rtd/index.rst
@@ -31,6 +31,7 @@ initialization of a cloud instance.
topics/capabilities.rst
topics/availability.rst
topics/format.rst
+ topics/instancedata.rst
topics/dir_layout.rst
topics/examples.rst
topics/boot.rst
diff --git a/doc/rtd/topics/capabilities.rst b/doc/rtd/topics/capabilities.rst
index 2d8e2538..0d8b8947 100644
--- a/doc/rtd/topics/capabilities.rst
+++ b/doc/rtd/topics/capabilities.rst
@@ -18,7 +18,7 @@ User configurability
User-data can be given by the user at instance launch time. See
:ref:`user_data_formats` for acceptable user-data content.
-
+
This is done via the ``--user-data`` or ``--user-data-file`` argument to
ec2-run-instances for example.
@@ -53,10 +53,9 @@ system:
% cloud-init --help
usage: cloud-init [-h] [--version] [--file FILES]
-
[--debug] [--force]
- {init,modules,single,dhclient-hook,features,analyze,devel,collect-logs,clean,status}
- ...
+ {init,modules,single,query,dhclient-hook,features,analyze,devel,collect-logs,clean,status}
+ ...
optional arguments:
-h, --help show this help message and exit
@@ -68,17 +67,19 @@ system:
your own risk)
Subcommands:
- {init,modules,single,dhclient-hook,features,analyze,devel,collect-logs,clean,status}
+ {init,modules,single,query,dhclient-hook,features,analyze,devel,collect-logs,clean,status}
init initializes cloud-init and performs initial modules
modules activates modules using a given configuration key
single run a single module
+ query Query instance metadata from the command line
dhclient-hook run the dhclient hookto record network info
features list defined features
analyze Devel tool: Analyze cloud-init logs and data
devel Run development tools
collect-logs Collect and tar all cloud-init debug info
- clean Remove logs and artifacts so cloud-init can re-run.
- status Report cloud-init status or wait on completion.
+ clean Remove logs and artifacts so cloud-init can re-run
+ status Report cloud-init status or wait on completion
+
CLI Subcommand details
======================
@@ -104,8 +105,8 @@ cloud-init status
Report whether cloud-init is running, done, disabled or errored. Exits
non-zero if an error is detected in cloud-init.
- * **--long**: Detailed status information.
- * **--wait**: Block until cloud-init completes.
+* **--long**: Detailed status information.
+* **--wait**: Block until cloud-init completes.
.. code-block:: shell-session
@@ -143,6 +144,68 @@ Logs collected are:
* journalctl output
* /var/lib/cloud/instance/user-data.txt
+.. _cli_query:
+
+cloud-init query
+------------------
+Query standardized cloud instance metadata crawled by cloud-init and stored
+in ``/run/cloud-init/instance-data.json``. This is a convenience command-line
+interface to reference any cached configuration metadata that cloud-init
+crawls when booting the instance. See :ref:`instance_metadata` for more info.
+
+* **--all**: Dump all available instance data as json which can be queried.
+* **--instance-data**: Optional path to a different instance-data.json file to
+ source for queries.
+* **--list-keys**: List available query keys from cached instance data.
+
+.. code-block:: shell-session
+
+ # List all top-level query keys available (includes standardized aliases)
+ % cloud-init query --list-keys
+ availability_zone
+ base64_encoded_keys
+ cloud_name
+ ds
+ instance_id
+ local_hostname
+ region
+ v1
+
+* **<varname>**: A dot-delimited variable path into the instance-data.json
+ object.
+
+.. code-block:: shell-session
+
+ # Query cloud-init standardized metadata on any cloud
+ % cloud-init query v1.cloud_name
+ aws # or openstack, azure, gce etc.
+
+ # Any standardized instance-data under a <v#> key is aliased as a top-level
+ # key for convenience.
+ % cloud-init query cloud_name
+ aws # or openstack, azure, gce etc.
+
+ # Query datasource-specific metadata on EC2
+ % cloud-init query ds.meta_data.public_ipv4
+
+* **--format**: A string using jinja-template syntax which will be rendered,
+ replacing any referenced instance-data variables with their values.
+
+.. code-block:: shell-session
+
+ # Generate a custom hostname fqdn based on instance-id, cloud and region
+ % cloud-init query --format 'custom-{{instance_id}}.{{region}}.{{v1.cloud_name}}.com'
+ custom-i-0e91f69987f37ec74.us-east-2.aws.com
+
+
+.. note::
+ The standardized instance data keys under **v#** are guaranteed not to change
+ behavior or format. If using top-level convenience aliases for any
+ standardized instance data keys, the value from the highest **v#** of that
+ key name is reported as the top-level value. So these aliases act as a
+ 'latest'.
+
+
.. _cli_analyze:
cloud-init analyze
@@ -150,10 +213,10 @@ cloud-init analyze
Get detailed reports of where cloud-init spends most of its time. See
:ref:`boot_time_analysis` for more info.
- * **blame** Report ordered by most costly operations.
- * **dump** Machine-readable JSON dump of all cloud-init tracked events.
- * **show** show time-ordered report of the cost of operations during each
- boot stage.
+* **blame** Report ordered by most costly operations.
+* **dump** Machine-readable JSON dump of all cloud-init tracked events.
+* **show** show time-ordered report of the cost of operations during each
+ boot stage.
.. _cli_devel:
@@ -182,8 +245,8 @@ cloud-init clean
Remove cloud-init artifacts from /var/lib/cloud and optionally reboot the
machine to so cloud-init re-runs all stages as it did on first boot.
- * **--logs**: Optionally remove /var/log/cloud-init*log files.
- * **--reboot**: Reboot the system after removing artifacts.
+* **--logs**: Optionally remove /var/log/cloud-init*log files.
+* **--reboot**: Reboot the system after removing artifacts.
.. _cli_init:
@@ -195,7 +258,7 @@ Can be run on the commandline, but is generally gated to run only once
due to semaphores in **/var/lib/cloud/instance/sem/** and
**/var/lib/cloud/sem**.
- * **--local**: Run *init-local* stage instead of *init*.
+* **--local**: Run *init-local* stage instead of *init*.
.. _cli_modules:
@@ -210,8 +273,8 @@ declared to run in various boot stages in the file
commandline, but each module is gated to run only once due to semaphores
in ``/var/lib/cloud/``.
- * **--mode (init|config|final)**: Run *modules:init*, *modules:config* or
- *modules:final* cloud-init stages. See :ref:`boot_stages` for more info.
+* **--mode (init|config|final)**: Run *modules:init*, *modules:config* or
+ *modules:final* cloud-init stages. See :ref:`boot_stages` for more info.
.. _cli_single:
@@ -221,9 +284,9 @@ Attempt to run a single named cloud config module. The following example
re-runs the cc_set_hostname module ignoring the module default frequency
of once-per-instance:
- * **--name**: The cloud-config module name to run
- * **--frequency**: Optionally override the declared module frequency
- with one of (always|once-per-instance|once)
+* **--name**: The cloud-config module name to run
+* **--frequency**: Optionally override the declared module frequency
+ with one of (always|once-per-instance|once)
.. code-block:: shell-session
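
The --format option documented in the query section above reuses the same
jinja plumbing as user-data templates. A rough sketch, based on how
cloudinit/cmd/query.py calls render_jinja_payload in this commit, with a
hand-built instance_data dict standing in for the parsed instance-data.json:

    from cloudinit.handlers.jinja_template import render_jinja_payload

    instance_data = {'v1': {'cloud_name': 'aws', 'region': 'us-east-2'}}
    fmt = 'cloud: {{ v1.cloud_name }} region: {{ v1.region }}'
    # query.py prepends the jinja header before rendering the format string.
    payload = '## template: jinja\n{fmt}'.format(fmt=fmt)
    print(render_jinja_payload(
        payload=payload, payload_fn='query commandline',
        instance_data=instance_data, debug=False))
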
diff --git a/doc/rtd/topics/datasources.rst b/doc/rtd/topics/datasources.rst
index 14432e65..e34f145c 100644
--- a/doc/rtd/topics/datasources.rst
+++ b/doc/rtd/topics/datasources.rst
@@ -17,146 +17,10 @@ own way) internally a datasource abstract class was created to allow for a
single way to access the different cloud systems methods to provide this data
through the typical usage of subclasses.
-
-.. _instance_metadata:
-
-instance-data
--------------
-For reference, cloud-init stores all the metadata, vendordata and userdata
-provided by a cloud in a json blob at ``/run/cloud-init/instance-data.json``.
-While the json contains datasource-specific keys and names, cloud-init will
-maintain a minimal set of standardized keys that will remain stable on any
-cloud. Standardized instance-data keys will be present under a "v1" key.
-Any datasource metadata cloud-init consumes will all be present under the
-"ds" key.
-
-Below is an instance-data.json example from an OpenStack instance:
-
-.. sourcecode:: json
-
- {
- "base64-encoded-keys": [
- "ds/meta-data/random_seed",
- "ds/user-data"
- ],
- "ds": {
- "ec2_metadata": {
- "ami-id": "ami-0000032f",
- "ami-launch-index": "0",
- "ami-manifest-path": "FIXME",
- "block-device-mapping": {
- "ami": "vda",
- "ephemeral0": "/dev/vdb",
- "root": "/dev/vda"
- },
- "hostname": "xenial-test.novalocal",
- "instance-action": "none",
- "instance-id": "i-0006e030",
- "instance-type": "m1.small",
- "local-hostname": "xenial-test.novalocal",
- "local-ipv4": "10.5.0.6",
- "placement": {
- "availability-zone": "None"
- },
- "public-hostname": "xenial-test.novalocal",
- "public-ipv4": "10.245.162.145",
- "reservation-id": "r-fxm623oa",
- "security-groups": "default"
- },
- "meta-data": {
- "availability_zone": null,
- "devices": [],
- "hostname": "xenial-test.novalocal",
- "instance-id": "3e39d278-0644-4728-9479-678f9212d8f0",
- "launch_index": 0,
- "local-hostname": "xenial-test.novalocal",
- "name": "xenial-test",
- "project_id": "e0eb2d2538814...",
- "random_seed": "A6yPN...",
- "uuid": "3e39d278-0644-4728-9479-678f92..."
- },
- "network_json": {
- "links": [
- {
- "ethernet_mac_address": "fa:16:3e:7d:74:9b",
- "id": "tap9ca524d5-6e",
- "mtu": 8958,
- "type": "ovs",
- "vif_id": "9ca524d5-6e5a-4809-936a-6901..."
- }
- ],
- "networks": [
- {
- "id": "network0",
- "link": "tap9ca524d5-6e",
- "network_id": "c6adfc18-9753-42eb-b3ea-18b57e6b837f",
- "type": "ipv4_dhcp"
- }
- ],
- "services": [
- {
- "address": "10.10.160.2",
- "type": "dns"
- }
- ]
- },
- "user-data": "I2Nsb3VkLWNvbmZpZ...",
- "vendor-data": null
- },
- "v1": {
- "availability-zone": null,
- "cloud-name": "openstack",
- "instance-id": "3e39d278-0644-4728-9479-678f9212d8f0",
- "local-hostname": "xenial-test",
- "region": null
- }
- }
-
-
-As of cloud-init v. 18.4, any values present in
-``/run/cloud-init/instance-data.json`` can be used in cloud-init user data
-scripts or cloud config data. This allows consumers to use cloud-init's
-vendor-neutral, standardized metadata keys as well as datasource-specific
-content for any scripts or cloud-config modules they are using.
-
-To use instance-data.json values in scripts and **#config-config** files the
-user-data will need to contain the following header as the first line **## template: jinja**. Cloud-init will source all variables defined in
-``/run/cloud-init/instance-data.json`` and allow scripts or cloud-config files
-to reference those paths. Below are two examples::
-
- * Cloud config calling home with the ec2 public hostname and avaliability-zone
- ```
- ## template: jinja
- #cloud-config
- runcmd:
- - echo 'EC2 public hostname allocated to instance: {{ ds.meta_data.public_hostname }}' > /tmp/instance_metadata
- - echo 'EC2 avaiability zone: {{ v1.availability_zone }}' >> /tmp/instance_metadata
- - curl -X POST -d '{"hostname": "{{ds.meta_data.public_hostname }}", "availability-zone": "{{ v1.availability_zone }}"}' https://example.com.com
- ```
-
- * Custom user script performing different operations based on region
- ```
- ## template: jinja
- #!/bin/bash
- {% if v1.region == 'us-east-2' -%}
- echo 'Installing custom proxies for {{ v1.region }}
- sudo apt-get install my-xtra-fast-stack
- {%- endif %}
- ...
-
- ```
-
-.. note::
- Trying to reference jinja variables that don't exist in
- instance-data.json will result in warnings in ``/var/log/cloud-init.log``
- and the following string in your rendered user-data:
- ``CI_MISSING_JINJA_VAR/<your_varname>``.
-
-.. note::
- To save time designing your user-data for a specific cloud's
- instance-data.json, use the 'render' cloud-init command on an
- instance booted on your favorite cloud. See :ref:`cli_devel` for more
- information.
+Any metadata processed by cloud-init's datasources is persisted as
+``/run/cloud-init/instance-data.json``. Cloud-init provides tooling
+to quickly introspect some of that data. See :ref:`instance_metadata` for
+more information.
Datasource API
@@ -196,14 +60,14 @@ The current interface that a datasource object must provide is the following:
# or does not exist)
def device_name_to_device(self, name)
- # gets the locale string this instance should be applying
+ # gets the locale string this instance should be applying
# which typically used to adjust the instances locale settings files
def get_locale(self)
@property
def availability_zone(self)
- # gets the instance id that was assigned to this instance by the
+ # gets the instance id that was assigned to this instance by the
# cloud provider or when said instance id does not exist in the backing
# metadata this will return 'iid-datasource'
def get_instance_id(self)
diff --git a/doc/rtd/topics/instancedata.rst b/doc/rtd/topics/instancedata.rst
new file mode 100644
index 00000000..634e1807
--- /dev/null
+++ b/doc/rtd/topics/instancedata.rst
@@ -0,0 +1,297 @@
+.. _instance_metadata:
+
+*****************
+Instance Metadata
+*****************
+
+What is instance data?
+=======================
+
+Instance data is the collection of all configuration data that cloud-init
+processes to configure the instance. This configuration typically
+comes from any number of sources:
+
+* cloud-provided metadata services (aka metadata)
+* custom config-drive attached to the instance
+* cloud-config seed files in the booted cloud image or distribution
+* vendordata provided from files or cloud metadata services
+* userdata provided at instance creation
+
+Each cloud provider presents unique configuration metadata in different
+formats to the instance. Cloud-init provides a cache of any crawled metadata
+as well as a versioned set of standardized instance data keys which it makes
+available on all platforms.
+
+Cloud-init produces a simple json object in
+``/run/cloud-init/instance-data.json`` which is a standardized and
+versioned representation of the metadata it consumes during initial boot. The
+intent is to provide the following benefits to users or scripts on any system
+deployed with cloud-init:
+
+* simple static object to query to obtain an instance's metadata
+* speed: avoid costly network transactions for metadata that is already cached
+ on the filesystem
+* reduce need to recrawl metadata services for static metadata that is already
+ cached
+* leverage cloud-init's best practices for crawling cloud-metadata services
+* avoid rolling unique metadata crawlers on each cloud platform to get
+ metadata configuration values
+
+Cloud-init stores any instance data processed in the following files:
+
+* ``/run/cloud-init/instance-data.json``: world-readable json containing
+ standardized keys, sensitive keys redacted
+* ``/run/cloud-init/instance-data-sensitive.json``: root-readable unredacted
+ json blob
+* ``/var/lib/cloud/instance/user-data.txt``: root-readable sensitive raw
+ userdata
+* ``/var/lib/cloud/instance/vendor-data.txt``: root-readable sensitive raw
+ vendordata
+
+Cloud-init redacts any security-sensitive content from instance-data.json and
+stores ``/run/cloud-init/instance-data.json`` as a world-readable json file.
+Because user-data and vendor-data can contain passwords, both of those files
+are readable only by *root*. The *root* user can also read
+``/run/cloud-init/instance-data-sensitive.json``, which contains all instance
+data from instance-data.json as well as the unredacted sensitive content.
+
+
+Format of instance-data.json
+============================
+
+The instance-data.json and instance-data-sensitive.json files are well-formed
+JSON and record the set of keys and values for any metadata processed by
+cloud-init. Cloud-init standardizes the format for this content so that it
+can be generalized across different cloud platforms.
+
+There are four basic top-level keys:
+
+* **base64_encoded_keys**: A list of forward-slash delimited key paths into
+ the instance-data.json object whose value is base64-encoded for json
+ compatibility. Values at these paths should be decoded to get the original
+ value.
+
+* **sensitive_keys**: A list of forward-slash delimited key paths into
+ the instance-data.json object whose value is considered by the datasource as
+ 'security sensitive'. Only the keys listed here will be redacted from
+ instance-data.json for non-root users.
+
+* **ds**: Datasource-specific metadata crawled for the specific cloud
+ platform. It should closely represent the structure of the cloud metadata
+ crawled. The structure of content and details provided are entirely
+ cloud-dependent. Mileage will vary depending on what the cloud exposes.
+ The content exposed under the 'ds' key is currently **experimental** and
+ expected to change slightly in the upcoming cloud-init release.
+
+* **v1**: Standardized cloud-init metadata keys, these keys are guaranteed to
+ exist on all cloud platforms. They will also retain their current behavior
+ and format and will be carried forward even if cloud-init introduces a new
+ version of standardized keys with **v2**.
+
+The standardized keys present:
+
++----------------------+-----------------------------------------------+---------------------------+
+| Key path | Description | Examples |
++======================+===============================================+===========================+
+| v1.cloud_name | The name of the cloud provided by metadata | aws, openstack, azure, |
+| | key 'cloud-name' or the cloud-init datasource | configdrive, nocloud, |
+| | name which was discovered. | ovf, etc. |
++----------------------+-----------------------------------------------+---------------------------+
+| v1.instance_id | Unique instance_id allocated by the cloud | i-<somehash> |
++----------------------+-----------------------------------------------+---------------------------+
+| v1.local_hostname | The internal or local hostname of the system | ip-10-41-41-70, |
+| | | <user-provided-hostname> |
++----------------------+-----------------------------------------------+---------------------------+
+| v1.region | The physical region/datacenter in which the | us-east-2 |
+| | instance is deployed | |
++----------------------+-----------------------------------------------+---------------------------+
+| v1.availability_zone | The physical availability zone in which the | us-east-2b, nova, null |
+| | instance is deployed | |
++----------------------+-----------------------------------------------+---------------------------+
+
+
+Below is an example of ``/run/cloud-init/instance-data.json`` on an EC2
+instance:
+
+.. sourcecode:: json
+
+ {
+ "base64_encoded_keys": [],
+ "sensitive_keys": [],
+ "ds": {
+ "meta_data": {
+ "ami-id": "ami-014e1416b628b0cbf",
+ "ami-launch-index": "0",
+ "ami-manifest-path": "(unknown)",
+ "block-device-mapping": {
+ "ami": "/dev/sda1",
+ "ephemeral0": "sdb",
+ "ephemeral1": "sdc",
+ "root": "/dev/sda1"
+ },
+ "hostname": "ip-10-41-41-70.us-east-2.compute.internal",
+ "instance-action": "none",
+ "instance-id": "i-04fa31cfc55aa7976",
+ "instance-type": "t2.micro",
+ "local-hostname": "ip-10-41-41-70.us-east-2.compute.internal",
+ "local-ipv4": "10.41.41.70",
+ "mac": "06:b6:92:dd:9d:24",
+ "metrics": {
+ "vhostmd": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+ },
+ "network": {
+ "interfaces": {
+ "macs": {
+ "06:b6:92:dd:9d:24": {
+ "device-number": "0",
+ "interface-id": "eni-08c0c9fdb99b6e6f4",
+ "ipv4-associations": {
+ "18.224.22.43": "10.41.41.70"
+ },
+ "local-hostname": "ip-10-41-41-70.us-east-2.compute.internal",
+ "local-ipv4s": "10.41.41.70",
+ "mac": "06:b6:92:dd:9d:24",
+ "owner-id": "437526006925",
+ "public-hostname": "ec2-18-224-22-43.us-east-2.compute.amazonaws.com",
+ "public-ipv4s": "18.224.22.43",
+ "security-group-ids": "sg-828247e9",
+ "security-groups": "Cloud-init integration test secgroup",
+ "subnet-id": "subnet-282f3053",
+ "subnet-ipv4-cidr-block": "10.41.41.0/24",
+ "subnet-ipv6-cidr-blocks": "2600:1f16:b80:ad00::/64",
+ "vpc-id": "vpc-252ef24d",
+ "vpc-ipv4-cidr-block": "10.41.0.0/16",
+ "vpc-ipv4-cidr-blocks": "10.41.0.0/16",
+ "vpc-ipv6-cidr-blocks": "2600:1f16:b80:ad00::/56"
+ }
+ }
+ }
+ },
+ "placement": {
+ "availability-zone": "us-east-2b"
+ },
+ "profile": "default-hvm",
+ "public-hostname": "ec2-18-224-22-43.us-east-2.compute.amazonaws.com",
+ "public-ipv4": "18.224.22.43",
+ "public-keys": {
+ "cloud-init-integration": [
+ "ssh-rsa
+ AAAAB3NzaC1yc2EAAAADAQABAAABAQDSL7uWGj8cgWyIOaspgKdVy0cKJ+UTjfv7jBOjG2H/GN8bJVXy72XAvnhM0dUM+CCs8FOf0YlPX+Frvz2hKInrmRhZVwRSL129PasD12MlI3l44u6IwS1o/W86Q+tkQYEljtqDOo0a+cOsaZkvUNzUyEXUwz/lmYa6G4hMKZH4NBj7nbAAF96wsMCoyNwbWryBnDYUr6wMbjRR1J9Pw7Xh7WRC73wy4Va2YuOgbD3V/5ZrFPLbWZW/7TFXVrql04QVbyei4aiFR5n//GvoqwQDNe58LmbzX/xvxyKJYdny2zXmdAhMxbrpFQsfpkJ9E/H5w0yOdSvnWbUoG5xNGoOB
+ cloud-init-integration"
+ ]
+ },
+ "reservation-id": "r-06ab75e9346f54333",
+ "security-groups": "Cloud-init integration test secgroup",
+ "services": {
+ "domain": "amazonaws.com",
+ "partition": "aws"
+ }
+ }
+ },
+ "v1": {
+ "availability-zone": "us-east-2b",
+ "availability_zone": "us-east-2b",
+ "cloud-name": "aws",
+ "cloud_name": "aws",
+ "instance-id": "i-04fa31cfc55aa7976",
+ "instance_id": "i-04fa31cfc55aa7976",
+ "local-hostname": "ip-10-41-41-70",
+ "local_hostname": "ip-10-41-41-70",
+ "region": "us-east-2"
+ }
+ }
+
+
+Using instance-data
+===================
+
+As of cloud-init v. 18.4, any variables present in
+``/run/cloud-init/instance-data.json`` can be used in:
+
+* User-data scripts
+* Cloud config data
+* Command line interface via **cloud-init query** or
+ **cloud-init devel render**
+
+Many clouds allow users to provide user-data to an instance at
+the time the instance is launched. Cloud-init supports a number of
+:ref:`user_data_formats`.
+
+Both user-data scripts and **#cloud-config** data support jinja template
+rendering.
+When the first line of the provided user-data begins with
+**## template: jinja**, cloud-init will use jinja to render that file.
+Any instance-data-sensitive.json variables are surfaced as dot-delimited
+jinja template variables because cloud-config modules are run as the 'root'
+user.
+
+
+Below are some examples of providing these types of user-data:
+
+* Cloud config calling home with the ec2 public hostname and availability-zone
+
+.. code-block:: shell-session
+
+ ## template: jinja
+ #cloud-config
+ runcmd:
+ - echo 'EC2 public hostname allocated to instance: {{
+ ds.meta_data.public_hostname }}' > /tmp/instance_metadata
+ - echo 'EC2 availability zone: {{ v1.availability_zone }}' >>
+ /tmp/instance_metadata
+ - curl -X POST -d '{"hostname": "{{ds.meta_data.public_hostname }}",
+ "availability-zone": "{{ v1.availability_zone }}"}'
+ https://example.com
+
+* Custom user-data script performing different operations based on region
+
+.. code-block:: shell-session
+
+ ## template: jinja
+ #!/bin/bash
+ {% if v1.region == 'us-east-2' -%}
+ echo 'Installing custom proxies for {{ v1.region }}'
+ sudo apt-get install my-xtra-fast-stack
+ {%- endif %}
+ ...
+
+.. note::
+ Trying to reference jinja variables that don't exist in
+ instance-data.json will result in warnings in ``/var/log/cloud-init.log``
+ and the following string in your rendered user-data:
+ ``CI_MISSING_JINJA_VAR/<your_varname>``.
+
+Cloud-init also surfaces a commandline tool **cloud-init query** which can
+assist developers or scripts with obtaining instance metadata easily. See
+:ref:`cli_query` for more information.
+
+To cut down on keystrokes on the command line, cloud-init also provides
+top-level key aliases for any standardized ``v#`` keys present; the ``v1.``
+prefix of ``v1.var_name`` is not required. These aliases will represent the
+value of the highest versioned standard key. For example, the ``cloud_name``
+value will be ``v2.cloud_name`` if both ``v1`` and ``v2`` keys are present in
+instance-data.json.
+The **query** command also publishes ``userdata`` and ``vendordata`` keys to
+the root user which will contain the decoded user and vendor data provided to
+this instance. Non-root users referencing userdata or vendordata keys will
+see only redacted values.
+
+.. code-block:: shell-session
+
+ # List all top-level instance-data keys available
+ % cloud-init query --list-keys
+
+ # Find your EC2 ami-id
+ % cloud-init query ds.meta_data.ami_id
+
+ # Format your cloud_name and region using jinja template syntax
+ % cloud-init query --format 'cloud: {{ v1.cloud_name }} myregion: {{ v1.region }}'
+
+.. note::
+ To save time designing a user-data template for a specific cloud's
+ instance-data.json, use the 'render' cloud-init command on an
+ instance booted on your favorite cloud. See :ref:`cli_devel` for more
+ information.
+
+.. vi: textwidth=78
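
For scripts that prefer plain Python over jinja templates or the query CLI,
the same standardized keys can be read directly from the cached file. A small
sketch assuming /run/cloud-init/instance-data.json is present and readable:

    import json

    with open('/run/cloud-init/instance-data.json') as stream:
        v1 = json.load(stream)['v1']

    # Mirrors the --format example above:
    # custom-<instance_id>.<region>.<cloud_name>.com
    print('custom-{0}.{1}.{2}.com'.format(
        v1['instance_id'], v1['region'], v1['cloud_name']))
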
diff --git a/integration-requirements.txt b/integration-requirements.txt
index f80cb942..880d9886 100644
--- a/integration-requirements.txt
+++ b/integration-requirements.txt
@@ -5,16 +5,17 @@
# the packages/pkg-deps.json file as well.
#
+unittest2
# ec2 backend
boto3==1.5.9
# ssh communication
paramiko==2.4.1
+
# lxd backend
# 04/03/2018: enables use of lxd 3.0
git+https://github.com/lxc/pylxd.git@4b8ab1802f9aee4eb29cf7b119dae0aa47150779
-
# finds latest image information
git+https://git.launchpad.net/simplestreams
diff --git a/tests/cloud_tests/testcases/base.py b/tests/cloud_tests/testcases/base.py
index 27458271..c5457968 100644
--- a/tests/cloud_tests/testcases/base.py
+++ b/tests/cloud_tests/testcases/base.py
@@ -5,15 +5,15 @@
import crypt
import json
import re
-import unittest
+import unittest2
from cloudinit import util as c_util
-SkipTest = unittest.SkipTest
+SkipTest = unittest2.SkipTest
-class CloudTestCase(unittest.TestCase):
+class CloudTestCase(unittest2.TestCase):
"""Base test class for verifiers."""
# data gets populated in get_suite.setUpClass
@@ -167,8 +167,9 @@ class CloudTestCase(unittest.TestCase):
'Skipping instance-data.json test.'
' OS: %s not bionic or newer' % self.os_name)
instance_data = json.loads(out)
- self.assertEqual(
- ['ds/user_data'], instance_data['base64_encoded_keys'])
+ self.assertItemsEqual(
+ [],
+ instance_data['base64_encoded_keys'])
ds = instance_data.get('ds', {})
v1_data = instance_data.get('v1', {})
metadata = ds.get('meta-data', {})
@@ -187,10 +188,10 @@ class CloudTestCase(unittest.TestCase):
metadata.get('placement', {}).get('availability-zone'),
'Could not determine EC2 Availability zone placement')
self.assertIsNotNone(
- v1_data['availability-zone'], 'expected ec2 availability-zone')
- self.assertEqual('aws', v1_data['cloud-name'])
- self.assertIn('i-', v1_data['instance-id'])
- self.assertIn('ip-', v1_data['local-hostname'])
+ v1_data['availability_zone'], 'expected ec2 availability_zone')
+ self.assertEqual('aws', v1_data['cloud_name'])
+ self.assertIn('i-', v1_data['instance_id'])
+ self.assertIn('ip-', v1_data['local_hostname'])
self.assertIsNotNone(v1_data['region'], 'expected ec2 region')
def test_instance_data_json_lxd(self):
@@ -213,16 +214,14 @@ class CloudTestCase(unittest.TestCase):
' OS: %s not bionic or newer' % self.os_name)
instance_data = json.loads(out)
v1_data = instance_data.get('v1', {})
- self.assertEqual(
- ['ds/user_data', 'ds/vendor_data'],
- sorted(instance_data['base64_encoded_keys']))
- self.assertEqual('nocloud', v1_data['cloud-name'])
+ self.assertItemsEqual([], sorted(instance_data['base64_encoded_keys']))
+ self.assertEqual('nocloud', v1_data['cloud_name'])
self.assertIsNone(
- v1_data['availability-zone'],
- 'found unexpected lxd availability-zone %s' %
- v1_data['availability-zone'])
- self.assertIn('cloud-test', v1_data['instance-id'])
- self.assertIn('cloud-test', v1_data['local-hostname'])
+ v1_data['availability_zone'],
+ 'found unexpected lxd availability_zone %s' %
+ v1_data['availability_zone'])
+ self.assertIn('cloud-test', v1_data['instance_id'])
+ self.assertIn('cloud-test', v1_data['local_hostname'])
self.assertIsNone(
v1_data['region'],
'found unexpected lxd region %s' % v1_data['region'])
@@ -248,18 +247,17 @@ class CloudTestCase(unittest.TestCase):
' OS: %s not bionic or newer' % self.os_name)
instance_data = json.loads(out)
v1_data = instance_data.get('v1', {})
- self.assertEqual(
- ['ds/user_data'], instance_data['base64_encoded_keys'])
- self.assertEqual('nocloud', v1_data['cloud-name'])
+ self.assertItemsEqual([], instance_data['base64_encoded_keys'])
+ self.assertEqual('nocloud', v1_data['cloud_name'])
self.assertIsNone(
- v1_data['availability-zone'],
- 'found unexpected kvm availability-zone %s' %
- v1_data['availability-zone'])
+ v1_data['availability_zone'],
+ 'found unexpected kvm availability_zone %s' %
+ v1_data['availability_zone'])
self.assertIsNotNone(
re.match(r'[\da-f]{8}(-[\da-f]{4}){3}-[\da-f]{12}',
- v1_data['instance-id']),
- 'kvm instance-id is not a UUID: %s' % v1_data['instance-id'])
- self.assertIn('ubuntu', v1_data['local-hostname'])
+ v1_data['instance_id']),
+ 'kvm instance_id is not a UUID: %s' % v1_data['instance_id'])
+ self.assertIn('ubuntu', v1_data['local_hostname'])
self.assertIsNone(
v1_data['region'],
'found unexpected lxd region %s' % v1_data['region'])