From e9d1d3a0efce95b6bc7f63fe01d579b98664c69f Mon Sep 17 00:00:00 2001 From: Chad Smith Date: Fri, 28 Apr 2023 02:01:31 -0600 Subject: cli: schema validation of jinja template user-data (SC-1385) (#2132) The CLI cloud-init schema now asserts that the leading header comment in user-data files is a valid user-data type. Raise an informative error otherwise about valid user-data types. For user-data files declared with '## template: jinja', render those files first sourcing jinja variables from /run/cloud-init/instance-data.json or a new --instance-data parameter. Once the jinja template is rendered, validate schema of the resulting #cloud-config user-data. This branch also ensures any errors and deprecation warnings are unique. LP: #1881925 --- cloudinit/config/schema.py | 158 +++++++++++++++++++++------- tests/unittests/config/test_schema.py | 193 ++++++++++++++++++++++++++++------ 2 files changed, 278 insertions(+), 73 deletions(-) diff --git a/cloudinit/config/schema.py b/cloudinit/config/schema.py index 9886bde6..9005e924 100644 --- a/cloudinit/config/schema.py +++ b/cloudinit/config/schema.py @@ -17,8 +17,14 @@ from typing import TYPE_CHECKING, List, NamedTuple, Optional, Type, Union, cast import yaml from cloudinit import importer, safeyaml -from cloudinit.stages import Init -from cloudinit.util import error, get_modules_from_dir, load_file +from cloudinit.cmd.devel import read_cfg_paths +from cloudinit.handlers import INCLUSION_TYPES_MAP, type_from_starts_with +from cloudinit.util import ( + decode_binary, + error, + get_modules_from_dir, + load_file, +) try: from jsonschema import ValidationError as _ValidationError @@ -35,7 +41,6 @@ VERSIONED_USERDATA_SCHEMA_FILE = "versions.schema.cloud-config.json" # Also add new version definition to versions.schema.json. USERDATA_SCHEMA_FILE = "schema-cloud-config-v1.json" _YAML_MAP = {True: "true", False: "false", None: "null"} -CLOUD_CONFIG_HEADER = b"#cloud-config" SCHEMA_DOC_TMPL = """ {name} {title_underbar} @@ -64,6 +69,10 @@ SCHEMA_EXAMPLES_SPACER_TEMPLATE = "\n # --- Example{0} ---" DEPRECATED_KEY = "deprecated" DEPRECATED_PREFIX = "DEPRECATED: " +# user-data files typically must begin with a leading '#' +USERDATA_VALID_HEADERS = sorted( + [t for t in INCLUSION_TYPES_MAP.keys() if t[0] == "#"] +) # type-annotate only if type-checking. # Consider to add `type_extensions` as a dependency when Bionic is EOL. @@ -129,20 +138,26 @@ class SchemaValidationError(ValueError): ((flat.config.key, msg),) """ message = "" - if schema_errors: - message += _format_schema_problems( - schema_errors, prefix="Cloud config schema errors: " - ) - if schema_deprecations: + + def handle_problems(problems, prefix): + if not problems: + return problems + nonlocal message if message: message += "\n\n" - message += _format_schema_problems( - schema_deprecations, - prefix="Cloud config schema deprecations: ", - ) + problems = sorted(list(set(problems))) + message += _format_schema_problems(problems, prefix=prefix) + return problems + + self.schema_errors = handle_problems( + schema_errors, + prefix="Cloud config schema errors: ", + ) + self.schema_deprecations = handle_problems( + schema_deprecations, + prefix="Cloud config schema deprecations: ", + ) super().__init__(message) - self.schema_errors = schema_errors - self.schema_deprecations = schema_deprecations def has_errors(self) -> bool: return bool(self.schema_errors) @@ -639,7 +654,12 @@ def annotated_cloudconfig_file( ) -def validate_cloudconfig_file(config_path, schema, annotate=False): +def validate_cloudconfig_file( + config_path: str, + schema: dict, + annotate: bool = False, + instance_data_path: str = None, +): """Validate cloudconfig file adheres to a specific jsonschema. @param config_path: Path to the yaml cloud-config file to parse, or None @@ -647,28 +667,69 @@ def validate_cloudconfig_file(config_path, schema, annotate=False): @param schema: Dict describing a valid jsonschema to validate against. @param annotate: Boolean set True to print original config file with error annotations on the offending lines. + @param instance_data_path: Path to instance_data JSON, used for text/jinja + rendering. @raises SchemaValidationError containing any of schema_errors encountered. @raises RuntimeError when config_path does not exist. """ + from cloudinit.handlers.jinja_template import ( + JinjaLoadError, + NotJinjaError, + render_jinja_payload_from_file, + ) + content = load_file(config_path, decode=False) - if not content.startswith(CLOUD_CONFIG_HEADER): - errors = [ - SchemaProblem( - "format-l1.c1", - 'File {0} needs to begin with "{1}"'.format( - config_path, CLOUD_CONFIG_HEADER.decode() - ), - ), - ] - error = SchemaValidationError(errors) - if annotate: - print( - annotated_cloudconfig_file( - {}, content, {}, schema_errors=error.schema_errors + user_data_type = type_from_starts_with(content) + schema_position = "format-l1.c1" + if not user_data_type: + raise SchemaValidationError( + [ + SchemaProblem( + schema_position, + f"No valid cloud-init user-data header in {config_path}.\n" + "Expected first line to be one of: " + f"{', '.join(USERDATA_VALID_HEADERS)}", ) - ) - raise error + ] + ) + if user_data_type not in ("text/cloud-config", "text/jinja2"): + print( + f"User-data type '{user_data_type}' not currently evaluated" + " by cloud-init schema" + ) + return + if user_data_type == "text/jinja2": + try: + content = render_jinja_payload_from_file( + decode_binary(content), config_path, instance_data_path + ).encode() + except NotJinjaError as e: + raise SchemaValidationError( + [ + SchemaProblem( + schema_position, + "Detected type '{user_data_type}' from header. " + "But, content is not a jinja template", + ) + ] + ) from e + except JinjaLoadError as e: + error(str(e), sys_exit=True) + schema_position = "format-l2.c1" + user_data_type = type_from_starts_with(content) + if not user_data_type: + content_header = content[: decode_binary(content).find("\n")] + raise SchemaValidationError( + [ + SchemaProblem( + schema_position, + f"Unrecognized user-data header in {config_path}:" + f" {content_header}. Expected one of the following " + f"headers: {', '.join(USERDATA_VALID_HEADERS)}", + ) + ] + ) try: if annotate: cloudconfig, marks = safeyaml.load_with_marks(content) @@ -691,14 +752,14 @@ def validate_cloudconfig_file(config_path, schema, annotate=False): "File {0} is not valid yaml. {1}".format(config_path, str(e)), ), ] - error = SchemaValidationError(errors) + schema_error = SchemaValidationError(errors) if annotate: print( annotated_cloudconfig_file( - {}, content, {}, schema_errors=error.schema_errors + {}, content, {}, schema_errors=schema_error.schema_errors ) ) - raise error from e + raise schema_error from e if not isinstance(cloudconfig, dict): # Return a meaningful message on empty cloud-config if not annotate: @@ -1152,6 +1213,16 @@ def get_parser(parser=None): "--config-file", help="Path of the cloud-config yaml file to validate", ) + parser.add_argument( + "-i", + "--instance-data", + type=str, + help=( + "Path to instance-data.json file for variable expansion " + "of '##template: jinja' user-data. Default: " + f"{read_cfg_paths().get_runpath('instance_data')}" + ), + ) parser.add_argument( "--system", action="store_true", @@ -1193,6 +1264,13 @@ def handle_schema_args(name, args): if args.docs: print(load_doc(args.docs)) return + paths = read_cfg_paths() + if args.instance_data: + instance_data_path = args.instance_data + elif os.getuid() != 0: + instance_data_path = paths.get_runpath("instance_data") + else: + instance_data_path = paths.get_runpath("instance_data_sensitive") if args.config_file: config_files = (("user-data", args.config_file),) else: @@ -1202,9 +1280,7 @@ def handle_schema_args(name, args): " user. Try using sudo.", sys_exit=True, ) - init = Init(ds_deps=[]) - init.fetch(existing="trust") - userdata_file = init.paths.get_ipath("cloud_config") + userdata_file = paths.get_ipath("cloud_config") if not userdata_file: error( "Unable to obtain user data file. No instance data available", @@ -1213,8 +1289,8 @@ def handle_schema_args(name, args): return # Helps typing config_files = (("user-data", userdata_file),) vendor_config_files = ( - ("vendor-data", init.paths.get_ipath("vendor_cloud_config")), - ("vendor2-data", init.paths.get_ipath("vendor2_cloud_config")), + ("vendor-data", paths.get_ipath("vendor_cloud_config")), + ("vendor2-data", paths.get_ipath("vendor2_cloud_config")), ) for cfg_type, vendor_file in vendor_config_files: if vendor_file and os.path.exists(vendor_file): @@ -1240,7 +1316,9 @@ def handle_schema_args(name, args): if multi_config_output: print(f"\n{idx}. {cfg_type} at {cfg_file}:") try: - validate_cloudconfig_file(cfg_file, full_schema, args.annotate) + validate_cloudconfig_file( + cfg_file, full_schema, args.annotate, instance_data_path + ) except SchemaValidationError as e: if not args.annotate: print(f"{nested_output_prefix}Invalid cloud-config {cfg_file}") diff --git a/tests/unittests/config/test_schema.py b/tests/unittests/config/test_schema.py index 0e49dbf3..ceb689a4 100644 --- a/tests/unittests/config/test_schema.py +++ b/tests/unittests/config/test_schema.py @@ -19,9 +19,8 @@ from typing import List, Optional, Sequence, Set import pytest -from cloudinit import log, stages +from cloudinit import log from cloudinit.config.schema import ( - CLOUD_CONFIG_HEADER, VERSIONED_USERDATA_SCHEMA_FILE, MetaSchema, SchemaProblem, @@ -53,7 +52,6 @@ from tests.unittests.helpers import ( skipUnlessJsonSchema, skipUnlessJsonSchemaVersionGreaterThan, ) -from tests.unittests.util import FakeDataSource M_PATH = "cloudinit.config.schema." DEPRECATED_LOG_LEVEL = 35 @@ -720,14 +718,14 @@ class TestValidateCloudConfigFile: """On invalid header, validate_cloudconfig_file errors. A SchemaValidationError is raised when the file doesn't begin with - CLOUD_CONFIG_HEADER. + known headers. """ config_file = tmpdir.join("my.yaml") config_file.write("#junk") error_msg = ( - "Cloud config schema errors: format-l1.c1: File" - f" {config_file} needs to begin with" - f' "{CLOUD_CONFIG_HEADER.decode()}"' + f"No valid cloud-init user-data header in {config_file}.\n" + "Expected first line to be one of: #!, ## template: jinja, " + "#cloud-boothook, #cloud-config," ) with pytest.raises(SchemaValidationError, match=error_msg): validate_cloudconfig_file(config_file.strpath, {}, annotate) @@ -777,28 +775,46 @@ class TestValidateCloudConfigFile: @skipUnlessJsonSchema() @pytest.mark.parametrize("annotate", (True, False)) + def test_validateconfig_file_squelches_duplicate_errors( + self, annotate, tmpdir + ): + """validate_cloudconfig_file raises only unique errors.""" + config_file = tmpdir.join("my.yaml") + schema = { # Define duplicate schema definitions in different sections + "allOf": [ + {"properties": {"p1": {"type": "string", "format": "string"}}}, + {"properties": {"p1": {"type": "string", "format": "string"}}}, + ] + } + config_file.write("#cloud-config\np1: -1") + error_msg = ( # Strict match of full error + "Cloud config schema errors: p1: -1 is not of type 'string'$" + ) + with pytest.raises(SchemaValidationError, match=error_msg): + validate_cloudconfig_file(config_file.strpath, schema, annotate) + + @skipUnlessJsonSchema() + @pytest.mark.parametrize("annotate", (True, False)) + @mock.patch(M_PATH + "read_cfg_paths") @mock.patch("cloudinit.url_helper.time.sleep") def test_validateconfig_file_no_cloud_cfg( - self, m_sleep, annotate, capsys, mocker + self, m_sleep, read_cfg_paths, annotate, paths, capsys, mocker ): """validate_cloudconfig_file does noop with empty user-data.""" schema = {"properties": {"p1": {"type": "string", "format": "string"}}} - blob = "" - ci = stages.Init() - ci.datasource = FakeDataSource(blob) - mocker.patch(M_PATH + "Init", return_value=ci) - cloud_config_file = ci.paths.get_ipath_cur("cloud_config") + paths.get_ipath = paths.get_ipath_cur + read_cfg_paths.return_value = paths + cloud_config_file = paths.get_ipath_cur("cloud_config") write_file(cloud_config_file, b"") - with pytest.raises( - SchemaValidationError, - match=re.escape( - "Cloud config schema errors: format-l1.c1:" - f" File {cloud_config_file} needs to begin with" - ' "#cloud-config"' - ), - ): + error_msg = ( + "Cloud config schema errors: format-l1.c1: " + f"No valid cloud-init user-data header in {cloud_config_file}.\n" + "Expected first line to be one of: #!, ## template: jinja," + " #cloud-boothook, #cloud-config," + ) + with pytest.raises(SchemaValidationError, match=error_msg): validate_cloudconfig_file(cloud_config_file, schema, annotate) @@ -1603,6 +1619,7 @@ class TestAnnotatedCloudconfigFile: ) +@mock.patch(M_PATH + "read_cfg_paths") # called by parse_args help docs class TestMain: exclusive_combinations = itertools.combinations( @@ -1610,7 +1627,7 @@ class TestMain: ) @pytest.mark.parametrize("params", exclusive_combinations) - def test_main_exclusive_args(self, params, capsys): + def test_main_exclusive_args(self, _read_cfg_paths, params, capsys): """Main exits non-zero and error on required exclusive args.""" params = list(itertools.chain(*[a.split() for a in params])) with mock.patch("sys.argv", ["mycmd"] + params): @@ -1625,7 +1642,7 @@ class TestMain: ) assert expected == err - def test_main_missing_args(self, capsys): + def test_main_missing_args(self, _read_cfg_paths, capsys): """Main exits non-zero and reports an error on missing parameters.""" with mock.patch("sys.argv", ["mycmd"]): with pytest.raises(SystemExit) as context_manager: @@ -1639,7 +1656,7 @@ class TestMain: ) assert expected == err - def test_main_absent_config_file(self, capsys): + def test_main_absent_config_file(self, _read_cfg_paths, capsys): """Main exits non-zero when config file is absent.""" myargs = ["mycmd", "--annotate", "--config-file", "NOT_A_FILE"] with mock.patch("sys.argv", myargs): @@ -1649,7 +1666,7 @@ class TestMain: _out, err = capsys.readouterr() assert "Error: Config file NOT_A_FILE does not exist\n" == err - def test_main_invalid_flag_combo(self, capsys): + def test_main_invalid_flag_combo(self, _read_cfg_paths, capsys): """Main exits non-zero when invalid flag combo used.""" myargs = ["mycmd", "--annotate", "--docs", "DOES_NOT_MATTER"] with mock.patch("sys.argv", myargs): @@ -1662,7 +1679,7 @@ class TestMain: "Cannot use --annotate with --docs\n" == err ) - def test_main_prints_docs(self, capsys): + def test_main_prints_docs(self, _read_cfg_paths, capsys): """When --docs parameter is provided, main generates documentation.""" myargs = ["mycmd", "--docs", "all"] with mock.patch("sys.argv", myargs): @@ -1671,7 +1688,7 @@ class TestMain: assert "\nNTP\n---\n" in out assert "\nRuncmd\n------\n" in out - def test_main_validates_config_file(self, tmpdir, capsys): + def test_main_validates_config_file(self, _read_cfg_paths, tmpdir, capsys): """When --config-file parameter is provided, main validates schema.""" myyaml = tmpdir.join("my.yaml") myargs = ["mycmd", "--config-file", myyaml.strpath] @@ -1681,13 +1698,14 @@ class TestMain: out, _err = capsys.readouterr() assert f"Valid cloud-config: {myyaml}\n" == out + @mock.patch(M_PATH + "read_cfg_paths") @mock.patch(M_PATH + "os.getuid", return_value=0) def test_main_validates_system_userdata_and_vendordata( - self, _getuid, capsys, mocker, paths + self, _read_cfg_paths, _getuid, read_cfg_paths, capsys, mocker, paths ): """When --system is provided, main validates system userdata.""" - m_init = mocker.patch(M_PATH + "Init") - m_init.return_value.paths.get_ipath = paths.get_ipath_cur + paths.get_ipath = paths.get_ipath_cur + read_cfg_paths.return_value = paths cloud_config_file = paths.get_ipath_cur("cloud_config") write_file(cloud_config_file, b"#cloud-config\nntp:") vd_file = paths.get_ipath_cur("vendor_cloud_config") @@ -1721,7 +1739,9 @@ class TestMain: ) @mock.patch(M_PATH + "os.getuid", return_value=1000) - def test_main_system_userdata_requires_root(self, m_getuid, capsys, paths): + def test_main_system_userdata_requires_root( + self, _read_cfg_paths, m_getuid, capsys, paths + ): """Non-root user can't use --system param""" myargs = ["mycmd", "--system"] with mock.patch("sys.argv", myargs): @@ -1861,7 +1881,7 @@ class TestSchemaFuzz: class TestHandleSchemaArgs: - Args = namedtuple("Args", "config_file docs system annotate") + Args = namedtuple("Args", "config_file docs system annotate instance_data") @pytest.mark.parametrize( "annotate, expected_output", @@ -1902,9 +1922,19 @@ apt_reboot_if_required: Default: ``false``. Deprecated in version 22.2.\ ), ], ) + @mock.patch(M_PATH + "read_cfg_paths") def test_handle_schema_args_annotate_deprecated_config( - self, annotate, expected_output, caplog, capsys, tmpdir + self, + read_cfg_paths, + annotate, + expected_output, + paths, + caplog, + capsys, + tmpdir, ): + paths.get_ipath = paths.get_ipath_cur + read_cfg_paths.return_value = paths user_data_fn = tmpdir.join("user-data") with open(user_data_fn, "w") as f: f.write( @@ -1924,6 +1954,7 @@ apt_reboot_if_required: Default: ``false``. Deprecated in version 22.2.\ annotate=annotate, docs=None, system=None, + instance_data=None, ) handle_schema_args("unused", args) out, err = capsys.readouterr() @@ -1933,3 +1964,99 @@ apt_reboot_if_required: Default: ``false``. Deprecated in version 22.2.\ ) assert not err assert "deprec" not in caplog.text + + @pytest.mark.parametrize( + "uid, annotate, expected_out, expected_err, expectation", + [ + pytest.param( + 0, + True, + dedent( + """\ + #cloud-config + hostname: 123 # E1 + + # Errors: ------------- + # E1: 123 is not of type 'string' + + + """ # noqa: E501 + ), + "", + does_not_raise(), + id="root_annotate_unique_errors_no_exception", + ), + pytest.param( + 0, + False, + dedent( + """\ + Invalid cloud-config {cfg_file} + """ # noqa: E501 + ), + dedent( + """\ + Error: Cloud config schema errors: hostname: 123 is not of type 'string' + + Error: Invalid cloud-config schema: user-data + + """ # noqa: E501 + ), + pytest.raises(SystemExit), + id="root_no_annotate_exception_with_unique_errors", + ), + ], + ) + @mock.patch(M_PATH + "os.getuid") + @mock.patch(M_PATH + "read_cfg_paths") + def test_handle_schema_args_jinja_with_errors( + self, + read_cfg_paths, + getuid, + uid, + annotate, + expected_out, + expected_err, + expectation, + paths, + caplog, + capsys, + tmpdir, + ): + getuid.return_value = uid + paths.get_ipath = paths.get_ipath_cur + read_cfg_paths.return_value = paths + user_data_fn = tmpdir.join("user-data") + if uid == 0: + id_path = paths.get_runpath("instance_data_sensitive") + else: + id_path = paths.get_runpath("instance_data") + with open(id_path, "w") as f: + f.write(json.dumps({"ds": {"asdf": 123}})) + with open(user_data_fn, "w") as f: + f.write( + dedent( + """\ + ## template: jinja + #cloud-config + hostname: {{ ds.asdf }} + """ + ) + ) + args = self.Args( + config_file=str(user_data_fn), + annotate=annotate, + docs=None, + system=None, + instance_data=None, + ) + with expectation: + handle_schema_args("unused", args) + out, err = capsys.readouterr() + assert ( + expected_out.format(cfg_file=user_data_fn, id_path=id_path) == out + ) + assert ( + expected_err.format(cfg_file=user_data_fn, id_path=id_path) == err + ) + assert "deprec" not in caplog.text -- cgit v1.2.1