summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChad Smith <chad.smith@canonical.com>2023-04-28 02:01:31 -0600
committerGitHub <noreply@github.com>2023-04-28 10:01:31 +0200
commite9d1d3a0efce95b6bc7f63fe01d579b98664c69f (patch)
tree91b8868fa804517d54f500e5b5e0a562fd41dae1
parentaa0cd62c81866d632522bbc54dc03eb4fa7fd913 (diff)
downloadcloud-init-git-e9d1d3a0efce95b6bc7f63fe01d579b98664c69f.tar.gz
cli: schema validation of jinja template user-data (SC-1385) (#2132)
The CLI cloud-init schema now asserts that the leading header comment in user-data files is a valid user-data type. Raise an informative error otherwise about valid user-data types. For user-data files declared with '## template: jinja', render those files first sourcing jinja variables from /run/cloud-init/instance-data.json or a new --instance-data parameter. Once the jinja template is rendered, validate schema of the resulting #cloud-config user-data. This branch also ensures any errors and deprecation warnings are unique. LP: #1881925
-rw-r--r--cloudinit/config/schema.py158
-rw-r--r--tests/unittests/config/test_schema.py193
2 files changed, 278 insertions, 73 deletions
diff --git a/cloudinit/config/schema.py b/cloudinit/config/schema.py
index 9886bde6..9005e924 100644
--- a/cloudinit/config/schema.py
+++ b/cloudinit/config/schema.py
@@ -17,8 +17,14 @@ from typing import TYPE_CHECKING, List, NamedTuple, Optional, Type, Union, cast
import yaml
from cloudinit import importer, safeyaml
-from cloudinit.stages import Init
-from cloudinit.util import error, get_modules_from_dir, load_file
+from cloudinit.cmd.devel import read_cfg_paths
+from cloudinit.handlers import INCLUSION_TYPES_MAP, type_from_starts_with
+from cloudinit.util import (
+ decode_binary,
+ error,
+ get_modules_from_dir,
+ load_file,
+)
try:
from jsonschema import ValidationError as _ValidationError
@@ -35,7 +41,6 @@ VERSIONED_USERDATA_SCHEMA_FILE = "versions.schema.cloud-config.json"
# Also add new version definition to versions.schema.json.
USERDATA_SCHEMA_FILE = "schema-cloud-config-v1.json"
_YAML_MAP = {True: "true", False: "false", None: "null"}
-CLOUD_CONFIG_HEADER = b"#cloud-config"
SCHEMA_DOC_TMPL = """
{name}
{title_underbar}
@@ -64,6 +69,10 @@ SCHEMA_EXAMPLES_SPACER_TEMPLATE = "\n # --- Example{0} ---"
DEPRECATED_KEY = "deprecated"
DEPRECATED_PREFIX = "DEPRECATED: "
+# user-data files typically must begin with a leading '#'
+USERDATA_VALID_HEADERS = sorted(
+ [t for t in INCLUSION_TYPES_MAP.keys() if t[0] == "#"]
+)
# type-annotate only if type-checking.
# Consider to add `type_extensions` as a dependency when Bionic is EOL.
@@ -129,20 +138,26 @@ class SchemaValidationError(ValueError):
((flat.config.key, msg),)
"""
message = ""
- if schema_errors:
- message += _format_schema_problems(
- schema_errors, prefix="Cloud config schema errors: "
- )
- if schema_deprecations:
+
+ def handle_problems(problems, prefix):
+ if not problems:
+ return problems
+ nonlocal message
if message:
message += "\n\n"
- message += _format_schema_problems(
- schema_deprecations,
- prefix="Cloud config schema deprecations: ",
- )
+ problems = sorted(list(set(problems)))
+ message += _format_schema_problems(problems, prefix=prefix)
+ return problems
+
+ self.schema_errors = handle_problems(
+ schema_errors,
+ prefix="Cloud config schema errors: ",
+ )
+ self.schema_deprecations = handle_problems(
+ schema_deprecations,
+ prefix="Cloud config schema deprecations: ",
+ )
super().__init__(message)
- self.schema_errors = schema_errors
- self.schema_deprecations = schema_deprecations
def has_errors(self) -> bool:
return bool(self.schema_errors)
@@ -639,7 +654,12 @@ def annotated_cloudconfig_file(
)
-def validate_cloudconfig_file(config_path, schema, annotate=False):
+def validate_cloudconfig_file(
+ config_path: str,
+ schema: dict,
+ annotate: bool = False,
+ instance_data_path: str = None,
+):
"""Validate cloudconfig file adheres to a specific jsonschema.
@param config_path: Path to the yaml cloud-config file to parse, or None
@@ -647,28 +667,69 @@ def validate_cloudconfig_file(config_path, schema, annotate=False):
@param schema: Dict describing a valid jsonschema to validate against.
@param annotate: Boolean set True to print original config file with error
annotations on the offending lines.
+ @param instance_data_path: Path to instance_data JSON, used for text/jinja
+ rendering.
@raises SchemaValidationError containing any of schema_errors encountered.
@raises RuntimeError when config_path does not exist.
"""
+ from cloudinit.handlers.jinja_template import (
+ JinjaLoadError,
+ NotJinjaError,
+ render_jinja_payload_from_file,
+ )
+
content = load_file(config_path, decode=False)
- if not content.startswith(CLOUD_CONFIG_HEADER):
- errors = [
- SchemaProblem(
- "format-l1.c1",
- 'File {0} needs to begin with "{1}"'.format(
- config_path, CLOUD_CONFIG_HEADER.decode()
- ),
- ),
- ]
- error = SchemaValidationError(errors)
- if annotate:
- print(
- annotated_cloudconfig_file(
- {}, content, {}, schema_errors=error.schema_errors
+ user_data_type = type_from_starts_with(content)
+ schema_position = "format-l1.c1"
+ if not user_data_type:
+ raise SchemaValidationError(
+ [
+ SchemaProblem(
+ schema_position,
+ f"No valid cloud-init user-data header in {config_path}.\n"
+ "Expected first line to be one of: "
+ f"{', '.join(USERDATA_VALID_HEADERS)}",
)
- )
- raise error
+ ]
+ )
+ if user_data_type not in ("text/cloud-config", "text/jinja2"):
+ print(
+ f"User-data type '{user_data_type}' not currently evaluated"
+ " by cloud-init schema"
+ )
+ return
+ if user_data_type == "text/jinja2":
+ try:
+ content = render_jinja_payload_from_file(
+ decode_binary(content), config_path, instance_data_path
+ ).encode()
+ except NotJinjaError as e:
+ raise SchemaValidationError(
+ [
+ SchemaProblem(
+ schema_position,
+ "Detected type '{user_data_type}' from header. "
+ "But, content is not a jinja template",
+ )
+ ]
+ ) from e
+ except JinjaLoadError as e:
+ error(str(e), sys_exit=True)
+ schema_position = "format-l2.c1"
+ user_data_type = type_from_starts_with(content)
+ if not user_data_type:
+ content_header = content[: decode_binary(content).find("\n")]
+ raise SchemaValidationError(
+ [
+ SchemaProblem(
+ schema_position,
+ f"Unrecognized user-data header in {config_path}:"
+ f" {content_header}. Expected one of the following "
+ f"headers: {', '.join(USERDATA_VALID_HEADERS)}",
+ )
+ ]
+ )
try:
if annotate:
cloudconfig, marks = safeyaml.load_with_marks(content)
@@ -691,14 +752,14 @@ def validate_cloudconfig_file(config_path, schema, annotate=False):
"File {0} is not valid yaml. {1}".format(config_path, str(e)),
),
]
- error = SchemaValidationError(errors)
+ schema_error = SchemaValidationError(errors)
if annotate:
print(
annotated_cloudconfig_file(
- {}, content, {}, schema_errors=error.schema_errors
+ {}, content, {}, schema_errors=schema_error.schema_errors
)
)
- raise error from e
+ raise schema_error from e
if not isinstance(cloudconfig, dict):
# Return a meaningful message on empty cloud-config
if not annotate:
@@ -1153,6 +1214,16 @@ def get_parser(parser=None):
help="Path of the cloud-config yaml file to validate",
)
parser.add_argument(
+ "-i",
+ "--instance-data",
+ type=str,
+ help=(
+ "Path to instance-data.json file for variable expansion "
+ "of '##template: jinja' user-data. Default: "
+ f"{read_cfg_paths().get_runpath('instance_data')}"
+ ),
+ )
+ parser.add_argument(
"--system",
action="store_true",
default=False,
@@ -1193,6 +1264,13 @@ def handle_schema_args(name, args):
if args.docs:
print(load_doc(args.docs))
return
+ paths = read_cfg_paths()
+ if args.instance_data:
+ instance_data_path = args.instance_data
+ elif os.getuid() != 0:
+ instance_data_path = paths.get_runpath("instance_data")
+ else:
+ instance_data_path = paths.get_runpath("instance_data_sensitive")
if args.config_file:
config_files = (("user-data", args.config_file),)
else:
@@ -1202,9 +1280,7 @@ def handle_schema_args(name, args):
" user. Try using sudo.",
sys_exit=True,
)
- init = Init(ds_deps=[])
- init.fetch(existing="trust")
- userdata_file = init.paths.get_ipath("cloud_config")
+ userdata_file = paths.get_ipath("cloud_config")
if not userdata_file:
error(
"Unable to obtain user data file. No instance data available",
@@ -1213,8 +1289,8 @@ def handle_schema_args(name, args):
return # Helps typing
config_files = (("user-data", userdata_file),)
vendor_config_files = (
- ("vendor-data", init.paths.get_ipath("vendor_cloud_config")),
- ("vendor2-data", init.paths.get_ipath("vendor2_cloud_config")),
+ ("vendor-data", paths.get_ipath("vendor_cloud_config")),
+ ("vendor2-data", paths.get_ipath("vendor2_cloud_config")),
)
for cfg_type, vendor_file in vendor_config_files:
if vendor_file and os.path.exists(vendor_file):
@@ -1240,7 +1316,9 @@ def handle_schema_args(name, args):
if multi_config_output:
print(f"\n{idx}. {cfg_type} at {cfg_file}:")
try:
- validate_cloudconfig_file(cfg_file, full_schema, args.annotate)
+ validate_cloudconfig_file(
+ cfg_file, full_schema, args.annotate, instance_data_path
+ )
except SchemaValidationError as e:
if not args.annotate:
print(f"{nested_output_prefix}Invalid cloud-config {cfg_file}")
diff --git a/tests/unittests/config/test_schema.py b/tests/unittests/config/test_schema.py
index 0e49dbf3..ceb689a4 100644
--- a/tests/unittests/config/test_schema.py
+++ b/tests/unittests/config/test_schema.py
@@ -19,9 +19,8 @@ from typing import List, Optional, Sequence, Set
import pytest
-from cloudinit import log, stages
+from cloudinit import log
from cloudinit.config.schema import (
- CLOUD_CONFIG_HEADER,
VERSIONED_USERDATA_SCHEMA_FILE,
MetaSchema,
SchemaProblem,
@@ -53,7 +52,6 @@ from tests.unittests.helpers import (
skipUnlessJsonSchema,
skipUnlessJsonSchemaVersionGreaterThan,
)
-from tests.unittests.util import FakeDataSource
M_PATH = "cloudinit.config.schema."
DEPRECATED_LOG_LEVEL = 35
@@ -720,14 +718,14 @@ class TestValidateCloudConfigFile:
"""On invalid header, validate_cloudconfig_file errors.
A SchemaValidationError is raised when the file doesn't begin with
- CLOUD_CONFIG_HEADER.
+ known headers.
"""
config_file = tmpdir.join("my.yaml")
config_file.write("#junk")
error_msg = (
- "Cloud config schema errors: format-l1.c1: File"
- f" {config_file} needs to begin with"
- f' "{CLOUD_CONFIG_HEADER.decode()}"'
+ f"No valid cloud-init user-data header in {config_file}.\n"
+ "Expected first line to be one of: #!, ## template: jinja, "
+ "#cloud-boothook, #cloud-config,"
)
with pytest.raises(SchemaValidationError, match=error_msg):
validate_cloudconfig_file(config_file.strpath, {}, annotate)
@@ -777,28 +775,46 @@ class TestValidateCloudConfigFile:
@skipUnlessJsonSchema()
@pytest.mark.parametrize("annotate", (True, False))
+ def test_validateconfig_file_squelches_duplicate_errors(
+ self, annotate, tmpdir
+ ):
+ """validate_cloudconfig_file raises only unique errors."""
+ config_file = tmpdir.join("my.yaml")
+ schema = { # Define duplicate schema definitions in different sections
+ "allOf": [
+ {"properties": {"p1": {"type": "string", "format": "string"}}},
+ {"properties": {"p1": {"type": "string", "format": "string"}}},
+ ]
+ }
+ config_file.write("#cloud-config\np1: -1")
+ error_msg = ( # Strict match of full error
+ "Cloud config schema errors: p1: -1 is not of type 'string'$"
+ )
+ with pytest.raises(SchemaValidationError, match=error_msg):
+ validate_cloudconfig_file(config_file.strpath, schema, annotate)
+
+ @skipUnlessJsonSchema()
+ @pytest.mark.parametrize("annotate", (True, False))
+ @mock.patch(M_PATH + "read_cfg_paths")
@mock.patch("cloudinit.url_helper.time.sleep")
def test_validateconfig_file_no_cloud_cfg(
- self, m_sleep, annotate, capsys, mocker
+ self, m_sleep, read_cfg_paths, annotate, paths, capsys, mocker
):
"""validate_cloudconfig_file does noop with empty user-data."""
schema = {"properties": {"p1": {"type": "string", "format": "string"}}}
- blob = ""
- ci = stages.Init()
- ci.datasource = FakeDataSource(blob)
- mocker.patch(M_PATH + "Init", return_value=ci)
- cloud_config_file = ci.paths.get_ipath_cur("cloud_config")
+ paths.get_ipath = paths.get_ipath_cur
+ read_cfg_paths.return_value = paths
+ cloud_config_file = paths.get_ipath_cur("cloud_config")
write_file(cloud_config_file, b"")
- with pytest.raises(
- SchemaValidationError,
- match=re.escape(
- "Cloud config schema errors: format-l1.c1:"
- f" File {cloud_config_file} needs to begin with"
- ' "#cloud-config"'
- ),
- ):
+ error_msg = (
+ "Cloud config schema errors: format-l1.c1: "
+ f"No valid cloud-init user-data header in {cloud_config_file}.\n"
+ "Expected first line to be one of: #!, ## template: jinja,"
+ " #cloud-boothook, #cloud-config,"
+ )
+ with pytest.raises(SchemaValidationError, match=error_msg):
validate_cloudconfig_file(cloud_config_file, schema, annotate)
@@ -1603,6 +1619,7 @@ class TestAnnotatedCloudconfigFile:
)
+@mock.patch(M_PATH + "read_cfg_paths") # called by parse_args help docs
class TestMain:
exclusive_combinations = itertools.combinations(
@@ -1610,7 +1627,7 @@ class TestMain:
)
@pytest.mark.parametrize("params", exclusive_combinations)
- def test_main_exclusive_args(self, params, capsys):
+ def test_main_exclusive_args(self, _read_cfg_paths, params, capsys):
"""Main exits non-zero and error on required exclusive args."""
params = list(itertools.chain(*[a.split() for a in params]))
with mock.patch("sys.argv", ["mycmd"] + params):
@@ -1625,7 +1642,7 @@ class TestMain:
)
assert expected == err
- def test_main_missing_args(self, capsys):
+ def test_main_missing_args(self, _read_cfg_paths, capsys):
"""Main exits non-zero and reports an error on missing parameters."""
with mock.patch("sys.argv", ["mycmd"]):
with pytest.raises(SystemExit) as context_manager:
@@ -1639,7 +1656,7 @@ class TestMain:
)
assert expected == err
- def test_main_absent_config_file(self, capsys):
+ def test_main_absent_config_file(self, _read_cfg_paths, capsys):
"""Main exits non-zero when config file is absent."""
myargs = ["mycmd", "--annotate", "--config-file", "NOT_A_FILE"]
with mock.patch("sys.argv", myargs):
@@ -1649,7 +1666,7 @@ class TestMain:
_out, err = capsys.readouterr()
assert "Error: Config file NOT_A_FILE does not exist\n" == err
- def test_main_invalid_flag_combo(self, capsys):
+ def test_main_invalid_flag_combo(self, _read_cfg_paths, capsys):
"""Main exits non-zero when invalid flag combo used."""
myargs = ["mycmd", "--annotate", "--docs", "DOES_NOT_MATTER"]
with mock.patch("sys.argv", myargs):
@@ -1662,7 +1679,7 @@ class TestMain:
"Cannot use --annotate with --docs\n" == err
)
- def test_main_prints_docs(self, capsys):
+ def test_main_prints_docs(self, _read_cfg_paths, capsys):
"""When --docs parameter is provided, main generates documentation."""
myargs = ["mycmd", "--docs", "all"]
with mock.patch("sys.argv", myargs):
@@ -1671,7 +1688,7 @@ class TestMain:
assert "\nNTP\n---\n" in out
assert "\nRuncmd\n------\n" in out
- def test_main_validates_config_file(self, tmpdir, capsys):
+ def test_main_validates_config_file(self, _read_cfg_paths, tmpdir, capsys):
"""When --config-file parameter is provided, main validates schema."""
myyaml = tmpdir.join("my.yaml")
myargs = ["mycmd", "--config-file", myyaml.strpath]
@@ -1681,13 +1698,14 @@ class TestMain:
out, _err = capsys.readouterr()
assert f"Valid cloud-config: {myyaml}\n" == out
+ @mock.patch(M_PATH + "read_cfg_paths")
@mock.patch(M_PATH + "os.getuid", return_value=0)
def test_main_validates_system_userdata_and_vendordata(
- self, _getuid, capsys, mocker, paths
+ self, _read_cfg_paths, _getuid, read_cfg_paths, capsys, mocker, paths
):
"""When --system is provided, main validates system userdata."""
- m_init = mocker.patch(M_PATH + "Init")
- m_init.return_value.paths.get_ipath = paths.get_ipath_cur
+ paths.get_ipath = paths.get_ipath_cur
+ read_cfg_paths.return_value = paths
cloud_config_file = paths.get_ipath_cur("cloud_config")
write_file(cloud_config_file, b"#cloud-config\nntp:")
vd_file = paths.get_ipath_cur("vendor_cloud_config")
@@ -1721,7 +1739,9 @@ class TestMain:
)
@mock.patch(M_PATH + "os.getuid", return_value=1000)
- def test_main_system_userdata_requires_root(self, m_getuid, capsys, paths):
+ def test_main_system_userdata_requires_root(
+ self, _read_cfg_paths, m_getuid, capsys, paths
+ ):
"""Non-root user can't use --system param"""
myargs = ["mycmd", "--system"]
with mock.patch("sys.argv", myargs):
@@ -1861,7 +1881,7 @@ class TestSchemaFuzz:
class TestHandleSchemaArgs:
- Args = namedtuple("Args", "config_file docs system annotate")
+ Args = namedtuple("Args", "config_file docs system annotate instance_data")
@pytest.mark.parametrize(
"annotate, expected_output",
@@ -1902,9 +1922,19 @@ apt_reboot_if_required: Default: ``false``. Deprecated in version 22.2.\
),
],
)
+ @mock.patch(M_PATH + "read_cfg_paths")
def test_handle_schema_args_annotate_deprecated_config(
- self, annotate, expected_output, caplog, capsys, tmpdir
+ self,
+ read_cfg_paths,
+ annotate,
+ expected_output,
+ paths,
+ caplog,
+ capsys,
+ tmpdir,
):
+ paths.get_ipath = paths.get_ipath_cur
+ read_cfg_paths.return_value = paths
user_data_fn = tmpdir.join("user-data")
with open(user_data_fn, "w") as f:
f.write(
@@ -1924,6 +1954,7 @@ apt_reboot_if_required: Default: ``false``. Deprecated in version 22.2.\
annotate=annotate,
docs=None,
system=None,
+ instance_data=None,
)
handle_schema_args("unused", args)
out, err = capsys.readouterr()
@@ -1933,3 +1964,99 @@ apt_reboot_if_required: Default: ``false``. Deprecated in version 22.2.\
)
assert not err
assert "deprec" not in caplog.text
+
+ @pytest.mark.parametrize(
+ "uid, annotate, expected_out, expected_err, expectation",
+ [
+ pytest.param(
+ 0,
+ True,
+ dedent(
+ """\
+ #cloud-config
+ hostname: 123 # E1
+
+ # Errors: -------------
+ # E1: 123 is not of type 'string'
+
+
+ """ # noqa: E501
+ ),
+ "",
+ does_not_raise(),
+ id="root_annotate_unique_errors_no_exception",
+ ),
+ pytest.param(
+ 0,
+ False,
+ dedent(
+ """\
+ Invalid cloud-config {cfg_file}
+ """ # noqa: E501
+ ),
+ dedent(
+ """\
+ Error: Cloud config schema errors: hostname: 123 is not of type 'string'
+
+ Error: Invalid cloud-config schema: user-data
+
+ """ # noqa: E501
+ ),
+ pytest.raises(SystemExit),
+ id="root_no_annotate_exception_with_unique_errors",
+ ),
+ ],
+ )
+ @mock.patch(M_PATH + "os.getuid")
+ @mock.patch(M_PATH + "read_cfg_paths")
+ def test_handle_schema_args_jinja_with_errors(
+ self,
+ read_cfg_paths,
+ getuid,
+ uid,
+ annotate,
+ expected_out,
+ expected_err,
+ expectation,
+ paths,
+ caplog,
+ capsys,
+ tmpdir,
+ ):
+ getuid.return_value = uid
+ paths.get_ipath = paths.get_ipath_cur
+ read_cfg_paths.return_value = paths
+ user_data_fn = tmpdir.join("user-data")
+ if uid == 0:
+ id_path = paths.get_runpath("instance_data_sensitive")
+ else:
+ id_path = paths.get_runpath("instance_data")
+ with open(id_path, "w") as f:
+ f.write(json.dumps({"ds": {"asdf": 123}}))
+ with open(user_data_fn, "w") as f:
+ f.write(
+ dedent(
+ """\
+ ## template: jinja
+ #cloud-config
+ hostname: {{ ds.asdf }}
+ """
+ )
+ )
+ args = self.Args(
+ config_file=str(user_data_fn),
+ annotate=annotate,
+ docs=None,
+ system=None,
+ instance_data=None,
+ )
+ with expectation:
+ handle_schema_args("unused", args)
+ out, err = capsys.readouterr()
+ assert (
+ expected_out.format(cfg_file=user_data_fn, id_path=id_path) == out
+ )
+ assert (
+ expected_err.format(cfg_file=user_data_fn, id_path=id_path) == err
+ )
+ assert "deprec" not in caplog.text