1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
|
# Copyright (C) 2012 Canonical Ltd.
#
# Author: Scott Moser <scott.moser@canonical.com>
#
# This file is part of cloud-init. See LICENSE file for license information.
from collections import namedtuple
from itertools import chain
from typing import Any, Dict, List, Tuple
import yaml
# Module-level alias of PyYAML's base exception class (yaml.YAMLError).
YAMLError = yaml.YAMLError
# A SchemaPathMarks record ties the path of an element inside a loaded YAML
# file to the region of the document it occupies: start_mark and end_mark
# hold the row and column coordinates where that element begins and ends.
SchemaPathMarks = namedtuple(
    "SchemaPathMarks", ["path", "start_mark", "end_mark"]
)
class _CustomSafeLoader(yaml.SafeLoader):
    """SafeLoader variant exposing a constructor for python/unicode nodes."""

    def construct_python_unicode(self, node):
        """Build the value for *node* using the parent's scalar constructor."""
        return super().construct_scalar(node)
def _fix_nested_map_index(new_key_path, marks):
    """Return a copy of *marks* with stale parent prefixes re-pointed.

    For every mark whose path contains a "." and does not already contain
    ``new_key_path`` as a substring, the parent portion of the path (all
    text before the final ".") is replaced by ``new_key_path``. All other
    marks are carried over unchanged. The input list is not modified.

    :param new_key_path: Dot-delimited path that nested marks should live
        under.
    :param marks: Iterable of SchemaPathMarks to inspect.
    :return: A new list of SchemaPathMarks, in the same order as *marks*.
    """
    fixed = []
    for entry in marks:
        # rpartition yields an empty separator when the path has no ".",
        # i.e. a top-level key which has no parent prefix to rewrite.
        parent, separator, _leaf = entry.path.rpartition(".")
        if not separator or new_key_path in entry.path:
            fixed.append(entry)
            continue
        fixed.append(
            SchemaPathMarks(
                # Replace only the first match of the parent prefix
                entry.path.replace(parent, new_key_path, 1),
                entry.start_mark,
                entry.end_mark,
            )
        )
    return fixed
class _CustomSafeLoaderWithMarks(yaml.SafeLoader):
    """A loader which provides line and column start and end marks for YAML.

    If the YAML loaded represents a dictionary, get_single_data will inject
    a top-level "schemamarks" key in that dictionary which can be used at
    call-sites to process YAML paths schemamark metadata when annotating
    YAML files for errors.

    The schemamarks key is a dictionary where each key is a dot-delimited
    path into the YAML object. Each dot represents an element that is nested
    under a parent and list items are represented with the format
    `<parent>.<list-index>`.

    The values in schemamarks will be the line number in the original content
    where YAML element begins to aid in annotation when encountering schema
    errors.

    The example YAML shows expected schemamarks for both dicts and lists:

      one: val1
      two:
        subtwo: val2
      three: [val3, val4]

    schemamarks == {
        "one": 1, "two": 2, "two.subtwo": 3, "three": 4, "three.0": 4,
        "three.1": 4
    }
    """

    def __init__(self, stream):
        super().__init__(stream)
        # Source line (as reported by the node's start_mark) -> marks of the
        # elements which begin on that line, in the order they were recorded.
        self.schemamarks_by_line: Dict[int, List[SchemaPathMarks]] = {}

    def _get_nested_path_prefix(self, node):
        """Return the dotted path prefix (with trailing ".") for *node*.

        If a mark was already recorded on the line where *node* starts, the
        path of the first mark on that line is used. Otherwise recorded marks
        are searched from the highest line number down (and, within a line,
        from the most recently added mark) for one whose start/end line and
        column bounds enclose *node*. An empty string is returned when no
        recorded mark matches, i.e. *node* is treated as top-level.
        """
        start = node.start_mark
        end = node.end_mark
        if start.line in self.schemamarks_by_line:
            return f"{self.schemamarks_by_line[start.line][0].path}."
        for line_num in sorted(self.schemamarks_by_line, reverse=True):
            for mark in reversed(self.schemamarks_by_line[line_num]):
                if (  # Is the node within the scope of the furthest mark
                    start.line >= mark.start_mark.line
                    and start.column >= mark.start_mark.column
                    and end.line <= mark.end_mark.line
                    and end.column <= mark.end_mark.column
                ):
                    return f"{mark.path}."
        return ""

    def construct_mapping(self, node, deep=False):
        """Construct a mapping and record a mark for each of its keys.

        :param node: The YAML mapping node being constructed.
        :param deep: Forwarded to the parent constructor. Accepted so this
            override stays call-compatible with PyYAML's
            ``construct_mapping(node, deep=False)``.
        :return: The mapping built by the parent class.
        """
        mapping = super().construct_mapping(node, deep=deep)
        nested_path_prefix = self._get_nested_path_prefix(node)
        for key_node, value_node in node.value:
            mark = SchemaPathMarks(
                f"{nested_path_prefix}{key_node.value}",
                key_node.start_mark,
                value_node.end_mark,
            )
            self.schemamarks_by_line.setdefault(
                key_node.start_mark.line, []
            ).append(mark)
        return mapping

    def construct_sequence(self, node, deep=False):
        """Construct a sequence and record a mark for each of its items.

        :param node: The YAML sequence node being constructed.
        :param deep: Accepted for interface compatibility only; the parent
            is always called with ``deep=True``.
            NOTE(review): presumably so nested items are constructed, and
            their marks recorded, before the fix-up below runs - confirm.
        :return: The sequence built by the parent class.
        """
        sequence = super().construct_sequence(node, deep=True)
        nested_path_prefix = self._get_nested_path_prefix(node)
        for index, sequence_item in enumerate(node.value):
            line_num = sequence_item.start_mark.line
            node_key_path = f"{nested_path_prefix}{index}"
            marks = SchemaPathMarks(
                node_key_path, sequence_item.start_mark, sequence_item.end_mark
            )
            if line_num not in self.schemamarks_by_line:
                self.schemamarks_by_line[line_num] = [marks]
                continue
            if line_num == sequence_item.end_mark.line:
                # Single-line item sharing its line with earlier marks.
                self.schemamarks_by_line[line_num].append(marks)
                continue
            # Incorrect multi-line mapping or sequence object: marks already
            # recorded on the lines this item spans (end line excluded) are
            # re-pointed so that they sit under this list index.
            for inner_line in range(line_num, sequence_item.end_mark.line):
                if inner_line not in self.schemamarks_by_line:
                    continue
                schema_marks = self.schemamarks_by_line[inner_line]
                new_marks = _fix_nested_map_index(node_key_path, schema_marks)
                if (
                    inner_line == line_num
                    and schema_marks[0].path != node_key_path
                ):
                    # Lead the item's first line with a mark for the list
                    # item itself, spanning the marks found on that line.
                    new_marks.insert(
                        0,
                        SchemaPathMarks(
                            node_key_path,
                            schema_marks[0].start_mark,
                            schema_marks[-1].end_mark,
                        ),
                    )
                self.schemamarks_by_line[inner_line] = new_marks
        return sequence

    def get_single_data(self):
        """Load the document and, for dicts, inject the "schemamarks" key.

        :return: The loaded data. When it is a dict, it additionally carries
            a "schemamarks" entry mapping each recorded path to its 1-based
            starting line; any existing "schemamarks" key is overwritten.
        """
        data = super().get_single_data()
        if isinstance(data, dict):  # valid cloud-config schema is a dict
            data["schemamarks"] = {
                # 1-based human-readable
                mark.path: mark.start_mark.line + 1
                for mark in chain.from_iterable(
                    self.schemamarks_by_line.values()
                )
            }
        return data
# Register construct_python_unicode as the constructor _CustomSafeLoader uses
# for nodes tagged "tag:yaml.org,2002:python/unicode".
_CustomSafeLoader.add_constructor(
    "tag:yaml.org,2002:python/unicode",
    _CustomSafeLoader.construct_python_unicode,
)
class NoAliasSafeDumper(yaml.dumper.SafeDumper):
    """A class which avoids constructing anchors/aliases on yaml dump"""

    def ignore_aliases(self, data):
        """Report every object as alias-exempt, whatever *data* is."""
        return True
def load_with_marks(blob) -> Tuple[Any, Dict[str, int]]:
    """Perform YAML SafeLoad and track start and end marks during parse.

    JSON schema errors come with an encoded object path such as:
        <key1>.<key2>.<list_item_index>

    YAML loader needs to preserve a mapping of schema path to line and column
    marks to annotate original content with JSON schema error marks for the
    command:
        cloud-init devel schema --annotate

    :param blob: YAML content handed to yaml.load.
    :return: A ``(data, schemamarks)`` tuple. ``schemamarks`` is the value
        removed from the "schemamarks" key of the loaded dict, or an empty
        dict when the loaded data is not a dict.
    """
    loaded = yaml.load(blob, Loader=_CustomSafeLoaderWithMarks)
    # Only dict results carry the injected "schemamarks" key; pop it so the
    # caller receives the data without that key.
    marks = loaded.pop("schemamarks") if isinstance(loaded, dict) else {}
    return loaded, marks
def load(blob):
    """Parse YAML *blob* with _CustomSafeLoader and return the result."""
    parsed = yaml.load(blob, Loader=_CustomSafeLoader)
    return parsed
def dumps(obj, explicit_start=True, explicit_end=True, noalias=False):
    """Return data in nicely formatted yaml.

    :param obj: The object to serialise.
    :param explicit_start: Passed through to yaml.dump as explicit_start.
    :param explicit_end: Passed through to yaml.dump as explicit_end.
    :param noalias: When true, dump with NoAliasSafeDumper instead of
        yaml.dumper.SafeDumper.
    :return: Whatever yaml.dump returns for these options.
    """
    if noalias:
        dumper_cls = NoAliasSafeDumper
    else:
        dumper_cls = yaml.dumper.SafeDumper
    dump_options = {
        "line_break": "\n",
        "indent": 4,
        "explicit_start": explicit_start,
        "explicit_end": explicit_end,
        "default_flow_style": False,
        "Dumper": dumper_cls,
    }
    return yaml.dump(obj, **dump_options)
# vi: ts=4 expandtab
|