author     xi <xi@18f92427-320e-0410-9341-c67f048884a3>   2006-03-20 19:25:39 +0000
committer  xi <xi@18f92427-320e-0410-9341-c67f048884a3>   2006-03-20 19:25:39 +0000
commit     04cc6bab4adff87f260cc7611df36af5b5804d07 (patch)
tree       5282f6cc23575d6f1eab01739428b586bb9464df
parent     30cd8e58dda9bba797aef7fbdfc6e6e86cbd4f6a (diff)
download   pyyaml-04cc6bab4adff87f260cc7611df36af5b5804d07.tar.gz
Add a branch for working on Emitter.
git-svn-id: http://svn.pyyaml.org/pyyaml/branches/working-on-emitter@121 18f92427-320e-0410-9341-c67f048884a3
-rwxr-xr-x  examples/yaml-hl/yaml_hl.py                                     |  73
-rw-r--r--  examples/yaml-hl/yaml_hl_ascii.cfg                              |  43
-rw-r--r--  examples/yaml-hl/yaml_hl_html.cfg                               |  45
-rw-r--r--  lib/yaml/composer.py                                            |  32
-rw-r--r--  lib/yaml/constructor.py                                         |  62
-rw-r--r--  lib/yaml/error.py                                               |  39
-rw-r--r--  lib/yaml/events.py                                              |  35
-rw-r--r--  lib/yaml/nodes.py                                               |   6
-rw-r--r--  lib/yaml/parser.py                                              | 159
-rw-r--r--  lib/yaml/reader.py                                              |  11
-rw-r--r--  lib/yaml/scanner.py                                             | 407
-rw-r--r--  lib/yaml/tokens.py                                              |  41
-rw-r--r--  tests/data/document-separator-in-quoted-scalar.error-message   |  11
-rw-r--r--  tests/data/invalid-indentation-for-quoted-scalar.error-message |   2
-rw-r--r--  tests/data/sloppy-indentation.canonical                        |  18
-rw-r--r--  tests/data/sloppy-indentation.data                             |  17
-rw-r--r--  tests/data/test_mark.marks (renamed from tests/data/test_marker.markers) | 0
-rw-r--r--  tests/test_appliance.py                                        |   8
-rw-r--r--  tests/test_mark.py (renamed from tests/test_marker.py)         |  14
-rw-r--r--  tests/test_structure.py                                        |   5
-rw-r--r--  tests/test_tokens.py                                           |   4
-rw-r--r--  tests/test_yaml.py                                             |   2
22 files changed, 662 insertions(+), 372 deletions(-)
diff --git a/examples/yaml-hl/yaml_hl.py b/examples/yaml-hl/yaml_hl.py
new file mode 100755
index 0000000..dd81b3f
--- /dev/null
+++ b/examples/yaml-hl/yaml_hl.py
@@ -0,0 +1,73 @@
+#!/usr/bin/python
+
+import yaml, codecs, sys, optparse
+
+class YAMLHighlight:
+
+ def __init__(self, config):
+ parameters = yaml.load_document(config)
+ self.replaces = parameters['replaces']
+ self.substitutions = {}
+ for domain, items in [('Token', parameters['tokens']),
+ ('Event', parameters['events'])]:
+ for code in items:
+ name = ''.join([part.capitalize() for part in code.split('-')]+[domain])
+ cls = getattr(yaml, name)
+ value = items[code]
+ if value:
+ if 'start' in value:
+ self.substitutions[cls, -1] = value['start']
+ if 'end' in value:
+ self.substitutions[cls, +1] = value['end']
+
+ def highlight(self, input):
+ if isinstance(input, str):
+ if input.startswith(codecs.BOM_UTF16_LE):
+ input = unicode(input, 'utf-16-le')
+ elif input.startswith(codecs.BOM_UTF16_BE):
+ input = unicode(input, 'utf-16-be')
+ else:
+ input = unicode(input, 'utf-8')
+ tokens = yaml.parse(input, Parser=iter)
+ events = yaml.parse(input)
+ markers = []
+ number = 0
+ for token in tokens:
+ number += 1
+ if token.start_mark.index != token.end_mark.index:
+ cls = token.__class__
+ if (cls, -1) in self.substitutions:
+ markers.append([token.start_mark.index, +2, number, self.substitutions[cls, -1]])
+ if (cls, +1) in self.substitutions:
+ markers.append([token.end_mark.index, -2, number, self.substitutions[cls, +1]])
+ number = 0
+ for event in events:
+ number += 1
+ cls = event.__class__
+ if (cls, -1) in self.substitutions:
+ markers.append([event.start_mark.index, +1, number, self.substitutions[cls, -1]])
+ if (cls, +1) in self.substitutions:
+ markers.append([event.end_mark.index, -1, number, self.substitutions[cls, +1]])
+ markers.sort()
+ markers.reverse()
+ chunks = []
+ position = len(input)
+ for index, weight1, weight2, substitution in markers:
+ if index < position:
+ chunk = input[index:position]
+ for substring, replacement in self.replaces:
+ chunk = chunk.replace(substring, replacement)
+ chunks.append(chunk)
+ position = index
+ chunks.append(substitution)
+ chunks.reverse()
+ result = u''.join(chunks)
+ return result.encode('utf-8')
+
+if __name__ == '__main__':
+ parser = optparse.OptionParser()
+ parser.add_option('-c', '--config', dest='config', default='yaml_hl_ascii.cfg', metavar='CONFIG')
+ (options, args) = parser.parse_args()
+ hl = YAMLHighlight(file(options.config))
+ sys.stdout.write(hl.highlight(sys.stdin.read()))
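
The script works in two passes: it collects (index, weight, substitution) markers from both the token stream and the event stream, sorts them, and then splices the substitutions into the input from the end backwards so that earlier indices stay valid. A minimal sketch of that splicing technique (illustrative names, not part of PyYAML):

    def splice(text, inserts):
        # inserts: (index, substitution) pairs; going from the end backwards
        # keeps the remaining indices valid after each insertion.
        for index, substitution in sorted(inserts, reverse=True):
            text = text[:index] + substitution + text[index:]
        return text

    print(splice("key: value", [(0, "<b>"), (3, "</b>")]))
    # -> <b>key</b>: value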
+
diff --git a/examples/yaml-hl/yaml_hl_ascii.cfg b/examples/yaml-hl/yaml_hl_ascii.cfg
new file mode 100644
index 0000000..851d0f8
--- /dev/null
+++ b/examples/yaml-hl/yaml_hl_ascii.cfg
@@ -0,0 +1,43 @@
+%YAML 1.1
+---
+tokens:
+ stream-start:
+ stream-end:
+ directive: { start: "\e[35m", end: "\e[0;1;30;40m" }
+ document-start: { start: "\e[35m", end: "\e[0;1;30;40m" }
+ document-end: { start: "\e[35m", end: "\e[0;1;30;40m" }
+ block-sequence-start:
+ block-mapping-start:
+ block-end:
+ flow-sequence-start: { start: "\e[33m", end: "\e[0;1;30;40m" }
+ flow-mapping-start: { start: "\e[33m", end: "\e[0;1;30;40m" }
+ flow-sequence-end: { start: "\e[33m", end: "\e[0;1;30;40m" }
+ flow-mapping-end: { start: "\e[33m", end: "\e[0;1;30;40m" }
+ key: { start: "\e[33m", end: "\e[0;1;30;40m" }
+ value: { start: "\e[33m", end: "\e[0;1;30;40m" }
+ block-entry: { start: "\e[33m", end: "\e[0;1;30;40m" }
+ flow-entry: { start: "\e[33m", end: "\e[0;1;30;40m" }
+ alias: { start: "\e[32m", end: "\e[0;1;30;40m" }
+ anchor: { start: "\e[32m", end: "\e[0;1;30;40m" }
+ tag: { start: "\e[32m", end: "\e[0;1;30;40m" }
+ scalar: { start: "\e[36m", end: "\e[0;1;30;40m" }
+
+events:
+ stream-start: { start: "\e[0;1;30;40m" }
+ stream-end: { end: "\e[0m" }
+ document-start:
+ document-end:
+ sequence:
+ mapping:
+ collection-end:
+ scalar:
+
+replaces: !!pairs
+ - "\r\n": "\n"
+ - "\r": "\n"
+ - "\n": "\n"
+ - "\x85": "\n"
+ - "\u2028": "\n"
+ - "\u2029": "\n"
+
+# vim: ft=yaml
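
The keys under `tokens:` and `events:` are turned into PyYAML class names by the capitalization rule in YAMLHighlight.__init__ above; restated on its own:

    def class_name(code, domain):
        # 'document-start' + 'Token' -> 'DocumentStartToken'
        return ''.join(part.capitalize() for part in code.split('-')) + domain

    assert class_name('document-start', 'Token') == 'DocumentStartToken'
    assert class_name('stream-end', 'Event') == 'StreamEndEvent'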
diff --git a/examples/yaml-hl/yaml_hl_html.cfg b/examples/yaml-hl/yaml_hl_html.cfg
new file mode 100644
index 0000000..903810e
--- /dev/null
+++ b/examples/yaml-hl/yaml_hl_html.cfg
@@ -0,0 +1,45 @@
+
+tokens:
+ stream-start:
+ stream-end:
+ directive: { start: <code class="directive-token">, end: </code> }
+ document-start: { start: <code class="document-start-token">, end: </code> }
+ document-end: { start: <code class="document-end-token">, end: </code> }
+ block-sequence-start:
+ block-mapping-start:
+ block-end:
+ flow-sequence-start: { start: <code class="delimiter-token">, end: </code> }
+ flow-mapping-start: { start: <code class="delimiter-token">, end: </code> }
+ flow-sequence-end: { start: <code class="delimiter-token">, end: </code> }
+ flow-mapping-end: { start: <code class="delimiter-token">, end: </code> }
+ key: { start: <code class="delimiter-token">, end: </code> }
+ value: { start: <code class="delimiter-token">, end: </code> }
+ block-entry: { start: <code class="delimiter-token">, end: </code> }
+ flow-entry: { start: <code class="delimiter-token">, end: </code> }
+ alias: { start: <code class="anchor-token">, end: </code> }
+ anchor: { start: <code class="anchor-token">, end: </code> }
+ tag: { start: <code class="tag-token">, end: </code> }
+ scalar: { start: <code class="scalar-token">, end: </code> }
+
+events:
+ stream-start: { start: <pre class="yaml-stream"> }
+ stream-end: { end: </pre> }
+ document-start: { start: <span class="document"> }
+ document-end: { end: </span> }
+ sequence: { start: <span class="sequence"> }
+ mapping: { start: <span class="mapping"> }
+ collection-end: { end: </span> }
+ scalar: { start: <span class="scalar">, end: </span> }
+
+replaces: !!pairs
+ - "\r\n": "\n"
+ - "\r": "\n"
+ - "\n": "\n"
+ - "\x85": "\n"
+ - "\u2028": "\n"
+ - "\u2029": "\n"
+ - "<": "&lt;"
+ - ">": "&gt;"
+ - "&": "&amp;"
+
+# vim: ft=yaml
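
Because the `replaces` pairs are applied to each chunk in order, '&' must be escaped before '<' and '>'; otherwise the '&' inside an already-produced '&lt;' would be escaped a second time. A quick demonstration of why the order matters (an illustrative helper mirroring the replace loop in yaml_hl.py):

    def escape(text, pairs):
        for old, new in pairs:
            text = text.replace(old, new)
        return text

    good = [("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")]
    bad = [("<", "&lt;"), (">", "&gt;"), ("&", "&amp;")]
    print(escape("a < b", good))  # a &lt; b
    print(escape("a < b", bad))   # a &amp;lt; b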
diff --git a/lib/yaml/composer.py b/lib/yaml/composer.py
index 0f00062..9b8c1e2 100644
--- a/lib/yaml/composer.py
+++ b/lib/yaml/composer.py
@@ -15,6 +15,9 @@ class Composer:
self.all_anchors = {}
self.complete_anchors = {}
+ # Drop the STREAM-START event.
+ self.parser.get()
+
def check(self):
# Are there more documents available?
return not self.parser.check(StreamEndEvent)
@@ -30,7 +33,16 @@ class Composer:
yield self.compose_document()
def compose_document(self):
+
+ # Drop the DOCUMENT-START event.
+ self.parser.get()
+
+ # Compose the root node.
node = self.compose_node()
+
+ # Drop the DOCUMENT-END event.
+ self.parser.get()
+
self.all_anchors = {}
self.complete_anchors = {}
return node
@@ -41,21 +53,21 @@ class Composer:
anchor = event.anchor
if anchor not in self.all_anchors:
raise ComposerError(None, None, "found undefined alias %r"
- % anchor.encode('utf-8'), event.start_marker)
+ % anchor.encode('utf-8'), event.start_mark)
if anchor not in self.complete_anchors:
collection_event = self.all_anchors[anchor]
raise ComposerError("while composing a collection",
- collection_event.start_marker,
+ collection_event.start_mark,
"found recursive anchor %r" % anchor.encode('utf-8'),
- event.start_marker)
+ event.start_mark)
return self.complete_anchors[anchor]
event = self.parser.peek()
anchor = event.anchor
if anchor is not None:
if anchor in self.all_anchors:
raise ComposerError("found duplicate anchor %r; first occurence"
- % anchor.encode('utf-8'), self.all_anchors[anchor].start_marker,
- "second occurence", event.start_marker)
+ % anchor.encode('utf-8'), self.all_anchors[anchor].start_mark,
+ "second occurence", event.start_mark)
self.all_anchors[anchor] = event
if self.parser.check(ScalarEvent):
node = self.compose_scalar_node()
@@ -70,7 +82,7 @@ class Composer:
def compose_scalar_node(self):
event = self.parser.get()
return ScalarNode(event.tag, event.value,
- event.start_marker, event.end_marker)
+ event.start_mark, event.end_mark)
def compose_sequence_node(self):
start_event = self.parser.get()
@@ -79,7 +91,7 @@ class Composer:
value.append(self.compose_node())
end_event = self.parser.get()
return SequenceNode(start_event.tag, value,
- start_event.start_marker, end_event.end_marker)
+ start_event.start_mark, end_event.end_mark)
def compose_mapping_node(self):
start_event = self.parser.get()
@@ -89,10 +101,10 @@ class Composer:
item_key = self.compose_node()
item_value = self.compose_node()
if item_key in value:
- raise ComposerError("while composing a mapping", start_event.start_marker,
- "found duplicate key", key_event.start_marker)
+ raise ComposerError("while composing a mapping", start_event.start_mark,
+ "found duplicate key", key_event.start_mark)
value[item_key] = item_value
end_event = self.parser.get()
return MappingNode(start_event.tag, value,
- start_event.start_marker, end_event.end_marker)
+ start_event.start_mark, end_event.end_mark)
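
The all_anchors/complete_anchors pair in compose_node is what catches recursion: an anchor is registered as soon as it is seen, but marked complete only after its node is fully composed, so an alias that resolves to a registered-but-incomplete anchor points into its own ancestry. The same bookkeeping in miniature (a hypothetical mini-composer, not the PyYAML API):

    def compose(data, anchors=None, done=None):
        anchors = {} if anchors is None else anchors
        done = {} if done is None else done
        if data[0] == 'alias':
            name = data[1]
            if name not in anchors:
                raise ValueError('undefined alias %r' % name)
            if name not in done:
                raise ValueError('recursive anchor %r' % name)
            return done[name]
        kind, name, value = data
        if name is not None:
            anchors[name] = data                    # seen, not yet complete
        if kind == 'seq':
            value = [compose(item, anchors, done) for item in value]
        if name is not None:
            done[name] = value                      # fully composed
        return value

    try:
        compose(('seq', 'a', [('alias', 'a')]))
    except ValueError as exc:
        print(exc)                                  # recursive anchor 'a'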
diff --git a/lib/yaml/constructor.py b/lib/yaml/constructor.py
index 1fc3b7d..b3f5a88 100644
--- a/lib/yaml/constructor.py
+++ b/lib/yaml/constructor.py
@@ -70,28 +70,28 @@ class BaseConstructor:
return self.construct_scalar(node.value[key_node])
raise ConstructorError(None, None,
"expected a scalar node, but found %s" % node.id,
- node.start_marker)
+ node.start_mark)
return node.value
def construct_sequence(self, node):
if not isinstance(node, SequenceNode):
raise ConstructorError(None, None,
"expected a sequence node, but found %s" % node.id,
- node.start_marker)
+ node.start_mark)
return [self.construct_object(child) for child in node.value]
def construct_mapping(self, node):
if not isinstance(node, MappingNode):
raise ConstructorError(None, None,
"expected a mapping node, but found %s" % node.id,
- node.start_marker)
+ node.start_mark)
mapping = {}
merge = None
for key_node in node.value:
if key_node.tag == u'tag:yaml.org,2002:merge':
if merge is not None:
- raise ConstructorError("while constructing a mapping", node.start_marker,
- "found duplicate merge key", key_node.start_marker)
+ raise ConstructorError("while constructing a mapping", node.start_mark,
+ "found duplicate merge key", key_node.start_mark)
value_node = node.value[key_node]
if isinstance(value_node, MappingNode):
merge = [self.construct_mapping(value_node)]
@@ -100,19 +100,19 @@ class BaseConstructor:
for subnode in value_node.value:
if not isinstance(subnode, MappingNode):
raise ConstructorError("while constructing a mapping",
- node.start_marker,
+ node.start_mark,
"expected a mapping for merging, but found %s"
- % subnode.id, subnode.start_marker)
+ % subnode.id, subnode.start_mark)
merge.append(self.construct_mapping(subnode))
merge.reverse()
else:
- raise ConstructorError("while constructing a mapping", node.start_marker,
+ raise ConstructorError("while constructing a mapping", node.start_mark,
"expected a mapping or list of mappings for merging, but found %s"
- % value_node.id, value_node.start_marker)
+ % value_node.id, value_node.start_mark)
elif key_node.tag == u'tag:yaml.org,2002:value':
if '=' in mapping:
- raise ConstructorError("while construction a mapping", node.start_marker,
- "found duplicate value key", key_node.start_marker)
+ raise ConstructorError("while construction a mapping", node.start_mark,
+ "found duplicate value key", key_node.start_mark)
value = self.construct_object(node.value[key_node])
mapping['='] = value
else:
@@ -120,11 +120,11 @@ class BaseConstructor:
try:
duplicate_key = key in mapping
except TypeError, exc:
- raise ConstructorError("while constructing a mapping", node.start_marker,
- "found unacceptable key (%s)" % exc, key_node.start_marker)
+ raise ConstructorError("while constructing a mapping", node.start_mark,
+ "found unacceptable key (%s)" % exc, key_node.start_mark)
if duplicate_key:
- raise ConstructorError("while constructing a mapping", node.start_marker,
- "found duplicate key", key_node.start_marker)
+ raise ConstructorError("while constructing a mapping", node.start_mark,
+ "found duplicate key", key_node.start_mark)
value = self.construct_object(node.value[key_node])
mapping[key] = value
if merge is not None:
@@ -138,7 +138,7 @@ class BaseConstructor:
if not isinstance(node, MappingNode):
raise ConstructorError(None, None,
"expected a mapping node, but found %s" % node.id,
- node.start_marker)
+ node.start_mark)
pairs = []
for key_node in node.value:
key = self.construct_object(key_node)
@@ -234,7 +234,7 @@ class Constructor(BaseConstructor):
return str(value).decode('base64')
except (binascii.Error, UnicodeEncodeError), exc:
raise ConstructorError(None, None,
- "failed to decode base64 data: %s" % exc, node.start_marker)
+ "failed to decode base64 data: %s" % exc, node.start_mark)
timestamp_regexp = re.compile(
ur'''^(?P<year>[0-9][0-9][0-9][0-9])
@@ -271,18 +271,18 @@ class Constructor(BaseConstructor):
# Note: we do not check for duplicate keys, because it's too
# CPU-expensive.
if not isinstance(node, SequenceNode):
- raise ConstructorError("while constructing an ordered map", node.start_marker,
- "expected a sequence, but found %s" % node.id, node.start_marker)
+ raise ConstructorError("while constructing an ordered map", node.start_mark,
+ "expected a sequence, but found %s" % node.id, node.start_mark)
omap = []
for subnode in node.value:
if not isinstance(subnode, MappingNode):
- raise ConstructorError("while constructing an ordered map", node.start_marker,
+ raise ConstructorError("while constructing an ordered map", node.start_mark,
"expected a mapping of length 1, but found %s" % subnode.id,
- subnode.start_marker)
+ subnode.start_mark)
if len(subnode.value) != 1:
- raise ConstructorError("while constructing an ordered map", node.start_marker,
+ raise ConstructorError("while constructing an ordered map", node.start_mark,
"expected a single mapping item, but found %d items" % len(subnode.value),
- subnode.start_marker)
+ subnode.start_mark)
key_node = subnode.value.keys()[0]
key = self.construct_object(key_node)
value = self.construct_object(subnode.value[key_node])
@@ -292,18 +292,18 @@ class Constructor(BaseConstructor):
def construct_yaml_pairs(self, node):
# Note: the same code as `construct_yaml_omap`.
if not isinstance(node, SequenceNode):
- raise ConstructorError("while constructing pairs", node.start_marker,
- "expected a sequence, but found %s" % node.id, node.start_marker)
+ raise ConstructorError("while constructing pairs", node.start_mark,
+ "expected a sequence, but found %s" % node.id, node.start_mark)
pairs = []
for subnode in node.value:
if not isinstance(subnode, MappingNode):
- raise ConstructorError("while constructing pairs", node.start_marker,
+ raise ConstructorError("while constructing pairs", node.start_mark,
"expected a mapping of length 1, but found %s" % subnode.id,
- subnode.start_marker)
+ subnode.start_mark)
if len(subnode.value) != 1:
- raise ConstructorError("while constructing pairs", node.start_marker,
+ raise ConstructorError("while constructing pairs", node.start_mark,
"expected a single mapping item, but found %d items" % len(subnode.value),
- subnode.start_marker)
+ subnode.start_mark)
key_node = subnode.value.keys()[0]
key = self.construct_object(key_node)
value = self.construct_object(subnode.value[key_node])
@@ -330,7 +330,7 @@ class Constructor(BaseConstructor):
def construct_undefined(self, node):
raise ConstructorError(None, None,
"could not determine a constructor for the tag %r" % node.tag.encode('utf-8'),
- node.start_marker)
+ node.start_mark)
Constructor.add_constructor(
u'tag:yaml.org,2002:null',
@@ -402,7 +402,7 @@ class YAMLObject(object):
def from_yaml(cls, constructor, node):
raise ConstructorError(None, None,
"found undefined constructor for the tag %r"
- % node.tag.encode('utf-8'), node.start_marker)
+ % node.tag.encode('utf-8'), node.start_mark)
from_yaml = classmethod(from_yaml)
def to_yaml(self):
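
The merge branch in construct_mapping above is what implements YAML's '<<' convention: mappings referenced under the merge key are folded in, with explicitly written keys taking precedence. For example (shown with today's top-level PyYAML API for brevity; the construction logic is the code above):

    import yaml

    document = """
    defaults: &defaults
      adapter: postgres
      host: localhost
    development:
      <<: *defaults
      host: dev.example.com
    """
    print(yaml.safe_load(document)['development'])
    # {'adapter': 'postgres', 'host': 'dev.example.com'}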
diff --git a/lib/yaml/error.py b/lib/yaml/error.py
index 38f143e..a818210 100644
--- a/lib/yaml/error.py
+++ b/lib/yaml/error.py
@@ -1,10 +1,11 @@
-__all__ = ['Marker', 'YAMLError', 'MarkedYAMLError']
+__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError']
-class Marker:
+class Mark:
- def __init__(self, name, line, column, buffer, pointer):
+ def __init__(self, name, index, line, column, buffer, pointer):
self.name = name
+ self.index = index
self.line = line
self.column = column
self.buffer = buffer
@@ -46,33 +47,33 @@ class YAMLError(Exception):
class MarkedYAMLError(YAMLError):
- def __init__(self, context=None, context_marker=None,
- problem=None, problem_marker=None):
+ def __init__(self, context=None, context_mark=None,
+ problem=None, problem_mark=None):
self.context = context
- self.context_marker = context_marker
+ self.context_mark = context_mark
self.problem = problem
- self.problem_marker = problem_marker
+ self.problem_mark = problem_mark
def __str__(self):
lines = []
- #for (place, marker) in [(self.context, self.context_marker),
- # (self.problem, self.problem_marker)]:
+ #for (place, mark) in [(self.context, self.context_mark),
+ # (self.problem, self.problem_mark)]:
# if place is not None:
# lines.append(place)
- # if marker is not None:
- # lines.append(str(marker))
+ # if mark is not None:
+ # lines.append(str(mark))
if self.context is not None:
lines.append(self.context)
- if self.context_marker is not None \
- and (self.problem is None or self.problem_marker is None
- or self.context_marker.name != self.problem_marker.name
- or self.context_marker.line != self.problem_marker.line
- or self.context_marker.column != self.problem_marker.column):
- lines.append(str(self.context_marker))
+ if self.context_mark is not None \
+ and (self.problem is None or self.problem_mark is None
+ or self.context_mark.name != self.problem_mark.name
+ or self.context_mark.line != self.problem_mark.line
+ or self.context_mark.column != self.problem_mark.column):
+ lines.append(str(self.context_mark))
if self.problem is not None:
lines.append(self.problem)
- if self.problem_marker is not None:
- lines.append(str(self.problem_marker))
+ if self.problem_mark is not None:
+ lines.append(str(self.problem_mark))
return '\n'.join(lines)
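
With the extra `index` field, a Mark now pins an exact character offset as well as the line/column pair. A small usage sketch (assuming this branch's module layout, and the snippet rendering from the unchanged part of the file):

    from yaml.error import Mark, MarkedYAMLError

    data = u"- one\n-two\n"
    mark = Mark('<string>', 6, 1, 0, data, 6)   # name, index, line, column,
                                                # buffer, pointer
    error = MarkedYAMLError("while scanning a block collection", mark,
                            "expected <block end>, but found '-'", mark)
    print(str(error))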
diff --git a/lib/yaml/events.py b/lib/yaml/events.py
index d468c53..8837633 100644
--- a/lib/yaml/events.py
+++ b/lib/yaml/events.py
@@ -1,39 +1,39 @@
class Event:
- def __init__(self, start_marker, end_marker):
- self.start_marker = start_marker
- self.end_marker = end_marker
+ def __init__(self, start_mark, end_mark):
+ self.start_mark = start_mark
+ self.end_mark = end_mark
def __repr__(self):
attributes = [key for key in self.__dict__
- if not key.endswith('_marker')]
+ if not key.endswith('_mark')]
attributes.sort()
arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
for key in attributes])
return '%s(%s)' % (self.__class__.__name__, arguments)
class NodeEvent(Event):
- def __init__(self, anchor, start_marker, end_marker):
+ def __init__(self, anchor, start_mark, end_mark):
self.anchor = anchor
- self.start_marker = start_marker
- self.end_marker = end_marker
+ self.start_mark = start_mark
+ self.end_mark = end_mark
class AliasEvent(NodeEvent):
pass
class ScalarEvent(NodeEvent):
- def __init__(self, anchor, tag, value, start_marker, end_marker):
+ def __init__(self, anchor, tag, value, start_mark, end_mark):
self.anchor = anchor
self.tag = tag
self.value = value
- self.start_marker = start_marker
- self.end_marker = end_marker
+ self.start_mark = start_mark
+ self.end_mark = end_mark
class CollectionEvent(NodeEvent):
- def __init__(self, anchor, tag, start_marker, end_marker):
+ def __init__(self, anchor, tag, start_mark, end_mark):
self.anchor = anchor
self.tag = tag
- self.start_marker = start_marker
- self.end_marker = end_marker
+ self.start_mark = start_mark
+ self.end_mark = end_mark
class SequenceEvent(CollectionEvent):
pass
@@ -44,6 +44,15 @@ class MappingEvent(CollectionEvent):
class CollectionEndEvent(Event):
pass
+class DocumentStartEvent(Event):
+ pass
+
+class DocumentEndEvent(Event):
+ pass
+
+class StreamStartEvent(Event):
+ pass
+
class StreamEndEvent(Event):
pass
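
With StreamStartEvent, DocumentStartEvent, and DocumentEndEvent added, every parse is now bracketed symmetrically. A hand-written trace of the events this branch's parser emits for the two-document stream "a\n--- b\n" (marks omitted, repr() per the Event.__repr__ above):

    expected = [
        "StreamStartEvent()",
        "DocumentStartEvent()",
        "ScalarEvent(anchor=None, tag=None, value=u'a')",
        "DocumentEndEvent()",
        "DocumentStartEvent()",
        "ScalarEvent(anchor=None, tag=None, value=u'b')",
        "DocumentEndEvent()",
        "StreamEndEvent()",
    ]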
diff --git a/lib/yaml/nodes.py b/lib/yaml/nodes.py
index 377d24c..6c27421 100644
--- a/lib/yaml/nodes.py
+++ b/lib/yaml/nodes.py
@@ -1,10 +1,10 @@
class Node:
- def __init__(self, tag, value, start_marker, end_marker):
+ def __init__(self, tag, value, start_mark, end_mark):
self.tag = tag
self.value = value
- self.start_marker = start_marker
- self.end_marker = end_marker
+ self.start_mark = start_mark
+ self.end_mark = end_mark
def __repr__(self):
value = self.value
if isinstance(value, list):
diff --git a/lib/yaml/parser.py b/lib/yaml/parser.py
index a66dc81..9870699 100644
--- a/lib/yaml/parser.py
+++ b/lib/yaml/parser.py
@@ -2,7 +2,7 @@
# YAML can be parsed by an LL(1) parser!
#
# We use the following production rules:
-# stream ::= implicit_document? explicit_document* STREAM-END
+# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END?
# implicit_document ::= block_node DOCUMENT-END?
# block_node ::= ALIAS | properties? block_content
@@ -42,7 +42,7 @@
# or line breaks.
# FIRST sets:
-# stream: FIRST(block_node) + { DIRECTIVE DOCUMENT-START }
+# stream: { STREAM-START }
# explicit_document: { DIRECTIVE DOCUMENT-START }
# implicit_document: FIRST(block_node)
# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
@@ -126,36 +126,57 @@ class Parser:
return self.event_generator
def parse_stream(self):
- # implicit_document? explicit_document* STREAM-END
+ # STREAM-START implicit_document? explicit_document* STREAM-END
+
+ # Parse start of stream.
+ token = self.scanner.get()
+ yield StreamStartEvent(token.start_mark, token.end_mark)
# Parse implicit document.
if not self.scanner.check(DirectiveToken, DocumentStartToken,
StreamEndToken):
self.tag_handles = self.DEFAULT_TAGS
+ token = self.scanner.peek()
+ start_mark = end_mark = token.start_mark
+ yield DocumentStartEvent(start_mark, end_mark)
for event in self.parse_block_node():
yield event
+ token = self.scanner.peek()
+ start_mark = end_mark = token.start_mark
+ while self.scanner.check(DocumentEndToken):
+ token = self.scanner.get()
+ end_mark = token.end_mark
+ yield DocumentEndEvent(start_mark, end_mark)
# Parse explicit documents.
while not self.scanner.check(StreamEndToken):
+ token = self.scanner.peek()
+ start_mark = token.start_mark
self.process_directives()
if not self.scanner.check(DocumentStartToken):
raise ParserError(None, None,
"expected '<document start>', but found %r"
% self.scanner.peek().id,
- self.scanner.peek().start_marker)
+ self.scanner.peek().start_mark)
token = self.scanner.get()
+ end_mark = token.end_mark
+ yield DocumentStartEvent(start_mark, end_mark)
if self.scanner.check(DirectiveToken,
DocumentStartToken, DocumentEndToken, StreamEndToken):
- yield self.process_empty_scalar(token.end_marker)
+ yield self.process_empty_scalar(token.end_mark)
else:
for event in self.parse_block_node():
yield event
+ token = self.scanner.peek()
+ start_mark = end_mark = token.start_mark
while self.scanner.check(DocumentEndToken):
- self.scanner.get()
+ token = self.scanner.get()
+ end_mark = token.end_mark
+ yield DocumentEndEvent(start_mark, end_mark)
# Parse end of stream.
token = self.scanner.get()
- yield StreamEndEvent(token.start_marker, token.end_marker)
+ yield StreamEndEvent(token.start_mark, token.end_mark)
def process_directives(self):
# DIRECTIVE*
@@ -166,19 +187,19 @@ class Parser:
if token.name == u'YAML':
if self.yaml_version is not None:
raise ParserError(None, None,
- "found duplicate YAML directive", token.start_marker)
+ "found duplicate YAML directive", token.start_mark)
major, minor = token.value
if major != 1:
raise ParserError(None, None,
"found incompatible YAML document (version 1.* is required)",
- token.start_marker)
+ token.start_mark)
self.yaml_version = token.value
elif token.name == u'TAG':
handle, prefix = token.value
if handle in self.tag_handles:
raise ParserError(None, None,
"duplicate tag handle %r" % handle.encode('utf-8'),
- token.start_marker)
+ token.start_mark)
self.tag_handles[handle] = prefix
for key in self.DEFAULT_TAGS:
if key not in self.tag_handles:
@@ -204,34 +225,34 @@ class Parser:
# (block_content | indentless_block_sequence)
if self.scanner.check(AliasToken):
token = self.scanner.get()
- yield AliasEvent(token.value, token.start_marker, token.end_marker)
+ yield AliasEvent(token.value, token.start_mark, token.end_mark)
else:
anchor = None
tag = None
- start_marker = end_marker = tag_marker = None
+ start_mark = end_mark = tag_mark = None
if self.scanner.check(AnchorToken):
token = self.scanner.get()
- start_marker = end_marker = token.start_marker
+ start_mark = end_mark = token.start_mark
anchor = token.value
if self.scanner.check(TagToken):
token = self.scanner.get()
- end_marker = tag_marker = token.start_marker
+ end_mark = tag_mark = token.start_mark
tag = token.value
elif self.scanner.check(TagToken):
token = self.scanner.get()
- start_marker = end_marker = tag_marker = token.start_marker
+ start_mark = end_mark = tag_mark = token.start_mark
tag = token.value
if self.scanner.check(AnchorToken):
token = self.scanner.get()
- end_marker = token.start_marker
+ end_mark = token.start_mark
anchor = token.value
if tag is not None:
handle, suffix = tag
if handle is not None:
if handle not in self.tag_handles:
- raise ParserError("while parsing a node", start_marker,
+ raise ParserError("while parsing a node", start_mark,
"found undefined tag handle %r" % handle.encode('utf-8'),
- tag_marker)
+ tag_mark)
tag = self.tag_handles[handle]+suffix
else:
tag = suffix
@@ -239,35 +260,35 @@ class Parser:
if not (self.scanner.check(ScalarToken) and
self.scanner.peek().plain):
tag = u'!'
- if start_marker is None:
- start_marker = self.scanner.peek().start_marker
+ if start_mark is None:
+ start_mark = self.scanner.peek().start_mark
event = None
collection_events = None
if indentless_sequence and self.scanner.check(BlockEntryToken):
- end_marker = self.scanner.peek().end_marker
- event = SequenceEvent(anchor, tag, start_marker, end_marker)
+ end_mark = self.scanner.peek().end_mark
+ event = SequenceEvent(anchor, tag, start_mark, end_mark)
collection_events = self.parse_indentless_sequence()
else:
if self.scanner.check(ScalarToken):
token = self.scanner.get()
- end_marker = token.end_marker
+ end_mark = token.end_mark
event = ScalarEvent(anchor, tag, token.value,
- start_marker, end_marker)
+ start_mark, end_mark)
elif self.scanner.check(FlowSequenceStartToken):
- end_marker = self.scanner.peek().end_marker
- event = SequenceEvent(anchor, tag, start_marker, end_marker)
+ end_mark = self.scanner.peek().end_mark
+ event = SequenceEvent(anchor, tag, start_mark, end_mark)
collection_events = self.parse_flow_sequence()
elif self.scanner.check(FlowMappingStartToken):
- end_marker = self.scanner.peek().end_marker
- event = MappingEvent(anchor, tag, start_marker, end_marker)
+ end_mark = self.scanner.peek().end_mark
+ event = MappingEvent(anchor, tag, start_mark, end_mark)
collection_events = self.parse_flow_mapping()
elif block and self.scanner.check(BlockSequenceStartToken):
- end_marker = self.scanner.peek().start_marker
- event = SequenceEvent(anchor, tag, start_marker, end_marker)
+ end_mark = self.scanner.peek().start_mark
+ event = SequenceEvent(anchor, tag, start_mark, end_mark)
collection_events = self.parse_block_sequence()
elif block and self.scanner.check(BlockMappingStartToken):
- end_marker = self.scanner.peek().start_marker
- event = MappingEvent(anchor, tag, start_marker, end_marker)
+ end_mark = self.scanner.peek().start_mark
+ event = MappingEvent(anchor, tag, start_mark, end_mark)
collection_events = self.parse_block_mapping()
else:
if block:
@@ -275,9 +296,9 @@ class Parser:
else:
node = 'flow'
token = self.scanner.peek()
- raise ParserError("while scanning a %s node" % node, start_marker,
+ raise ParserError("while scanning a %s node" % node, start_mark,
"expected the node content, but found %r" % token.id,
- token.start_marker)
+ token.start_mark)
yield event
if collection_events is not None:
for event in collection_events:
@@ -286,20 +307,20 @@ class Parser:
def parse_block_sequence(self):
# BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
token = self.scanner.get()
- start_marker = token.start_marker
+ start_mark = token.start_mark
while self.scanner.check(BlockEntryToken):
token = self.scanner.get()
if not self.scanner.check(BlockEntryToken, BlockEndToken):
for event in self.parse_block_node():
yield event
else:
- yield self.process_empty_scalar(token.end_marker)
+ yield self.process_empty_scalar(token.end_mark)
if not self.scanner.check(BlockEndToken):
token = self.scanner.peek()
- raise ParserError("while scanning a block collection", start_marker,
- "expected <block end>, but found %r" % token.id, token.start_marker)
+ raise ParserError("while scanning a block collection", start_mark,
+ "expected <block end>, but found %r" % token.id, token.start_mark)
token = self.scanner.get()
- yield CollectionEndEvent(token.start_marker, token.end_marker)
+ yield CollectionEndEvent(token.start_mark, token.end_mark)
def parse_indentless_sequence(self):
# (BLOCK-ENTRY block_node?)+
@@ -310,9 +331,9 @@ class Parser:
for event in self.parse_block_node():
yield event
else:
- yield self.process_empty_scalar(token.end_marker)
+ yield self.process_empty_scalar(token.end_mark)
token = self.scanner.peek()
- yield CollectionEndEvent(token.start_marker, token.start_marker)
+ yield CollectionEndEvent(token.start_mark, token.start_mark)
def parse_block_mapping(self):
# BLOCK-MAPPING_START
@@ -320,7 +341,7 @@ class Parser:
# (VALUE block_node_or_indentless_sequence?)?)*
# BLOCK-END
token = self.scanner.get()
- start_marker = token.start_marker
+ start_mark = token.start_mark
while self.scanner.check(KeyToken, ValueToken):
if self.scanner.check(KeyToken):
token = self.scanner.get()
@@ -328,23 +349,23 @@ class Parser:
for event in self.parse_block_node_or_indentless_sequence():
yield event
else:
- yield self.process_empty_scalar(token.end_marker)
+ yield self.process_empty_scalar(token.end_mark)
if self.scanner.check(ValueToken):
token = self.scanner.get()
if not self.scanner.check(KeyToken, ValueToken, BlockEndToken):
for event in self.parse_block_node_or_indentless_sequence():
yield event
else:
- yield self.process_empty_scalar(token.end_marker)
+ yield self.process_empty_scalar(token.end_mark)
else:
token = self.scanner.peek()
- yield self.process_empty_scalar(token.start_marker)
+ yield self.process_empty_scalar(token.start_mark)
if not self.scanner.check(BlockEndToken):
token = self.scanner.peek()
- raise ParserError("while scanning a block mapping", start_marker,
- "expected <block end>, but found %r" % token.id, token.start_marker)
+ raise ParserError("while scanning a block mapping", start_mark,
+ "expected <block end>, but found %r" % token.id, token.start_mark)
token = self.scanner.get()
- yield CollectionEndEvent(token.start_marker, token.end_marker)
+ yield CollectionEndEvent(token.start_mark, token.end_mark)
def parse_flow_sequence(self):
# flow_sequence ::= FLOW-SEQUENCE-START
@@ -358,41 +379,41 @@ class Parser:
# For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
# generates an inline mapping (set syntax).
token = self.scanner.get()
- start_marker = token.start_marker
+ start_mark = token.start_mark
while not self.scanner.check(FlowSequenceEndToken):
if self.scanner.check(KeyToken):
token = self.scanner.get()
yield MappingEvent(None, u'!',
- token.start_marker, token.end_marker)
+ token.start_mark, token.end_mark)
if not self.scanner.check(ValueToken,
FlowEntryToken, FlowSequenceEndToken):
for event in self.parse_flow_node():
yield event
else:
- yield self.process_empty_scalar(token.end_marker)
+ yield self.process_empty_scalar(token.end_mark)
if self.scanner.check(ValueToken):
token = self.scanner.get()
if not self.scanner.check(FlowEntryToken, FlowSequenceEndToken):
for event in self.parse_flow_node():
yield event
else:
- yield self.process_empty_scalar(token.end_marker)
+ yield self.process_empty_scalar(token.end_mark)
else:
token = self.scanner.peek()
- yield self.process_empty_scalar(token.start_marker)
+ yield self.process_empty_scalar(token.start_mark)
token = self.scanner.peek()
- yield CollectionEndEvent(token.start_marker, token.start_marker)
+ yield CollectionEndEvent(token.start_mark, token.start_mark)
else:
for event in self.parse_flow_node():
yield event
if not self.scanner.check(FlowEntryToken, FlowSequenceEndToken):
token = self.scanner.peek()
- raise ParserError("while scanning a flow sequence", start_marker,
- "expected ',' or ']', but got %r" % token.id, token.start_marker)
+ raise ParserError("while scanning a flow sequence", start_mark,
+ "expected ',' or ']', but got %r" % token.id, token.start_mark)
if self.scanner.check(FlowEntryToken):
self.scanner.get()
token = self.scanner.get()
- yield CollectionEndEvent(token.start_marker, token.end_marker)
+ yield CollectionEndEvent(token.start_mark, token.end_mark)
def parse_flow_mapping(self):
# flow_mapping ::= FLOW-MAPPING-START
@@ -401,7 +422,7 @@ class Parser:
# FLOW-MAPPING-END
# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
token = self.scanner.get()
- start_marker = token.start_marker
+ start_mark = token.start_mark
while not self.scanner.check(FlowMappingEndToken):
if self.scanner.check(KeyToken):
token = self.scanner.get()
@@ -410,34 +431,34 @@ class Parser:
for event in self.parse_flow_node():
yield event
else:
- yield self.process_empty_scalar(token.end_marker)
+ yield self.process_empty_scalar(token.end_mark)
if self.scanner.check(ValueToken):
token = self.scanner.get()
if not self.scanner.check(FlowEntryToken, FlowMappingEndToken):
for event in self.parse_flow_node():
yield event
else:
- yield self.process_empty_scalar(token.end_marker)
+ yield self.process_empty_scalar(token.end_mark)
else:
token = self.scanner.peek()
- yield self.process_empty_scalar(token.start_marker)
+ yield self.process_empty_scalar(token.start_mark)
else:
for event in self.parse_flow_node():
yield event
- yield self.process_empty_scalar(self.scanner.peek().start_marker)
+ yield self.process_empty_scalar(self.scanner.peek().start_mark)
if not self.scanner.check(FlowEntryToken, FlowMappingEndToken):
token = self.scanner.peek()
- raise ParserError("while scanning a flow mapping", start_marker,
- "expected ',' or '}', but got %r" % token.id, token.start_marker)
+ raise ParserError("while scanning a flow mapping", start_mark,
+ "expected ',' or '}', but got %r" % token.id, token.start_mark)
if self.scanner.check(FlowEntryToken):
self.scanner.get()
if not self.scanner.check(FlowMappingEndToken):
token = self.scanner.peek()
- raise ParserError("while scanning a flow mapping", start_marker,
- "expected '}', but found %r" % token.id, token.start_marker)
+ raise ParserError("while scanning a flow mapping", start_mark,
+ "expected '}', but found %r" % token.id, token.start_mark)
token = self.scanner.get()
- yield CollectionEndEvent(token.start_marker, token.end_marker)
+ yield CollectionEndEvent(token.start_mark, token.end_mark)
- def process_empty_scalar(self, marker):
- return ScalarEvent(None, None, u'', marker, marker)
+ def process_empty_scalar(self, mark):
+ return ScalarEvent(None, None, u'', mark, mark)
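
The comment block at the top of this file is meant literally: every parse_* method picks its production by peeking at exactly one token, which is all that LL(1) requires. The same dispatch style on a toy flow sequence (hypothetical token tuples, not the Scanner API):

    def parse_node(tokens):
        kind = tokens[0][0]                     # peek one token ahead
        if kind == 'SCALAR':
            return tokens.pop(0)[1]
        if kind == 'FLOW-SEQUENCE-START':
            tokens.pop(0)
            items = []
            while tokens[0][0] != 'FLOW-SEQUENCE-END':
                items.append(parse_node(tokens))
                if tokens[0][0] == 'FLOW-ENTRY':
                    tokens.pop(0)
            tokens.pop(0)
            return items
        raise ValueError('unexpected token %r' % kind)

    print(parse_node([('FLOW-SEQUENCE-START',), ('SCALAR', 'a'),
                      ('FLOW-ENTRY',), ('SCALAR', 'b'),
                      ('FLOW-SEQUENCE-END',)]))  # ['a', 'b']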
diff --git a/lib/yaml/reader.py b/lib/yaml/reader.py
index a4d0299..9778943 100644
--- a/lib/yaml/reader.py
+++ b/lib/yaml/reader.py
@@ -3,7 +3,7 @@
#
# We define two classes here.
#
-# Marker(source, line, column)
+# Mark(source, index, line, column)
# It's just a record and its only use is producing nice error messages.
# Parser does not use it for any other purposes.
#
@@ -17,7 +17,7 @@
__all__ = ['Reader', 'ReaderError']
-from error import YAMLError, Marker
+from error import YAMLError, Mark
import codecs, re
@@ -142,12 +142,13 @@ class Reader:
elif ch != u'\uFEFF':
self.column += 1
- def get_marker(self):
+ def get_mark(self):
if self.stream is None:
- return Marker(self.name, self.line, self.column,
+ return Mark(self.name, self.index, self.line, self.column,
self.buffer, self.pointer)
else:
- return Marker(self.name, self.line, self.column, None, None)
+ return Mark(self.name, self.index, self.line, self.column,
+ None, None)
def determine_encoding(self):
while not self.eof and len(self.raw_buffer) < 2:
diff --git a/lib/yaml/scanner.py b/lib/yaml/scanner.py
index 80d69b6..9c536b4 100644
--- a/lib/yaml/scanner.py
+++ b/lib/yaml/scanner.py
@@ -1,9 +1,10 @@
# Scanner produces tokens of the following types:
+# STREAM-START
+# STREAM-END
# DIRECTIVE(name, value)
# DOCUMENT-START
# DOCUMENT-END
-# STREAM-END
# BLOCK-SEQUENCE-START
# BLOCK-MAPPING-START
# BLOCK-END
@@ -34,13 +35,13 @@ class ScannerError(MarkedYAMLError):
class SimpleKey:
# See below simple keys treatment.
- def __init__(self, token_number, required, index, line, column, marker):
+ def __init__(self, token_number, required, index, line, column, mark):
self.token_number = token_number
self.required = required
self.index = index
self.line = line
self.column = column
- self.marker = marker
+ self.mark = mark
class Scanner:
@@ -68,6 +69,9 @@ class Scanner:
# List of processed tokens that are not yet emitted.
self.tokens = []
+ # Add the STREAM-START token.
+ self.fetch_stream_start()
+
# Number of tokens that were emitted through the `get_token` method.
self.tokens_taken = 0
@@ -102,7 +106,7 @@ class Scanner:
# Keep track of possible simple keys. This is a dictionary. The key
# is `flow_level`; there can be no more than one possible simple key
# for each level. The value is a SimpleKey record:
- # (token_number, required, index, line, column, marker)
+ # (token_number, required, index, line, column, mark)
# A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
# '[', or '{' tokens.
self.possible_simple_keys = {}
@@ -261,7 +265,7 @@ class Scanner:
# No? It's an error. Let's produce a nice error message.
raise ScannerError("while scanning for the next token", None,
"found character %r that cannot start any token"
- % ch.encode('utf-8'), self.reader.get_marker())
+ % ch.encode('utf-8'), self.reader.get_mark())
# Simple keys treatment.
@@ -292,8 +296,8 @@ class Scanner:
if key.line != self.reader.line \
or self.reader.index-key.index > 1024:
if key.required:
- raise ScannerError("while scanning a simple key", key.marker,
- "could not found expected ':'", self.reader.get_marker())
+ raise ScannerError("while scanning a simple key", key.mark,
+ "could not found expected ':'", self.reader.get_mark())
del self.possible_simple_keys[level]
def save_possible_simple_key(self):
@@ -316,9 +320,9 @@ class Scanner:
index = self.reader.index
line = self.reader.line
column = self.reader.column
- marker = self.reader.get_marker()
+ mark = self.reader.get_mark()
key = SimpleKey(token_number, required,
- index, line, column, marker)
+ index, line, column, mark)
self.possible_simple_keys[self.flow_level] = key
def remove_possible_simple_key(self):
@@ -329,29 +333,34 @@ class Scanner:
# I don't think it's possible, but I could be wrong.
assert not key.required
#if key.required:
- # raise ScannerError("while scanning a simple key", key.marker,
- # "could not found expected ':'", self.reader.get_marker())
+ # raise ScannerError("while scanning a simple key", key.mark,
+ # "could not found expected ':'", self.reader.get_mark())
# Indentation functions.
def unwind_indent(self, column):
- # In flow context, tokens should respect indentation.
- # Actually the condition should be `self.indent >= column` according to
- # the spec. But this condition will prohibit intuitively correct
- # constructions such as
- # key : {
- # }
- if self.flow_level and self.indent > column:
- raise ScannerError(None, None,
- "invalid intendation or unclosed '[' or '{'",
- self.reader.get_marker())
+ ## In flow context, tokens should respect indentation.
+ ## Actually the condition should be `self.indent >= column` according to
+ ## the spec. But this condition will prohibit intuitively correct
+ ## constructions such as
+ ## key : {
+ ## }
+ #if self.flow_level and self.indent > column:
+ # raise ScannerError(None, None,
+ # "invalid intendation or unclosed '[' or '{'",
+ # self.reader.get_mark())
+
+ # In the flow context, indentation is ignored. We make the scanner less
+ # restrictive than the specification requires.
+ if self.flow_level:
+ return
# In block context, we may need to issue the BLOCK-END tokens.
while self.indent > column:
- marker = self.reader.get_marker()
+ mark = self.reader.get_mark()
self.indent = self.indents.pop()
- self.tokens.append(BlockEndToken(marker, marker))
+ self.tokens.append(BlockEndToken(mark, mark))
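
In the block context unwind_indent still pops the indent stack and emits one BLOCK-END per popped level; only the flow-context check was relaxed. The popping rule restated on its own (illustrative, standalone):

    def unwind(indents, column):
        block_ends = 0
        while indents and indents[-1] > column:
            indents.pop()
            block_ends += 1
        return block_ends

    indents = [0, 2, 4]
    print(unwind(indents, 0))   # 2 BLOCK-END tokens; indents is now [0]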
def add_indent(self, column):
# Check if we need to increase indentation.
@@ -363,6 +372,17 @@ class Scanner:
# Fetchers.
+ def fetch_stream_start(self):
+ # We always add STREAM-START as the first token and STREAM-END as the
+ # last token.
+
+ # Read the token.
+ mark = self.reader.get_mark()
+
+ # Add STREAM-START.
+ self.tokens.append(StreamStartToken(mark, mark))
+
+
def fetch_stream_end(self):
# Set the current indentation to -1.
@@ -373,10 +393,10 @@ class Scanner:
self.possible_simple_keys = {}
# Read the token.
- marker = self.reader.get_marker()
+ mark = self.reader.get_mark()
- # Add END.
- self.tokens.append(StreamEndToken(marker, marker))
+ # Add STREAM-END.
+ self.tokens.append(StreamEndToken(mark, mark))
# The reader is ended.
self.done = True
@@ -410,10 +430,10 @@ class Scanner:
self.allow_simple_key = False
# Add DOCUMENT-START or DOCUMENT-END.
- start_marker = self.reader.get_marker()
+ start_mark = self.reader.get_mark()
self.reader.forward(3)
- end_marker = self.reader.get_marker()
- self.tokens.append(TokenClass(start_marker, end_marker))
+ end_mark = self.reader.get_mark()
+ self.tokens.append(TokenClass(start_mark, end_mark))
def fetch_flow_sequence_start(self):
self.fetch_flow_collection_start(FlowSequenceStartToken)
@@ -433,10 +453,10 @@ class Scanner:
self.allow_simple_key = True
# Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
- start_marker = self.reader.get_marker()
+ start_mark = self.reader.get_mark()
self.reader.forward()
- end_marker = self.reader.get_marker()
- self.tokens.append(TokenClass(start_marker, end_marker))
+ end_mark = self.reader.get_mark()
+ self.tokens.append(TokenClass(start_mark, end_mark))
def fetch_flow_sequence_end(self):
self.fetch_flow_collection_end(FlowSequenceEndToken)
@@ -456,10 +476,10 @@ class Scanner:
self.allow_simple_key = False
# Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
- start_marker = self.reader.get_marker()
+ start_mark = self.reader.get_mark()
self.reader.forward()
- end_marker = self.reader.get_marker()
- self.tokens.append(TokenClass(start_marker, end_marker))
+ end_mark = self.reader.get_mark()
+ self.tokens.append(TokenClass(start_mark, end_mark))
def fetch_flow_entry(self):
@@ -470,10 +490,10 @@ class Scanner:
self.remove_possible_simple_key()
# Add FLOW-ENTRY.
- start_marker = self.reader.get_marker()
+ start_mark = self.reader.get_mark()
self.reader.forward()
- end_marker = self.reader.get_marker()
- self.tokens.append(FlowEntryToken(start_marker, end_marker))
+ end_mark = self.reader.get_mark()
+ self.tokens.append(FlowEntryToken(start_mark, end_mark))
def fetch_block_entry(self):
@@ -484,12 +504,12 @@ class Scanner:
if not self.allow_simple_key:
raise ScannerError(None, None,
"sequence entries are not allowed here",
- self.reader.get_marker())
+ self.reader.get_mark())
# We may need to add BLOCK-SEQUENCE-START.
if self.add_indent(self.reader.column):
- marker = self.reader.get_marker()
- self.tokens.append(BlockSequenceStartToken(marker, marker))
+ mark = self.reader.get_mark()
+ self.tokens.append(BlockSequenceStartToken(mark, mark))
# It's an error for the block entry to occur in the flow context,
# but we let the parser detect this.
@@ -503,10 +523,10 @@ class Scanner:
self.remove_possible_simple_key()
# Add BLOCK-ENTRY.
- start_marker = self.reader.get_marker()
+ start_mark = self.reader.get_mark()
self.reader.forward()
- end_marker = self.reader.get_marker()
- self.tokens.append(BlockEntryToken(start_marker, end_marker))
+ end_mark = self.reader.get_mark()
+ self.tokens.append(BlockEntryToken(start_mark, end_mark))
def fetch_key(self):
@@ -517,12 +537,12 @@ class Scanner:
if not self.allow_simple_key:
raise ScannerError(None, None,
"mapping keys are not allowed here",
- self.reader.get_marker())
+ self.reader.get_mark())
# We may need to add BLOCK-MAPPING-START.
if self.add_indent(self.reader.column):
- marker = self.reader.get_marker()
- self.tokens.append(BlockMappingStartToken(marker, marker))
+ mark = self.reader.get_mark()
+ self.tokens.append(BlockMappingStartToken(mark, mark))
# Simple keys are allowed after '?' in the block context.
self.allow_simple_key = not self.flow_level
@@ -531,10 +551,10 @@ class Scanner:
self.remove_possible_simple_key()
# Add KEY.
- start_marker = self.reader.get_marker()
+ start_mark = self.reader.get_mark()
self.reader.forward()
- end_marker = self.reader.get_marker()
- self.tokens.append(KeyToken(start_marker, end_marker))
+ end_mark = self.reader.get_mark()
+ self.tokens.append(KeyToken(start_mark, end_mark))
def fetch_value(self):
@@ -545,14 +565,14 @@ class Scanner:
key = self.possible_simple_keys[self.flow_level]
del self.possible_simple_keys[self.flow_level]
self.tokens.insert(key.token_number-self.tokens_taken,
- KeyToken(key.marker, key.marker))
+ KeyToken(key.mark, key.mark))
# If this key starts a new block mapping, we need to add
# BLOCK-MAPPING-START.
if not self.flow_level:
if self.add_indent(key.column):
self.tokens.insert(key.token_number-self.tokens_taken,
- BlockMappingStartToken(key.marker, key.marker))
+ BlockMappingStartToken(key.mark, key.mark))
# There cannot be two simple keys one after another.
self.allow_simple_key = False
@@ -570,7 +590,7 @@ class Scanner:
if not self.allow_simple_key:
raise ScannerError(None, None,
"mapping values are not allowed here",
- self.reader.get_marker())
+ self.reader.get_mark())
# Simple keys are allowed after ':' in the block context.
self.allow_simple_key = not self.flow_level
@@ -579,10 +599,10 @@ class Scanner:
self.remove_possible_simple_key()
# Add VALUE.
- start_marker = self.reader.get_marker()
+ start_mark = self.reader.get_mark()
self.reader.forward()
- end_marker = self.reader.get_marker()
- self.tokens.append(ValueToken(start_marker, end_marker))
+ end_mark = self.reader.get_mark()
+ self.tokens.append(ValueToken(start_mark, end_mark))
def fetch_alias(self):
@@ -773,24 +793,24 @@ class Scanner:
def scan_directive(self):
# See the specification for details.
- start_marker = self.reader.get_marker()
+ start_mark = self.reader.get_mark()
self.reader.forward()
- name = self.scan_directive_name(start_marker)
+ name = self.scan_directive_name(start_mark)
value = None
if name == u'YAML':
- value = self.scan_yaml_directive_value(start_marker)
- end_marker = self.reader.get_marker()
+ value = self.scan_yaml_directive_value(start_mark)
+ end_mark = self.reader.get_mark()
elif name == u'TAG':
- value = self.scan_tag_directive_value(start_marker)
- end_marker = self.reader.get_marker()
+ value = self.scan_tag_directive_value(start_mark)
+ end_mark = self.reader.get_mark()
else:
- end_marker = self.reader.get_marker()
+ end_mark = self.reader.get_mark()
while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029':
self.reader.forward()
- self.scan_directive_ignored_line(start_marker)
- return DirectiveToken(name, value, start_marker, end_marker)
+ self.scan_directive_ignored_line(start_mark)
+ return DirectiveToken(name, value, start_mark, end_mark)
- def scan_directive_name(self, start_marker):
+ def scan_directive_name(self, start_mark):
# See the specification for details.
length = 0
ch = self.reader.peek(length)
@@ -799,44 +819,44 @@ class Scanner:
length += 1
ch = self.reader.peek(length)
if not length:
- raise ScannerError("while scanning a directive", start_marker,
+ raise ScannerError("while scanning a directive", start_mark,
"expected alphabetic or numeric character, but found %r"
- % ch.encode('utf-8'), self.reader.get_marker())
+ % ch.encode('utf-8'), self.reader.get_mark())
value = self.reader.prefix(length)
self.reader.forward(length)
ch = self.reader.peek()
if ch not in u'\0 \r\n\x85\u2028\u2029':
- raise ScannerError("while scanning a directive", start_marker,
+ raise ScannerError("while scanning a directive", start_mark,
"expected alphabetic or numeric character, but found %r"
- % ch.encode('utf-8'), self.reader.get_marker())
+ % ch.encode('utf-8'), self.reader.get_mark())
return value
- def scan_yaml_directive_value(self, start_marker):
+ def scan_yaml_directive_value(self, start_mark):
# See the specification for details.
while self.reader.peek() == u' ':
self.reader.forward()
- major = self.scan_yaml_directive_number(start_marker)
+ major = self.scan_yaml_directive_number(start_mark)
if self.reader.peek() != '.':
- raise ScannerError("while scanning a directive", start_marker,
+ raise ScannerError("while scanning a directive", start_mark,
"expected a digit or '.', but found %r"
% self.reader.peek().encode('utf-8'),
- self.reader.get_marker())
+ self.reader.get_mark())
self.reader.forward()
- minor = self.scan_yaml_directive_number(start_marker)
+ minor = self.scan_yaml_directive_number(start_mark)
if self.reader.peek() not in u'\0 \r\n\x85\u2028\u2029':
- raise ScannerError("while scanning a directive", start_marker,
+ raise ScannerError("while scanning a directive", start_mark,
"expected a digit or ' ', but found %r"
% self.reader.peek().encode('utf-8'),
- self.reader.get_marker())
+ self.reader.get_mark())
return (major, minor)
- def scan_yaml_directive_number(self, start_marker):
+ def scan_yaml_directive_number(self, start_mark):
# See the specification for details.
ch = self.reader.peek()
if not (u'0' <= ch <= '9'):
- raise ScannerError("while scanning a directive", start_marker,
+ raise ScannerError("while scanning a directive", start_mark,
"expected a digit, but found %r" % ch.encode('utf-8'),
- self.reader.get_marker())
+ self.reader.get_mark())
length = 0
while u'0' <= self.reader.peek(length) <= u'9':
length += 1
@@ -844,37 +864,37 @@ class Scanner:
self.reader.forward(length)
return value
- def scan_tag_directive_value(self, start_marker):
+ def scan_tag_directive_value(self, start_mark):
# See the specification for details.
while self.reader.peek() == u' ':
self.reader.forward()
- handle = self.scan_tag_directive_handle(start_marker)
+ handle = self.scan_tag_directive_handle(start_mark)
while self.reader.peek() == u' ':
self.reader.forward()
- prefix = self.scan_tag_directive_prefix(start_marker)
+ prefix = self.scan_tag_directive_prefix(start_mark)
return (handle, prefix)
- def scan_tag_directive_handle(self, start_marker):
+ def scan_tag_directive_handle(self, start_mark):
# See the specification for details.
- value = self.scan_tag_handle('directive', start_marker)
+ value = self.scan_tag_handle('directive', start_mark)
ch = self.reader.peek()
if ch != u' ':
- raise ScannerError("while scanning a directive", start_marker,
+ raise ScannerError("while scanning a directive", start_mark,
"expected ' ', but found %r" % ch.encode('utf-8'),
- self.reader.get_marker())
+ self.reader.get_mark())
return value
- def scan_tag_directive_prefix(self, start_marker):
+ def scan_tag_directive_prefix(self, start_mark):
# See the specification for details.
- value = self.scan_tag_uri('directive', start_marker)
+ value = self.scan_tag_uri('directive', start_mark)
ch = self.reader.peek()
if ch not in u'\0 \r\n\x85\u2028\u2029':
- raise ScannerError("while scanning a directive", start_marker,
+ raise ScannerError("while scanning a directive", start_mark,
"expected ' ', but found %r" % ch.encode('utf-8'),
- self.reader.get_marker())
+ self.reader.get_mark())
return value
- def scan_directive_ignored_line(self, start_marker):
+ def scan_directive_ignored_line(self, start_mark):
# See the specification for details.
while self.reader.peek() == u' ':
self.reader.forward()
@@ -883,9 +903,9 @@ class Scanner:
self.reader.forward()
ch = self.reader.peek()
if ch not in u'\0\r\n\x85\u2028\u2029':
- raise ScannerError("while scanning a directive", start_marker,
+ raise ScannerError("while scanning a directive", start_mark,
"expected a comment or a line break, but found %r"
- % ch.encode('utf-8'), self.reader.get_marker())
+ % ch.encode('utf-8'), self.reader.get_mark())
self.scan_line_break()
def scan_anchor(self, TokenClass):
@@ -897,7 +917,7 @@ class Scanner:
# and
# [ *alias , "value" ]
# Therefore we restrict aliases to numbers and ASCII letters.
- start_marker = self.reader.get_marker()
+ start_mark = self.reader.get_mark()
indicator = self.reader.peek()
if indicator == '*':
name = 'alias'
@@ -911,31 +931,31 @@ class Scanner:
length += 1
ch = self.reader.peek(length)
if not length:
- raise ScannerError("while scanning an %s" % name, start_marker,
+ raise ScannerError("while scanning an %s" % name, start_mark,
"expected alphabetic or numeric character, but found %r"
- % ch.encode('utf-8'), self.reader.get_marker())
+ % ch.encode('utf-8'), self.reader.get_mark())
value = self.reader.prefix(length)
self.reader.forward(length)
ch = self.reader.peek()
if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`':
- raise ScannerError("while scanning an %s" % name, start_marker,
+ raise ScannerError("while scanning an %s" % name, start_mark,
"expected alphabetic or numeric character, but found %r"
- % ch.encode('utf-8'), self.reader.get_marker())
- end_marker = self.reader.get_marker()
- return TokenClass(value, start_marker, end_marker)
+ % ch.encode('utf-8'), self.reader.get_mark())
+ end_mark = self.reader.get_mark()
+ return TokenClass(value, start_mark, end_mark)
def scan_tag(self):
# See the specification for details.
- start_marker = self.reader.get_marker()
+ start_mark = self.reader.get_mark()
ch = self.reader.peek(1)
if ch == u'<':
handle = None
self.reader.forward(2)
- suffix = self.scan_tag_uri('tag', start_marker)
+ suffix = self.scan_tag_uri('tag', start_mark)
if self.reader.peek() != u'>':
- raise ScannerError("while parsing a tag", start_marker,
+ raise ScannerError("while parsing a tag", start_mark,
"expected '>', but found %r" % self.reader.peek().encode('utf-8'),
- self.reader.get_marker())
+ self.reader.get_mark())
self.reader.forward()
elif ch in u'\0 \t\r\n\x85\u2028\u2029':
handle = None
@@ -952,41 +972,41 @@ class Scanner:
ch = self.reader.peek(length)
handle = u'!'
if use_handle:
- handle = self.scan_tag_handle('tag', start_marker)
+ handle = self.scan_tag_handle('tag', start_mark)
else:
handle = u'!'
self.reader.forward()
- suffix = self.scan_tag_uri('tag', start_marker)
+ suffix = self.scan_tag_uri('tag', start_mark)
ch = self.reader.peek()
if ch not in u'\0 \r\n\x85\u2028\u2029':
- raise ScannerError("while scanning a tag", start_marker,
+ raise ScannerError("while scanning a tag", start_mark,
"expected ' ', but found %r" % ch.encode('utf-8'),
- self.reader.get_marker())
+ self.reader.get_mark())
value = (handle, suffix)
- end_marker = self.reader.get_marker()
- return TagToken(value, start_marker, end_marker)
+ end_mark = self.reader.get_mark()
+ return TagToken(value, start_mark, end_mark)
def scan_block_scalar(self, folded):
# See the specification for details.
chunks = []
- start_marker = self.reader.get_marker()
+ start_mark = self.reader.get_mark()
# Scan the header.
self.reader.forward()
- chomping, increment = self.scan_block_scalar_indicators(start_marker)
- self.scan_block_scalar_ignored_line(start_marker)
+ chomping, increment = self.scan_block_scalar_indicators(start_mark)
+ self.scan_block_scalar_ignored_line(start_mark)
# Determine the indentation level and go to the first non-empty line.
min_indent = self.indent+1
if min_indent < 1:
min_indent = 1
if increment is None:
- breaks, max_indent, end_marker = self.scan_block_scalar_indentation()
+ breaks, max_indent, end_mark = self.scan_block_scalar_indentation()
indent = max(min_indent, max_indent)
else:
indent = min_indent+increment-1
- breaks, end_marker = self.scan_block_scalar_breaks(indent)
+ breaks, end_mark = self.scan_block_scalar_breaks(indent)
line_break = u''
# Scan the inner part of the block scalar.
@@ -999,7 +1019,7 @@ class Scanner:
chunks.append(self.reader.prefix(length))
self.reader.forward(length)
line_break = self.scan_line_break()
- breaks, end_marker = self.scan_block_scalar_breaks(indent)
+ breaks, end_mark = self.scan_block_scalar_breaks(indent)
if self.reader.column == indent and self.reader.peek() != u'\0':
# Unfortunately, folding rules are ambiguous.
#
@@ -1033,9 +1053,9 @@ class Scanner:
chunks.extend(breaks)
# We are done.
- return ScalarToken(u''.join(chunks), False, start_marker, end_marker)
+ return ScalarToken(u''.join(chunks), False, start_mark, end_mark)
- def scan_block_scalar_indicators(self, start_marker):
+ def scan_block_scalar_indicators(self, start_mark):
# See the specification for details.
chomping = None
increment = None
@@ -1050,16 +1070,16 @@ class Scanner:
if ch in u'0123456789':
increment = int(ch)
if increment == 0:
- raise ScannerError("while scanning a block scalar", start_marker,
+ raise ScannerError("while scanning a block scalar", start_mark,
"expected indentation indicator in the range 1-9, but found 0",
- self.reader.get_marker())
+ self.reader.get_mark())
self.reader.forward()
elif ch in u'0123456789':
increment = int(ch)
if increment == 0:
- raise ScannerError("while scanning a block scalar", start_marker,
+ raise ScannerError("while scanning a block scalar", start_mark,
"expected indentation indicator in the range 1-9, but found 0",
- self.reader.get_marker())
+ self.reader.get_mark())
self.reader.forward()
ch = self.reader.peek()
if ch in u'+-':
@@ -1070,12 +1090,12 @@ class Scanner:
self.reader.forward()
ch = self.reader.peek()
if ch not in u'\0 \r\n\x85\u2028\u2029':
- raise ScannerError("while scanning a block scalar", start_marker,
+ raise ScannerError("while scanning a block scalar", start_mark,
"expected chomping or indentation indicators, but found %r"
- % ch.encode('utf-8'), self.reader.get_marker())
+ % ch.encode('utf-8'), self.reader.get_mark())
return chomping, increment
- def scan_block_scalar_ignored_line(self, start_marker):
+ def scan_block_scalar_ignored_line(self, start_mark):
# See the specification for details.
while self.reader.peek() == u' ':
self.reader.forward()
@@ -1084,55 +1104,57 @@ class Scanner:
self.reader.forward()
ch = self.reader.peek()
if ch not in u'\0\r\n\x85\u2028\u2029':
- raise ScannerError("while scanning a block scalar", start_marker,
+ raise ScannerError("while scanning a block scalar", start_mark,
"expected a comment or a line break, but found %r"
- % ch.encode('utf-8'), self.reader.get_marker())
+ % ch.encode('utf-8'), self.reader.get_mark())
self.scan_line_break()
def scan_block_scalar_indentation(self):
# See the specification for details.
chunks = []
max_indent = 0
- end_marker = self.reader.get_marker()
+ end_mark = self.reader.get_mark()
while self.reader.peek() in u' \r\n\x85\u2028\u2029':
if self.reader.peek() != u' ':
chunks.append(self.scan_line_break())
- end_marker = self.reader.get_marker()
+ end_mark = self.reader.get_mark()
else:
self.reader.forward()
if self.reader.column > max_indent:
max_indent = self.reader.column
- return chunks, max_indent, end_marker
+ return chunks, max_indent, end_mark
def scan_block_scalar_breaks(self, indent):
# See the specification for details.
chunks = []
- end_marker = self.reader.get_marker()
+ end_mark = self.reader.get_mark()
while self.reader.column < indent and self.reader.peek() == u' ':
self.reader.forward()
while self.reader.peek() in u'\r\n\x85\u2028\u2029':
chunks.append(self.scan_line_break())
- end_marker = self.reader.get_marker()
+ end_mark = self.reader.get_mark()
while self.reader.column < indent and self.reader.peek() == u' ':
self.reader.forward()
- return chunks, end_marker
+ return chunks, end_mark
def scan_flow_scalar(self, double):
# See the specification for details.
+ # Note that we loosen the indentation rules for quoted scalars. Quoted
+ # scalars don't need to adhere to indentation, because " and ' clearly
+ # mark where they begin and end. Therefore we are less restrictive
+ # than the specification requires. We only need to check that document
+ # separators are not included in scalars.
chunks = []
- start_marker = self.reader.get_marker()
- indent = self.indent+1
- if indent == 0:
- indent = 1
+ start_mark = self.reader.get_mark()
quote = self.reader.peek()
self.reader.forward()
- chunks.extend(self.scan_flow_scalar_non_spaces(double, indent, start_marker))
+ chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
while self.reader.peek() != quote:
- chunks.extend(self.scan_flow_scalar_spaces(double, indent, start_marker))
- chunks.extend(self.scan_flow_scalar_non_spaces(double, indent, start_marker))
+ chunks.extend(self.scan_flow_scalar_spaces(double, start_mark))
+ chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
self.reader.forward()
- end_marker = self.reader.get_marker()
- return ScalarToken(u''.join(chunks), False, start_marker, end_marker)
+ end_mark = self.reader.get_mark()
+ return ScalarToken(u''.join(chunks), False, start_mark, end_mark)
ESCAPE_REPLACEMENTS = {
u'0': u'\0',
@@ -1160,7 +1182,7 @@ class Scanner:
u'U': 8,
}
- def scan_flow_scalar_non_spaces(self, double, indent, start_marker):
+ def scan_flow_scalar_non_spaces(self, double, start_mark):
# See the specification for details.
chunks = []
while True:
@@ -1188,22 +1210,22 @@ class Scanner:
self.reader.forward()
for k in range(length):
if self.reader.peek(k) not in u'0123456789ABCDEFabcdef':
- raise ScannerError("while scanning a double-quoted scalar", start_marker,
+ raise ScannerError("while scanning a double-quoted scalar", start_mark,
"expected escape sequence of %d hexdecimal numbers, but found %r" %
- (length, self.reader.peek(k).encode('utf-8')), self.reader.get_marker())
+ (length, self.reader.peek(k).encode('utf-8')), self.reader.get_mark())
code = int(self.reader.prefix(length), 16)
chunks.append(unichr(code))
self.reader.forward(length)
elif ch in u'\r\n\x85\u2028\u2029':
self.scan_line_break()
- chunks.extend(self.scan_flow_scalar_breaks(double, indent, start_marker))
+ chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
else:
- raise ScannerError("while scanning a double-quoted scalar", start_marker,
- "found unknown escape character %r" % ch.encode('utf-8'), self.reader.get_marker())
+ raise ScannerError("while scanning a double-quoted scalar", start_mark,
+ "found unknown escape character %r" % ch.encode('utf-8'), self.reader.get_mark())
else:
return chunks
- def scan_flow_scalar_spaces(self, double, indent, start_marker):
+ def scan_flow_scalar_spaces(self, double, start_mark):
# See the specification for details.
chunks = []
length = 0
@@ -1213,11 +1235,11 @@ class Scanner:
self.reader.forward(length)
ch = self.reader.peek()
if ch == u'\0':
- raise ScannerError("while scanning a quoted scalar", start_marker,
- "found unexpected end of stream", self.reader.get_marker())
+ raise ScannerError("while scanning a quoted scalar", start_mark,
+ "found unexpected end of stream", self.reader.get_mark())
elif ch in u'\r\n\x85\u2028\u2029':
line_break = self.scan_line_break()
- breaks = self.scan_flow_scalar_breaks(double, indent, start_marker)
+ breaks = self.scan_flow_scalar_breaks(double, start_mark)
if line_break != u'\n':
chunks.append(line_break)
elif not breaks:
@@ -1227,21 +1249,17 @@ class Scanner:
chunks.append(whitespaces)
return chunks
- def scan_flow_scalar_breaks(self, double, indent, start_marker):
+ def scan_flow_scalar_breaks(self, double, start_mark):
# See the specification for details.
chunks = []
while True:
- while self.reader.column < indent and self.reader.peek() == u' ':
- self.reader.forward()
- if self.reader.column < indent \
- and self.reader.peek() not in u'\0\r\n\x85\u2028\u2029':
- s = 's'
- if indent == 1:
- s = ''
- raise ScannerError("while scanning a quoted scalar", start_marker,
- "expected %d space%s indentation, but found %r"
- % (indent, s, self.reader.peek().encode('utf-8')),
- self.reader.get_marker())
+ # Instead of checking indentation, we check for document
+ # separators.
+ prefix = self.reader.prefix(3)
+ if (prefix == u'---' or prefix == u'...') \
+ and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+ raise ScannerError("while scanning a quoted scalar", start_mark,
+ "found unexpected document separator", self.reader.get_mark())
while self.reader.peek() in u' \t':
self.reader.forward()
if self.reader.peek() in u'\r\n\x85\u2028\u2029':
@@ -1252,14 +1270,17 @@ class Scanner:
def scan_plain(self):
# See the specification for details.
# We add an additional restriction for the flow context:
- # plain scalars in the flow context cannot contain ':' and '?'.
+ # plain scalars in the flow context cannot contain ',', ':' and '?'.
# We also keep track of the `allow_simple_key` flag here.
+ # Indentation rules are loosened for the flow context.
chunks = []
- start_marker = self.reader.get_marker()
- end_marker = start_marker
+ start_mark = self.reader.get_mark()
+ end_mark = start_mark
indent = self.indent+1
- if indent == 0:
- indent = 1
+ # We allow zero indentation for scalars, but then we need to check for
+ # document separators at the beginning of the line.
+ #if indent == 0:
+ # indent = 1
spaces = []
while True:
length = 0
@@ -1279,14 +1300,14 @@ class Scanner:
chunks.extend(spaces)
chunks.append(self.reader.prefix(length))
self.reader.forward(length)
- end_marker = self.reader.get_marker()
- spaces = self.scan_plain_spaces(indent)
+ end_mark = self.reader.get_mark()
+ spaces = self.scan_plain_spaces(indent, start_mark)
if not spaces or self.reader.peek() == u'#' \
- or self.reader.column < indent:
+ or (not self.flow_level and self.reader.column < indent):
break
- return ScalarToken(u''.join(chunks), True, start_marker, end_marker)
+ return ScalarToken(u''.join(chunks), True, start_mark, end_mark)
- def scan_plain_spaces(self, indent):
+ def scan_plain_spaces(self, indent, start_mark):
# See the specification for details.
# The specification is really confusing about tabs in plain scalars.
# We just forbid them completely. Do not use tabs in YAML!
@@ -1300,12 +1321,20 @@ class Scanner:
if ch in u'\r\n\x85\u2028\u2029':
line_break = self.scan_line_break()
self.allow_simple_key = True
+ prefix = self.reader.prefix(3)
+ if (prefix == u'---' or prefix == u'...') \
+ and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+ return
breaks = []
while self.reader.peek() in u' \r\n\x85\u2028\u2029':
if self.reader.peek() == ' ':
self.reader.forward()
else:
breaks.append(self.scan_line_break())
+ prefix = self.reader.prefix(3)
+ if (prefix == u'---' or prefix == u'...') \
+ and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+ return
if line_break != u'\n':
chunks.append(line_break)
elif not breaks:
@@ -1315,15 +1344,15 @@ class Scanner:
chunks.append(whitespaces)
return chunks
- def scan_tag_handle(self, name, start_marker):
+ def scan_tag_handle(self, name, start_mark):
# See the specification for details.
# For some strange reasons, the specification does not allow '_' in
# tag handles. I have allowed it anyway.
ch = self.reader.peek()
if ch != u'!':
- raise ScannerError("while scanning a %s" % name, start_marker,
+ raise ScannerError("while scanning a %s" % name, start_mark,
"expected '!', but found %r" % ch.encode('utf-8'),
- self.reader.get_marker())
+ self.reader.get_mark())
length = 1
ch = self.reader.peek(length)
if ch != u' ':
@@ -1333,15 +1362,15 @@ class Scanner:
ch = self.reader.peek(length)
if ch != u'!':
self.reader.forward(length)
- raise ScannerError("while scanning a %s" % name, start_marker,
+ raise ScannerError("while scanning a %s" % name, start_mark,
"expected '!', but found %r" % ch.encode('utf-8'),
- self.reader.get_marker())
+ self.reader.get_mark())
length += 1
value = self.reader.prefix(length)
self.reader.forward(length)
return value
- def scan_tag_uri(self, name, start_marker):
+ def scan_tag_uri(self, name, start_mark):
# See the specification for details.
# Note: we do not check if URI is well-formed.
chunks = []
@@ -1353,7 +1382,7 @@ class Scanner:
chunks.append(self.reader.prefix(length))
self.reader.forward(length)
length = 0
- chunks.append(self.scan_uri_escapes(name, start_marker))
+ chunks.append(self.scan_uri_escapes(name, start_mark))
else:
length += 1
ch = self.reader.peek(length)
@@ -1362,28 +1391,28 @@ class Scanner:
self.reader.forward(length)
length = 0
if not chunks:
- raise ScannerError("while parsing a %s" % name, start_marker,
+ raise ScannerError("while parsing a %s" % name, start_mark,
"expected URI, but found %r" % ch.encode('utf-8'),
- self.reader.get_marker())
+ self.reader.get_mark())
return u''.join(chunks)
- def scan_uri_escapes(self, name, start_marker):
+ def scan_uri_escapes(self, name, start_mark):
# See the specification for details.
bytes = []
- marker = self.reader.get_marker()
+ mark = self.reader.get_mark()
while self.reader.peek() == u'%':
self.reader.forward()
for k in range(2):
if self.reader.peek(k) not in u'0123456789ABCDEFabcdef':
- raise ScannerError("while scanning a %s" % name, start_marker,
+ raise ScannerError("while scanning a %s" % name, start_mark,
"expected URI escape sequence of 2 hexdecimal numbers, but found %r" %
- (self.reader.peek(k).encode('utf-8')), self.reader.get_marker())
+ (self.reader.peek(k).encode('utf-8')), self.reader.get_mark())
bytes.append(chr(int(self.reader.prefix(2), 16)))
self.reader.forward(2)
try:
value = unicode(''.join(bytes), 'utf-8')
except UnicodeDecodeError, exc:
- raise ScannerError("while scanning a %s" % name, start_marker, str(exc), marker)
+ raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark)
return value
def scan_line_break(self):
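The net effect of the scanner changes above is that quoted and plain scalars are no longer validated against the block indentation level; the only remaining restriction is that no line inside such a scalar may begin with a document separator. A minimal standalone sketch of that check (the names looks_like_document_separator, data, and pointer are hypothetical stand-ins for the Reader's buffer state, not part of the patch):

def looks_like_document_separator(data, pointer):
    # A separator is '---' or '...' at the start of a line, followed
    # by a space, a tab, a line break, or the end of the stream.
    prefix = data[pointer:pointer+3]
    if prefix not in (u'---', u'...'):
        return False
    follower = data[pointer+3:pointer+4] or u'\0'
    return follower in u'\0 \t\r\n\x85\u2028\u2029'

assert looks_like_document_separator(u'--- foo', 0)
assert looks_like_document_separator(u'...', 0)
assert not looks_like_document_separator(u'---x', 0)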
diff --git a/lib/yaml/tokens.py b/lib/yaml/tokens.py
index 863bb44..59b36af 100644
--- a/lib/yaml/tokens.py
+++ b/lib/yaml/tokens.py
@@ -1,11 +1,11 @@
class Token:
- def __init__(self, start_marker, end_marker):
- self.start_marker = start_marker
- self.end_marker = end_marker
+ def __init__(self, start_mark, end_mark):
+ self.start_mark = start_mark
+ self.end_mark = end_mark
def __repr__(self):
attributes = [key for key in self.__dict__
- if not key.endswith('_marker')]
+ if not key.endswith('_mark')]
attributes.sort()
arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
for key in attributes])
@@ -16,11 +16,11 @@ class Token:
class DirectiveToken(Token):
id = '<directive>'
- def __init__(self, name, value, start_marker, end_marker):
+ def __init__(self, name, value, start_mark, end_mark):
self.name = name
self.value = value
- self.start_marker = start_marker
- self.end_marker = end_marker
+ self.start_mark = start_mark
+ self.end_mark = end_mark
class DocumentStartToken(Token):
id = '<document start>'
@@ -28,6 +28,9 @@ class DocumentStartToken(Token):
class DocumentEndToken(Token):
id = '<document end>'
+class StreamStartToken(Token):
+ id = '<stream start>'
+
class StreamEndToken(Token):
id = '<stream end>'
@@ -66,30 +69,30 @@ class FlowEntryToken(Token):
class AliasToken(Token):
id = '<alias>'
- def __init__(self, value, start_marker, end_marker):
+ def __init__(self, value, start_mark, end_mark):
self.value = value
- self.start_marker = start_marker
- self.end_marker = end_marker
+ self.start_mark = start_mark
+ self.end_mark = end_mark
class AnchorToken(Token):
id = '<anchor>'
- def __init__(self, value, start_marker, end_marker):
+ def __init__(self, value, start_mark, end_mark):
self.value = value
- self.start_marker = start_marker
- self.end_marker = end_marker
+ self.start_mark = start_mark
+ self.end_mark = end_mark
class TagToken(Token):
id = '<tag>'
- def __init__(self, value, start_marker, end_marker):
+ def __init__(self, value, start_mark, end_mark):
self.value = value
- self.start_marker = start_marker
- self.end_marker = end_marker
+ self.start_mark = start_mark
+ self.end_mark = end_mark
class ScalarToken(Token):
id = '<scalar>'
- def __init__(self, value, plain, start_marker, end_marker):
+ def __init__(self, value, plain, start_mark, end_mark):
self.value = value
self.plain = plain
- self.start_marker = start_marker
- self.end_marker = end_marker
+ self.start_mark = start_mark
+ self.end_mark = end_mark
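The tokens.py hunk is a mechanical rename of start_marker/end_marker to start_mark/end_mark, plus the new StreamStartToken. One observable consequence of keeping the naming convention is that mark attributes stay out of token reprs, since __repr__ filters on the '_mark' suffix. A self-contained illustration (class bodies adapted from the patch; the final return line of __repr__ is assumed, as it is outside the hunk context):

class Token:
    def __init__(self, start_mark, end_mark):
        self.start_mark = start_mark
        self.end_mark = end_mark
    def __repr__(self):
        attributes = [key for key in self.__dict__
                      if not key.endswith('_mark')]
        attributes.sort()
        arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
                               for key in attributes])
        return '%s(%s)' % (self.__class__.__name__, arguments)

class ScalarToken(Token):
    def __init__(self, value, plain, start_mark, end_mark):
        self.value = value
        self.plain = plain
        self.start_mark = start_mark
        self.end_mark = end_mark

print ScalarToken(u'foo', True, None, None)
# prints: ScalarToken(plain=True, value=u'foo')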
diff --git a/tests/data/document-separator-in-quoted-scalar.error-message b/tests/data/document-separator-in-quoted-scalar.error-message
new file mode 100644
index 0000000..9eeb0d6
--- /dev/null
+++ b/tests/data/document-separator-in-quoted-scalar.error-message
@@ -0,0 +1,11 @@
+---
+"this --- is correct"
+---
+"this
+...is also
+correct"
+---
+"a quoted scalar
+cannot contain
+---
+document separators"
diff --git a/tests/data/invalid-indentation-for-quoted-scalar.error-message b/tests/data/invalid-indentation-for-quoted-scalar.error-message
deleted file mode 100644
index b885db3..0000000
--- a/tests/data/invalid-indentation-for-quoted-scalar.error-message
+++ /dev/null
@@ -1,2 +0,0 @@
-test: "foo
-bar"
diff --git a/tests/data/sloppy-indentation.canonical b/tests/data/sloppy-indentation.canonical
new file mode 100644
index 0000000..0d312cc
--- /dev/null
+++ b/tests/data/sloppy-indentation.canonical
@@ -0,0 +1,18 @@
+%YAML 1.1
+---
+!!map {
+ ? !!str "in the block context"
+ : !!map {
+ ? !!str "indentation should be kept"
+ : !!map {
+ ? !!str "but in the flow context"
+ : !!seq [ !!str "it may be violated" ]
+ }
+ }
+}
+--- !!str
+"the parser does not require scalars to be indented with at least one space"
+--- !!str
+"the parser does not require scalars to be indented with at least one space"
+--- !!map
+{ ? !!str "foo": { ? !!str "bar" : "quoted scalars may not adhere indentation" } }
diff --git a/tests/data/sloppy-indentation.data b/tests/data/sloppy-indentation.data
new file mode 100644
index 0000000..2eb4f5a
--- /dev/null
+++ b/tests/data/sloppy-indentation.data
@@ -0,0 +1,17 @@
+---
+in the block context:
+ indentation should be kept: {
+ but in the flow context: [
+it may be violated]
+}
+---
+the parser does not require scalars
+to be indented with at least one space
+...
+---
+"the parser does not require scalars
+to be indented with at least one space"
+---
+foo:
+ bar: 'quoted scalars
+may not adhere indentation'
diff --git a/tests/data/test_marker.markers b/tests/data/test_mark.marks
index 7b08ee4..7b08ee4 100644
--- a/tests/data/test_marker.markers
+++ b/tests/data/test_mark.marks
diff --git a/tests/test_appliance.py b/tests/test_appliance.py
index 12239eb..eb54faa 100644
--- a/tests/test_appliance.py
+++ b/tests/test_appliance.py
@@ -47,6 +47,7 @@ class CanonicalScanner:
def scan(self):
#print self.data[self.index:]
tokens = []
+ tokens.append(StreamStartToken(None, None))
while True:
self.find_token()
ch = self.data[self.index]
@@ -206,13 +207,16 @@ class CanonicalParser:
self.scanner = CanonicalScanner(data)
self.events = []
- # stream: document* END
+ # stream: STREAM-START document* STREAM-END
def parse_stream(self):
+ self.consume_token(StreamStartToken)
+ self.events.append(StreamStartEvent(None, None))
while not self.test_token(StreamEndToken):
if self.test_token(DirectiveToken, DocumentStartToken):
self.parse_document()
else:
raise Error("document is expected, got "+repr(self.tokens[self.index]))
+ self.consume_token(StreamEndToken)
self.events.append(StreamEndEvent(None, None))
# document: DIRECTIVE? DOCUMENT-START node
@@ -221,7 +225,9 @@ class CanonicalParser:
if self.test_token(DirectiveToken):
self.consume_token(DirectiveToken)
self.consume_token(DocumentStartToken)
+ self.events.append(DocumentStartEvent(None, None))
self.parse_node()
+ self.events.append(DocumentEndEvent(None, None))
# node: ALIAS | ANCHOR? TAG? (SCALAR|sequence|mapping)
def parse_node(self):
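With the hunks above, the canonical parser brackets its output with stream and document events. A sketch of the resulting shape for a one-document stream (assuming the event classes are importable from this branch's yaml.events module):

from yaml.events import *

events = [
    StreamStartEvent(None, None),     # new in this patch
    DocumentStartEvent(None, None),   # new in this patch
    # ... node events: scalars, sequences, mappings ...
    DocumentEndEvent(None, None),     # new in this patch
    StreamEndEvent(None, None),
]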
diff --git a/tests/test_marker.py b/tests/test_mark.py
index 9ea4474..4fa665e 100644
--- a/tests/test_marker.py
+++ b/tests/test_mark.py
@@ -1,12 +1,12 @@
import test_appliance
-from yaml.reader import Marker
+from yaml.reader import Mark
-class TestMarker(test_appliance.TestAppliance):
+class TestMark(test_appliance.TestAppliance):
- def _testMarkers(self, test_name, markers_filename):
- inputs = file(markers_filename, 'rb').read().split('---\n')[1:]
+ def _testMarks(self, test_name, marks_filename):
+ inputs = file(marks_filename, 'rb').read().split('---\n')[1:]
for input in inputs:
index = 0
line = 0
@@ -18,8 +18,8 @@ class TestMarker(test_appliance.TestAppliance):
else:
column += 1
index += 1
- marker = Marker(test_name, line, column, unicode(input), index)
- snippet = marker.get_snippet(indent=2, max_length=79)
+ mark = Mark(test_name, index, line, column, unicode(input), index)
+ snippet = mark.get_snippet(indent=2, max_length=79)
#print "INPUT:"
#print input
#print "SNIPPET:"
@@ -30,5 +30,5 @@ class TestMarker(test_appliance.TestAppliance):
self.failUnless(len(data) < 82)
self.failUnlessEqual(data[len(pointer)-1], '*')
-TestMarker.add_tests('testMarkers', '.markers')
+TestMark.add_tests('testMarks', '.marks')
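Note the changed constructor call in the test above: Mark now takes the character index as its second argument, i.e. Mark(name, index, line, column, buffer, pointer). A hedged usage sketch (values chosen for illustration only):

from yaml.reader import Mark

data = u'- foo\n- bar\n'
mark = Mark('example', 8, 1, 2, data, 8)    # index 8 is the 'b' of 'bar'
print mark.get_snippet(indent=2, max_length=79)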
diff --git a/tests/test_structure.py b/tests/test_structure.py
index 985d2c3..ca3cb60 100644
--- a/tests/test_structure.py
+++ b/tests/test_structure.py
@@ -12,7 +12,10 @@ class TestStructure(test_appliance.TestAppliance):
parser = Parser(Scanner(Reader(file(data_filename, 'rb'))))
node1 = []
while not parser.check(StreamEndEvent):
- node1.append(self._convert(parser))
+ if not parser.check(StreamStartEvent, DocumentStartEvent, DocumentEndEvent):
+ node1.append(self._convert(parser))
+ else:
+ parser.get()
parser.get()
if len(node1) == 1:
node1 = node1[0]
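The structure test adapts to the richer event stream by discarding the new structural events before converting nodes; the token tests below apply the same idea with an isinstance filter. The pattern in isolation, as a sketch (parser supports check()/get() as in the test, and the event classes are assumed importable from yaml.events):

from yaml.events import *

def convert_stream(parser, convert):
    # Collect converted nodes, skipping the structural events that the
    # parser now emits around documents.
    nodes = []
    while not parser.check(StreamEndEvent):
        if parser.check(StreamStartEvent, DocumentStartEvent, DocumentEndEvent):
            parser.get()
        else:
            nodes.append(convert(parser))
    parser.get()    # the StreamEndEvent itself
    return nodes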
diff --git a/tests/test_tokens.py b/tests/test_tokens.py
index 2ccc305..38026d5 100644
--- a/tests/test_tokens.py
+++ b/tests/test_tokens.py
@@ -54,7 +54,7 @@ class TestTokens(test_appliance.TestAppliance):
scanner = Scanner(Reader(file(data_filename, 'rb')))
tokens1 = []
for token in scanner:
- if not isinstance(token, StreamEndToken):
+ if not isinstance(token, (StreamStartToken, StreamEndToken)):
tokens1.append(token)
tokens1 = [self.replaces[t.__class__] for t in tokens1]
self.failUnlessEqual(tokens1, tokens2)
@@ -77,7 +77,7 @@ class TestScanner(test_appliance.TestAppliance):
scanner = Scanner(Reader(file(filename, 'rb')))
tokens = []
for token in scanner:
- if not isinstance(token, StreamEndToken):
+ if not isinstance(token, (StreamStartToken, StreamEndToken)):
tokens.append(token.__class__.__name__)
except:
print
diff --git a/tests/test_yaml.py b/tests/test_yaml.py
index cfd4e79..bb5a9f1 100644
--- a/tests/test_yaml.py
+++ b/tests/test_yaml.py
@@ -1,7 +1,7 @@
import unittest
-from test_marker import *
+from test_mark import *
from test_reader import *
from test_canonical import *
from test_tokens import *