From 04cc6bab4adff87f260cc7611df36af5b5804d07 Mon Sep 17 00:00:00 2001 From: xi Date: Mon, 20 Mar 2006 19:25:39 +0000 Subject: Add a branch for working on Emitter. git-svn-id: http://svn.pyyaml.org/pyyaml/branches/working-on-emitter@121 18f92427-320e-0410-9341-c67f048884a3 --- examples/yaml-hl/yaml_hl.py | 73 ++++ examples/yaml-hl/yaml_hl_ascii.cfg | 43 +++ examples/yaml-hl/yaml_hl_html.cfg | 45 +++ lib/yaml/composer.py | 32 +- lib/yaml/constructor.py | 62 ++-- lib/yaml/error.py | 39 +- lib/yaml/events.py | 35 +- lib/yaml/nodes.py | 6 +- lib/yaml/parser.py | 159 ++++---- lib/yaml/reader.py | 11 +- lib/yaml/scanner.py | 407 +++++++++++---------- lib/yaml/tokens.py | 41 ++- ...cument-separator-in-quoted-scalar.error-message | 11 + ...lid-indentation-for-quoted-scalar.error-message | 2 - tests/data/sloppy-indentation.canonical | 18 + tests/data/sloppy-indentation.data | 17 + tests/data/test_mark.marks | 38 ++ tests/data/test_marker.markers | 38 -- tests/test_appliance.py | 8 +- tests/test_mark.py | 34 ++ tests/test_marker.py | 34 -- tests/test_structure.py | 5 +- tests/test_tokens.py | 4 +- tests/test_yaml.py | 2 +- 24 files changed, 727 insertions(+), 437 deletions(-) create mode 100755 examples/yaml-hl/yaml_hl.py create mode 100644 examples/yaml-hl/yaml_hl_ascii.cfg create mode 100644 examples/yaml-hl/yaml_hl_html.cfg create mode 100644 tests/data/document-separator-in-quoted-scalar.error-message delete mode 100644 tests/data/invalid-indentation-for-quoted-scalar.error-message create mode 100644 tests/data/sloppy-indentation.canonical create mode 100644 tests/data/sloppy-indentation.data create mode 100644 tests/data/test_mark.marks delete mode 100644 tests/data/test_marker.markers create mode 100644 tests/test_mark.py delete mode 100644 tests/test_marker.py diff --git a/examples/yaml-hl/yaml_hl.py b/examples/yaml-hl/yaml_hl.py new file mode 100755 index 0000000..dd81b3f --- /dev/null +++ b/examples/yaml-hl/yaml_hl.py @@ -0,0 +1,73 @@ +#!/usr/bin/python + +import yaml, codecs, sys, optparse + +class YAMLHighlight: + + def __init__(self, config): + parameters = yaml.load_document(config) + self.replaces = parameters['replaces'] + self.substitutions = {} + for domain, items in [('Token', parameters['tokens']), + ('Event', parameters['events'])]: + for code in items: + name = ''.join([part.capitalize() for part in code.split('-')]+[domain]) + cls = getattr(yaml, name) + value = items[code] + if value: + if 'start' in value: + self.substitutions[cls, -1] = value['start'] + if 'end' in value: + self.substitutions[cls, +1] = value['end'] + + def highlight(self, input): + if isinstance(input, str): + if input.startswith(codecs.BOM_UTF16_LE): + input = unicode(input, 'utf-16-le') + elif input.startswith(codecs.BOM_UTF16_BE): + input = unicode(input, 'utf-16-be') + else: + input = unicode(input, 'utf-8') + tokens = yaml.parse(input, Parser=iter) + events = yaml.parse(input) + markers = [] + number = 0 + for token in tokens: + number += 1 + if token.start_mark.index != token.end_mark.index: + cls = token.__class__ + if (cls, -1) in self.substitutions: + markers.append([token.start_mark.index, +2, number, self.substitutions[cls, -1]]) + if (cls, +1) in self.substitutions: + markers.append([token.end_mark.index, -2, number, self.substitutions[cls, +1]]) + number = 0 + for event in events: + number += 1 + cls = event.__class__ + if (cls, -1) in self.substitutions: + markers.append([event.start_mark.index, +1, number, self.substitutions[cls, -1]]) + if (cls, +1) in self.substitutions: + 
markers.append([event.end_mark.index, -1, number, self.substitutions[cls, +1]]) + markers.sort() + markers.reverse() + chunks = [] + position = len(input) + for index, weight1, weight2, substitution in markers: + if index < position: + chunk = input[index:position] + for substring, replacement in self.replaces: + chunk = chunk.replace(substring, replacement) + chunks.append(chunk) + position = index + chunks.append(substitution) + chunks.reverse() + result = u''.join(chunks) + return result.encode('utf-8') + +if __name__ == '__main__': + parser = optparse.OptionParser() + parser.add_option('-c', '--config', dest='config', default='yaml_hl_ascii.cfg', metavar='CONFIG') + (options, args) = parser.parse_args() + hl = YAMLHighlight(file(options.config)) + sys.stdout.write(hl.highlight(sys.stdin.read())) + diff --git a/examples/yaml-hl/yaml_hl_ascii.cfg b/examples/yaml-hl/yaml_hl_ascii.cfg new file mode 100644 index 0000000..851d0f8 --- /dev/null +++ b/examples/yaml-hl/yaml_hl_ascii.cfg @@ -0,0 +1,43 @@ +%YAML 1.1 +--- +tokens: + stream-start: + stream-end: + directive: { start: "\e[35m", end: "\e[0;1;30;40m" } + document-start: { start: "\e[35m", end: "\e[0;1;30;40m" } + document-end: { start: "\e[35m", end: "\e[0;1;30;40m" } + block-sequence-start: + block-mapping-start: + block-end: + flow-sequence-start: { start: "\e[33m", end: "\e[0;1;30;40m" } + flow-mapping-start: { start: "\e[33m", end: "\e[0;1;30;40m" } + flow-sequence-end: { start: "\e[33m", end: "\e[0;1;30;40m" } + flow-mapping-end: { start: "\e[33m", end: "\e[0;1;30;40m" } + key: { start: "\e[33m", end: "\e[0;1;30;40m" } + value: { start: "\e[33m", end: "\e[0;1;30;40m" } + block-entry: { start: "\e[33m", end: "\e[0;1;30;40m" } + flow-entry: { start: "\e[33m", end: "\e[0;1;30;40m" } + alias: { start: "\e[32m", end: "\e[0;1;30;40m" } + anchor: { start: "\e[32m", end: "\e[0;1;30;40m" } + tag: { start: "\e[32m", end: "\e[0;1;30;40m" } + scalar: { start: "\e[36m", end: "\e[0;1;30;40m" } + +events: + stream-start: { start: "\e[0;1;30;40m" } + stream-end: { end: "\e[0m" } + document-start: + document-end: + sequence: + mapping: + collection-end: + scalar: + +replaces: !!pairs + - "\r\n": "\n" + - "\r": "\n" + - "\n": "\n" + - "\x85": "\n" + - "\u2028": "\n" + - "\u2029": "\n" + +# vim: ft=yaml diff --git a/examples/yaml-hl/yaml_hl_html.cfg b/examples/yaml-hl/yaml_hl_html.cfg new file mode 100644 index 0000000..903810e --- /dev/null +++ b/examples/yaml-hl/yaml_hl_html.cfg @@ -0,0 +1,45 @@ + +tokens: + stream-start: + stream-end: + directive: { start: , end: } + document-start: { start: , end: } + document-end: { start: , end: } + block-sequence-start: + block-mapping-start: + block-end: + flow-sequence-start: { start: , end: } + flow-mapping-start: { start: , end: } + flow-sequence-end: { start: , end: } + flow-mapping-end: { start: , end: } + key: { start: , end: } + value: { start: , end: } + block-entry: { start: , end: } + flow-entry: { start: , end: } + alias: { start: , end: } + anchor: { start: , end: } + tag: { start: , end: } + scalar: { start: , end: } + +events: + stream-start: { start:
 }
+    stream-end:     { end: 
 }
+    document-start: { start: }
+    document-end:   { end: }
+    sequence:       { start: }
+    mapping:        { start: }
+    collection-end: { end: }
+    scalar:         { start: , end: }
+
+replaces: !!pairs
+    - "\r\n":   "\n"
+    - "\r":     "\n"
+    - "\n":     "\n"
+    - "\x85":   "\n"
+    - "\u2028": "\n"
+    - "\u2029": "\n"
+    - "<":      "&lt;"
+    - ">":      "&gt;"
+    - "&":      "&amp;"
+
+# vim: ft=yaml
diff --git a/lib/yaml/composer.py b/lib/yaml/composer.py
index 0f00062..9b8c1e2 100644
--- a/lib/yaml/composer.py
+++ b/lib/yaml/composer.py
@@ -15,6 +15,9 @@ class Composer:
         self.all_anchors = {}
         self.complete_anchors = {}
 
+        # Drop the STREAM-START event.
+        self.parser.get()
+
     def check(self):
         # If there are more documents available?
         return not self.parser.check(StreamEndEvent)
@@ -30,7 +33,16 @@ class Composer:
             yield self.compose_document()
 
     def compose_document(self):
+
+        # Drop the DOCUMENT-START event.
+        self.parser.get()
+
+        # Compose the root node.
         node = self.compose_node()
+
+        # Drop the DOCUMENT-END event.
+        self.parser.get()
+
         self.all_anchors = {}
         self.complete_anchors = {}
         return node
@@ -41,21 +53,21 @@ class Composer:
             anchor = event.anchor
             if anchor not in self.all_anchors:
                 raise ComposerError(None, None, "found undefined alias %r"
-                        % anchor.encode('utf-8'), event.start_marker)
+                        % anchor.encode('utf-8'), event.start_mark)
             if anchor not in self.complete_anchors:
                 collection_event = self.all_anchors[anchor]
                 raise ComposerError("while composing a collection",
-                        collection_event.start_marker,
+                        collection_event.start_mark,
                         "found recursive anchor %r" % anchor.encode('utf-8'),
-                        event.start_marker)
+                        event.start_mark)
             return self.complete_anchors[anchor]
         event = self.parser.peek()
         anchor = event.anchor
         if anchor is not None:
             if anchor in self.all_anchors:
                 raise ComposerError("found duplicate anchor %r; first occurence"
-                        % anchor.encode('utf-8'), self.all_anchors[anchor].start_marker,
-                        "second occurence", event.start_marker)
+                        % anchor.encode('utf-8'), self.all_anchors[anchor].start_mark,
+                        "second occurence", event.start_mark)
             self.all_anchors[anchor] = event
         if self.parser.check(ScalarEvent):
             node = self.compose_scalar_node()
@@ -70,7 +82,7 @@ class Composer:
     def compose_scalar_node(self):
         event = self.parser.get()
         return ScalarNode(event.tag, event.value,
-                event.start_marker, event.end_marker)
+                event.start_mark, event.end_mark)
 
     def compose_sequence_node(self):
         start_event = self.parser.get()
@@ -79,7 +91,7 @@ class Composer:
             value.append(self.compose_node())
         end_event = self.parser.get()
         return SequenceNode(start_event.tag, value,
-                start_event.start_marker, end_event.end_marker)
+                start_event.start_mark, end_event.end_mark)
 
     def compose_mapping_node(self):
         start_event = self.parser.get()
@@ -89,10 +101,10 @@ class Composer:
             item_key = self.compose_node()
             item_value = self.compose_node()
             if item_key in value:
-                raise ComposerError("while composing a mapping", start_event.start_marker,
-                        "found duplicate key", key_event.start_marker)
+                raise ComposerError("while composing a mapping", start_event.start_mark,
+                        "found duplicate key", key_event.start_mark)
             value[item_key] = item_value
         end_event = self.parser.get()
         return MappingNode(start_event.tag, value,
-                start_event.start_marker, end_event.end_marker)
+                start_event.start_mark, end_event.end_mark)
 
diff --git a/lib/yaml/constructor.py b/lib/yaml/constructor.py
index 1fc3b7d..b3f5a88 100644
--- a/lib/yaml/constructor.py
+++ b/lib/yaml/constructor.py
@@ -70,28 +70,28 @@ class BaseConstructor:
                 return self.construct_scalar(node.value[key_node])
             raise ConstructorError(None, None,
                     "expected a scalar node, but found %s" % node.id,
-                    node.start_marker)
+                    node.start_mark)
         return node.value
 
     def construct_sequence(self, node):
         if not isinstance(node, SequenceNode):
             raise ConstructorError(None, None,
                     "expected a sequence node, but found %s" % node.id,
-                    node.start_marker)
+                    node.start_mark)
         return [self.construct_object(child) for child in node.value]
 
     def construct_mapping(self, node):
         if not isinstance(node, MappingNode):
             raise ConstructorError(None, None,
                     "expected a mapping node, but found %s" % node.id,
-                    node.start_marker)
+                    node.start_mark)
         mapping = {}
         merge = None
         for key_node in node.value:
             if key_node.tag == u'tag:yaml.org,2002:merge':
                 if merge is not None:
-                    raise ConstructorError("while constructing a mapping", node.start_marker,
-                            "found duplicate merge key", key_node.start_marker)
+                    raise ConstructorError("while constructing a mapping", node.start_mark,
+                            "found duplicate merge key", key_node.start_mark)
                 value_node = node.value[key_node]
                 if isinstance(value_node, MappingNode):
                     merge = [self.construct_mapping(value_node)]
@@ -100,19 +100,19 @@ class BaseConstructor:
                     for subnode in value_node.value:
                         if not isinstance(subnode, MappingNode):
                             raise ConstructorError("while constructing a mapping",
-                                    node.start_marker,
+                                    node.start_mark,
                                     "expected a mapping for merging, but found %s"
-                                    % subnode.id, subnode.start_marker)
+                                    % subnode.id, subnode.start_mark)
                         merge.append(self.construct_mapping(subnode))
                     merge.reverse()
                 else:
-                    raise ConstructorError("while constructing a mapping", node.start_marker,
+                    raise ConstructorError("while constructing a mapping", node.start_mark,
                             "expected a mapping or list of mappings for merging, but found %s"
-                            % value_node.id, value_node.start_marker)
+                            % value_node.id, value_node.start_mark)
             elif key_node.tag == u'tag:yaml.org,2002:value':
                 if '=' in mapping:
-                    raise ConstructorError("while construction a mapping", node.start_marker,
-                            "found duplicate value key", key_node.start_marker)
+                    raise ConstructorError("while construction a mapping", node.start_mark,
+                            "found duplicate value key", key_node.start_mark)
                 value = self.construct_object(node.value[key_node])
                 mapping['='] = value
             else:
@@ -120,11 +120,11 @@ class BaseConstructor:
                 key = self.construct_object(key_node)
                 try:
                     duplicate_key = key in mapping
                 except TypeError, exc:
-                    raise ConstructorError("while constructing a mapping", node.start_marker,
-                            "found unacceptable key (%s)" % exc, key_node.start_marker)
+                    raise ConstructorError("while constructing a mapping", node.start_mark,
+                            "found unacceptable key (%s)" % exc, key_node.start_mark)
                 if duplicate_key:
-                    raise ConstructorError("while constructing a mapping", node.start_marker,
-                            "found duplicate key", key_node.start_marker)
+                    raise ConstructorError("while constructing a mapping", node.start_mark,
+                            "found duplicate key", key_node.start_mark)
                 value = self.construct_object(node.value[key_node])
                 mapping[key] = value
         if merge is not None:
@@ -138,7 +138,7 @@ class BaseConstructor:
         if not isinstance(node, MappingNode):
             raise ConstructorError(None, None,
                     "expected a mapping node, but found %s" % node.id,
-                    node.start_marker)
+                    node.start_mark)
         pairs = []
         for key_node in node.value:
             key = self.construct_object(key_node)
@@ -234,7 +234,7 @@ class Constructor(BaseConstructor):
             return str(value).decode('base64')
         except (binascii.Error, UnicodeEncodeError), exc:
             raise ConstructorError(None, None,
-                    "failed to decode base64 data: %s" % exc, node.start_marker)
+                    "failed to decode base64 data: %s" % exc, node.start_mark)
 
     timestamp_regexp = re.compile(
             ur'''^(?P<year>[0-9][0-9][0-9][0-9])
@@ -271,18
+271,18 @@ class Constructor(BaseConstructor): # Note: we do not check for duplicate keys, because it's too # CPU-expensive. if not isinstance(node, SequenceNode): - raise ConstructorError("while constructing an ordered map", node.start_marker, - "expected a sequence, but found %s" % node.id, node.start_marker) + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) omap = [] for subnode in node.value: if not isinstance(subnode, MappingNode): - raise ConstructorError("while constructing an ordered map", node.start_marker, + raise ConstructorError("while constructing an ordered map", node.start_mark, "expected a mapping of length 1, but found %s" % subnode.id, - subnode.start_marker) + subnode.start_mark) if len(subnode.value) != 1: - raise ConstructorError("while constructing an ordered map", node.start_marker, + raise ConstructorError("while constructing an ordered map", node.start_mark, "expected a single mapping item, but found %d items" % len(subnode.value), - subnode.start_marker) + subnode.start_mark) key_node = subnode.value.keys()[0] key = self.construct_object(key_node) value = self.construct_object(subnode.value[key_node]) @@ -292,18 +292,18 @@ class Constructor(BaseConstructor): def construct_yaml_pairs(self, node): # Note: the same code as `construct_yaml_omap`. if not isinstance(node, SequenceNode): - raise ConstructorError("while constructing pairs", node.start_marker, - "expected a sequence, but found %s" % node.id, node.start_marker) + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) pairs = [] for subnode in node.value: if not isinstance(subnode, MappingNode): - raise ConstructorError("while constructing pairs", node.start_marker, + raise ConstructorError("while constructing pairs", node.start_mark, "expected a mapping of length 1, but found %s" % subnode.id, - subnode.start_marker) + subnode.start_mark) if len(subnode.value) != 1: - raise ConstructorError("while constructing pairs", node.start_marker, + raise ConstructorError("while constructing pairs", node.start_mark, "expected a single mapping item, but found %d items" % len(subnode.value), - subnode.start_marker) + subnode.start_mark) key_node = subnode.value.keys()[0] key = self.construct_object(key_node) value = self.construct_object(subnode.value[key_node]) @@ -330,7 +330,7 @@ class Constructor(BaseConstructor): def construct_undefined(self, node): raise ConstructorError(None, None, "could not determine a constructor for the tag %r" % node.tag.encode('utf-8'), - node.start_marker) + node.start_mark) Constructor.add_constructor( u'tag:yaml.org,2002:null', @@ -402,7 +402,7 @@ class YAMLObject(object): def from_yaml(cls, constructor, node): raise ConstructorError(None, None, "found undefined constructor for the tag %r" - % node.tag.encode('utf-8'), node.start_marker) + % node.tag.encode('utf-8'), node.start_mark) from_yaml = classmethod(from_yaml) def to_yaml(self): diff --git a/lib/yaml/error.py b/lib/yaml/error.py index 38f143e..a818210 100644 --- a/lib/yaml/error.py +++ b/lib/yaml/error.py @@ -1,10 +1,11 @@ -__all__ = ['Marker', 'YAMLError', 'MarkedYAMLError'] +__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError'] -class Marker: +class Mark: - def __init__(self, name, line, column, buffer, pointer): + def __init__(self, name, index, line, column, buffer, pointer): self.name = name + self.index = index self.line = line self.column = column 
self.buffer = buffer @@ -46,33 +47,33 @@ class YAMLError(Exception): class MarkedYAMLError(YAMLError): - def __init__(self, context=None, context_marker=None, - problem=None, problem_marker=None): + def __init__(self, context=None, context_mark=None, + problem=None, problem_mark=None): self.context = context - self.context_marker = context_marker + self.context_mark = context_mark self.problem = problem - self.problem_marker = problem_marker + self.problem_mark = problem_mark def __str__(self): lines = [] - #for (place, marker) in [(self.context, self.context_marker), - # (self.problem, self.problem_marker)]: + #for (place, mark) in [(self.context, self.context_mark), + # (self.problem, self.problem_mark)]: # if place is not None: # lines.append(place) - # if marker is not None: - # lines.append(str(marker)) + # if mark is not None: + # lines.append(str(mark)) if self.context is not None: lines.append(self.context) - if self.context_marker is not None \ - and (self.problem is None or self.problem_marker is None - or self.context_marker.name != self.problem_marker.name - or self.context_marker.line != self.problem_marker.line - or self.context_marker.column != self.problem_marker.column): - lines.append(str(self.context_marker)) + if self.context_mark is not None \ + and (self.problem is None or self.problem_mark is None + or self.context_mark.name != self.problem_mark.name + or self.context_mark.line != self.problem_mark.line + or self.context_mark.column != self.problem_mark.column): + lines.append(str(self.context_mark)) if self.problem is not None: lines.append(self.problem) - if self.problem_marker is not None: - lines.append(str(self.problem_marker)) + if self.problem_mark is not None: + lines.append(str(self.problem_mark)) return '\n'.join(lines) diff --git a/lib/yaml/events.py b/lib/yaml/events.py index d468c53..8837633 100644 --- a/lib/yaml/events.py +++ b/lib/yaml/events.py @@ -1,39 +1,39 @@ class Event: - def __init__(self, start_marker, end_marker): - self.start_marker = start_marker - self.end_marker = end_marker + def __init__(self, start_mark, end_mark): + self.start_mark = start_mark + self.end_mark = end_mark def __repr__(self): attributes = [key for key in self.__dict__ - if not key.endswith('_marker')] + if not key.endswith('_mark')] attributes.sort() arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) for key in attributes]) return '%s(%s)' % (self.__class__.__name__, arguments) class NodeEvent(Event): - def __init__(self, anchor, start_marker, end_marker): + def __init__(self, anchor, start_mark, end_mark): self.anchor = anchor - self.start_marker = start_marker - self.end_marker = end_marker + self.start_mark = start_mark + self.end_mark = end_mark class AliasEvent(NodeEvent): pass class ScalarEvent(NodeEvent): - def __init__(self, anchor, tag, value, start_marker, end_marker): + def __init__(self, anchor, tag, value, start_mark, end_mark): self.anchor = anchor self.tag = tag self.value = value - self.start_marker = start_marker - self.end_marker = end_marker + self.start_mark = start_mark + self.end_mark = end_mark class CollectionEvent(NodeEvent): - def __init__(self, anchor, tag, start_marker, end_marker): + def __init__(self, anchor, tag, start_mark, end_mark): self.anchor = anchor self.tag = tag - self.start_marker = start_marker - self.end_marker = end_marker + self.start_mark = start_mark + self.end_mark = end_mark class SequenceEvent(CollectionEvent): pass @@ -44,6 +44,15 @@ class MappingEvent(CollectionEvent): class CollectionEndEvent(Event): pass 
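# [Editor's note -- illustration only, not part of the patch. A minimal
#  sketch of how the renamed events and marks fit together; the constructor
#  signatures are taken from the diffs in this patch, and the yaml.events /
#  yaml.error import paths assume the lib/ layout of this branch:
#
#      from yaml.events import ScalarEvent, StreamEndEvent
#      from yaml.error import Mark
#
#      mark = Mark('<example>', 0, 0, 0, None, None)
#      events = [
#          ScalarEvent(None, u'tag:yaml.org,2002:str', u'hello', mark, mark),
#          StreamEndEvent(mark, mark),
#      ]
#      for event in events:
#          print event    # __repr__ above hides the *_mark attributes
#  ]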
+class DocumentStartEvent(Event):
+    pass
+
+class DocumentEndEvent(Event):
+    pass
+
+class StreamStartEvent(Event):
+    pass
+
 class StreamEndEvent(Event):
     pass
 
diff --git a/lib/yaml/nodes.py b/lib/yaml/nodes.py
index 377d24c..6c27421 100644
--- a/lib/yaml/nodes.py
+++ b/lib/yaml/nodes.py
@@ -1,10 +1,10 @@
 class Node:
-    def __init__(self, tag, value, start_marker, end_marker):
+    def __init__(self, tag, value, start_mark, end_mark):
         self.tag = tag
         self.value = value
-        self.start_marker = start_marker
-        self.end_marker = end_marker
+        self.start_mark = start_mark
+        self.end_mark = end_mark
     def __repr__(self):
         value = self.value
         if isinstance(value, list):
diff --git a/lib/yaml/parser.py b/lib/yaml/parser.py
index a66dc81..9870699 100644
--- a/lib/yaml/parser.py
+++ b/lib/yaml/parser.py
@@ -2,7 +2,7 @@
 # YAML can be parsed by an LL(1) parser!
 #
 # We use the following production rules:
-# stream ::= implicit_document? explicit_document* STREAM-END
+# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
 # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END?
 # implicit_document ::= block_node DOCUMENT-END?
 # block_node ::= ALIAS | properties? block_content
@@ -42,7 +42,7 @@
 # or line breaks.
 
 # FIRST sets:
-# stream: FIRST(block_node) + { DIRECTIVE DOCUMENT-START }
+# stream: { STREAM-START }
 # explicit_document: { DIRECTIVE DOCUMENT-START }
 # implicit_document: FIRST(block_node)
 # block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
@@ -126,36 +126,57 @@ class Parser:
         return self.event_generator
 
     def parse_stream(self):
-        # implicit_document? explicit_document* STREAM-END
+        # STREAM-START implicit_document? explicit_document* STREAM-END
+
+        # Parse start of stream.
+        token = self.scanner.get()
+        yield StreamStartEvent(token.start_mark, token.end_mark)
 
         # Parse implicit document.
         if not self.scanner.check(DirectiveToken, DocumentStartToken,
                 StreamEndToken):
             self.tag_handles = self.DEFAULT_TAGS
+            token = self.scanner.peek()
+            start_mark = end_mark = token.start_mark
+            yield DocumentStartEvent(start_mark, end_mark)
             for event in self.parse_block_node():
                 yield event
+            token = self.scanner.peek()
+            start_mark = end_mark = token.start_mark
+            while self.scanner.check(DocumentEndToken):
+                token = self.scanner.get()
+                end_mark = token.end_mark
+            yield DocumentEndEvent(start_mark, end_mark)
 
         # Parse explicit documents.
         while not self.scanner.check(StreamEndToken):
+            token = self.scanner.peek()
+            start_mark = token.start_mark
             self.process_directives()
             if not self.scanner.check(DocumentStartToken):
                 raise ParserError(None, None,
                         "expected '<document start>', but found %r"
                         % self.scanner.peek().id,
-                        self.scanner.peek().start_marker)
+                        self.scanner.peek().start_mark)
             token = self.scanner.get()
+            end_mark = token.end_mark
+            yield DocumentStartEvent(start_mark, end_mark)
            if self.scanner.check(DirectiveToken,
                    DocumentStartToken, DocumentEndToken, StreamEndToken):
-                yield self.process_empty_scalar(token.end_marker)
+                yield self.process_empty_scalar(token.end_mark)
            else:
                for event in self.parse_block_node():
                    yield event
+            token = self.scanner.peek()
+            start_mark = end_mark = token.start_mark
            while self.scanner.check(DocumentEndToken):
-                self.scanner.get()
+                token = self.scanner.get()
+                end_mark = token.end_mark
+            yield DocumentEndEvent(start_mark, end_mark)
 
        # Parse end of stream.
token = self.scanner.get() - yield StreamEndEvent(token.start_marker, token.end_marker) + yield StreamEndEvent(token.start_mark, token.end_mark) def process_directives(self): # DIRECTIVE* @@ -166,19 +187,19 @@ class Parser: if token.name == u'YAML': if self.yaml_version is not None: raise ParserError(None, None, - "found duplicate YAML directive", token.start_marker) + "found duplicate YAML directive", token.start_mark) major, minor = token.value if major != 1: raise ParserError(None, None, "found incompatible YAML document (version 1.* is required)", - token.start_marker) + token.start_mark) self.yaml_version = token.value elif token.name == u'TAG': handle, prefix = token.value if handle in self.tag_handles: raise ParserError(None, None, "duplicate tag handle %r" % handle.encode('utf-8'), - token.start_marker) + token.start_mark) self.tag_handles[handle] = prefix for key in self.DEFAULT_TAGS: if key not in self.tag_handles: @@ -204,34 +225,34 @@ class Parser: # (block_content | indentless_block_sequence) if self.scanner.check(AliasToken): token = self.scanner.get() - yield AliasEvent(token.value, token.start_marker, token.end_marker) + yield AliasEvent(token.value, token.start_mark, token.end_mark) else: anchor = None tag = None - start_marker = end_marker = tag_marker = None + start_mark = end_mark = tag_mark = None if self.scanner.check(AnchorToken): token = self.scanner.get() - start_marker = end_marker = token.start_marker + start_mark = end_mark = token.start_mark anchor = token.value if self.scanner.check(TagToken): token = self.scanner.get() - end_marker = tag_marker = token.start_marker + end_mark = tag_mark = token.start_mark tag = token.value elif self.scanner.check(TagToken): token = self.scanner.get() - start_marker = end_marker = tag_marker = token.start_marker + start_mark = end_mark = tag_mark = token.start_mark tag = token.value if self.scanner.check(AnchorToken): token = self.scanner.get() - end_marker = token.start_marker + end_mark = token.start_mark anchor = token.value if tag is not None: handle, suffix = tag if handle is not None: if handle not in self.tag_handles: - raise ParserError("while parsing a node", start_marker, + raise ParserError("while parsing a node", start_mark, "found undefined tag handle %r" % handle.encode('utf-8'), - tag_marker) + tag_mark) tag = self.tag_handles[handle]+suffix else: tag = suffix @@ -239,35 +260,35 @@ class Parser: if not (self.scanner.check(ScalarToken) and self.scanner.peek().plain): tag = u'!' 
-        if start_marker is None:
-            start_marker = self.scanner.peek().start_marker
+        if start_mark is None:
+            start_mark = self.scanner.peek().start_mark
         event = None
         collection_events = None
         if indentless_sequence and self.scanner.check(BlockEntryToken):
-            end_marker = self.scanner.peek().end_marker
-            event = SequenceEvent(anchor, tag, start_marker, end_marker)
+            end_mark = self.scanner.peek().end_mark
+            event = SequenceEvent(anchor, tag, start_mark, end_mark)
             collection_events = self.parse_indentless_sequence()
         else:
             if self.scanner.check(ScalarToken):
                 token = self.scanner.get()
-                end_marker = token.end_marker
+                end_mark = token.end_mark
                 event = ScalarEvent(anchor, tag, token.value,
-                        start_marker, end_marker)
+                        start_mark, end_mark)
             elif self.scanner.check(FlowSequenceStartToken):
-                end_marker = self.scanner.peek().end_marker
-                event = SequenceEvent(anchor, tag, start_marker, end_marker)
+                end_mark = self.scanner.peek().end_mark
+                event = SequenceEvent(anchor, tag, start_mark, end_mark)
                 collection_events = self.parse_flow_sequence()
             elif self.scanner.check(FlowMappingStartToken):
-                end_marker = self.scanner.peek().end_marker
-                event = MappingEvent(anchor, tag, start_marker, end_marker)
+                end_mark = self.scanner.peek().end_mark
+                event = MappingEvent(anchor, tag, start_mark, end_mark)
                 collection_events = self.parse_flow_mapping()
             elif block and self.scanner.check(BlockSequenceStartToken):
-                end_marker = self.scanner.peek().start_marker
-                event = SequenceEvent(anchor, tag, start_marker, end_marker)
+                end_mark = self.scanner.peek().start_mark
+                event = SequenceEvent(anchor, tag, start_mark, end_mark)
                 collection_events = self.parse_block_sequence()
             elif block and self.scanner.check(BlockMappingStartToken):
-                end_marker = self.scanner.peek().start_marker
-                event = MappingEvent(anchor, tag, start_marker, end_marker)
+                end_mark = self.scanner.peek().start_mark
+                event = MappingEvent(anchor, tag, start_mark, end_mark)
                 collection_events = self.parse_block_mapping()
             else:
                 if block:
@@ -275,9 +296,9 @@ class Parser:
                 else:
                     node = 'flow'
                 token = self.scanner.peek()
-                raise ParserError("while scanning a %s node" % node, start_marker,
+                raise ParserError("while scanning a %s node" % node, start_mark,
                         "expected the node content, but found %r" % token.id,
-                        token.start_marker)
+                        token.start_mark)
         yield event
         if collection_events is not None:
             for event in collection_events:
@@ -286,20 +307,20 @@ class Parser:
     def parse_block_sequence(self):
         # BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
         token = self.scanner.get()
-        start_marker = token.start_marker
+        start_mark = token.start_mark
         while self.scanner.check(BlockEntryToken):
             token = self.scanner.get()
             if not self.scanner.check(BlockEntryToken, BlockEndToken):
                 for event in self.parse_block_node():
                     yield event
             else:
-                yield self.process_empty_scalar(token.end_marker)
+                yield self.process_empty_scalar(token.end_mark)
         if not self.scanner.check(BlockEndToken):
             token = self.scanner.peek()
-            raise ParserError("while scanning a block collection", start_marker,
-                    "expected <block end>, but found %r" % token.id, token.start_marker)
+            raise ParserError("while scanning a block collection", start_mark,
+                    "expected <block end>, but found %r" % token.id, token.start_mark)
         token = self.scanner.get()
-        yield CollectionEndEvent(token.start_marker, token.end_marker)
+        yield CollectionEndEvent(token.start_mark, token.end_mark)
 
     def parse_indentless_sequence(self):
         # (BLOCK-ENTRY block_node?)+
@@ -310,9 +331,9 @@ class Parser:
             for event in self.parse_block_node():
                 yield event
             else:
-                yield self.process_empty_scalar(token.end_marker)
+                yield self.process_empty_scalar(token.end_mark)
         token = self.scanner.peek()
-        yield CollectionEndEvent(token.start_marker, token.start_marker)
+        yield CollectionEndEvent(token.start_mark, token.start_mark)
 
     def parse_block_mapping(self):
         # BLOCK-MAPPING_START
@@ -320,7 +341,7 @@ class Parser:
         #   (VALUE block_node_or_indentless_sequence?)?)*
         # BLOCK-END
         token = self.scanner.get()
-        start_marker = token.start_marker
+        start_mark = token.start_mark
         while self.scanner.check(KeyToken, ValueToken):
             if self.scanner.check(KeyToken):
                 token = self.scanner.get()
@@ -328,23 +349,23 @@ class Parser:
                 for event in self.parse_block_node_or_indentless_sequence():
                     yield event
                 else:
-                    yield self.process_empty_scalar(token.end_marker)
+                    yield self.process_empty_scalar(token.end_mark)
             if self.scanner.check(ValueToken):
                 token = self.scanner.get()
                 if not self.scanner.check(KeyToken, ValueToken, BlockEndToken):
                     for event in self.parse_block_node_or_indentless_sequence():
                         yield event
                 else:
-                    yield self.process_empty_scalar(token.end_marker)
+                    yield self.process_empty_scalar(token.end_mark)
             else:
                 token = self.scanner.peek()
-                yield self.process_empty_scalar(token.start_marker)
+                yield self.process_empty_scalar(token.start_mark)
         if not self.scanner.check(BlockEndToken):
             token = self.scanner.peek()
-            raise ParserError("while scanning a block mapping", start_marker,
-                    "expected <block end>, but found %r" % token.id, token.start_marker)
+            raise ParserError("while scanning a block mapping", start_mark,
+                    "expected <block end>, but found %r" % token.id, token.start_mark)
         token = self.scanner.get()
-        yield CollectionEndEvent(token.start_marker, token.end_marker)
+        yield CollectionEndEvent(token.start_mark, token.end_mark)
 
     def parse_flow_sequence(self):
         # flow_sequence     ::= FLOW-SEQUENCE-START
         #                       (flow_sequence_entry FLOW-ENTRY)*
         #                       flow_sequence_entry?
         #                       FLOW-SEQUENCE-END
         # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
@@ -358,41 +379,41 @@ class Parser:
        # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
        # generate an inline mapping (set syntax).
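# [Editor's note, not part of the patch: under this rule a flow entry such
#  as "a: 1" in the sequence "[ a: 1, b ]" is parsed as a one-pair inline
#  mapping rather than as a plain scalar; the same mechanism gives YAML its
#  flow-style set syntax.]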
token = self.scanner.get() - start_marker = token.start_marker + start_mark = token.start_mark while not self.scanner.check(FlowSequenceEndToken): if self.scanner.check(KeyToken): token = self.scanner.get() yield MappingEvent(None, u'!', - token.start_marker, token.end_marker) + token.start_mark, token.end_mark) if not self.scanner.check(ValueToken, FlowEntryToken, FlowSequenceEndToken): for event in self.parse_flow_node(): yield event else: - yield self.process_empty_scalar(token.end_marker) + yield self.process_empty_scalar(token.end_mark) if self.scanner.check(ValueToken): token = self.scanner.get() if not self.scanner.check(FlowEntryToken, FlowSequenceEndToken): for event in self.parse_flow_node(): yield event else: - yield self.process_empty_scalar(token.end_marker) + yield self.process_empty_scalar(token.end_mark) else: token = self.scanner.peek() - yield self.process_empty_scalar(token.start_marker) + yield self.process_empty_scalar(token.start_mark) token = self.scanner.peek() - yield CollectionEndEvent(token.start_marker, token.start_marker) + yield CollectionEndEvent(token.start_mark, token.start_mark) else: for event in self.parse_flow_node(): yield event if not self.scanner.check(FlowEntryToken, FlowSequenceEndToken): token = self.scanner.peek() - raise ParserError("while scanning a flow sequence", start_marker, - "expected ',' or ']', but got %r" % token.id, token.start_marker) + raise ParserError("while scanning a flow sequence", start_mark, + "expected ',' or ']', but got %r" % token.id, token.start_mark) if self.scanner.check(FlowEntryToken): self.scanner.get() token = self.scanner.get() - yield CollectionEndEvent(token.start_marker, token.end_marker) + yield CollectionEndEvent(token.start_mark, token.end_mark) def parse_flow_mapping(self): # flow_mapping ::= FLOW-MAPPING-START @@ -401,7 +422,7 @@ class Parser: # FLOW-MAPPING-END # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? 
token = self.scanner.get() - start_marker = token.start_marker + start_mark = token.start_mark while not self.scanner.check(FlowMappingEndToken): if self.scanner.check(KeyToken): token = self.scanner.get() @@ -410,34 +431,34 @@ class Parser: for event in self.parse_flow_node(): yield event else: - yield self.process_empty_scalar(token.end_marker) + yield self.process_empty_scalar(token.end_mark) if self.scanner.check(ValueToken): token = self.scanner.get() if not self.scanner.check(FlowEntryToken, FlowMappingEndToken): for event in self.parse_flow_node(): yield event else: - yield self.process_empty_scalar(token.end_marker) + yield self.process_empty_scalar(token.end_mark) else: token = self.scanner.peek() - yield self.process_empty_scalar(token.start_marker) + yield self.process_empty_scalar(token.start_mark) else: for event in self.parse_flow_node(): yield event - yield self.process_empty_scalar(self.scanner.peek().start_marker) + yield self.process_empty_scalar(self.scanner.peek().start_mark) if not self.scanner.check(FlowEntryToken, FlowMappingEndToken): token = self.scanner.peek() - raise ParserError("while scanning a flow mapping", start_marker, - "expected ',' or '}', but got %r" % token.id, token.start_marker) + raise ParserError("while scanning a flow mapping", start_mark, + "expected ',' or '}', but got %r" % token.id, token.start_mark) if self.scanner.check(FlowEntryToken): self.scanner.get() if not self.scanner.check(FlowMappingEndToken): token = self.scanner.peek() - raise ParserError("while scanning a flow mapping", start_marker, - "expected '}', but found %r" % token.id, token.start_marker) + raise ParserError("while scanning a flow mapping", start_mark, + "expected '}', but found %r" % token.id, token.start_mark) token = self.scanner.get() - yield CollectionEndEvent(token.start_marker, token.end_marker) + yield CollectionEndEvent(token.start_mark, token.end_mark) - def process_empty_scalar(self, marker): - return ScalarEvent(None, None, u'', marker, marker) + def process_empty_scalar(self, mark): + return ScalarEvent(None, None, u'', mark, mark) diff --git a/lib/yaml/reader.py b/lib/yaml/reader.py index a4d0299..9778943 100644 --- a/lib/yaml/reader.py +++ b/lib/yaml/reader.py @@ -3,7 +3,7 @@ # # We define two classes here. # -# Marker(source, line, column) +# Mark(source, line, column) # It's just a record and its only use is producing nice error messages. # Parser does not use it for any other purposes. 
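# [Editor's sketch, not part of the patch; the constructor arguments follow
#  the renamed Mark class in lib/yaml/error.py above, and the name, buffer,
#  and pointer values here are made up:
#
#      from yaml.error import Mark
#      mark = Mark('<example>', 5, 0, 5, u'foo: bar\x00', 5)
#      print mark    # should point at line 1, column 6 of "<example>"
#  ]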
# @@ -17,7 +17,7 @@ __all__ = ['Reader', 'ReaderError'] -from error import YAMLError, Marker +from error import YAMLError, Mark import codecs, re @@ -142,12 +142,13 @@ class Reader: elif ch != u'\uFEFF': self.column += 1 - def get_marker(self): + def get_mark(self): if self.stream is None: - return Marker(self.name, self.line, self.column, + return Mark(self.name, self.index, self.line, self.column, self.buffer, self.pointer) else: - return Marker(self.name, self.line, self.column, None, None) + return Mark(self.name, self.index, self.line, self.column, + None, None) def determine_encoding(self): while not self.eof and len(self.raw_buffer) < 2: diff --git a/lib/yaml/scanner.py b/lib/yaml/scanner.py index 80d69b6..9c536b4 100644 --- a/lib/yaml/scanner.py +++ b/lib/yaml/scanner.py @@ -1,9 +1,10 @@ # Scanner produces tokens of the following types: +# STREAM-START +# STREAM-END # DIRECTIVE(name, value) # DOCUMENT-START # DOCUMENT-END -# STREAM-END # BLOCK-SEQUENCE-START # BLOCK-MAPPING-START # BLOCK-END @@ -34,13 +35,13 @@ class ScannerError(MarkedYAMLError): class SimpleKey: # See below simple keys treatment. - def __init__(self, token_number, required, index, line, column, marker): + def __init__(self, token_number, required, index, line, column, mark): self.token_number = token_number self.required = required self.index = index self.line = line self.column = column - self.marker = marker + self.mark = mark class Scanner: @@ -68,6 +69,9 @@ class Scanner: # List of processed tokens that are not yet emitted. self.tokens = [] + # Add the STREAM-START token. + self.fetch_stream_start() + # Number of tokens that were emitted through the `get_token` method. self.tokens_taken = 0 @@ -102,7 +106,7 @@ class Scanner: # Keep track of possible simple keys. This is a dictionary. The key # is `flow_level`; there can be no more that one possible simple key # for each level. The value is a SimpleKey record: - # (token_number, required, index, line, column, marker) + # (token_number, required, index, line, column, mark) # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow), # '[', or '{' tokens. self.possible_simple_keys = {} @@ -261,7 +265,7 @@ class Scanner: # No? It's an error. Let's produce a nice error message. raise ScannerError("while scanning for the next token", None, "found character %r that cannot start any token" - % ch.encode('utf-8'), self.reader.get_marker()) + % ch.encode('utf-8'), self.reader.get_mark()) # Simple keys treatment. @@ -292,8 +296,8 @@ class Scanner: if key.line != self.reader.line \ or self.reader.index-key.index > 1024: if key.required: - raise ScannerError("while scanning a simple key", key.marker, - "could not found expected ':'", self.reader.get_marker()) + raise ScannerError("while scanning a simple key", key.mark, + "could not found expected ':'", self.reader.get_mark()) del self.possible_simple_keys[level] def save_possible_simple_key(self): @@ -316,9 +320,9 @@ class Scanner: index = self.reader.index line = self.reader.line column = self.reader.column - marker = self.reader.get_marker() + mark = self.reader.get_mark() key = SimpleKey(token_number, required, - index, line, column, marker) + index, line, column, mark) self.possible_simple_keys[self.flow_level] = key def remove_possible_simple_key(self): @@ -329,29 +333,34 @@ class Scanner: # I don't think it's possible, but I could be wrong. 
 assert not key.required
         #if key.required:
-        #    raise ScannerError("while scanning a simple key", key.marker,
-        #            "could not found expected ':'", self.reader.get_marker())
+        #    raise ScannerError("while scanning a simple key", key.mark,
+        #            "could not found expected ':'", self.reader.get_mark())
 
     # Indentation functions.
 
     def unwind_indent(self, column):
-        # In flow context, tokens should respect indentation.
-        # Actually the condition should be `self.indent >= column` according to
-        # the spec. But this condition will prohibit intuitively correct
-        # constructions such as
-        # key : {
-        # }
-        if self.flow_level and self.indent > column:
-            raise ScannerError(None, None,
-                    "invalid intendation or unclosed '[' or '{'",
-                    self.reader.get_marker())
+        ## In flow context, tokens should respect indentation.
+        ## Actually the condition should be `self.indent >= column` according to
+        ## the spec. But this condition will prohibit intuitively correct
+        ## constructions such as
+        ## key : {
+        ## }
+        #if self.flow_level and self.indent > column:
+        #    raise ScannerError(None, None,
+        #            "invalid intendation or unclosed '[' or '{'",
+        #            self.reader.get_mark())
+
+        # In the flow context, indentation is ignored. We make the scanner less
+        # restrictive then specification requires.
+        if self.flow_level:
+            return
 
         # In block context, we may need to issue the BLOCK-END tokens.
         while self.indent > column:
-            marker = self.reader.get_marker()
+            mark = self.reader.get_mark()
             self.indent = self.indents.pop()
-            self.tokens.append(BlockEndToken(marker, marker))
+            self.tokens.append(BlockEndToken(mark, mark))
 
     def add_indent(self, column):
         # Check if we need to increase indentation.
@@ -363,6 +372,17 @@ class Scanner:
 
     # Fetchers.
 
+    def fetch_stream_start(self):
+        # We always add STREAM-START as the first token and STREAM-END as the
+        # last token.
+
+        # Read the token.
+        mark = self.reader.get_mark()
+
+        # Add STREAM-START.
+        self.tokens.append(StreamStartToken(mark, mark))
+
+
     def fetch_stream_end(self):
 
         # Set the current intendation to -1.
@@ -373,10 +393,10 @@ class Scanner:
         self.possible_simple_keys = {}
 
         # Read the token.
-        marker = self.reader.get_marker()
+        mark = self.reader.get_mark()
 
-        # Add END.
-        self.tokens.append(StreamEndToken(marker, marker))
+        # Add STREAM-END.
+        self.tokens.append(StreamEndToken(mark, mark))
 
         # The reader is ended.
         self.done = True
@@ -410,10 +430,10 @@ class Scanner:
         self.allow_simple_key = False
 
         # Add DOCUMENT-START or DOCUMENT-END.
-        start_marker = self.reader.get_marker()
+        start_mark = self.reader.get_mark()
         self.reader.forward(3)
-        end_marker = self.reader.get_marker()
-        self.tokens.append(TokenClass(start_marker, end_marker))
+        end_mark = self.reader.get_mark()
+        self.tokens.append(TokenClass(start_mark, end_mark))
 
     def fetch_flow_sequence_start(self):
         self.fetch_flow_collection_start(FlowSequenceStartToken)
@@ -433,10 +453,10 @@ class Scanner:
         self.allow_simple_key = True
 
         # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
-        start_marker = self.reader.get_marker()
+        start_mark = self.reader.get_mark()
         self.reader.forward()
-        end_marker = self.reader.get_marker()
-        self.tokens.append(TokenClass(start_marker, end_marker))
+        end_mark = self.reader.get_mark()
+        self.tokens.append(TokenClass(start_mark, end_mark))
 
     def fetch_flow_sequence_end(self):
         self.fetch_flow_collection_end(FlowSequenceEndToken)
@@ -456,10 +476,10 @@ class Scanner:
         self.allow_simple_key = False
 
         # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
- start_marker = self.reader.get_marker() + start_mark = self.reader.get_mark() self.reader.forward() - end_marker = self.reader.get_marker() - self.tokens.append(TokenClass(start_marker, end_marker)) + end_mark = self.reader.get_mark() + self.tokens.append(TokenClass(start_mark, end_mark)) def fetch_flow_entry(self): @@ -470,10 +490,10 @@ class Scanner: self.remove_possible_simple_key() # Add FLOW-ENTRY. - start_marker = self.reader.get_marker() + start_mark = self.reader.get_mark() self.reader.forward() - end_marker = self.reader.get_marker() - self.tokens.append(FlowEntryToken(start_marker, end_marker)) + end_mark = self.reader.get_mark() + self.tokens.append(FlowEntryToken(start_mark, end_mark)) def fetch_block_entry(self): @@ -484,12 +504,12 @@ class Scanner: if not self.allow_simple_key: raise ScannerError(None, None, "sequence entries are not allowed here", - self.reader.get_marker()) + self.reader.get_mark()) # We may need to add BLOCK-SEQUENCE-START. if self.add_indent(self.reader.column): - marker = self.reader.get_marker() - self.tokens.append(BlockSequenceStartToken(marker, marker)) + mark = self.reader.get_mark() + self.tokens.append(BlockSequenceStartToken(mark, mark)) # It's an error for the block entry to occur in the flow context, # but we let the parser detect this. @@ -503,10 +523,10 @@ class Scanner: self.remove_possible_simple_key() # Add BLOCK-ENTRY. - start_marker = self.reader.get_marker() + start_mark = self.reader.get_mark() self.reader.forward() - end_marker = self.reader.get_marker() - self.tokens.append(BlockEntryToken(start_marker, end_marker)) + end_mark = self.reader.get_mark() + self.tokens.append(BlockEntryToken(start_mark, end_mark)) def fetch_key(self): @@ -517,12 +537,12 @@ class Scanner: if not self.allow_simple_key: raise ScannerError(None, None, "mapping keys are not allowed here", - self.reader.get_marker()) + self.reader.get_mark()) # We may need to add BLOCK-MAPPING-START. if self.add_indent(self.reader.column): - marker = self.reader.get_marker() - self.tokens.append(BlockMappingStartToken(marker, marker)) + mark = self.reader.get_mark() + self.tokens.append(BlockMappingStartToken(mark, mark)) # Simple keys are allowed after '?' in the block context. self.allow_simple_key = not self.flow_level @@ -531,10 +551,10 @@ class Scanner: self.remove_possible_simple_key() # Add KEY. - start_marker = self.reader.get_marker() + start_mark = self.reader.get_mark() self.reader.forward() - end_marker = self.reader.get_marker() - self.tokens.append(KeyToken(start_marker, end_marker)) + end_mark = self.reader.get_mark() + self.tokens.append(KeyToken(start_mark, end_mark)) def fetch_value(self): @@ -545,14 +565,14 @@ class Scanner: key = self.possible_simple_keys[self.flow_level] del self.possible_simple_keys[self.flow_level] self.tokens.insert(key.token_number-self.tokens_taken, - KeyToken(key.marker, key.marker)) + KeyToken(key.mark, key.mark)) # If this key starts a new block mapping, we need to add # BLOCK-MAPPING-START. if not self.flow_level: if self.add_indent(key.column): self.tokens.insert(key.token_number-self.tokens_taken, - BlockMappingStartToken(key.marker, key.marker)) + BlockMappingStartToken(key.mark, key.mark)) # There cannot be two simple keys one after another. self.allow_simple_key = False @@ -570,7 +590,7 @@ class Scanner: if not self.allow_simple_key: raise ScannerError(None, None, "mapping values are not allowed here", - self.reader.get_marker()) + self.reader.get_mark()) # Simple keys are allowed after ':' in the block context. 
self.allow_simple_key = not self.flow_level @@ -579,10 +599,10 @@ class Scanner: self.remove_possible_simple_key() # Add VALUE. - start_marker = self.reader.get_marker() + start_mark = self.reader.get_mark() self.reader.forward() - end_marker = self.reader.get_marker() - self.tokens.append(ValueToken(start_marker, end_marker)) + end_mark = self.reader.get_mark() + self.tokens.append(ValueToken(start_mark, end_mark)) def fetch_alias(self): @@ -773,24 +793,24 @@ class Scanner: def scan_directive(self): # See the specification for details. - start_marker = self.reader.get_marker() + start_mark = self.reader.get_mark() self.reader.forward() - name = self.scan_directive_name(start_marker) + name = self.scan_directive_name(start_mark) value = None if name == u'YAML': - value = self.scan_yaml_directive_value(start_marker) - end_marker = self.reader.get_marker() + value = self.scan_yaml_directive_value(start_mark) + end_mark = self.reader.get_mark() elif name == u'TAG': - value = self.scan_tag_directive_value(start_marker) - end_marker = self.reader.get_marker() + value = self.scan_tag_directive_value(start_mark) + end_mark = self.reader.get_mark() else: - end_marker = self.reader.get_marker() + end_mark = self.reader.get_mark() while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029': self.reader.forward() - self.scan_directive_ignored_line(start_marker) - return DirectiveToken(name, value, start_marker, end_marker) + self.scan_directive_ignored_line(start_mark) + return DirectiveToken(name, value, start_mark, end_mark) - def scan_directive_name(self, start_marker): + def scan_directive_name(self, start_mark): # See the specification for details. length = 0 ch = self.reader.peek(length) @@ -799,44 +819,44 @@ class Scanner: length += 1 ch = self.reader.peek(length) if not length: - raise ScannerError("while scanning a directive", start_marker, + raise ScannerError("while scanning a directive", start_mark, "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.reader.get_marker()) + % ch.encode('utf-8'), self.reader.get_mark()) value = self.reader.prefix(length) self.reader.forward(length) ch = self.reader.peek() if ch not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_marker, + raise ScannerError("while scanning a directive", start_mark, "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.reader.get_marker()) + % ch.encode('utf-8'), self.reader.get_mark()) return value - def scan_yaml_directive_value(self, start_marker): + def scan_yaml_directive_value(self, start_mark): # See the specification for details. 
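# [Editor's note, not part of the patch: for a directive line such as
#  "%YAML 1.1", scan_directive has already consumed the "%" and the name
#  "YAML"; this method scans the remainder and returns the tuple (1, 1).]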
while self.reader.peek() == u' ': self.reader.forward() - major = self.scan_yaml_directive_number(start_marker) + major = self.scan_yaml_directive_number(start_mark) if self.reader.peek() != '.': - raise ScannerError("while scanning a directive", start_marker, + raise ScannerError("while scanning a directive", start_mark, "expected a digit or '.', but found %r" % self.reader.peek().encode('utf-8'), - self.reader.get_marker()) + self.reader.get_mark()) self.reader.forward() - minor = self.scan_yaml_directive_number(start_marker) + minor = self.scan_yaml_directive_number(start_mark) if self.reader.peek() not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_marker, + raise ScannerError("while scanning a directive", start_mark, "expected a digit or ' ', but found %r" % self.reader.peek().encode('utf-8'), - self.reader.get_marker()) + self.reader.get_mark()) return (major, minor) - def scan_yaml_directive_number(self, start_marker): + def scan_yaml_directive_number(self, start_mark): # See the specification for details. ch = self.reader.peek() if not (u'0' <= ch <= '9'): - raise ScannerError("while scanning a directive", start_marker, + raise ScannerError("while scanning a directive", start_mark, "expected a digit, but found %r" % ch.encode('utf-8'), - self.reader.get_marker()) + self.reader.get_mark()) length = 0 while u'0' <= self.reader.peek(length) <= u'9': length += 1 @@ -844,37 +864,37 @@ class Scanner: self.reader.forward(length) return value - def scan_tag_directive_value(self, start_marker): + def scan_tag_directive_value(self, start_mark): # See the specification for details. while self.reader.peek() == u' ': self.reader.forward() - handle = self.scan_tag_directive_handle(start_marker) + handle = self.scan_tag_directive_handle(start_mark) while self.reader.peek() == u' ': self.reader.forward() - prefix = self.scan_tag_directive_prefix(start_marker) + prefix = self.scan_tag_directive_prefix(start_mark) return (handle, prefix) - def scan_tag_directive_handle(self, start_marker): + def scan_tag_directive_handle(self, start_mark): # See the specification for details. - value = self.scan_tag_handle('directive', start_marker) + value = self.scan_tag_handle('directive', start_mark) ch = self.reader.peek() if ch != u' ': - raise ScannerError("while scanning a directive", start_marker, + raise ScannerError("while scanning a directive", start_mark, "expected ' ', but found %r" % ch.encode('utf-8'), - self.reader.get_marker()) + self.reader.get_mark()) return value - def scan_tag_directive_prefix(self, start_marker): + def scan_tag_directive_prefix(self, start_mark): # See the specification for details. - value = self.scan_tag_uri('directive', start_marker) + value = self.scan_tag_uri('directive', start_mark) ch = self.reader.peek() if ch not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_marker, + raise ScannerError("while scanning a directive", start_mark, "expected ' ', but found %r" % ch.encode('utf-8'), - self.reader.get_marker()) + self.reader.get_mark()) return value - def scan_directive_ignored_line(self, start_marker): + def scan_directive_ignored_line(self, start_mark): # See the specification for details. 
while self.reader.peek() == u' ': self.reader.forward() @@ -883,9 +903,9 @@ class Scanner: self.reader.forward() ch = self.reader.peek() if ch not in u'\0\r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_marker, + raise ScannerError("while scanning a directive", start_mark, "expected a comment or a line break, but found %r" - % ch.encode('utf-8'), self.reader.get_marker()) + % ch.encode('utf-8'), self.reader.get_mark()) self.scan_line_break() def scan_anchor(self, TokenClass): @@ -897,7 +917,7 @@ class Scanner: # and # [ *alias , "value" ] # Therefore we restrict aliases to numbers and ASCII letters. - start_marker = self.reader.get_marker() + start_mark = self.reader.get_mark() indicator = self.reader.peek() if indicator == '*': name = 'alias' @@ -911,31 +931,31 @@ class Scanner: length += 1 ch = self.reader.peek(length) if not length: - raise ScannerError("while scanning an %s" % name, start_marker, + raise ScannerError("while scanning an %s" % name, start_mark, "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.reader.get_marker()) + % ch.encode('utf-8'), self.reader.get_mark()) value = self.reader.prefix(length) self.reader.forward(length) ch = self.reader.peek() if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`': - raise ScannerError("while scanning an %s" % name, start_marker, + raise ScannerError("while scanning an %s" % name, start_mark, "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.reader.get_marker()) - end_marker = self.reader.get_marker() - return TokenClass(value, start_marker, end_marker) + % ch.encode('utf-8'), self.reader.get_mark()) + end_mark = self.reader.get_mark() + return TokenClass(value, start_mark, end_mark) def scan_tag(self): # See the specification for details. - start_marker = self.reader.get_marker() + start_mark = self.reader.get_mark() ch = self.reader.peek(1) if ch == u'<': handle = None self.reader.forward(2) - suffix = self.scan_tag_uri('tag', start_marker) + suffix = self.scan_tag_uri('tag', start_mark) if self.reader.peek() != u'>': - raise ScannerError("while parsing a tag", start_marker, + raise ScannerError("while parsing a tag", start_mark, "expected '>', but found %r" % self.reader.peek().encode('utf-8'), - self.reader.get_marker()) + self.reader.get_mark()) self.reader.forward() elif ch in u'\0 \t\r\n\x85\u2028\u2029': handle = None @@ -952,41 +972,41 @@ class Scanner: ch = self.reader.peek(length) handle = u'!' if use_handle: - handle = self.scan_tag_handle('tag', start_marker) + handle = self.scan_tag_handle('tag', start_mark) else: handle = u'!' self.reader.forward() - suffix = self.scan_tag_uri('tag', start_marker) + suffix = self.scan_tag_uri('tag', start_mark) ch = self.reader.peek() if ch not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a tag", start_marker, + raise ScannerError("while scanning a tag", start_mark, "expected ' ', but found %r" % ch.encode('utf-8'), - self.reader.get_marker()) + self.reader.get_mark()) value = (handle, suffix) - end_marker = self.reader.get_marker() - return TagToken(value, start_marker, end_marker) + end_mark = self.reader.get_mark() + return TagToken(value, start_mark, end_mark) def scan_block_scalar(self, folded): # See the specification for details. chunks = [] - start_marker = self.reader.get_marker() + start_mark = self.reader.get_mark() # Scan the header. 
self.reader.forward() - chomping, increment = self.scan_block_scalar_indicators(start_marker) - self.scan_block_scalar_ignored_line(start_marker) + chomping, increment = self.scan_block_scalar_indicators(start_mark) + self.scan_block_scalar_ignored_line(start_mark) # Determine the indentation level and go to the first non-empty line. min_indent = self.indent+1 if min_indent < 1: min_indent = 1 if increment is None: - breaks, max_indent, end_marker = self.scan_block_scalar_indentation() + breaks, max_indent, end_mark = self.scan_block_scalar_indentation() indent = max(min_indent, max_indent) else: indent = min_indent+increment-1 - breaks, end_marker = self.scan_block_scalar_breaks(indent) + breaks, end_mark = self.scan_block_scalar_breaks(indent) line_break = u'' # Scan the inner part of the block scalar. @@ -999,7 +1019,7 @@ class Scanner: chunks.append(self.reader.prefix(length)) self.reader.forward(length) line_break = self.scan_line_break() - breaks, end_marker = self.scan_block_scalar_breaks(indent) + breaks, end_mark = self.scan_block_scalar_breaks(indent) if self.reader.column == indent and self.reader.peek() != u'\0': # Unfortunately, folding rules are ambiguous. # @@ -1033,9 +1053,9 @@ class Scanner: chunks.extend(breaks) # We are done. - return ScalarToken(u''.join(chunks), False, start_marker, end_marker) + return ScalarToken(u''.join(chunks), False, start_mark, end_mark) - def scan_block_scalar_indicators(self, start_marker): + def scan_block_scalar_indicators(self, start_mark): # See the specification for details. chomping = None increment = None @@ -1050,16 +1070,16 @@ class Scanner: if ch in u'0123456789': increment = int(ch) if increment == 0: - raise ScannerError("while scanning a block scalar", start_marker, + raise ScannerError("while scanning a block scalar", start_mark, "expected indentation indicator in the range 1-9, but found 0", - self.reader.get_marker()) + self.reader.get_mark()) self.reader.forward() elif ch in u'0123456789': increment = int(ch) if increment == 0: - raise ScannerError("while scanning a block scalar", start_marker, + raise ScannerError("while scanning a block scalar", start_mark, "expected indentation indicator in the range 1-9, but found 0", - self.reader.get_marker()) + self.reader.get_mark()) self.reader.forward() ch = self.reader.peek() if ch in u'+-': @@ -1070,12 +1090,12 @@ class Scanner: self.reader.forward() ch = self.reader.peek() if ch not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a block scalar", start_marker, + raise ScannerError("while scanning a block scalar", start_mark, "expected chomping or indentation indicators, but found %r" - % ch.encode('utf-8'), self.reader.get_marker()) + % ch.encode('utf-8'), self.reader.get_mark()) return chomping, increment - def scan_block_scalar_ignored_line(self, start_marker): + def scan_block_scalar_ignored_line(self, start_mark): # See the specification for details. while self.reader.peek() == u' ': self.reader.forward() @@ -1084,55 +1104,57 @@ class Scanner: self.reader.forward() ch = self.reader.peek() if ch not in u'\0\r\n\x85\u2028\u2029': - raise ScannerError("while scanning a block scalar", start_marker, + raise ScannerError("while scanning a block scalar", start_mark, "expected a comment or a line break, but found %r" - % ch.encode('utf-8'), self.reader.get_marker()) + % ch.encode('utf-8'), self.reader.get_mark()) self.scan_line_break() def scan_block_scalar_indentation(self): # See the specification for details. 
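# [Editor's note, not part of the patch: this method handles block scalar
#  headers like "|" or ">" that carry no explicit indentation indicator;
#  it skips the leading empty lines and reports the largest column reached,
#  from which the caller infers the content indentation.]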
 
     def scan_block_scalar_indentation(self):
         # See the specification for details.
         chunks = []
         max_indent = 0
-        end_marker = self.reader.get_marker()
+        end_mark = self.reader.get_mark()
         while self.reader.peek() in u' \r\n\x85\u2028\u2029':
             if self.reader.peek() != u' ':
                 chunks.append(self.scan_line_break())
-                end_marker = self.reader.get_marker()
+                end_mark = self.reader.get_mark()
             else:
                 self.reader.forward()
                 if self.reader.column > max_indent:
                     max_indent = self.reader.column
-        return chunks, max_indent, end_marker
+        return chunks, max_indent, end_mark
 
     def scan_block_scalar_breaks(self, indent):
         # See the specification for details.
         chunks = []
-        end_marker = self.reader.get_marker()
+        end_mark = self.reader.get_mark()
         while self.reader.column < indent and self.reader.peek() == u' ':
             self.reader.forward()
         while self.reader.peek() in u'\r\n\x85\u2028\u2029':
             chunks.append(self.scan_line_break())
-            end_marker = self.reader.get_marker()
+            end_mark = self.reader.get_mark()
             while self.reader.column < indent and self.reader.peek() == u' ':
                 self.reader.forward()
-        return chunks, end_marker
+        return chunks, end_mark
 
     def scan_flow_scalar(self, double):
         # See the specification for details.
+        # Note that we loosen the indentation rules for quoted scalars. Quoted
+        # scalars don't need to adhere to indentation because " and ' clearly
+        # mark the beginning and the end of them. Therefore we are less
+        # restrictive than the specification requires. We only need to check
+        # that document separators are not included in scalars.
        chunks = []
-        start_marker = self.reader.get_marker()
-        indent = self.indent+1
-        if indent == 0:
-            indent = 1
+        start_mark = self.reader.get_mark()
         quote = self.reader.peek()
         self.reader.forward()
-        chunks.extend(self.scan_flow_scalar_non_spaces(double, indent, start_marker))
+        chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
         while self.reader.peek() != quote:
-            chunks.extend(self.scan_flow_scalar_spaces(double, indent, start_marker))
-            chunks.extend(self.scan_flow_scalar_non_spaces(double, indent, start_marker))
+            chunks.extend(self.scan_flow_scalar_spaces(double, start_mark))
+            chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
         self.reader.forward()
-        end_marker = self.reader.get_marker()
-        return ScalarToken(u''.join(chunks), False, start_marker, end_marker)
+        end_mark = self.reader.get_mark()
+        return ScalarToken(u''.join(chunks), False, start_mark, end_mark)
 
     ESCAPE_REPLACEMENTS = {
         u'0':   u'\0',
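When no indentation indicator is given, scan_block_scalar_indentation above
auto-detects the scalar's indentation: it skips leading spaces and line
breaks and remembers the deepest column it reached, which scan_block_scalar
then combines with the minimal indent required by the block context. A rough
standalone equivalent over a list of lines (an illustration only, not the
reader-based implementation):

    def detect_block_scalar_indent(lines, min_indent):
        # Track the deepest leading-space column seen up to and
        # including the first non-empty line.
        max_indent = 0
        for line in lines:
            stripped = line.lstrip(' ')
            max_indent = max(max_indent, len(line) - len(stripped))
            if stripped:
                break
        return max(min_indent, max_indent)

    assert detect_block_scalar_indent(['', '   foo', ' bar'], 1) == 3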
@@ -1160,7 +1182,7 @@ class Scanner:
         u'U':   8,
     }
 
-    def scan_flow_scalar_non_spaces(self, double, indent, start_marker):
+    def scan_flow_scalar_non_spaces(self, double, start_mark):
         # See the specification for details.
         chunks = []
         while True:
@@ -1188,22 +1210,22 @@ class Scanner:
                         self.reader.forward()
                         for k in range(length):
                             if self.reader.peek(k) not in u'0123456789ABCDEFabcdef':
-                                raise ScannerError("while scanning a double-quoted scalar", start_marker,
+                                raise ScannerError("while scanning a double-quoted scalar", start_mark,
                                         "expected escape sequence of %d hexadecimal numbers, but found %r" %
-                                            (length, self.reader.peek(k).encode('utf-8')), self.reader.get_marker())
+                                            (length, self.reader.peek(k).encode('utf-8')), self.reader.get_mark())
                         code = int(self.reader.prefix(length), 16)
                         chunks.append(unichr(code))
                         self.reader.forward(length)
                     elif ch in u'\r\n\x85\u2028\u2029':
                         self.scan_line_break()
-                        chunks.extend(self.scan_flow_scalar_breaks(double, indent, start_marker))
+                        chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
                     else:
-                        raise ScannerError("while scanning a double-quoted scalar", start_marker,
-                                "found unknown escape character %r" % ch.encode('utf-8'), self.reader.get_marker())
+                        raise ScannerError("while scanning a double-quoted scalar", start_mark,
+                                "found unknown escape character %r" % ch.encode('utf-8'), self.reader.get_mark())
             else:
                 return chunks
 
-    def scan_flow_scalar_spaces(self, double, indent, start_marker):
+    def scan_flow_scalar_spaces(self, double, start_mark):
         # See the specification for details.
         chunks = []
         length = 0
@@ -1213,11 +1235,11 @@ class Scanner:
         self.reader.forward(length)
         ch = self.reader.peek()
         if ch == u'\0':
-            raise ScannerError("while scanning a quoted scalar", start_marker,
-                    "found unexpected end of stream", self.reader.get_marker())
+            raise ScannerError("while scanning a quoted scalar", start_mark,
+                    "found unexpected end of stream", self.reader.get_mark())
         elif ch in u'\r\n\x85\u2028\u2029':
             line_break = self.scan_line_break()
-            breaks = self.scan_flow_scalar_breaks(double, indent, start_marker)
+            breaks = self.scan_flow_scalar_breaks(double, start_mark)
             if line_break != u'\n':
                 chunks.append(line_break)
             elif not breaks:
@@ -1227,21 +1249,17 @@ class Scanner:
                 chunks.append(whitespaces)
         return chunks
 
-    def scan_flow_scalar_breaks(self, double, indent, start_marker):
+    def scan_flow_scalar_breaks(self, double, start_mark):
         # See the specification for details.
         chunks = []
         while True:
-            while self.reader.column < indent and self.reader.peek() == u' ':
-                self.reader.forward()
-            if self.reader.column < indent  \
-                    and self.reader.peek() not in u'\0\r\n\x85\u2028\u2029':
-                s = 's'
-                if indent == 1:
-                    s = ''
-                raise ScannerError("while scanning a quoted scalar", start_marker,
-                        "expected %d space%s indentation, but found %r"
-                        % (indent, s, self.reader.peek().encode('utf-8')),
-                        self.reader.get_marker())
+            # Instead of checking indentation, we check for document
+            # separators.
+            prefix = self.reader.prefix(3)
+            if (prefix == u'---' or prefix == u'...')   \
+                    and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+                raise ScannerError("while scanning a quoted scalar", start_mark,
+                        "found unexpected document separator", self.reader.get_mark())
             while self.reader.peek() in u' \t':
                 self.reader.forward()
             if self.reader.peek() in u'\r\n\x85\u2028\u2029':
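The document separator test that replaces the old indentation check also
reappears twice in scan_plain_spaces below, so it is worth spelling out: at
the start of a line, '---' or '...' followed by a space, a tab, a break, or
the end of the stream terminates (or, inside a quoted scalar, forbids) the
scalar. A sketch of the predicate against a minimal string-backed reader
(StringReader here is a hypothetical stand-in for yaml.reader.Reader):

    class StringReader(object):
        # Just enough of the Reader interface: peek() and prefix().
        def __init__(self, data):
            self.data = data + u'\0'    # '\0' marks the end of the stream
            self.pointer = 0
        def peek(self, index=0):
            return self.data[self.pointer+index]
        def prefix(self, length=1):
            return self.data[self.pointer:self.pointer+length]

    def at_document_separator(reader):
        prefix = reader.prefix(3)
        return (prefix == u'---' or prefix == u'...') \
                and reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029'

    assert at_document_separator(StringReader(u'--- foo'))
    assert at_document_separator(StringReader(u'...'))
    assert not at_document_separator(StringReader(u'---x'))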
@@ -1252,14 +1270,17 @@ class Scanner:
     def scan_plain(self):
         # See the specification for details.
         # We add an additional restriction for the flow context:
-        # plain scalars in the flow context cannot contain ':' and '?'.
+        # plain scalars in the flow context cannot contain ',', ':' and '?'.
         # We also keep track of the `allow_simple_key` flag here.
+        # Indentation rules are loosened for the flow context.
         chunks = []
-        start_marker = self.reader.get_marker()
-        end_marker = start_marker
+        start_mark = self.reader.get_mark()
+        end_mark = start_mark
         indent = self.indent+1
-        if indent == 0:
-            indent = 1
+        # We allow zero indentation for scalars, but then we need to check for
+        # document separators at the beginning of the line.
+        #if indent == 0:
+        #    indent = 1
         spaces = []
         while True:
             length = 0
@@ -1279,14 +1300,14 @@ class Scanner:
             chunks.extend(spaces)
             chunks.append(self.reader.prefix(length))
             self.reader.forward(length)
-            end_marker = self.reader.get_marker()
-            spaces = self.scan_plain_spaces(indent)
+            end_mark = self.reader.get_mark()
+            spaces = self.scan_plain_spaces(indent, start_mark)
             if not spaces or self.reader.peek() == u'#' \
-                    or self.reader.column < indent:
+                    or (not self.flow_level and self.reader.column < indent):
                 break
-        return ScalarToken(u''.join(chunks), True, start_marker, end_marker)
+        return ScalarToken(u''.join(chunks), True, start_mark, end_mark)
 
-    def scan_plain_spaces(self, indent):
+    def scan_plain_spaces(self, indent, start_mark):
         # See the specification for details.
         # The specification is really confusing about tabs in plain scalars.
         # We just forbid them completely. Do not use tabs in YAML!
@@ -1300,12 +1321,20 @@ class Scanner:
             if ch in u'\r\n\x85\u2028\u2029':
                 line_break = self.scan_line_break()
                 self.allow_simple_key = True
+                prefix = self.reader.prefix(3)
+                if (prefix == u'---' or prefix == u'...')   \
+                        and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+                    return
                 breaks = []
                 while self.reader.peek() in u' \r\n\x85\u2028\u2029':
                     if self.reader.peek() == ' ':
                         self.reader.forward()
                     else:
                         breaks.append(self.scan_line_break())
+                        prefix = self.reader.prefix(3)
+                        if (prefix == u'---' or prefix == u'...')   \
+                                and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+                            return
                 if line_break != u'\n':
                     chunks.append(line_break)
                 elif not breaks:
@@ -1315,15 +1344,15 @@ class Scanner:
             chunks.append(whitespaces)
         return chunks
 
-    def scan_tag_handle(self, name, start_marker):
+    def scan_tag_handle(self, name, start_mark):
         # See the specification for details.
         # For some strange reason, the specification does not allow '_' in
         # tag handles. I have allowed it anyway.
         ch = self.reader.peek()
         if ch != u'!':
-            raise ScannerError("while scanning a %s" % name, start_marker,
+            raise ScannerError("while scanning a %s" % name, start_mark,
                     "expected '!', but found %r" % ch.encode('utf-8'),
-                    self.reader.get_marker())
+                    self.reader.get_mark())
         length = 1
         ch = self.reader.peek(length)
         if ch != u' ':
@@ -1333,15 +1362,15 @@ class Scanner:
             ch = self.reader.peek(length)
             if ch != u'!':
                 self.reader.forward(length)
-                raise ScannerError("while scanning a %s" % name, start_marker,
+                raise ScannerError("while scanning a %s" % name, start_mark,
                         "expected '!', but found %r" % ch.encode('utf-8'),
-                        self.reader.get_marker())
+                        self.reader.get_mark())
             length += 1
         value = self.reader.prefix(length)
         self.reader.forward(length)
         return value
 
-    def scan_tag_uri(self, name, start_marker):
+    def scan_tag_uri(self, name, start_mark):
         # See the specification for details.
         # Note: we do not check if URI is well-formed.
         chunks = []
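scan_tag_handle above accepts the primary handle '!', the secondary handle
'!!', or a named handle such as '!foo!', with '_' (and '-') permitted in the
name. The same shape can be written as a regular expression; this is an
illustration of the accepted language only, not how the scanner works
internally:

    import re

    # '!' alone, or '!' + word characters + a closing '!'.
    TAG_HANDLE = re.compile(r'^!(?:[0-9A-Za-z_-]*!)?$')

    for handle in ['!', '!!', '!my_handle!']:
        assert TAG_HANDLE.match(handle)
    assert not TAG_HANDLE.match('!unclosed')    # missing the closing '!'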
@@ -1353,7 +1382,7 @@ class Scanner:
                 chunks.append(self.reader.prefix(length))
                 self.reader.forward(length)
                 length = 0
-                chunks.append(self.scan_uri_escapes(name, start_marker))
+                chunks.append(self.scan_uri_escapes(name, start_mark))
             else:
                 length += 1
             ch = self.reader.peek(length)
@@ -1362,28 +1391,28 @@ class Scanner:
             self.reader.forward(length)
             length = 0
         if not chunks:
-            raise ScannerError("while parsing a %s" % name, start_marker,
+            raise ScannerError("while parsing a %s" % name, start_mark,
                     "expected URI, but found %r" % ch.encode('utf-8'),
-                    self.reader.get_marker())
+                    self.reader.get_mark())
         return u''.join(chunks)
 
-    def scan_uri_escapes(self, name, start_marker):
+    def scan_uri_escapes(self, name, start_mark):
         # See the specification for details.
         bytes = []
-        marker = self.reader.get_marker()
+        mark = self.reader.get_mark()
         while self.reader.peek() == u'%':
             self.reader.forward()
             for k in range(2):
                 if self.reader.peek(k) not in u'0123456789ABCDEFabcdef':
-                    raise ScannerError("while scanning a %s" % name, start_marker,
+                    raise ScannerError("while scanning a %s" % name, start_mark,
                             "expected URI escape sequence of 2 hexadecimal numbers, but found %r" %
-                                (self.reader.peek(k).encode('utf-8')), self.reader.get_marker())
+                                (self.reader.peek(k).encode('utf-8')), self.reader.get_mark())
             bytes.append(chr(int(self.reader.prefix(2), 16)))
             self.reader.forward(2)
         try:
             value = unicode(''.join(bytes), 'utf-8')
         except UnicodeDecodeError, exc:
-            raise ScannerError("while scanning a %s" % name, start_marker, str(exc), marker)
+            raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark)
         return value
 
     def scan_line_break(self):
diff --git a/lib/yaml/tokens.py b/lib/yaml/tokens.py
index 863bb44..59b36af 100644
--- a/lib/yaml/tokens.py
+++ b/lib/yaml/tokens.py
@@ -1,11 +1,11 @@
 class Token:
-    def __init__(self, start_marker, end_marker):
-        self.start_marker = start_marker
-        self.end_marker = end_marker
+    def __init__(self, start_mark, end_mark):
+        self.start_mark = start_mark
+        self.end_mark = end_mark
     def __repr__(self):
         attributes = [key for key in self.__dict__
-                if not key.endswith('_marker')]
+                if not key.endswith('_mark')]
         attributes.sort()
         arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
             for key in attributes])
@@ -16,11 +16,11 @@ class Token:
 
 class DirectiveToken(Token):
     id = '<directive>'
-    def __init__(self, name, value, start_marker, end_marker):
+    def __init__(self, name, value, start_mark, end_mark):
         self.name = name
         self.value = value
-        self.start_marker = start_marker
-        self.end_marker = end_marker
+        self.start_mark = start_mark
+        self.end_mark = end_mark
 
 class DocumentStartToken(Token):
     id = '<document start>'
@@ -28,6 +28,9 @@ class DocumentStartToken(Token):
 class DocumentEndToken(Token):
     id = '<document end>'
 
+class StreamStartToken(Token):
+    id = '<stream start>'
+
 class StreamEndToken(Token):
     id = '<stream end>'
 
@@ -66,30 +69,30 @@ class FlowEntryToken(Token):
 
 class AliasToken(Token):
     id = '<alias>'
-    def __init__(self, value, start_marker, end_marker):
+    def __init__(self, value, start_mark, end_mark):
         self.value = value
-        self.start_marker = start_marker
-        self.end_marker = end_marker
+        self.start_mark = start_mark
+        self.end_mark = end_mark
 
 class AnchorToken(Token):
     id = '<anchor>'
-    def __init__(self, value, start_marker, end_marker):
+    def __init__(self, value, start_mark, end_mark):
         self.value = value
-        self.start_marker = start_marker
-        self.end_marker = end_marker
+        self.start_mark = start_mark
+        self.end_mark = end_mark
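The Token.__repr__ above deliberately skips every attribute whose name ends
in '_mark', so token dumps in the tests compare payloads while ignoring the
volatile positions. A toy demonstration of the pattern (a standalone copy,
not an import of the real classes):

    class Token(object):
        def __init__(self, start_mark, end_mark):
            self.start_mark = start_mark
            self.end_mark = end_mark
        def __repr__(self):
            # Everything except the *_mark attributes, sorted for
            # stable output.
            attributes = [key for key in self.__dict__
                    if not key.endswith('_mark')]
            attributes.sort()
            arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
                for key in attributes])
            return '%s(%s)' % (self.__class__.__name__, arguments)

    class ScalarToken(Token):
        def __init__(self, value, plain, start_mark, end_mark):
            Token.__init__(self, start_mark, end_mark)
            self.value = value
            self.plain = plain

    print ScalarToken(u'foo', True, None, None)
    # prints: ScalarToken(plain=True, value=u'foo')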
 
 class TagToken(Token):
     id = '<tag>'
-    def __init__(self, value, start_marker, end_marker):
+    def __init__(self, value, start_mark, end_mark):
         self.value = value
-        self.start_marker = start_marker
-        self.end_marker = end_marker
+        self.start_mark = start_mark
+        self.end_mark = end_mark
 
 class ScalarToken(Token):
     id = '<scalar>'
-    def __init__(self, value, plain, start_marker, end_marker):
+    def __init__(self, value, plain, start_mark, end_mark):
         self.value = value
         self.plain = plain
-        self.start_marker = start_marker
-        self.end_marker = end_marker
+        self.start_mark = start_mark
+        self.end_mark = end_mark
diff --git a/tests/data/document-separator-in-quoted-scalar.error-message b/tests/data/document-separator-in-quoted-scalar.error-message
new file mode 100644
index 0000000..9eeb0d6
--- /dev/null
+++ b/tests/data/document-separator-in-quoted-scalar.error-message
@@ -0,0 +1,11 @@
+---
+"this --- is correct"
+---
+"this
+...is also
+correct"
+---
+"a quoted scalar
+cannot contain
+---
+document separators"
diff --git a/tests/data/invalid-indentation-for-quoted-scalar.error-message b/tests/data/invalid-indentation-for-quoted-scalar.error-message
deleted file mode 100644
index b885db3..0000000
--- a/tests/data/invalid-indentation-for-quoted-scalar.error-message
+++ /dev/null
@@ -1,2 +0,0 @@
-test: "foo
-bar"
diff --git a/tests/data/sloppy-indentation.canonical b/tests/data/sloppy-indentation.canonical
new file mode 100644
index 0000000..0d312cc
--- /dev/null
+++ b/tests/data/sloppy-indentation.canonical
@@ -0,0 +1,18 @@
+%YAML 1.1
+---
+!!map {
+    ? !!str "in the block context"
+    : !!map {
+        ? !!str "indentation should be kept"
+        : !!map {
+            ? !!str "but in the flow context"
+            : !!seq [ !!str "it may be violated" ]
+        }
+    }
+}
+--- !!str
+"the parser does not require scalars to be indented with at least one space"
+--- !!str
+"the parser does not require scalars to be indented with at least one space"
+--- !!map
+{ ? !!str "foo": { ? !!str "bar" : "quoted scalars may not adhere indentation" } }
diff --git a/tests/data/sloppy-indentation.data b/tests/data/sloppy-indentation.data
new file mode 100644
index 0000000..2eb4f5a
--- /dev/null
+++ b/tests/data/sloppy-indentation.data
@@ -0,0 +1,17 @@
+---
+in the block context:
+    indentation should be kept: {
+    but in the flow context: [
+it may be violated]
+}
+---
+the parser does not require scalars
+to be indented with at least one space
+...
+---
+"the parser does not require scalars
+to be indented with at least one space"
+---
+foo:
+    bar: 'quoted scalars
+may not adhere indentation'
diff --git a/tests/data/test_mark.marks b/tests/data/test_mark.marks
new file mode 100644
index 0000000..7b08ee4
--- /dev/null
+++ b/tests/data/test_mark.marks
@@ -0,0 +1,38 @@
+---
+*The first line.
+The last line.
+---
+The first*line.
+The last line.
+---
+The first line.*
+The last line.
+---
+The first line.
+*The last line.
+---
+The first line.
+The last*line.
+---
+The first line.
+The last line.*
+---
+The first line.
+*The selected line.
+The last line.
+---
+The first line.
+The selected*line.
+The last line.
+---
+The first line.
+The selected line.*
+The last line.
+---
+*The only line.
+---
+The only*line.
+---
+The only line.*
+---
+Loooooooooooooooooooooooooooooooooooooooooooooong*Liiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiine
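Each '---'-separated snippet in test_mark.marks marks exactly one position
with '*'; the test recovers the index, line, and column by walking the text,
the same way the loop in tests/test_mark.py below does. The same computation
as a small self-contained helper (a hypothetical name, same logic):

    def position_of_star(text):
        # 0-based index, line and column, matching the reader's counting.
        index = line = column = 0
        while text[index] != '*':
            if text[index] == '\n':
                line += 1
                column = 0
            else:
                column += 1
            index += 1
        return index, line, column

    assert position_of_star('The first*line.\n') == (9, 0, 9)
    assert position_of_star('line one\n*line two\n') == (9, 1, 0)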
diff --git a/tests/data/test_marker.markers b/tests/data/test_marker.markers
deleted file mode 100644
index 7b08ee4..0000000
--- a/tests/data/test_marker.markers
+++ /dev/null
@@ -1,38 +0,0 @@
----
-*The first line.
-The last line.
----
-The first*line.
-The last line.
----
-The first line.*
-The last line.
----
-The first line.
-*The last line.
----
-The first line.
-The last*line.
----
-The first line.
-The last line.*
----
-The first line.
-*The selected line.
-The last line.
----
-The first line.
-The selected*line.
-The last line.
----
-The first line.
-The selected line.*
-The last line.
----
-*The only line.
----
-The only*line.
----
-The only line.*
----
-Loooooooooooooooooooooooooooooooooooooooooooooong*Liiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiine
diff --git a/tests/test_appliance.py b/tests/test_appliance.py
index 12239eb..eb54faa 100644
--- a/tests/test_appliance.py
+++ b/tests/test_appliance.py
@@ -47,6 +47,7 @@ class CanonicalScanner:
     def scan(self):
         #print self.data[self.index:]
         tokens = []
+        tokens.append(StreamStartToken(None, None))
         while True:
             self.find_token()
             ch = self.data[self.index]
@@ -206,13 +207,16 @@ class CanonicalParser:
         self.scanner = CanonicalScanner(data)
         self.events = []
 
-    # stream: document* END
+    # stream: STREAM-START document* STREAM-END
     def parse_stream(self):
+        self.consume_token(StreamStartToken)
+        self.events.append(StreamStartEvent(None, None))
         while not self.test_token(StreamEndToken):
             if self.test_token(DirectiveToken, DocumentStartToken):
                 self.parse_document()
             else:
                 raise Error("document is expected, got "+repr(self.tokens[self.index]))
+        self.consume_token(StreamEndToken)
         self.events.append(StreamEndEvent(None, None))
 
     # document: DIRECTIVE? DOCUMENT-START node
@@ -221,7 +225,9 @@ class CanonicalParser:
         if self.test_token(DirectiveToken):
             self.consume_token(DirectiveToken)
         self.consume_token(DocumentStartToken)
+        self.events.append(DocumentStartEvent(None, None))
         self.parse_node()
+        self.events.append(DocumentEndEvent(None, None))
 
     # node: ALIAS | ANCHOR? TAG? (SCALAR|sequence|mapping)
     def parse_node(self):
diff --git a/tests/test_mark.py b/tests/test_mark.py
new file mode 100644
index 0000000..4fa665e
--- /dev/null
+++ b/tests/test_mark.py
@@ -0,0 +1,34 @@
+
+import test_appliance
+
+from yaml.reader import Mark
+
+class TestMark(test_appliance.TestAppliance):
+
+    def _testMarks(self, test_name, marks_filename):
+        inputs = file(marks_filename, 'rb').read().split('---\n')[1:]
+        for input in inputs:
+            index = 0
+            line = 0
+            column = 0
+            while input[index] != '*':
+                if input[index] == '\n':
+                    line += 1
+                    column = 0
+                else:
+                    column += 1
+                index += 1
+            mark = Mark(test_name, index, line, column, unicode(input), index)
+            snippet = mark.get_snippet(indent=2, max_length=79)
+            #print "INPUT:"
+            #print input
+            #print "SNIPPET:"
+            #print snippet
+            self.failUnless(isinstance(snippet, str))
+            self.failUnlessEqual(snippet.count('\n'), 1)
+            data, pointer = snippet.split('\n')
+            self.failUnless(len(data) < 82)
+            self.failUnlessEqual(data[len(pointer)-1], '*')
+
+TestMark.add_tests('testMarks', '.marks')
+
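With the appliance changes above, a canonical parse is bracketed by stream
and document events on both ends. For a single-document stream the expected
shape is, schematically (event class names only; a sketch of the protocol,
not captured parser output):

    EXPECTED = [
        'StreamStartEvent',
        'DocumentStartEvent',
        'ScalarEvent',          # or the events of a sequence/mapping tree
        'DocumentEndEvent',
        'StreamEndEvent',
    ]

    def event_shape(events):
        # events: instances produced by the canonical parser.
        return [event.__class__.__name__ for event in events]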
diff --git a/tests/test_marker.py b/tests/test_marker.py
deleted file mode 100644
index 9ea4474..0000000
--- a/tests/test_marker.py
+++ /dev/null
@@ -1,34 +0,0 @@
-
-import test_appliance
-
-from yaml.reader import Marker
-
-class TestMarker(test_appliance.TestAppliance):
-
-    def _testMarkers(self, test_name, markers_filename):
-        inputs = file(markers_filename, 'rb').read().split('---\n')[1:]
-        for input in inputs:
-            index = 0
-            line = 0
-            column = 0
-            while input[index] != '*':
-                if input[index] == '\n':
-                    line += 1
-                    column = 0
-                else:
-                    column += 1
-                index += 1
-            marker = Marker(test_name, line, column, unicode(input), index)
-            snippet = marker.get_snippet(indent=2, max_length=79)
-            #print "INPUT:"
-            #print input
-            #print "SNIPPET:"
-            #print snippet
-            self.failUnless(isinstance(snippet, str))
-            self.failUnlessEqual(snippet.count('\n'), 1)
-            data, pointer = snippet.split('\n')
-            self.failUnless(len(data) < 82)
-            self.failUnlessEqual(data[len(pointer)-1], '*')
-
-TestMarker.add_tests('testMarkers', '.markers')
-
diff --git a/tests/test_structure.py b/tests/test_structure.py
index 985d2c3..ca3cb60 100644
--- a/tests/test_structure.py
+++ b/tests/test_structure.py
@@ -12,7 +12,10 @@ class TestStructure(test_appliance.TestAppliance):
             parser = Parser(Scanner(Reader(file(data_filename, 'rb'))))
             node1 = []
             while not parser.check(StreamEndEvent):
-                node1.append(self._convert(parser))
+                if not parser.check(StreamStartEvent, DocumentStartEvent, DocumentEndEvent):
+                    node1.append(self._convert(parser))
+                else:
+                    parser.get()
             parser.get()
             if len(node1) == 1:
                 node1 = node1[0]
diff --git a/tests/test_tokens.py b/tests/test_tokens.py
index 2ccc305..38026d5 100644
--- a/tests/test_tokens.py
+++ b/tests/test_tokens.py
@@ -54,7 +54,7 @@ class TestTokens(test_appliance.TestAppliance):
             scanner = Scanner(Reader(file(data_filename, 'rb')))
             tokens1 = []
             for token in scanner:
-                if not isinstance(token, StreamEndToken):
+                if not isinstance(token, (StreamStartToken, StreamEndToken)):
                     tokens1.append(token)
             tokens1 = [self.replaces[t.__class__] for t in tokens1]
             self.failUnlessEqual(tokens1, tokens2)
@@ -77,7 +77,7 @@ class TestScanner(test_appliance.TestAppliance):
                 scanner = Scanner(Reader(file(filename, 'rb')))
                 tokens = []
                 for token in scanner:
-                    if not isinstance(token, StreamEndToken):
+                    if not isinstance(token, (StreamStartToken, StreamEndToken)):
                         tokens.append(token.__class__.__name__)
             except:
                 print
diff --git a/tests/test_yaml.py b/tests/test_yaml.py
index cfd4e79..bb5a9f1 100644
--- a/tests/test_yaml.py
+++ b/tests/test_yaml.py
@@ -1,7 +1,7 @@
 
 import unittest
 
-from test_marker import *
+from test_mark import *
 from test_reader import *
 from test_canonical import *
 from test_tokens import *
-- 
cgit v1.2.1