author     xi <xi@18f92427-320e-0410-9341-c67f048884a3>  2006-02-19 22:17:28 +0000
committer  xi <xi@18f92427-320e-0410-9341-c67f048884a3>  2006-02-19 22:17:28 +0000
commit     282bbe171b1c36ce94185dbb95fbd9d5b971d21f (patch)
tree       76547cfe9bdb93f11d732ea7b388e69d55dfa5bf
parent     6070191305af2f626f202d33470e0f76cd0aaa1f (diff)
download   pyyaml-282bbe171b1c36ce94185dbb95fbd9d5b971d21f.tar.gz
Parser is done. Add iterator interfaces for Scanner and Parser.
git-svn-id: http://svn.pyyaml.org/branches/pyyaml3000@51 18f92427-320e-0410-9341-c67f048884a3
-rw-r--r--  lib/yaml/__init__.py              19
-rw-r--r--  lib/yaml/events.py                49
-rw-r--r--  lib/yaml/parser.py               583
-rw-r--r--  lib/yaml/scanner.py              150
-rw-r--r--  lib/yaml/tokens.py                51
-rw-r--r--  tests/data/spec-05-08.canonical    4
-rw-r--r--  tests/data/spec-06-01.canonical    2
-rw-r--r--  tests/data/spec-09-20.canonical    2
-rw-r--r--  tests/data/spec-09-30.canonical    4
-rw-r--r--  tests/data/spec-09-31.canonical    4
-rw-r--r--  tests/data/spec-09-32.canonical    4
-rw-r--r--  tests/data/spec-09-33.canonical    4
-rw-r--r--  tests/test_appliance.py          198
-rw-r--r--  tests/test_canonical.py           13
-rw-r--r--  tests/test_errors.py               6
-rw-r--r--  tests/test_structure.py          129
-rw-r--r--  tests/test_tokens.py              13
17 files changed, 713 insertions, 522 deletions
diff --git a/lib/yaml/__init__.py b/lib/yaml/__init__.py
index e69de29..cae7cde 100644
--- a/lib/yaml/__init__.py
+++ b/lib/yaml/__init__.py
@@ -0,0 +1,19 @@
+
+from reader import Reader
+from scanner import Scanner
+from parser import Parser
+
+from tokens import *
+from events import *
+
+def scan(data, Reader=Reader, Scanner=Scanner):
+ reader = Reader(data)
+ scanner = Scanner(reader)
+ return iter(scanner)
+
+def parse(data, Reader=Reader, Scanner=Scanner, Parser=Parser):
+ reader = Reader(data)
+ scanner = Scanner(reader)
+ parser = Parser(scanner)
+ return iter(parser)
+
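The scan() and parse() helpers above are the new iterator entry points named in the commit message. A minimal usage sketch, assuming lib/ is on sys.path and that the yaml.reader module (referenced but not shown in this diff) provides Reader:

    import yaml

    data = "- one\n- two\n"

    # scan() yields Token objects from the Scanner.
    for token in yaml.scan(data):
        print token

    # parse() yields Event objects from the Parser.
    for event in yaml.parse(data):
        print event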
diff --git a/lib/yaml/events.py b/lib/yaml/events.py
new file mode 100644
index 0000000..6ecb772
--- /dev/null
+++ b/lib/yaml/events.py
@@ -0,0 +1,49 @@
+
+class Event:
+ def __init__(self, start_marker, end_marker):
+ self.start_marker = start_marker
+ self.end_marker = end_marker
+ def __repr__(self):
+ attributes = [key for key in self.__dict__
+ if not key.endswith('_marker')]
+ attributes.sort()
+ arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
+ for key in attributes])
+ return '%s(%s)' % (self.__class__.__name__, arguments)
+
+class NodeEvent(Event):
+ def __init__(self, anchor, tag, start_marker, end_marker):
+ self.anchor = anchor
+ self.tag = tag
+ self.start_marker = start_marker
+ self.end_marker = end_marker
+
+class AliasEvent(NodeEvent):
+ def __init__(self, name, start_marker, end_marker):
+ self.name = name
+ self.start_marker = start_marker
+ self.end_marker = end_marker
+
+class ScalarEvent(NodeEvent):
+ def __init__(self, anchor, tag, value, start_marker, end_marker):
+ self.anchor = anchor
+ self.tag = tag
+ self.value = value
+ self.start_marker = start_marker
+ self.end_marker = end_marker
+
+class CollectionEvent(NodeEvent):
+ pass
+
+class SequenceEvent(CollectionEvent):
+ pass
+
+class MappingEvent(CollectionEvent):
+ pass
+
+class CollectionEndEvent(Event):
+ pass
+
+class StreamEndEvent(Event):
+ pass
+
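To illustrate the event classes above: a document such as

    foo: [bar, baz]

should produce roughly the following event stream (a sketch; markers are omitted, and per the tag resolution in parser.py below, plain scalars keep tag None while collections default to u'!'):

    MappingEvent(anchor=None, tag=u'!')
    ScalarEvent(anchor=None, tag=None, value=u'foo')
    SequenceEvent(anchor=None, tag=u'!')
    ScalarEvent(anchor=None, tag=None, value=u'bar')
    ScalarEvent(anchor=None, tag=None, value=u'baz')
    CollectionEndEvent()
    CollectionEndEvent()
    StreamEndEvent()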
diff --git a/lib/yaml/parser.py b/lib/yaml/parser.py
index b7c5aa7..858d906 100644
--- a/lib/yaml/parser.py
+++ b/lib/yaml/parser.py
@@ -1,6 +1,8 @@
-# Production rules:
-# stream ::= implicit_document? explicit_document* END
+# YAML can be parsed by an LL(1) parser!
+#
+# We use the following production rules:
+# stream ::= implicit_document? explicit_document* STREAM-END
# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END?
# implicit_document ::= block_node DOCUMENT-END?
# block_node ::= ALIAS | properties? block_content
@@ -9,20 +11,37 @@
# block_content ::= block_collection | flow_collection | SCALAR
# flow_content ::= flow_collection | SCALAR
# block_collection ::= block_sequence | block_mapping
-# block_sequence ::= BLOCK-SEQUENCE-START (ENTRY block_node?)* BLOCK-END
+# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
# block_mapping ::= BLOCK-MAPPING_START ((KEY block_node_or_indentless_sequence?)? (VALUE block_node_or_indentless_sequence?)?)* BLOCK-END
# block_node_or_indentless_sequence ::= ALIAS | properties? (block_content | indentless_block_sequence)
-# indentless_block_sequence ::= (ENTRY block_node?)+
+# indentless_block_sequence ::= (BLOCK-ENTRY block_node?)+
# flow_collection ::= flow_sequence | flow_mapping
-# flow_sequence ::= FLOW-SEQUENCE-START (flow_sequence_entry ENTRY)* flow_sequence_entry? FLOW-SEQUENCE-END
-# flow_mapping ::= FLOW-MAPPING-START flow_mapping_entry ENTRY)* flow_mapping_entry? FLOW-MAPPING-END
-# flow_sequence_entry ::= flow_node | KEY flow_node (VALUE flow_node?)?
-# flow_mapping_entry ::= flow_node | KEY flow_node (VALUE flow_node?)?
+# flow_sequence ::= FLOW-SEQUENCE-START (flow_sequence_entry FLOW-ENTRY)* flow_sequence_entry? FLOW-SEQUENCE-END
+# flow_mapping ::= FLOW-MAPPING-START (flow_mapping_entry FLOW-ENTRY)* flow_mapping_entry? FLOW-MAPPING-END
+# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+#
+# Note that there is a slight deviation from the specification. We require
+# non-empty node content if ANCHOR or TAG is specified. This disallows
+# documents such as
+#
+# key: !!str # empty value
+#
+# This is done to prevent ambiguity in parsing tags and aliases:
+#
+# { !!perl/YAML::Parser: value }
+#
+# What is it? Should it be interpreted as
+# { ? !<tag:yaml.org,2002:perl/YAML::Parser> '' : value }
+# or
+# { ? !<tag:yaml.org,2002:perl/YAML::Parser:> value : '' }
+# Since we disallow empty node content after a tag or anchor, tags are
+# always followed by spaces or line breaks.
-# FIRST(rule) sets:
-# stream: {}
+# FIRST sets:
+# stream: FIRST(block_node) + { DIRECTIVE DOCUMENT-START }
# explicit_document: { DIRECTIVE DOCUMENT-START }
-# implicit_document: block_node
+# implicit_document: FIRST(block_node)
# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
@@ -31,7 +50,7 @@
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_sequence: { BLOCK-SEQUENCE-START }
# block_mapping: { BLOCK-MAPPING-START }
-# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START ENTRY }
+# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY }
# indentless_sequence: { ENTRY }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_sequence: { FLOW-SEQUENCE-START }
@@ -41,78 +60,131 @@
from error import YAMLError
from tokens import *
+from events import *
class ParserError(YAMLError):
- pass
-class Node:
- def __repr__(self):
- args = []
- for attribute in ['anchor', 'tag', 'value']:
- if hasattr(self, attribute):
- args.append(repr(getattr(self, attribute)))
- return "%s(%s)" % (self.__class__.__name__, ', '.join(args))
+ def __init__(self, context=None, context_marker=None,
+ problem=None, problem_marker=None):
+ self.context = context
+ self.context_marker = context_marker
+ self.problem = problem
+ self.problem_marker = problem_marker
-class AliasNode(Node):
- def __init__(self, anchor):
- self.anchor = anchor
-
-class ScalarNode(Node):
- def __init__(self, anchor, tag, value):
- self.anchor = anchor
- self.tag = tag
- self.value = value
-
-class SequenceNode(Node):
- def __init__(self, anchor, tag, value):
- self.anchor = anchor
- self.tag = tag
- self.value = value
-
-class MappingNode(Node):
- def __init__(self, anchor, tag, value):
- self.anchor = anchor
- self.tag = tag
- self.value = value
+ def __str__(self):
+ lines = []
+ for (place, marker) in [(self.context, self.context_marker),
+ (self.problem, self.problem_marker)]:
+ if place is not None:
+ lines.append(place)
+ if marker is not None:
+ lines.append(str(marker))
+ return '\n'.join(lines)
class Parser:
+ # Since writing an LL(1) parser is a straightforward task, we do not give
+ # many comments here.
+ # Note that we use Python generators. If you rewrite the parser to another
+ # language, you may replace all 'yield'-s with event handler calls.
+
+ DEFAULT_TAGS = {
+ u'!': u'!',
+ u'!!': u'tag:yaml.org,2002:',
+ }
def __init__(self, scanner):
self.scanner = scanner
+ self.current_event = None
+ self.yaml_version = None
+ self.tag_handles = {}
+ self.event_generator = self.parse_stream()
- def is_token(self, *choices):
- token = self.scanner.peek_token()
- for choice in choices:
- if isinstance(token, choices):
- return True
+ def check(self, *choices):
+ # Check the type of the next event.
+ if self.current_event is None:
+ try:
+ self.current_event = self.event_generator.next()
+ except StopIteration:
+ pass
+ if self.current_event is not None:
+ for choice in choices:
+ if isinstance(self.current_event, choice):
+ return True
return False
- def get_token(self):
- return self.scanner.get_token()
+ def get(self):
+ # Get the next event.
+ if self.current_event is None:
+ try:
+ self.current_event = self.event_generator.next()
+ except StopIteration:
+ pass
+ value = self.current_event
+ self.current_event = None
+ return value
- def parse(self):
- return self.parse_stream()
+ def __iter__(self):
+ # Iterator protocol.
+ return self.event_generator
def parse_stream(self):
- documents = []
- if not self.is_token(DirectiveToken, DocumentStartToken, StreamEndToken):
- documents.append(self.parse_block_node())
- while not self.is_token(StreamEndToken):
- while self.is_token(DirectiveToken):
- self.get_token()
- if not self.is_token(DocumentStartToken):
- self.fail('DOCUMENT-START is expected')
- self.get_token()
- if self.is_token(DirectiveToken,
+ # implicit_document? explicit_document* STREAM-END
+
+ # Parse implicit document.
+ if not self.scanner.check(DirectiveToken, DocumentStartToken,
+ StreamEndToken):
+ self.tag_handles = self.DEFAULT_TAGS
+ for event in self.parse_block_node():
+ yield event
+
+ # Parse explicit documents.
+ while not self.scanner.check(StreamEndToken):
+ self.process_directives()
+ if not self.scanner.check(DocumentStartToken):
+ raise ParserError(None, None,
+ "expected '<document start>', but found %r"
+ % self.scanner.peek().id,
+ self.scanner.peek().start_marker)
+ token = self.scanner.get()
+ if self.scanner.check(DirectiveToken,
DocumentStartToken, DocumentEndToken, StreamEndToken):
- documents.append(None)
+ yield self.process_empty_scalar(token.end_marker)
else:
- documents.append(self.parse_block_node())
- while self.is_token(DocumentEndToken):
- self.get_token()
- if not self.is_token(StreamEndToken):
- self.fail("STREAM-END is expected")
- return documents
+ for event in self.parse_block_node():
+ yield event
+ while self.scanner.check(DocumentEndToken):
+ self.scanner.get()
+
+ # Parse end of stream.
+ token = self.scanner.get()
+ yield StreamEndEvent(token.start_marker, token.end_marker)
+
+ def process_directives(self):
+ # DIRECTIVE*
+ self.yaml_version = None
+ self.tag_handles = {}
+ while self.scanner.check(DirectiveToken):
+ token = self.scanner.get()
+ if token.name == u'YAML':
+ if self.yaml_version is not None:
+ raise ParserError(None, None,
+ "found duplicate YAML directive", token.start_marker())
+ major, minor = token.value
+ if major != 1:
+ raise ParserError(None, None,
+ "found incompatible YAML document (version 1.* is required)",
+ token.start_marker)
+ self.yaml_version = token.value
+ elif token.name == u'TAG':
+ handle, prefix = token.value
+ if handle in self.tag_handles:
+ raise ParserError(None, None,
+ "duplicate tag handle %r" % handle.encode('utf-8'),
+ token.start_marker)
+ self.tag_handles[handle] = prefix
+ for key in self.DEFAULT_TAGS:
+ if key not in self.tag_handles:
+ self.tag_handles[key] = self.DEFAULT_TAGS[key]
def parse_block_node(self):
return self.parse_node(block=True)
@@ -124,165 +196,254 @@ class Parser:
return self.parse_node(block=True, indentless_sequence=True)
def parse_node(self, block=False, indentless_sequence=False):
- if self.is_token(AliasToken):
- token = self.get_token()
- return AliasNode(token.value)
- anchor = None
- tag = None
- if self.is_token(AnchorToken):
- anchor = self.get_token().value
- if self.is_token(TagToken):
- tag = self.get_token().value
- elif self.is_token(TagToken):
- tag = self.get_token().value
- if self.is_token(AnchorToken):
- anchor = self.get_token().value
- if indentless_sequence and self.is_token(EntryToken):
- NodeClass = SequenceNode
- value = self.parse_indentless_sequence()
+ # block_node ::= ALIAS | properties? block_content
+ # flow_node ::= ALIAS | properties? flow_content
+ # properties ::= TAG ANCHOR? | ANCHOR TAG?
+ # block_content ::= block_collection | flow_collection | SCALAR
+ # flow_content ::= flow_collection | SCALAR
+ # block_collection ::= block_sequence | block_mapping
+ # block_node_or_indentless_sequence ::= ALIAS | properties?
+ # (block_content | indentless_block_sequence)
+ if self.scanner.check(AliasToken):
+ token = self.scanner.get()
+ yield AliasEvent(token.value, token.start_marker, token.end_marker)
else:
- if self.is_token(ScalarToken):
- NodeClass = ScalarNode
- elif self.is_token(BlockSequenceStartToken, FlowSequenceStartToken):
- NodeClass = SequenceNode
- elif self.is_token(BlockMappingStartToken, FlowMappingStartToken):
- NodeClass = MappingNode
- if block:
- value = self.parse_block_content()
+ anchor = None
+ tag = None
+ start_marker = end_marker = tag_marker = None
+ if self.scanner.check(AnchorToken):
+ token = self.scanner.get()
+ start_marker = end_marker = token.start_marker
+ anchor = token.value
+ if self.scanner.check(TagToken):
+ token = self.scanner.get()
+ end_marker = tag_marker = token.start_marker
+ tag = token.value
+ elif self.scanner.check(TagToken):
+ token = self.scanner.get()
+ start_marker = end_marker = tag_marker = token.start_marker
+ tag = token.value
+ if self.scanner.check(AnchorToken):
+ token = self.scanner.get()
+ end_marker = token.start_marker
+ anchor = token.value
+ if tag is not None:
+ handle, suffix = tag
+ if handle is not None:
+ if handle not in self.tag_handles:
+ raise ParserError("while parsing a node", start_marker,
+ "found undefined tag handle %r" % handle.encode('utf-8'),
+ tag_marker)
+ tag = self.tag_handles[handle]+suffix
+ else:
+ tag = suffix
+ if tag is None:
+ if not (self.scanner.check(ScalarToken) and
+ self.scanner.peek().plain):
+ tag = u'!'
+ if start_marker is None:
+ start_marker = self.scanner.peek().start_marker
+ event = None
+ collection_events = None
+ if indentless_sequence and self.scanner.check(BlockEntryToken):
+ end_marker = self.scanner.peek().end_marker
+ event = SequenceEvent(anchor, tag, start_marker, end_marker)
+ collection_events = self.parse_indentless_sequence()
else:
- value = self.parse_flow_content()
- return NodeClass(anchor, tag, value)
-
- def parse_block_content(self):
- if self.is_token(ScalarToken):
- return self.get_token().value
- elif self.is_token(BlockSequenceStartToken):
- return self.parse_block_sequence()
- elif self.is_token(BlockMappingStartToken):
- return self.parse_block_mapping()
- elif self.is_token(FlowSequenceStartToken):
- return self.parse_flow_sequence()
- elif self.is_token(FlowMappingStartToken):
- return self.parse_flow_mapping()
- else:
- self.fail('block content is expected')
-
- def parse_flow_content(self):
- if self.is_token(ScalarToken):
- return self.get_token().value
- elif self.is_token(FlowSequenceStartToken):
- return self.parse_flow_sequence()
- elif self.is_token(FlowMappingStartToken):
- return self.parse_flow_mapping()
- else:
- self.fail('flow content is expected')
+ if self.scanner.check(ScalarToken):
+ token = self.scanner.get()
+ end_marker = token.end_marker
+ event = ScalarEvent(anchor, tag, token.value,
+ start_marker, end_marker)
+ elif self.scanner.check(FlowSequenceStartToken):
+ end_marker = self.scanner.peek().end_marker
+ event = SequenceEvent(anchor, tag, start_marker, end_marker)
+ collection_events = self.parse_flow_sequence()
+ elif self.scanner.check(FlowMappingStartToken):
+ end_marker = self.scanner.peek().end_marker
+ event = MappingEvent(anchor, tag, start_marker, end_marker)
+ collection_events = self.parse_flow_mapping()
+ elif block and self.scanner.check(BlockSequenceStartToken):
+ end_marker = self.scanner.peek().start_marker
+ event = SequenceEvent(anchor, tag, start_marker, end_marker)
+ collection_events = self.parse_block_sequence()
+ elif block and self.scanner.check(BlockMappingStartToken):
+ end_marker = self.scanner.peek().start_marker
+ event = MappingEvent(anchor, tag, start_marker, end_marker)
+ collection_events = self.parse_block_mapping()
+ else:
+ if block:
+ node = 'block'
+ else:
+ node = 'flow'
+ token = self.scanner.peek()
+ raise ParserError("while scanning a %s node" % node, start_marker,
+ "expected the node content, but found %r" % token.id,
+ token.start_marker)
+ yield event
+ if collection_events is not None:
+ for event in collection_events:
+ yield event
def parse_block_sequence(self):
- sequence = []
- if not self.is_token(BlockSequenceStartToken):
- self.fail('BLOCK-SEQUENCE-START is expected')
- self.get_token()
- while self.is_token(EntryToken):
- self.get_token()
- if not self.is_token(EntryToken, BlockEndToken):
- sequence.append(self.parse_block_node())
+ # BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
+ token = self.scanner.get()
+ start_marker = token.start_marker
+ while self.scanner.check(BlockEntryToken):
+ token = self.scanner.get()
+ if not self.scanner.check(BlockEntryToken, BlockEndToken):
+ for event in self.parse_block_node():
+ yield event
else:
- sequence.append(None)
- if not self.is_token(BlockEndToken):
- self.fail('BLOCK-END is expected')
- self.get_token()
- return sequence
+ yield self.process_empty_scalar(token.end_marker)
+ if not self.scanner.check(BlockEndToken):
+ token = self.scanner.peek()
+ raise ParserError("while scanning a block collection", start_marker,
+ "expected <block end>, but found %r" % token.id, token.start_marker)
+ token = self.scanner.get()
+ yield CollectionEndEvent(token.start_marker, token.end_marker)
def parse_indentless_sequence(self):
- sequence = []
- while self.is_token(EntryToken):
- self.get_token()
- if not self.is_token(EntryToken):
- sequence.append(self.parse_block_node())
+ # (BLOCK-ENTRY block_node?)+
+ while self.scanner.check(BlockEntryToken):
+ token = self.scanner.get()
+ if not self.scanner.check(BlockEntryToken,
+ KeyToken, ValueToken, BlockEndToken):
+ for event in self.parse_block_node():
+ yield event
else:
- sequence.append(None)
- return sequence
+ yield self.process_empty_scalar(token.end_marker)
+ token = self.scanner.peek()
+ yield CollectionEndEvent(token.start_marker, token.start_marker)
def parse_block_mapping(self):
- mapping = []
- if not self.is_token(BlockMappingStartToken):
- self.fail('BLOCK-MAPPING-START is expected')
- self.get_token()
- while self.is_token(KeyToken, ValueToken):
- key = None
- value = None
- if self.is_token(KeyToken):
- self.get_token()
- if not self.is_token(KeyToken, ValueToken, BlockEndToken):
- key = self.parse_block_node_or_indentless_sequence()
- if self.is_token(ValueToken):
- self.get_token()
- if not self.is_token(KeyToken, ValueToken, BlockEndToken):
- value = self.parse_block_node_or_indentless_sequence()
- mapping.append((key, value))
- if not self.is_token(BlockEndToken):
- self.fail('BLOCK-END is expected')
- self.get_token()
- return mapping
+ # BLOCK-MAPPING_START
+ # ((KEY block_node_or_indentless_sequence?)?
+ # (VALUE block_node_or_indentless_sequence?)?)*
+ # BLOCK-END
+ token = self.scanner.get()
+ start_marker = token.start_marker
+ while self.scanner.check(KeyToken, ValueToken):
+ if self.scanner.check(KeyToken):
+ token = self.scanner.get()
+ if not self.scanner.check(KeyToken, ValueToken, BlockEndToken):
+ for event in self.parse_block_node_or_indentless_sequence():
+ yield event
+ else:
+ yield self.process_empty_scalar(token.end_marker)
+ if self.scanner.check(ValueToken):
+ token = self.scanner.get()
+ if not self.scanner.check(KeyToken, ValueToken, BlockEndToken):
+ for event in self.parse_block_node_or_indentless_sequence():
+ yield event
+ else:
+ yield self.process_empty_scalar(token.end_marker)
+ else:
+ token = self.scanner.peek()
+ yield self.process_empty_scalar(token.start_marker)
+ if not self.scanner.check(BlockEndToken):
+ token = self.scanner.peek()
+ raise ParserError("while scanning a block mapping", start_marker,
+ "expected <block end>, but found %r" % token.id, token.start_marker)
+ token = self.scanner.get()
+ yield CollectionEndEvent(token.start_marker, token.end_marker)
def parse_flow_sequence(self):
- sequence = []
- if not self.is_token(FlowSequenceStartToken):
- self.fail('FLOW-SEQUENCE-START is expected')
- self.get_token()
- while not self.is_token(FlowSequenceEndToken):
- if self.is_token(KeyToken):
- self.get_token()
- key = None
- value = None
- if not self.is_token(ValueToken):
- key = self.parse_flow_node()
- if self.is_token(ValueToken):
- self.get_token()
- if not self.is_token(EntryToken, FlowSequenceEndToken):
- value = self.parse_flow_node()
- node = MappingNode(None, None, [(key, value)])
- sequence.append(node)
+ # flow_sequence ::= FLOW-SEQUENCE-START
+ # (flow_sequence_entry FLOW-ENTRY)*
+ # flow_sequence_entry?
+ # FLOW-SEQUENCE-END
+ # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+ #
+ # Note that although the production rules for flow_sequence_entry and
+ # flow_mapping_entry are the same, their interpretations differ. For
+ # `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
+ # generates an inline mapping (set syntax).
+ token = self.scanner.get()
+ start_marker = token.start_marker
+ while not self.scanner.check(FlowSequenceEndToken):
+ if self.scanner.check(KeyToken):
+ token = self.scanner.get()
+ yield MappingEvent(None, u'!',
+ token.start_marker, token.end_marker)
+ if not self.scanner.check(ValueToken,
+ FlowEntryToken, FlowSequenceEndToken):
+ for event in self.parse_flow_node():
+ yield event
+ else:
+ yield self.process_empty_scalar(token.end_marker)
+ if self.scanner.check(ValueToken):
+ token = self.scanner.get()
+ if not self.scanner.check(FlowEntryToken, FlowSequenceEndToken):
+ for event in self.parse_flow_node():
+ yield event
+ else:
+ yield self.process_empty_scalar(token.end_marker)
+ else:
+ token = self.scanner.peek()
+ yield self.process_empty_scalar(token.start_marker)
+ token = self.scanner.peek()
+ yield CollectionEndEvent(token.start_marker, token.start_marker)
else:
- sequence.append(self.parse_flow_node())
- if not self.is_token(EntryToken, FlowSequenceEndToken):
- self.fail("ENTRY or FLOW-SEQUENCE-END are expected")
- if self.is_token(EntryToken):
- self.get_token()
- if not self.is_token(FlowSequenceEndToken):
- self.fail('FLOW-SEQUENCE-END is expected')
- self.get_token()
- return sequence
+ for event in self.parse_flow_node():
+ yield event
+ if not self.scanner.check(FlowEntryToken, FlowSequenceEndToken):
+ token = self.scanner.peek()
+ raise ParserError("while scanning a flow sequence", start_marker,
+ "expected ',' or ']', but got %r" % token.id, token.start_marker)
+ if self.scanner.check(FlowEntryToken):
+ self.scanner.get()
+ if not self.scanner.check(FlowSequenceEndToken):
+ token = self.scanner.peek()
+ raise ParserError("while scanning a flow sequence", start_marker,
+ "expected ']', but found %r" % token.id, token.start_marker)
+ token = self.scanner.get()
+ yield CollectionEndEvent(token.start_marker, token.end_marker)
def parse_flow_mapping(self):
- mapping = []
- if not self.is_token(FlowMappingStartToken):
- self.fail('FLOW-MAPPING-START is expected')
- self.get_token()
- while not self.is_token(FlowMappingEndToken):
- if self.is_token(KeyToken):
- self.get_token()
- key = None
- value = None
- if not self.is_token(ValueToken):
- key = self.parse_flow_node()
- if self.is_token(ValueToken):
- self.get_token()
- if not self.is_token(EntryToken, FlowMappingEndToken):
- value = self.parse_flow_node()
- mapping.append((key, value))
+ # flow_mapping ::= FLOW-MAPPING-START
+ # (flow_mapping_entry FLOW-ENTRY)*
+ # flow_mapping_entry?
+ # FLOW-MAPPING-END
+ # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+ token = self.scanner.get()
+ start_marker = token.start_marker
+ while not self.scanner.check(FlowMappingEndToken):
+ if self.scanner.check(KeyToken):
+ token = self.scanner.get()
+ if not self.scanner.check(ValueToken,
+ FlowEntryToken, FlowMappingEndToken):
+ for event in self.parse_flow_node():
+ yield event
+ else:
+ yield self.process_empty_scalar(token.end_marker)
+ if self.scanner.check(ValueToken):
+ token = self.scanner.get()
+ if not self.scanner.check(FlowEntryToken, FlowMappingEndToken):
+ for event in self.parse_flow_node():
+ yield event
+ else:
+ yield self.process_empty_scalar(token.end_marker)
+ else:
+ token = self.scanner.peek()
+ yield self.process_empty_scalar(token.start_marker)
else:
- mapping.append((self.parse_flow_node(), None))
- if not self.is_token(EntryToken, FlowMappingEndToken):
- self.fail("ENTRY or FLOW-MAPPING-END are expected")
- if self.is_token(EntryToken):
- self.get_token()
- if not self.is_token(FlowMappingEndToken):
- self.fail('FLOW-MAPPING-END is expected')
- self.get_token()
- return mapping
+ for event in self.parse_flow_node():
+ yield event
+ yield self.process_empty_scalar(self.scanner.peek().start_marker)
+ if not self.scanner.check(FlowEntryToken, FlowMappingEndToken):
+ token = self.scanner.peek()
+ raise ParserError("while scanning a flow mapping", start_marker,
+ "expected ',' or '}', but got %r" % token.id, token.start_marker)
+ if self.scanner.check(FlowEntryToken):
+ self.scanner.get()
+ if not self.scanner.check(FlowMappingEndToken):
+ token = self.scanner.peek()
+ raise ParserError("while scanning a flow mapping", start_marker,
+ "expected '}', but found %r" % token.id, token.start_marker)
+ token = self.scanner.get()
+ yield CollectionEndEvent(token.start_marker, token.end_marker)
- def fail(self, message):
- marker = self.scanner.peek_token().start_marker
- raise ParserError(message+':\n'+marker.get_snippet())
+ def process_empty_scalar(self, marker):
+ return ScalarEvent(None, None, u'', marker, marker)
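Besides plain iteration, the rewritten Parser exposes a pull interface through check() and get(), which tests/test_structure.py below relies on. A hedged sketch of driving it directly, under the same package-layout assumptions as above:

    from yaml import Reader, Scanner, Parser
    from yaml.events import StreamEndEvent

    parser = Parser(Scanner(Reader("- a\n- b\n")))
    # Pull events until the stream-end event comes up;
    # check() peeks without consuming, get() consumes.
    while not parser.check(StreamEndEvent):
        print parser.get()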
diff --git a/lib/yaml/scanner.py b/lib/yaml/scanner.py
index c83a551..220a99b 100644
--- a/lib/yaml/scanner.py
+++ b/lib/yaml/scanner.py
@@ -14,7 +14,6 @@ from error import YAMLError
from tokens import *
class ScannerError(YAMLError):
- # TODO:
# ScannerError: while reading a quoted string
# in '...', line 5, column 10:
# key: "valu\?e"
@@ -23,6 +22,7 @@ class ScannerError(YAMLError):
# in '...', line 5, column 15:
# key: "valu\?e"
# ^
+
def __init__(self, context=None, context_marker=None,
problem=None, problem_marker=None):
self.context = context
@@ -41,6 +41,8 @@ class ScannerError(YAMLError):
return '\n'.join(lines)
class SimpleKey:
+ # See the simple keys treatment below.
+
def __init__(self, token_number, required, index, line, column, marker):
self.token_number = token_number
self.required = required
@@ -114,23 +116,43 @@ class Scanner:
# '[', or '{' tokens.
self.possible_simple_keys = {}
- # Two public methods.
+ # Public methods.
+
+ def check(self, *choices):
+ # Check if the next token is one of the given types.
+ while self.need_more_tokens():
+ self.fetch_more_tokens()
+ if self.tokens:
+ for choice in choices:
+ if isinstance(self.tokens[0], choice):
+ return True
+ return False
- def peek_token(self):
- """Get the current token."""
+ def peek(self):
+ # Return the next token, but do not remove it from the queue.
while self.need_more_tokens():
self.fetch_more_tokens()
if self.tokens:
return self.tokens[0]
- def get_token(self):
- "Get the current token and remove it from the list of pending tokens."""
+ def get(self):
+ # Return the next token and remove it from the queue.
while self.need_more_tokens():
self.fetch_more_tokens()
if self.tokens:
self.tokens_taken += 1
return self.tokens.pop(0)
+ def __iter__(self):
+ # Iterator protocol.
+ while self.need_more_tokens():
+ self.fetch_more_tokens()
+ while self.tokens:
+ self.tokens_taken += 1
+ yield self.tokens.pop(0)
+ while self.need_more_tokens():
+ self.fetch_more_tokens()
+
# Private methods.
def need_more_tokens(self):
@@ -163,10 +185,6 @@ class Scanner:
if ch == u'\0':
return self.fetch_stream_end()
- # Is it the byte order mark?
- if ch == u'\uFEFF':
- return self.fetch_bom()
-
# Is it a directive?
if ch == u'%' and self.check_directive():
return self.fetch_directive()
@@ -197,9 +215,13 @@ class Scanner:
if ch == u'}':
return self.fetch_flow_mapping_end()
- # Is it the entry indicator?
- if ch in u'-,' and self.check_entry():
- return self.fetch_entry()
+ # Is it the flow entry indicator?
+ if ch in u',':
+ return self.fetch_flow_entry()
+
+ # Is it the block entry indicator?
+ if ch in u'-' and self.check_block_entry():
+ return self.fetch_block_entry()
# Is it the key indicator?
if ch == u'?' and self.check_key():
@@ -364,33 +386,6 @@ class Scanner:
# The reader is ended.
self.done = True
- def fetch_bom(self):
- # We consider the BOM marker as a DOCUMENT-END indicator unless it's
- # the first character in the stream. It's a reasonable approximation
- # of the specification requirements. We can follow the specification
- # literally, but it will require a new token class. Probably later.
-
- # We ignore BOM if it is the first character in the stream.
- if self.reader.index == 0:
- slef.reader.forward()
-
- # Otherwise we issue DOCUMENT-END.
- else:
-
- # Set the current intendation to -1.
- self.unwind_indent(-1)
-
- # Reset simple keys. Note that there could not be a block
- # collection after BOM.
- self.remove_possible_simple_key()
- self.allow_simple_key = False
-
- # Add DOCUMENT-END.
- start_marker = self.reader.get_marker()
- self.reader.forward()
- end_marker = self.reader.get_marker()
- self.tokens.append(DocumentEndToken(start_marker, end_marker))
-
def fetch_directive(self):
# Set the current intendation to -1.
@@ -471,7 +466,21 @@ class Scanner:
end_marker = self.reader.get_marker()
self.tokens.append(TokenClass(start_marker, end_marker))
- def fetch_entry(self):
+ def fetch_flow_entry(self):
+
+ # Simple keys are allowed after ','.
+ self.allow_simple_key = True
+
+ # Reset possible simple key on the current level.
+ self.remove_possible_simple_key()
+
+ # Add FLOW-ENTRY.
+ start_marker = self.reader.get_marker()
+ self.reader.forward()
+ end_marker = self.reader.get_marker()
+ self.tokens.append(FlowEntryToken(start_marker, end_marker))
+
+ def fetch_block_entry(self):
# Block context needs additional checks.
if not self.flow_level:
@@ -487,17 +496,22 @@ class Scanner:
marker = self.reader.get_marker()
self.tokens.append(BlockSequenceStartToken(marker, marker))
- # Simple keys are allowed after '-' and ','.
+ # It's an error for the block entry to occur in the flow context,
+ # but we let the parser detect this.
+ else:
+ pass
+
+ # Simple keys are allowed after '-'.
self.allow_simple_key = True
# Reset possible simple key on the current level.
self.remove_possible_simple_key()
- # Add ENTRY.
+ # Add BLOCK-ENTRY.
start_marker = self.reader.get_marker()
self.reader.forward()
end_marker = self.reader.get_marker()
- self.tokens.append(EntryToken(start_marker, end_marker))
+ self.tokens.append(BlockEntryToken(start_marker, end_marker))
def fetch_key(self):
@@ -681,16 +695,10 @@ class Scanner:
and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
return True
- def check_entry(self):
+ def check_block_entry(self):
- # ENTRY(flow context): ','
- if self.flow_level:
- return self.reader.peek() == u','
-
- # ENTRY(block context): '-' (' '|'\n')
- else:
- return self.reader.peek() == u'-' \
- and self.reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
+ # BLOCK-ENTRY: '-' (' '|'\n')
+ return self.reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
def check_key(self):
@@ -737,6 +745,12 @@ class Scanner:
# We ignore spaces, line breaks and comments.
# If we find a line break in the block context, we set the flag
# `allow_simple_key` on.
+ # The byte order mark is stripped if it is the first character in the
+ # stream. We do not yet support a BOM inside the stream, as the
+ # specification requires; any such mark is treated as part of the
+ # document.
+ if self.reader.index == 0 and self.reader.peek() == u'\uFEFF':
+ self.reader.forward()
found = False
while not found:
while self.reader.peek() == u' ':
@@ -980,25 +994,25 @@ class Scanner:
# Unfortunately, folding rules are ambiguous.
#
# This is the folding according to the specification:
+
+ if folded and line_break == u'\n' \
+ and leading_non_space and self.reader.peek() not in u' \t':
+ if not breaks:
+ chunks.append(u' ')
+ else:
+ chunks.append(line_break)
+
+ # This is Clark Evans's interpretation (also in the spec
+ # examples):
#
- #if folded and line_break == u'\n' \
- # and leading_non_space and self.reader.peek() not in u' \t':
+ #if folded and line_break == u'\n':
# if not breaks:
- # chunks.append(u' ')
+ # if self.reader.peek() not in ' \t':
+ # chunks.append(u' ')
+ # else:
+ # chunks.append(line_break)
#else:
# chunks.append(line_break)
- #
- # This is Clark Evans's interpretation (also in the spec
- # examples):
- #
- if folded and line_break == u'\n':
- if not breaks:
- if self.reader.peek() not in ' \t':
- chunks.append(u' ')
- else:
- chunks.append(line_break)
- else:
- chunks.append(line_break)
else:
break
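The Scanner now mirrors the same check()/peek()/get() surface plus the iterator protocol. A small sketch under the same layout assumptions:

    from yaml import Reader, Scanner
    from yaml.tokens import StreamEndToken

    scanner = Scanner(Reader("key: value\n"))
    # Drain tokens until the stream-end token is next.
    while not scanner.check(StreamEndToken):
        print scanner.get()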
diff --git a/lib/yaml/tokens.py b/lib/yaml/tokens.py
index 85d0b30..637ec87 100644
--- a/lib/yaml/tokens.py
+++ b/lib/yaml/tokens.py
@@ -3,9 +3,19 @@ class Token:
def __init__(self, start_marker, end_marker):
self.start_marker = start_marker
self.end_marker = end_marker
+ def __repr__(self):
+ attributes = [key for key in self.__dict__
+ if not key.endswith('_marker')]
+ attributes.sort()
+ arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
+ for key in attributes])
+ return '%s(%s)' % (self.__class__.__name__, arguments)
+
+#class BOMToken(Token):
+# id = '<byte order mark>'
class DirectiveToken(Token):
- code = '<directive>'
+ id = '<directive>'
def __init__(self, name, value, start_marker, end_marker):
self.name = name
self.value = value
@@ -13,67 +23,70 @@ class DirectiveToken(Token):
self.end_marker = end_marker
class DocumentStartToken(Token):
- code = '<document start>'
+ id = '<document start>'
class DocumentEndToken(Token):
- code = '<document end>'
+ id = '<document end>'
class StreamEndToken(Token):
- code = '<stream end>'
+ id = '<stream end>'
class BlockSequenceStartToken(Token):
- code = '<block sequence start>'
+ id = '<block sequence start>'
class BlockMappingStartToken(Token):
- code = '<block mapping end>'
+ id = '<block mapping start>'
class BlockEndToken(Token):
- code = '<block end>'
+ id = '<block end>'
class FlowSequenceStartToken(Token):
- code = '['
+ id = '['
class FlowMappingStartToken(Token):
- code = '{'
+ id = '{'
class FlowSequenceEndToken(Token):
- code = ']'
+ id = ']'
class FlowMappingEndToken(Token):
- code = '}'
+ id = '}'
class KeyToken(Token):
- code = '?'
+ id = '?'
class ValueToken(Token):
- code = ':'
+ id = ':'
+
+class BlockEntryToken(Token):
+ id = '-'
-class EntryToken(Token):
- code = '- or ,'
+class FlowEntryToken(Token):
+ id = ','
class AliasToken(Token):
- code = '<alias>'
+ id = '<alias>'
def __init__(self, value, start_marker, end_marker):
self.value = value
self.start_marker = start_marker
self.end_marker = end_marker
class AnchorToken(Token):
- code = '<anchor>'
+ id = '<anchor>'
def __init__(self, value, start_marker, end_marker):
self.value = value
self.start_marker = start_marker
self.end_marker = end_marker
class TagToken(Token):
- code = '<tag>'
+ id = '<tag>'
def __init__(self, value, start_marker, end_marker):
self.value = value
self.start_marker = start_marker
self.end_marker = end_marker
class ScalarToken(Token):
- code = '<scalar>'
+ id = '<scalar>'
def __init__(self, value, plain, start_marker, end_marker):
self.value = value
self.plain = plain
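With the shared __repr__ added above, a token prints its non-marker attributes sorted by name; a quoted scalar token, for instance, should render roughly as

    ScalarToken(plain=False, value=u'foo')

which is what the commented-out debugging loops in the tests below would print.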
diff --git a/tests/data/spec-05-08.canonical b/tests/data/spec-05-08.canonical
index fd8af6a..610bd68 100644
--- a/tests/data/spec-05-08.canonical
+++ b/tests/data/spec-05-08.canonical
@@ -1,8 +1,8 @@
%YAML 1.1
---
!!map {
- ? !!str "double"
- : !!str "text",
? !!str "single"
: !!str "text",
+ ? !!str "double"
+ : !!str "text",
}
diff --git a/tests/data/spec-06-01.canonical b/tests/data/spec-06-01.canonical
index a1e43ff..f17ec92 100644
--- a/tests/data/spec-06-01.canonical
+++ b/tests/data/spec-06-01.canonical
@@ -8,8 +8,8 @@
? !!str "Flow style"
: !!seq [
!!str "By two",
+ !!str "Also by two",
!!str "Still by two",
- !!str "Again by two",
]
}
}
diff --git a/tests/data/spec-09-20.canonical b/tests/data/spec-09-20.canonical
index 3f697e2..d03bef5 100644
--- a/tests/data/spec-09-20.canonical
+++ b/tests/data/spec-09-20.canonical
@@ -4,5 +4,5 @@
!!str "detected\n",
!!str "\n\n# detected\n",
!!str " explicit\n",
- !!str "\t detected\n",
+ !!str "\t\ndetected\n",
]
diff --git a/tests/data/spec-09-30.canonical b/tests/data/spec-09-30.canonical
index 5c32f16..fc37db1 100644
--- a/tests/data/spec-09-30.canonical
+++ b/tests/data/spec-09-30.canonical
@@ -1,7 +1,7 @@
%YAML 1.1
---
!!str "folded line\n\
- next line\n\
+ next line\n\n\
\ * bullet\n\
- \ * list\n\
+ \ * list\n\n\
last line\n"
diff --git a/tests/data/spec-09-31.canonical b/tests/data/spec-09-31.canonical
index 5c32f16..fc37db1 100644
--- a/tests/data/spec-09-31.canonical
+++ b/tests/data/spec-09-31.canonical
@@ -1,7 +1,7 @@
%YAML 1.1
---
!!str "folded line\n\
- next line\n\
+ next line\n\n\
\ * bullet\n\
- \ * list\n\
+ \ * list\n\n\
last line\n"
diff --git a/tests/data/spec-09-32.canonical b/tests/data/spec-09-32.canonical
index 5c32f16..fc37db1 100644
--- a/tests/data/spec-09-32.canonical
+++ b/tests/data/spec-09-32.canonical
@@ -1,7 +1,7 @@
%YAML 1.1
---
!!str "folded line\n\
- next line\n\
+ next line\n\n\
\ * bullet\n\
- \ * list\n\
+ \ * list\n\n\
last line\n"
diff --git a/tests/data/spec-09-33.canonical b/tests/data/spec-09-33.canonical
index 5c32f16..fc37db1 100644
--- a/tests/data/spec-09-33.canonical
+++ b/tests/data/spec-09-33.canonical
@@ -1,7 +1,7 @@
%YAML 1.1
---
!!str "folded line\n\
- next line\n\
+ next line\n\n\
\ * bullet\n\
- \ * list\n\
+ \ * list\n\n\
last line\n"
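The four spec-09-3*.canonical fixtures above gain an extra "\n" before the more-indented lines because the scanner now follows the specification's folding rule (activated in scanner.py above): a line break preceding a more-indented line is preserved rather than folded into a space.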
diff --git a/tests/test_appliance.py b/tests/test_appliance.py
index c471398..6879036 100644
--- a/tests/test_appliance.py
+++ b/tests/test_appliance.py
@@ -1,6 +1,9 @@
import unittest, os
+from yaml.tokens import *
+from yaml.events import *
+
class TestAppliance(unittest.TestCase):
DATA = 'tests/data'
@@ -32,96 +35,12 @@ class TestAppliance(unittest.TestCase):
setattr(cls, test_method.__name__, test_method)
add_tests = classmethod(add_tests)
-class Node:
- def __repr__(self):
- args = []
- for attribute in ['anchor', 'tag', 'value']:
- if hasattr(self, attribute):
- args.append(repr(getattr(self, attribute)))
- return "%s(%s)" % (self.__class__.__name__, ', '.join(args))
-
-class AliasNode(Node):
- def __init__(self, anchor):
- self.anchor = anchor
-
-class ScalarNode(Node):
- def __init__(self, anchor, tag, value):
- self.anchor = anchor
- self.tag = tag
- self.value = value
-
-class SequenceNode(Node):
- def __init__(self, anchor, tag, value):
- self.anchor = anchor
- self.tag = tag
- self.value = value
-
-class MappingNode(Node):
- def __init__(self, anchor, tag, value):
- self.anchor = anchor
- self.tag = tag
- self.value = value
-
-class Token:
- def __repr__(self):
- args = []
- if hasattr(self, 'value'):
- args.append(repr(self.value))
- return "%s(%s)" % (self.__class__.__name__, ''.join(args))
-
-class StreamEndToken(Token):
- pass
-
-class DirectiveToken(Token):
- pass
-
-class DocumentStartToken(Token):
- pass
-
-class SequenceStartToken(Token):
- pass
-
-class MappingStartToken(Token):
- pass
-
-class SequenceEndToken(Token):
- pass
-
-class MappingEndToken(Token):
- pass
-
-class KeyToken(Token):
- pass
-
-class ValueToken(Token):
- pass
-
-class EntryToken(Token):
- pass
-
-class AliasToken(Token):
- def __init__(self, value):
- self.value = value
-
-class AnchorToken(Token):
- def __init__(self, value):
- self.value = value
-
-class TagToken(Token):
- def __init__(self, value):
- self.value = value
-
-class ScalarToken(Token):
- def __init__(self, value):
- self.value = value
-
class Error(Exception):
pass
class CanonicalScanner:
- def __init__(self, source, data):
- self.source = source
+ def __init__(self, data):
self.data = unicode(data, 'utf-8')+u'\0'
self.index = 0
@@ -132,34 +51,34 @@ class CanonicalScanner:
self.find_token()
ch = self.data[self.index]
if ch == u'\0':
- tokens.append(StreamEndToken())
+ tokens.append(StreamEndToken(None, None))
break
elif ch == u'%':
tokens.append(self.scan_directive())
elif ch == u'-' and self.data[self.index:self.index+3] == u'---':
self.index += 3
- tokens.append(DocumentStartToken())
+ tokens.append(DocumentStartToken(None, None))
elif ch == u'[':
self.index += 1
- tokens.append(SequenceStartToken())
+ tokens.append(FlowSequenceStartToken(None, None))
elif ch == u'{':
self.index += 1
- tokens.append(MappingStartToken())
+ tokens.append(FlowMappingStartToken(None, None))
elif ch == u']':
self.index += 1
- tokens.append(SequenceEndToken())
+ tokens.append(FlowSequenceEndToken(None, None))
elif ch == u'}':
self.index += 1
- tokens.append(MappingEndToken())
+ tokens.append(FlowMappingEndToken(None, None))
elif ch == u'?':
self.index += 1
- tokens.append(KeyToken())
+ tokens.append(KeyToken(None, None))
elif ch == u':':
self.index += 1
- tokens.append(ValueToken())
+ tokens.append(ValueToken(None, None))
elif ch == u',':
self.index += 1
- tokens.append(EntryToken())
+ tokens.append(FlowEntryToken(None, None))
elif ch == u'*' or ch == u'&':
tokens.append(self.scan_alias())
elif ch == u'!':
@@ -176,7 +95,7 @@ class CanonicalScanner:
if self.data[self.index:self.index+len(self.DIRECTIVE)] == self.DIRECTIVE and \
self.data[self.index+len(self.DIRECTIVE)] in u' \n\0':
self.index += len(self.DIRECTIVE)
- return DirectiveToken()
+ return DirectiveToken('YAML', (1, 1), None, None)
def scan_alias(self):
if self.data[self.index] == u'*':
@@ -188,7 +107,7 @@ class CanonicalScanner:
while self.data[self.index] not in u', \n\0':
self.index += 1
value = self.data[start:self.index]
- return TokenClass(value)
+ return TokenClass(value, None, None)
def scan_tag(self):
self.index += 1
@@ -198,9 +117,11 @@ class CanonicalScanner:
value = self.data[start:self.index]
if value[0] == u'!':
value = 'tag:yaml.org,2002:'+value[1:]
- else:
+ elif value[0] == u'<' and value[-1] == u'>':
value = value[1:-1]
- return TagToken(value)
+ else:
+ value = u'!'+value
+ return TagToken(value, None, None)
QUOTE_CODES = {
'x': 2,
@@ -264,7 +185,7 @@ class CanonicalScanner:
self.index += 1
chunks.append(self.data[start:self.index])
self.index += 1
- return ScalarToken(u''.join(chunks))
+ return ScalarToken(u''.join(chunks), False, None, None)
def find_token(self):
found = False
@@ -281,83 +202,79 @@ class CanonicalScanner:
class CanonicalParser:
- def __init__(self, source, data):
- self.scanner = CanonicalScanner(source, data)
+ def __init__(self, data):
+ self.scanner = CanonicalScanner(data)
+ self.events = []
# stream: document* END
def parse_stream(self):
- documents = []
while not self.test_token(StreamEndToken):
if self.test_token(DirectiveToken, DocumentStartToken):
- documents.append(self.parse_document())
+ self.parse_document()
else:
raise Error("document is expected, got "+repr(self.tokens[self.index]))
- return documents
+ self.events.append(StreamEndEvent(None, None))
- # document: DIRECTIVE? DOCUMENT-START node?
+ # document: DIRECTIVE? DOCUMENT-START node
def parse_document(self):
node = None
if self.test_token(DirectiveToken):
self.consume_token(DirectiveToken)
self.consume_token(DocumentStartToken)
- if self.test_token(TagToken, AliasToken, AnchorToken, TagToken,
- SequenceStartToken, MappingStartToken, ScalarToken):
- node = self.parse_node()
- return node
+ self.parse_node()
# node: ALIAS | ANCHOR? TAG? (SCALAR|sequence|mapping)
def parse_node(self):
if self.test_token(AliasToken):
- return AliasNode(self.get_value())
+ self.events.append(AliasEvent(self.get_value(), None, None))
else:
anchor = None
if self.test_token(AnchorToken):
anchor = self.get_value()
- tag = None
+ tag = u'!'
if self.test_token(TagToken):
tag = self.get_value()
if self.test_token(ScalarToken):
- return ScalarNode(anchor, tag, self.get_value())
- elif self.test_token(SequenceStartToken):
- return SequenceNode(anchor, tag, self.parse_sequence())
- elif self.test_token(MappingStartToken):
- return MappingNode(anchor, tag, self.parse_mapping())
+ self.events.append(ScalarEvent(anchor, tag, self.get_value(), None, None))
+ elif self.test_token(FlowSequenceStartToken):
+ self.events.append(SequenceEvent(anchor, tag, None, None))
+ self.parse_sequence()
+ elif self.test_token(FlowMappingStartToken):
+ self.events.append(MappingEvent(anchor, tag, None, None))
+ self.parse_mapping()
else:
raise Error("SCALAR, '[', or '{' is expected, got "+repr(self.tokens[self.index]))
# sequence: SEQUENCE-START (node (ENTRY node)*)? ENTRY? SEQUENCE-END
def parse_sequence(self):
- values = []
- self.consume_token(SequenceStartToken)
- if not self.test_token(SequenceEndToken):
- values.append(self.parse_node())
- while not self.test_token(SequenceEndToken):
- self.consume_token(EntryToken)
- if not self.test_token(SequenceEndToken):
- values.append(self.parse_node())
- self.consume_token(SequenceEndToken)
- return values
+ self.consume_token(FlowSequenceStartToken)
+ if not self.test_token(FlowSequenceEndToken):
+ self.parse_node()
+ while not self.test_token(FlowSequenceEndToken):
+ self.consume_token(FlowEntryToken)
+ if not self.test_token(FlowSequenceEndToken):
+ self.parse_node()
+ self.consume_token(FlowSequenceEndToken)
+ self.events.append(CollectionEndEvent(None, None))
# mapping: MAPPING-START (map_entry (ENTRY map_entry)*)? ENTRY? MAPPING-END
def parse_mapping(self):
- values = []
- self.consume_token(MappingStartToken)
- if not self.test_token(MappingEndToken):
- values.append(self.parse_map_entry())
- while not self.test_token(MappingEndToken):
- self.consume_token(EntryToken)
- if not self.test_token(MappingEndToken):
- values.append(self.parse_map_entry())
- self.consume_token(MappingEndToken)
- return values
+ self.consume_token(FlowMappingStartToken)
+ if not self.test_token(FlowMappingEndToken):
+ self.parse_map_entry()
+ while not self.test_token(FlowMappingEndToken):
+ self.consume_token(FlowEntryToken)
+ if not self.test_token(FlowMappingEndToken):
+ self.parse_map_entry()
+ self.consume_token(FlowMappingEndToken)
+ self.events.append(CollectionEndEvent(None, None))
# map_entry: KEY node VALUE node
def parse_map_entry(self):
self.consume_token(KeyToken)
- key = self.parse_node()
+ self.parse_node()
self.consume_token(ValueToken)
- value = self.parse_node()
- return (key, value)
+ self.parse_node()
def test_token(self, *choices):
for choice in choices:
@@ -378,5 +295,6 @@ class CanonicalParser:
def parse(self):
self.tokens = self.scanner.scan()
self.index = 0
- return self.parse_stream()
+ self.parse_stream()
+ return self.events
diff --git a/tests/test_canonical.py b/tests/test_canonical.py
index add1f8e..7fa85dc 100644
--- a/tests/test_canonical.py
+++ b/tests/test_canonical.py
@@ -5,16 +5,17 @@ class TestCanonicalAppliance(test_appliance.TestAppliance):
def _testCanonicalScanner(self, test_name, canonical_filename):
data = file(canonical_filename, 'rb').read()
- scanner = test_appliance.CanonicalScanner(canonical_filename, data)
+ scanner = test_appliance.CanonicalScanner(data)
tokens = scanner.scan()
- #print tokens
+ #for token in tokens:
+ # print token
def _testCanonicalParser(self, test_name, canonical_filename):
data = file(canonical_filename, 'rb').read()
- parser = test_appliance.CanonicalParser(canonical_filename, data)
- documents = parser.parse()
- #for document in documents:
- # print document
+ parser = test_appliance.CanonicalParser(data)
+ events = parser.parse()
+ #for event in events:
+ # print event
TestCanonicalAppliance.add_tests('testCanonicalScanner', '.canonical')
TestCanonicalAppliance.add_tests('testCanonicalParser', '.canonical')
diff --git a/tests/test_errors.py b/tests/test_errors.py
index 2b6e9a2..431258b 100644
--- a/tests/test_errors.py
+++ b/tests/test_errors.py
@@ -18,14 +18,12 @@ class TestErrors(test_appliance.TestAppliance):
def _load(self, filename):
reader = Reader(file(filename, 'rb'))
scanner = Scanner(reader)
- while scanner.peek_token():
- scanner.get_token()
+ return list(scanner)
def _load_string(self, filename):
reader = Reader(file(filename, 'rb').read())
scanner = Scanner(reader)
- while scanner.peek_token():
- scanner.get_token()
+ return list(scanner)
TestErrors.add_tests('testErrors', '.error-message')
TestErrors.add_tests('testStringErrors', '.error-message')
diff --git a/tests/test_structure.py b/tests/test_structure.py
index 12c87d0..0bffbcd 100644
--- a/tests/test_structure.py
+++ b/tests/test_structure.py
@@ -12,8 +12,10 @@ class TestStructure(test_appliance.TestAppliance):
node2 = eval(file(structure_filename, 'rb').read())
try:
parser = Parser(Scanner(Reader(file(data_filename, 'rb'))))
- node1 = parser.parse()
- node1 = [self._convert(n) for n in node1]
+ node1 = []
+ while not parser.check(StreamEndEvent):
+ node1.append(self._convert(parser))
+ parser.get()
if len(node1) == 1:
node1 = node1[0]
self.failUnlessEqual(node1, node2)
@@ -25,97 +27,110 @@ class TestStructure(test_appliance.TestAppliance):
print "NODE2:", node2
raise
- def _convert(self, node):
- if isinstance(node, ScalarNode):
- return True
- elif isinstance(node, SequenceNode):
+ def _convert(self, parser):
+ if parser.check(ScalarEvent):
+ event = parser.get()
+ if event.tag or event.anchor or event.value:
+ return True
+ else:
+ return None
+ elif parser.check(SequenceEvent):
+ parser.get()
sequence = []
- for item in node.value:
- sequence.append(self._convert(item))
+ while not parser.check(CollectionEndEvent):
+ sequence.append(self._convert(parser))
+ parser.get()
return sequence
- elif isinstance(node, MappingNode):
+ elif parser.check(MappingEvent):
+ parser.get()
mapping = []
- for key, value in node.value:
- mapping.append((self._convert(key), self._convert(value)))
+ while not parser.check(CollectionEndEvent):
+ key = self._convert(parser)
+ value = self._convert(parser)
+ mapping.append((key, value))
+ parser.get()
return mapping
- elif isinstance(node, AliasNode):
+ elif parser.check(AliasEvent):
+ parser.get()
return '*'
else:
- return node
+ parser.get()
+ return '?'
TestStructure.add_tests('testStructure', '.data', '.structure')
class TestParser(test_appliance.TestAppliance):
def _testParser(self, test_name, data_filename, canonical_filename):
- documents1 = None
- documents2 = None
+ events1 = None
+ events2 = None
try:
parser = Parser(Scanner(Reader(file(data_filename, 'rb'))))
- documents1 = parser.parse()
- canonical = test_appliance.CanonicalParser(canonical_filename, file(canonical_filename, 'rb').read())
- documents2 = canonical.parse()
- self._compare(documents1, documents2)
+ events1 = list(iter(parser))
+ canonical = test_appliance.CanonicalParser(file(canonical_filename, 'rb').read())
+ events2 = canonical.parse()
+ self._compare(events1, events2)
except:
print
print "DATA1:"
print file(data_filename, 'rb').read()
print "DATA2:"
print file(canonical_filename, 'rb').read()
- print "DOCUMENTS1:", documents1
- print "DOCUMENTS2:", documents2
+ print "EVENTS1:", events1
+ print "EVENTS2:", events2
raise
- def _compare(self, value1, value2):
- if value1 is None and hasattr(value2, 'tag') and value2.tag == 'tag:yaml.org,2002:null':
- return
- self.failUnlessEqual(type(value1), type(value2))
- if isinstance(value1, list) or isinstance(value1, tuple):
- self.failUnlessEqual(len(value1), len(value2))
- for item1, item2 in zip(value1, value2):
- self._compare(item1, item2)
- else:
- self.failUnlessEqual(value1.__class__.__name__, value2.__class__.__name__)
- if isinstance(value1, SequenceNode): # or isinstance(value1, MappingNode):
- self._compare(value1.value, value2.value)
- elif isinstance(value1, ScalarNode):
- self.failUnlessEqual(value1.value, value2.value)
+ def _compare(self, events1, events2):
+ self.failUnlessEqual(len(events1), len(events2))
+ for event1, event2 in zip(events1, events2):
+ self.failUnlessEqual(event1.__class__, event2.__class__)
+ if isinstance(event1, AliasEvent):
+ #self.failUnlessEqual(event1.name, event2.name)
+ pass
+ elif isinstance(event1, ScalarEvent):
+ #self.failUnlessEqual(event1.anchor, event2.anchor)
+ #self.failUnlessEqual(event1.tag, event2.tag)
+ self.failUnlessEqual(event1.value, event2.value)
+ if isinstance(event1, CollectionEvent):
+ #self.failUnlessEqual(event1.anchor, event2.anchor)
+ #self.failUnlessEqual(event1.tag, event2.tag)
+ pass
+
TestParser.add_tests('testParser', '.data', '.canonical')
class TestParserOnCanonical(test_appliance.TestAppliance):
def _testParserOnCanonical(self, test_name, canonical_filename):
- documents1 = None
- documents2 = None
+ events1 = None
+ events2 = None
try:
parser = Parser(Scanner(Reader(file(canonical_filename, 'rb'))))
- documents1 = parser.parse()
- canonical = test_appliance.CanonicalParser(canonical_filename, file(canonical_filename, 'rb').read())
- documents2 = canonical.parse()
- self._compare(documents1, documents2)
+ events1 = list(iter(parser))
+ canonical = test_appliance.CanonicalParser(file(canonical_filename, 'rb').read())
+ events2 = canonical.parse()
+ self._compare(events1, events2)
except:
print
print "DATA:"
print file(canonical_filename, 'rb').read()
- print "DOCUMENTS1:", documents1
- print "DOCUMENTS2:", documents2
+ print "EVENTS1:", events1
+ print "EVENTS2:", events2
raise
- def _compare(self, value1, value2):
- if value1 is None and hasattr(value2, 'tag') and value2.tag == 'tag:yaml.org,2002:null':
- return
- self.failUnlessEqual(type(value1), type(value2))
- if isinstance(value1, list) or isinstance(value1, tuple):
- self.failUnlessEqual(len(value1), len(value2))
- for item1, item2 in zip(value1, value2):
- self._compare(item1, item2)
- else:
- self.failUnlessEqual(value1.__class__.__name__, value2.__class__.__name__)
- if isinstance(value1, SequenceNode) or isinstance(value1, MappingNode):
- self._compare(value1.value, value2.value)
- elif isinstance(value1, ScalarNode):
- self.failUnlessEqual(value1.value, value2.value)
+ def _compare(self, events1, events2):
+ self.failUnlessEqual(len(events1), len(events2))
+ for event1, event2 in zip(events1, events2):
+ self.failUnlessEqual(event1.__class__, event2.__class__)
+ if isinstance(event1, AliasEvent):
+ self.failUnlessEqual(event1.name, event2.name)
+ elif isinstance(event1, ScalarEvent):
+ self.failUnlessEqual(event1.anchor, event2.anchor)
+ self.failUnlessEqual(event1.tag, event2.tag)
+ self.failUnlessEqual(event1.value, event2.value)
+ if isinstance(event1, CollectionEvent):
+ self.failUnlessEqual(event1.anchor, event2.anchor)
+ self.failUnlessEqual(event1.tag, event2.tag)
TestParserOnCanonical.add_tests('testParserOnCanonical', '.canonical')
diff --git a/tests/test_tokens.py b/tests/test_tokens.py
index f5daaf2..2ccc305 100644
--- a/tests/test_tokens.py
+++ b/tests/test_tokens.py
@@ -41,7 +41,8 @@ class TestTokens(test_appliance.TestAppliance):
FlowSequenceEndToken: ']',
FlowMappingStartToken: '{',
FlowMappingEndToken: '}',
- EntryToken: ',',
+ BlockEntryToken: ',',
+ FlowEntryToken: ',',
KeyToken: '?',
ValueToken: ':',
}
@@ -52,8 +53,9 @@ class TestTokens(test_appliance.TestAppliance):
try:
scanner = Scanner(Reader(file(data_filename, 'rb')))
tokens1 = []
- while not isinstance(scanner.peek_token(), StreamEndToken):
- tokens1.append(scanner.get_token())
+ for token in scanner:
+ if not isinstance(token, StreamEndToken):
+ tokens1.append(token)
tokens1 = [self.replaces[t.__class__] for t in tokens1]
self.failUnlessEqual(tokens1, tokens2)
except:
@@ -74,8 +76,9 @@ class TestScanner(test_appliance.TestAppliance):
try:
scanner = Scanner(Reader(file(filename, 'rb')))
tokens = []
- while not isinstance(scanner.peek_token(), StreamEndToken):
- tokens.append(scanner.get_token().__class__.__name__)
+ for token in scanner:
+ if not isinstance(token, StreamEndToken):
+ tokens.append(token.__class__.__name__)
except:
print
print "DATA:"