diff options
author | xi <xi@18f92427-320e-0410-9341-c67f048884a3> | 2006-02-15 13:35:29 +0000 |
---|---|---|
committer | xi <xi@18f92427-320e-0410-9341-c67f048884a3> | 2006-02-15 13:35:29 +0000 |
commit | da5f57d7b0fe6c1b333fe863f1c395a8f5443ad1 (patch) | |
tree | 84554857d22063d67ca7aa0056b6ec581176e69a | |
parent | 3527637545d2ce07c774daa44c71a823a19c6c80 (diff) | |
download | pyyaml-da5f57d7b0fe6c1b333fe863f1c395a8f5443ad1.tar.gz |
All tests passed! Scanner and Parser seem to be correct.
git-svn-id: http://svn.pyyaml.org/branches/pyyaml3000@44 18f92427-320e-0410-9341-c67f048884a3
57 files changed, 469 insertions, 272 deletions
diff --git a/lib/yaml/parser.py b/lib/yaml/parser.py index 9fc04d9..71616b0 100644 --- a/lib/yaml/parser.py +++ b/lib/yaml/parser.py @@ -39,128 +39,164 @@ # flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } # flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } +from scanner import * + +class Error(Exception): + pass + +class Node: + def __repr__(self): + args = [] + for attribute in ['anchor', 'tag', 'value']: + if hasattr(self, attribute): + args.append(repr(getattr(self, attribute))) + return "%s(%s)" % (self.__class__.__name__, ', '.join(args)) + +class AliasNode(Node): + def __init__(self, anchor): + self.anchor = anchor + +class ScalarNode(Node): + def __init__(self, anchor, tag, value): + self.anchor = anchor + self.tag = tag + self.value = value + +class SequenceNode(Node): + def __init__(self, anchor, tag, value): + self.anchor = anchor + self.tag = tag + self.value = value + +class MappingNode(Node): + def __init__(self, anchor, tag, value): + self.anchor = anchor + self.tag = tag + self.value = value + class Parser: - def parse(self, source, data): - scanner = Scanner() - self.tokens = scanner.scan(source, data) - self.tokens.append('END') - documents = self.parse_stream() - if len(documents) == 1: - return documents[0] - return documents + def __init__(self, source, data): + self.scanner = Scanner(source, data) + + def is_token(self, *choices): + token = self.scanner.peek_token() + for choice in choices: + if isinstance(token, choices): + return True + return False + + def get_token(self): + return self.scanner.get_token() + + def parse(self): + return self.parse_stream() def parse_stream(self): documents = [] - if self.tokens[0] not in ['DIRECTIVE', 'DOCUMENT_START', 'END']: + if not self.is_token(DirectiveToken, DocumentStartToken, EndToken): documents.append(self.parse_block_node()) - while self.tokens[0] != 'END': - while self.tokens[0] == 'DIRECTIVE': - self.tokens.pop(0) - if self.tokens[0] != 'DOCUMENT_START': - self.error('DOCUMENT_START is expected') - self.tokens.pop(0) - if self.tokens[0] in ['DIRECTIVE', 'DOCUMENT_START', 'DOCUMENT_END', 'END']: + while not self.is_token(EndToken): + while self.is_token(DirectiveToken): + self.get_token() + if not self.is_token(DocumentStartToken): + self.fail('DOCUMENT-START is expected') + self.get_token() + if self.is_token(DirectiveToken, + DocumentStartToken, DocumentEndToken, EndToken): documents.append(None) else: documents.append(self.parse_block_node()) - while self.tokens[0] == 'DOCUMENT_END': - self.tokens.pop(0) - if self.tokens[0] != 'END': - self.error("END is expected") - return tuple(documents) + while self.is_token(DocumentEndToken): + self.get_token() + if not self.is_token(EndToken): + self.fail("END is expected") + return documents def parse_block_node(self): - if self.tokens[0] == 'ALIAS': - self.tokens.pop(0) - return '*' - if self.tokens[0] == 'TAG': - self.tokens.pop(0) - if self.tokens[0] == 'ANCHOR': - self.tokens.pop(0) - elif self.tokens[0] == 'ANCHOR': - self.tokens.pop(0) - if self.tokens[0] == 'TAG': - self.tokens.pop(0) - return self.parse_block_content() + return self.parse_node(block=True) def parse_flow_node(self): - if self.tokens[0] == 'ALIAS': - self.tokens.pop(0) - return '*' - if self.tokens[0] == 'TAG': - self.tokens.pop(0) - if self.tokens[0] == 'ANCHOR': - self.tokens.pop(0) - elif self.tokens[0] == 'ANCHOR': - self.tokens.pop(0) - if self.tokens[0] == 'TAG': - self.tokens.pop(0) - return self.parse_flow_content() + return self.parse_node() def parse_block_node_or_indentless_sequence(self): - if self.tokens[0] == 'ALIAS': - self.tokens.pop(0) - return '*' - if self.tokens[0] == 'TAG': - self.tokens.pop(0) - if self.tokens[0] == 'ANCHOR': - self.tokens.pop(0) - elif self.tokens[0] == 'ANCHOR': - self.tokens.pop(0) - if self.tokens[0] == 'TAG': - self.tokens.pop(0) - if self.tokens[0] == 'ENTRY': - return self.parse_indentless_sequence(self) - return self.parse_block_content() + return self.parse_node(block=True, indentless_sequence=True) + + def parse_node(self, block=False, indentless_sequence=False): + if self.is_token(AliasToken): + token = self.get_token() + return AliasNode(token.value) + anchor = None + tag = None + if self.is_token(AnchorToken): + anchor = self.get_token().value + if self.is_token(TagToken): + tag = self.get_token().value + elif self.is_token(TagToken): + tag = self.get_token().value + if self.is_token(AnchorToken): + anchor = self.get_token().value + if indentless_sequence and self.is_token(EntryToken): + NodeClass = SequenceNode + value = self.parse_indentless_sequence() + else: + if self.is_token(ScalarToken): + NodeClass = ScalarNode + elif self.is_token(BlockSequenceStartToken, FlowSequenceStartToken): + NodeClass = SequenceNode + elif self.is_token(BlockMappingStartToken, FlowMappingStartToken): + NodeClass = MappingNode + if block: + value = self.parse_block_content() + else: + value = self.parse_flow_content() + return NodeClass(anchor, tag, value) def parse_block_content(self): - if self.tokens[0] == 'SCALAR': - self.tokens.pop(0) - return True - elif self.tokens[0] == 'BLOCK_SEQ_START': + if self.is_token(ScalarToken): + return self.get_token().value + elif self.is_token(BlockSequenceStartToken): return self.parse_block_sequence() - elif self.tokens[0] == 'BLOCK_MAP_START': + elif self.is_token(BlockMappingStartToken): return self.parse_block_mapping() - elif self.tokens[0] == 'FLOW_SEQ_START': + elif self.is_token(FlowSequenceStartToken): return self.parse_flow_sequence() - elif self.tokens[0] == 'FLOW_MAP_START': + elif self.is_token(FlowMappingStartToken): return self.parse_flow_mapping() else: - self.error('block content is expected') + self.fail('block content is expected') def parse_flow_content(self): - if self.tokens[0] == 'SCALAR': - self.tokens.pop(0) - return True - elif self.tokens[0] == 'FLOW_SEQ_START': + if self.is_token(ScalarToken): + return self.get_token().value + elif self.is_token(FlowSequenceStartToken): return self.parse_flow_sequence() - elif self.tokens[0] == 'FLOW_MAP_START': + elif self.is_token(FlowMappingStartToken): return self.parse_flow_mapping() else: - self.error('flow content is expected') + self.fail('flow content is expected') def parse_block_sequence(self): sequence = [] - if self.tokens[0] != 'BLOCK_SEQ_START': - self.error('BLOCK_SEQ_START is expected') - self.tokens.pop(0) - while self.tokens[0] == 'ENTRY': - self.tokens.pop(0) - if self.tokens[0] not in ['ENTRY', 'BLOCK_END']: + if not self.is_token(BlockSequenceStartToken): + self.fail('BLOCK-SEQUENCE-START is expected') + self.get_token() + while self.is_token(EntryToken): + self.get_token() + if not self.is_token(EntryToken, BlockEndToken): sequence.append(self.parse_block_node()) else: sequence.append(None) - if self.tokens[0] != 'BLOCK_END': - self.error('BLOCK_END is expected') - self.tokens.pop(0) + if not self.is_token(BlockEndToken): + self.fail('BLOCK-END is expected') + self.get_token() return sequence def parse_indentless_sequence(self): sequence = [] - while self.tokens[0] == 'ENTRY': - self.tokens.pop(0) - if self.tokens[0] not in ['ENTRY']: + while self.is_token(EntryToken): + self.get_token() + if not self.is_token(EntryToken): sequence.append(self.parse_block_node()) else: sequence.append(None) @@ -168,83 +204,84 @@ class Parser: def parse_block_mapping(self): mapping = [] - if self.tokens[0] != 'BLOCK_MAP_START': - self.error('BLOCK_MAP_START is expected') - self.tokens.pop(0) - while self.tokens[0] in ['KEY', 'VALUE']: + if not self.is_token(BlockMappingStartToken): + self.fail('BLOCK-MAPPING-START is expected') + self.get_token() + while self.is_token(KeyToken, ValueToken): key = None value = None - if self.tokens[0] == 'KEY': - self.tokens.pop(0) - if self.tokens[0] not in ['KEY', 'VALUE', 'BLOCK_END']: + if self.is_token(KeyToken): + self.get_token() + if not self.is_token(KeyToken, ValueToken, BlockEndToken): key = self.parse_block_node_or_indentless_sequence() - if self.tokens[0] == 'VALUE': - self.tokens.pop(0) - if self.tokens[0] not in ['KEY', 'VALUE', 'BLOCK_END']: + if self.is_token(ValueToken): + self.get_token() + if not self.is_token(KeyToken, ValueToken, BlockEndToken): value = self.parse_block_node_or_indentless_sequence() mapping.append((key, value)) - if self.tokens[0] != 'BLOCK_END': - self.error('BLOCK_END is expected') - self.tokens.pop(0) + if not self.is_token(BlockEndToken): + self.fail('BLOCK-END is expected') + self.get_token() return mapping def parse_flow_sequence(self): sequence = [] - if self.tokens[0] != 'FLOW_SEQ_START': - self.error('FLOW_SEQ_START is expected') - self.tokens.pop(0) - while self.tokens[0] != 'FLOW_SEQ_END': - if self.tokens[0] == 'KEY': - self.tokens.pop(0) + if not self.is_token(FlowSequenceStartToken): + self.fail('FLOW-SEQUENCE-START is expected') + self.get_token() + while not self.is_token(FlowSequenceEndToken): + if self.is_token(KeyToken): + self.get_token() key = None value = None - if self.tokens[0] != 'VALUE': + if not self.is_token(ValueToken): key = self.parse_flow_node() - if self.tokens[0] == 'VALUE': - self.tokens.pop(0) - if self.tokens[0] not in ['ENTRY', 'FLOW_SEQ_END']: + if self.is_token(ValueToken): + self.get_token() + if not self.is_token(EntryToken, FlowSequenceEndToken): value = self.parse_flow_node() - sequence.append([(key, value)]) + node = MappingNode(None, None, [(key, value)]) + sequence.append(node) else: sequence.append(self.parse_flow_node()) - if self.tokens[0] not in ['ENTRY', 'FLOW_SEQ_END']: - self.error("ENTRY or FLOW_SEQ_END is expected") - if self.tokens[0] == 'ENTRY': - self.tokens.pop(0) - if self.tokens[0] != 'FLOW_SEQ_END': - self.error('FLOW_SEQ_END is expected') - self.tokens.pop(0) + if not self.is_token(EntryToken, FlowSequenceEndToken): + self.fail("ENTRY or FLOW-SEQUENCE-END are expected") + if self.is_token(EntryToken): + self.get_token() + if not self.is_token(FlowSequenceEndToken): + self.fail('FLOW-SEQUENCE-END is expected') + self.get_token() return sequence def parse_flow_mapping(self): mapping = [] - if self.tokens[0] != 'FLOW_MAP_START': - self.error('FLOW_MAP_START is expected') - self.tokens.pop(0) - while self.tokens[0] != 'FLOW_MAP_END': - if self.tokens[0] == 'KEY': - self.tokens.pop(0) + if not self.is_token(FlowMappingStartToken): + self.fail('FLOW-MAPPING-START is expected') + self.get_token() + while not self.is_token(FlowMappingEndToken): + if self.is_token(KeyToken): + self.get_token() key = None value = None - if self.tokens[0] != 'VALUE': + if not self.is_token(ValueToken): key = self.parse_flow_node() - if self.tokens[0] == 'VALUE': - self.tokens.pop(0) - if self.tokens[0] not in ['ENTRY', 'FLOW_MAP_END']: + if self.is_token(ValueToken): + self.get_token() + if not self.is_token(EntryToken, FlowMappingEndToken): value = self.parse_flow_node() mapping.append((key, value)) else: mapping.append((self.parse_flow_node(), None)) - if self.tokens[0] not in ['ENTRY', 'FLOW_MAP_END']: - self.error("ENTRY or FLOW_MAP_END is expected") - if self.tokens[0] == 'ENTRY': - self.tokens.pop(0) - if self.tokens[0] != 'FLOW_MAP_END': - self.error('FLOW_MAP_END is expected') - self.tokens.pop(0) + if not self.is_token(EntryToken, FlowMappingEndToken): + self.fail("ENTRY or FLOW-MAPPING-END are expected") + if self.is_token(EntryToken): + self.get_token() + if not self.is_token(FlowMappingEndToken): + self.fail('FLOW-MAPPING-END is expected') + self.get_token() return mapping - def error(self, message): - raise Error(message+': '+str(self.tokens)) - + def fail(self, message): + marker = self.scanner.peek_token().start_marker + raise Error(message+':\n'+marker.get_snippet()) diff --git a/lib/yaml/scanner.py b/lib/yaml/scanner.py index 839b227..c8a8ed9 100644 --- a/lib/yaml/scanner.py +++ b/lib/yaml/scanner.py @@ -22,80 +22,83 @@ class Token: self.start_marker = start_marker self.end_marker = end_marker -class YAMLDirective(Token): +class DirectiveToken(Token): + pass + +class YAMLDirectiveToken(DirectiveToken): def __init__(self, major_version, minor_version, start_marker, end_marker): self.major_version = major_version self.minor_version = minor_version self.start_marker = start_marker self.end_marker = end_marker -class TagDirective(Token): +class TagDirectiveToken(DirectiveToken): pass -class ReservedDirective(Token): +class ReservedDirectiveToken(DirectiveToken): def __init__(self, name, start_marker, end_marker): self.name = name self.start_marker = start_marker self.end_marker = end_marker -class DocumentStart(Token): +class DocumentStartToken(Token): pass -class DocumentEnd(Token): +class DocumentEndToken(Token): pass -class End(Token): +class EndToken(Token): pass -class BlockSequenceStart(Token): +class BlockSequenceStartToken(Token): pass -class BlockMappingStart(Token): +class BlockMappingStartToken(Token): pass -class BlockEnd(Token): +class BlockEndToken(Token): pass -class FlowSequenceStart(Token): +class FlowSequenceStartToken(Token): pass -class FlowMappingStart(Token): +class FlowMappingStartToken(Token): pass -class FlowSequenceEnd(Token): +class FlowSequenceEndToken(Token): pass -class FlowMappingEnd(Token): +class FlowMappingEndToken(Token): pass -class Key(Token): +class KeyToken(Token): pass -class Value(Token): +class ValueToken(Token): pass -class Entry(Token): +class EntryToken(Token): pass -class Alias(Token): +class AliasToken(Token): def __init__(self, value, start_marker, end_marker): self.value = value self.start_marker = start_marker self.end_marker = end_marker -class Anchor(Token): +class AnchorToken(Token): def __init__(self, value, start_marker, end_marker): self.value = value self.start_marker = start_marker self.end_marker = end_marker -class Tag(Token): +class TagToken(Token): def __init__(self, value, start_marker, end_marker): self.value = value self.start_marker = start_marker self.end_marker = end_marker -class Scalar(Token): +class ScalarToken(Token): def __init__(self, value, plain, start_marker, end_marker): self.value = value self.plain = plain @@ -379,7 +382,7 @@ class Scanner: while self.indent > column: marker = self.stream.get_marker() self.indent = self.indents.pop() - self.tokens.append(BlockEnd(marker, marker)) + self.tokens.append(BlockEndToken(marker, marker)) def add_indent(self, column): # Check if we need to increase indentation. @@ -404,7 +407,7 @@ class Scanner: marker = self.stream.get_marker() # Add END. - self.tokens.append(End(marker, marker)) + self.tokens.append(EndToken(marker, marker)) # The stream is ended. self.done = True @@ -422,10 +425,10 @@ class Scanner: self.scan_directive() def fetch_document_start(self): - self.fetch_document_indicator(DocumentStart) + self.fetch_document_indicator(DocumentStartToken) def fetch_document_end(self): - self.fetch_document_indicator(DocumentEnd) + self.fetch_document_indicator(DocumentEndToken) def fetch_document_indicator(self, TokenClass): @@ -444,19 +447,19 @@ class Scanner: self.tokens.append(TokenClass(start_marker, end_marker)) def fetch_flow_sequence_start(self): - self.fetch_flow_collection_start(FlowSequenceStart) + self.fetch_flow_collection_start(FlowSequenceStartToken) def fetch_flow_mapping_start(self): - self.fetch_flow_collection_start(FlowMappingStart) + self.fetch_flow_collection_start(FlowMappingStartToken) def fetch_flow_collection_start(self, TokenClass): - # Increase the flow level. - self.flow_level += 1 - # '[' and '{' may start a simple key. self.save_possible_simple_key() + # Increase the flow level. + self.flow_level += 1 + # Simple keys are allowed after '[' and '{'. self.allow_simple_key = True @@ -467,10 +470,10 @@ class Scanner: self.tokens.append(TokenClass(start_marker, end_marker)) def fetch_flow_sequence_end(self): - self.fetch_flow_collection_end(FlowSequenceEnd) + self.fetch_flow_collection_end(FlowSequenceEndToken) def fetch_flow_mapping_end(self): - self.fetch_flow_collection_end(FlowMappingEnd) + self.fetch_flow_collection_end(FlowMappingEndToken) def fetch_flow_collection_end(self, TokenClass): @@ -501,7 +504,7 @@ class Scanner: # We may need to add BLOCK-SEQUENCE-START. if self.add_indent(self.stream.column): marker = self.stream.get_marker() - self.tokens.append(BlockSequenceStart(marker, marker)) + self.tokens.append(BlockSequenceStartToken(marker, marker)) # Simple keys are allowed after '-' and ','. self.allow_simple_key = True @@ -513,7 +516,7 @@ class Scanner: start_marker = self.stream.get_marker() self.stream.read() end_marker = self.stream.get_marker() - self.tokens.append(Entry(start_marker, end_marker)) + self.tokens.append(EntryToken(start_marker, end_marker)) def fetch_key(self): @@ -527,7 +530,7 @@ class Scanner: # We may need to add BLOCK-MAPPING-START. if self.add_indent(self.stream.column): marker = self.stream.get_marker() - self.tokens.append(BlockMappingStart(marker, marker)) + self.tokens.append(BlockMappingStartToken(marker, marker)) # Simple keys are allowed after '?' in the block context. self.allow_simple_key = not self.flow_level @@ -539,7 +542,7 @@ class Scanner: start_marker = self.stream.get_marker() self.stream.read() end_marker = self.stream.get_marker() - self.tokens.append(Key(start_marker, end_marker)) + self.tokens.append(KeyToken(start_marker, end_marker)) def fetch_value(self): @@ -550,14 +553,14 @@ class Scanner: key = self.possible_simple_keys[self.flow_level] del self.possible_simple_keys[self.flow_level] self.tokens.insert(key.token_number-self.tokens_taken, - Key(key.marker, key.marker)) + KeyToken(key.marker, key.marker)) # If this key starts a new block mapping, we need to add # BLOCK-MAPPING-START. if not self.flow_level: if self.add_indent(key.column): self.tokens.insert(key.token_number-self.tokens_taken, - BlockMappingStart(key.marker, key.marker)) + BlockMappingStartToken(key.marker, key.marker)) # There cannot be two simple keys one after another. self.allow_simple_key = False @@ -575,7 +578,7 @@ class Scanner: start_marker = self.stream.get_marker() self.stream.read() end_marker = self.stream.get_marker() - self.tokens.append(Value(start_marker, end_marker)) + self.tokens.append(ValueToken(start_marker, end_marker)) def fetch_alias(self): @@ -586,7 +589,7 @@ class Scanner: self.allow_simple_key = False # Scan and add ALIAS. - self.scan_anchor(Alias) + self.scan_anchor(AliasToken) def fetch_anchor(self): @@ -597,7 +600,7 @@ class Scanner: self.allow_simple_key = False # Scan and add ANCHOR. - self.scan_anchor(Anchor) + self.scan_anchor(AnchorToken) def fetch_tag(self): @@ -738,11 +741,11 @@ class Scanner: def scan_directive(self): marker = self.stream.get_marker() if self.stream.peek(5) == u'%YAML ': - self.tokens.append(YAMLDirective(1, 1, marker, marker)) + self.tokens.append(YAMLDirectiveToken(1, 1, marker, marker)) elif self.stream.peek(4) == u'%TAG ': - self.tokens.append(TagDirective(marker, marker)) + self.tokens.append(TagDirectiveToken(marker, marker)) else: - self.tokens.append(ReservedDirective('', marker, marker)) + self.tokens.append(ReservedDirectiveToken('', marker, marker)) while self.stream.peek() not in u'\0\r\n': self.stream.read() self.stream.read() @@ -759,7 +762,7 @@ class Scanner: while self.stream.peek() not in u'\0 \t\r\n': self.stream.read() end_marker = self.stream.get_marker() - self.tokens.append(Tag('', start_marker, end_marker)) + self.tokens.append(TagToken('', start_marker, end_marker)) def scan_block_scalar(self, folded): start_marker = self.stream.get_marker() @@ -767,7 +770,7 @@ class Scanner: if indent < 1: indent = 1 while True: - while self.stream.peek() and self.stream.peek() and self.stream.peek() not in u'\0\r\n': + while self.stream.peek() and self.stream.peek() and self.stream.peek() not in u'\0\r\n\x85\u2028\u2029': self.stream.read() if self.stream.peek() != u'\0': self.stream.read() @@ -775,9 +778,9 @@ class Scanner: while count < indent and self.stream.peek() == u' ': self.stream.read() count += 1 - if count < indent and self.stream.peek() not in u'#\r\n': + if count < indent and self.stream.peek() not in u'#\r\n\x85\u2028\u2029': break - self.tokens.append(Scalar('', False, start_marker, start_marker)) + self.tokens.append(ScalarToken('', False, start_marker, start_marker)) def scan_flow_scalar(self, double): marker = self.stream.get_marker() @@ -790,7 +793,7 @@ class Scanner: else: self.stream.read(1) self.stream.read(1) - self.tokens.append(Scalar('', False, marker, marker)) + self.tokens.append(ScalarToken('', False, marker, marker)) def scan_plain(self): indent = self.indent+1 @@ -822,7 +825,7 @@ class Scanner: if count < indent: break space = True - self.tokens.append(Scalar('', True, marker, marker)) + self.tokens.append(ScalarToken('', True, marker, marker)) def invalid_token(self): self.fail("invalid token") diff --git a/lib/yaml/stream.py b/lib/yaml/stream.py index 452a4d7..47f72a2 100644 --- a/lib/yaml/stream.py +++ b/lib/yaml/stream.py @@ -18,7 +18,7 @@ class Stream: for i in range(k): if self.index >= len(self.data): break - if self.data[self.index] in u'\r\n': + if self.data[self.index] in u'\r\n\x85\u2028\u2029': self.line += 1 self.column = 0 else: diff --git a/tests/data/spec-02-01.structure b/tests/data/spec-02-01.structure new file mode 100644 index 0000000..f532f4a --- /dev/null +++ b/tests/data/spec-02-01.structure @@ -0,0 +1 @@ +[True, True, True] diff --git a/tests/data/spec-02-02.structure b/tests/data/spec-02-02.structure new file mode 100644 index 0000000..aba1ced --- /dev/null +++ b/tests/data/spec-02-02.structure @@ -0,0 +1 @@ +[(True, True), (True, True), (True, True)] diff --git a/tests/data/spec-02-03.structure b/tests/data/spec-02-03.structure new file mode 100644 index 0000000..25de5d2 --- /dev/null +++ b/tests/data/spec-02-03.structure @@ -0,0 +1 @@ +[(True, [True, True, True]), (True, [True, True, True])] diff --git a/tests/data/spec-02-04.structure b/tests/data/spec-02-04.structure new file mode 100644 index 0000000..e7b526c --- /dev/null +++ b/tests/data/spec-02-04.structure @@ -0,0 +1,4 @@ +[ + [(True, True), (True, True), (True, True)], + [(True, True), (True, True), (True, True)], +] diff --git a/tests/data/spec-02-05.structure b/tests/data/spec-02-05.structure new file mode 100644 index 0000000..e06b75a --- /dev/null +++ b/tests/data/spec-02-05.structure @@ -0,0 +1,5 @@ +[ + [True, True, True], + [True, True, True], + [True, True, True], +] diff --git a/tests/data/spec-02-06.structure b/tests/data/spec-02-06.structure new file mode 100644 index 0000000..3ef0f4b --- /dev/null +++ b/tests/data/spec-02-06.structure @@ -0,0 +1,4 @@ +[ + (True, [(True, True), (True, True)]), + (True, [(True, True), (True, True)]), +] diff --git a/tests/data/spec-02-07.structure b/tests/data/spec-02-07.structure new file mode 100644 index 0000000..c5d72a3 --- /dev/null +++ b/tests/data/spec-02-07.structure @@ -0,0 +1,4 @@ +[ +[True, True, True], +[True, True], +] diff --git a/tests/data/spec-02-08.structure b/tests/data/spec-02-08.structure new file mode 100644 index 0000000..24cff73 --- /dev/null +++ b/tests/data/spec-02-08.structure @@ -0,0 +1,4 @@ +[ +[(True, True), (True, True), (True, True)], +[(True, True), (True, True), (True, True)], +] diff --git a/tests/data/spec-02-09.structure b/tests/data/spec-02-09.structure new file mode 100644 index 0000000..b4c9914 --- /dev/null +++ b/tests/data/spec-02-09.structure @@ -0,0 +1 @@ +[(True, [True, True]), (True, [True, True])] diff --git a/tests/data/spec-02-10.structure b/tests/data/spec-02-10.structure new file mode 100644 index 0000000..ff8f4c3 --- /dev/null +++ b/tests/data/spec-02-10.structure @@ -0,0 +1 @@ +[(True, [True, True]), (True, ['*', True])] diff --git a/tests/data/spec-02-11.structure b/tests/data/spec-02-11.structure new file mode 100644 index 0000000..3d8f1ff --- /dev/null +++ b/tests/data/spec-02-11.structure @@ -0,0 +1,4 @@ +[ +([True, True], [True]), +([True, True], [True, True, True]), +] diff --git a/tests/data/spec-02-12.structure b/tests/data/spec-02-12.structure new file mode 100644 index 0000000..e9c5359 --- /dev/null +++ b/tests/data/spec-02-12.structure @@ -0,0 +1,5 @@ +[ +[(True, True), (True, True)], +[(True, True), (True, True)], +[(True, True), (True, True)], +] diff --git a/tests/data/spec-02-13.structure b/tests/data/spec-02-13.structure new file mode 100644 index 0000000..0ca9514 --- /dev/null +++ b/tests/data/spec-02-13.structure @@ -0,0 +1 @@ +True diff --git a/tests/data/spec-02-14.structure b/tests/data/spec-02-14.structure new file mode 100644 index 0000000..0ca9514 --- /dev/null +++ b/tests/data/spec-02-14.structure @@ -0,0 +1 @@ +True diff --git a/tests/data/spec-02-15.structure b/tests/data/spec-02-15.structure new file mode 100644 index 0000000..0ca9514 --- /dev/null +++ b/tests/data/spec-02-15.structure @@ -0,0 +1 @@ +True diff --git a/tests/data/spec-02-16.structure b/tests/data/spec-02-16.structure new file mode 100644 index 0000000..aba1ced --- /dev/null +++ b/tests/data/spec-02-16.structure @@ -0,0 +1 @@ +[(True, True), (True, True), (True, True)] diff --git a/tests/data/spec-02-17.structure b/tests/data/spec-02-17.structure new file mode 100644 index 0000000..933646d --- /dev/null +++ b/tests/data/spec-02-17.structure @@ -0,0 +1 @@ +[(True, True), (True, True), (True, True), (True, True), (True, True), (True, True)] diff --git a/tests/data/spec-02-18.structure b/tests/data/spec-02-18.structure new file mode 100644 index 0000000..0ca4991 --- /dev/null +++ b/tests/data/spec-02-18.structure @@ -0,0 +1 @@ +[(True, True), (True, True)] diff --git a/tests/data/spec-02-19.structure b/tests/data/spec-02-19.structure new file mode 100644 index 0000000..48ca99d --- /dev/null +++ b/tests/data/spec-02-19.structure @@ -0,0 +1 @@ +[(True, True), (True, True), (True, True), (True, True), (True, True)] diff --git a/tests/data/spec-02-20.structure b/tests/data/spec-02-20.structure new file mode 100644 index 0000000..933646d --- /dev/null +++ b/tests/data/spec-02-20.structure @@ -0,0 +1 @@ +[(True, True), (True, True), (True, True), (True, True), (True, True), (True, True)] diff --git a/tests/data/spec-02-21.structure b/tests/data/spec-02-21.structure new file mode 100644 index 0000000..021635f --- /dev/null +++ b/tests/data/spec-02-21.structure @@ -0,0 +1 @@ +[(True, True), (True, True), (True, True), (True, True)] diff --git a/tests/data/spec-02-22.structure b/tests/data/spec-02-22.structure new file mode 100644 index 0000000..021635f --- /dev/null +++ b/tests/data/spec-02-22.structure @@ -0,0 +1 @@ +[(True, True), (True, True), (True, True), (True, True)] diff --git a/tests/data/spec-02-23.structure b/tests/data/spec-02-23.structure new file mode 100644 index 0000000..aba1ced --- /dev/null +++ b/tests/data/spec-02-23.structure @@ -0,0 +1 @@ +[(True, True), (True, True), (True, True)] diff --git a/tests/data/spec-02-24.structure b/tests/data/spec-02-24.structure new file mode 100644 index 0000000..a800729 --- /dev/null +++ b/tests/data/spec-02-24.structure @@ -0,0 +1,5 @@ +[ +[(True, [(True, True), (True, True)]), (True, True)], +[(True, '*'), (True, [(True, True), (True, True)])], +[(True, '*'), (True, True), (True, True)], +] diff --git a/tests/data/spec-02-25.structure b/tests/data/spec-02-25.structure new file mode 100644 index 0000000..0b40e61 --- /dev/null +++ b/tests/data/spec-02-25.structure @@ -0,0 +1 @@ +[(True, None), (True, None), (True, None)] diff --git a/tests/data/spec-02-26.structure b/tests/data/spec-02-26.structure new file mode 100644 index 0000000..cf429b9 --- /dev/null +++ b/tests/data/spec-02-26.structure @@ -0,0 +1,5 @@ +[ +[(True, True)], +[(True, True)], +[(True, True)], +] diff --git a/tests/data/spec-02-27.structure b/tests/data/spec-02-27.structure new file mode 100644 index 0000000..a2113b9 --- /dev/null +++ b/tests/data/spec-02-27.structure @@ -0,0 +1,17 @@ +[ +(True, True), +(True, True), +(True, [ + (True, True), + (True, True), + (True, [(True, True), (True, True), (True, True), (True, True)]), + ]), +(True, '*'), +(True, [ + [(True, True), (True, True), (True, True), (True, True)], + [(True, True), (True, True), (True, True), (True, True)], + ]), +(True, True), +(True, True), +(True, True), +] diff --git a/tests/data/spec-02-28.structure b/tests/data/spec-02-28.structure new file mode 100644 index 0000000..8ec0b56 --- /dev/null +++ b/tests/data/spec-02-28.structure @@ -0,0 +1,10 @@ +[ +[(True, True), (True, True), (True, True)], +[(True, True), (True, True), (True, True)], +[(True, True), (True, True), (True, True), +(True, [ + [(True, True), (True, True), (True, True)], + [(True, True), (True, True), (True, True)], + ]), +] +] diff --git a/tests/data/spec-05-03.canonical b/tests/data/spec-05-03.canonical index 00c9723..a143a73 100644 --- a/tests/data/spec-05-03.canonical +++ b/tests/data/spec-05-03.canonical @@ -8,6 +8,7 @@ ? !!str "mapping" : !!map { ? !!str "sky" : !!str "blue", - ? !!str "sea" : !!str "green", +# ? !!str "sea" : !!str "green", + ? !!map { ? !!str "sea" : !!str "green" } : !!null "", } } diff --git a/tests/data/spec-07-10.canonical b/tests/data/spec-07-10.canonical index af10679..5f1b3dc 100644 --- a/tests/data/spec-07-10.canonical +++ b/tests/data/spec-07-10.canonical @@ -11,4 +11,5 @@ : !!str "bar" } --- -!!str "" +#!!str "" +!!null "" diff --git a/tests/data/spec-08-01.data b/tests/data/spec-08-01.data index 195a368..48986ec 100644 --- a/tests/data/spec-08-01.data +++ b/tests/data/spec-08-01.data @@ -1,4 +1,2 @@ -!!str - &a1 - "foo" : !!str bar +!!str &a1 "foo" : !!str bar &a2 baz : *a1 diff --git a/tests/data/spec-08-12.data b/tests/data/spec-08-12.data index 546617b..d936ba2 100644 --- a/tests/data/spec-08-12.data +++ b/tests/data/spec-08-12.data @@ -3,5 +3,6 @@ &anchor "Anchored", !!str 'Tagged', *anchor, # Alias node - !!str, # Empty plain scalar +# !!str, # Empty plain scalar + '', # Empty plain scalar ] diff --git a/tests/data/spec-08-13.canonical b/tests/data/spec-08-13.canonical index 0a6e782..618bb7b 100644 --- a/tests/data/spec-08-13.canonical +++ b/tests/data/spec-08-13.canonical @@ -2,7 +2,9 @@ --- !!map { ? !!str "foo" - : !!str "", - ? !!str "" +# : !!str "", +# ? !!str "" + : !!null "", + ? !!null "" : !!str "bar", } diff --git a/tests/data/spec-08-15.canonical b/tests/data/spec-08-15.canonical index 2706dae..76f028e 100644 --- a/tests/data/spec-08-15.canonical +++ b/tests/data/spec-08-15.canonical @@ -1,11 +1,11 @@ %YAML 1.1 --- !!seq [ - !!str "", + !!null "", !!map { ? !!str "foo" - : !!str "", - ? !!str "" + : !!null "", + ? !!null "" : !!str "bar", } ] diff --git a/tests/data/spec-08-15.data b/tests/data/spec-08-15.data index a2958cd..7c86bcf 100644 --- a/tests/data/spec-08-15.data +++ b/tests/data/spec-08-15.data @@ -1,6 +1,5 @@ -seq: - # Empty plain scalar - ? foo : ? - : bar, + : bar diff --git a/tests/data/spec-09-12.data b/tests/data/spec-09-12.data index f91b26c..dd4a9c2 100644 --- a/tests/data/spec-09-12.data +++ b/tests/data/spec-09-12.data @@ -3,6 +3,6 @@ - Up, up and away! - -123 # Inside flow collection: -- [ ::std::vector, +- [ '::std::vector', "Up, up and away!", -123 ] diff --git a/tests/data/spec-09-25.canonical b/tests/data/spec-09-25.canonical index 0a545c1..9d2327b 100644 --- a/tests/data/spec-09-25.canonical +++ b/tests/data/spec-09-25.canonical @@ -1,6 +1,4 @@ %YAML 1.1 --- -!!seq [ - !!str "literal\n\ - \ttext\n" -] +!!str "literal\n\ + \ttext\n" diff --git a/tests/data/spec-09-29.canonical b/tests/data/spec-09-29.canonical index 7cbf319..0980789 100644 --- a/tests/data/spec-09-29.canonical +++ b/tests/data/spec-09-29.canonical @@ -1,6 +1,4 @@ %YAML 1.1 --- -!!seq [ - !!str "folded text\n\ - \tlines\n" -] +!!str "folded text\n\ + \tlines\n" diff --git a/tests/data/spec-09-30.canonical b/tests/data/spec-09-30.canonical index fbade17..5c32f16 100644 --- a/tests/data/spec-09-30.canonical +++ b/tests/data/spec-09-30.canonical @@ -1,9 +1,7 @@ %YAML 1.1 --- -!!seq [ - !!str "folded line\n\ - next line\n\ - \ * bullet\n\ - \ * list\n\ - last line\n" -] +!!str "folded line\n\ + next line\n\ + \ * bullet\n\ + \ * list\n\ + last line\n" diff --git a/tests/data/spec-09-31.canonical b/tests/data/spec-09-31.canonical index fbade17..5c32f16 100644 --- a/tests/data/spec-09-31.canonical +++ b/tests/data/spec-09-31.canonical @@ -1,9 +1,7 @@ %YAML 1.1 --- -!!seq [ - !!str "folded line\n\ - next line\n\ - \ * bullet\n\ - \ * list\n\ - last line\n" -] +!!str "folded line\n\ + next line\n\ + \ * bullet\n\ + \ * list\n\ + last line\n" diff --git a/tests/data/spec-09-32.canonical b/tests/data/spec-09-32.canonical index fbade17..5c32f16 100644 --- a/tests/data/spec-09-32.canonical +++ b/tests/data/spec-09-32.canonical @@ -1,9 +1,7 @@ %YAML 1.1 --- -!!seq [ - !!str "folded line\n\ - next line\n\ - \ * bullet\n\ - \ * list\n\ - last line\n" -] +!!str "folded line\n\ + next line\n\ + \ * bullet\n\ + \ * list\n\ + last line\n" diff --git a/tests/data/spec-09-33.canonical b/tests/data/spec-09-33.canonical index fbade17..5c32f16 100644 --- a/tests/data/spec-09-33.canonical +++ b/tests/data/spec-09-33.canonical @@ -1,9 +1,7 @@ %YAML 1.1 --- -!!seq [ - !!str "folded line\n\ - next line\n\ - \ * bullet\n\ - \ * list\n\ - last line\n" -] +!!str "folded line\n\ + next line\n\ + \ * bullet\n\ + \ * list\n\ + last line\n" diff --git a/tests/data/spec-10-05.canonical b/tests/data/spec-10-05.canonical index 6fc6458..07cc0c9 100644 --- a/tests/data/spec-10-05.canonical +++ b/tests/data/spec-10-05.canonical @@ -1,7 +1,7 @@ %YAML 1.1 --- !!seq [ - !!str "", + !!null "", !!str "block node\n", !!seq [ !!str "one", diff --git a/tests/data/spec-10-07.canonical b/tests/data/spec-10-07.canonical index 6372e34..ec74230 100644 --- a/tests/data/spec-10-07.canonical +++ b/tests/data/spec-10-07.canonical @@ -1,7 +1,7 @@ %YAML 1.1 --- !!map { - ? !!str "" + ? !!null "" : !!str "value", ? !!str "explicit key" : !!str "value", diff --git a/tests/data/spec-10-07.data b/tests/data/spec-10-07.data index c4539c7..46d7d09 100644 --- a/tests/data/spec-10-07.data +++ b/tests/data/spec-10-07.data @@ -1,7 +1,8 @@ { -? : value # Empty key +#? : value # Empty key +? ~ : value, # Empty key ? explicit key: value, -simple key : value +simple key : value, [ collection, simple, key ]: value } diff --git a/tests/data/spec-10-09.canonical b/tests/data/spec-10-09.canonical index d94fea6..4d9827b 100644 --- a/tests/data/spec-10-09.canonical +++ b/tests/data/spec-10-09.canonical @@ -4,5 +4,5 @@ ? !!str "key" : !!str "value", ? !!str "empty" - : !!str "", + : !!null "", } diff --git a/tests/data/spec-10-10.canonical b/tests/data/spec-10-10.canonical index 0bcb8d1..016fb64 100644 --- a/tests/data/spec-10-10.canonical +++ b/tests/data/spec-10-10.canonical @@ -4,13 +4,13 @@ ? !!str "explicit key1" : !!str "explicit value", ? !!str "explicit key2" - : !!str "", + : !!null "", ? !!str "explicit key3" - : !!str "", + : !!null "", ? !!str "simple key1" : !!str "explicit value", ? !!str "simple key2" - : !!str "", + : !!null "", ? !!str "simple key3" - : !!str "", + : !!null "", } diff --git a/tests/data/spec-10-11.canonical b/tests/data/spec-10-11.canonical index 08abe5e..7309544 100644 --- a/tests/data/spec-10-11.canonical +++ b/tests/data/spec-10-11.canonical @@ -7,11 +7,11 @@ }, !!map { ? !!str "explicit key2" - : !!str "", + : !!null "", }, !!map { ? !!str "explicit key3" - : !!str "", + : !!null "", }, !!map { ? !!str "simple key1" @@ -19,6 +19,6 @@ }, !!map { ? !!str "simple key2" - : !!str "", + : !!null "", }, ] diff --git a/tests/data/spec-10-13.canonical b/tests/data/spec-10-13.canonical index a8f64bd..e183c50 100644 --- a/tests/data/spec-10-13.canonical +++ b/tests/data/spec-10-13.canonical @@ -2,7 +2,7 @@ --- !!map { ? !!str "explicit key" - : !!str "", + : !!null "", ? !!str "block key\n" : !!seq [ !!str "one", diff --git a/tests/data/spec-10-14.canonical b/tests/data/spec-10-14.canonical index cfeec67..ec1ef7b 100644 --- a/tests/data/spec-10-14.canonical +++ b/tests/data/spec-10-14.canonical @@ -2,7 +2,7 @@ --- !!map { ? !!str "plain key" - : !!str "", + : !!null "", ? !!str "quoted key\n" : !!seq [ !!str "one", diff --git a/tests/test_appliance.py b/tests/test_appliance.py index 6925ff4..d113f16 100644 --- a/tests/test_appliance.py +++ b/tests/test_appliance.py @@ -32,7 +32,7 @@ class Node: for attribute in ['anchor', 'tag', 'value']: if hasattr(self, attribute): args.append(repr(getattr(self, attribute))) - return "%s(%s)" % (self.__class__.__name__, ''.join(args)) + return "%s(%s)" % (self.__class__.__name__, ', '.join(args)) class AliasNode(Node): def __init__(self, anchor): diff --git a/tests/test_structure.py b/tests/test_structure.py new file mode 100644 index 0000000..07f9c7e --- /dev/null +++ b/tests/test_structure.py @@ -0,0 +1,81 @@ + +import test_appliance + +from yaml.parser import * + +class TestStructure(test_appliance.TestAppliance): + + def _testStructure(self, test_name, data_filename, structure_filename): + node1 = None + node2 = eval(file(structure_filename, 'rb').read()) + try: + parser = Parser(data_filename, file(data_filename, 'rb').read()) + node1 = parser.parse() + node1 = [self._convert(n) for n in node1] + if len(node1) == 1: + node1 = node1[0] + self.failUnlessEqual(node1, node2) + except: + print + print "DATA:" + print file(data_filename, 'rb').read() + print "NODE1:", node1 + print "NODE2:", node2 + raise + + def _convert(self, node): + if isinstance(node, ScalarNode): + return True + elif isinstance(node, SequenceNode): + sequence = [] + for item in node.value: + sequence.append(self._convert(item)) + return sequence + elif isinstance(node, MappingNode): + mapping = [] + for key, value in node.value: + mapping.append((self._convert(key), self._convert(value))) + return mapping + elif isinstance(node, AliasNode): + return '*' + else: + return node + +TestStructure.add_tests('testStructure', '.data', '.structure') + +class TestParser(test_appliance.TestAppliance): + + def _testParser(self, test_name, data_filename, canonical_filename): + documents1 = None + documents2 = None + try: + parser = Parser(data_filename, file(data_filename, 'rb').read()) + documents1 = parser.parse() + canonical = test_appliance.CanonicalParser(canonical_filename, file(canonical_filename, 'rb').read()) + documents2 = canonical.parse() + self._compare(documents1, documents2) + except: + print + print "DATA1:" + print file(data_filename, 'rb').read() + print "DATA2:" + print file(canonical_filename, 'rb').read() + print "DOCUMENTS1:", documents1 + print "DOCUMENTS2:", documents2 + raise + + def _compare(self, value1, value2): + if value1 is None and hasattr(value2, 'tag') and value2.tag == 'tag:yaml.org,2002:null': + return + self.failUnlessEqual(type(value1), type(value2)) + if isinstance(value1, list) or isinstance(value1, tuple): + self.failUnlessEqual(len(value1), len(value2)) + for item1, item2 in zip(value1, value2): + self._compare(item1, item2) + else: + self.failUnlessEqual(value1.__class__.__name__, value2.__class__.__name__) + if isinstance(value1, SequenceNode) or isinstance(value1, MappingNode): + self._compare(value1.value, value2.value) + +TestParser.add_tests('testParser', '.data', '.canonical') + diff --git a/tests/test_tokens.py b/tests/test_tokens.py index a67859b..e9cca5e 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -25,25 +25,25 @@ class TestTokens(test_appliance.TestAppliance): # value: : replaces = { - YAMLDirective: '%', - TagDirective: '%', - ReservedDirective: '%', - DocumentStart: '---', - DocumentEnd: '...', - Alias: '*', - Anchor: '&', - Tag: '!', - Scalar: '_', - BlockSequenceStart: '[[', - BlockMappingStart: '{{', - BlockEnd: ']}', - FlowSequenceStart: '[', - FlowSequenceEnd: ']', - FlowMappingStart: '{', - FlowMappingEnd: '}', - Entry: ',', - Key: '?', - Value: ':', + YAMLDirectiveToken: '%', + TagDirectiveToken: '%', + ReservedDirectiveToken: '%', + DocumentStartToken: '---', + DocumentEndToken: '...', + AliasToken: '*', + AnchorToken: '&', + TagToken: '!', + ScalarToken: '_', + BlockSequenceStartToken: '[[', + BlockMappingStartToken: '{{', + BlockEndToken: ']}', + FlowSequenceStartToken: '[', + FlowSequenceEndToken: ']', + FlowMappingStartToken: '{', + FlowMappingEndToken: '}', + EntryToken: ',', + KeyToken: '?', + ValueToken: ':', } def _testTokens(self, test_name, data_filename, tokens_filename): @@ -52,7 +52,7 @@ class TestTokens(test_appliance.TestAppliance): try: scanner = Scanner(data_filename, file(data_filename, 'rb').read()) tokens1 = [] - while not isinstance(scanner.peek_token(), End): + while not isinstance(scanner.peek_token(), EndToken): tokens1.append(scanner.get_token()) tokens1 = [self.replaces[t.__class__] for t in tokens1] self.failUnlessEqual(tokens1, tokens2) @@ -74,7 +74,7 @@ class TestScanner(test_appliance.TestAppliance): try: scanner = Scanner(filename, file(filename, 'rb').read()) tokens = [] - while not isinstance(scanner.peek_token(), End): + while not isinstance(scanner.peek_token(), EndToken): tokens.append(scanner.get_token().__class__.__name__) except: print diff --git a/tests/test_yaml.py b/tests/test_yaml.py index 94eb425..c9be9dd 100644 --- a/tests/test_yaml.py +++ b/tests/test_yaml.py @@ -4,6 +4,7 @@ import unittest from test_marker import * from test_canonical import * from test_tokens import * +from test_structure import * def main(module='__main__'): unittest.main(module) |