From c6f2fc9875595b01f4249fbe87b0b846d0a2adc9 Mon Sep 17 00:00:00 2001 From: xi Date: Sun, 26 Mar 2006 22:57:23 +0000 Subject: Parser now provides style information. Allow empty plain scalars if a tag or anchor is given. git-svn-id: http://svn.pyyaml.org/pyyaml/branches/working-on-emitter@127 18f92427-320e-0410-9341-c67f048884a3 --- lib/yaml/emitter.py | 2 +- lib/yaml/events.py | 27 ++++++++++++++++--- lib/yaml/parser.py | 58 +++++++++++++++++++++++++++++------------ lib/yaml/reader.py | 4 +++ lib/yaml/scanner.py | 45 +++++++++++++++++++++++++------- lib/yaml/tokens.py | 24 ++++++++++++++++- tests/data/spec-08-12.canonical | 1 + tests/data/spec-08-12.data | 2 +- tests/test_emitter.py | 11 ++++---- 9 files changed, 135 insertions(+), 39 deletions(-) diff --git a/lib/yaml/emitter.py b/lib/yaml/emitter.py index 4f33cb8..d2b372f 100644 --- a/lib/yaml/emitter.py +++ b/lib/yaml/emitter.py @@ -175,6 +175,6 @@ class Emitter: self.soft_space = True def write_indent(self): - self.writer.write("\n"+" "*(self.level*4)) + self.writer.write("\n"+" "*(self.level*2)) self.soft_space = False diff --git a/lib/yaml/events.py b/lib/yaml/events.py index 97bccb3..0c27903 100644 --- a/lib/yaml/events.py +++ b/lib/yaml/events.py @@ -20,19 +20,25 @@ class AliasEvent(NodeEvent): pass class ScalarEvent(NodeEvent): - def __init__(self, anchor, tag, value, start_mark=None, end_mark=None): + def __init__(self, anchor, tag, value, start_mark=None, end_mark=None, + implicit=None, style=None): self.anchor = anchor self.tag = tag self.value = value self.start_mark = start_mark self.end_mark = end_mark + self.implicit = implicit + self.style = style class CollectionEvent(NodeEvent): - def __init__(self, anchor, tag, start_mark=None, end_mark=None): + def __init__(self, anchor, tag, start_mark=None, end_mark=None, + flow=None, compact=None): self.anchor = anchor self.tag = tag self.start_mark = start_mark self.end_mark = end_mark + self.flow = flow + self.compact = compact class 
SequenceEvent(CollectionEvent): pass @@ -44,13 +50,26 @@ class CollectionEndEvent(Event): pass class DocumentStartEvent(Event): - pass + def __init__(self, start_mark=None, end_mark=None, + indent=None, implicit=None, version=None, tags=None, + canonical=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.indent = indent + self.implicit = implicit + self.version = version + self.tags = tags + self.canonical = canonical class DocumentEndEvent(Event): pass class StreamStartEvent(Event): - pass + def __init__(self, start_mark=None, end_mark=None, + encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding class StreamEndEvent(Event): pass diff --git a/lib/yaml/parser.py b/lib/yaml/parser.py index 9870699..f245f8f 100644 --- a/lib/yaml/parser.py +++ b/lib/yaml/parser.py @@ -130,7 +130,8 @@ class Parser: # Parse start of stream. token = self.scanner.get() - yield StreamStartEvent(token.start_mark, token.end_mark) + yield StreamStartEvent(token.start_mark, token.end_mark, + encoding=token.encoding) # Parse implicit document. 
if not self.scanner.check(DirectiveToken, DocumentStartToken, @@ -138,7 +139,7 @@ class Parser: self.tag_handles = self.DEFAULT_TAGS token = self.scanner.peek() start_mark = end_mark = token.start_mark - yield DocumentStartEvent(start_mark, end_mark) + yield DocumentStartEvent(start_mark, end_mark, implicit=True) for event in self.parse_block_node(): yield event token = self.scanner.peek() @@ -152,7 +153,7 @@ class Parser: while not self.scanner.check(StreamEndToken): token = self.scanner.peek() start_mark = token.start_mark - self.process_directives() + version, tags = self.process_directives() if not self.scanner.check(DocumentStartToken): raise ParserError(None, None, "expected '', but found %r" @@ -160,7 +161,8 @@ class Parser: self.scanner.peek().start_mark) token = self.scanner.get() end_mark = token.end_mark - yield DocumentStartEvent(start_mark, end_mark) + yield DocumentStartEvent(start_mark, end_mark, + implicit=False, version=version, tags=tags) if self.scanner.check(DirectiveToken, DocumentStartToken, DocumentEndToken, StreamEndToken): yield self.process_empty_scalar(token.end_mark) @@ -201,9 +203,14 @@ class Parser: "duplicate tag handle %r" % handle.encode('utf-8'), token.start_mark) self.tag_handles[handle] = prefix + version_value = self.yaml_version + tags_value = None + if self.tag_handles: + tags_value = self.tag_handles.copy() for key in self.DEFAULT_TAGS: if key not in self.tag_handles: self.tag_handles[key] = self.DEFAULT_TAGS[key] + return version_value, tags_value def parse_block_node(self): return self.parse_node(block=True) @@ -232,19 +239,22 @@ class Parser: start_mark = end_mark = tag_mark = None if self.scanner.check(AnchorToken): token = self.scanner.get() - start_mark = end_mark = token.start_mark + start_mark = token.start_mark + end_mark = token.end_mark anchor = token.value if self.scanner.check(TagToken): token = self.scanner.get() - end_mark = tag_mark = token.start_mark + tag_mark = token.start_mark + end_mark = token.end_mark 
tag = token.value elif self.scanner.check(TagToken): token = self.scanner.get() - start_mark = end_mark = tag_mark = token.start_mark + start_mark = tag_mark = token.start_mark + end_mark = token.end_mark tag = token.value if self.scanner.check(AnchorToken): token = self.scanner.get() - end_mark = token.start_mark + end_mark = token.end_mark anchor = token.value if tag is not None: handle, suffix = tag @@ -261,35 +271,48 @@ class Parser: self.scanner.peek().plain): tag = u'!' if start_mark is None: - start_mark = self.scanner.peek().start_mark + start_mark = end_mark = self.scanner.peek().start_mark event = None collection_events = None if indentless_sequence and self.scanner.check(BlockEntryToken): end_mark = self.scanner.peek().end_mark - event = SequenceEvent(anchor, tag, start_mark, end_mark) + event = SequenceEvent(anchor, tag, start_mark, end_mark, + flow=False, compact=False) collection_events = self.parse_indentless_sequence() else: if self.scanner.check(ScalarToken): token = self.scanner.get() end_mark = token.end_mark event = ScalarEvent(anchor, tag, token.value, - start_mark, end_mark) + start_mark, end_mark, + implicit=(tag is None), style=token.style) elif self.scanner.check(FlowSequenceStartToken): end_mark = self.scanner.peek().end_mark - event = SequenceEvent(anchor, tag, start_mark, end_mark) + event = SequenceEvent(anchor, tag, start_mark, end_mark, + flow=True) collection_events = self.parse_flow_sequence() elif self.scanner.check(FlowMappingStartToken): end_mark = self.scanner.peek().end_mark - event = MappingEvent(anchor, tag, start_mark, end_mark) + event = MappingEvent(anchor, tag, start_mark, end_mark, + flow=True) collection_events = self.parse_flow_mapping() elif block and self.scanner.check(BlockSequenceStartToken): end_mark = self.scanner.peek().start_mark - event = SequenceEvent(anchor, tag, start_mark, end_mark) + compact = self.scanner.peek().inline + event = SequenceEvent(anchor, tag, start_mark, end_mark, + flow=False, 
compact=compact) collection_events = self.parse_block_sequence() elif block and self.scanner.check(BlockMappingStartToken): end_mark = self.scanner.peek().start_mark - event = MappingEvent(anchor, tag, start_mark, end_mark) + compact = self.scanner.peek().inline + event = MappingEvent(anchor, tag, start_mark, end_mark, + flow=False, compact=compact) collection_events = self.parse_block_mapping() + elif anchor is not None or tag is not None: + # Empty scalars are allowed even if a tag or an anchor is + # specified. + event = ScalarEvent(anchor, tag, u'', start_mark, end_mark, + implicit=False, style='') else: if block: node = 'block' @@ -384,7 +407,8 @@ class Parser: if self.scanner.check(KeyToken): token = self.scanner.get() yield MappingEvent(None, u'!', - token.start_mark, token.end_mark) + token.start_mark, token.end_mark, + flow=True, compact=True) if not self.scanner.check(ValueToken, FlowEntryToken, FlowSequenceEndToken): for event in self.parse_flow_node(): @@ -460,5 +484,5 @@ class Parser: yield CollectionEndEvent(token.start_mark, token.end_mark) def process_empty_scalar(self, mark): - return ScalarEvent(None, None, u'', mark, mark) + return ScalarEvent(None, None, u'', mark, mark, implicit=True) diff --git a/lib/yaml/reader.py b/lib/yaml/reader.py index 9778943..ab16a13 100644 --- a/lib/yaml/reader.py +++ b/lib/yaml/reader.py @@ -100,6 +100,7 @@ class Reader: self.pointer = 0 self.raw_buffer = None self.raw_decode = None + self.encoding = None self.index = 0 self.line = 0 self.column = 0 @@ -156,10 +157,13 @@ class Reader: if not isinstance(self.raw_buffer, unicode): if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): self.raw_decode = utf_16_le_decode + self.encoding = 'utf-16-le' elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE): self.raw_decode = utf_16_be_decode + self.encoding = 'utf-16-be' else: self.raw_decode = utf_8_decode + self.encoding = 'utf-8' self.update(1) NON_PRINTABLE = 
re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]') diff --git a/lib/yaml/scanner.py b/lib/yaml/scanner.py index 9c536b4..a665da9 100644 --- a/lib/yaml/scanner.py +++ b/lib/yaml/scanner.py @@ -35,13 +35,15 @@ class ScannerError(MarkedYAMLError): class SimpleKey: # See below simple keys treatment. - def __init__(self, token_number, required, index, line, column, mark): + def __init__(self, token_number, required, index, line, column, mark=None, + inline=None): self.token_number = token_number self.required = required self.index = index self.line = line self.column = column self.mark = mark + self.inline = inline class Scanner: @@ -81,6 +83,10 @@ class Scanner: # Past indentation levels. self.indents = [] + # Used for providing style information to the parser. + self.current_line = self.previous_line = self.reader.line + self.current_column = self.previous_column = self.reader.column + # Variables related to simple keys treatment. # A simple key is a key that is not denoted by the '?' indicator. @@ -321,8 +327,9 @@ class Scanner: line = self.reader.line column = self.reader.column mark = self.reader.get_mark() + inline = (self.current_line == self.previous_line) key = SimpleKey(token_number, required, - index, line, column, mark) + index, line, column, mark, inline) self.possible_simple_keys[self.flow_level] = key def remove_possible_simple_key(self): @@ -380,8 +387,8 @@ class Scanner: mark = self.reader.get_mark() # Add STREAM-END. - self.tokens.append(StreamStartToken(mark, mark)) - + self.tokens.append(StreamStartToken(mark, mark, + encoding=self.reader.encoding)) def fetch_stream_end(self): @@ -509,7 +516,8 @@ class Scanner: # We may need to add BLOCK-SEQUENCE-START. 
if self.add_indent(self.reader.column): mark = self.reader.get_mark() - self.tokens.append(BlockSequenceStartToken(mark, mark)) + inline = (self.current_line == self.previous_line) + self.tokens.append(BlockSequenceStartToken(mark, mark, inline)) # It's an error for the block entry to occur in the flow context, # but we let the parser detect this. @@ -542,7 +550,8 @@ class Scanner: # We may need to add BLOCK-MAPPING-START. if self.add_indent(self.reader.column): mark = self.reader.get_mark() - self.tokens.append(BlockMappingStartToken(mark, mark)) + inline = (self.current_line == self.previous_line) + self.tokens.append(BlockMappingStartToken(mark, mark, inline)) # Simple keys are allowed after '?' in the block context. self.allow_simple_key = not self.flow_level @@ -572,7 +581,8 @@ class Scanner: if not self.flow_level: if self.add_indent(key.column): self.tokens.insert(key.token_number-self.tokens_taken, - BlockMappingStartToken(key.mark, key.mark)) + BlockMappingStartToken(key.mark, key.mark, + key.inline)) # There cannot be two simple keys one after another. self.allow_simple_key = False @@ -791,6 +801,11 @@ class Scanner: else: found = True + self.previous_line = self.current_line + self.previous_column = self.current_column + self.current_line = self.reader.line + self.current_column = self.reader.column + def scan_directive(self): # See the specification for details. start_mark = self.reader.get_mark() @@ -1053,7 +1068,12 @@ class Scanner: chunks.extend(breaks) # We are done. - return ScalarToken(u''.join(chunks), False, start_mark, end_mark) + if folded: + style = '>' + else: + style = '|' + return ScalarToken(u''.join(chunks), False, start_mark, end_mark, + style) def scan_block_scalar_indicators(self, start_mark): # See the specification for details. 
@@ -1154,7 +1174,12 @@ class Scanner: chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) self.reader.forward() end_mark = self.reader.get_mark() - return ScalarToken(u''.join(chunks), False, start_mark, end_mark) + if double: + style = '"' + else: + style = '\'' + return ScalarToken(u''.join(chunks), False, start_mark, end_mark, + style) ESCAPE_REPLACEMENTS = { u'0': u'\0', @@ -1305,7 +1330,7 @@ class Scanner: if not spaces or self.reader.peek() == u'#' \ or (not self.flow_level and self.reader.column < indent): break - return ScalarToken(u''.join(chunks), True, start_mark, end_mark) + return ScalarToken(u''.join(chunks), True, start_mark, end_mark, '') def scan_plain_spaces(self, indent, start_mark): # See the specification for details. diff --git a/lib/yaml/tokens.py b/lib/yaml/tokens.py index 93c3005..e315374 100644 --- a/lib/yaml/tokens.py +++ b/lib/yaml/tokens.py @@ -24,21 +24,41 @@ class DirectiveToken(Token): class DocumentStartToken(Token): id = '' + def __init__(self, name, value, start_mark=None, end_mark=None): + self.name = name + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark class DocumentEndToken(Token): id = '' class StreamStartToken(Token): id = '' + def __init__(self, start_mark=None, end_mark=None, + encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding class StreamEndToken(Token): id = '' class BlockSequenceStartToken(Token): id = '' + def __init__(self, start_mark=None, end_mark=None, + inline=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.inline = inline class BlockMappingStartToken(Token): id = '' + def __init__(self, start_mark=None, end_mark=None, + inline=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.inline = inline class BlockEndToken(Token): id = '' @@ -90,9 +110,11 @@ class TagToken(Token): class ScalarToken(Token): id = '' - def __init__(self, value, plain, start_mark=None, end_mark=None): + 
def __init__(self, value, plain, start_mark=None, end_mark=None, + style=None): self.value = value self.plain = plain self.start_mark = start_mark self.end_mark = end_mark + self.style = style diff --git a/tests/data/spec-08-12.canonical b/tests/data/spec-08-12.canonical index dc3b81a..93899f4 100644 --- a/tests/data/spec-08-12.canonical +++ b/tests/data/spec-08-12.canonical @@ -6,4 +6,5 @@ !!str "Tagged", *A, !!str "", + !!str "", ] diff --git a/tests/data/spec-08-12.data b/tests/data/spec-08-12.data index d936ba2..3d4c6b7 100644 --- a/tests/data/spec-08-12.data +++ b/tests/data/spec-08-12.data @@ -3,6 +3,6 @@ &anchor "Anchored", !!str 'Tagged', *anchor, # Alias node -# !!str, # Empty plain scalar + !!str , # Empty plain scalar '', # Empty plain scalar ] diff --git a/tests/test_emitter.py b/tests/test_emitter.py index 1ff0ecc..fed6953 100644 --- a/tests/test_emitter.py +++ b/tests/test_emitter.py @@ -1,5 +1,5 @@ -import test_appliance, sys +import test_appliance, sys, StringIO from yaml import * @@ -7,11 +7,12 @@ class TestEmitterOnCanonical(test_appliance.TestAppliance): def _testEmitterOnCanonical(self, test_name, canonical_filename): events = list(iter(Parser(Scanner(Reader(file(canonical_filename, 'rb')))))) - writer = sys.stdout + #writer = sys.stdout + writer = StringIO.StringIO() emitter = Emitter(writer) - print "-"*30 - print "ORIGINAL DATA:" - print file(canonical_filename, 'rb').read() + #print "-"*30 + #print "ORIGINAL DATA:" + #print file(canonical_filename, 'rb').read() for event in events: emitter.emit(event) -- cgit v1.2.1