summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorxi <xi@18f92427-320e-0410-9341-c67f048884a3>2006-04-03 23:23:10 +0000
committerxi <xi@18f92427-320e-0410-9341-c67f048884a3>2006-04-03 23:23:10 +0000
commitc8d7e90db338acfb7f6f29334a94638344f71199 (patch)
tree37fe4390e2584102d3d14b56dfd73520db65eadc
parent3e3ce1443520ece1e3468f9d16d003cffcda9824 (diff)
downloadpyyaml-c8d7e90db338acfb7f6f29334a94638344f71199.tar.gz
Working on emitter: implement the state machine.
git-svn-id: http://svn.pyyaml.org/pyyaml/trunk@131 18f92427-320e-0410-9341-c67f048884a3
-rw-r--r--lib/yaml/emitter.py421
-rw-r--r--lib/yaml/events.py4
2 files changed, 424 insertions, 1 deletions
diff --git a/lib/yaml/emitter.py b/lib/yaml/emitter.py
new file mode 100644
index 0000000..1b81a5b
--- /dev/null
+++ b/lib/yaml/emitter.py
@@ -0,0 +1,421 @@
+
+# Emitter expects events obeying the following grammar:
+# stream ::= STREAM-START document* STREAM-END
+# document ::= DOCUMENT-START node DOCUMENT-END
+# node ::= SCALAR | sequence | mapping
+# sequence ::= SEQUENCE-START node* SEQUENCE-END
+# mapping ::= MAPPING-START (node node)* MAPPING-END
+
+__all__ = ['Emitter', 'EmitterError']
+
+from error import YAMLError
+from events import *
+
+class EmitterError(YAMLError):
+ pass
+
+class Emitter:
+
+ DEFAULT_TAG_PREFIXES = {
+ u'!' : u'!',
+ u'tag:yaml.org,2002:' : u'!!',
+ }
+
+ def __init__(self, writer):
+
+ # The writer should have the methods `write` and possibly `flush`.
+ self.writer = writer
+
+ # Encoding is provided by STREAM-START.
+ self.encoding = None
+
+ # Emitter is a state machine with a stack of states to handle nested
+ # structures.
+ self.states = []
+ self.state = self.expect_stream_start
+
+ # Current event and the event queue.
+ self.events = []
+ self.event = None
+
+ # The current indentation level and the stack of previous indents.
+ self.indents = []
+ self.indent = None
+
+ # Flow level.
+ self.flow_level = 0
+
+ # Contexts.
+ self.root_context = False
+ self.sequence_context = False
+ self.mapping_context = False
+ self.simple_key_context = False
+
+ # Characteristics of the last emitted character:
+ # - current position.
+ # - is it a line break?
+ # - is it a whitespace?
+ # - is it an indention character
+ # (indentation space, '-', '?', or ':')?
+ self.line = 0
+ self.column = 0
+ self.whitespace = True
+ self.indention = True
+
+ # Formatting details.
+ self.canonical = False
+ self.best_line_break = u'\n'
+ self.best_indent = 2
+ self.best_width = 80
+ self.tag_prefixes = None
+
+ # Scalar analysis.
+ self.analysis = None
+
+ def emit(self, event):
+ if self.events:
+ self.events.append(event)
+ event = self.events.pop(0)
+ self.event = event
+ if self.need_more_events():
+ self.event.insert(0, event)
+ return
+ self.state()
+ self.event = None
+
+ # In some cases, we wait for a few next events before emitting.
+
+ def need_more_events(self):
+ if isinstance(self.event, DocumentStartEvent):
+ return self.need_events(1)
+ elif isinstance(self.event, SequenceStartEvent):
+ return self.need_events(2)
+ elif isinstance(self.event, MappingStartEvent):
+ return self.need_events(3)
+ else:
+ return False
+
+ def need_events(self, count):
+ level = 0
+ for event in self.events:
+ if isinstance(event, (DocumentStart, CollectionStart)):
+ level += 1
+ elif isinstance(event, (DocumentEnd, CollectionEnd)):
+ level -= 1
+ elif isinstance(event, StreamEnd):
+ level = -1
+ if level < 0:
+ return False
+ return (len(self.events) < count)
+
+ def increase_indent(self, flow=False, indentless=False):
+ self.indents.append(self.indent)
+ if self.indent is None:
+ if flow:
+ self.indent = self.best_indent
+ else:
+ self.indent = 0
+ elif not indentless:
+ self.indent += self.best_indent
+
+ # States.
+
+ # Stream handlers.
+
+ def expect_stream_start(self):
+ if isinstance(self.event, StreamStartEvent):
+ self.encoding = event.encoding
+ self.canonical = event.canonical
+ if self.event.indent and self.event.indent > 1:
+ self.best_indent = self.event.indent
+ if self.event.width and self.event.width > self.best_indent:
+ self.best_width = self.event.width
+ if self.event.line_break in [u'\r', u'\n', u'\r\n']:
+ self.best_line_break = self.event.line_break
+ self.write_stream_start()
+ self.state = self.expect_first_document_start
+ else:
+ raise EmitterError("expected StreamStartEvent, but got %s"
+ % self.event)
+
+ def expect_nothing(self):
+ raise EmitterError("expected nothing, but got %s" % self.event)
+
+ # Document handlers.
+
+ def expect_first_document_start(self):
+ return self.expect_document_start(first=True)
+
+ def expect_document_start(self, first=False):
+ if isinstance(self.event, DocumentStartEvent):
+ if self.event.version:
+ self.write_version_directive(self.event.version)
+ self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
+ if self.event.tags:
+ for handle in self.event.tags:
+ prefix = self.event.tags[handle]
+ self.tag_prefixes[prefix] = handle
+ self.write_tag_directive(handle, prefix)
+ implicit = (first and self.event.implicit and not self.canonical
+ and not self.event.version and not self.event.tags
+ and not self.check_next_empty_scalar())
+ if not implicit:
+ self.write_indent()
+ self.write_indicator(u'---', True)
+ if self.canonical:
+ self.write_indent()
+ self.state = self.expect_document_root
+ elif isinstance(self.event, StreamEndEvent):
+ self.write_stream_end()
+ self.state = self.expect_nothing
+ else:
+ raise EmitterError("expected DocumentStartEvent, but got %s"
+ % self.event)
+
+ def expect_document_end(self):
+ if isinstance(self.event, DocumentEndEvent):
+ self.write_indent()
+ if not event.implicit:
+ self.write_indicator(u'...', True)
+ self.write_indent()
+ self.state = self.expect_document_start
+ else:
+ raise EmitterError("expected DocumentEndEvent, but got %s"
+ % self.event)
+
+ def expect_document_root(self):
+ self.expect_node(root=True)
+
+ # Node handlers.
+
+ def expect_node(self, root=False, sequence=False, mapping=False,
+ simple_key=False):
+ self.root_context = root
+ self.sequence_context = sequence
+ self.mapping_context = mapping
+ self.simple_key_context = simple_key
+ if isinstance(self.event, AliasEvent):
+ self.expect_alias()
+ elif isinstance(event, (ScalarEvent, CollectionEvent)):
+ self.process_anchor()
+ self.process_tag()
+ if isinstance(self.event, ScalarEvent):
+ self.expect_scalar()
+ elif isinstance(self.event, SequenceEvent):
+ if self.flow_level or self.canonical or self.event.flow_style \
+ or self.check_empty_sequence():
+ self.expect_flow_sequence()
+ else:
+ self.expect_block_sequence()
+ elif isinstance(self.event, MappingEvent):
+ if self.flow_level or self.canonical or self.event.flow_style \
+ or self.check_empty_mapping():
+ self.expect_flow_mapping()
+ else:
+ self.expect_block_mapping()
+ else:
+ raise EmitterError("expected NodeEvent, but got %s" % self.event)
+
+ def expect_alias(self):
+ self.write_anchor(u'*', self.event.anchor)
+ self.state = self.states.pop()
+
+ def expect_scalar(self):
+ self.increase_indent(flow=True)
+ self.process_scalar()
+ self.indent = self.indents.pop()
+ self.state = self.states.pop()
+
+ # Flow sequence handlers.
+
+ def expect_flow_sequence(self):
+ self.write_indicator(u'[', True, whitespace=True)
+ self.flow_level += 1
+ self.increase_indent(flow=True)
+ self.state = self.expect_first_flow_sequence_item
+
+ def expect_first_flow_sequence_item(self):
+ if isinstance(self.event, SequenceEndEvent):
+ self.indent = self.indents.pop()
+ self.flow_level -= 1
+ self.write_indicator(u']', False)
+ self.state = self.states.pop()
+ else:
+ if self.canonical or self.column > self.best_width:
+ self.write_indent()
+ self.states.append(self.expect_flow_sequence_item)
+ self.expect_node(sequence=True)
+
+ def expect_flow_sequence_item(self):
+ if isinstance(self.event, SequenceEndEvent):
+ self.indent = self.indents.pop()
+ self.flow_level -= 1
+ if self.canonical:
+ self.write_indicator(u',', False)
+ self.write_indent()
+ self.write_indicator(u']', False)
+ self.state = self.states.pop()
+ else:
+ self.write_indicator(u',', False)
+ if self.canonical or self.column > self.best_width:
+ self.write_indent()
+ self.states.append(self.expect_flow_sequence_item)
+ self.expect_node(sequence=True)
+
+ # Flow mapping handlers.
+
+ def expect_flow_mapping(self):
+ self.write_indicator(u'{', True, whitespace=True)
+ self.flow_level += 1
+ self.increase_indent(flow=True)
+ self.state = self.expect_first_flow_mapping_key
+
+ def expect_first_flow_mapping_key(self):
+ if isinstance(self.event, MappingEndEvent):
+ self.indent = self.indents.pop()
+ self.flow_level -= 1
+ self.write_indicator(u'}', False)
+ self.state = self.states.pop()
+ else:
+ if self.canonical or self.column > self.best_width:
+ self.write_indent()
+ if not self.canonical and self.check_simple_key():
+ self.states.append(self.expect_flow_mapping_simple_value)
+ self.expect_node(mapping=True, simple_key=True)
+ else:
+ self.write_indicator(u'?', True)
+ self.states.append(self.expect_flow_mapping_value)
+ self.expect_node(mapping=True)
+
+ def expect_flow_mapping_key(self):
+ if isinstance(self.event, MappingEndEvent):
+ self.indent = self.indents.pop()
+ self.flow_level -= 1
+ if self.canonical:
+ self.write_indicator(u',', False)
+ self.write_indent()
+ self.write_indicator(u'}', False)
+ self.state = self.states.pop()
+ else:
+ self.write_indicator(u',', False)
+ if self.canonical or self.column > self.best_width:
+ self.write_indent()
+ if not self.canonical and self.check_simple_key():
+ self.states.append(self.expect_flow_mapping_simple_value)
+ self.expect_node(mapping=True, simple_key=True)
+ else:
+ self.write_indicator(u'?', True)
+ self.states.append(self.expect_flow_mapping_value)
+ self.expect_node(mapping=True)
+
+ def expect_flow_mapping_simple_value(self):
+ self.write_indicator(u':', False)
+ self.states.append(self.expect_flow_mapping_key)
+ self.expect_node(mapping=True)
+
+ def expect_flow_mapping_value(self):
+ if self.canonical or self.column > self.best_width:
+ self.write_indent()
+ self.write_indicator(u':', True)
+ self.states.append(self.expect_flow_mapping_key)
+ self.expect_node(mapping=True)
+
+ # Block sequence handlers.
+
+ def expect_block_sequence(self):
+ indentless = (self.mapping_context and not self.indention)
+ self.increase_indent(flow=False, indentless=indentless)
+ self.state = self.expect_first_block_sequence_item
+
+ def expect_first_block_sequence_item(self):
+ return self.expect_block_sequence_item(first=True)
+
+ def expect_block_sequence_item(self, first=False):
+ if not first and isinstance(self.event, SequenceEndEvent):
+ self.indent = self.indents.pop()
+ self.state = self.states.pop()
+ else:
+ self.write_indent()
+ self.write_indicator(u'-', True, indention=True)
+ self.states.append(self.expect_block_sequence_item)
+ self.expect_node(sequence=True)
+
+ # Block mapping handlers.
+
+ def expect_block_mapping(self):
+ self.increase_indent(flow=False)
+ self.state = self.expect_first_block_mapping_key
+
+ def expect_first_block_mapping_key(self):
+ return self.expect_block_mapping_key(first=True)
+
+ def expect_block_mapping_key(self, first=False):
+ if not first and isinstance(self.event, SequenceEndEvent):
+ self.indent = self.indents.pop()
+ self.state = self.states.pop()
+ else:
+ self.write_indent()
+ if self.check_simple_key():
+ self.states.append(self.expect_block_mapping_simple_value)
+ self.expect_node(mapping=True, simple_key=True)
+ else:
+ self.write_indicator(u'?', True, indention=True)
+ self.states.append(self.expect_block_mapping_value)
+ self.expect_node(mapping=True)
+
+ def expect_block_mapping_simple_value(self):
+ self.write_indicator(u':', False)
+ self.states.append(self.expect_block_mapping_key)
+ self.expect_node(mapping=True)
+
+ def expect_block_mapping_value(self):
+ self.write_indent()
+ self.write_indicator(u':', True, indention=True)
+ self.states.append(self.expect_block_mapping_key)
+ self.expect_node(mapping=True)
+
+ # Writers.
+
+ def write_stream_start(self):
+ # Write BOM if needed.
+ if self.encoding and self.encoding.startswith('utf-16'):
+ self.writer.write(u'\xFF\xFE'.encode(self.encoding))
+
+ def write_stream_end(self):
+ if hasattr(self.writer, 'flush'):
+ self.writer.flush()
+
+ def write_indicator(self, indicator, need_whitespace,
+ whitespace=False, indention=False):
+ if self.whitespace:
+ data = indicator
+ else:
+ data = u' '+indicator
+ self.writespace = whitespace
+ self.indention = self.indention and indention
+ self.column += len(data)
+ if self.encoding:
+ data = data.encode(self.encoding)
+ self.writer.write(data)
+
+ def write_indent(self):
+ indent = self.indent or 0
+ if not self.indention or self.column > indent:
+ self.write_line_break()
+ if self.column < indent:
+ data = u' '*(indent-self.column)
+ self.column = indent
+ if self.encoding:
+ data = data.encode(self.encoding)
+ self.writer.write(data)
+
+ def write_line_break(self):
+ data = self.best_line_break
+ self.whitespace = True
+ self.indention = True
+ self.line += 1
+ self.column = 0
+ if self.encoding:
+ data = data.encode(self.encoding)
+ self.writer.write(data)
+
diff --git a/lib/yaml/events.py b/lib/yaml/events.py
index f9a695f..a61ec26 100644
--- a/lib/yaml/events.py
+++ b/lib/yaml/events.py
@@ -34,10 +34,12 @@ class CollectionEndEvent(Event):
class StreamStartEvent(Event):
def __init__(self, start_mark=None, end_mark=None,
- encoding=None, canonical=None, indent=None, width=None):
+ encoding=None, line_break=None, canonical=None,
+ indent=None, width=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.encoding = encoding
+ self.line_break = line_break
self.canonical = canonical
self.indent = indent
self.width = width