author     xi <xi@18f92427-320e-0410-9341-c67f048884a3>  2006-04-15 23:54:52 +0000
committer  xi <xi@18f92427-320e-0410-9341-c67f048884a3>  2006-04-15 23:54:52 +0000
commit     6d52fd5c231b9810da0e68d0e24f954803229357 (patch)
tree       01a40354d0f9dda749f4601b6a21e1e8b85ebc2e
parent     5b3ffc5848ec0b8c5555fc66e6f67b9baab4739a (diff)
Major refactoring.
git-svn-id: http://svn.pyyaml.org/pyyaml/trunk@136 18f92427-320e-0410-9341-c67f048884a3
-rwxr-xr-x  examples/yaml-hl/yaml_hl.py          8
-rw-r--r--  lib/yaml/__init__.py               319
-rw-r--r--  lib/yaml/composer.py               176
-rw-r--r--  lib/yaml/constructor.py             81
-rw-r--r--  lib/yaml/detector.py                 7
-rw-r--r--  lib/yaml/dumper.py                  56
-rw-r--r--  lib/yaml/emitter.py                500
-rw-r--r--  lib/yaml/error.py                   13
-rw-r--r--  lib/yaml/events.py                  15
-rw-r--r--  lib/yaml/loader.py                  41
-rw-r--r--  lib/yaml/nodes.py                    3
-rw-r--r--  lib/yaml/parser.py                 254
-rw-r--r--  lib/yaml/reader.py                  16
-rw-r--r--  lib/yaml/representer.py            156
-rw-r--r--  lib/yaml/resolver.py                72
-rw-r--r--  lib/yaml/scanner.py                533
-rw-r--r--  lib/yaml/serializer.py              72
-rw-r--r--  lib/yaml/yaml_object.py             34
-rw-r--r--  tests/data/construct-custom.code     1
-rw-r--r--  tests/data/construct-custom.data     3
-rw-r--r--  tests/data/spec-08-07.canonical      3
-rw-r--r--  tests/data/tags.events               2
-rw-r--r--  tests/test_appliance.py            186
-rw-r--r--  tests/test_canonical.py              6
-rw-r--r--  tests/test_constructor.py           55
-rw-r--r--  tests/test_detector.py              11
-rw-r--r--  tests/test_emitter.py               45
-rw-r--r--  tests/test_errors.py                16
-rw-r--r--  tests/test_representer.py           57
-rw-r--r--  tests/test_structure.py             97
-rw-r--r--  tests/test_tokens.py                10
31 files changed, 1622 insertions(+), 1226 deletions(-)
diff --git a/examples/yaml-hl/yaml_hl.py b/examples/yaml-hl/yaml_hl.py
index dd81b3f..0801521 100755
--- a/examples/yaml-hl/yaml_hl.py
+++ b/examples/yaml-hl/yaml_hl.py
@@ -2,6 +2,14 @@
import yaml, codecs, sys, optparse
+
+
+yaml.add_resolver(u'!Config', [])
+yaml.add_resolver(u'!TokensConfig', [u'tokens'])
+yaml.add_resolver(u'!EventsConfig', [u'events'])
+yaml.add_resolver(u'!StartEndConfig', [u'tokens', None])
+yaml.add_resolver(u'!StartEndConfig', [u'events', None])
+
class YAMLHighlight:
def __init__(self, config):
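The five `add_resolver` calls above assign tags by position in the document rather than by explicit `!` tags: the empty path `[]` matches the root node, `[u'tokens']` matches the value under the `tokens` key, and `None` inside a path acts as a wildcard for any key. A hedged sketch of a config document these resolvers would type (content invented; only the shape matters):

    import yaml

    # Assumes the add_resolver calls from yaml_hl.py above have run.
    document = """
    tokens:               # path [u'tokens']        -> !TokensConfig
      scalar:             # path [u'tokens', None]  -> !StartEndConfig
        start: '<span>'
        end: '</span>'
    events:               # path [u'events']        -> !EventsConfig
      stream-start:       # path [u'events', None]  -> !StartEndConfig
        start: '<pre>'
        end: '</pre>'
    """

    # compose() returns the representation tree with resolved tags, so
    # the effect is visible without defining any constructors:
    root = yaml.compose(document)
    print root.tag        # -> u'!Config', from the empty path []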
diff --git a/lib/yaml/__init__.py b/lib/yaml/__init__.py
index e6a3526..15fae85 100644
--- a/lib/yaml/__init__.py
+++ b/lib/yaml/__init__.py
@@ -5,7 +5,6 @@ from reader import *
from scanner import *
from parser import *
from composer import *
-from resolver import *
from constructor import *
from emitter import *
@@ -18,87 +17,257 @@ from tokens import *
from events import *
from nodes import *
-from yaml_object import *
-
-def parse(data, Reader=Reader, Scanner=Scanner, Parser=Parser):
- reader = Reader(data)
- scanner = Scanner(reader)
- parser = Parser(scanner)
- return parser
-
-def load_all(data, Reader=Reader, Scanner=Scanner, Parser=Parser,
- Composer=Composer, Resolver=Resolver, Constructor=Constructor):
- reader = Reader(data)
- scanner = Scanner(reader)
- parser = Parser(scanner)
- composer = Composer(parser)
- resolver = Resolver(composer)
- constructor = Constructor(resolver)
- return constructor
-
-def safe_load_all(data, Reader=Reader, Scanner=Scanner, Parser=Parser,
- Composer=Composer, Resolver=Resolver, Constructor=SafeConstructor):
- return load_all(data, Reader, Scanner, Parser, Composer, Resolver,
- Constructor)
-
-def load(data, *args, **kwds):
- for document in load_all(data, *args, **kwds):
- return document
-
-def safe_load(data, *args, **kwds):
- for document in safe_load_all(data, *args, **kwds):
- return document
-
-def emit(events, writer=None, Emitter=Emitter):
- if writer is None:
+from loader import *
+from dumper import *
+
+def scan(stream, Loader=Loader):
+ """
+ Scan a YAML stream and produce scanning tokens.
+ """
+ loader = Loader(stream)
+ while loader.check_token():
+ yield loader.get_token()
+
+def parse(stream, Loader=Loader):
+ """
+ Parse a YAML stream and produce parsing events.
+ """
+ loader = Loader(stream)
+ while loader.check_event():
+ yield loader.get_event()
+
+def compose(stream, Loader=Loader):
+ """
+ Parse the first YAML document in a stream
+ and produce the corresponding representation tree.
+ """
+ loader = Loader(stream)
+ if loader.check_node():
+ return loader.get_node()
+
+def compose_all(stream, Loader=Loader):
+ """
+ Parse all YAML documents in a stream
+ and produce corresponding representation trees.
+ """
+ loader = Loader(stream)
+ while loader.check_node():
+ yield loader.get_node()
+
+def load_all(stream, Loader=Loader):
+ """
+ Parse all YAML documents in a stream
+ and produce corresponding Python objects.
+ """
+ loader = Loader(stream)
+ while loader.check_data():
+ yield loader.get_data()
+
+def load(stream, Loader=Loader):
+ """
+ Parse the first YAML document in a stream
+ and produce the corresponding Python object.
+ """
+ loader = Loader(stream)
+ if loader.check_data():
+ return loader.get_data()
+
+def safe_load_all(stream):
+ """
+ Parse all YAML documents in a stream
+ and produce corresponding Python objects.
+ Resolve only basic YAML tags.
+ """
+ return load_all(stream, SafeLoader)
+
+def safe_load(stream):
+ """
+ Parse the first YAML document in a stream
+ and produce the corresponding Python object.
+ Resolve only basic YAML tags.
+ """
+ return load(stream, SafeLoader)
+
+def emit(events, stream=None, Dumper=Dumper,
+ canonical=None, indent=None, width=None,
+ allow_unicode=None, line_break=None):
+ """
+ Emit YAML parsing events into a stream.
+ If stream is None, return the produced string instead.
+ """
+ getvalue = None
+ if stream is None:
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
- writer = StringIO()
- return_value = True
- else:
- return_value = False
- emitter = Emitter(writer)
+ stream = StringIO()
+ getvalue = stream.getvalue
+ dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
+ allow_unicode=allow_unicode, line_break=line_break)
for event in events:
- emitter.emit(event)
- if return_value:
- return writer.getvalue()
-
-def dump_all(natives, writer=None, Emitter=Emitter,
- Serializer=Serializer, Representer=Representer,
- encoding='utf-8', line_break=None, canonical=None,
- indent=None, width=None, allow_unicode=None):
- if writer is None:
+ dumper.emit(event)
+ if getvalue:
+ return getvalue()
+
+def serialize_all(nodes, stream=None, Dumper=Dumper,
+ canonical=None, indent=None, width=None,
+ allow_unicode=None, line_break=None,
+ encoding='utf-8', explicit_start=None, explicit_end=None,
+ version=None, tags=None):
+ """
+ Serialize a sequence of representation trees into a YAML stream.
+ If stream is None, return the produced string instead.
+ """
+ getvalue = None
+ if stream is None:
+ try:
+ from cStringIO import StringIO
+ except ImportError:
+ from StringIO import StringIO
+ stream = StringIO()
+ getvalue = stream.getvalue
+ dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
+ allow_unicode=allow_unicode, line_break=line_break,
+ encoding=encoding, version=version, tags=tags,
+ explicit_start=explicit_start, explicit_end=explicit_end)
+ dumper.open()
+ for node in nodes:
+ dumper.serialize(node)
+ dumper.close()
+ if getvalue:
+ return getvalue()
+
+def serialize(node, stream=None, Dumper=Dumper, **kwds):
+ """
+ Serialize a representation tree into a YAML stream.
+ If stream is None, return the produced string instead.
+ """
+ return serialize_all([node], stream, Dumper=Dumper, **kwds)
+
+def dump_all(documents, stream=None, Dumper=Dumper,
+ canonical=None, indent=None, width=None,
+ allow_unicode=None, line_break=None,
+ encoding='utf-8', explicit_start=None, explicit_end=None,
+ version=None, tags=None):
+ """
+ Serialize a sequence of Python objects into a YAML stream.
+ If stream is None, return the produced string instead.
+ """
+ getvalue = None
+ if stream is None:
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
- writer = StringIO()
- return_value = True
- else:
- return_value = False
- emitter = Emitter(writer)
- serializer = Serializer(emitter, encoding=encoding, line_break=line_break,
- canonical=canonical, indent=indent, width=width,
- allow_unicode=allow_unicode)
- representer = Representer(serializer)
- for native in natives:
- representer.represent(native)
- representer.close()
- if return_value:
- return writer.getvalue()
-
-def safe_dump_all(natives, writer=None, Emitter=Emitter,
- Serializer=Serializer, Representer=SafeRepresenter,
- encoding='utf-8', line_break=None, canonical=None,
- indent=None, width=None, allow_unicode=None):
- return dump_all(natives, writer, Emitter, Serializer, Representer,
- encoding, line_break, canonical, indent, width, allow_unicode)
-
-def dump(native, *args, **kwds):
- return dump_all([native], *args, **kwds)
-
-def safe_dump(native, *args, **kwds):
- return safe_dump_all([native], *args, **kwds)
+ stream = StringIO()
+ getvalue = stream.getvalue
+ dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
+ allow_unicode=allow_unicode, line_break=line_break,
+ encoding=encoding, version=version, tags=tags,
+ explicit_start=explicit_start, explicit_end=explicit_end)
+ dumper.open()
+ for data in documents:
+ dumper.represent(data)
+ dumper.close()
+ if getvalue:
+ return getvalue()
+
+def dump(data, stream=None, Dumper=Dumper, **kwds):
+ """
+ Serialize a Python object into a YAML stream.
+ If stream is None, return the produced string instead.
+ """
+ return dump_all([data], stream, Dumper=Dumper, **kwds)
+
+def safe_dump_all(documents, stream=None, **kwds):
+ """
+ Serialize a sequence of Python objects into a YAML stream.
+ Produce only basic YAML tags.
+ If stream is None, return the produced string instead.
+ """
+ return dump_all(documents, stream, Dumper=SafeDumper, **kwds)
+
+def safe_dump(data, stream=None, **kwds):
+ """
+ Serialize a Python object into a YAML stream.
+ Produce only basic YAML tags.
+ If stream is None, return the produced string instead.
+ """
+ return dump_all([data], stream, Dumper=SafeDumper, **kwds)
+
+def add_detector(tag, regexp, first=None, Loader=Loader, Dumper=Dumper):
+ """
+ Add an implicit scalar detector.
+ If an implicit scalar value matches the given regexp,
+ the corresponding tag is assigned to the scalar.
+ first is a sequence of possible initial characters or None.
+ """
+ Loader.add_detector(tag, regexp, first)
+ Dumper.add_detector(tag, regexp, first)
+
+def add_resolver(tag, path, Loader=Loader):
+ """
+ Add a path-based resolver for the given tag.
+ A path is a list of keys that forms a path
+ to a node in the representation tree.
+ Keys can be string values, integers, or None.
+ """
+ Loader.add_resolver(tag, path)
+
+def add_constructor(tag, constructor, Loader=Loader):
+ """
+ Add a constructor for the given tag.
+ Constructor is a function that accepts a Loader instance
+ and a node object and produces the corresponding Python object.
+ """
+ Loader.add_constructor(tag, constructor)
+
+def add_multi_constructor(tag_prefix, multi_constructor, Loader=Loader):
+ """
+ Add a multi-constructor for the given tag prefix.
+ Multi-constructor is called for a node if its tag starts with tag_prefix.
+ Multi-constructor accepts a Loader instance, a tag suffix,
+ and a node object and produces the corresponding Python object.
+ """
+ Loader.add_multi_constructor(tag_prefix, multi_constructor)
+
+class YAMLObjectMetaclass(type):
+ """
+ The metaclass for YAMLObject.
+ """
+ def __init__(cls, name, bases, kwds):
+ super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds)
+ if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None:
+ cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml)
+ cls.yaml_dumper.add_representer(cls, cls.to_yaml)
+
+class YAMLObject(object):
+ """
+ An object that can dump itself to a YAML stream
+ and load itself from a YAML stream.
+ """
+
+ __metaclass__ = YAMLObjectMetaclass
+
+ yaml_loader = Loader
+ yaml_dumper = Dumper
+
+ yaml_tag = None
+ yaml_flow_style = None
+
+ def from_yaml(cls, loader, node):
+ """
+ Convert a representation node to a Python object.
+ """
+ return loader.construct_yaml_object(node, cls)
+ from_yaml = classmethod(from_yaml)
+
+ def to_yaml(cls, dumper, data):
+ """
+ Convert a Python object to a representation node.
+ """
+ return dumper.represent_yaml_object(cls.yaml_tag, data, cls,
+ flow_style=cls.yaml_flow_style)
+ to_yaml = classmethod(to_yaml)
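Taken together these entry points form the new two-sided API: `scan`/`parse`/`compose`/`load` on the way in, `emit`/`serialize`/`dump` on the way out, each a thin wrapper over a Loader or Dumper instance. A minimal usage sketch (the `Monster` class is invented for illustration):

    import yaml

    print yaml.dump({'name': 'pyyaml', 'revision': 136})

    for event in yaml.parse("- one\n- two\n"):
        print event

    class Monster(yaml.YAMLObject):
        yaml_tag = u'!Monster'
        def __init__(self, name, hits):
            self.name = name
            self.hits = hits

    # YAMLObjectMetaclass registered a constructor and a representer
    # for u'!Monster' when the class statement ran, so both directions
    # work without any further calls:
    text = yaml.dump(Monster('Cave Lizard', [3, 6]))
    monster = yaml.load("!Monster {name: Dragon, hits: [8, 10]}")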
diff --git a/lib/yaml/composer.py b/lib/yaml/composer.py
index bb26f36..4805091 100644
--- a/lib/yaml/composer.py
+++ b/lib/yaml/composer.py
@@ -1,5 +1,5 @@
-__all__ = ['Composer', 'ComposerError']
+__all__ = ['BaseComposer', 'Composer', 'ComposerError']
from error import MarkedYAMLError
from events import *
@@ -8,48 +8,85 @@ from nodes import *
class ComposerError(MarkedYAMLError):
pass
-class Composer:
+class BaseComposer:
- def __init__(self, parser):
- self.parser = parser
+ yaml_resolvers = {}
+
+ def __init__(self):
self.all_anchors = {}
self.complete_anchors = {}
+ self.resolver_tags = []
+ self.resolver_paths = []
- # Drop the STREAM-START event.
- self.parser.get()
-
- def check(self):
+ def check_node(self):
# Check if there are more documents available.
- return not self.parser.check(StreamEndEvent)
+ return not self.check_event(StreamEndEvent)
- def get(self):
+ def get_node(self):
# Get the root node of the next document.
- if not self.parser.check(StreamEndEvent):
+ if not self.check_event(StreamEndEvent):
return self.compose_document()
def __iter__(self):
# Iterator protocol.
- while not self.parser.check(StreamEndEvent):
+ while not self.check_event(StreamEndEvent):
yield self.compose_document()
def compose_document(self):
+ # Drop the STREAM-START event.
+ if self.check_event(StreamStartEvent):
+ self.get_event()
+
# Drop the DOCUMENT-START event.
- self.parser.get()
+ self.get_event()
# Compose the root node.
- node = self.compose_node()
+ node = self.compose_node([])
# Drop the DOCUMENT-END event.
- self.parser.get()
+ self.get_event()
self.all_anchors = {}
self.complete_anchors = {}
+ self.resolver_tags = []
+ self.resolver_paths = []
return node
- def compose_node(self):
- if self.parser.check(AliasEvent):
- event = self.parser.get()
+ def increase_resolver_depth(self, path):
+ depth = len(path)
+ tag = None
+ paths = []
+ if not depth:
+ for resolver_path in self.yaml_resolvers.keys():
+ if resolver_path:
+ paths.append(resolver_path)
+ else:
+ tag = self.yaml_resolvers[resolver_path]
+ else:
+ base, index = path[-1]
+ if isinstance(index, ScalarNode) \
+ and index.tag == self.DEFAULT_SCALAR_TAG:
+ index = index.value
+ elif isinstance(index, Node):
+ index = None
+ for resolver_path in self.resolver_paths[-1]:
+ resolver_index = resolver_path[depth-1]
+ if resolver_index is None or resolver_index == index:
+ if len(resolver_path) > depth:
+ paths.append(resolver_path)
+ else:
+ tag = self.yaml_resolvers[resolver_path]
+ self.resolver_tags.append(tag)
+ self.resolver_paths.append(paths)
+
+ def decrease_resolver_depth(self):
+ del self.resolver_tags[-1]
+ del self.resolver_paths[-1]
+
+ def compose_node(self, path):
+ if self.check_event(AliasEvent):
+ event = self.get_event()
anchor = event.anchor
if anchor not in self.all_anchors:
raise ComposerError(None, None, "found undefined alias %r"
@@ -61,7 +98,8 @@ class Composer:
"found recursive anchor %r" % anchor.encode('utf-8'),
event.start_mark)
return self.complete_anchors[anchor]
- event = self.parser.peek()
+ self.increase_resolver_depth(path)
+ event = self.peek_event()
anchor = event.anchor
if anchor is not None:
if anchor in self.all_anchors:
@@ -69,44 +107,84 @@ class Composer:
% anchor.encode('utf-8'), self.all_anchors[anchor].start_mark,
"second occurence", event.start_mark)
self.all_anchors[anchor] = event
- if self.parser.check(ScalarEvent):
- node = self.compose_scalar_node()
- elif self.parser.check(SequenceStartEvent):
- node = self.compose_sequence_node()
- elif self.parser.check(MappingStartEvent):
- node = self.compose_mapping_node()
+ if self.check_event(ScalarEvent):
+ node = self.compose_scalar_node(path)
+ elif self.check_event(SequenceStartEvent):
+ node = self.compose_sequence_node(path)
+ elif self.check_event(MappingStartEvent):
+ node = self.compose_mapping_node(path)
if anchor is not None:
self.complete_anchors[anchor] = node
+ self.decrease_resolver_depth()
return node
- def compose_scalar_node(self):
- event = self.parser.get()
- return ScalarNode(event.tag, event.value, event.implicit,
+ def compose_scalar_node(self, path):
+ event = self.get_event()
+ tag = self.resolve_scalar(path, event.tag, event.implicit, event.value)
+ return ScalarNode(tag, event.value,
event.start_mark, event.end_mark, style=event.style)
- def compose_sequence_node(self):
- start_event = self.parser.get()
- value = []
- while not self.parser.check(SequenceEndEvent):
- value.append(self.compose_node())
- end_event = self.parser.get()
- return SequenceNode(start_event.tag, value,
- start_event.start_mark, end_event.end_mark,
+ def compose_sequence_node(self, path):
+ start_event = self.get_event()
+ tag = self.resolve_sequence(path, start_event.tag)
+ node = SequenceNode(tag, [],
+ start_event.start_mark, None,
flow_style=start_event.flow_style)
+ index = 0
+ while not self.check_event(SequenceEndEvent):
+ node.value.append(self.compose_node(path+[(node, index)]))
+ index += 1
+ end_event = self.get_event()
+ node.end_mark = end_event.end_mark
+ return node
- def compose_mapping_node(self):
- start_event = self.parser.get()
- value = {}
- while not self.parser.check(MappingEndEvent):
- key_event = self.parser.peek()
- item_key = self.compose_node()
- item_value = self.compose_node()
- if item_key in value:
+ def compose_mapping_node(self, path):
+ start_event = self.get_event()
+ tag = self.resolve_mapping(path, start_event.tag)
+ node = MappingNode(tag, {},
+ start_event.start_mark, None,
+ flow_style=start_event.flow_style)
+ while not self.check_event(MappingEndEvent):
+ key_event = self.peek_event()
+ item_key = self.compose_node(path+[(node, None)])
+ item_value = self.compose_node(path+[(node, item_key)])
+ if item_key in node.value:
raise ComposerError("while composing a mapping", start_event.start_mark,
"found duplicate key", key_event.start_mark)
- value[item_key] = item_value
- end_event = self.parser.get()
- return MappingNode(start_event.tag, value,
- start_event.start_mark, end_event.end_mark,
- flow_style=start_event.flow_style)
+ node.value[item_key] = item_value
+ end_event = self.get_event()
+ node.end_mark = end_event.end_mark
+ return node
+
+ def resolve_scalar(self, path, tag, implicit, value):
+ if implicit:
+ tag = self.detect(value)
+ if tag is None and self.resolver_tags[-1]:
+ tag = self.resolver_tags[-1]
+ if tag is None or tag == u'!':
+ tag = self.DEFAULT_SCALAR_TAG
+ return tag
+
+ def resolve_sequence(self, path, tag):
+ if tag is None and self.resolver_tags[-1]:
+ tag = self.resolver_tags[-1]
+ if tag is None or tag == u'!':
+ tag = self.DEFAULT_SEQUENCE_TAG
+ return tag
+
+ def resolve_mapping(self, path, tag):
+ if tag is None and self.resolver_tags[-1]:
+ tag = self.resolver_tags[-1]
+ if tag is None or tag == u'!':
+ tag = self.DEFAULT_MAPPING_TAG
+ return tag
+
+ def add_resolver(cls, tag, path):
+ if not 'yaml_resolvers' in cls.__dict__:
+ cls.yaml_resolvers = cls.yaml_resolvers.copy()
+ cls.yaml_resolvers[tuple(path)] = tag
+ add_resolver = classmethod(add_resolver)
+
+class Composer(BaseComposer):
+ pass
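The `path` argument threaded through `compose_node` is a list of `(parent_node, index)` pairs: sequence items contribute their position, mapping values contribute their key node, and mapping keys contribute `None`. `increase_resolver_depth` matches registered resolver paths against it one element per nesting level, with `None` acting as a wildcard. A sketch of the observable effect (tag name hypothetical):

    import yaml

    yaml.add_resolver(u'!Wild', [u'tokens', None])

    root = yaml.compose("tokens:\n  scalar: v\n")
    tokens_node = root.value.values()[0]        # MappingNode under 'tokens'
    value_node = tokens_node.value.values()[0]  # ScalarNode for 'v'
    print value_node.tag                        # -> u'!Wild', set by path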
diff --git a/lib/yaml/constructor.py b/lib/yaml/constructor.py
index 88a82f3..9fa9085 100644
--- a/lib/yaml/constructor.py
+++ b/lib/yaml/constructor.py
@@ -4,6 +4,7 @@ __all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor',
from error import *
from nodes import *
+from composer import *
try:
import datetime
@@ -21,46 +22,62 @@ import binascii, re
class ConstructorError(MarkedYAMLError):
pass
-class BaseConstructor:
+class BaseConstructor(Composer):
- def __init__(self, resolver):
- self.resolver = resolver
+ yaml_constructors = {}
+ yaml_multi_constructors = {}
+
+ def __init__(self):
self.constructed_objects = {}
- def check(self):
+ def check_data(self):
# Check if there are more documents available.
- return self.resolver.check()
+ return self.check_node()
- def get(self):
+ def get_data(self):
# Construct and return the next document.
- if self.resolver.check():
- return self.construct_document(self.resolver.get())
+ if self.check_node():
+ return self.construct_document(self.get_node())
def __iter__(self):
# Iterator protocol.
- while self.resolver.check():
- yield self.construct_document(self.resolver.get())
+ while self.check_node():
+ yield self.construct_document(self.get_node())
def construct_document(self, node):
- native = self.construct_object(node)
+ data = self.construct_object(node)
self.constructed_objects = {}
- return native
+ return data
def construct_object(self, node):
if node in self.constructed_objects:
return self.constructed_objects[node]
+ constructor = None
if node.tag in self.yaml_constructors:
- native = self.yaml_constructors[node.tag](self, node)
- elif None in self.yaml_constructors:
- native = self.yaml_constructors[None](self, node)
- elif isinstance(node, ScalarNode):
- native = self.construct_scalar(node)
- elif isinstance(node, SequenceNode):
- native = self.construct_sequence(node)
- elif isinstance(node, MappingNode):
- native = self.construct_mapping(node)
- self.constructed_objects[node] = native
- return native
+ constructor = lambda node: self.yaml_constructors[node.tag](self, node)
+ else:
+ for tag_prefix in self.yaml_multi_constructors:
+ if node.tag.startswith(tag_prefix):
+ tag_suffix = node.tag[len(tag_prefix):]
+ constructor = lambda node: \
+ self.yaml_multi_constructors[tag_prefix](self, tag_suffix, node)
+ break
+ else:
+ if None in self.yaml_multi_constructors:
+ constructor = lambda node: \
+ self.yaml_multi_constructors[None](self, node.tag, node)
+ elif None in self.yaml_constructors:
+ constructor = lambda node: \
+ self.yaml_constructors[None](self, node)
+ elif isinstance(node, ScalarNode):
+ constructor = self.construct_scalar
+ elif isinstance(node, SequenceNode):
+ constructor = self.construct_sequence
+ elif isinstance(node, MappingNode):
+ constructor = self.construct_mapping
+ data = constructor(node)
+ self.constructed_objects[node] = data
+ return data
def construct_scalar(self, node):
if not isinstance(node, ScalarNode):
@@ -152,7 +169,11 @@ class BaseConstructor:
cls.yaml_constructors[tag] = constructor
add_constructor = classmethod(add_constructor)
- yaml_constructors = {}
+ def add_multi_constructor(cls, tag_prefix, multi_constructor):
+ if not 'yaml_multi_constructors' in cls.__dict__:
+ cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy()
+ cls.yaml_multi_constructors[tag_prefix] = multi_constructor
+ add_multi_constructor = classmethod(add_multi_constructor)
class SafeConstructor(BaseConstructor):
@@ -327,6 +348,18 @@ class SafeConstructor(BaseConstructor):
def construct_yaml_map(self, node):
return self.construct_mapping(node)
+ def construct_yaml_object(self, node, cls):
+ mapping = self.construct_mapping(node)
+ state = {}
+ for key in mapping:
+ state[key.replace('-', '_')] = mapping[key]
+ data = cls.__new__(cls)
+ if hasattr(data, '__setstate__'):
+ data.__setstate__(state)
+ else:
+ data.__dict__.update(state)
+ return data
+
def construct_undefined(self, node):
raise ConstructorError(None, None,
"could not determine a constructor for the tag %r" % node.tag.encode('utf-8'),
diff --git a/lib/yaml/detector.py b/lib/yaml/detector.py
index 30b180e..363783e 100644
--- a/lib/yaml/detector.py
+++ b/lib/yaml/detector.py
@@ -5,8 +5,15 @@ import re
class BaseDetector:
+ DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str'
+ DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq'
+ DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map'
+
yaml_detectors = {}
+ def __init__(self):
+ pass
+
def add_detector(cls, tag, regexp, first):
if not 'yaml_detectors' in cls.__dict__:
cls.yaml_detectors = cls.yaml_detectors.copy()
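A detector pairs a tag with a regular expression plus the set of characters a match can start with, so scalar resolution only consults a few candidate patterns. A sketch of registering one through the `add_detector` wrapper from `__init__.py` (tag and pattern invented):

    import re
    import yaml

    # Tag bare version strings such as 'v3.0.1':
    yaml.add_detector(u'!version', re.compile(ur'^v\d+(\.\d+)*$'),
            first=u'v')

    print yaml.compose("v3.0.1").tag    # -> u'!version'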
diff --git a/lib/yaml/dumper.py b/lib/yaml/dumper.py
new file mode 100644
index 0000000..0a7c2d9
--- /dev/null
+++ b/lib/yaml/dumper.py
@@ -0,0 +1,56 @@
+
+__all__ = ['BaseDumper', 'SafeDumper', 'Dumper']
+
+from emitter import *
+from serializer import *
+from representer import *
+from detector import *
+
+class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseDetector):
+
+ def __init__(self, stream,
+ canonical=None, indent=None, width=None,
+ allow_unicode=None, line_break=None,
+ encoding=None, explicit_start=None, explicit_end=None,
+ version=None, tags=None):
+ Emitter.__init__(self, stream, canonical=canonical,
+ indent=indent, width=width,
+ allow_unicode=allow_unicode, line_break=line_break)
+ Serializer.__init__(self, encoding=encoding,
+ explicit_start=explicit_start, explicit_end=explicit_end,
+ version=version, tags=tags)
+ BaseRepresenter.__init__(self)
+ BaseDetector.__init__(self)
+
+class SafeDumper(Emitter, Serializer, SafeRepresenter, Detector):
+
+ def __init__(self, stream,
+ canonical=None, indent=None, width=None,
+ allow_unicode=None, line_break=None,
+ encoding=None, explicit_start=None, explicit_end=None,
+ version=None, tags=None):
+ Emitter.__init__(self, stream, canonical=canonical,
+ indent=indent, width=width,
+ allow_unicode=allow_unicode, line_break=line_break)
+ Serializer.__init__(self, encoding=encoding,
+ explicit_start=explicit_start, explicit_end=explicit_end,
+ version=version, tags=tags)
+ SafeRepresenter.__init__(self)
+ Detector.__init__(self)
+
+class Dumper(Emitter, Serializer, Representer, Detector):
+
+ def __init__(self, stream,
+ canonical=None, indent=None, width=None,
+ allow_unicode=None, line_break=None,
+ encoding=None, explicit_start=None, explicit_end=None,
+ version=None, tags=None):
+ Emitter.__init__(self, stream, canonical=canonical,
+ indent=indent, width=width,
+ allow_unicode=allow_unicode, line_break=line_break)
+ Serializer.__init__(self, encoding=encoding,
+ explicit_start=explicit_start, explicit_end=explicit_end,
+ version=version, tags=tags)
+ Representer.__init__(self)
+ Detector.__init__(self)
+
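All three dumpers share Emitter and Serializer and differ only in the representer they mix in. A sketch of choosing one explicitly and extending it (assuming the `add_representer` and `represent_sequence` helpers from representer.py, which this diff does not show):

    import yaml

    print yaml.dump({'safe': True}, Dumper=yaml.SafeDumper)

    class Point:
        def __init__(self, x, y):
            self.x, self.y = x, y

    def represent_point(dumper, point):
        return dumper.represent_sequence(u'!point', [point.x, point.y])

    yaml.Dumper.add_representer(Point, represent_point)
    print yaml.dump(Point(1, 2))    # roughly: !point [1, 2]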
diff --git a/lib/yaml/emitter.py b/lib/yaml/emitter.py
index 69a3c74..985ce63 100644
--- a/lib/yaml/emitter.py
+++ b/lib/yaml/emitter.py
@@ -11,13 +11,16 @@ __all__ = ['Emitter', 'EmitterError']
from error import YAMLError
from events import *
+import re
+
class EmitterError(YAMLError):
pass
class ScalarAnalysis:
def __init__(self, scalar, empty, multiline,
allow_flow_plain, allow_block_plain,
- allow_single_quoted, allow_double_quoted, allow_block):
+ allow_single_quoted, allow_double_quoted,
+ allow_block):
self.scalar = scalar
self.empty = empty
self.multiline = multiline
@@ -34,12 +37,13 @@ class Emitter:
u'tag:yaml.org,2002:' : u'!!',
}
- def __init__(self, writer):
+ def __init__(self, stream, canonical=None, indent=None, width=None,
+ allow_unicode=None, line_break=None):
- # The writer should have the methods `write` and possibly `flush`.
- self.writer = writer
+ # The stream should have the methods `write` and possibly `flush`.
+ self.stream = stream
- # Encoding is provided by STREAM-START.
+ # Encoding can be overridden by STREAM-START.
self.encoding = None
# Emitter is a state machine with a stack of states to handle nested
@@ -75,18 +79,28 @@ class Emitter:
self.indention = True
# Formatting details.
- self.canonical = False
- self.allow_unicode = False
- self.best_line_break = u'\n'
+ self.canonical = canonical
+ self.allow_unicode = allow_unicode
self.best_indent = 2
+ if indent and 1 < indent < 10:
+ self.best_indent = indent
self.best_width = 80
+ if width and width > self.best_indent*2:
+ self.best_width = width
+ self.best_line_break = u'\n'
+ if line_break in [u'\r', u'\n', u'\r\n']:
+ self.best_line_break = line_break
+
+ # Tag prefixes.
self.tag_prefixes = None
- # Analyses cache.
- self.anchor_text = None
- self.tag_text = None
- self.scalar_analysis = None
- self.scalar_style = None
+ # Prepared anchor and tag.
+ self.prepared_anchor = None
+ self.prepared_tag = None
+
+ # Scalar analysis and style.
+ self.analysis = None
+ self.style = None
def emit(self, event):
self.events.append(event)
@@ -139,15 +153,8 @@ class Emitter:
def expect_stream_start(self):
if isinstance(self.event, StreamStartEvent):
- self.encoding = self.event.encoding
- self.canonical = self.event.canonical
- self.allow_unicode = self.event.allow_unicode
- if self.event.indent and self.event.indent > 1:
- self.best_indent = self.event.indent
- if self.event.width and self.event.width > self.best_indent:
- self.best_width = self.event.width
- if self.event.line_break in [u'\r', u'\n', u'\r\n']:
- self.best_line_break = self.event.line_break
+ if self.event.encoding:
+ self.encoding = self.event.encoding
self.write_stream_start()
self.state = self.expect_first_document_start
else:
@@ -165,7 +172,7 @@ class Emitter:
def expect_document_start(self, first=False):
if isinstance(self.event, DocumentStartEvent):
if self.event.version:
- version_text = self.analyze_version(self.event.version)
+ version_text = self.prepare_version(self.event.version)
self.write_version_directive(version_text)
self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
if self.event.tags:
@@ -174,8 +181,8 @@ class Emitter:
for handle in handles:
prefix = self.event.tags[handle]
self.tag_prefixes[prefix] = handle
- handle_text = self.analyze_tag_handle(handle)
- prefix_text = self.analyze_tag_prefix(prefix)
+ handle_text = self.prepare_tag_handle(handle)
+ prefix_text = self.prepare_tag_prefix(prefix)
self.write_tag_directive(handle_text, prefix_text)
implicit = (first and not self.event.explicit and not self.canonical
and not self.event.version and not self.event.tags
@@ -199,6 +206,7 @@ class Emitter:
if self.event.explicit:
self.write_indicator(u'...', True)
self.write_indent()
+ self.flush_stream()
self.state = self.expect_document_start
else:
raise EmitterError("expected DocumentEndEvent, but got %s"
@@ -418,94 +426,106 @@ class Emitter:
def check_simple_key(self):
length = 0
if isinstance(self.event, NodeEvent) and self.event.anchor is not None:
- if self.anchor_text is None:
- self.anchor_text = self.analyze_anchor(self.event.anchor)
- length += len(self.anchor_text)
+ if self.prepared_anchor is None:
+ self.prepared_anchor = self.prepare_anchor(self.event.anchor)
+ length += len(self.prepared_anchor)
if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \
and self.event.tag is not None:
- if self.tag_text is None:
- self.tag_text = self.analyze_tag(self.event.tag)
- length += len(self.tag_text)
+ if self.prepared_tag is None:
+ self.prepared_tag = self.prepare_tag(self.event.tag)
+ length += len(self.prepared_tag)
if isinstance(self.event, ScalarEvent):
- if self.scalar_analysis is None:
- self.scalar_analysis = self.analyze_scalar(self.event.value)
- length += len(self.scalar_analysis.scalar)
+ if self.analysis is None:
+ self.analysis = self.analyze_scalar(self.event.value)
+ length += len(self.analysis.scalar)
return (length < 128 and (isinstance(self.event, AliasEvent)
- or (isinstance(self.event, ScalarEvent) and not self.scalar_analysis.multiline)
+ or (isinstance(self.event, ScalarEvent)
+ and not self.analysis.empty and not self.analysis.multiline)
or self.check_empty_sequence() or self.check_empty_mapping()))
# Anchor, Tag, and Scalar processors.
def process_anchor(self, indicator):
if self.event.anchor is None:
+ self.prepared_anchor = None
return
- if self.anchor_text is None:
- self.anchor_text = self.analyze_anchor(self.event.anchor)
- if self.anchor_text:
- self.write_indicator(indicator+self.anchor_text, True)
- self.anchor_text = None
+ if self.prepared_anchor is None:
+ self.prepared_anchor = self.prepare_anchor(self.event.anchor)
+ if self.prepared_anchor:
+ self.write_indicator(indicator+self.prepared_anchor, True)
+ self.prepared_anchor = None
def process_tag(self):
- if self.event.tag is None:
- return
- if isinstance(self.event, ScalarEvent) and self.best_scalar_style() == '':
+ tag = self.event.tag
+ if isinstance(self.event, ScalarEvent):
+ if self.style is None:
+ self.style = self.choose_scalar_style()
+ if self.style == '':
+ self.prepared_tag = None
+ return
+ if self.event.implicit and not tag:
+ tag = u'!'
+ self.prepared_tag = None
+ if not tag:
+ self.prepared_tag = None
return
- if self.tag_text is None:
- self.tag_text = self.analyze_tag(self.event.tag)
- if self.tag_text:
- self.write_indicator(self.tag_text, True)
- self.tag_text = None
-
- def best_scalar_style(self):
- if self.scalar_analysis is None:
- self.scalar_analysis = self.analyze_scalar(self.event.value)
- if self.canonical:
- return '"'
- if (self.event.implicit and not self.event.style
- and ((self.flow_level and self.scalar_analysis.allow_flow_plain)
- or (not self.flow_level and self.scalar_analysis.allow_block_plain))
- and (len(self.scalar_analysis.scalar) > 0
- or (not self.flow_level and not self.simple_key_context))):
- return ''
- elif self.event.style == '\'' and self.scalar_analysis.allow_single_quoted:
- return '\''
- elif self.event.style in ['|', '>'] and not self.flow_level and self.scalar_analysis.allow_block:
- return self.event.style
- else:
+ if self.prepared_tag is None:
+ self.prepared_tag = self.prepare_tag(tag)
+ if self.prepared_tag:
+ self.write_indicator(self.prepared_tag, True)
+ self.prepared_tag = None
+
+ def choose_scalar_style(self):
+ if self.analysis is None:
+ self.analysis = self.analyze_scalar(self.event.value)
+ if self.event.style == '"' or self.canonical:
return '"'
- return style
+ if not self.event.style and self.event.implicit:
+ if (not (self.simple_key_context and
+ (self.analysis.empty or self.analysis.multiline))
+ and (self.flow_level and self.analysis.allow_flow_plain
+ or (not self.flow_level and self.analysis.allow_block_plain))):
+ return ''
+ if self.event.style and self.event.style in '|>':
+ if not self.flow_level and self.analysis.allow_block:
+ return self.event.style
+ if not self.event.style or self.event.style == '\'':
+ if (self.analysis.allow_single_quoted and
+ not (self.simple_key_context and self.analysis.multiline)):
+ return '\''
+ return '"'
def process_scalar(self):
- if self.scalar_analysis is None:
- self.scalar_analysis = self.analyze_scalar(self.event.value)
- style = self.best_scalar_style()
- if self.scalar_analysis.multiline and not self.simple_key_context \
- and style not in ['|', '>']:
- self.write_indent()
- if style == '"':
- self.write_double_quoted(self.scalar_analysis.scalar,
- split=(not self.simple_key_context))
- elif style == '\'':
- self.write_single_quoted(self.scalar_analysis.scalar,
- split=(not self.simple_key_context))
- elif style == '>':
- self.write_folded(self.scalar_analysis.scalar)
- elif style == '|':
- self.write_literal(self.scalar_analysis.scalar)
+ if self.analysis is None:
+ self.analysis = self.analyze_scalar(self.event.value)
+ if self.style is None:
+ self.style = self.choose_scalar_style()
+ split = (not self.simple_key_context)
+ #if self.analysis.multiline and split \
+ # and (not self.style or self.style in '\'\"'):
+ # self.write_indent()
+ if self.style == '"':
+ self.write_double_quoted(self.analysis.scalar, split)
+ elif self.style == '\'':
+ self.write_single_quoted(self.analysis.scalar, split)
+ elif self.style == '>':
+ self.write_folded(self.analysis.scalar)
+ elif self.style == '|':
+ self.write_literal(self.analysis.scalar)
else:
- self.write_plain(self.scalar_analysis.scalar,
- split=(not self.simple_key_context))
- self.scalar_analysis = None
+ self.write_plain(self.analysis.scalar, split)
+ self.analysis = None
+ self.style = None
# Analyzers.
- def analyze_version(self, version):
+ def prepare_version(self, version):
major, minor = version
if major != 1:
raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
return u'%d.%d' % (major, minor)
- def analyze_tag_handle(self, handle):
+ def prepare_tag_handle(self, handle):
if not handle:
raise EmitterError("tag handle must not be empty")
if handle[0] != u'!' or handle[-1] != u'!':
@@ -518,7 +538,7 @@ class Emitter:
% (ch.encode('utf-8'), handle.encode('utf-8')))
return handle
- def analyze_tag_prefix(self, prefix):
+ def prepare_tag_prefix(self, prefix):
if not prefix:
raise EmitterError("tag prefix must not be empty")
chunks = []
@@ -541,9 +561,11 @@ class Emitter:
chunks.append(prefix[start:end])
return u''.join(chunks)
- def analyze_tag(self, tag):
+ def prepare_tag(self, tag):
if not tag:
raise EmitterError("tag must not be empty")
+ if tag == u'!':
+ return tag
handle = None
suffix = tag
for prefix in self.tag_prefixes:
@@ -574,7 +596,7 @@ class Emitter:
else:
return u'!<%s>' % suffix_text
- def analyze_anchor(self, anchor):
+ def prepare_anchor(self, anchor):
if not anchor:
raise EmitterError("anchor must not be empty")
for ch in anchor:
@@ -584,143 +606,221 @@ class Emitter:
% (ch.encode('utf-8'), anchor.encode('utf-8')))
return anchor
- def analyze_scalar(self, scalar): # It begs for refactoring.
+ def analyze_scalar(self, scalar):
+
+ # Empty scalar is a special case.
if not scalar:
return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
allow_flow_plain=False, allow_block_plain=True,
allow_single_quoted=True, allow_double_quoted=True,
allow_block=False)
- contains_block_indicator = False
- contains_flow_indicator = False
- contains_line_breaks = False
- contains_unicode_characters = False
- contains_special_characters = False
- contains_inline_spaces = False # non-space space+ non-space
- contains_inline_breaks = False # non-space break+ non-space
- contains_leading_spaces = False # ^ space+ (non-space | $)
- contains_leading_breaks = False # ^ break+ (non-space | $)
- contains_trailing_spaces = False # non-space space+ $
- contains_trailing_breaks = False # non-space break+ $
- contains_inline_breaks_spaces = False # non-space break+ space+ non-space
- contains_mixed_breaks_spaces = False # anything else
+
+ # Indicators and special characters.
+ block_indicators = False
+ flow_indicators = False
+ line_breaks = False
+ special_characters = False
+
+ # Whitespaces.
+ inline_spaces = False # non-space space+ non-space
+ inline_breaks = False # non-space break+ non-space
+ leading_spaces = False # ^ space+ (non-space | $)
+ leading_breaks = False # ^ break+ (non-space | $)
+ trailing_spaces = False # (^ | non-space) space+ $
+ trailing_breaks = False # (^ | non-space) break+ $
+ inline_breaks_spaces = False # non-space break+ space+ non-space
+ mixed_breaks_spaces = False # anything else
+
+ # Check document indicators.
if scalar.startswith(u'---') or scalar.startswith(u'...'):
- contains_block_indicator = True
- contains_flow_indicator = True
- first = True
- last = (len(scalar) == 1)
- preceeded_by_space = False
- followed_by_space = (len(scalar) > 1 and
+ block_indicators = True
+ flow_indicators = True
+
+ # First character or preceded by a whitespace.
+ preceeded_by_space = True
+
+ # Last character or followed by a whitespace.
+ followed_by_space = (len(scalar) == 1 or
scalar[1] in u'\0 \t\r\n\x85\u2028\u2029')
- spaces = breaks = mixed = leading = False
+
+ # The current series of whitespaces contains plain spaces.
+ spaces = False
+
+ # The current series of whitespaces contains line breaks.
+ breaks = False
+
+ # The current series of whitespaces contains a space followed by a
+ # break.
+ mixed = False
+
+ # The current series of whitespaces starts at the beginning of the
+ # scalar.
+ leading = False
+
index = 0
while index < len(scalar):
ch = scalar[index]
- if first:
+
+ # Check for indicators.
+
+ if index == 0:
+ # Leading indicators are special characters.
if ch in u'#,[]{}#&*!|>\'\"%@`':
- contains_flow_indicator = True
- contains_block_indicator = True
+ flow_indicators = True
+ block_indicators = True
if ch in u'?:':
- contains_flow_indicator = True
- if followed_by_space or last:
- contains_block_indicator = True
- if ch == u'-' and (followed_by_space or last):
- contains_flow_indicator = True
- contains_block_indicator = True
+ flow_indicators = True
+ if followed_by_space:
+ block_indicators = True
+ if ch == u'-' and followed_by_space:
+ flow_indicators = True
+ block_indicators = True
else:
+ # Some indicators cannot appear within a scalar either.
if ch in u',?[]{}':
- contains_flow_indicator = True
+ flow_indicators = True
if ch == u':':
- contains_flow_indicator = True
- if followed_by_space or last:
- contains_block_indicator = True
- if ch == u'#' and (preceeded_by_space or first):
- contains_flow_indicator = True
- contains_block_indicator = True
+ flow_indicators = True
+ if followed_by_space:
+ block_indicators = True
+ if ch == u'#' and preceeded_by_space:
+ flow_indicators = True
+ block_indicators = True
+
+ # Check for line breaks, special, and unicode characters.
+
if ch in u'\n\x85\u2028\u2029':
- contains_line_breaks = True
+ line_breaks = True
if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):
- if ch < u'\x80':
- contains_special_characters = True
+ if ch < u'\x80' or ch == u'\uFEFF': # '\uFEFF' is BOM.
+ special_characters = True
else:
- contains_unicode_characters = True
- if ch == u' ':
- if not spaces and not breaks:
- leading = first
- spaces = True
- elif ch in u'\n\x85\u2028\u2029':
- if not spaces and not breaks:
- leading = first
- breaks = True
- if spaces:
- mixed = True
- if ch not in u' \n\x85\u2028\u2029':
+ unicode_characters = True
+ if not self.allow_unicode:
+ special_characters = True
+
+ # Spaces, line breaks, and how they are mixed. State machine.
+
+ # Start or continue series of whitespaces.
+ if ch in u' \n\x85\u2028\u2029':
+ if spaces and breaks:
+ if ch != u' ': # break+ (space+ break+) => mixed
+ mixed = True
+ elif spaces:
+ if ch != u' ': # (space+ break+) => mixed
+ breaks = True
+ mixed = True
+ elif breaks:
+ if ch == u' ': # break+ space+
+ spaces = True
+ else:
+ leading = (index == 0)
+ if ch == u' ': # space+
+ spaces = True
+ else: # break+
+ breaks = True
+
+ # The series of whitespaces ended with a non-space.
+ elif spaces or breaks:
if leading:
if spaces and breaks:
- contains_mixed_breaks_spaces = True
+ mixed_breaks_spaces = True
elif spaces:
- contains_leading_spaces = True
+ leading_spaces = True
elif breaks:
- contains_leading_breaks = True
+ leading_breaks = True
else:
if mixed:
- contains_mixed_break_spaces = True
+ mixed_breaks_spaces = True
elif spaces and breaks:
- contains_inline_breaks_spaces = True
+ inline_breaks_spaces = True
elif spaces:
- contains_inline_spaces = True
+ inline_spaces = True
elif breaks:
- contains_inline_breaks = True
+ inline_breaks = True
spaces = breaks = mixed = leading = False
- elif last:
+
+ # The series of whitespaces reaches the end of the scalar.
+ if (spaces or breaks) and (index == len(scalar)-1):
if spaces and breaks:
- contains_mixed_break_spaces = True
+ mixed_breaks_spaces = True
elif spaces:
+ trailing_spaces = True
if leading:
- contains_leading_spaces = True
- else:
- contains_trailing_spaces = True
+ leading_spaces = True
elif breaks:
+ trailing_breaks = True
if leading:
- contains_leading_breaks = True
- else:
- contains_trailing_breaks = True
+ leading_breaks = True
+ spaces = breaks = mixed = leading = False
+
+ # Prepare for the next character.
index += 1
- first = False
- last = (index+1 == len(scalar))
preceeded_by_space = (ch in u'\0 \t\r\n\x85\u2028\u2029')
- followed_by_space = (index+1 < len(scalar) and
+ followed_by_space = (index+1 >= len(scalar) or
scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029')
- if contains_unicode_characters and not self.allow_unicode:
- contains_special_characters = True
- allow_flow_plain = not (contains_flow_indicator or contains_special_characters
- or contains_leading_spaces or contains_leading_breaks
- or contains_trailing_spaces or contains_trailing_breaks
- or contains_inline_breaks_spaces or contains_mixed_breaks_spaces)
- allow_block_plain = not (contains_block_indicator or contains_special_characters
- or contains_leading_spaces or contains_leading_breaks
- or contains_trailing_spaces or contains_trailing_breaks
- or contains_inline_breaks_spaces or contains_mixed_breaks_spaces)
- allow_single_quoted = not (contains_special_characters
- or contains_inline_breaks_spaces or contains_mixed_breaks_spaces)
+
+ # Let's decide what styles are allowed.
+ allow_flow_plain = True
+ allow_block_plain = True
+ allow_single_quoted = True
allow_double_quoted = True
- allow_block = not (contains_special_characters
- or contains_leading_spaces or contains_leading_breaks
- or contains_trailing_spaces or contains_mixed_breaks_spaces)
- return ScalarAnalysis(scalar=scalar, empty=False, multiline=contains_line_breaks,
- allow_flow_plain=allow_flow_plain, allow_block_plain=allow_block_plain,
- allow_single_quoted=allow_single_quoted, allow_double_quoted=allow_double_quoted,
+ allow_block = True
+
+ # Leading and trailing whitespace is bad for plain scalars. We also
+ # do not want to mess with leading whitespaces for block scalars.
+ if leading_spaces or leading_breaks or trailing_spaces:
+ allow_flow_plain = allow_block_plain = allow_block = False
+
+ # Trailing breaks are fine for block scalars, but unacceptable for
+ # plain scalars.
+ if trailing_breaks:
+ allow_flow_plain = allow_block_plain = False
+
+ # The combination of (space+ break+) is only acceptable for block
+ # scalars.
+ if inline_breaks_spaces:
+ allow_flow_plain = allow_block_plain = allow_single_quoted = False
+
+ # Mixed spaces and breaks, as well as special characters, are only
+ # allowed for double quoted scalars.
+ if mixed_breaks_spaces or special_characters:
+ allow_flow_plain = allow_block_plain = \
+ allow_single_quoted = allow_block = False
+
+ # We don't emit multiline plain scalars.
+ if line_breaks:
+ allow_flow_plain = allow_block_plain = False
+
+ # Flow indicators are forbidden for flow plain scalars.
+ if flow_indicators:
+ allow_flow_plain = False
+
+ # Block indicators are forbidden for block plain scalars.
+ if block_indicators:
+ allow_block_plain = False
+
+ return ScalarAnalysis(scalar=scalar,
+ empty=False, multiline=line_breaks,
+ allow_flow_plain=allow_flow_plain,
+ allow_block_plain=allow_block_plain,
+ allow_single_quoted=allow_single_quoted,
+ allow_double_quoted=allow_double_quoted,
allow_block=allow_block)
# Writers.
+ def flush_stream(self):
+ if hasattr(self.stream, 'flush'):
+ self.stream.flush()
+
def write_stream_start(self):
# Write BOM if needed.
if self.encoding and self.encoding.startswith('utf-16'):
- self.writer.write(u'\xFF\xFE'.encode(self.encoding))
+ self.stream.write(u'\xFF\xFE'.encode(self.encoding))
def write_stream_end(self):
- if hasattr(self.writer, 'flush'):
- self.writer.flush()
+ self.flush_stream()
def write_indicator(self, indicator, need_whitespace,
whitespace=False, indention=False):
@@ -733,7 +833,7 @@ class Emitter:
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
- self.writer.write(data)
+ self.stream.write(data)
def write_indent(self):
indent = self.indent or 0
@@ -746,7 +846,7 @@ class Emitter:
self.column = indent
if self.encoding:
data = data.encode(self.encoding)
- self.writer.write(data)
+ self.stream.write(data)
def write_line_break(self, data=None):
if data is None:
@@ -757,23 +857,23 @@ class Emitter:
self.column = 0
if self.encoding:
data = data.encode(self.encoding)
- self.writer.write(data)
+ self.stream.write(data)
def write_version_directive(self, version_text):
data = u'%%YAML %s' % version_text
if self.encoding:
data = data.encode(self.encoding)
- self.writer.write(data)
+ self.stream.write(data)
self.write_line_break()
def write_tag_directive(self, handle_text, prefix_text):
data = u'%%TAG %s %s' % (handle_text, prefix_text)
if self.encoding:
data = data.encode(self.encoding)
- self.writer.write(data)
+ self.stream.write(data)
self.write_line_break()
- # Scalar writers.
+ # Scalar streams.
def write_single_quoted(self, text, split=True):
self.write_indicator(u'\'', True)
@@ -794,7 +894,7 @@ class Emitter:
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
- self.writer.write(data)
+ self.stream.write(data)
start = end
elif breaks:
if ch is None or ch not in u'\n\x85\u2028\u2029':
@@ -814,14 +914,14 @@ class Emitter:
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
- self.writer.write(data)
+ self.stream.write(data)
start = end
if ch == u'\'':
data = u'\'\''
self.column += 2
if self.encoding:
data = data.encode(self.encoding)
- self.writer.write(data)
+ self.stream.write(data)
start = end + 1
if ch is not None:
spaces = (ch == u' ')
@@ -863,7 +963,7 @@ class Emitter:
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
- self.writer.write(data)
+ self.stream.write(data)
start = end
if ch is not None:
if ch in self.ESCAPE_REPLACEMENTS:
@@ -877,7 +977,7 @@ class Emitter:
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
- self.writer.write(data)
+ self.stream.write(data)
start = end+1
if 0 < end < len(text)-1 and (ch == u' ' or start >= end) \
and self.column+(end-start) > self.best_width and split:
@@ -887,7 +987,7 @@ class Emitter:
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
- self.writer.write(data)
+ self.stream.write(data)
self.write_indent()
self.whitespace = False
self.indention = False
@@ -896,7 +996,7 @@ class Emitter:
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
- self.writer.write(data)
+ self.stream.write(data)
end += 1
self.write_indicator(u'"', False)
@@ -947,14 +1047,14 @@ class Emitter:
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
- self.writer.write(data)
+ self.stream.write(data)
start = end
else:
if ch is None or ch in u' \n\x85\u2028\u2029':
data = text[start:end]
if self.encoding:
data = data.encode(self.encoding)
- self.writer.write(data)
+ self.stream.write(data)
if ch is None:
self.write_line_break()
start = end
@@ -988,7 +1088,7 @@ class Emitter:
data = text[start:end]
if self.encoding:
data = data.encode(self.encoding)
- self.writer.write(data)
+ self.stream.write(data)
if ch is None:
self.write_line_break()
start = end
@@ -1004,7 +1104,7 @@ class Emitter:
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
- self.writer.write(data)
+ self.stream.write(data)
self.whitespace = False
self.indention = False
spaces = False
@@ -1025,7 +1125,7 @@ class Emitter:
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
- self.writer.write(data)
+ self.stream.write(data)
start = end
elif breaks:
if ch not in u'\n\x85\u2028\u2029':
@@ -1046,7 +1146,7 @@ class Emitter:
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
- self.writer.write(data)
+ self.stream.write(data)
start = end
if ch is not None:
spaces = (ch == u' ')
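`choose_scalar_style` now derives the style from the analysis flags instead of the old `best_scalar_style` chain: an explicit `style` wins if the analysis allows it, plain style is tried only for implicitly-taggable single-line scalars, and double-quoted is the universal fallback. A sketch of the visible effect (exact output spacing may differ):

    import yaml

    print yaml.dump('plain scalar')   # plain style: nothing to escape
    print yaml.dump('has: colon')     # quoted: ': ' acts as an indicator
    print yaml.dump('two\nlines')     # quoted: multiline plain scalars
                                      # are never emitted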
diff --git a/lib/yaml/error.py b/lib/yaml/error.py
index a818210..8fa916b 100644
--- a/lib/yaml/error.py
+++ b/lib/yaml/error.py
@@ -48,20 +48,15 @@ class YAMLError(Exception):
class MarkedYAMLError(YAMLError):
def __init__(self, context=None, context_mark=None,
- problem=None, problem_mark=None):
+ problem=None, problem_mark=None, note=None):
self.context = context
self.context_mark = context_mark
self.problem = problem
self.problem_mark = problem_mark
+ self.note = note
def __str__(self):
lines = []
- #for (place, mark) in [(self.context, self.context_mark),
- # (self.problem, self.problem_mark)]:
- # if place is not None:
- # lines.append(place)
- # if mark is not None:
- # lines.append(str(mark))
if self.context is not None:
lines.append(self.context)
if self.context_mark is not None \
@@ -74,7 +69,7 @@ class MarkedYAMLError(YAMLError):
lines.append(self.problem)
if self.problem_mark is not None:
lines.append(str(self.problem_mark))
+ if self.note is not None:
+ lines.append(self.note)
return '\n'.join(lines)
-
-
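The new `note` field appends a free-form hint after the marked problem. A sketch of the resulting message layout (error text invented):

    from yaml.error import MarkedYAMLError

    try:
        raise MarkedYAMLError(
                problem="found a tab where an indentation space is expected",
                note="tabs cannot be used for indentation")
    except MarkedYAMLError, exc:
        print exc
        # found a tab where an indentation space is expected
        # tabs cannot be used for indentation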
diff --git a/lib/yaml/events.py b/lib/yaml/events.py
index 325ea9c..eebf955 100644
--- a/lib/yaml/events.py
+++ b/lib/yaml/events.py
@@ -33,17 +33,10 @@ class CollectionEndEvent(Event):
# Implementations.
class StreamStartEvent(Event):
- def __init__(self, start_mark=None, end_mark=None,
- encoding=None, line_break=None, canonical=None,
- indent=None, width=None, allow_unicode=None):
+ def __init__(self, start_mark=None, end_mark=None, encoding=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.encoding = encoding
- self.line_break = line_break
- self.canonical = canonical
- self.indent = indent
- self.width = width
- self.allow_unicode = allow_unicode
class StreamEndEvent(Event):
pass
@@ -68,14 +61,14 @@ class AliasEvent(NodeEvent):
pass
class ScalarEvent(NodeEvent):
- def __init__(self, anchor, tag, value, start_mark=None, end_mark=None,
- implicit=None, style=None):
+ def __init__(self, anchor, tag, implicit, value,
+ start_mark=None, end_mark=None, style=None):
self.anchor = anchor
self.tag = tag
+ self.implicit = implicit
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
- self.implicit = implicit
self.style = style
class SequenceStartEvent(CollectionStartEvent):
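With `implicit` promoted to a positional field, events round-trip cleanly between the parser and the emitter, so `parse` can feed `emit` directly. A sketch:

    import yaml

    source = "- {name: spam}\n- eggs\n"
    print yaml.emit(yaml.parse(source))    # re-emits an equivalent stream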
diff --git a/lib/yaml/loader.py b/lib/yaml/loader.py
new file mode 100644
index 0000000..f8d8673
--- /dev/null
+++ b/lib/yaml/loader.py
@@ -0,0 +1,41 @@
+
+__all__ = ['BaseLoader', 'SafeLoader', 'Loader']
+
+from reader import *
+from scanner import *
+from parser import *
+from composer import *
+from constructor import *
+from detector import *
+
+class BaseLoader(Reader, Scanner, Parser,
+ BaseComposer, BaseConstructor, BaseDetector):
+
+ def __init__(self, stream):
+ Reader.__init__(self, stream)
+ Scanner.__init__(self)
+ Parser.__init__(self)
+ BaseComposer.__init__(self)
+ BaseConstructor.__init__(self)
+ BaseDetector.__init__(self)
+
+class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Detector):
+
+ def __init__(self, stream):
+ Reader.__init__(self, stream)
+ Scanner.__init__(self)
+ Parser.__init__(self)
+ Composer.__init__(self)
+ SafeConstructor.__init__(self)
+ Detector.__init__(self)
+
+class Loader(Reader, Scanner, Parser, Composer, Constructor, Detector):
+
+ def __init__(self, stream):
+ Reader.__init__(self, stream)
+ Scanner.__init__(self)
+ Parser.__init__(self)
+ Composer.__init__(self)
+ Constructor.__init__(self)
+ Detector.__init__(self)
+
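Because the registration classmethods copy the class-level registry on first write (the `if not '...' in cls.__dict__` pattern above), constructors added to a subclass stay local to it. A sketch of an isolated loader (class and tag invented):

    import yaml

    class MyLoader(yaml.Loader):
        pass

    def construct_pair(loader, node):
        return tuple(loader.construct_sequence(node))

    MyLoader.add_constructor(u'!pair', construct_pair)

    print yaml.load("!pair [1, 2]", Loader=MyLoader)    # -> (1, 2)
    print u'!pair' in yaml.Loader.yaml_constructors     # -> False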
diff --git a/lib/yaml/nodes.py b/lib/yaml/nodes.py
index bad4935..1f9f094 100644
--- a/lib/yaml/nodes.py
+++ b/lib/yaml/nodes.py
@@ -23,11 +23,10 @@ class Node:
class ScalarNode(Node):
id = 'scalar'
- def __init__(self, tag, value, implicit,
+ def __init__(self, tag, value,
start_mark=None, end_mark=None, style=None):
self.tag = tag
self.value = value
- self.implicit = implicit
self.start_mark = start_mark
self.end_mark = end_mark
self.style = style
diff --git a/lib/yaml/parser.py b/lib/yaml/parser.py
index cf17e8e..38508d2 100644
--- a/lib/yaml/parser.py
+++ b/lib/yaml/parser.py
@@ -24,23 +24,6 @@
# TODO: support for BOM within a stream.
# stream ::= (BOM? implicit_document)? (BOM? explicit_document)* STREAM-END
-# Note that there is a slight deviation from the specification. We require a
-# non-empty node content if ANCHOR or TAG is specified. This disallow such
-# documents as
-#
-# key: !!str # empty value
-#
-# This is done to prevent ambiguity in parsing tags and aliases:
-#
-# { !!perl/YAML::Parser: value }
-#
-# What is it? Should it be interpreted as
-# { ? !<tag:yaml.org,2002:perl/YAML::Parser> '' : value }
-# or
-# { ? !<tag:yaml.org,2002:perl/YAML::Parser:> value : '' }
-# Since we disallow non-empty node content, tags are always followed by spaces
-# or line breaks.
-
# FIRST sets:
# stream: { STREAM-START }
# explicit_document: { DIRECTIVE DOCUMENT-START }
@@ -66,13 +49,14 @@ __all__ = ['Parser', 'ParserError']
from error import MarkedYAMLError
from tokens import *
from events import *
+from scanner import *
class ParserError(MarkedYAMLError):
pass
class Parser:
- # Since writing an LL(1) parser is a straightforward task, we do not give
- # many comments here.
+ # Since writing a recursive descent parser is a straightforward task, we
+ # do not give many comments here.
# Note that we use Python generators. If you rewrite the parser in another
# language, you may replace all 'yield'-s with event handler calls.
@@ -81,14 +65,13 @@ class Parser:
u'!!': u'tag:yaml.org,2002:',
}
- def __init__(self, scanner):
- self.scanner = scanner
+ def __init__(self):
self.current_event = None
self.yaml_version = None
self.tag_handles = {}
self.event_generator = self.parse_stream()
- def check(self, *choices):
+ def check_event(self, *choices):
# Check the type of the next event.
if self.current_event is None:
try:
@@ -96,12 +79,14 @@ class Parser:
except StopIteration:
pass
if self.current_event is not None:
+ if not choices:
+ return True
for choice in choices:
if isinstance(self.current_event, choice):
return True
return False
- def peek(self):
+ def peek_event(self):
# Get the next event.
if self.current_event is None:
try:
@@ -110,7 +95,7 @@ class Parser:
pass
return self.current_event
- def get(self):
+ def get_event(self):
# Get the next event.
if self.current_event is None:
try:
@@ -129,70 +114,70 @@ class Parser:
# STREAM-START implicit_document? explicit_document* STREAM-END
# Parse start of stream.
- token = self.scanner.get()
+ token = self.get_token()
yield StreamStartEvent(token.start_mark, token.end_mark,
encoding=token.encoding)
# Parse implicit document.
- if not self.scanner.check(DirectiveToken, DocumentStartToken,
+ if not self.check_token(DirectiveToken, DocumentStartToken,
StreamEndToken):
self.tag_handles = self.DEFAULT_TAGS
- token = self.scanner.peek()
+ token = self.peek_token()
start_mark = end_mark = token.start_mark
yield DocumentStartEvent(start_mark, end_mark,
explicit=False)
for event in self.parse_block_node():
yield event
- token = self.scanner.peek()
+ token = self.peek_token()
start_mark = end_mark = token.start_mark
explicit = False
- while self.scanner.check(DocumentEndToken):
- token = self.scanner.get()
+ while self.check_token(DocumentEndToken):
+ token = self.get_token()
end_mark = token.end_mark
explicit = True
yield DocumentEndEvent(start_mark, end_mark,
explicit=explicit)
# Parse explicit documents.
- while not self.scanner.check(StreamEndToken):
- token = self.scanner.peek()
+ while not self.check_token(StreamEndToken):
+ token = self.peek_token()
start_mark = token.start_mark
version, tags = self.process_directives()
- if not self.scanner.check(DocumentStartToken):
+ if not self.check_token(DocumentStartToken):
raise ParserError(None, None,
"expected '<document start>', but found %r"
- % self.scanner.peek().id,
- self.scanner.peek().start_mark)
- token = self.scanner.get()
+ % self.peek_token().id,
+ self.peek_token().start_mark)
+ token = self.get_token()
end_mark = token.end_mark
yield DocumentStartEvent(start_mark, end_mark,
explicit=True, version=version, tags=tags)
- if self.scanner.check(DirectiveToken,
+ if self.check_token(DirectiveToken,
DocumentStartToken, DocumentEndToken, StreamEndToken):
yield self.process_empty_scalar(token.end_mark)
else:
for event in self.parse_block_node():
yield event
- token = self.scanner.peek()
+ token = self.peek_token()
start_mark = end_mark = token.start_mark
explicit = False
- while self.scanner.check(DocumentEndToken):
- token = self.scanner.get()
+ while self.check_token(DocumentEndToken):
+ token = self.get_token()
end_mark = token.end_mark
explicit=True
yield DocumentEndEvent(start_mark, end_mark,
explicit=explicit)
# Parse end of stream.
- token = self.scanner.get()
+ token = self.get_token()
yield StreamEndEvent(token.start_mark, token.end_mark)
def process_directives(self):
# DIRECTIVE*
self.yaml_version = None
self.tag_handles = {}
- while self.scanner.check(DirectiveToken):
- token = self.scanner.get()
+ while self.check_token(DirectiveToken):
+ token = self.get_token()
if token.name == u'YAML':
if self.yaml_version is not None:
raise ParserError(None, None,
@@ -237,33 +222,33 @@ class Parser:
# block_collection ::= block_sequence | block_mapping
# block_node_or_indentless_sequence ::= ALIAS | properties?
# (block_content | indentless_block_sequence)
- if self.scanner.check(AliasToken):
- token = self.scanner.get()
+ if self.check_token(AliasToken):
+ token = self.get_token()
yield AliasEvent(token.value, token.start_mark, token.end_mark)
else:
anchor = None
tag = None
start_mark = end_mark = tag_mark = None
- if self.scanner.check(AnchorToken):
- token = self.scanner.get()
+ if self.check_token(AnchorToken):
+ token = self.get_token()
start_mark = token.start_mark
end_mark = token.end_mark
anchor = token.value
- if self.scanner.check(TagToken):
- token = self.scanner.get()
+ if self.check_token(TagToken):
+ token = self.get_token()
tag_mark = token.start_mark
end_mark = token.end_mark
tag = token.value
- elif self.scanner.check(TagToken):
- token = self.scanner.get()
+ elif self.check_token(TagToken):
+ token = self.get_token()
start_mark = tag_mark = token.start_mark
end_mark = token.end_mark
tag = token.value
- if self.scanner.check(AnchorToken):
- token = self.scanner.get()
+ if self.check_token(AnchorToken):
+ token = self.get_token()
end_mark = token.end_mark
anchor = token.value
- if tag is not None:
+ if tag is not None and tag != u'!':
handle, suffix = tag
if handle is not None:
if handle not in self.tag_handles:
@@ -273,57 +258,57 @@ class Parser:
tag = self.tag_handles[handle]+suffix
else:
tag = suffix
- #if tag is None:
- # if not (self.scanner.check(ScalarToken) and
- # self.scanner.peek().implicit):
- # tag = u'!'
+ #if tag == u'!':
+ # raise ParserError("while parsing a node", start_mark,
+ # "found non-specific tag '!'", tag_mark,
+ # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.")
if start_mark is None:
- start_mark = end_mark = self.scanner.peek().start_mark
+ start_mark = end_mark = self.peek_token().start_mark
event = None
collection_events = None
- if indentless_sequence and self.scanner.check(BlockEntryToken):
- end_mark = self.scanner.peek().end_mark
+ if indentless_sequence and self.check_token(BlockEntryToken):
+ end_mark = self.peek_token().end_mark
event = SequenceStartEvent(anchor, tag, start_mark, end_mark)
collection_events = self.parse_indentless_sequence()
else:
- if self.scanner.check(ScalarToken):
- token = self.scanner.get()
+ if self.check_token(ScalarToken):
+ token = self.get_token()
end_mark = token.end_mark
- implicit = (tag is None and token.implicit)
- event = ScalarEvent(anchor, tag, token.value,
- start_mark, end_mark,
- implicit=implicit, style=token.style)
- elif self.scanner.check(FlowSequenceStartToken):
- end_mark = self.scanner.peek().end_mark
+ implicit = ((tag is None or tag == u'!') and token.implicit)
+ event = ScalarEvent(anchor, tag, implicit, token.value,
+ start_mark, end_mark, style=token.style)
+ elif self.check_token(FlowSequenceStartToken):
+ end_mark = self.peek_token().end_mark
event = SequenceStartEvent(anchor, tag, start_mark, end_mark,
flow_style=True)
collection_events = self.parse_flow_sequence()
- elif self.scanner.check(FlowMappingStartToken):
- end_mark = self.scanner.peek().end_mark
+ elif self.check_token(FlowMappingStartToken):
+ end_mark = self.peek_token().end_mark
event = MappingStartEvent(anchor, tag, start_mark, end_mark,
flow_style=True)
collection_events = self.parse_flow_mapping()
- elif block and self.scanner.check(BlockSequenceStartToken):
- end_mark = self.scanner.peek().start_mark
+ elif block and self.check_token(BlockSequenceStartToken):
+ end_mark = self.peek_token().start_mark
event = SequenceStartEvent(anchor, tag, start_mark, end_mark,
flow_style=False)
collection_events = self.parse_block_sequence()
- elif block and self.scanner.check(BlockMappingStartToken):
- end_mark = self.scanner.peek().start_mark
+ elif block and self.check_token(BlockMappingStartToken):
+ end_mark = self.peek_token().start_mark
event = MappingStartEvent(anchor, tag, start_mark, end_mark,
flow_style=False)
collection_events = self.parse_block_mapping()
elif anchor is not None or tag is not None:
# Empty scalars are allowed even if a tag or an anchor is
# specified.
- event = ScalarEvent(anchor, tag, u'', start_mark, end_mark,
- implicit=True)
+ implicit = (tag is None or tag == u'!')
+ event = ScalarEvent(anchor, tag, implicit, u'',
+ start_mark, end_mark)
else:
if block:
node = 'block'
else:
node = 'flow'
- token = self.scanner.peek()
+ token = self.peek_token()
raise ParserError("while scanning a %s node" % node, start_mark,
"expected the node content, but found %r" % token.id,
token.start_mark)
@@ -334,33 +319,33 @@ class Parser:
def parse_block_sequence(self):
# BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
- token = self.scanner.get()
+ token = self.get_token()
start_mark = token.start_mark
- while self.scanner.check(BlockEntryToken):
- token = self.scanner.get()
- if not self.scanner.check(BlockEntryToken, BlockEndToken):
+ while self.check_token(BlockEntryToken):
+ token = self.get_token()
+ if not self.check_token(BlockEntryToken, BlockEndToken):
for event in self.parse_block_node():
yield event
else:
yield self.process_empty_scalar(token.end_mark)
- if not self.scanner.check(BlockEndToken):
- token = self.scanner.peek()
+ if not self.check_token(BlockEndToken):
+ token = self.peek_token()
raise ParserError("while scanning a block collection", start_mark,
"expected <block end>, but found %r" % token.id, token.start_mark)
- token = self.scanner.get()
+ token = self.get_token()
yield SequenceEndEvent(token.start_mark, token.end_mark)
def parse_indentless_sequence(self):
# (BLOCK-ENTRY block_node?)+
- while self.scanner.check(BlockEntryToken):
- token = self.scanner.get()
- if not self.scanner.check(BlockEntryToken,
+ while self.check_token(BlockEntryToken):
+ token = self.get_token()
+ if not self.check_token(BlockEntryToken,
KeyToken, ValueToken, BlockEndToken):
for event in self.parse_block_node():
yield event
else:
yield self.process_empty_scalar(token.end_mark)
- token = self.scanner.peek()
+ token = self.peek_token()
yield SequenceEndEvent(token.start_mark, token.start_mark)
def parse_block_mapping(self):
@@ -368,31 +353,31 @@ class Parser:
# ((KEY block_node_or_indentless_sequence?)?
# (VALUE block_node_or_indentless_sequence?)?)*
# BLOCK-END
- token = self.scanner.get()
+ token = self.get_token()
start_mark = token.start_mark
- while self.scanner.check(KeyToken, ValueToken):
- if self.scanner.check(KeyToken):
- token = self.scanner.get()
- if not self.scanner.check(KeyToken, ValueToken, BlockEndToken):
+ while self.check_token(KeyToken, ValueToken):
+ if self.check_token(KeyToken):
+ token = self.get_token()
+ if not self.check_token(KeyToken, ValueToken, BlockEndToken):
for event in self.parse_block_node_or_indentless_sequence():
yield event
else:
yield self.process_empty_scalar(token.end_mark)
- if self.scanner.check(ValueToken):
- token = self.scanner.get()
- if not self.scanner.check(KeyToken, ValueToken, BlockEndToken):
+ if self.check_token(ValueToken):
+ token = self.get_token()
+ if not self.check_token(KeyToken, ValueToken, BlockEndToken):
for event in self.parse_block_node_or_indentless_sequence():
yield event
else:
yield self.process_empty_scalar(token.end_mark)
else:
- token = self.scanner.peek()
+ token = self.peek_token()
yield self.process_empty_scalar(token.start_mark)
- if not self.scanner.check(BlockEndToken):
- token = self.scanner.peek()
+ if not self.check_token(BlockEndToken):
+ token = self.peek_token()
raise ParserError("while scanning a block mapping", start_mark,
"expected <block end>, but found %r" % token.id, token.start_mark)
- token = self.scanner.get()
+ token = self.get_token()
yield MappingEndEvent(token.start_mark, token.end_mark)
def parse_flow_sequence(self):
@@ -406,42 +391,42 @@ class Parser:
# flow_mapping_entry are equal, their interpretations are different.
# For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
# generates an inline mapping (set syntax).
- token = self.scanner.get()
+ token = self.get_token()
start_mark = token.start_mark
- while not self.scanner.check(FlowSequenceEndToken):
- if self.scanner.check(KeyToken):
- token = self.scanner.get()
+ while not self.check_token(FlowSequenceEndToken):
+ if self.check_token(KeyToken):
+ token = self.get_token()
yield MappingStartEvent(None, None, # u'!',
token.start_mark, token.end_mark,
flow_style=True)
- if not self.scanner.check(ValueToken,
+ if not self.check_token(ValueToken,
FlowEntryToken, FlowSequenceEndToken):
for event in self.parse_flow_node():
yield event
else:
yield self.process_empty_scalar(token.end_mark)
- if self.scanner.check(ValueToken):
- token = self.scanner.get()
- if not self.scanner.check(FlowEntryToken, FlowSequenceEndToken):
+ if self.check_token(ValueToken):
+ token = self.get_token()
+ if not self.check_token(FlowEntryToken, FlowSequenceEndToken):
for event in self.parse_flow_node():
yield event
else:
yield self.process_empty_scalar(token.end_mark)
else:
- token = self.scanner.peek()
+ token = self.peek_token()
yield self.process_empty_scalar(token.start_mark)
- token = self.scanner.peek()
+ token = self.peek_token()
yield MappingEndEvent(token.start_mark, token.start_mark)
else:
for event in self.parse_flow_node():
yield event
- if not self.scanner.check(FlowEntryToken, FlowSequenceEndToken):
- token = self.scanner.peek()
+ if not self.check_token(FlowEntryToken, FlowSequenceEndToken):
+ token = self.peek_token()
raise ParserError("while scanning a flow sequence", start_mark,
"expected ',' or ']', but got %r" % token.id, token.start_mark)
- if self.scanner.check(FlowEntryToken):
- self.scanner.get()
- token = self.scanner.get()
+ if self.check_token(FlowEntryToken):
+ self.get_token()
+ token = self.get_token()
yield SequenceEndEvent(token.start_mark, token.end_mark)
def parse_flow_mapping(self):
@@ -450,45 +435,44 @@ class Parser:
# flow_mapping_entry?
# FLOW-MAPPING-END
# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
- token = self.scanner.get()
+ token = self.get_token()
start_mark = token.start_mark
- while not self.scanner.check(FlowMappingEndToken):
- if self.scanner.check(KeyToken):
- token = self.scanner.get()
- if not self.scanner.check(ValueToken,
+ while not self.check_token(FlowMappingEndToken):
+ if self.check_token(KeyToken):
+ token = self.get_token()
+ if not self.check_token(ValueToken,
FlowEntryToken, FlowMappingEndToken):
for event in self.parse_flow_node():
yield event
else:
yield self.process_empty_scalar(token.end_mark)
- if self.scanner.check(ValueToken):
- token = self.scanner.get()
- if not self.scanner.check(FlowEntryToken, FlowMappingEndToken):
+ if self.check_token(ValueToken):
+ token = self.get_token()
+ if not self.check_token(FlowEntryToken, FlowMappingEndToken):
for event in self.parse_flow_node():
yield event
else:
yield self.process_empty_scalar(token.end_mark)
else:
- token = self.scanner.peek()
+ token = self.peek_token()
yield self.process_empty_scalar(token.start_mark)
else:
for event in self.parse_flow_node():
yield event
- yield self.process_empty_scalar(self.scanner.peek().start_mark)
- if not self.scanner.check(FlowEntryToken, FlowMappingEndToken):
- token = self.scanner.peek()
+ yield self.process_empty_scalar(self.peek_token().start_mark)
+ if not self.check_token(FlowEntryToken, FlowMappingEndToken):
+ token = self.peek_token()
raise ParserError("while scanning a flow mapping", start_mark,
"expected ',' or '}', but got %r" % token.id, token.start_mark)
- if self.scanner.check(FlowEntryToken):
- self.scanner.get()
- if not self.scanner.check(FlowMappingEndToken):
- token = self.scanner.peek()
+ if self.check_token(FlowEntryToken):
+ self.get_token()
+ if not self.check_token(FlowMappingEndToken):
+ token = self.peek_token()
raise ParserError("while scanning a flow mapping", start_mark,
"expected '}', but found %r" % token.id, token.start_mark)
- token = self.scanner.get()
+ token = self.get_token()
yield MappingEndEvent(token.start_mark, token.end_mark)
def process_empty_scalar(self, mark):
- return ScalarEvent(None, None, u'', mark, mark,
- implicit=True)
+ return ScalarEvent(None, None, True, u'', mark, mark)
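A hedged sketch of the renamed event interface defined above; the sample document is made up, the import paths are assumed, and the .implicit attribute on ScalarEvent is assumed to mirror the new positional constructor argument:

    from yaml.loader import Loader
    from yaml.events import ScalarEvent, StreamEndEvent

    loader = Loader(u"key: value\n")
    while not loader.check_event(StreamEndEvent):
        event = loader.get_event()
        if isinstance(event, ScalarEvent):
            # implicit is now the third positional argument of ScalarEvent
            print event.value, event.implicit
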
diff --git a/lib/yaml/reader.py b/lib/yaml/reader.py
index ab16a13..beb76d0 100644
--- a/lib/yaml/reader.py
+++ b/lib/yaml/reader.py
@@ -91,7 +91,7 @@ class Reader:
# Yeah, it's ugly and slow.
- def __init__(self, data):
+ def __init__(self, stream):
self.name = None
self.stream = None
self.stream_pointer = 0
@@ -104,17 +104,17 @@ class Reader:
self.index = 0
self.line = 0
self.column = 0
- if isinstance(data, unicode):
+ if isinstance(stream, unicode):
self.name = "<unicode string>"
- self.check_printable(data)
- self.buffer = data+u'\0'
- elif isinstance(data, str):
+ self.check_printable(stream)
+ self.buffer = stream+u'\0'
+ elif isinstance(stream, str):
self.name = "<string>"
- self.raw_buffer = data
+ self.raw_buffer = stream
self.determine_encoding()
else:
- self.stream = data
- self.name = getattr(data, 'name', "<file>")
+ self.stream = stream
+ self.name = getattr(stream, 'name', "<file>")
self.eof = False
self.raw_buffer = ''
self.determine_encoding()
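The renamed stream argument still covers the three input kinds distinguished by the isinstance() branches above; a sketch, with a hypothetical file name:

    from yaml.reader import Reader

    r1 = Reader(u"a: 1\n")             # unicode string -> "<unicode string>"
    r2 = Reader("a: 1\n")              # byte string -> encoding is detected
    r3 = Reader(open('example.yaml'))  # file-like object -> read lazily
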
diff --git a/lib/yaml/representer.py b/lib/yaml/representer.py
index 6fe74fc..a12c34a 100644
--- a/lib/yaml/representer.py
+++ b/lib/yaml/representer.py
@@ -20,77 +20,59 @@ except NameError:
class RepresenterError(YAMLError):
pass
-class BaseRepresenter(BaseDetector):
+class BaseRepresenter:
- DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str'
- DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq'
- DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map'
+ yaml_representers = {}
- def __init__(self, serializer):
- self.serializer = serializer
+ def __init__(self):
self.represented_objects = {}
- def close(self):
- self.serializer.close()
-
- def represent(self, native):
- node = self.represent_object(native)
- self.serializer.serialize(node)
+ def represent(self, data):
+ node = self.represent_object(data)
+ self.serialize(node)
self.represented_objects = {}
- def represent_object(self, native):
- if self.ignore_aliases(native):
+ def represent_object(self, data):
+ if self.ignore_aliases(data):
alias_key = None
else:
- alias_key = id(native)
+ alias_key = id(data)
if alias_key is not None:
if alias_key in self.represented_objects:
node = self.represented_objects[alias_key]
if node is None:
- raise RepresenterError("recursive objects are not allowed: %r" % native)
+ raise RepresenterError("recursive objects are not allowed: %r" % data)
return node
self.represented_objects[alias_key] = None
- for native_type in type(native).__mro__:
- if native_type in self.yaml_representers:
- node = self.yaml_representers[native_type](self, native)
+ for data_type in type(data).__mro__:
+ if data_type in self.yaml_representers:
+ node = self.yaml_representers[data_type](self, data)
break
else:
if None in self.yaml_representers:
- node = self.yaml_representers[None](self, native)
+ node = self.yaml_representers[None](self, data)
else:
- node = ScalarNode(None, unicode(native))
+ node = ScalarNode(None, unicode(data))
if alias_key is not None:
self.represented_objects[alias_key] = node
return node
- def add_representer(cls, native_type, representer):
+ def add_representer(cls, data_type, representer):
if not 'yaml_representers' in cls.__dict__:
cls.yaml_representers = cls.yaml_representers.copy()
- cls.yaml_representers[native_type] = representer
+ cls.yaml_representers[data_type] = representer
add_representer = classmethod(add_representer)
- yaml_representers = {}
-
def represent_scalar(self, tag, value, style=None):
- detected_tag = self.detect(value)
- if detected_tag is None:
- detected_tag = self.DEFAULT_SCALAR_TAG
- implicit = (tag == detected_tag)
- if tag == self.DEFAULT_SCALAR_TAG:
- tag = None
- return ScalarNode(tag, value, implicit=implicit, style=style)
+ return ScalarNode(tag, value, style=style)
def represent_sequence(self, tag, sequence, flow_style=None):
- if tag == self.DEFAULT_SEQUENCE_TAG:
- tag = None
value = []
for item in sequence:
value.append(self.represent_object(item))
return SequenceNode(tag, value, flow_style=flow_style)
def represent_mapping(self, tag, mapping, flow_style=None):
- if tag == self.DEFAULT_MAPPING_TAG:
- tag = None
value = {}
if hasattr(mapping, 'keys'):
for item_key in mapping.keys():
@@ -103,108 +85,122 @@ class BaseRepresenter(BaseDetector):
self.represent_object(item_value)
return MappingNode(tag, value, flow_style=flow_style)
- def ignore_aliases(self, native):
+ def ignore_aliases(self, data):
return False
-class SafeRepresenter(Detector, BaseRepresenter):
+class SafeRepresenter(BaseRepresenter):
- def ignore_aliases(self, native):
- if native in [None, ()]:
+ def ignore_aliases(self, data):
+ if data in [None, ()]:
return True
- if isinstance(native, (str, unicode, bool, int, float)):
+ if isinstance(data, (str, unicode, bool, int, float)):
return True
- def represent_none(self, native):
+ def represent_none(self, data):
return self.represent_scalar(u'tag:yaml.org,2002:null',
u'null')
- def represent_str(self, native):
+ def represent_str(self, data):
encoding = None
try:
- unicode(native, 'ascii')
+ unicode(data, 'ascii')
encoding = 'ascii'
except UnicodeDecodeError:
try:
- unicode(native, 'utf-8')
+ unicode(data, 'utf-8')
encoding = 'utf-8'
except UnicodeDecodeError:
pass
if encoding:
return self.represent_scalar(u'tag:yaml.org,2002:str',
- unicode(native, encoding))
+ unicode(data, encoding))
else:
return self.represent_scalar(u'tag:yaml.org,2002:binary',
- unicode(native.encode('base64')), style='|')
+ unicode(data.encode('base64')), style='|')
- def represent_unicode(self, native):
- return self.represent_scalar(u'tag:yaml.org,2002:str', native)
+ def represent_unicode(self, data):
+ return self.represent_scalar(u'tag:yaml.org,2002:str', data)
- def represent_bool(self, native):
- if native:
+ def represent_bool(self, data):
+ if data:
value = u'true'
else:
value = u'false'
return self.represent_scalar(u'tag:yaml.org,2002:bool', value)
- def represent_int(self, native):
- return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(native))
+ def represent_int(self, data):
+ return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data))
- def represent_long(self, native):
- return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(native))
+ def represent_long(self, data):
+ return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data))
inf_value = 1e300000
nan_value = inf_value/inf_value
- def represent_float(self, native):
- if native == self.inf_value:
+ def represent_float(self, data):
+ if data == self.inf_value:
value = u'.inf'
- elif native == -self.inf_value:
+ elif data == -self.inf_value:
value = u'-.inf'
- elif native == self.nan_value or native != native:
+ elif data == self.nan_value or data != data:
value = u'.nan'
else:
- value = unicode(native)
+ value = unicode(data)
return self.represent_scalar(u'tag:yaml.org,2002:float', value)
- def represent_list(self, native):
- pairs = (len(native) > 0)
- for item in native:
+ def represent_list(self, data):
+ pairs = (len(data) > 0)
+ for item in data:
if not isinstance(item, tuple) or len(item) != 2:
pairs = False
break
if not pairs:
- return self.represent_sequence(u'tag:yaml.org,2002:seq', native)
+ return self.represent_sequence(u'tag:yaml.org,2002:seq', data)
value = []
- for item_key, item_value in native:
+ for item_key, item_value in data:
value.append(self.represent_mapping(u'tag:yaml.org,2002:map',
[(item_key, item_value)]))
return SequenceNode(u'tag:yaml.org,2002:pairs', value)
- def represent_dict(self, native):
- return self.represent_mapping(u'tag:yaml.org,2002:map', native)
+ def represent_dict(self, data):
+ return self.represent_mapping(u'tag:yaml.org,2002:map', data)
- def represent_set(self, native):
+ def represent_set(self, data):
value = {}
- for key in native:
+ for key in data:
value[key] = None
return self.represent_mapping(u'tag:yaml.org,2002:set', value)
- def represent_date(self, native):
- value = u'%04d-%02d-%02d' % (native.year, native.month, native.day)
+ def represent_date(self, data):
+ value = u'%04d-%02d-%02d' % (data.year, data.month, data.day)
return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value)
- def represent_datetime(self, native):
+ def represent_datetime(self, data):
value = u'%04d-%02d-%02d %02d:%02d:%02d' \
- % (native.year, native.month, native.day,
- native.hour, native.minute, native.second)
- if native.microsecond:
- value += u'.' + unicode(native.microsecond/1000000.0).split(u'.')[1]
- if native.utcoffset():
- value += unicode(native.utcoffset())
+ % (data.year, data.month, data.day,
+ data.hour, data.minute, data.second)
+ if data.microsecond:
+ value += u'.' + unicode(data.microsecond/1000000.0).split(u'.')[1]
+ if data.utcoffset():
+ value += unicode(data.utcoffset())
return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value)
- def represent_undefined(self, native):
- raise RepresenterError("cannot represent an object: %s" % native)
+ def represent_yaml_object(self, tag, data, cls, flow_style=None):
+ if hasattr(data, '__getstate__'):
+ state = data.__getstate__()
+ else:
+ state = data.__dict__.copy()
+ mapping = state
+ if hasattr(state, 'keys'):
+ mapping = []
+ keys = state.keys()
+ keys.sort()
+ for key in keys:
+ mapping.append((key.replace('_', '-'), state[key]))
+ return self.represent_mapping(tag, mapping, flow_style=flow_style)
+
+ def represent_undefined(self, data):
+ raise RepresenterError("cannot represent an object: %s" % data)
SafeRepresenter.add_representer(type(None),
SafeRepresenter.represent_none)
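A sketch of the renamed hook in use: add_representer() now speaks of a data type rather than a native type. The Point class and the !point tag below are hypothetical:

    from yaml.representer import SafeRepresenter

    class Point(object):
        def __init__(self, x, y):
            self.x, self.y = x, y

    def represent_point(representer, data):
        # represent_scalar() builds the ScalarNode for us
        return representer.represent_scalar(u'!point',
                u'%s %s' % (data.x, data.y))

    SafeRepresenter.add_representer(Point, represent_point)
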
diff --git a/lib/yaml/resolver.py b/lib/yaml/resolver.py
deleted file mode 100644
index b25857b..0000000
--- a/lib/yaml/resolver.py
+++ /dev/null
@@ -1,72 +0,0 @@
-
-__all__ = ['Resolver', 'ResolverError']
-
-from error import MarkedYAMLError
-from detector import Detector
-from nodes import *
-
-import re
-
-# Not really used.
-class ResolverError(MarkedYAMLError):
- pass
-
-class Resolver(Detector):
-
- DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str'
- DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq'
- DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map'
-
- def __init__(self, composer):
- self.composer = composer
- self.resolved_nodes = {}
-
- def check(self):
- # If there are more documents available?
- return self.composer.check()
-
- def get(self):
- # Resolve and return the root node of the next document.
- if self.composer.check():
- return self.resolve_document(self.composer.get())
-
- def __iter__(self):
- # Iterator protocol.
- while self.composer.check():
- yield self.resolve_document(self.composer.get())
-
- def resolve_document(self, node):
- self.resolve_node([], node)
- return node
- self.resolved_nodes = {}
-
- def resolve_node(self, path, node):
- if node in self.resolved_nodes:
- return
- self.resolved_nodes[node] = None
- if isinstance(node, ScalarNode):
- self.resolve_scalar(path, node)
- elif isinstance(node, SequenceNode):
- self.resolve_sequence(path, node)
- for index in range(len(node.value)):
- self.resolve_node(path+[(node, index)], node.value[index])
- elif isinstance(node, MappingNode):
- self.resolve_mapping(path, node)
- for key in node.value:
- self.resolve_node(path+[node, None], key)
- self.resolve_node(path+[node, key], node.value[key])
-
- def resolve_scalar(self, path, node):
- if node.tag is None and node.implicit:
- node.tag = self.detect(node.value)
- if node.tag is None or node.tag == u'!':
- node.tag = self.DEFAULT_SCALAR_TAG
-
- def resolve_sequence(self, path, node):
- if node.tag is None or node.tag == u'!':
- node.tag = self.DEFAULT_SEQUENCE_TAG
-
- def resolve_mapping(self, path, node):
- if node.tag is None or node.tag == u'!':
- node.tag = self.DEFAULT_MAPPING_TAG
-
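For reference, the deleted class reduced to a small defaulting rule; this standalone sketch keeps only that logic, with the Detector-based detection stubbed out as an optional argument:

    DEFAULT_TAGS = {
        'scalar':   u'tag:yaml.org,2002:str',
        'sequence': u'tag:yaml.org,2002:seq',
        'mapping':  u'tag:yaml.org,2002:map',
    }

    def resolve_tag(kind, tag, detected=None):
        # None and the non-specific '!' tag fall back to the defaults
        if tag is None or tag == u'!':
            return detected or DEFAULT_TAGS[kind]
        return tag
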
diff --git a/lib/yaml/scanner.py b/lib/yaml/scanner.py
index 7a1d273..e9780c4 100644
--- a/lib/yaml/scanner.py
+++ b/lib/yaml/scanner.py
@@ -45,19 +45,16 @@ class SimpleKey:
class Scanner:
-
- def __init__(self, reader):
+ def __init__(self):
"""Initialize the scanner."""
- # The input stream. The Reader class do the dirty work of checking for
- # BOM and converting the input data to Unicode. It also adds NUL to
- # the end.
+ # It is assumed that Scanner and Reader will have a common descendant.
+ # Reader does the dirty work of checking for BOM and converting the
+ # input data to Unicode. It also adds NUL to the end.
#
# Reader supports the following methods
- # self.reader.peek(i=0) # peek the next i-th character
- # self.reader.prefix(l=1) # peek the next l characters
- # self.reader.forward(l=1) # read the next l characters
- # and move the pointer
- self.reader = reader
+ # self.peek(i=0) # peek the next i-th character
+ # self.prefix(l=1) # peek the next l characters
+ # self.forward(l=1) # read the next l characters and move the pointer.
# Have we reached the end of the stream?
self.done = False
@@ -113,24 +110,26 @@ class Scanner:
# Public methods.
- def check(self, *choices):
+ def check_token(self, *choices):
# Check if the next token is one of the given types.
while self.need_more_tokens():
self.fetch_more_tokens()
if self.tokens:
+ if not choices:
+ return True
for choice in choices:
if isinstance(self.tokens[0], choice):
return True
return False
- def peek(self):
+ def peek_token(self):
# Return the next token, but do not delete it from the queue.
while self.need_more_tokens():
self.fetch_more_tokens()
if self.tokens:
return self.tokens[0]
- def get(self):
+ def get_token(self):
# Return the next token.
while self.need_more_tokens():
self.fetch_more_tokens()
@@ -171,10 +170,10 @@ class Scanner:
# Compare the current indentation and column. It may add some tokens
# and decrease the current indentation level.
- self.unwind_indent(self.reader.column)
+ self.unwind_indent(self.column)
# Peek the next character.
- ch = self.reader.peek()
+ ch = self.peek()
# Is it the end of stream?
if ch == u'\0':
@@ -265,7 +264,7 @@ class Scanner:
# No? It's an error. Let's produce a nice error message.
raise ScannerError("while scanning for the next token", None,
"found character %r that cannot start any token"
- % ch.encode('utf-8'), self.reader.get_mark())
+ % ch.encode('utf-8'), self.get_mark())
# Simple keys treatment.
@@ -293,11 +292,11 @@ class Scanner:
# height (may cause problems if indentation is broken though).
for level in self.possible_simple_keys.keys():
key = self.possible_simple_keys[level]
- if key.line != self.reader.line \
- or self.reader.index-key.index > 1024:
+ if key.line != self.line \
+ or self.index-key.index > 1024:
if key.required:
raise ScannerError("while scanning a simple key", key.mark,
- "could not found expected ':'", self.reader.get_mark())
+ "could not found expected ':'", self.get_mark())
del self.possible_simple_keys[level]
def save_possible_simple_key(self):
@@ -306,7 +305,7 @@ class Scanner:
# ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
# Check if a simple key is required at the current position.
- required = not self.flow_level and self.indent == self.reader.column
+ required = not self.flow_level and self.indent == self.column
# A simple key is required only if it is the first token in the current
# line. Therefore it is always allowed.
@@ -317,12 +316,8 @@ class Scanner:
if self.allow_simple_key:
self.remove_possible_simple_key()
token_number = self.tokens_taken+len(self.tokens)
- index = self.reader.index
- line = self.reader.line
- column = self.reader.column
- mark = self.reader.get_mark()
key = SimpleKey(token_number, required,
- index, line, column, mark)
+ self.index, self.line, self.column, self.get_mark())
self.possible_simple_keys[self.flow_level] = key
def remove_possible_simple_key(self):
@@ -334,7 +329,7 @@ class Scanner:
assert not key.required
#if key.required:
# raise ScannerError("while scanning a simple key", key.mark,
- # "could not found expected ':'", self.reader.get_mark())
+ # "could not found expected ':'", self.get_mark())
# Indentation functions.
@@ -349,7 +344,7 @@ class Scanner:
#if self.flow_level and self.indent > column:
# raise ScannerError(None, None,
# "invalid intendation or unclosed '[' or '{'",
- # self.reader.get_mark())
+ # self.get_mark())
# In the flow context, indentation is ignored. We make the scanner less
# restrictive than the specification requires.
@@ -358,7 +353,7 @@ class Scanner:
# In block context, we may need to issue the BLOCK-END tokens.
while self.indent > column:
- mark = self.reader.get_mark()
+ mark = self.get_mark()
self.indent = self.indents.pop()
self.tokens.append(BlockEndToken(mark, mark))
@@ -377,11 +372,11 @@ class Scanner:
# last token.
# Read the token.
- mark = self.reader.get_mark()
+ mark = self.get_mark()
# Add STREAM-START.
self.tokens.append(StreamStartToken(mark, mark,
- encoding=self.reader.encoding))
+ encoding=self.encoding))
def fetch_stream_end(self):
@@ -394,12 +389,12 @@ class Scanner:
self.possible_simple_keys = {}
# Read the token.
- mark = self.reader.get_mark()
+ mark = self.get_mark()
# Add STREAM-END.
self.tokens.append(StreamEndToken(mark, mark))
- # The reader is ended.
+ # The stream is finished.
self.done = True
def fetch_directive(self):
@@ -431,9 +426,9 @@ class Scanner:
self.allow_simple_key = False
# Add DOCUMENT-START or DOCUMENT-END.
- start_mark = self.reader.get_mark()
- self.reader.forward(3)
- end_mark = self.reader.get_mark()
+ start_mark = self.get_mark()
+ self.forward(3)
+ end_mark = self.get_mark()
self.tokens.append(TokenClass(start_mark, end_mark))
def fetch_flow_sequence_start(self):
@@ -454,9 +449,9 @@ class Scanner:
self.allow_simple_key = True
# Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
- start_mark = self.reader.get_mark()
- self.reader.forward()
- end_mark = self.reader.get_mark()
+ start_mark = self.get_mark()
+ self.forward()
+ end_mark = self.get_mark()
self.tokens.append(TokenClass(start_mark, end_mark))
def fetch_flow_sequence_end(self):
@@ -477,9 +472,9 @@ class Scanner:
self.allow_simple_key = False
# Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
- start_mark = self.reader.get_mark()
- self.reader.forward()
- end_mark = self.reader.get_mark()
+ start_mark = self.get_mark()
+ self.forward()
+ end_mark = self.get_mark()
self.tokens.append(TokenClass(start_mark, end_mark))
def fetch_flow_entry(self):
@@ -491,9 +486,9 @@ class Scanner:
self.remove_possible_simple_key()
# Add FLOW-ENTRY.
- start_mark = self.reader.get_mark()
- self.reader.forward()
- end_mark = self.reader.get_mark()
+ start_mark = self.get_mark()
+ self.forward()
+ end_mark = self.get_mark()
self.tokens.append(FlowEntryToken(start_mark, end_mark))
def fetch_block_entry(self):
@@ -505,11 +500,11 @@ class Scanner:
if not self.allow_simple_key:
raise ScannerError(None, None,
"sequence entries are not allowed here",
- self.reader.get_mark())
+ self.get_mark())
# We may need to add BLOCK-SEQUENCE-START.
- if self.add_indent(self.reader.column):
- mark = self.reader.get_mark()
+ if self.add_indent(self.column):
+ mark = self.get_mark()
self.tokens.append(BlockSequenceStartToken(mark, mark))
# It's an error for the block entry to occur in the flow context,
@@ -524,9 +519,9 @@ class Scanner:
self.remove_possible_simple_key()
# Add BLOCK-ENTRY.
- start_mark = self.reader.get_mark()
- self.reader.forward()
- end_mark = self.reader.get_mark()
+ start_mark = self.get_mark()
+ self.forward()
+ end_mark = self.get_mark()
self.tokens.append(BlockEntryToken(start_mark, end_mark))
def fetch_key(self):
@@ -538,11 +533,11 @@ class Scanner:
if not self.allow_simple_key:
raise ScannerError(None, None,
"mapping keys are not allowed here",
- self.reader.get_mark())
+ self.get_mark())
# We may need to add BLOCK-MAPPING-START.
- if self.add_indent(self.reader.column):
- mark = self.reader.get_mark()
+ if self.add_indent(self.column):
+ mark = self.get_mark()
self.tokens.append(BlockMappingStartToken(mark, mark))
# Simple keys are allowed after '?' in the block context.
@@ -552,9 +547,9 @@ class Scanner:
self.remove_possible_simple_key()
# Add KEY.
- start_mark = self.reader.get_mark()
- self.reader.forward()
- end_mark = self.reader.get_mark()
+ start_mark = self.get_mark()
+ self.forward()
+ end_mark = self.get_mark()
self.tokens.append(KeyToken(start_mark, end_mark))
def fetch_value(self):
@@ -591,7 +586,7 @@ class Scanner:
if not self.allow_simple_key:
raise ScannerError(None, None,
"mapping values are not allowed here",
- self.reader.get_mark())
+ self.get_mark())
# Simple keys are allowed after ':' in the block context.
self.allow_simple_key = not self.flow_level
@@ -600,9 +595,9 @@ class Scanner:
self.remove_possible_simple_key()
# Add VALUE.
- start_mark = self.reader.get_mark()
- self.reader.forward()
- end_mark = self.reader.get_mark()
+ start_mark = self.get_mark()
+ self.forward()
+ end_mark = self.get_mark()
self.tokens.append(ValueToken(start_mark, end_mark))
def fetch_alias(self):
@@ -691,30 +686,30 @@ class Scanner:
# DIRECTIVE: ^ '%' ...
# The '%' indicator is already checked.
- if self.reader.column == 0:
+ if self.column == 0:
return True
def check_document_start(self):
# DOCUMENT-START: ^ '---' (' '|'\n')
- if self.reader.column == 0:
- if self.reader.prefix(3) == u'---' \
- and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+ if self.column == 0:
+ if self.prefix(3) == u'---' \
+ and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
return True
def check_document_end(self):
# DOCUMENT-END: ^ '...' (' '|'\n')
- if self.reader.column == 0:
- prefix = self.reader.peek(4)
- if self.reader.prefix(3) == u'...' \
- and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+ if self.column == 0:
+ prefix = self.peek(4)
+ if self.prefix(3) == u'...' \
+ and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
return True
def check_block_entry(self):
# BLOCK-ENTRY: '-' (' '|'\n')
- return self.reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
+ return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
def check_key(self):
@@ -724,7 +719,7 @@ class Scanner:
# KEY(block context): '?' (' '|'\n')
else:
- return self.reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
+ return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
def check_value(self):
@@ -734,7 +729,7 @@ class Scanner:
# VALUE(block context): ':' (' '|'\n')
else:
- return self.reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
+ return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
def check_plain(self):
@@ -750,9 +745,9 @@ class Scanner:
# Note that we limit the last rule to the block context (except the
# '-' character) because we want the flow context to be space
# independent.
- ch = self.reader.peek()
+ ch = self.peek()
return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \
- or (self.reader.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029'
+ or (self.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029'
and (ch == u'-' or (not self.flow_level and ch in u'?:')))
# Scanners.
@@ -777,15 +772,15 @@ class Scanner:
# `unwind_indent` before issuing BLOCK-END.
# Scanners for block, flow, and plain scalars need to be modified.
- if self.reader.index == 0 and self.reader.peek() == u'\uFEFF':
- self.reader.forward()
+ if self.index == 0 and self.peek() == u'\uFEFF':
+ self.forward()
found = False
while not found:
- while self.reader.peek() == u' ':
- self.reader.forward()
- if self.reader.peek() == u'#':
- while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029':
- self.reader.forward()
+ while self.peek() == u' ':
+ self.forward()
+ if self.peek() == u'#':
+ while self.peek() not in u'\0\r\n\x85\u2028\u2029':
+ self.forward()
if self.scan_line_break():
if not self.flow_level:
self.allow_simple_key = True
@@ -794,119 +789,119 @@ class Scanner:
def scan_directive(self):
# See the specification for details.
- start_mark = self.reader.get_mark()
- self.reader.forward()
+ start_mark = self.get_mark()
+ self.forward()
name = self.scan_directive_name(start_mark)
value = None
if name == u'YAML':
value = self.scan_yaml_directive_value(start_mark)
- end_mark = self.reader.get_mark()
+ end_mark = self.get_mark()
elif name == u'TAG':
value = self.scan_tag_directive_value(start_mark)
- end_mark = self.reader.get_mark()
+ end_mark = self.get_mark()
else:
- end_mark = self.reader.get_mark()
- while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029':
- self.reader.forward()
+ end_mark = self.get_mark()
+ while self.peek() not in u'\0\r\n\x85\u2028\u2029':
+ self.forward()
self.scan_directive_ignored_line(start_mark)
return DirectiveToken(name, value, start_mark, end_mark)
def scan_directive_name(self, start_mark):
# See the specification for details.
length = 0
- ch = self.reader.peek(length)
+ ch = self.peek(length)
while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
or ch in u'-_':
length += 1
- ch = self.reader.peek(length)
+ ch = self.peek(length)
if not length:
raise ScannerError("while scanning a directive", start_mark,
"expected alphabetic or numeric character, but found %r"
- % ch.encode('utf-8'), self.reader.get_mark())
- value = self.reader.prefix(length)
- self.reader.forward(length)
- ch = self.reader.peek()
+ % ch.encode('utf-8'), self.get_mark())
+ value = self.prefix(length)
+ self.forward(length)
+ ch = self.peek()
if ch not in u'\0 \r\n\x85\u2028\u2029':
raise ScannerError("while scanning a directive", start_mark,
"expected alphabetic or numeric character, but found %r"
- % ch.encode('utf-8'), self.reader.get_mark())
+ % ch.encode('utf-8'), self.get_mark())
return value
def scan_yaml_directive_value(self, start_mark):
# See the specification for details.
- while self.reader.peek() == u' ':
- self.reader.forward()
+ while self.peek() == u' ':
+ self.forward()
major = self.scan_yaml_directive_number(start_mark)
- if self.reader.peek() != '.':
+ if self.peek() != '.':
raise ScannerError("while scanning a directive", start_mark,
"expected a digit or '.', but found %r"
- % self.reader.peek().encode('utf-8'),
- self.reader.get_mark())
- self.reader.forward()
+ % self.peek().encode('utf-8'),
+ self.get_mark())
+ self.forward()
minor = self.scan_yaml_directive_number(start_mark)
- if self.reader.peek() not in u'\0 \r\n\x85\u2028\u2029':
+ if self.peek() not in u'\0 \r\n\x85\u2028\u2029':
raise ScannerError("while scanning a directive", start_mark,
"expected a digit or ' ', but found %r"
- % self.reader.peek().encode('utf-8'),
- self.reader.get_mark())
+ % self.peek().encode('utf-8'),
+ self.get_mark())
return (major, minor)
def scan_yaml_directive_number(self, start_mark):
# See the specification for details.
- ch = self.reader.peek()
+ ch = self.peek()
if not (u'0' <= ch <= '9'):
raise ScannerError("while scanning a directive", start_mark,
"expected a digit, but found %r" % ch.encode('utf-8'),
- self.reader.get_mark())
+ self.get_mark())
length = 0
- while u'0' <= self.reader.peek(length) <= u'9':
+ while u'0' <= self.peek(length) <= u'9':
length += 1
- value = int(self.reader.prefix(length))
- self.reader.forward(length)
+ value = int(self.prefix(length))
+ self.forward(length)
return value
def scan_tag_directive_value(self, start_mark):
# See the specification for details.
- while self.reader.peek() == u' ':
- self.reader.forward()
+ while self.peek() == u' ':
+ self.forward()
handle = self.scan_tag_directive_handle(start_mark)
- while self.reader.peek() == u' ':
- self.reader.forward()
+ while self.peek() == u' ':
+ self.forward()
prefix = self.scan_tag_directive_prefix(start_mark)
return (handle, prefix)
def scan_tag_directive_handle(self, start_mark):
# See the specification for details.
value = self.scan_tag_handle('directive', start_mark)
- ch = self.reader.peek()
+ ch = self.peek()
if ch != u' ':
raise ScannerError("while scanning a directive", start_mark,
"expected ' ', but found %r" % ch.encode('utf-8'),
- self.reader.get_mark())
+ self.get_mark())
return value
def scan_tag_directive_prefix(self, start_mark):
# See the specification for details.
value = self.scan_tag_uri('directive', start_mark)
- ch = self.reader.peek()
+ ch = self.peek()
if ch not in u'\0 \r\n\x85\u2028\u2029':
raise ScannerError("while scanning a directive", start_mark,
"expected ' ', but found %r" % ch.encode('utf-8'),
- self.reader.get_mark())
+ self.get_mark())
return value
def scan_directive_ignored_line(self, start_mark):
# See the specification for details.
- while self.reader.peek() == u' ':
- self.reader.forward()
- if self.reader.peek() == u'#':
- while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029':
- self.reader.forward()
- ch = self.reader.peek()
+ while self.peek() == u' ':
+ self.forward()
+ if self.peek() == u'#':
+ while self.peek() not in u'\0\r\n\x85\u2028\u2029':
+ self.forward()
+ ch = self.peek()
if ch not in u'\0\r\n\x85\u2028\u2029':
raise ScannerError("while scanning a directive", start_mark,
"expected a comment or a line break, but found %r"
- % ch.encode('utf-8'), self.reader.get_mark())
+ % ch.encode('utf-8'), self.get_mark())
self.scan_line_break()
def scan_anchor(self, TokenClass):
@@ -918,50 +913,50 @@ class Scanner:
# and
# [ *alias , "value" ]
# Therefore we restrict aliases to numbers and ASCII letters.
- start_mark = self.reader.get_mark()
- indicator = self.reader.peek()
+ start_mark = self.get_mark()
+ indicator = self.peek()
if indicator == '*':
name = 'alias'
else:
name = 'anchor'
- self.reader.forward()
+ self.forward()
length = 0
- ch = self.reader.peek(length)
+ ch = self.peek(length)
while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
or ch in u'-_':
length += 1
- ch = self.reader.peek(length)
+ ch = self.peek(length)
if not length:
raise ScannerError("while scanning an %s" % name, start_mark,
"expected alphabetic or numeric character, but found %r"
- % ch.encode('utf-8'), self.reader.get_mark())
- value = self.reader.prefix(length)
- self.reader.forward(length)
- ch = self.reader.peek()
+ % ch.encode('utf-8'), self.get_mark())
+ value = self.prefix(length)
+ self.forward(length)
+ ch = self.peek()
if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`':
raise ScannerError("while scanning an %s" % name, start_mark,
"expected alphabetic or numeric character, but found %r"
- % ch.encode('utf-8'), self.reader.get_mark())
- end_mark = self.reader.get_mark()
+ % ch.encode('utf-8'), self.get_mark())
+ end_mark = self.get_mark()
return TokenClass(value, start_mark, end_mark)
def scan_tag(self):
# See the specification for details.
- start_mark = self.reader.get_mark()
- ch = self.reader.peek(1)
+ start_mark = self.get_mark()
+ ch = self.peek(1)
if ch == u'<':
handle = None
- self.reader.forward(2)
+ self.forward(2)
suffix = self.scan_tag_uri('tag', start_mark)
- if self.reader.peek() != u'>':
+ if self.peek() != u'>':
raise ScannerError("while parsing a tag", start_mark,
- "expected '>', but found %r" % self.reader.peek().encode('utf-8'),
- self.reader.get_mark())
- self.reader.forward()
+ "expected '>', but found %r" % self.peek().encode('utf-8'),
+ self.get_mark())
+ self.forward()
elif ch in u'\0 \t\r\n\x85\u2028\u2029':
handle = None
suffix = u'!'
- self.reader.forward()
+ self.forward()
else:
length = 1
use_handle = False
@@ -970,21 +965,21 @@ class Scanner:
use_handle = True
break
length += 1
- ch = self.reader.peek(length)
+ ch = self.peek(length)
handle = u'!'
if use_handle:
handle = self.scan_tag_handle('tag', start_mark)
else:
handle = u'!'
- self.reader.forward()
+ self.forward()
suffix = self.scan_tag_uri('tag', start_mark)
- ch = self.reader.peek()
+ ch = self.peek()
if ch not in u'\0 \r\n\x85\u2028\u2029':
raise ScannerError("while scanning a tag", start_mark,
"expected ' ', but found %r" % ch.encode('utf-8'),
- self.reader.get_mark())
+ self.get_mark())
value = (handle, suffix)
- end_mark = self.reader.get_mark()
+ end_mark = self.get_mark()
return TagToken(value, start_mark, end_mark)
def scan_block_scalar(self, style):
@@ -996,10 +991,10 @@ class Scanner:
folded = False
chunks = []
- start_mark = self.reader.get_mark()
+ start_mark = self.get_mark()
# Scan the header.
- self.reader.forward()
+ self.forward()
chomping, increment = self.scan_block_scalar_indicators(start_mark)
self.scan_block_scalar_ignored_line(start_mark)
@@ -1016,24 +1011,24 @@ class Scanner:
line_break = u''
# Scan the inner part of the block scalar.
- while self.reader.column == indent and self.reader.peek() != u'\0':
+ while self.column == indent and self.peek() != u'\0':
chunks.extend(breaks)
- leading_non_space = self.reader.peek() not in u' \t'
+ leading_non_space = self.peek() not in u' \t'
length = 0
- while self.reader.peek(length) not in u'\0\r\n\x85\u2028\u2029':
+ while self.peek(length) not in u'\0\r\n\x85\u2028\u2029':
length += 1
- chunks.append(self.reader.prefix(length))
- self.reader.forward(length)
+ chunks.append(self.prefix(length))
+ self.forward(length)
line_break = self.scan_line_break()
breaks, end_mark = self.scan_block_scalar_breaks(indent)
- if self.reader.column == indent and self.reader.peek() != u'\0':
+ if self.column == indent and self.peek() != u'\0':
# Unfortunately, folding rules are ambiguous.
#
# This is the folding according to the specification:
if folded and line_break == u'\n' \
- and leading_non_space and self.reader.peek() not in u' \t':
+ and leading_non_space and self.peek() not in u' \t':
if not breaks:
chunks.append(u' ')
else:
@@ -1044,7 +1039,7 @@ class Scanner:
#
#if folded and line_break == u'\n':
# if not breaks:
- # if self.reader.peek() not in ' \t':
+ # if self.peek() not in ' \t':
# chunks.append(u' ')
# else:
# chunks.append(line_break)
@@ -1067,82 +1062,82 @@ class Scanner:
# See the specification for details.
chomping = None
increment = None
- ch = self.reader.peek()
+ ch = self.peek()
if ch in u'+-':
if ch == '+':
chomping = True
else:
chomping = False
- self.reader.forward()
- ch = self.reader.peek()
+ self.forward()
+ ch = self.peek()
if ch in u'0123456789':
increment = int(ch)
if increment == 0:
raise ScannerError("while scanning a block scalar", start_mark,
"expected indentation indicator in the range 1-9, but found 0",
- self.reader.get_mark())
- self.reader.forward()
+ self.get_mark())
+ self.forward()
elif ch in u'0123456789':
increment = int(ch)
if increment == 0:
raise ScannerError("while scanning a block scalar", start_mark,
"expected indentation indicator in the range 1-9, but found 0",
- self.reader.get_mark())
- self.reader.forward()
- ch = self.reader.peek()
+ self.get_mark())
+ self.forward()
+ ch = self.peek()
if ch in u'+-':
if ch == '+':
chomping = True
else:
chomping = False
- self.reader.forward()
- ch = self.reader.peek()
+ self.forward()
+ ch = self.peek()
if ch not in u'\0 \r\n\x85\u2028\u2029':
raise ScannerError("while scanning a block scalar", start_mark,
"expected chomping or indentation indicators, but found %r"
- % ch.encode('utf-8'), self.reader.get_mark())
+ % ch.encode('utf-8'), self.get_mark())
return chomping, increment
def scan_block_scalar_ignored_line(self, start_mark):
# See the specification for details.
- while self.reader.peek() == u' ':
- self.reader.forward()
- if self.reader.peek() == u'#':
- while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029':
- self.reader.forward()
- ch = self.reader.peek()
+ while self.peek() == u' ':
+ self.forward()
+ if self.peek() == u'#':
+ while self.peek() not in u'\0\r\n\x85\u2028\u2029':
+ self.forward()
+ ch = self.peek()
if ch not in u'\0\r\n\x85\u2028\u2029':
raise ScannerError("while scanning a block scalar", start_mark,
"expected a comment or a line break, but found %r"
- % ch.encode('utf-8'), self.reader.get_mark())
+ % ch.encode('utf-8'), self.get_mark())
self.scan_line_break()
def scan_block_scalar_indentation(self):
# See the specification for details.
chunks = []
max_indent = 0
- end_mark = self.reader.get_mark()
- while self.reader.peek() in u' \r\n\x85\u2028\u2029':
- if self.reader.peek() != u' ':
+ end_mark = self.get_mark()
+ while self.peek() in u' \r\n\x85\u2028\u2029':
+ if self.peek() != u' ':
chunks.append(self.scan_line_break())
- end_mark = self.reader.get_mark()
+ end_mark = self.get_mark()
else:
- self.reader.forward()
- if self.reader.column > max_indent:
- max_indent = self.reader.column
+ self.forward()
+ if self.column > max_indent:
+ max_indent = self.column
return chunks, max_indent, end_mark
def scan_block_scalar_breaks(self, indent):
# See the specification for details.
chunks = []
- end_mark = self.reader.get_mark()
- while self.reader.column < indent and self.reader.peek() == u' ':
- self.reader.forward()
- while self.reader.peek() in u'\r\n\x85\u2028\u2029':
+ end_mark = self.get_mark()
+ while self.column < indent and self.peek() == u' ':
+ self.forward()
+ while self.peek() in u'\r\n\x85\u2028\u2029':
chunks.append(self.scan_line_break())
- end_mark = self.reader.get_mark()
- while self.reader.column < indent and self.reader.peek() == u' ':
- self.reader.forward()
+ end_mark = self.get_mark()
+ while self.column < indent and self.peek() == u' ':
+ self.forward()
return chunks, end_mark
def scan_flow_scalar(self, style):
@@ -1157,15 +1152,15 @@ class Scanner:
else:
double = False
chunks = []
- start_mark = self.reader.get_mark()
- quote = self.reader.peek()
- self.reader.forward()
+ start_mark = self.get_mark()
+ quote = self.peek()
+ self.forward()
chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
- while self.reader.peek() != quote:
+ while self.peek() != quote:
chunks.extend(self.scan_flow_scalar_spaces(double, start_mark))
chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
- self.reader.forward()
- end_mark = self.reader.get_mark()
+ self.forward()
+ end_mark = self.get_mark()
return ScalarToken(u''.join(chunks), False, start_mark, end_mark,
style)
@@ -1200,41 +1195,41 @@ class Scanner:
chunks = []
while True:
length = 0
- while self.reader.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029':
+ while self.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029':
length += 1
if length:
- chunks.append(self.reader.prefix(length))
- self.reader.forward(length)
- ch = self.reader.peek()
- if not double and ch == u'\'' and self.reader.peek(1) == u'\'':
+ chunks.append(self.prefix(length))
+ self.forward(length)
+ ch = self.peek()
+ if not double and ch == u'\'' and self.peek(1) == u'\'':
chunks.append(u'\'')
- self.reader.forward(2)
+ self.forward(2)
elif (double and ch == u'\'') or (not double and ch in u'\"\\'):
chunks.append(ch)
- self.reader.forward()
+ self.forward()
elif double and ch == u'\\':
- self.reader.forward()
- ch = self.reader.peek()
+ self.forward()
+ ch = self.peek()
if ch in self.ESCAPE_REPLACEMENTS:
chunks.append(self.ESCAPE_REPLACEMENTS[ch])
- self.reader.forward()
+ self.forward()
elif ch in self.ESCAPE_CODES:
length = self.ESCAPE_CODES[ch]
- self.reader.forward()
+ self.forward()
for k in range(length):
- if self.reader.peek(k) not in u'0123456789ABCDEFabcdef':
+ if self.peek(k) not in u'0123456789ABCDEFabcdef':
raise ScannerError("while scanning a double-quoted scalar", start_mark,
"expected escape sequence of %d hexdecimal numbers, but found %r" %
- (length, self.reader.peek(k).encode('utf-8')), self.reader.get_mark())
- code = int(self.reader.prefix(length), 16)
+ (length, self.peek(k).encode('utf-8')), self.get_mark())
+ code = int(self.prefix(length), 16)
chunks.append(unichr(code))
- self.reader.forward(length)
+ self.forward(length)
elif ch in u'\r\n\x85\u2028\u2029':
self.scan_line_break()
chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
else:
raise ScannerError("while scanning a double-quoted scalar", start_mark,
- "found unknown escape character %r" % ch.encode('utf-8'), self.reader.get_mark())
+ "found unknown escape character %r" % ch.encode('utf-8'), self.get_mark())
else:
return chunks
@@ -1242,14 +1237,14 @@ class Scanner:
# See the specification for details.
chunks = []
length = 0
- while self.reader.peek(length) in u' \t':
+ while self.peek(length) in u' \t':
length += 1
- whitespaces = self.reader.prefix(length)
- self.reader.forward(length)
- ch = self.reader.peek()
+ whitespaces = self.prefix(length)
+ self.forward(length)
+ ch = self.peek()
if ch == u'\0':
raise ScannerError("while scanning a quoted scalar", start_mark,
- "found unexpected end of stream", self.reader.get_mark())
+ "found unexpected end of stream", self.get_mark())
elif ch in u'\r\n\x85\u2028\u2029':
line_break = self.scan_line_break()
breaks = self.scan_flow_scalar_breaks(double, start_mark)
@@ -1268,14 +1263,14 @@ class Scanner:
while True:
# Instead of checking indentation, we check for document
# separators.
- prefix = self.reader.prefix(3)
+ prefix = self.prefix(3)
if (prefix == u'---' or prefix == u'...') \
- and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+ and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
raise ScannerError("while scanning a quoted scalar", start_mark,
- "found unexpected document separator", self.reader.get_mark())
- while self.reader.peek() in u' \t':
- self.reader.forward()
- if self.reader.peek() in u'\r\n\x85\u2028\u2029':
+ "found unexpected document separator", self.get_mark())
+ while self.peek() in u' \t':
+ self.forward()
+ if self.peek() in u'\r\n\x85\u2028\u2029':
chunks.append(self.scan_line_break())
else:
return chunks
@@ -1287,7 +1282,7 @@ class Scanner:
# We also keep track of the `allow_simple_key` flag here.
# Indentation rules are loosened for the flow context.
chunks = []
- start_mark = self.reader.get_mark()
+ start_mark = self.get_mark()
end_mark = start_mark
indent = self.indent+1
# We allow zero indentation for scalars, but then we need to check for
@@ -1297,13 +1292,13 @@ class Scanner:
spaces = []
while True:
length = 0
- if self.reader.peek() == u'#':
+ if self.peek() == u'#':
break
while True:
- ch = self.reader.peek(length)
+ ch = self.peek(length)
if ch in u'\0 \t\r\n\x85\u2028\u2029' \
or (not self.flow_level and ch == u':' and
- self.reader.peek(length+1) in u'\0 \t\r\n\x28\u2028\u2029') \
+ self.peek(length+1) in u'\0 \t\r\n\x85\u2028\u2029') \
or (self.flow_level and ch in u',:?[]{}'):
break
length += 1
@@ -1311,12 +1306,12 @@ class Scanner:
break
self.allow_simple_key = False
chunks.extend(spaces)
- chunks.append(self.reader.prefix(length))
- self.reader.forward(length)
- end_mark = self.reader.get_mark()
+ chunks.append(self.prefix(length))
+ self.forward(length)
+ end_mark = self.get_mark()
spaces = self.scan_plain_spaces(indent, start_mark)
- if not spaces or self.reader.peek() == u'#' \
- or (not self.flow_level and self.reader.column < indent):
+ if not spaces or self.peek() == u'#' \
+ or (not self.flow_level and self.column < indent):
break
return ScalarToken(u''.join(chunks), True, start_mark, end_mark)
@@ -1326,27 +1321,27 @@ class Scanner:
# We just forbid them completely. Do not use tabs in YAML!
chunks = []
length = 0
- while self.reader.peek(length) in u' ':
+ while self.peek(length) in u' ':
length += 1
- whitespaces = self.reader.prefix(length)
- self.reader.forward(length)
- ch = self.reader.peek()
+ whitespaces = self.prefix(length)
+ self.forward(length)
+ ch = self.peek()
if ch in u'\r\n\x85\u2028\u2029':
line_break = self.scan_line_break()
self.allow_simple_key = True
- prefix = self.reader.prefix(3)
+ prefix = self.prefix(3)
if (prefix == u'---' or prefix == u'...') \
- and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+ and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
return
breaks = []
- while self.reader.peek() in u' \r\n\x85\u2028\u2029':
- if self.reader.peek() == ' ':
- self.reader.forward()
+ while self.peek() in u' \r\n\x85\u2028\u2029':
+ if self.peek() == ' ':
+ self.forward()
else:
breaks.append(self.scan_line_break())
- prefix = self.reader.prefix(3)
+ prefix = self.prefix(3)
if (prefix == u'---' or prefix == u'...') \
- and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+ and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
return
if line_break != u'\n':
chunks.append(line_break)
@@ -1361,26 +1356,26 @@ class Scanner:
# See the specification for details.
    # For some strange reason, the specification does not allow '_' in
# tag handles. I have allowed it anyway.
- ch = self.reader.peek()
+ ch = self.peek()
if ch != u'!':
raise ScannerError("while scanning a %s" % name, start_mark,
"expected '!', but found %r" % ch.encode('utf-8'),
- self.reader.get_mark())
+ self.get_mark())
length = 1
- ch = self.reader.peek(length)
+ ch = self.peek(length)
if ch != u' ':
while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
or ch in u'-_':
length += 1
- ch = self.reader.peek(length)
+ ch = self.peek(length)
if ch != u'!':
- self.reader.forward(length)
+ self.forward(length)
raise ScannerError("while scanning a %s" % name, start_mark,
"expected '!', but found %r" % ch.encode('utf-8'),
- self.reader.get_mark())
+ self.get_mark())
length += 1
- value = self.reader.prefix(length)
- self.reader.forward(length)
+ value = self.prefix(length)
+ self.forward(length)
return value
def scan_tag_uri(self, name, start_mark):
@@ -1388,40 +1383,40 @@ class Scanner:
# Note: we do not check if URI is well-formed.
chunks = []
length = 0
- ch = self.reader.peek(length)
+ ch = self.peek(length)
while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
or ch in u'-;/?:@&=+$,_.!~*\'()[]%':
if ch == u'%':
- chunks.append(self.reader.prefix(length))
- self.reader.forward(length)
+ chunks.append(self.prefix(length))
+ self.forward(length)
length = 0
chunks.append(self.scan_uri_escapes(name, start_mark))
else:
length += 1
- ch = self.reader.peek(length)
+ ch = self.peek(length)
if length:
- chunks.append(self.reader.prefix(length))
- self.reader.forward(length)
+ chunks.append(self.prefix(length))
+ self.forward(length)
length = 0
if not chunks:
raise ScannerError("while parsing a %s" % name, start_mark,
"expected URI, but found %r" % ch.encode('utf-8'),
- self.reader.get_mark())
+ self.get_mark())
return u''.join(chunks)
def scan_uri_escapes(self, name, start_mark):
# See the specification for details.
bytes = []
- mark = self.reader.get_mark()
- while self.reader.peek() == u'%':
- self.reader.forward()
+ mark = self.get_mark()
+ while self.peek() == u'%':
+ self.forward()
for k in range(2):
- if self.reader.peek(k) not in u'0123456789ABCDEFabcdef':
+ if self.peek(k) not in u'0123456789ABCDEFabcdef':
raise ScannerError("while scanning a %s" % name, start_mark,
"expected URI escape sequence of 2 hexdecimal numbers, but found %r" %
- (self.reader.peek(k).encode('utf-8')), self.reader.get_mark())
- bytes.append(chr(int(self.reader.prefix(2), 16)))
- self.reader.forward(2)
+ (self.peek(k).encode('utf-8')), self.get_mark())
+ bytes.append(chr(int(self.prefix(2), 16)))
+ self.forward(2)
try:
value = unicode(''.join(bytes), 'utf-8')
except UnicodeDecodeError, exc:
@@ -1437,15 +1432,15 @@ class Scanner:
# '\u2028' : '\u2028'
    # '\u2029' : '\u2029'
# default : ''
- ch = self.reader.peek()
+ ch = self.peek()
if ch in u'\r\n\x85':
- if self.reader.prefix(2) == u'\r\n':
- self.reader.forward(2)
+ if self.prefix(2) == u'\r\n':
+ self.forward(2)
else:
- self.reader.forward()
+ self.forward()
return u'\n'
elif ch in u'\u2028\u2029':
- self.reader.forward()
+ self.forward()
return ch
return u''
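
Every scanner hunk above is the same mechanical change: self.reader.peek()
becomes self.peek(), self.reader.forward() becomes self.forward(), and so on.
That only works if Reader and Scanner are now base classes of one object
rather than separately wrapped instances. A minimal sketch of that
composition, with toy bodies (only the method names come from this diff):

    class Reader(object):
        def __init__(self, data):
            self.data = data
            self.pointer = 0
        def peek(self, index=0):
            # Look ahead without consuming; pad with NUL past the end.
            return self.data[self.pointer+index:self.pointer+index+1] or u'\0'
        def forward(self, length=1):
            # Consume `length` characters.
            self.pointer += length

    class Scanner(object):
        def scan_to_break(self):
            # The scanner calls the reader methods on self directly now.
            while self.peek() not in u'\0\r\n':
                self.forward()

    class Loader(Reader, Scanner):
        # One object carries every stage of the loading pipeline.
        def __init__(self, data):
            Reader.__init__(self, data)
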
diff --git a/lib/yaml/serializer.py b/lib/yaml/serializer.py
index 5807a3a..bd79830 100644
--- a/lib/yaml/serializer.py
+++ b/lib/yaml/serializer.py
@@ -12,21 +12,21 @@ class Serializer:
ANCHOR_TEMPLATE = u'id%03d'
- def __init__(self, emitter, encoding=None, line_break=None, canonical=None,
- indent=None, width=None, allow_unicode=None):
- self.emitter = emitter
+ def __init__(self, encoding=None,
+ explicit_start=None, explicit_end=None, version=None, tags=None):
+ self.use_encoding = encoding
+ self.use_explicit_start = explicit_start
+ self.use_explicit_end = explicit_end
+ self.use_version = version
+ self.use_tags = tags
self.serialized_nodes = {}
self.anchors = {}
self.last_anchor_id = 0
self.closed = None
- self.open(encoding, line_break, canonical, indent, width, allow_unicode)
- def open(self, encoding=None, line_break=None, canonical=None,
- indent=None, width=None, allow_unicode=None):
+ def open(self):
if self.closed is None:
- self.emitter.emit(StreamStartEvent(encoding=encoding,
- line_break=line_break, canonical=canonical,
- indent=indent, width=width, allow_unicode=allow_unicode))
+ self.emit(StreamStartEvent(encoding=self.use_encoding))
self.closed = False
elif self.closed:
raise SerializerError("serializer is closed")
@@ -37,23 +37,22 @@ class Serializer:
if self.closed is None:
raise SerializerError("serializer is not opened")
elif not self.closed:
- self.emitter.emit(StreamEndEvent())
+ self.emit(StreamEndEvent())
self.closed = True
- def __del__(self):
- self.close()
+ #def __del__(self):
+ # self.close()
- def serialize(self, node, explicit_start=None, explicit_end=None,
- version=None, tags=None):
+ def serialize(self, node):
if self.closed is None:
raise SerializerError("serializer is not opened")
elif self.closed:
raise SerializerError("serializer is closed")
- self.emitter.emit(DocumentStartEvent(explicit=explicit_start,
- version=version, tags=tags))
+ self.emit(DocumentStartEvent(explicit=self.use_explicit_start,
+ version=self.use_version, tags=self.use_tags))
self.anchor_node(node)
self.serialize_node(node)
- self.emitter.emit(DocumentEndEvent(explicit=explicit_end))
+ self.emit(DocumentEndEvent(explicit=self.use_explicit_end))
self.serialized_nodes = {}
self.anchors = {}
        self.last_anchor_id = 0
@@ -79,23 +78,42 @@ class Serializer:
def serialize_node(self, node):
alias = self.anchors[node]
if node in self.serialized_nodes:
- self.emitter.emit(AliasEvent(alias))
+ self.emit(AliasEvent(alias))
else:
self.serialized_nodes[node] = True
if isinstance(node, ScalarNode):
- self.emitter.emit(ScalarEvent(alias, node.tag, node.value,
- implicit=node.implicit, style=node.style))
+ detected_tag = self.detect(node.value)
+                implicit = (node.tag == detected_tag
+ or (node.tag == self.DEFAULT_SCALAR_TAG
+ and detected_tag is None))
+ self.emit(ScalarEvent(alias, node.tag, implicit, node.value,
+ style=node.style))
elif isinstance(node, SequenceNode):
- self.emitter.emit(SequenceStartEvent(alias, node.tag,
+ # TODO:
+ # 1) Check the current path in the Resolver.
+ # 2) Add the implicit flag to the SequenceStartEvent and
+ # MappingStartEvent.
+ tag = node.tag
+ if tag == self.DEFAULT_SEQUENCE_TAG and not self.canonical:
+ tag = None
+ self.emit(SequenceStartEvent(alias, tag,
flow_style=node.flow_style))
for item in node.value:
self.serialize_node(item)
- self.emitter.emit(SequenceEndEvent())
+ self.emit(SequenceEndEvent())
elif isinstance(node, MappingNode):
- self.emitter.emit(MappingStartEvent(alias, node.tag,
+ tag = node.tag
+ if tag == self.DEFAULT_MAPPING_TAG and not self.canonical:
+ tag = None
+ self.emit(MappingStartEvent(alias, tag,
flow_style=node.flow_style))
- for key in node.value:
- self.serialize_node(key)
- self.serialize_node(node.value[key])
- self.emitter.emit(MappingEndEvent())
+ if hasattr(node.value, 'keys'):
+ for key in node.value.keys():
+ self.serialize_node(key)
+ self.serialize_node(node.value[key])
+ else:
+ for key, value in node.value:
+ self.serialize_node(key)
+ self.serialize_node(value)
+ self.emit(MappingEndEvent())
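
The Serializer now fixes its options at construction time and emits through
an inherited self.emit() instead of a wrapped emitter object. A usage sketch
of the resulting open/serialize/close lifecycle; the Dumper class and its
exact signature are assumptions (the dumper.py changes are not shown here):

    import StringIO

    stream = StringIO.StringIO()
    dumper = Dumper(stream, explicit_start=True)  # assumed signature
    dumper.open()                          # emits StreamStartEvent once
    dumper.serialize(compose(u'[1, 2]'))   # one document per call
    dumper.serialize(compose(u'{a: b}'))
    dumper.close()                         # emits StreamEndEvent
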
diff --git a/lib/yaml/yaml_object.py b/lib/yaml/yaml_object.py
deleted file mode 100644
index b66bd1a..0000000
--- a/lib/yaml/yaml_object.py
+++ /dev/null
@@ -1,34 +0,0 @@
-
-__all__ = ['YAMLObject', 'YAMLObjectMetaclass']
-
-from constructor import *
-from representer import *
-
-class YAMLObjectMetaclass(type):
-
- def __init__(cls, name, bases, kwds):
- super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds)
- if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None:
- cls.yaml_constructor.add_constructor(cls.yaml_tag, cls.from_yaml)
- cls.yaml_representer.add_representer(cls, cls.to_yaml)
-
-class YAMLObject(object):
-
- __metaclass__ = YAMLObjectMetaclass
-
- yaml_constructor = Constructor
- yaml_representer = Representer
-
- yaml_tag = None
-
- def from_yaml(cls, constructor, node):
- raise ConstructorError(None, None,
- "found undefined constructor for the tag %r"
- % node.tag.encode('utf-8'), node.start_mark)
- from_yaml = classmethod(from_yaml)
-
- def to_yaml(cls, representer, native):
- raise RepresenterError(
- "found undefined representer for the object: %s" % native)
- to_yaml = classmethod(to_yaml)
-
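
yaml_object.py is deleted, but YAMLObject itself survives the refactoring:
the tests below subclass it with yaml_loader/yaml_dumper class attributes in
place of the old yaml_constructor/yaml_representer. A sketch of the new-style
declaration (the Monster class is illustrative; only the attribute names are
confirmed by the tests):

    class Monster(YAMLObject):
        # Registration now targets Loader/Dumper classes rather than
        # Constructor/Representer classes.
        yaml_loader = Loader
        yaml_dumper = Dumper
        yaml_tag = u'!Monster'

        def __init__(self, name=None):
            self.name = name
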
diff --git a/tests/data/construct-custom.code b/tests/data/construct-custom.code
index bcc283f..2d5f063 100644
--- a/tests/data/construct-custom.code
+++ b/tests/data/construct-custom.code
@@ -6,4 +6,5 @@
MyTestClass3(x=1),
MyTestClass3(x=1, y=2, z=3),
MyTestClass3(x=1, y=2, z=3),
+ YAMLObject1(my_parameter='foo', my_another_parameter=[1,2,3])
]
diff --git a/tests/data/construct-custom.data b/tests/data/construct-custom.data
index 053d028..9db0f64 100644
--- a/tests/data/construct-custom.data
+++ b/tests/data/construct-custom.data
@@ -21,3 +21,6 @@
=: 1
'y': 2
z: 3
+- !foo
+ my-parameter: foo
+ my-another-parameter: [1,2,3]
diff --git a/tests/data/spec-08-07.canonical b/tests/data/spec-08-07.canonical
index eda1adf..e2f43d9 100644
--- a/tests/data/spec-08-07.canonical
+++ b/tests/data/spec-08-07.canonical
@@ -3,5 +3,6 @@
!!seq [
!<tag:yaml.org,2002:str> "12",
!<tag:yaml.org,2002:int> "12",
- !<tag:yaml.org,2002:str> "12",
+# !<tag:yaml.org,2002:str> "12",
+ !<tag:yaml.org,2002:int> "12",
]
diff --git a/tests/data/tags.events b/tests/data/tags.events
index 4271d47..bb93dce 100644
--- a/tests/data/tags.events
+++ b/tests/data/tags.events
@@ -2,7 +2,7 @@
- !DocumentStart
- !SequenceStart
- !Scalar { value: 'data' }
-- !Scalar { tag: '!', value: 'data' }
+#- !Scalar { tag: '!', value: 'data' }
- !Scalar { tag: 'tag:yaml.org,2002:str', value: 'data' }
- !Scalar { tag: '!myfunnytag', value: 'data' }
- !Scalar { tag: '!my!ugly!tag', value: 'data' }
diff --git a/tests/test_appliance.py b/tests/test_appliance.py
index 010f06f..bf907f3 100644
--- a/tests/test_appliance.py
+++ b/tests/test_appliance.py
@@ -1,8 +1,7 @@
import unittest, os
-from yaml.tokens import *
-from yaml.events import *
+from yaml import *
class TestAppliance(unittest.TestCase):
@@ -43,52 +42,74 @@ class CanonicalScanner:
def __init__(self, data):
self.data = unicode(data, 'utf-8')+u'\0'
self.index = 0
+ self.scan()
+
+ def check_token(self, *choices):
+ if self.tokens:
+ if not choices:
+ return True
+ for choice in choices:
+ if isinstance(self.tokens[0], choice):
+ return True
+ return False
+
+ def peek_token(self):
+ if self.tokens:
+ return self.tokens[0]
+
+ def get_token(self, choice=None):
+ token = self.tokens.pop(0)
+ if choice and not isinstance(token, choice):
+ raise Error("unexpected token "+repr(token))
+ return token
+
+ def get_token_value(self):
+ token = self.get_token()
+ return token.value
def scan(self):
- #print self.data[self.index:]
- tokens = []
- tokens.append(StreamStartToken(None, None))
+ self.tokens = []
+ self.tokens.append(StreamStartToken(None, None))
while True:
self.find_token()
ch = self.data[self.index]
if ch == u'\0':
- tokens.append(StreamEndToken(None, None))
+ self.tokens.append(StreamEndToken(None, None))
break
elif ch == u'%':
- tokens.append(self.scan_directive())
+ self.tokens.append(self.scan_directive())
elif ch == u'-' and self.data[self.index:self.index+3] == u'---':
self.index += 3
- tokens.append(DocumentStartToken(None, None))
+ self.tokens.append(DocumentStartToken(None, None))
elif ch == u'[':
self.index += 1
- tokens.append(FlowSequenceStartToken(None, None))
+ self.tokens.append(FlowSequenceStartToken(None, None))
elif ch == u'{':
self.index += 1
- tokens.append(FlowMappingStartToken(None, None))
+ self.tokens.append(FlowMappingStartToken(None, None))
elif ch == u']':
self.index += 1
- tokens.append(FlowSequenceEndToken(None, None))
+ self.tokens.append(FlowSequenceEndToken(None, None))
elif ch == u'}':
self.index += 1
- tokens.append(FlowMappingEndToken(None, None))
+ self.tokens.append(FlowMappingEndToken(None, None))
elif ch == u'?':
self.index += 1
- tokens.append(KeyToken(None, None))
+ self.tokens.append(KeyToken(None, None))
elif ch == u':':
self.index += 1
- tokens.append(ValueToken(None, None))
+ self.tokens.append(ValueToken(None, None))
elif ch == u',':
self.index += 1
- tokens.append(FlowEntryToken(None, None))
+ self.tokens.append(FlowEntryToken(None, None))
elif ch == u'*' or ch == u'&':
- tokens.append(self.scan_alias())
+ self.tokens.append(self.scan_alias())
elif ch == u'!':
- tokens.append(self.scan_tag())
+ self.tokens.append(self.scan_tag())
elif ch == u'"':
- tokens.append(self.scan_scalar())
+ self.tokens.append(self.scan_scalar())
else:
raise Error("invalid token")
- return tokens
DIRECTIVE = u'%YAML 1.1'
@@ -203,49 +224,49 @@ class CanonicalScanner:
class CanonicalParser:
- def __init__(self, data):
- self.scanner = CanonicalScanner(data)
+ def __init__(self):
self.events = []
+ self.parse()
# stream: STREAM-START document* STREAM-END
def parse_stream(self):
- self.consume_token(StreamStartToken)
+ self.get_token(StreamStartToken)
self.events.append(StreamStartEvent(None, None))
- while not self.test_token(StreamEndToken):
- if self.test_token(DirectiveToken, DocumentStartToken):
+ while not self.check_token(StreamEndToken):
+ if self.check_token(DirectiveToken, DocumentStartToken):
self.parse_document()
else:
raise Error("document is expected, got "+repr(self.tokens[self.index]))
- self.consume_token(StreamEndToken)
+ self.get_token(StreamEndToken)
self.events.append(StreamEndEvent(None, None))
# document: DIRECTIVE? DOCUMENT-START node
def parse_document(self):
node = None
- if self.test_token(DirectiveToken):
- self.consume_token(DirectiveToken)
- self.consume_token(DocumentStartToken)
+ if self.check_token(DirectiveToken):
+ self.get_token(DirectiveToken)
+ self.get_token(DocumentStartToken)
self.events.append(DocumentStartEvent(None, None))
self.parse_node()
self.events.append(DocumentEndEvent(None, None))
# node: ALIAS | ANCHOR? TAG? (SCALAR|sequence|mapping)
def parse_node(self):
- if self.test_token(AliasToken):
- self.events.append(AliasEvent(self.get_value(), None, None))
+ if self.check_token(AliasToken):
+ self.events.append(AliasEvent(self.get_token_value(), None, None))
else:
anchor = None
- if self.test_token(AnchorToken):
- anchor = self.get_value()
+ if self.check_token(AnchorToken):
+ anchor = self.get_token_value()
tag = None
- if self.test_token(TagToken):
- tag = self.get_value()
- if self.test_token(ScalarToken):
- self.events.append(ScalarEvent(anchor, tag, self.get_value(), None, None))
- elif self.test_token(FlowSequenceStartToken):
+ if self.check_token(TagToken):
+ tag = self.get_token_value()
+ if self.check_token(ScalarToken):
+ self.events.append(ScalarEvent(anchor, tag, False, self.get_token_value(), None, None))
+ elif self.check_token(FlowSequenceStartToken):
self.events.append(SequenceStartEvent(anchor, tag, None, None))
self.parse_sequence()
- elif self.test_token(FlowMappingStartToken):
+ elif self.check_token(FlowMappingStartToken):
self.events.append(MappingStartEvent(anchor, tag, None, None))
self.parse_mapping()
else:
@@ -253,66 +274,79 @@ class CanonicalParser:
# sequence: SEQUENCE-START (node (ENTRY node)*)? ENTRY? SEQUENCE-END
def parse_sequence(self):
- self.consume_token(FlowSequenceStartToken)
- if not self.test_token(FlowSequenceEndToken):
+ self.get_token(FlowSequenceStartToken)
+ if not self.check_token(FlowSequenceEndToken):
self.parse_node()
- while not self.test_token(FlowSequenceEndToken):
- self.consume_token(FlowEntryToken)
- if not self.test_token(FlowSequenceEndToken):
+ while not self.check_token(FlowSequenceEndToken):
+ self.get_token(FlowEntryToken)
+ if not self.check_token(FlowSequenceEndToken):
self.parse_node()
- self.consume_token(FlowSequenceEndToken)
+ self.get_token(FlowSequenceEndToken)
self.events.append(SequenceEndEvent(None, None))
# mapping: MAPPING-START (map_entry (ENTRY map_entry)*)? ENTRY? MAPPING-END
def parse_mapping(self):
- self.consume_token(FlowMappingStartToken)
- if not self.test_token(FlowMappingEndToken):
+ self.get_token(FlowMappingStartToken)
+ if not self.check_token(FlowMappingEndToken):
self.parse_map_entry()
- while not self.test_token(FlowMappingEndToken):
- self.consume_token(FlowEntryToken)
- if not self.test_token(FlowMappingEndToken):
+ while not self.check_token(FlowMappingEndToken):
+ self.get_token(FlowEntryToken)
+ if not self.check_token(FlowMappingEndToken):
self.parse_map_entry()
- self.consume_token(FlowMappingEndToken)
+ self.get_token(FlowMappingEndToken)
self.events.append(MappingEndEvent(None, None))
# map_entry: KEY node VALUE node
def parse_map_entry(self):
- self.consume_token(KeyToken)
+ self.get_token(KeyToken)
self.parse_node()
- self.consume_token(ValueToken)
+ self.get_token(ValueToken)
self.parse_node()
- def test_token(self, *choices):
- for choice in choices:
- if isinstance(self.tokens[self.index], choice):
- return True
- return False
-
- def consume_token(self, cls):
- if not isinstance(self.tokens[self.index], cls):
- raise Error("unexpected token "+repr(self.tokens[self.index]))
- self.index += 1
-
- def get_value(self):
- value = self.tokens[self.index].value
- self.index += 1
- return value
-
def parse(self):
- self.tokens = self.scanner.scan()
- self.index = 0
self.parse_stream()
- return self.events
- def get(self):
+ def get_event(self):
return self.events.pop(0)
- def check(self, *choices):
- for choice in choices:
- if isinstance(self.events[0], choice):
+ def check_event(self, *choices):
+ if self.events:
+ if not choices:
return True
+ for choice in choices:
+ if isinstance(self.events[0], choice):
+ return True
return False
- def peek(self):
+ def peek_event(self):
return self.events[0]
+class CanonicalLoader(CanonicalScanner, CanonicalParser, Composer, Constructor, Detector):
+
+ def __init__(self, stream):
+ if hasattr(stream, 'read'):
+ stream = stream.read()
+ CanonicalScanner.__init__(self, stream)
+ CanonicalParser.__init__(self)
+ Composer.__init__(self)
+ Constructor.__init__(self)
+ Detector.__init__(self)
+
+def canonical_scan(stream):
+ return scan(stream, Loader=CanonicalLoader)
+
+def canonical_parse(stream):
+ return parse(stream, Loader=CanonicalLoader)
+
+def canonical_compose(stream):
+ return compose(stream, Loader=CanonicalLoader)
+
+def canonical_compose_all(stream):
+ return compose_all(stream, Loader=CanonicalLoader)
+
+def canonical_load(stream):
+ return load(stream, Loader=CanonicalLoader)
+
+def canonical_load_all(stream):
+ return load_all(stream, Loader=CanonicalLoader)
+
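
The module-level wrappers above expose the canonical test appliance through
the same Loader= keyword the library entry points use. A usage sketch (the
YAML snippet and the results in the comments are illustrative):

    data = '%YAML 1.1\n--- !!seq [ !!str "one", !!str "two" ]\n'
    tokens = list(canonical_scan(data))    # StreamStartToken, ...
    events = list(canonical_parse(data))   # StreamStartEvent, ...
    sequence = canonical_load(data)        # e.g. [u'one', u'two']
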
diff --git a/tests/test_canonical.py b/tests/test_canonical.py
index 7fa85dc..4416902 100644
--- a/tests/test_canonical.py
+++ b/tests/test_canonical.py
@@ -5,15 +5,13 @@ class TestCanonicalAppliance(test_appliance.TestAppliance):
def _testCanonicalScanner(self, test_name, canonical_filename):
data = file(canonical_filename, 'rb').read()
- scanner = test_appliance.CanonicalScanner(data)
- tokens = scanner.scan()
+ tokens = list(test_appliance.canonical_scan(data))
#for token in tokens:
# print token
def _testCanonicalParser(self, test_name, canonical_filename):
data = file(canonical_filename, 'rb').read()
- parser = test_appliance.CanonicalParser(data)
- events = parser.parse()
+        events = list(test_appliance.canonical_parse(data))
#for event in events:
# print event
diff --git a/tests/test_constructor.py b/tests/test_constructor.py
index 794d3e4..1c666f1 100644
--- a/tests/test_constructor.py
+++ b/tests/test_constructor.py
@@ -11,7 +11,7 @@ except NameError:
from yaml import *
-class MyConstructor(Constructor):
+class MyLoader(Loader):
pass
class MyTestClass1:
@@ -28,11 +28,11 @@ def construct1(constructor, node):
mapping = constructor.construct_mapping(node)
return MyTestClass1(**mapping)
-MyConstructor.add_constructor("!tag1", construct1)
+MyLoader.add_constructor("!tag1", construct1)
class MyTestClass2(MyTestClass1, YAMLObject):
- yaml_constructor = MyConstructor
+ yaml_loader = MyLoader
yaml_tag = "!tag2"
def from_yaml(cls, constructor, node):
@@ -53,28 +53,41 @@ class MyTestClass3(MyTestClass2):
return cls(**mapping)
from_yaml = classmethod(from_yaml)
+class YAMLObject1(YAMLObject):
+ yaml_loader = MyLoader
+ yaml_tag = '!foo'
+
+ def __init__(self, my_parameter=None, my_another_parameter=None):
+ self.my_parameter = my_parameter
+ self.my_another_parameter = my_another_parameter
+
+ def __eq__(self, other):
+ if isinstance(other, YAMLObject1):
+            return (self.__class__, self.__dict__) == (other.__class__, other.__dict__)
+ else:
+ return False
+
class TestTypes(test_appliance.TestAppliance):
def _testTypes(self, test_name, data_filename, code_filename):
- natives1 = None
- natives2 = None
+ data1 = None
+ data2 = None
try:
- constructor1 = MyConstructor(Resolver(Composer(Parser(Scanner(Reader(file(data_filename, 'rb')))))))
- natives1 = list(iter(constructor1))
- if len(natives1) == 1:
- natives1 = natives1[0]
- natives2 = eval(file(code_filename, 'rb').read())
+ data1 = list(load_all(file(data_filename, 'rb'), Loader=MyLoader))
+ if len(data1) == 1:
+ data1 = data1[0]
+ data2 = eval(file(code_filename, 'rb').read())
try:
- self.failUnlessEqual(natives1, natives2)
+ self.failUnlessEqual(data1, data2)
except AssertionError:
- if isinstance(natives1, dict):
- natives1 = natives1.items()
- natives1.sort()
- natives1 = repr(natives1)
- natives2 = natives2.items()
- natives2.sort()
- natives2 = repr(natives2)
- if natives1 != natives2:
+ if isinstance(data1, dict):
+ data1 = data1.items()
+ data1.sort()
+ data1 = repr(data1)
+ data2 = data2.items()
+ data2.sort()
+ data2 = repr(data2)
+ if data1 != data2:
raise
except:
print
@@ -82,8 +95,8 @@ class TestTypes(test_appliance.TestAppliance):
print file(data_filename, 'rb').read()
print "CODE:"
print file(code_filename, 'rb').read()
- print "NATIVES1:", natives1
- print "NATIVES2:", natives2
+ print "NATIVES1:", data1
+ print "NATIVES2:", data2
raise
TestTypes.add_tests('testTypes', '.data', '.code')
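
Outside the test harness the registration pattern looks the same:
constructors hang off a Loader subclass and are picked up via the Loader=
keyword. A sketch with an illustrative !dice tag:

    class DiceLoader(Loader):
        pass

    def construct_dice(loader, node):
        # construct_scalar comes from the Constructor side of the Loader.
        a, b = loader.construct_scalar(node).split(u'd')
        return (int(a), int(b))

    DiceLoader.add_constructor(u'!dice', construct_dice)
    assert load(u'!dice 3d6', Loader=DiceLoader) == (3, 6)
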
diff --git a/tests/test_detector.py b/tests/test_detector.py
index 491929d..661b24b 100644
--- a/tests/test_detector.py
+++ b/tests/test_detector.py
@@ -1,12 +1,7 @@
import test_appliance
-from yaml.reader import Reader
-from yaml.scanner import Scanner
-from yaml.parser import *
-from yaml.composer import *
-from yaml.resolver import *
-from yaml.nodes import *
+from yaml import *
class TestDetector(test_appliance.TestAppliance):
@@ -15,8 +10,7 @@ class TestDetector(test_appliance.TestAppliance):
correct_tag = None
try:
correct_tag = file(detect_filename, 'rb').read().strip()
- resolver = Resolver(Composer(Parser(Scanner(Reader(file(data_filename, 'rb'))))))
- node = list(iter(resolver))[0]
+ node = compose(file(data_filename, 'rb'))
self.failUnless(isinstance(node, SequenceNode))
for scalar in node.value:
self.failUnless(isinstance(scalar, ScalarNode))
@@ -33,4 +27,3 @@ class TestDetector(test_appliance.TestAppliance):
TestDetector.add_tests('testDetector', '.data', '.detect')
-
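
compose() collapses the old five-stage Reader/Scanner/Parser/Composer/
Resolver chain into one call and returns the root node. A sketch (the input
is illustrative):

    node = compose(u'- 123\n- abc\n')
    assert isinstance(node, SequenceNode)
    for scalar in node.value:
        # each item is a ScalarNode with the tag the detector resolved
        print scalar.tag, scalar.value
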
diff --git a/tests/test_emitter.py b/tests/test_emitter.py
index eb0910d..0480924 100644
--- a/tests/test_emitter.py
+++ b/tests/test_emitter.py
@@ -16,15 +16,11 @@ class TestEmitter(test_appliance.TestAppliance):
self._testEmitter(test_name, canonical_filename, True)
def _testEmitter(self, test_name, filename, canonical=None):
- events = list(iter(Parser(Scanner(Reader(file(filename, 'rb'))))))
- if canonical is not None:
- events[0].canonical = canonical
- #self._dump(filename, events)
- writer = StringIO.StringIO()
- emitter = Emitter(writer)
- for event in events:
- emitter.emit(event)
- data = writer.getvalue()
+ events = list(parse(file(filename, 'rb')))
+ #self._dump(filename, events, canonical)
+ stream = StringIO.StringIO()
+ emit(events, stream, canonical=canonical)
+ data = stream.getvalue()
new_events = list(parse(data))
for event, new_event in zip(events, new_events):
self.failUnlessEqual(event.__class__, new_event.__class__)
@@ -38,22 +34,19 @@ class TestEmitter(test_appliance.TestAppliance):
self.failUnlessEqual(event.tag, new_event.tag)
self.failUnlessEqual(event.value, new_event.value)
- def _dump(self, filename, events):
- writer = sys.stdout
- emitter = Emitter(writer)
+ def _dump(self, filename, events, canonical):
print "="*30
print "ORIGINAL DOCUMENT:"
print file(filename, 'rb').read()
print '-'*30
print "EMITTED DOCUMENT:"
- for event in events:
- emitter.emit(event)
+ emit(events, sys.stdout, canonical=canonical)
TestEmitter.add_tests('testEmitterOnData', '.canonical', '.data')
-#TestEmitter.add_tests('testEmitterOnCanonicalNormally', '.canonical')
-#TestEmitter.add_tests('testEmitterOnCanonicalCanonically', '.canonical')
+TestEmitter.add_tests('testEmitterOnCanonicalNormally', '.canonical')
+TestEmitter.add_tests('testEmitterOnCanonicalCanonically', '.canonical')
-class EventsConstructor(Constructor):
+class EventsLoader(Loader):
def construct_event(self, node):
if isinstance(node, ScalarNode):
@@ -66,22 +59,21 @@ class EventsConstructor(Constructor):
if class_name in ['ScalarEvent', 'SequenceStartEvent', 'MappingStartEvent']:
mapping.setdefault('tag', None)
if class_name == 'ScalarEvent':
+ mapping.setdefault('implicit', False)
mapping.setdefault('value', '')
value = getattr(yaml, class_name)(**mapping)
return value
-EventsConstructor.add_constructor(None, EventsConstructor.construct_event)
+EventsLoader.add_constructor(None, EventsLoader.construct_event)
class TestEmitterEvents(test_appliance.TestAppliance):
def _testEmitterEvents(self, test_name, events_filename):
- events = list(load(file(events_filename, 'rb'), Constructor=EventsConstructor))
+ events = list(load(file(events_filename, 'rb'), Loader=EventsLoader))
#self._dump(events_filename, events)
- writer = StringIO.StringIO()
- emitter = Emitter(writer)
- for event in events:
- emitter.emit(event)
- data = writer.getvalue()
+ stream = StringIO.StringIO()
+ emit(events, stream)
+ data = stream.getvalue()
new_events = list(parse(data))
self.failUnlessEqual(len(events), len(new_events))
for event, new_event in zip(events, new_events):
@@ -96,15 +88,12 @@ class TestEmitterEvents(test_appliance.TestAppliance):
self.failUnlessEqual(event.value, new_event.value)
def _dump(self, events_filename, events):
- writer = sys.stdout
- emitter = Emitter(writer)
print "="*30
print "EVENTS:"
print file(events_filename, 'rb').read()
print '-'*30
print "OUTPUT:"
- for event in events:
- emitter.emit(event)
+ emit(events, sys.stdout)
TestEmitterEvents.add_tests('testEmitterEvents', '.events')
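
The emitter tests now reduce to the parse()/emit() pair. The round trip they
rely on, in isolation (document text illustrative):

    import StringIO

    events = list(parse(u'- one\n- two\n'))
    stream = StringIO.StringIO()
    emit(events, stream)
    new_events = list(parse(stream.getvalue()))
    assert len(events) == len(new_events)
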
diff --git a/tests/test_errors.py b/tests/test_errors.py
index 626b4e4..898c5a1 100644
--- a/tests/test_errors.py
+++ b/tests/test_errors.py
@@ -15,13 +15,7 @@ class TestErrors(test_appliance.TestAppliance):
def _load(self, filename):
try:
- reader = Reader(file(filename, 'rb'))
- scanner = Scanner(reader)
- parser = Parser(scanner)
- composer = Composer(parser)
- resolver = Resolver(composer)
- constructor = Constructor(resolver)
- return list(constructor)
+ return list(load_all(file(filename, 'rb')))
except YAMLError, exc:
#except ScannerError, exc:
#except ParserError, exc:
@@ -33,13 +27,7 @@ class TestErrors(test_appliance.TestAppliance):
def _load_string(self, filename):
try:
- reader = Reader(file(filename, 'rb').read())
- scanner = Scanner(reader)
- parser = Parser(scanner)
- composer = Composer(parser)
- resolver = Resolver(composer)
- constructor = Constructor(resolver)
- return list(constructor)
+ return list(load_all(file(filename, 'rb').read()))
except YAMLError, exc:
#except ScannerError, exc:
#except ParserError, exc:
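
Error handling likewise funnels through the top-level entry points now: any
failure inside the chain surfaces as a YAMLError subclass. A sketch (the
malformed input is illustrative):

    try:
        documents = list(load_all(u'foo: [unclosed'))
    except YAMLError, exc:
        print exc   # ScannerError/ParserError/... with problem marks
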
diff --git a/tests/test_representer.py b/tests/test_representer.py
index ffee7aa..82ad46e 100644
--- a/tests/test_representer.py
+++ b/tests/test_representer.py
@@ -12,9 +12,9 @@ except NameError:
from yaml import *
-class MyConstructor(Constructor):
+class MyLoader(Loader):
pass
-class MyRepresenter(Representer):
+class MyDumper(Dumper):
pass
class MyTestClass1(object):
@@ -38,7 +38,8 @@ def represent1(representer, native):
class MyTestClass2(MyTestClass1, YAMLObject):
- yaml_constructor = MyConstructor
+ yaml_loader = MyLoader
+ yaml_dumper = MyDumper
yaml_tag = "!tag2"
def from_yaml(cls, constructor, node):
@@ -67,36 +68,52 @@ class MyTestClass3(MyTestClass2):
return representer.represent_mapping(cls.yaml_tag, native.__dict__)
to_yaml = classmethod(to_yaml)
-MyConstructor.add_constructor("!tag1", construct1)
-MyRepresenter.add_representer(MyTestClass1, represent1)
+MyLoader.add_constructor("!tag1", construct1)
+MyDumper.add_representer(MyTestClass1, represent1)
+
+class YAMLObject1(YAMLObject):
+ yaml_loader = MyLoader
+ yaml_dumper = MyDumper
+ yaml_tag = '!foo'
+ yaml_flow_style = True
+
+ def __init__(self, my_parameter=None, my_another_parameter=None):
+ self.my_parameter = my_parameter
+ self.my_another_parameter = my_another_parameter
+
+ def __eq__(self, other):
+ if isinstance(other, YAMLObject1):
+            return (self.__class__, self.__dict__) == (other.__class__, other.__dict__)
+ else:
+ return False
class TestTypeRepresenter(test_appliance.TestAppliance):
def _testTypes(self, test_name, data_filename, code_filename):
- natives1 = eval(file(code_filename, 'rb').read())
- natives2 = None
+ data1 = eval(file(code_filename, 'rb').read())
+ data2 = None
output = None
try:
- output = dump(natives1, Representer=MyRepresenter)
- natives2 = load(output, Constructor=MyConstructor)
+ output = dump(data1, Dumper=MyDumper)
+ data2 = load(output, Loader=MyLoader)
try:
- self.failUnlessEqual(natives1, natives2)
+ self.failUnlessEqual(data1, data2)
except AssertionError:
- if isinstance(natives1, dict):
- natives1 = natives1.items()
- natives1.sort()
- natives1 = repr(natives1)
- natives2 = natives2.items()
- natives2.sort()
- natives2 = repr(natives2)
- if natives1 != natives2:
+ if isinstance(data1, dict):
+ data1 = data1.items()
+ data1.sort()
+ data1 = repr(data1)
+ data2 = data2.items()
+ data2.sort()
+ data2 = repr(data2)
+ if data1 != data2:
raise
except:
print
print "OUTPUT:"
print output
- print "NATIVES1:", natives1
- print "NATIVES2:", natives2
+ print "NATIVES1:", data1
+ print "NATIVES2:", data2
raise
TestTypeRepresenter.add_tests('testTypes', '.data', '.code')
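
What the representer test round-trips, spelled out; the emitted text in the
comment is only a guess based on yaml_flow_style = True:

    obj = YAMLObject1(my_parameter='foo', my_another_parameter=[1, 2, 3])
    text = dump(obj, Dumper=MyDumper)
    # roughly: !foo {my_another_parameter: [1, 2, 3], my_parameter: foo}
    assert load(text, Loader=MyLoader) == obj
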
diff --git a/tests/test_structure.py b/tests/test_structure.py
index cb9a017..b1fd91c 100644
--- a/tests/test_structure.py
+++ b/tests/test_structure.py
@@ -9,14 +9,14 @@ class TestStructure(test_appliance.TestAppliance):
node1 = None
node2 = eval(file(structure_filename, 'rb').read())
try:
- parser = Parser(Scanner(Reader(file(data_filename, 'rb'))))
+ loader = Loader(file(data_filename, 'rb'))
node1 = []
- while not parser.check(StreamEndEvent):
- if not parser.check(StreamStartEvent, DocumentStartEvent, DocumentEndEvent):
- node1.append(self._convert(parser))
+ while not loader.check_event(StreamEndEvent):
+ if not loader.check_event(StreamStartEvent, DocumentStartEvent, DocumentEndEvent):
+ node1.append(self._convert(loader))
else:
- parser.get()
- parser.get()
+ loader.get_event()
+ loader.get_event()
if len(node1) == 1:
node1 = node1[0]
self.failUnlessEqual(node1, node2)
@@ -28,34 +28,34 @@ class TestStructure(test_appliance.TestAppliance):
print "NODE2:", node2
raise
- def _convert(self, parser):
- if parser.check(ScalarEvent):
- event = parser.get()
+ def _convert(self, loader):
+ if loader.check_event(ScalarEvent):
+ event = loader.get_event()
if event.tag or event.anchor or event.value:
return True
else:
return None
- elif parser.check(SequenceStartEvent):
- parser.get()
+ elif loader.check_event(SequenceStartEvent):
+ loader.get_event()
sequence = []
- while not parser.check(SequenceEndEvent):
- sequence.append(self._convert(parser))
- parser.get()
+ while not loader.check_event(SequenceEndEvent):
+ sequence.append(self._convert(loader))
+ loader.get_event()
return sequence
- elif parser.check(MappingStartEvent):
- parser.get()
+ elif loader.check_event(MappingStartEvent):
+ loader.get_event()
mapping = []
- while not parser.check(MappingEndEvent):
- key = self._convert(parser)
- value = self._convert(parser)
+ while not loader.check_event(MappingEndEvent):
+ key = self._convert(loader)
+ value = self._convert(loader)
mapping.append((key, value))
- parser.get()
+ loader.get_event()
return mapping
- elif parser.check(AliasEvent):
- parser.get()
+ elif loader.check_event(AliasEvent):
+ loader.get_event()
return '*'
else:
- parser.get()
+ loader.get_event()
return '?'
TestStructure.add_tests('testStructure', '.data', '.structure')
@@ -66,10 +66,8 @@ class TestParser(test_appliance.TestAppliance):
events1 = None
events2 = None
try:
- parser = Parser(Scanner(Reader(file(data_filename, 'rb'))))
- events1 = list(iter(parser))
- canonical = test_appliance.CanonicalParser(file(canonical_filename, 'rb').read())
- events2 = canonical.parse()
+ events1 = list(parse(file(data_filename, 'rb')))
+ events2 = list(test_appliance.canonical_parse(file(canonical_filename, 'rb')))
self._compare(events1, events2)
except:
print
@@ -105,12 +103,8 @@ class TestResolver(test_appliance.TestAppliance):
nodes1 = None
nodes2 = None
try:
- resolver1 = Resolver(Composer(Parser(Scanner(Reader(file(data_filename, 'rb'))))))
- nodes1 = list(iter(resolver1))
- canonical = test_appliance.CanonicalParser(file(canonical_filename, 'rb').read())
- canonical.parse()
- resolver2 = Resolver(Composer(canonical))
- nodes2 = list(iter(resolver2))
+ nodes1 = list(compose_all(file(data_filename, 'rb')))
+ nodes2 = list(test_appliance.canonical_compose_all(file(canonical_filename, 'rb')))
self.failUnlessEqual(len(nodes1), len(nodes2))
for node1, node2 in zip(nodes1, nodes2):
self._compare(node1, node2)
@@ -147,7 +141,7 @@ class TestResolver(test_appliance.TestAppliance):
TestResolver.add_tests('testResolver', '.data', '.canonical')
-class MyConstructor(Constructor):
+class MyConstructor:
def construct_sequence(self, node):
return tuple(Constructor.construct_sequence(self, node))
@@ -157,29 +151,34 @@ class MyConstructor(Constructor):
pairs.sort()
return pairs
-MyConstructor.add_constructor(None, MyConstructor.construct_scalar)
+ def construct_undefined(self, node):
+ return self.construct_scalar(node)
+
+class MyLoader(MyConstructor, Loader):
+ pass
+MyLoader.add_constructor(None, MyLoader.construct_undefined)
+
+class MyCanonicalLoader(MyConstructor, test_appliance.CanonicalLoader):
+ pass
+MyCanonicalLoader.add_constructor(None, MyCanonicalLoader.construct_undefined)
class TestConstructor(test_appliance.TestAppliance):
def _testConstructor(self, test_name, data_filename, canonical_filename):
- natives1 = None
- natives2 = None
+ data1 = None
+ data2 = None
try:
- constructor1 = MyConstructor(Resolver(Composer(Parser(Scanner(Reader(file(data_filename, 'rb')))))))
- natives1 = list(iter(constructor1))
- canonical = test_appliance.CanonicalParser(file(canonical_filename, 'rb').read())
- canonical.parse()
- constructor2 = MyConstructor(Resolver(Composer(canonical)))
- natives2 = list(iter(constructor2))
- self.failUnlessEqual(natives1, natives2)
+ data1 = list(load_all(file(data_filename, 'rb'), Loader=MyLoader))
+ data2 = list(load_all(file(canonical_filename, 'rb'), Loader=MyCanonicalLoader))
+ self.failUnlessEqual(data1, data2)
except:
print
print "DATA1:"
print file(data_filename, 'rb').read()
print "DATA2:"
print file(canonical_filename, 'rb').read()
- print "NATIVES1:", natives1
- print "NATIVES2:", natives2
+ print "NATIVES1:", data1
+ print "NATIVES2:", data2
raise
TestConstructor.add_tests('testConstructor', '.data', '.canonical')
@@ -190,10 +189,8 @@ class TestParserOnCanonical(test_appliance.TestAppliance):
events1 = None
events2 = None
try:
- parser = Parser(Scanner(Reader(file(canonical_filename, 'rb'))))
- events1 = list(iter(parser))
- canonical = test_appliance.CanonicalParser(file(canonical_filename, 'rb').read())
- events2 = canonical.parse()
+ events1 = list(parse(file(canonical_filename, 'rb')))
+ events2 = list(test_appliance.canonical_parse(file(canonical_filename, 'rb')))
self._compare(events1, events2)
except:
print
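
The event-pulling interface used throughout the rewritten structure test, in
isolation: check_event() peeks (optionally filtered by event class) and
get_event() consumes. A sketch:

    loader = Loader(u'- a\n- b\n')
    while not loader.check_event(StreamEndEvent):
        event = loader.get_event()
        # StreamStartEvent, DocumentStartEvent, SequenceStartEvent, ...
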
diff --git a/tests/test_tokens.py b/tests/test_tokens.py
index 38026d5..73d07b3 100644
--- a/tests/test_tokens.py
+++ b/tests/test_tokens.py
@@ -1,9 +1,7 @@
import test_appliance
-from yaml.reader import *
-from yaml.tokens import *
-from yaml.scanner import *
+from yaml import *
class TestTokens(test_appliance.TestAppliance):
@@ -51,9 +49,8 @@ class TestTokens(test_appliance.TestAppliance):
tokens1 = None
tokens2 = file(tokens_filename, 'rb').read().split()
try:
- scanner = Scanner(Reader(file(data_filename, 'rb')))
tokens1 = []
- for token in scanner:
+ for token in scan(file(data_filename, 'rb')):
if not isinstance(token, (StreamStartToken, StreamEndToken)):
tokens1.append(token)
tokens1 = [self.replaces[t.__class__] for t in tokens1]
@@ -74,9 +71,8 @@ class TestScanner(test_appliance.TestAppliance):
for filename in [canonical_filename, data_filename]:
tokens = None
try:
- scanner = Scanner(Reader(file(filename, 'rb')))
tokens = []
- for token in scanner:
+ for token in scan(file(filename, 'rb')):
if not isinstance(token, (StreamStartToken, StreamEndToken)):
tokens.append(token.__class__.__name__)
except: