diff options
author | xi <xi@18f92427-320e-0410-9341-c67f048884a3> | 2006-04-15 23:54:52 +0000 |
---|---|---|
committer | xi <xi@18f92427-320e-0410-9341-c67f048884a3> | 2006-04-15 23:54:52 +0000 |
commit | 6d52fd5c231b9810da0e68d0e24f954803229357 (patch) | |
tree | 01a40354d0f9dda749f4601b6a21e1e8b85ebc2e | |
parent | 5b3ffc5848ec0b8c5555fc66e6f67b9baab4739a (diff) | |
download | pyyaml-6d52fd5c231b9810da0e68d0e24f954803229357.tar.gz |
Major refactoring.
git-svn-id: http://svn.pyyaml.org/pyyaml/trunk@136 18f92427-320e-0410-9341-c67f048884a3
31 files changed, 1622 insertions, 1226 deletions
diff --git a/examples/yaml-hl/yaml_hl.py b/examples/yaml-hl/yaml_hl.py index dd81b3f..0801521 100755 --- a/examples/yaml-hl/yaml_hl.py +++ b/examples/yaml-hl/yaml_hl.py @@ -2,6 +2,14 @@ import yaml, codecs, sys, optparse + + +yaml.add_resolver(u'!Config', []) +yaml.add_resolver(u'!TokensConfig', [u'tokens']) +yaml.add_resolver(u'!EventsConfig', [u'events']) +yaml.add_resolver(u'!StartEndConfig', [u'tokens', None]) +yaml.add_resolver(u'!StartEndConfig', [u'events', None]) + class YAMLHighlight: def __init__(self, config): diff --git a/lib/yaml/__init__.py b/lib/yaml/__init__.py index e6a3526..15fae85 100644 --- a/lib/yaml/__init__.py +++ b/lib/yaml/__init__.py @@ -5,7 +5,6 @@ from reader import * from scanner import * from parser import * from composer import * -from resolver import * from constructor import * from emitter import * @@ -18,87 +17,257 @@ from tokens import * from events import * from nodes import * -from yaml_object import * - -def parse(data, Reader=Reader, Scanner=Scanner, Parser=Parser): - reader = Reader(data) - scanner = Scanner(reader) - parser = Parser(scanner) - return parser - -def load_all(data, Reader=Reader, Scanner=Scanner, Parser=Parser, - Composer=Composer, Resolver=Resolver, Constructor=Constructor): - reader = Reader(data) - scanner = Scanner(reader) - parser = Parser(scanner) - composer = Composer(parser) - resolver = Resolver(composer) - constructor = Constructor(resolver) - return constructor - -def safe_load_all(data, Reader=Reader, Scanner=Scanner, Parser=Parser, - Composer=Composer, Resolver=Resolver, Constructor=SafeConstructor): - return load_all(data, Reader, Scanner, Parser, Composer, Resolver, - Constructor) - -def load(data, *args, **kwds): - for document in load_all(data, *args, **kwds): - return document - -def safe_load(data, *args, **kwds): - for document in safe_load_all(data, *args, **kwds): - return document - -def emit(events, writer=None, Emitter=Emitter): - if writer is None: +from loader import * +from dumper import * + +def scan(stream, Loader=Loader): + """ + Scan a YAML stream and produce scanning tokens. + """ + loader = Loader(stream) + while loader.check_token(): + yield loader.get_token() + +def parse(stream, Loader=Loader): + """ + Parse a YAML stream and produce parsing events. + """ + loader = Loader(stream) + while loader.check_event(): + yield loader.get_event() + +def compose(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding representation tree. + """ + loader = Loader(stream) + if loader.check_node(): + return loader.get_node() + +def compose_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponsing representation trees. + """ + loader = Loader(stream) + while loader.check_node(): + yield loader.get_node() + +def load_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + """ + loader = Loader(stream) + while loader.check_data(): + yield loader.get_data() + +def load(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + """ + loader = Loader(stream) + if loader.check_data(): + return loader.get_data() + +def safe_load_all(stream): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + Resolve only basic YAML tags. + """ + return load_all(stream, SafeLoader) + +def safe_load(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + Resolve only basic YAML tags. + """ + return load(stream, SafeLoader) + +def emit(events, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + """ + Emit YAML parsing events into a stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: try: from cStringIO import StringIO except ImportError: from StringIO import StringIO - writer = StringIO() - return_value = True - else: - return_value = False - emitter = Emitter(writer) + stream = StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) for event in events: - emitter.emit(event) - if return_value: - return writer.getvalue() - -def dump_all(natives, writer=None, Emitter=Emitter, - Serializer=Serializer, Representer=Representer, - encoding='utf-8', line_break=None, canonical=None, - indent=None, width=None, allow_unicode=None): - if writer is None: + dumper.emit(event) + if getvalue: + return getvalue() + +def serialize_all(nodes, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding='utf-8', explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of representation trees into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + try: + from cStringIO import StringIO + except ImportError: + from StringIO import StringIO + stream = StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + dumper.open() + for node in nodes: + dumper.serialize(node) + dumper.close() + if getvalue: + return getvalue() + +def serialize(node, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a representation tree into a YAML stream. + If stream is None, return the produced string instead. + """ + return serialize_all([node], stream, Dumper=Dumper, **kwds) + +def dump_all(documents, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding='utf-8', explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of Python objects into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: try: from cStringIO import StringIO except ImportError: from StringIO import StringIO - writer = StringIO() - return_value = True - else: - return_value = False - emitter = Emitter(writer) - serializer = Serializer(emitter, encoding=encoding, line_break=line_break, - canonical=canonical, indent=indent, width=width, - allow_unicode=allow_unicode) - representer = Representer(serializer) - for native in natives: - representer.represent(native) - representer.close() - if return_value: - return writer.getvalue() - -def safe_dump_all(natives, writer=None, Emitter=Emitter, - Serializer=Serializer, Representer=SafeRepresenter, - encoding='utf-8', line_break=None, canonical=None, - indent=None, width=None, allow_unicode=None): - return dump_all(natives, writer, Emitter, Serializer, Representer, - encoding, line_break, canonical, indent, width, allow_unicode) - -def dump(native, *args, **kwds): - return dump_all([native], *args, **kwds) - -def safe_dump(native, *args, **kwds): - return safe_dump_all([native], *args, **kwds) + stream = StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + dumper.open() + for data in documents: + dumper.represent(data) + dumper.close() + if getvalue: + return getvalue() + +def dump(data, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a Python object into a YAML stream. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=Dumper, **kwds) + +def safe_dump_all(documents, stream=None, **kwds): + """ + Serialize a sequence of Python objects into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all(documents, stream, Dumper=SafeDumper, **kwds) + +def safe_dump(data, stream=None, **kwds): + """ + Serialize a Python object into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=SafeDumper, **kwds) + +def add_detector(tag, regexp, first=None, Loader=Loader, Dumper=Dumper): + """ + Add an implicit scalar detector. + If an implicit scalar value matches the given regexp, + the corresponding tag is assigned to the scalar. + first is a sequence of possible initial characters or None. + """ + Loader.add_detector(tag, regexp, first) + Dumper.add_detector(tag, regexp, first) + +def add_resolver(tag, path, Loader=Loader): + """ + Add a path based resolver for the given tag. + A path is a list of keys that forms a path + to a node in the representation tree. + Keys can be string values, integers, or None. + """ + Loader.add_resolver(tag, path) + +def add_constructor(tag, constructor, Loader=Loader): + """ + Add a constructor for the given tag. + Constructor is a function that accepts a Loader instance + and a node object and produces the corresponding Python object. + """ + Loader.add_constructor(tag, constructor) + +def add_multi_constructor(tag_prefix, multi_constructor, Loader=Loader): + """ + Add a multi-constructor for the given tag prefix. + Multi-constructor is called for a node if its tag starts with tag_prefix. + Multi-constructor accepts a Loader instance, a tag suffix, + and a node object and produces the corresponding Python object. + """ + Loader.add_multi_constructor(tag_prefix, multi_constructor) + +class YAMLObjectMetaclass(type): + """ + The metaclass for YAMLObject. + """ + def __init__(cls, name, bases, kwds): + super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds) + if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None: + cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml) + cls.yaml_dumper.add_representer(cls, cls.to_yaml) + +class YAMLObject(object): + """ + An object that can dump itself to a YAML stream + and load itself from a YAML stream. + """ + + __metaclass__ = YAMLObjectMetaclass + + yaml_loader = Loader + yaml_dumper = Dumper + + yaml_tag = None + yaml_flow_style = None + + def from_yaml(cls, loader, node): + """ + Convert a representation node to a Python object. + """ + return loader.construct_yaml_object(node, cls) + from_yaml = classmethod(from_yaml) + + def to_yaml(cls, dumper, data): + """ + Convert a Python object to a representation node. + """ + return dumper.represent_yaml_object(cls.yaml_tag, data, cls, + flow_style=cls.yaml_flow_style) + to_yaml = classmethod(to_yaml) diff --git a/lib/yaml/composer.py b/lib/yaml/composer.py index bb26f36..4805091 100644 --- a/lib/yaml/composer.py +++ b/lib/yaml/composer.py @@ -1,5 +1,5 @@ -__all__ = ['Composer', 'ComposerError'] +__all__ = ['BaseComposer', 'Composer', 'ComposerError'] from error import MarkedYAMLError from events import * @@ -8,48 +8,85 @@ from nodes import * class ComposerError(MarkedYAMLError): pass -class Composer: +class BaseComposer: - def __init__(self, parser): - self.parser = parser + yaml_resolvers = {} + + def __init__(self): self.all_anchors = {} self.complete_anchors = {} + self.resolver_tags = [] + self.resolver_paths = [] - # Drop the STREAM-START event. - self.parser.get() - - def check(self): + def check_node(self): # If there are more documents available? - return not self.parser.check(StreamEndEvent) + return not self.check_event(StreamEndEvent) - def get(self): + def get_node(self): # Get the root node of the next document. - if not self.parser.check(StreamEndEvent): + if not self.check_event(StreamEndEvent): return self.compose_document() def __iter__(self): # Iterator protocol. - while not self.parser.check(StreamEndEvent): + while not self.check_event(StreamEndEvent): yield self.compose_document() def compose_document(self): + # Drop the STREAM-START event. + if self.check_event(StreamStartEvent): + self.get_event() + # Drop the DOCUMENT-START event. - self.parser.get() + self.get_event() # Compose the root node. - node = self.compose_node() + node = self.compose_node([]) # Drop the DOCUMENT-END event. - self.parser.get() + self.get_event() self.all_anchors = {} self.complete_anchors = {} + self.resolver_tags = [] + self.resolver_paths = [] return node - def compose_node(self): - if self.parser.check(AliasEvent): - event = self.parser.get() + def increase_resolver_depth(self, path): + depth = len(path) + tag = None + paths = [] + if not depth: + for resolver_path in self.yaml_resolvers.keys(): + if resolver_path: + paths.append(resolver_path) + else: + tag = self.yaml_resolvers[resolver_path] + else: + base, index = path[-1] + if isinstance(index, ScalarNode) \ + and index.tag == self.DEFAULT_SCALAR_TAG: + index = index.value + elif isinstance(index, Node): + index = None + for resolver_path in self.resolver_paths[-1]: + resolver_index = resolver_path[depth-1] + if resolver_index is None or resolver_index == index: + if len(resolver_index) > depth: + paths.append(resolver_path) + else: + tag = self.yaml_resolvers[resolver_path] + self.resolver_tags.append(tag) + self.resolver_paths.append(paths) + + def decrease_resolver_depth(self): + del self.resolver_tags[-1] + del self.resolver_paths[-1] + + def compose_node(self, path): + if self.check_event(AliasEvent): + event = self.get_event() anchor = event.anchor if anchor not in self.all_anchors: raise ComposerError(None, None, "found undefined alias %r" @@ -61,7 +98,8 @@ class Composer: "found recursive anchor %r" % anchor.encode('utf-8'), event.start_mark) return self.complete_anchors[anchor] - event = self.parser.peek() + self.increase_resolver_depth(path) + event = self.peek_event() anchor = event.anchor if anchor is not None: if anchor in self.all_anchors: @@ -69,44 +107,84 @@ class Composer: % anchor.encode('utf-8'), self.all_anchors[anchor].start_mark, "second occurence", event.start_mark) self.all_anchors[anchor] = event - if self.parser.check(ScalarEvent): - node = self.compose_scalar_node() - elif self.parser.check(SequenceStartEvent): - node = self.compose_sequence_node() - elif self.parser.check(MappingStartEvent): - node = self.compose_mapping_node() + if self.check_event(ScalarEvent): + node = self.compose_scalar_node(path) + elif self.check_event(SequenceStartEvent): + node = self.compose_sequence_node(path) + elif self.check_event(MappingStartEvent): + node = self.compose_mapping_node(path) if anchor is not None: self.complete_anchors[anchor] = node + self.decrease_resolver_depth() return node - def compose_scalar_node(self): - event = self.parser.get() - return ScalarNode(event.tag, event.value, event.implicit, + def compose_scalar_node(self, path): + event = self.get_event() + tag = self.resolve_scalar(path, event.tag, event.implicit, event.value) + return ScalarNode(tag, event.value, event.start_mark, event.end_mark, style=event.style) - def compose_sequence_node(self): - start_event = self.parser.get() - value = [] - while not self.parser.check(SequenceEndEvent): - value.append(self.compose_node()) - end_event = self.parser.get() - return SequenceNode(start_event.tag, value, - start_event.start_mark, end_event.end_mark, + def compose_sequence_node(self, path): + start_event = self.get_event() + tag = self.resolve_sequence(path, start_event.tag) + node = SequenceNode(tag, [], + start_event.start_mark, None, flow_style=start_event.flow_style) + index = 0 + while not self.check_event(SequenceEndEvent): + node.value.append(self.compose_node(path+[(node, index)])) + index += 1 + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node - def compose_mapping_node(self): - start_event = self.parser.get() - value = {} - while not self.parser.check(MappingEndEvent): - key_event = self.parser.peek() - item_key = self.compose_node() - item_value = self.compose_node() - if item_key in value: + def compose_mapping_node(self, path): + start_event = self.get_event() + tag = self.resolve_mapping(path, start_event.tag) + node = MappingNode(tag, {}, + start_event.start_mark, None, + flow_style=start_event.flow_style) + while not self.check_event(MappingEndEvent): + key_event = self.peek_event() + item_key = self.compose_node(path+[(node, None)]) + item_value = self.compose_node(path+[(node, item_key)]) + if item_key in node.value: raise ComposerError("while composing a mapping", start_event.start_mark, "found duplicate key", key_event.start_mark) - value[item_key] = item_value - end_event = self.parser.get() - return MappingNode(start_event.tag, value, - start_event.start_mark, end_event.end_mark, - flow_style=start_event.flow_style) + node.value[item_key] = item_value + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node + + def resolve_scalar(self, path, tag, implicit, value): + if implicit: + tag = self.detect(value) + if tag is None and self.resolver_tags[-1]: + tag = self.resolver_tags[-1] + if tag is None or tag == u'!': + tag = self.DEFAULT_SCALAR_TAG + return tag + + def resolve_sequence(self, path, tag): + if tag is None and self.resolver_tags[-1]: + tag = self.resolver_tags[-1] + if tag is None or tag == u'!': + tag = self.DEFAULT_SEQUENCE_TAG + return tag + + def resolve_mapping(self, path, tag): + if tag is None and self.resolver_tags[-1]: + tag = self.resolver_tags[-1] + if tag is None or tag == u'!': + tag = self.DEFAULT_MAPPING_TAG + return tag + + def add_resolver(self, tag, path): + if not 'yaml_resolvers' in cls.__dict__: + cls.yaml_resolvers = cls.yaml_resolvers.copy() + cls.yaml_resolvers[tuple(path)] = tag + add_resolver = classmethod(add_resolver) + +class Composer(BaseComposer): + pass diff --git a/lib/yaml/constructor.py b/lib/yaml/constructor.py index 88a82f3..9fa9085 100644 --- a/lib/yaml/constructor.py +++ b/lib/yaml/constructor.py @@ -4,6 +4,7 @@ __all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor', from error import * from nodes import * +from composer import * try: import datetime @@ -21,46 +22,62 @@ import binascii, re class ConstructorError(MarkedYAMLError): pass -class BaseConstructor: +class BaseConstructor(Composer): - def __init__(self, resolver): - self.resolver = resolver + yaml_constructors = {} + yaml_multi_constructors = {} + + def __init__(self): self.constructed_objects = {} - def check(self): + def check_data(self): # If there are more documents available? - return self.resolver.check() + return self.check_node() - def get(self): + def get_data(self): # Construct and return the next document. - if self.resolver.check(): - return self.construct_document(self.resolver.get()) + if self.check_node(): + return self.construct_document(self.get_node()) def __iter__(self): # Iterator protocol. - while self.resolver.check(): - yield self.construct_document(self.resolver.get()) + while self.check_node(): + yield self.construct_document(self.get_node()) def construct_document(self, node): - native = self.construct_object(node) + data = self.construct_object(node) self.constructed_objects = {} - return native + return data def construct_object(self, node): if node in self.constructed_objects: return self.constructed_objects[node] + constructor = None if node.tag in self.yaml_constructors: - native = self.yaml_constructors[node.tag](self, node) - elif None in self.yaml_constructors: - native = self.yaml_constructors[None](self, node) - elif isinstance(node, ScalarNode): - native = self.construct_scalar(node) - elif isinstance(node, SequenceNode): - native = self.construct_sequence(node) - elif isinstance(node, MappingNode): - native = self.construct_mapping(node) - self.constructed_objects[node] = native - return native + constructor = lambda node: self.yaml_constructors[node.tag](self, node) + else: + for tag_prefix in self.yaml_multi_constructors: + if node.tag.startswith(tag_prefix): + tag_suffix = node.tag[len(tag_prefix):] + constructor = lambda node: \ + self.yaml_multi_constructors[tag_prefix](self, tag_suffix, node) + break + else: + if None in self.yaml_multi_constructors: + constructor = lambda node: \ + self.yaml_multi_constructors[None](self, node.tag, node) + elif None in self.yaml_constructors: + constructor = lambda node: \ + self.yaml_constructors[None](self, node) + elif isinstance(node, ScalarNode): + constructor = self.construct_scalar + elif isinstance(node, SequenceNode): + constructor = self.construct_sequence + elif isinstance(node, MappingNode): + constructor = self.construct_mapping + data = constructor(node) + self.constructed_objects[node] = data + return data def construct_scalar(self, node): if not isinstance(node, ScalarNode): @@ -152,7 +169,11 @@ class BaseConstructor: cls.yaml_constructors[tag] = constructor add_constructor = classmethod(add_constructor) - yaml_constructors = {} + def add_multi_constructor(cls, tag_prefix, multi_constructor): + if not 'yaml_multi_constructors' in cls.__dict__: + cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy() + cls.yaml_multi_constructors[tag_prefix] = multi_constructor + add_multi_constructor = classmethod(add_multi_constructor) class SafeConstructor(BaseConstructor): @@ -327,6 +348,18 @@ class SafeConstructor(BaseConstructor): def construct_yaml_map(self, node): return self.construct_mapping(node) + def construct_yaml_object(self, node, cls): + mapping = self.construct_mapping(node) + state = {} + for key in mapping: + state[key.replace('-', '_')] = mapping[key] + data = cls.__new__(cls) + if hasattr(data, '__setstate__'): + data.__setstate__(mapping) + else: + data.__dict__.update(mapping) + return data + def construct_undefined(self, node): raise ConstructorError(None, None, "could not determine a constructor for the tag %r" % node.tag.encode('utf-8'), diff --git a/lib/yaml/detector.py b/lib/yaml/detector.py index 30b180e..363783e 100644 --- a/lib/yaml/detector.py +++ b/lib/yaml/detector.py @@ -5,8 +5,15 @@ import re class BaseDetector: + DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str' + DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq' + DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map' + yaml_detectors = {} + def __init__(self): + pass + def add_detector(cls, tag, regexp, first): if not 'yaml_detectors' in cls.__dict__: cls.yaml_detectors = cls.yaml_detectors.copy() diff --git a/lib/yaml/dumper.py b/lib/yaml/dumper.py new file mode 100644 index 0000000..0a7c2d9 --- /dev/null +++ b/lib/yaml/dumper.py @@ -0,0 +1,56 @@ + +__all__ = ['BaseDumper', 'SafeDumper', 'Dumper'] + +from emitter import * +from serializer import * +from representer import * +from detector import * + +class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseDetector): + + def __init__(self, stream, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_uncode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self) + Detector.__init__(self) + +class SafeDumper(Emitter, Serializer, SafeRepresenter, Detector): + + def __init__(self, stream, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self) + Detector.__init__(self) + +class Dumper(Emitter, Serializer, Representer, Detector): + + def __init__(self, stream, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self) + Detector.__init__(self) + diff --git a/lib/yaml/emitter.py b/lib/yaml/emitter.py index 69a3c74..985ce63 100644 --- a/lib/yaml/emitter.py +++ b/lib/yaml/emitter.py @@ -11,13 +11,16 @@ __all__ = ['Emitter', 'EmitterError'] from error import YAMLError from events import * +import re + class EmitterError(YAMLError): pass class ScalarAnalysis: def __init__(self, scalar, empty, multiline, allow_flow_plain, allow_block_plain, - allow_single_quoted, allow_double_quoted, allow_block): + allow_single_quoted, allow_double_quoted, + allow_block): self.scalar = scalar self.empty = empty self.multiline = multiline @@ -34,12 +37,13 @@ class Emitter: u'tag:yaml.org,2002:' : u'!!', } - def __init__(self, writer): + def __init__(self, stream, canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): - # The writer should have the methods `write` and possibly `flush`. - self.writer = writer + # The stream should have the methods `write` and possibly `flush`. + self.stream = stream - # Encoding is provided by STREAM-START. + # Encoding can be overriden by STREAM-START. self.encoding = None # Emitter is a state machine with a stack of states to handle nested @@ -75,18 +79,28 @@ class Emitter: self.indention = True # Formatting details. - self.canonical = False - self.allow_unicode = False - self.best_line_break = u'\n' + self.canonical = canonical + self.allow_unicode = allow_unicode self.best_indent = 2 + if indent and 1 < indent < 10: + self.best_indent = indent self.best_width = 80 + if width and width > self.best_indent*2: + self.best_width = width + self.best_line_break = u'\n' + if line_break in [u'\r', u'\n', u'\r\n']: + self.best_line_break = line_break + + # Tag prefixes. self.tag_prefixes = None - # Analyses cache. - self.anchor_text = None - self.tag_text = None - self.scalar_analysis = None - self.scalar_style = None + # Prepared anchor and tag. + self.prepared_anchor = None + self.prepared_tag = None + + # Scalar analysis and style. + self.analysis = None + self.style = None def emit(self, event): self.events.append(event) @@ -139,15 +153,8 @@ class Emitter: def expect_stream_start(self): if isinstance(self.event, StreamStartEvent): - self.encoding = self.event.encoding - self.canonical = self.event.canonical - self.allow_unicode = self.event.allow_unicode - if self.event.indent and self.event.indent > 1: - self.best_indent = self.event.indent - if self.event.width and self.event.width > self.best_indent: - self.best_width = self.event.width - if self.event.line_break in [u'\r', u'\n', u'\r\n']: - self.best_line_break = self.event.line_break + if self.event.encoding: + self.encoding = self.event.encoding self.write_stream_start() self.state = self.expect_first_document_start else: @@ -165,7 +172,7 @@ class Emitter: def expect_document_start(self, first=False): if isinstance(self.event, DocumentStartEvent): if self.event.version: - version_text = self.analyze_version(self.event.version) + version_text = self.prepare_version(self.event.version) self.write_version_directive(version_text) self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy() if self.event.tags: @@ -174,8 +181,8 @@ class Emitter: for handle in handles: prefix = self.event.tags[handle] self.tag_prefixes[prefix] = handle - handle_text = self.analyze_tag_handle(handle) - prefix_text = self.analyze_tag_prefix(prefix) + handle_text = self.prepare_tag_handle(handle) + prefix_text = self.prepare_tag_prefix(prefix) self.write_tag_directive(handle_text, prefix_text) implicit = (first and not self.event.explicit and not self.canonical and not self.event.version and not self.event.tags @@ -199,6 +206,7 @@ class Emitter: if self.event.explicit: self.write_indicator(u'...', True) self.write_indent() + self.flush_stream() self.state = self.expect_document_start else: raise EmitterError("expected DocumentEndEvent, but got %s" @@ -418,94 +426,106 @@ class Emitter: def check_simple_key(self): length = 0 if isinstance(self.event, NodeEvent) and self.event.anchor is not None: - if self.anchor_text is None: - self.anchor_text = self.analyze_anchor(self.event.anchor) - length += len(self.anchor_text) + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + length += len(self.prepared_anchor) if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \ and self.event.tag is not None: - if self.tag_text is None: - self.tag_text = self.analyze_tag(self.event.tag) - length += len(self.tag_text) + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(self.event.tag) + length += len(self.prepared_tag) if isinstance(self.event, ScalarEvent): - if self.scalar_analysis is None: - self.scalar_analysis = self.analyze_scalar(self.event.value) - length += len(self.scalar_analysis.scalar) + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + length += len(self.analysis.scalar) return (length < 128 and (isinstance(self.event, AliasEvent) - or (isinstance(self.event, ScalarEvent) and not self.scalar_analysis.multiline) + or (isinstance(self.event, ScalarEvent) + and not self.analysis.empty and not self.analysis.multiline) or self.check_empty_sequence() or self.check_empty_mapping())) # Anchor, Tag, and Scalar processors. def process_anchor(self, indicator): if self.event.anchor is None: + self.prepared_anchor = None return - if self.anchor_text is None: - self.anchor_text = self.analyze_anchor(self.event.anchor) - if self.anchor_text: - self.write_indicator(indicator+self.anchor_text, True) - self.anchor_text = None + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + if self.prepared_anchor: + self.write_indicator(indicator+self.prepared_anchor, True) + self.prepared_anchor = None def process_tag(self): - if self.event.tag is None: - return - if isinstance(self.event, ScalarEvent) and self.best_scalar_style() == '': + tag = self.event.tag + if isinstance(self.event, ScalarEvent): + if self.style is None: + self.style = self.choose_scalar_style() + if self.style == '': + self.prepared_tag = None + return + if self.event.implicit and not tag: + tag = u'!' + self.prepared_tag = None + if not tag: + self.prepared_tag = None return - if self.tag_text is None: - self.tag_text = self.analyze_tag(self.event.tag) - if self.tag_text: - self.write_indicator(self.tag_text, True) - self.tag_text = None - - def best_scalar_style(self): - if self.scalar_analysis is None: - self.scalar_analysis = self.analyze_scalar(self.event.value) - if self.canonical: - return '"' - if (self.event.implicit and not self.event.style - and ((self.flow_level and self.scalar_analysis.allow_flow_plain) - or (not self.flow_level and self.scalar_analysis.allow_block_plain)) - and (len(self.scalar_analysis.scalar) > 0 - or (not self.flow_level and not self.simple_key_context))): - return '' - elif self.event.style == '\'' and self.scalar_analysis.allow_single_quoted: - return '\'' - elif self.event.style in ['|', '>'] and not self.flow_level and self.scalar_analysis.allow_block: - return self.event.style - else: + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(tag) + if self.prepared_tag: + self.write_indicator(self.prepared_tag, True) + self.prepared_tag = None + + def choose_scalar_style(self): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + if self.event.style == '"' or self.canonical: return '"' - return style + if not self.event.style and self.event.implicit: + if (not (self.simple_key_context and + (self.analysis.empty or self.analysis.multiline)) + and (self.flow_level and self.analysis.allow_flow_plain + or (not self.flow_level and self.analysis.allow_block_plain))): + return '' + if self.event.style and self.event.style in '|>': + if not self.flow_level and self.analysis.allow_block: + return self.event.style + if not self.event.style or self.event.style == '\'': + if (self.analysis.allow_single_quoted and + not (self.simple_key_context and self.analysis.multiline)): + return '\'' + return '"' def process_scalar(self): - if self.scalar_analysis is None: - self.scalar_analysis = self.analyze_scalar(self.event.value) - style = self.best_scalar_style() - if self.scalar_analysis.multiline and not self.simple_key_context \ - and style not in ['|', '>']: - self.write_indent() - if style == '"': - self.write_double_quoted(self.scalar_analysis.scalar, - split=(not self.simple_key_context)) - elif style == '\'': - self.write_single_quoted(self.scalar_analysis.scalar, - split=(not self.simple_key_context)) - elif style == '>': - self.write_folded(self.scalar_analysis.scalar) - elif style == '|': - self.write_literal(self.scalar_analysis.scalar) + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + if self.style is None: + self.style = self.choose_scalar_style() + split = (not self.simple_key_context) + #if self.analysis.multiline and split \ + # and (not self.style or self.style in '\'\"'): + # self.write_indent() + if self.style == '"': + self.write_double_quoted(self.analysis.scalar, split) + elif self.style == '\'': + self.write_single_quoted(self.analysis.scalar, split) + elif self.style == '>': + self.write_folded(self.analysis.scalar) + elif self.style == '|': + self.write_literal(self.analysis.scalar) else: - self.write_plain(self.scalar_analysis.scalar, - split=(not self.simple_key_context)) - self.scalar_analysis = None + self.write_plain(self.analysis.scalar, split) + self.analysis = None + self.style = None # Analyzers. - def analyze_version(self, version): + def prepare_version(self, version): major, minor = version if major != 1: raise EmitterError("unsupported YAML version: %d.%d" % (major, minor)) return u'%d.%d' % (major, minor) - def analyze_tag_handle(self, handle): + def prepare_tag_handle(self, handle): if not handle: raise EmitterError("tag handle must not be empty") if handle[0] != u'!' or handle[-1] != u'!': @@ -518,7 +538,7 @@ class Emitter: % (ch.encode('utf-8'), handle.encode('utf-8'))) return handle - def analyze_tag_prefix(self, prefix): + def prepare_tag_prefix(self, prefix): if not prefix: raise EmitterError("tag prefix must not be empty") chunks = [] @@ -541,9 +561,11 @@ class Emitter: chunks.append(prefix[start:end]) return u''.join(chunks) - def analyze_tag(self, tag): + def prepare_tag(self, tag): if not tag: raise EmitterError("tag must not be empty") + if tag == u'!': + return tag handle = None suffix = tag for prefix in self.tag_prefixes: @@ -574,7 +596,7 @@ class Emitter: else: return u'!<%s>' % suffix_text - def analyze_anchor(self, anchor): + def prepare_anchor(self, anchor): if not anchor: raise EmitterError("anchor must not be empty") for ch in anchor: @@ -584,143 +606,221 @@ class Emitter: % (ch.encode('utf-8'), text.encode('utf-8'))) return anchor - def analyze_scalar(self, scalar): # It begs for refactoring. + def analyze_scalar(self, scalar): + + # Empty scalar is a special case. if not scalar: return ScalarAnalysis(scalar=scalar, empty=True, multiline=False, allow_flow_plain=False, allow_block_plain=True, allow_single_quoted=True, allow_double_quoted=True, allow_block=False) - contains_block_indicator = False - contains_flow_indicator = False - contains_line_breaks = False - contains_unicode_characters = False - contains_special_characters = False - contains_inline_spaces = False # non-space space+ non-space - contains_inline_breaks = False # non-space break+ non-space - contains_leading_spaces = False # ^ space+ (non-space | $) - contains_leading_breaks = False # ^ break+ (non-space | $) - contains_trailing_spaces = False # non-space space+ $ - contains_trailing_breaks = False # non-space break+ $ - contains_inline_breaks_spaces = False # non-space break+ space+ non-space - contains_mixed_breaks_spaces = False # anything else + + # Indicators and special characters. + block_indicators = False + flow_indicators = False + line_breaks = False + special_characters = False + + # Whitespaces. + inline_spaces = False # non-space space+ non-space + inline_breaks = False # non-space break+ non-space + leading_spaces = False # ^ space+ (non-space | $) + leading_breaks = False # ^ break+ (non-space | $) + trailing_spaces = False # (^ | non-space) space+ $ + trailing_breaks = False # (^ | non-space) break+ $ + inline_breaks_spaces = False # non-space break+ space+ non-space + mixed_breaks_spaces = False # anything else + + # Check document indicators. if scalar.startswith(u'---') or scalar.startswith(u'...'): - contains_block_indicator = True - contains_flow_indicator = True - first = True - last = (len(scalar) == 1) - preceeded_by_space = False - followed_by_space = (len(scalar) > 1 and + block_indicators = True + flow_indicators = True + + # First character or preceded by a whitespace. + preceeded_by_space = True + + # Last character or followed by a whitespace. + followed_by_space = (len(scalar) == 1 or scalar[1] in u'\0 \t\r\n\x85\u2028\u2029') - spaces = breaks = mixed = leading = False + + # The current series of whitespaces contain plain spaces. + spaces = False + + # The current series of whitespaces contain line breaks. + breaks = False + + # The current series of whitespaces contain a space followed by a + # break. + mixed = False + + # The current series of whitespaces start at the beginning of the + # scalar. + leading = False + index = 0 while index < len(scalar): ch = scalar[index] - if first: + + # Check for indicators. + + if index == 0: + # Leading indicators are special characters. if ch in u'#,[]{}#&*!|>\'\"%@`': - contains_flow_indicator = True - contains_block_indicator = True + flow_indicators = True + block_indicators = True if ch in u'?:': - contains_flow_indicator = True - if followed_by_space or last: - contains_block_indicator = True - if ch == u'-' and (followed_by_space or last): - contains_flow_indicator = True - contains_block_indicator = True + flow_indicators = True + if followed_by_space: + block_indicators = True + if ch == u'-' and followed_by_space: + flow_indicators = True + block_indicators = True else: + # Some indicators cannot appear within a scalar as well. if ch in u',?[]{}': - contains_flow_indicator = True + flow_indicators = True if ch == u':': - contains_flow_indicator = True - if followed_by_space or last: - contains_block_indicator = True - if ch == u'#' and (preceeded_by_space or first): - contains_flow_indicator = True - contains_block_indicator = True + flow_indicators = True + if followed_by_space: + block_indicators = True + if ch == u'#' and preceeded_by_space: + flow_indicators = True + block_indicators = True + + # Check for line breaks, special, and unicode characters. + if ch in u'\n\x85\u2028\u2029': - contains_line_breaks = True + line_breaks = True if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'): - if ch < u'\x80': - contains_special_characters = True + if ch < u'\x80' or ch == u'\uFEFF': # '\uFEFF' is BOM. + special_characters = True else: - contains_unicode_characters = True - if ch == u' ': - if not spaces and not breaks: - leading = first - spaces = True - elif ch in u'\n\x85\u2028\u2029': - if not spaces and not breaks: - leading = first - breaks = True - if spaces: - mixed = True - if ch not in u' \n\x85\u2028\u2029': + unicode_characters = True + if not self.allow_unicode: + special_characters = True + + # Spaces, line breaks, and how they are mixed. State machine. + + # Start or continue series of whitespaces. + if ch in u' \n\x85\u2028\u2029': + if spaces and breaks: + if ch != u' ': # break+ (space+ break+) => mixed + mixed = True + elif spaces: + if ch != u' ': # (space+ break+) => mixed + breaks = True + mixed = True + elif breaks: + if ch == u' ': # break+ space+ + spaces = True + else: + leading = (index == 0) + if ch == u' ': # space+ + spaces = True + else: # break+ + breaks = True + + # Series of whitespaces ended with a non-space. + elif spaces or breaks: if leading: if spaces and breaks: - contains_mixed_breaks_spaces = True + mixed_breaks_spaces = True elif spaces: - contains_leading_spaces = True + leading_spaces = True elif breaks: - contains_leading_breaks = True + leading_breaks = True else: if mixed: - contains_mixed_break_spaces = True + mixed_breaks_spaces = True elif spaces and breaks: - contains_inline_breaks_spaces = True + inline_breaks_spaces = True elif spaces: - contains_inline_spaces = True + inline_spaces = True elif breaks: - contains_inline_breaks = True + inline_breaks = True spaces = breaks = mixed = leading = False - elif last: + + # Series of whitespaces reach the end. + if (spaces or breaks) and (index == len(scalar)-1): if spaces and breaks: - contains_mixed_break_spaces = True + mixed_breaks_spaces = True elif spaces: + trailing_spaces = True if leading: - contains_leading_spaces = True - else: - contains_trailing_spaces = True + leading_spaces = True elif breaks: + trailing_breaks = True if leading: - contains_leading_breaks = True - else: - contains_trailing_breaks = True + leading_breaks = True + spaces = breaks = mixed = leading = False + + # Prepare for the next character. index += 1 - first = False - last = (index+1 == len(scalar)) preceeded_by_space = (ch in u'\0 \t\r\n\x85\u2028\u2029') - followed_by_space = (index+1 < len(scalar) and + followed_by_space = (index+1 >= len(scalar) or scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029') - if contains_unicode_characters and not self.allow_unicode: - contains_special_characters = True - allow_flow_plain = not (contains_flow_indicator or contains_special_characters - or contains_leading_spaces or contains_leading_breaks - or contains_trailing_spaces or contains_trailing_breaks - or contains_inline_breaks_spaces or contains_mixed_breaks_spaces) - allow_block_plain = not (contains_block_indicator or contains_special_characters - or contains_leading_spaces or contains_leading_breaks - or contains_trailing_spaces or contains_trailing_breaks - or contains_inline_breaks_spaces or contains_mixed_breaks_spaces) - allow_single_quoted = not (contains_special_characters - or contains_inline_breaks_spaces or contains_mixed_breaks_spaces) + + # Let's decide what styles are allowed. + allow_flow_plain = True + allow_block_plain = True + allow_single_quoted = True allow_double_quoted = True - allow_block = not (contains_special_characters - or contains_leading_spaces or contains_leading_breaks - or contains_trailing_spaces or contains_mixed_breaks_spaces) - return ScalarAnalysis(scalar=scalar, empty=False, multiline=contains_line_breaks, - allow_flow_plain=allow_flow_plain, allow_block_plain=allow_block_plain, - allow_single_quoted=allow_single_quoted, allow_double_quoted=allow_double_quoted, + allow_block = True + + # Leading and trailing whitespace are bad for plain scalars. We also + # do not want to mess with leading whitespaces for block scalars. + if leading_spaces or leading_breaks or trailing_spaces: + allow_flow_plain = allow_block_plain = allow_block = False + + # Trailing breaks are fine for block scalars, but unacceptable for + # plain scalars. + if trailing_breaks: + allow_flow_plain = allow_block_plain = False + + # The combination of (space+ break+) is only acceptable for block + # scalars. + if inline_breaks_spaces: + allow_flow_plain = allow_block_plain = allow_single_quoted = False + + # Mixed spaces and breaks, as well as special character are only + # allowed for double quoted scalars. + if mixed_breaks_spaces or special_characters: + allow_flow_plain = allow_block_plain = \ + allow_single_quoted = allow_block = False + + # We don't emit multiline plain scalars. + if line_breaks: + allow_flow_plain = allow_block_plain = False + + # Flow indicators are forbidden for flow plain scalars. + if flow_indicators: + allow_flow_plain = False + + # Block indicators are forbidden for block plain scalars. + if block_indicators: + allow_block_plain = False + + return ScalarAnalysis(scalar=scalar, + empty=False, multiline=line_breaks, + allow_flow_plain=allow_flow_plain, + allow_block_plain=allow_block_plain, + allow_single_quoted=allow_single_quoted, + allow_double_quoted=allow_double_quoted, allow_block=allow_block) # Writers. + def flush_stream(self): + if hasattr(self.stream, 'flush'): + self.stream.flush() + def write_stream_start(self): # Write BOM if needed. if self.encoding and self.encoding.startswith('utf-16'): - self.writer.write(u'\xFF\xFE'.encode(self.encoding)) + self.stream.write(u'\xFF\xFE'.encode(self.encoding)) def write_stream_end(self): - if hasattr(self.writer, 'flush'): - self.writer.flush() + self.flush_stream() def write_indicator(self, indicator, need_whitespace, whitespace=False, indention=False): @@ -733,7 +833,7 @@ class Emitter: self.column += len(data) if self.encoding: data = data.encode(self.encoding) - self.writer.write(data) + self.stream.write(data) def write_indent(self): indent = self.indent or 0 @@ -746,7 +846,7 @@ class Emitter: self.column = indent if self.encoding: data = data.encode(self.encoding) - self.writer.write(data) + self.stream.write(data) def write_line_break(self, data=None): if data is None: @@ -757,23 +857,23 @@ class Emitter: self.column = 0 if self.encoding: data = data.encode(self.encoding) - self.writer.write(data) + self.stream.write(data) def write_version_directive(self, version_text): data = u'%%YAML %s' % version_text if self.encoding: data = data.encode(self.encoding) - self.writer.write(data) + self.stream.write(data) self.write_line_break() def write_tag_directive(self, handle_text, prefix_text): data = u'%%TAG %s %s' % (handle_text, prefix_text) if self.encoding: data = data.encode(self.encoding) - self.writer.write(data) + self.stream.write(data) self.write_line_break() - # Scalar writers. + # Scalar streams. def write_single_quoted(self, text, split=True): self.write_indicator(u'\'', True) @@ -794,7 +894,7 @@ class Emitter: self.column += len(data) if self.encoding: data = data.encode(self.encoding) - self.writer.write(data) + self.stream.write(data) start = end elif breaks: if ch is None or ch not in u'\n\x85\u2028\u2029': @@ -814,14 +914,14 @@ class Emitter: self.column += len(data) if self.encoding: data = data.encode(self.encoding) - self.writer.write(data) + self.stream.write(data) start = end if ch == u'\'': data = u'\'\'' self.column += 2 if self.encoding: data = data.encode(self.encoding) - self.writer.write(data) + self.stream.write(data) start = end + 1 if ch is not None: spaces = (ch == u' ') @@ -863,7 +963,7 @@ class Emitter: self.column += len(data) if self.encoding: data = data.encode(self.encoding) - self.writer.write(data) + self.stream.write(data) start = end if ch is not None: if ch in self.ESCAPE_REPLACEMENTS: @@ -877,7 +977,7 @@ class Emitter: self.column += len(data) if self.encoding: data = data.encode(self.encoding) - self.writer.write(data) + self.stream.write(data) start = end+1 if 0 < end < len(text)-1 and (ch == u' ' or start >= end) \ and self.column+(end-start) > self.best_width and split: @@ -887,7 +987,7 @@ class Emitter: self.column += len(data) if self.encoding: data = data.encode(self.encoding) - self.writer.write(data) + self.stream.write(data) self.write_indent() self.whitespace = False self.indention = False @@ -896,7 +996,7 @@ class Emitter: self.column += len(data) if self.encoding: data = data.encode(self.encoding) - self.writer.write(data) + self.stream.write(data) end += 1 self.write_indicator(u'"', False) @@ -947,14 +1047,14 @@ class Emitter: self.column += len(data) if self.encoding: data = data.encode(self.encoding) - self.writer.write(data) + self.stream.write(data) start = end else: if ch is None or ch in u' \n\x85\u2028\u2029': data = text[start:end] if self.encoding: data = data.encode(self.encoding) - self.writer.write(data) + self.stream.write(data) if ch is None: self.write_line_break() start = end @@ -988,7 +1088,7 @@ class Emitter: data = text[start:end] if self.encoding: data = data.encode(self.encoding) - self.writer.write(data) + self.stream.write(data) if ch is None: self.write_line_break() start = end @@ -1004,7 +1104,7 @@ class Emitter: self.column += len(data) if self.encoding: data = data.encode(self.encoding) - self.writer.write(data) + self.stream.write(data) self.writespace = False self.indention = False spaces = False @@ -1025,7 +1125,7 @@ class Emitter: self.column += len(data) if self.encoding: data = data.encode(self.encoding) - self.writer.write(data) + self.stream.write(data) start = end elif breaks: if ch not in u'\n\x85\u2028\u2029': @@ -1046,7 +1146,7 @@ class Emitter: self.column += len(data) if self.encoding: data = data.encode(self.encoding) - self.writer.write(data) + self.stream.write(data) start = end if ch is not None: spaces = (ch == u' ') diff --git a/lib/yaml/error.py b/lib/yaml/error.py index a818210..8fa916b 100644 --- a/lib/yaml/error.py +++ b/lib/yaml/error.py @@ -48,20 +48,15 @@ class YAMLError(Exception): class MarkedYAMLError(YAMLError): def __init__(self, context=None, context_mark=None, - problem=None, problem_mark=None): + problem=None, problem_mark=None, note=None): self.context = context self.context_mark = context_mark self.problem = problem self.problem_mark = problem_mark + self.note = note def __str__(self): lines = [] - #for (place, mark) in [(self.context, self.context_mark), - # (self.problem, self.problem_mark)]: - # if place is not None: - # lines.append(place) - # if mark is not None: - # lines.append(str(mark)) if self.context is not None: lines.append(self.context) if self.context_mark is not None \ @@ -74,7 +69,7 @@ class MarkedYAMLError(YAMLError): lines.append(self.problem) if self.problem_mark is not None: lines.append(str(self.problem_mark)) + if self.note is not None: + lines.append(self.note) return '\n'.join(lines) - - diff --git a/lib/yaml/events.py b/lib/yaml/events.py index 325ea9c..eebf955 100644 --- a/lib/yaml/events.py +++ b/lib/yaml/events.py @@ -33,17 +33,10 @@ class CollectionEndEvent(Event): # Implementations. class StreamStartEvent(Event): - def __init__(self, start_mark=None, end_mark=None, - encoding=None, line_break=None, canonical=None, - indent=None, width=None, allow_unicode=None): + def __init__(self, start_mark=None, end_mark=None, encoding=None): self.start_mark = start_mark self.end_mark = end_mark self.encoding = encoding - self.line_break = line_break - self.canonical = canonical - self.indent = indent - self.width = width - self.allow_unicode = allow_unicode class StreamEndEvent(Event): pass @@ -68,14 +61,14 @@ class AliasEvent(NodeEvent): pass class ScalarEvent(NodeEvent): - def __init__(self, anchor, tag, value, start_mark=None, end_mark=None, - implicit=None, style=None): + def __init__(self, anchor, tag, implicit, value, + start_mark=None, end_mark=None, style=None): self.anchor = anchor self.tag = tag + self.implicit = implicit self.value = value self.start_mark = start_mark self.end_mark = end_mark - self.implicit = implicit self.style = style class SequenceStartEvent(CollectionStartEvent): diff --git a/lib/yaml/loader.py b/lib/yaml/loader.py new file mode 100644 index 0000000..f8d8673 --- /dev/null +++ b/lib/yaml/loader.py @@ -0,0 +1,41 @@ + +__all__ = ['BaseLoader', 'SafeLoader', 'Loader'] + +from reader import * +from scanner import * +from parser import * +from composer import * +from constructor import * +from detector import * + +class BaseLoader(Reader, Scanner, Parser, + BaseComposer, BaseConstructor, BaseDetector): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + BaseComposer.__init__(self) + BaseConstructor.__init__(self) + BaseDetector.__init__(self) + +class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Detector): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + SafeConstructor.__init__(self) + Detector.__init__(self) + +class Loader(Reader, Scanner, Parser, Composer, Constructor, Detector): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + Constructor.__init__(self) + Detector.__init__(self) + diff --git a/lib/yaml/nodes.py b/lib/yaml/nodes.py index bad4935..1f9f094 100644 --- a/lib/yaml/nodes.py +++ b/lib/yaml/nodes.py @@ -23,11 +23,10 @@ class Node: class ScalarNode(Node): id = 'scalar' - def __init__(self, tag, value, implicit, + def __init__(self, tag, value, start_mark=None, end_mark=None, style=None): self.tag = tag self.value = value - self.implicit = implicit self.start_mark = start_mark self.end_mark = end_mark self.style = style diff --git a/lib/yaml/parser.py b/lib/yaml/parser.py index cf17e8e..38508d2 100644 --- a/lib/yaml/parser.py +++ b/lib/yaml/parser.py @@ -24,23 +24,6 @@ # TODO: support for BOM within a stream. # stream ::= (BOM? implicit_document)? (BOM? explicit_document)* STREAM-END -# Note that there is a slight deviation from the specification. We require a -# non-empty node content if ANCHOR or TAG is specified. This disallow such -# documents as -# -# key: !!str # empty value -# -# This is done to prevent ambiguity in parsing tags and aliases: -# -# { !!perl/YAML::Parser: value } -# -# What is it? Should it be interpreted as -# { ? !<tag:yaml.org,2002:perl/YAML::Parser> '' : value } -# or -# { ? !<tag:yaml.org,2002:perl/YAML::Parser:> value : '' } -# Since we disallow non-empty node content, tags are always followed by spaces -# or line breaks. - # FIRST sets: # stream: { STREAM-START } # explicit_document: { DIRECTIVE DOCUMENT-START } @@ -66,13 +49,14 @@ __all__ = ['Parser', 'ParserError'] from error import MarkedYAMLError from tokens import * from events import * +from scanner import * class ParserError(MarkedYAMLError): pass class Parser: - # Since writing an LL(1) parser is a straightforward task, we do not give - # many comments here. + # Since writing a recursive-descendant parser is a straightforward task, we + # do not give many comments here. # Note that we use Python generators. If you rewrite the parser in another # language, you may replace all 'yield'-s with event handler calls. @@ -81,14 +65,13 @@ class Parser: u'!!': u'tag:yaml.org,2002:', } - def __init__(self, scanner): - self.scanner = scanner + def __init__(self): self.current_event = None self.yaml_version = None self.tag_handles = {} self.event_generator = self.parse_stream() - def check(self, *choices): + def check_event(self, *choices): # Check the type of the next event. if self.current_event is None: try: @@ -96,12 +79,14 @@ class Parser: except StopIteration: pass if self.current_event is not None: + if not choices: + return True for choice in choices: if isinstance(self.current_event, choice): return True return False - def peek(self): + def peek_event(self): # Get the next event. if self.current_event is None: try: @@ -110,7 +95,7 @@ class Parser: pass return self.current_event - def get(self): + def get_event(self): # Get the next event. if self.current_event is None: try: @@ -129,70 +114,70 @@ class Parser: # STREAM-START implicit_document? explicit_document* STREAM-END # Parse start of stream. - token = self.scanner.get() + token = self.get_token() yield StreamStartEvent(token.start_mark, token.end_mark, encoding=token.encoding) # Parse implicit document. - if not self.scanner.check(DirectiveToken, DocumentStartToken, + if not self.check_token(DirectiveToken, DocumentStartToken, StreamEndToken): self.tag_handles = self.DEFAULT_TAGS - token = self.scanner.peek() + token = self.peek_token() start_mark = end_mark = token.start_mark yield DocumentStartEvent(start_mark, end_mark, explicit=False) for event in self.parse_block_node(): yield event - token = self.scanner.peek() + token = self.peek_token() start_mark = end_mark = token.start_mark explicit = False - while self.scanner.check(DocumentEndToken): - token = self.scanner.get() + while self.check_token(DocumentEndToken): + token = self.get_token() end_mark = token.end_mark explicit = True yield DocumentEndEvent(start_mark, end_mark, explicit=explicit) # Parse explicit documents. - while not self.scanner.check(StreamEndToken): - token = self.scanner.peek() + while not self.check_token(StreamEndToken): + token = self.peek_token() start_mark = token.start_mark version, tags = self.process_directives() - if not self.scanner.check(DocumentStartToken): + if not self.check_token(DocumentStartToken): raise ParserError(None, None, "expected '<document start>', but found %r" - % self.scanner.peek().id, - self.scanner.peek().start_mark) - token = self.scanner.get() + % self.peek_token().id, + self.peek_token().start_mark) + token = self.get_token() end_mark = token.end_mark yield DocumentStartEvent(start_mark, end_mark, explicit=True, version=version, tags=tags) - if self.scanner.check(DirectiveToken, + if self.check_token(DirectiveToken, DocumentStartToken, DocumentEndToken, StreamEndToken): yield self.process_empty_scalar(token.end_mark) else: for event in self.parse_block_node(): yield event - token = self.scanner.peek() + token = self.peek_token() start_mark = end_mark = token.start_mark explicit = False - while self.scanner.check(DocumentEndToken): - token = self.scanner.get() + while self.check_token(DocumentEndToken): + token = self.get_token() end_mark = token.end_mark explicit=True yield DocumentEndEvent(start_mark, end_mark, explicit=explicit) # Parse end of stream. - token = self.scanner.get() + token = self.get_token() yield StreamEndEvent(token.start_mark, token.end_mark) def process_directives(self): # DIRECTIVE* self.yaml_version = None self.tag_handles = {} - while self.scanner.check(DirectiveToken): - token = self.scanner.get() + while self.check_token(DirectiveToken): + token = self.get_token() if token.name == u'YAML': if self.yaml_version is not None: raise ParserError(None, None, @@ -237,33 +222,33 @@ class Parser: # block_collection ::= block_sequence | block_mapping # block_node_or_indentless_sequence ::= ALIAS | properties? # (block_content | indentless_block_sequence) - if self.scanner.check(AliasToken): - token = self.scanner.get() + if self.check_token(AliasToken): + token = self.get_token() yield AliasEvent(token.value, token.start_mark, token.end_mark) else: anchor = None tag = None start_mark = end_mark = tag_mark = None - if self.scanner.check(AnchorToken): - token = self.scanner.get() + if self.check_token(AnchorToken): + token = self.get_token() start_mark = token.start_mark end_mark = token.end_mark anchor = token.value - if self.scanner.check(TagToken): - token = self.scanner.get() + if self.check_token(TagToken): + token = self.get_token() tag_mark = token.start_mark end_mark = token.end_mark tag = token.value - elif self.scanner.check(TagToken): - token = self.scanner.get() + elif self.check_token(TagToken): + token = self.get_token() start_mark = tag_mark = token.start_mark end_mark = token.end_mark tag = token.value - if self.scanner.check(AnchorToken): - token = self.scanner.get() + if self.check_token(AnchorToken): + token = self.get_token() end_mark = token.end_mark anchor = token.value - if tag is not None: + if tag is not None and tag != u'!': handle, suffix = tag if handle is not None: if handle not in self.tag_handles: @@ -273,57 +258,57 @@ class Parser: tag = self.tag_handles[handle]+suffix else: tag = suffix - #if tag is None: - # if not (self.scanner.check(ScalarToken) and - # self.scanner.peek().implicit): - # tag = u'!' + #if tag == u'!': + # raise ParserError("while parsing a node", start_mark, + # "found non-specific tag '!'", tag_mark, + # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.") if start_mark is None: - start_mark = end_mark = self.scanner.peek().start_mark + start_mark = end_mark = self.peek_token().start_mark event = None collection_events = None - if indentless_sequence and self.scanner.check(BlockEntryToken): - end_mark = self.scanner.peek().end_mark + if indentless_sequence and self.check_token(BlockEntryToken): + end_mark = self.peek_token().end_mark event = SequenceStartEvent(anchor, tag, start_mark, end_mark) collection_events = self.parse_indentless_sequence() else: - if self.scanner.check(ScalarToken): - token = self.scanner.get() + if self.check_token(ScalarToken): + token = self.get_token() end_mark = token.end_mark - implicit = (tag is None and token.implicit) - event = ScalarEvent(anchor, tag, token.value, - start_mark, end_mark, - implicit=implicit, style=token.style) - elif self.scanner.check(FlowSequenceStartToken): - end_mark = self.scanner.peek().end_mark + implicit = ((tag is None or tag == u'!') and token.implicit) + event = ScalarEvent(anchor, tag, implicit, token.value, + start_mark, end_mark, style=token.style) + elif self.check_token(FlowSequenceStartToken): + end_mark = self.peek_token().end_mark event = SequenceStartEvent(anchor, tag, start_mark, end_mark, flow_style=True) collection_events = self.parse_flow_sequence() - elif self.scanner.check(FlowMappingStartToken): - end_mark = self.scanner.peek().end_mark + elif self.check_token(FlowMappingStartToken): + end_mark = self.peek_token().end_mark event = MappingStartEvent(anchor, tag, start_mark, end_mark, flow_style=True) collection_events = self.parse_flow_mapping() - elif block and self.scanner.check(BlockSequenceStartToken): - end_mark = self.scanner.peek().start_mark + elif block and self.check_token(BlockSequenceStartToken): + end_mark = self.peek_token().start_mark event = SequenceStartEvent(anchor, tag, start_mark, end_mark, flow_style=False) collection_events = self.parse_block_sequence() - elif block and self.scanner.check(BlockMappingStartToken): - end_mark = self.scanner.peek().start_mark + elif block and self.check_token(BlockMappingStartToken): + end_mark = self.peek_token().start_mark event = MappingStartEvent(anchor, tag, start_mark, end_mark, flow_style=False) collection_events = self.parse_block_mapping() elif anchor is not None or tag is not None: # Empty scalars are allowed even if a tag or an anchor is # specified. - event = ScalarEvent(anchor, tag, u'', start_mark, end_mark, - implicit=True) + implicit = (tag is None or tag == u'!') + event = ScalarEvent(anchor, tag, implicit, u'', + start_mark, end_mark) else: if block: node = 'block' else: node = 'flow' - token = self.scanner.peek() + token = self.peek_token() raise ParserError("while scanning a %s node" % node, start_mark, "expected the node content, but found %r" % token.id, token.start_mark) @@ -334,33 +319,33 @@ class Parser: def parse_block_sequence(self): # BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END - token = self.scanner.get() + token = self.get_token() start_mark = token.start_mark - while self.scanner.check(BlockEntryToken): - token = self.scanner.get() - if not self.scanner.check(BlockEntryToken, BlockEndToken): + while self.check_token(BlockEntryToken): + token = self.get_token() + if not self.check_token(BlockEntryToken, BlockEndToken): for event in self.parse_block_node(): yield event else: yield self.process_empty_scalar(token.end_mark) - if not self.scanner.check(BlockEndToken): - token = self.scanner.peek() + if not self.check_token(BlockEndToken): + token = self.peek_token() raise ParserError("while scanning a block collection", start_mark, "expected <block end>, but found %r" % token.id, token.start_mark) - token = self.scanner.get() + token = self.get_token() yield SequenceEndEvent(token.start_mark, token.end_mark) def parse_indentless_sequence(self): # (BLOCK-ENTRY block_node?)+ - while self.scanner.check(BlockEntryToken): - token = self.scanner.get() - if not self.scanner.check(BlockEntryToken, + while self.check_token(BlockEntryToken): + token = self.get_token() + if not self.check_token(BlockEntryToken, KeyToken, ValueToken, BlockEndToken): for event in self.parse_block_node(): yield event else: yield self.process_empty_scalar(token.end_mark) - token = self.scanner.peek() + token = self.peek_token() yield SequenceEndEvent(token.start_mark, token.start_mark) def parse_block_mapping(self): @@ -368,31 +353,31 @@ class Parser: # ((KEY block_node_or_indentless_sequence?)? # (VALUE block_node_or_indentless_sequence?)?)* # BLOCK-END - token = self.scanner.get() + token = self.get_token() start_mark = token.start_mark - while self.scanner.check(KeyToken, ValueToken): - if self.scanner.check(KeyToken): - token = self.scanner.get() - if not self.scanner.check(KeyToken, ValueToken, BlockEndToken): + while self.check_token(KeyToken, ValueToken): + if self.check_token(KeyToken): + token = self.get_token() + if not self.check_token(KeyToken, ValueToken, BlockEndToken): for event in self.parse_block_node_or_indentless_sequence(): yield event else: yield self.process_empty_scalar(token.end_mark) - if self.scanner.check(ValueToken): - token = self.scanner.get() - if not self.scanner.check(KeyToken, ValueToken, BlockEndToken): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(KeyToken, ValueToken, BlockEndToken): for event in self.parse_block_node_or_indentless_sequence(): yield event else: yield self.process_empty_scalar(token.end_mark) else: - token = self.scanner.peek() + token = self.peek_token() yield self.process_empty_scalar(token.start_mark) - if not self.scanner.check(BlockEndToken): - token = self.scanner.peek() + if not self.check_token(BlockEndToken): + token = self.peek_token() raise ParserError("while scanning a block mapping", start_mark, "expected <block end>, but found %r" % token.id, token.start_mark) - token = self.scanner.get() + token = self.get_token() yield MappingEndEvent(token.start_mark, token.end_mark) def parse_flow_sequence(self): @@ -406,42 +391,42 @@ class Parser: # flow_mapping_entry are equal, their interpretations are different. # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?` # generate an inline mapping (set syntax). - token = self.scanner.get() + token = self.get_token() start_mark = token.start_mark - while not self.scanner.check(FlowSequenceEndToken): - if self.scanner.check(KeyToken): - token = self.scanner.get() + while not self.check_token(FlowSequenceEndToken): + if self.check_token(KeyToken): + token = self.get_token() yield MappingStartEvent(None, None, # u'!', token.start_mark, token.end_mark, flow_style=True) - if not self.scanner.check(ValueToken, + if not self.check_token(ValueToken, FlowEntryToken, FlowSequenceEndToken): for event in self.parse_flow_node(): yield event else: yield self.process_empty_scalar(token.end_mark) - if self.scanner.check(ValueToken): - token = self.scanner.get() - if not self.scanner.check(FlowEntryToken, FlowSequenceEndToken): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(FlowEntryToken, FlowSequenceEndToken): for event in self.parse_flow_node(): yield event else: yield self.process_empty_scalar(token.end_mark) else: - token = self.scanner.peek() + token = self.peek_token() yield self.process_empty_scalar(token.start_mark) - token = self.scanner.peek() + token = self.peek_token() yield MappingEndEvent(token.start_mark, token.start_mark) else: for event in self.parse_flow_node(): yield event - if not self.scanner.check(FlowEntryToken, FlowSequenceEndToken): - token = self.scanner.peek() + if not self.check_token(FlowEntryToken, FlowSequenceEndToken): + token = self.peek_token() raise ParserError("while scanning a flow sequence", start_mark, "expected ',' or ']', but got %r" % token.id, token.start_mark) - if self.scanner.check(FlowEntryToken): - self.scanner.get() - token = self.scanner.get() + if self.check_token(FlowEntryToken): + self.get_token() + token = self.get_token() yield SequenceEndEvent(token.start_mark, token.end_mark) def parse_flow_mapping(self): @@ -450,45 +435,44 @@ class Parser: # flow_mapping_entry? # FLOW-MAPPING-END # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? - token = self.scanner.get() + token = self.get_token() start_mark = token.start_mark - while not self.scanner.check(FlowMappingEndToken): - if self.scanner.check(KeyToken): - token = self.scanner.get() - if not self.scanner.check(ValueToken, + while not self.check_token(FlowMappingEndToken): + if self.check_token(KeyToken): + token = self.get_token() + if not self.check_token(ValueToken, FlowEntryToken, FlowMappingEndToken): for event in self.parse_flow_node(): yield event else: yield self.process_empty_scalar(token.end_mark) - if self.scanner.check(ValueToken): - token = self.scanner.get() - if not self.scanner.check(FlowEntryToken, FlowMappingEndToken): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(FlowEntryToken, FlowMappingEndToken): for event in self.parse_flow_node(): yield event else: yield self.process_empty_scalar(token.end_mark) else: - token = self.scanner.peek() + token = self.peek_token() yield self.process_empty_scalar(token.start_mark) else: for event in self.parse_flow_node(): yield event - yield self.process_empty_scalar(self.scanner.peek().start_mark) - if not self.scanner.check(FlowEntryToken, FlowMappingEndToken): - token = self.scanner.peek() + yield self.process_empty_scalar(self.peek_token().start_mark) + if not self.check_token(FlowEntryToken, FlowMappingEndToken): + token = self.peek_token() raise ParserError("while scanning a flow mapping", start_mark, "expected ',' or '}', but got %r" % token.id, token.start_mark) - if self.scanner.check(FlowEntryToken): - self.scanner.get() - if not self.scanner.check(FlowMappingEndToken): - token = self.scanner.peek() + if self.check_token(FlowEntryToken): + self.get_token() + if not self.check_token(FlowMappingEndToken): + token = self.peek_token() raise ParserError("while scanning a flow mapping", start_mark, "expected '}', but found %r" % token.id, token.start_mark) - token = self.scanner.get() + token = self.get_token() yield MappingEndEvent(token.start_mark, token.end_mark) def process_empty_scalar(self, mark): - return ScalarEvent(None, None, u'', mark, mark, - implicit=True) + return ScalarEvent(None, None, True, u'', mark, mark) diff --git a/lib/yaml/reader.py b/lib/yaml/reader.py index ab16a13..beb76d0 100644 --- a/lib/yaml/reader.py +++ b/lib/yaml/reader.py @@ -91,7 +91,7 @@ class Reader: # Yeah, it's ugly and slow. - def __init__(self, data): + def __init__(self, stream): self.name = None self.stream = None self.stream_pointer = 0 @@ -104,17 +104,17 @@ class Reader: self.index = 0 self.line = 0 self.column = 0 - if isinstance(data, unicode): + if isinstance(stream, unicode): self.name = "<unicode string>" - self.check_printable(data) - self.buffer = data+u'\0' - elif isinstance(data, str): + self.check_printable(stream) + self.buffer = stream+u'\0' + elif isinstance(stream, str): self.name = "<string>" - self.raw_buffer = data + self.raw_buffer = stream self.determine_encoding() else: - self.stream = data - self.name = getattr(data, 'name', "<file>") + self.stream = stream + self.name = getattr(stream, 'name', "<file>") self.eof = False self.raw_buffer = '' self.determine_encoding() diff --git a/lib/yaml/representer.py b/lib/yaml/representer.py index 6fe74fc..a12c34a 100644 --- a/lib/yaml/representer.py +++ b/lib/yaml/representer.py @@ -20,77 +20,59 @@ except NameError: class RepresenterError(YAMLError): pass -class BaseRepresenter(BaseDetector): +class BaseRepresenter: - DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str' - DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq' - DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map' + yaml_representers = {} - def __init__(self, serializer): - self.serializer = serializer + def __init__(self): self.represented_objects = {} - def close(self): - self.serializer.close() - - def represent(self, native): - node = self.represent_object(native) - self.serializer.serialize(node) + def represent(self, data): + node = self.represent_object(data) + self.serialize(node) self.represented_objects = {} - def represent_object(self, native): - if self.ignore_aliases(native): + def represent_object(self, data): + if self.ignore_aliases(data): alias_key = None else: - alias_key = id(native) + alias_key = id(data) if alias_key is not None: if alias_key in self.represented_objects: node = self.represented_objects[alias_key] if node is None: - raise RepresenterError("recursive objects are not allowed: %r" % native) + raise RepresenterError("recursive objects are not allowed: %r" % data) return node self.represented_objects[alias_key] = None - for native_type in type(native).__mro__: - if native_type in self.yaml_representers: - node = self.yaml_representers[native_type](self, native) + for data_type in type(data).__mro__: + if data_type in self.yaml_representers: + node = self.yaml_representers[data_type](self, data) break else: if None in self.yaml_representers: - node = self.yaml_representers[None](self, native) + node = self.yaml_representers[None](self, data) else: - node = ScalarNode(None, unicode(native)) + node = ScalarNode(None, unicode(data)) if alias_key is not None: self.represented_objects[alias_key] = node return node - def add_representer(cls, native_type, representer): + def add_representer(cls, data_type, representer): if not 'yaml_representers' in cls.__dict__: cls.yaml_representers = cls.yaml_representers.copy() - cls.yaml_representers[native_type] = representer + cls.yaml_representers[data_type] = representer add_representer = classmethod(add_representer) - yaml_representers = {} - def represent_scalar(self, tag, value, style=None): - detected_tag = self.detect(value) - if detected_tag is None: - detected_tag = self.DEFAULT_SCALAR_TAG - implicit = (tag == detected_tag) - if tag == self.DEFAULT_SCALAR_TAG: - tag = None - return ScalarNode(tag, value, implicit=implicit, style=style) + return ScalarNode(tag, value, style=style) def represent_sequence(self, tag, sequence, flow_style=None): - if tag == self.DEFAULT_SEQUENCE_TAG: - tag = None value = [] for item in sequence: value.append(self.represent_object(item)) return SequenceNode(tag, value, flow_style=flow_style) def represent_mapping(self, tag, mapping, flow_style=None): - if tag == self.DEFAULT_MAPPING_TAG: - tag = None value = {} if hasattr(mapping, 'keys'): for item_key in mapping.keys(): @@ -103,108 +85,122 @@ class BaseRepresenter(BaseDetector): self.represent_object(item_value) return MappingNode(tag, value, flow_style=flow_style) - def ignore_aliases(self, native): + def ignore_aliases(self, data): return False -class SafeRepresenter(Detector, BaseRepresenter): +class SafeRepresenter(BaseRepresenter): - def ignore_aliases(self, native): - if native in [None, ()]: + def ignore_aliases(self, data): + if data in [None, ()]: return True - if isinstance(native, (str, unicode, bool, int, float)): + if isinstance(data, (str, unicode, bool, int, float)): return True - def represent_none(self, native): + def represent_none(self, data): return self.represent_scalar(u'tag:yaml.org,2002:null', u'null') - def represent_str(self, native): + def represent_str(self, data): encoding = None try: - unicode(native, 'ascii') + unicode(data, 'ascii') encoding = 'ascii' except UnicodeDecodeError: try: - unicode(native, 'utf-8') + unicode(data, 'utf-8') encoding = 'utf-8' except UnicodeDecodeError: pass if encoding: return self.represent_scalar(u'tag:yaml.org,2002:str', - unicode(native, encoding)) + unicode(data, encoding)) else: return self.represent_scalar(u'tag:yaml.org,2002:binary', - unicode(native.encode('base64')), style='|') + unicode(data.encode('base64')), style='|') - def represent_unicode(self, native): - return self.represent_scalar(u'tag:yaml.org,2002:str', native) + def represent_unicode(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:str', data) - def represent_bool(self, native): - if native: + def represent_bool(self, data): + if data: value = u'true' else: value = u'false' return self.represent_scalar(u'tag:yaml.org,2002:bool', value) - def represent_int(self, native): - return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(native)) + def represent_int(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data)) - def represent_long(self, native): - return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(native)) + def represent_long(self, data): + return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data)) inf_value = 1e300000 nan_value = inf_value/inf_value - def represent_float(self, native): - if native == self.inf_value: + def represent_float(self, data): + if data == self.inf_value: value = u'.inf' - elif native == -self.inf_value: + elif data == -self.inf_value: value = u'-.inf' - elif native == self.nan_value or native != native: + elif data == self.nan_value or data != data: value = u'.nan' else: - value = unicode(native) + value = unicode(data) return self.represent_scalar(u'tag:yaml.org,2002:float', value) - def represent_list(self, native): - pairs = (len(native) > 0) - for item in native: + def represent_list(self, data): + pairs = (len(data) > 0) + for item in data: if not isinstance(item, tuple) or len(item) != 2: pairs = False break if not pairs: - return self.represent_sequence(u'tag:yaml.org,2002:seq', native) + return self.represent_sequence(u'tag:yaml.org,2002:seq', data) value = [] - for item_key, item_value in native: + for item_key, item_value in data: value.append(self.represent_mapping(u'tag:yaml.org,2002:map', [(item_key, item_value)])) return SequenceNode(u'tag:yaml.org,2002:pairs', value) - def represent_dict(self, native): - return self.represent_mapping(u'tag:yaml.org,2002:map', native) + def represent_dict(self, data): + return self.represent_mapping(u'tag:yaml.org,2002:map', data) - def represent_set(self, native): + def represent_set(self, data): value = {} - for key in native: + for key in data: value[key] = None return self.represent_mapping(u'tag:yaml.org,2002:set', value) - def represent_date(self, native): - value = u'%04d-%02d-%02d' % (native.year, native.month, native.day) + def represent_date(self, data): + value = u'%04d-%02d-%02d' % (data.year, data.month, data.day) return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value) - def represent_datetime(self, native): + def represent_datetime(self, data): value = u'%04d-%02d-%02d %02d:%02d:%02d' \ - % (native.year, native.month, native.day, - native.hour, native.minute, native.second) - if native.microsecond: - value += u'.' + unicode(native.microsecond/1000000.0).split(u'.')[1] - if native.utcoffset(): - value += unicode(native.utcoffset()) + % (data.year, data.month, data.day, + data.hour, data.minute, data.second) + if data.microsecond: + value += u'.' + unicode(data.microsecond/1000000.0).split(u'.')[1] + if data.utcoffset(): + value += unicode(data.utcoffset()) return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value) - def represent_undefined(self, native): - raise RepresenterError("cannot represent an object: %s" % native) + def represent_yaml_object(self, tag, data, cls, flow_style=None): + if hasattr(data, '__getstate__'): + state = data.__getstate__() + else: + state = data.__dict__.copy() + mapping = state + if hasattr(state, 'keys'): + mapping = [] + keys = state.keys() + keys.sort() + for key in keys: + mapping.append((key.replace('_', '-'), state[key])) + return self.represent_mapping(tag, mapping, flow_style=flow_style) + + def represent_undefined(self, data): + raise RepresenterError("cannot represent an object: %s" % data) SafeRepresenter.add_representer(type(None), SafeRepresenter.represent_none) diff --git a/lib/yaml/resolver.py b/lib/yaml/resolver.py deleted file mode 100644 index b25857b..0000000 --- a/lib/yaml/resolver.py +++ /dev/null @@ -1,72 +0,0 @@ - -__all__ = ['Resolver', 'ResolverError'] - -from error import MarkedYAMLError -from detector import Detector -from nodes import * - -import re - -# Not really used. -class ResolverError(MarkedYAMLError): - pass - -class Resolver(Detector): - - DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str' - DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq' - DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map' - - def __init__(self, composer): - self.composer = composer - self.resolved_nodes = {} - - def check(self): - # If there are more documents available? - return self.composer.check() - - def get(self): - # Resolve and return the root node of the next document. - if self.composer.check(): - return self.resolve_document(self.composer.get()) - - def __iter__(self): - # Iterator protocol. - while self.composer.check(): - yield self.resolve_document(self.composer.get()) - - def resolve_document(self, node): - self.resolve_node([], node) - return node - self.resolved_nodes = {} - - def resolve_node(self, path, node): - if node in self.resolved_nodes: - return - self.resolved_nodes[node] = None - if isinstance(node, ScalarNode): - self.resolve_scalar(path, node) - elif isinstance(node, SequenceNode): - self.resolve_sequence(path, node) - for index in range(len(node.value)): - self.resolve_node(path+[(node, index)], node.value[index]) - elif isinstance(node, MappingNode): - self.resolve_mapping(path, node) - for key in node.value: - self.resolve_node(path+[node, None], key) - self.resolve_node(path+[node, key], node.value[key]) - - def resolve_scalar(self, path, node): - if node.tag is None and node.implicit: - node.tag = self.detect(node.value) - if node.tag is None or node.tag == u'!': - node.tag = self.DEFAULT_SCALAR_TAG - - def resolve_sequence(self, path, node): - if node.tag is None or node.tag == u'!': - node.tag = self.DEFAULT_SEQUENCE_TAG - - def resolve_mapping(self, path, node): - if node.tag is None or node.tag == u'!': - node.tag = self.DEFAULT_MAPPING_TAG - diff --git a/lib/yaml/scanner.py b/lib/yaml/scanner.py index 7a1d273..e9780c4 100644 --- a/lib/yaml/scanner.py +++ b/lib/yaml/scanner.py @@ -45,19 +45,16 @@ class SimpleKey: class Scanner: - - def __init__(self, reader): + def __init__(self): """Initialize the scanner.""" - # The input stream. The Reader class do the dirty work of checking for - # BOM and converting the input data to Unicode. It also adds NUL to - # the end. + # It is assumed that Scanner and Reader will have a common descendant. + # Reader do the dirty work of checking for BOM and converting the + # input data to Unicode. It also adds NUL to the end. # # Reader supports the following methods - # self.reader.peek(i=0) # peek the next i-th character - # self.reader.prefix(l=1) # peek the next l characters - # self.reader.forward(l=1) # read the next l characters - # and move the pointer - self.reader = reader + # self.peek(i=0) # peek the next i-th character + # self.prefix(l=1) # peek the next l characters + # self.forward(l=1) # read the next l characters and move the pointer. # Had we reached the end of the stream? self.done = False @@ -113,24 +110,26 @@ class Scanner: # Public methods. - def check(self, *choices): + def check_token(self, *choices): # Check if the next token is one of the given types. while self.need_more_tokens(): self.fetch_more_tokens() if self.tokens: + if not choices: + return True for choice in choices: if isinstance(self.tokens[0], choice): return True return False - def peek(self): + def peek_token(self): # Return the next token, but do not delete if from the queue. while self.need_more_tokens(): self.fetch_more_tokens() if self.tokens: return self.tokens[0] - def get(self): + def get_token(self): # Return the next token. while self.need_more_tokens(): self.fetch_more_tokens() @@ -171,10 +170,10 @@ class Scanner: # Compare the current indentation and column. It may add some tokens # and decrease the current indentation level. - self.unwind_indent(self.reader.column) + self.unwind_indent(self.column) # Peek the next character. - ch = self.reader.peek() + ch = self.peek() # Is it the end of stream? if ch == u'\0': @@ -265,7 +264,7 @@ class Scanner: # No? It's an error. Let's produce a nice error message. raise ScannerError("while scanning for the next token", None, "found character %r that cannot start any token" - % ch.encode('utf-8'), self.reader.get_mark()) + % ch.encode('utf-8'), self.get_mark()) # Simple keys treatment. @@ -293,11 +292,11 @@ class Scanner: # height (may cause problems if indentation is broken though). for level in self.possible_simple_keys.keys(): key = self.possible_simple_keys[level] - if key.line != self.reader.line \ - or self.reader.index-key.index > 1024: + if key.line != self.line \ + or self.index-key.index > 1024: if key.required: raise ScannerError("while scanning a simple key", key.mark, - "could not found expected ':'", self.reader.get_mark()) + "could not found expected ':'", self.get_mark()) del self.possible_simple_keys[level] def save_possible_simple_key(self): @@ -306,7 +305,7 @@ class Scanner: # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. # Check if a simple key is required at the current position. - required = not self.flow_level and self.indent == self.reader.column + required = not self.flow_level and self.indent == self.column # A simple key is required only if it is the first token in the current # line. Therefore it is always allowed. @@ -317,12 +316,8 @@ class Scanner: if self.allow_simple_key: self.remove_possible_simple_key() token_number = self.tokens_taken+len(self.tokens) - index = self.reader.index - line = self.reader.line - column = self.reader.column - mark = self.reader.get_mark() key = SimpleKey(token_number, required, - index, line, column, mark) + self.index, self.line, self.column, self.get_mark()) self.possible_simple_keys[self.flow_level] = key def remove_possible_simple_key(self): @@ -334,7 +329,7 @@ class Scanner: assert not key.required #if key.required: # raise ScannerError("while scanning a simple key", key.mark, - # "could not found expected ':'", self.reader.get_mark()) + # "could not found expected ':'", self.get_mark()) # Indentation functions. @@ -349,7 +344,7 @@ class Scanner: #if self.flow_level and self.indent > column: # raise ScannerError(None, None, # "invalid intendation or unclosed '[' or '{'", - # self.reader.get_mark()) + # self.get_mark()) # In the flow context, indentation is ignored. We make the scanner less # restrictive then specification requires. @@ -358,7 +353,7 @@ class Scanner: # In block context, we may need to issue the BLOCK-END tokens. while self.indent > column: - mark = self.reader.get_mark() + mark = self.get_mark() self.indent = self.indents.pop() self.tokens.append(BlockEndToken(mark, mark)) @@ -377,11 +372,11 @@ class Scanner: # last token. # Read the token. - mark = self.reader.get_mark() + mark = self.get_mark() # Add STREAM-START. self.tokens.append(StreamStartToken(mark, mark, - encoding=self.reader.encoding)) + encoding=self.encoding)) def fetch_stream_end(self): @@ -394,12 +389,12 @@ class Scanner: self.possible_simple_keys = {} # Read the token. - mark = self.reader.get_mark() + mark = self.get_mark() # Add STREAM-END. self.tokens.append(StreamEndToken(mark, mark)) - # The reader is ended. + # The steam is finished. self.done = True def fetch_directive(self): @@ -431,9 +426,9 @@ class Scanner: self.allow_simple_key = False # Add DOCUMENT-START or DOCUMENT-END. - start_mark = self.reader.get_mark() - self.reader.forward(3) - end_mark = self.reader.get_mark() + start_mark = self.get_mark() + self.forward(3) + end_mark = self.get_mark() self.tokens.append(TokenClass(start_mark, end_mark)) def fetch_flow_sequence_start(self): @@ -454,9 +449,9 @@ class Scanner: self.allow_simple_key = True # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. - start_mark = self.reader.get_mark() - self.reader.forward() - end_mark = self.reader.get_mark() + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() self.tokens.append(TokenClass(start_mark, end_mark)) def fetch_flow_sequence_end(self): @@ -477,9 +472,9 @@ class Scanner: self.allow_simple_key = False # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. - start_mark = self.reader.get_mark() - self.reader.forward() - end_mark = self.reader.get_mark() + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() self.tokens.append(TokenClass(start_mark, end_mark)) def fetch_flow_entry(self): @@ -491,9 +486,9 @@ class Scanner: self.remove_possible_simple_key() # Add FLOW-ENTRY. - start_mark = self.reader.get_mark() - self.reader.forward() - end_mark = self.reader.get_mark() + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() self.tokens.append(FlowEntryToken(start_mark, end_mark)) def fetch_block_entry(self): @@ -505,11 +500,11 @@ class Scanner: if not self.allow_simple_key: raise ScannerError(None, None, "sequence entries are not allowed here", - self.reader.get_mark()) + self.get_mark()) # We may need to add BLOCK-SEQUENCE-START. - if self.add_indent(self.reader.column): - mark = self.reader.get_mark() + if self.add_indent(self.column): + mark = self.get_mark() self.tokens.append(BlockSequenceStartToken(mark, mark)) # It's an error for the block entry to occur in the flow context, @@ -524,9 +519,9 @@ class Scanner: self.remove_possible_simple_key() # Add BLOCK-ENTRY. - start_mark = self.reader.get_mark() - self.reader.forward() - end_mark = self.reader.get_mark() + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() self.tokens.append(BlockEntryToken(start_mark, end_mark)) def fetch_key(self): @@ -538,11 +533,11 @@ class Scanner: if not self.allow_simple_key: raise ScannerError(None, None, "mapping keys are not allowed here", - self.reader.get_mark()) + self.get_mark()) # We may need to add BLOCK-MAPPING-START. - if self.add_indent(self.reader.column): - mark = self.reader.get_mark() + if self.add_indent(self.column): + mark = self.get_mark() self.tokens.append(BlockMappingStartToken(mark, mark)) # Simple keys are allowed after '?' in the block context. @@ -552,9 +547,9 @@ class Scanner: self.remove_possible_simple_key() # Add KEY. - start_mark = self.reader.get_mark() - self.reader.forward() - end_mark = self.reader.get_mark() + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() self.tokens.append(KeyToken(start_mark, end_mark)) def fetch_value(self): @@ -591,7 +586,7 @@ class Scanner: if not self.allow_simple_key: raise ScannerError(None, None, "mapping values are not allowed here", - self.reader.get_mark()) + self.get_mark()) # Simple keys are allowed after ':' in the block context. self.allow_simple_key = not self.flow_level @@ -600,9 +595,9 @@ class Scanner: self.remove_possible_simple_key() # Add VALUE. - start_mark = self.reader.get_mark() - self.reader.forward() - end_mark = self.reader.get_mark() + start_mark = self.get_mark() + self.forward() + end_mark = self.get_mark() self.tokens.append(ValueToken(start_mark, end_mark)) def fetch_alias(self): @@ -691,30 +686,30 @@ class Scanner: # DIRECTIVE: ^ '%' ... # The '%' indicator is already checked. - if self.reader.column == 0: + if self.column == 0: return True def check_document_start(self): # DOCUMENT-START: ^ '---' (' '|'\n') - if self.reader.column == 0: - if self.reader.prefix(3) == u'---' \ - and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + if self.column == 0: + if self.prefix(3) == u'---' \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': return True def check_document_end(self): # DOCUMENT-END: ^ '...' (' '|'\n') - if self.reader.column == 0: - prefix = self.reader.peek(4) - if self.reader.prefix(3) == u'...' \ - and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + if self.column == 0: + prefix = self.peek(4) + if self.prefix(3) == u'...' \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': return True def check_block_entry(self): # BLOCK-ENTRY: '-' (' '|'\n') - return self.reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' def check_key(self): @@ -724,7 +719,7 @@ class Scanner: # KEY(block context): '?' (' '|'\n') else: - return self.reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' def check_value(self): @@ -734,7 +729,7 @@ class Scanner: # VALUE(block context): ':' (' '|'\n') else: - return self.reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' + return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' def check_plain(self): @@ -750,9 +745,9 @@ class Scanner: # Note that we limit the last rule to the block context (except the # '-' character) because we want the flow context to be space # independent. - ch = self.reader.peek() + ch = self.peek() return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ - or (self.reader.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029' + or (self.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029' and (ch == u'-' or (not self.flow_level and ch in u'?:'))) # Scanners. @@ -777,15 +772,15 @@ class Scanner: # `unwind_indent` before issuing BLOCK-END. # Scanners for block, flow, and plain scalars need to be modified. - if self.reader.index == 0 and self.reader.peek() == u'\uFEFF': - self.reader.forward() + if self.index == 0 and self.peek() == u'\uFEFF': + self.forward() found = False while not found: - while self.reader.peek() == u' ': - self.reader.forward() - if self.reader.peek() == u'#': - while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029': - self.reader.forward() + while self.peek() == u' ': + self.forward() + if self.peek() == u'#': + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() if self.scan_line_break(): if not self.flow_level: self.allow_simple_key = True @@ -794,119 +789,119 @@ class Scanner: def scan_directive(self): # See the specification for details. - start_mark = self.reader.get_mark() - self.reader.forward() + start_mark = self.get_mark() + self.forward() name = self.scan_directive_name(start_mark) value = None if name == u'YAML': value = self.scan_yaml_directive_value(start_mark) - end_mark = self.reader.get_mark() + end_mark = self.get_mark() elif name == u'TAG': value = self.scan_tag_directive_value(start_mark) - end_mark = self.reader.get_mark() + end_mark = self.get_mark() else: - end_mark = self.reader.get_mark() - while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029': - self.reader.forward() + end_mark = self.get_mark() + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() self.scan_directive_ignored_line(start_mark) return DirectiveToken(name, value, start_mark, end_mark) def scan_directive_name(self, start_mark): # See the specification for details. length = 0 - ch = self.reader.peek(length) + ch = self.peek(length) while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \ or ch in u'-_': length += 1 - ch = self.reader.peek(length) + ch = self.peek(length) if not length: raise ScannerError("while scanning a directive", start_mark, "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.reader.get_mark()) - value = self.reader.prefix(length) - self.reader.forward(length) - ch = self.reader.peek() + % ch.encode('utf-8'), self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() if ch not in u'\0 \r\n\x85\u2028\u2029': raise ScannerError("while scanning a directive", start_mark, "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.reader.get_mark()) + % ch.encode('utf-8'), self.get_mark()) return value def scan_yaml_directive_value(self, start_mark): # See the specification for details. - while self.reader.peek() == u' ': - self.reader.forward() + while self.peek() == u' ': + self.forward() major = self.scan_yaml_directive_number(start_mark) - if self.reader.peek() != '.': + if self.peek() != '.': raise ScannerError("while scanning a directive", start_mark, "expected a digit or '.', but found %r" - % self.reader.peek().encode('utf-8'), - self.reader.get_mark()) - self.reader.forward() + % self.peek().encode('utf-8'), + self.get_mark()) + self.forward() minor = self.scan_yaml_directive_number(start_mark) - if self.reader.peek() not in u'\0 \r\n\x85\u2028\u2029': + if self.peek() not in u'\0 \r\n\x85\u2028\u2029': raise ScannerError("while scanning a directive", start_mark, "expected a digit or ' ', but found %r" - % self.reader.peek().encode('utf-8'), - self.reader.get_mark()) + % self.peek().encode('utf-8'), + self.get_mark()) return (major, minor) def scan_yaml_directive_number(self, start_mark): # See the specification for details. - ch = self.reader.peek() + ch = self.peek() if not (u'0' <= ch <= '9'): raise ScannerError("while scanning a directive", start_mark, "expected a digit, but found %r" % ch.encode('utf-8'), - self.reader.get_mark()) + self.get_mark()) length = 0 - while u'0' <= self.reader.peek(length) <= u'9': + while u'0' <= self.peek(length) <= u'9': length += 1 - value = int(self.reader.prefix(length)) - self.reader.forward(length) + value = int(self.prefix(length)) + self.forward(length) return value def scan_tag_directive_value(self, start_mark): # See the specification for details. - while self.reader.peek() == u' ': - self.reader.forward() + while self.peek() == u' ': + self.forward() handle = self.scan_tag_directive_handle(start_mark) - while self.reader.peek() == u' ': - self.reader.forward() + while self.peek() == u' ': + self.forward() prefix = self.scan_tag_directive_prefix(start_mark) return (handle, prefix) def scan_tag_directive_handle(self, start_mark): # See the specification for details. value = self.scan_tag_handle('directive', start_mark) - ch = self.reader.peek() + ch = self.peek() if ch != u' ': raise ScannerError("while scanning a directive", start_mark, "expected ' ', but found %r" % ch.encode('utf-8'), - self.reader.get_mark()) + self.get_mark()) return value def scan_tag_directive_prefix(self, start_mark): # See the specification for details. value = self.scan_tag_uri('directive', start_mark) - ch = self.reader.peek() + ch = self.peek() if ch not in u'\0 \r\n\x85\u2028\u2029': raise ScannerError("while scanning a directive", start_mark, "expected ' ', but found %r" % ch.encode('utf-8'), - self.reader.get_mark()) + self.get_mark()) return value def scan_directive_ignored_line(self, start_mark): # See the specification for details. - while self.reader.peek() == u' ': - self.reader.forward() - if self.reader.peek() == u'#': - while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029': - self.reader.forward() - ch = self.reader.peek() + while self.peek() == u' ': + self.forward() + if self.peek() == u'#': + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() if ch not in u'\0\r\n\x85\u2028\u2029': raise ScannerError("while scanning a directive", start_mark, "expected a comment or a line break, but found %r" - % ch.encode('utf-8'), self.reader.get_mark()) + % ch.encode('utf-8'), self.get_mark()) self.scan_line_break() def scan_anchor(self, TokenClass): @@ -918,50 +913,50 @@ class Scanner: # and # [ *alias , "value" ] # Therefore we restrict aliases to numbers and ASCII letters. - start_mark = self.reader.get_mark() - indicator = self.reader.peek() + start_mark = self.get_mark() + indicator = self.peek() if indicator == '*': name = 'alias' else: name = 'anchor' - self.reader.forward() + self.forward() length = 0 - ch = self.reader.peek(length) + ch = self.peek(length) while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \ or ch in u'-_': length += 1 - ch = self.reader.peek(length) + ch = self.peek(length) if not length: raise ScannerError("while scanning an %s" % name, start_mark, "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.reader.get_mark()) - value = self.reader.prefix(length) - self.reader.forward(length) - ch = self.reader.peek() + % ch.encode('utf-8'), self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`': raise ScannerError("while scanning an %s" % name, start_mark, "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.reader.get_mark()) - end_mark = self.reader.get_mark() + % ch.encode('utf-8'), self.get_mark()) + end_mark = self.get_mark() return TokenClass(value, start_mark, end_mark) def scan_tag(self): # See the specification for details. - start_mark = self.reader.get_mark() - ch = self.reader.peek(1) + start_mark = self.get_mark() + ch = self.peek(1) if ch == u'<': handle = None - self.reader.forward(2) + self.forward(2) suffix = self.scan_tag_uri('tag', start_mark) - if self.reader.peek() != u'>': + if self.peek() != u'>': raise ScannerError("while parsing a tag", start_mark, - "expected '>', but found %r" % self.reader.peek().encode('utf-8'), - self.reader.get_mark()) - self.reader.forward() + "expected '>', but found %r" % self.peek().encode('utf-8'), + self.get_mark()) + self.forward() elif ch in u'\0 \t\r\n\x85\u2028\u2029': handle = None suffix = u'!' - self.reader.forward() + self.forward() else: length = 1 use_handle = False @@ -970,21 +965,21 @@ class Scanner: use_handle = True break length += 1 - ch = self.reader.peek(length) + ch = self.peek(length) handle = u'!' if use_handle: handle = self.scan_tag_handle('tag', start_mark) else: handle = u'!' - self.reader.forward() + self.forward() suffix = self.scan_tag_uri('tag', start_mark) - ch = self.reader.peek() + ch = self.peek() if ch not in u'\0 \r\n\x85\u2028\u2029': raise ScannerError("while scanning a tag", start_mark, "expected ' ', but found %r" % ch.encode('utf-8'), - self.reader.get_mark()) + self.get_mark()) value = (handle, suffix) - end_mark = self.reader.get_mark() + end_mark = self.get_mark() return TagToken(value, start_mark, end_mark) def scan_block_scalar(self, style): @@ -996,10 +991,10 @@ class Scanner: folded = False chunks = [] - start_mark = self.reader.get_mark() + start_mark = self.get_mark() # Scan the header. - self.reader.forward() + self.forward() chomping, increment = self.scan_block_scalar_indicators(start_mark) self.scan_block_scalar_ignored_line(start_mark) @@ -1016,24 +1011,24 @@ class Scanner: line_break = u'' # Scan the inner part of the block scalar. - while self.reader.column == indent and self.reader.peek() != u'\0': + while self.column == indent and self.peek() != u'\0': chunks.extend(breaks) - leading_non_space = self.reader.peek() not in u' \t' + leading_non_space = self.peek() not in u' \t' length = 0 - while self.reader.peek(length) not in u'\0\r\n\x85\u2028\u2029': + while self.peek(length) not in u'\0\r\n\x85\u2028\u2029': length += 1 - chunks.append(self.reader.prefix(length)) - self.reader.forward(length) + chunks.append(self.prefix(length)) + self.forward(length) line_break = self.scan_line_break() breaks, end_mark = self.scan_block_scalar_breaks(indent) - if self.reader.column == indent and self.reader.peek() != u'\0': + if self.column == indent and self.peek() != u'\0': # Unfortunately, folding rules are ambiguous. # # This is the folding according to the specification: if folded and line_break == u'\n' \ - and leading_non_space and self.reader.peek() not in u' \t': + and leading_non_space and self.peek() not in u' \t': if not breaks: chunks.append(u' ') else: @@ -1044,7 +1039,7 @@ class Scanner: # #if folded and line_break == u'\n': # if not breaks: - # if self.reader.peek() not in ' \t': + # if self.peek() not in ' \t': # chunks.append(u' ') # else: # chunks.append(line_break) @@ -1067,82 +1062,82 @@ class Scanner: # See the specification for details. chomping = None increment = None - ch = self.reader.peek() + ch = self.peek() if ch in u'+-': if ch == '+': chomping = True else: chomping = False - self.reader.forward() - ch = self.reader.peek() + self.forward() + ch = self.peek() if ch in u'0123456789': increment = int(ch) if increment == 0: raise ScannerError("while scanning a block scalar", start_mark, "expected indentation indicator in the range 1-9, but found 0", - self.reader.get_mark()) - self.reader.forward() + self.get_mark()) + self.forward() elif ch in u'0123456789': increment = int(ch) if increment == 0: raise ScannerError("while scanning a block scalar", start_mark, "expected indentation indicator in the range 1-9, but found 0", - self.reader.get_mark()) - self.reader.forward() - ch = self.reader.peek() + self.get_mark()) + self.forward() + ch = self.peek() if ch in u'+-': if ch == '+': chomping = True else: chomping = False - self.reader.forward() - ch = self.reader.peek() + self.forward() + ch = self.peek() if ch not in u'\0 \r\n\x85\u2028\u2029': raise ScannerError("while scanning a block scalar", start_mark, "expected chomping or indentation indicators, but found %r" - % ch.encode('utf-8'), self.reader.get_mark()) + % ch.encode('utf-8'), self.get_mark()) return chomping, increment def scan_block_scalar_ignored_line(self, start_mark): # See the specification for details. - while self.reader.peek() == u' ': - self.reader.forward() - if self.reader.peek() == u'#': - while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029': - self.reader.forward() - ch = self.reader.peek() + while self.peek() == u' ': + self.forward() + if self.peek() == u'#': + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() if ch not in u'\0\r\n\x85\u2028\u2029': raise ScannerError("while scanning a block scalar", start_mark, "expected a comment or a line break, but found %r" - % ch.encode('utf-8'), self.reader.get_mark()) + % ch.encode('utf-8'), self.get_mark()) self.scan_line_break() def scan_block_scalar_indentation(self): # See the specification for details. chunks = [] max_indent = 0 - end_mark = self.reader.get_mark() - while self.reader.peek() in u' \r\n\x85\u2028\u2029': - if self.reader.peek() != u' ': + end_mark = self.get_mark() + while self.peek() in u' \r\n\x85\u2028\u2029': + if self.peek() != u' ': chunks.append(self.scan_line_break()) - end_mark = self.reader.get_mark() + end_mark = self.get_mark() else: - self.reader.forward() - if self.reader.column > max_indent: - max_indent = self.reader.column + self.forward() + if self.column > max_indent: + max_indent = self.column return chunks, max_indent, end_mark def scan_block_scalar_breaks(self, indent): # See the specification for details. chunks = [] - end_mark = self.reader.get_mark() - while self.reader.column < indent and self.reader.peek() == u' ': - self.reader.forward() - while self.reader.peek() in u'\r\n\x85\u2028\u2029': + end_mark = self.get_mark() + while self.column < indent and self.peek() == u' ': + self.forward() + while self.peek() in u'\r\n\x85\u2028\u2029': chunks.append(self.scan_line_break()) - end_mark = self.reader.get_mark() - while self.reader.column < indent and self.reader.peek() == u' ': - self.reader.forward() + end_mark = self.get_mark() + while self.column < indent and self.peek() == u' ': + self.forward() return chunks, end_mark def scan_flow_scalar(self, style): @@ -1157,15 +1152,15 @@ class Scanner: else: double = False chunks = [] - start_mark = self.reader.get_mark() - quote = self.reader.peek() - self.reader.forward() + start_mark = self.get_mark() + quote = self.peek() + self.forward() chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) - while self.reader.peek() != quote: + while self.peek() != quote: chunks.extend(self.scan_flow_scalar_spaces(double, start_mark)) chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) - self.reader.forward() - end_mark = self.reader.get_mark() + self.forward() + end_mark = self.get_mark() return ScalarToken(u''.join(chunks), False, start_mark, end_mark, style) @@ -1200,41 +1195,41 @@ class Scanner: chunks = [] while True: length = 0 - while self.reader.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029': + while self.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029': length += 1 if length: - chunks.append(self.reader.prefix(length)) - self.reader.forward(length) - ch = self.reader.peek() - if not double and ch == u'\'' and self.reader.peek(1) == u'\'': + chunks.append(self.prefix(length)) + self.forward(length) + ch = self.peek() + if not double and ch == u'\'' and self.peek(1) == u'\'': chunks.append(u'\'') - self.reader.forward(2) + self.forward(2) elif (double and ch == u'\'') or (not double and ch in u'\"\\'): chunks.append(ch) - self.reader.forward() + self.forward() elif double and ch == u'\\': - self.reader.forward() - ch = self.reader.peek() + self.forward() + ch = self.peek() if ch in self.ESCAPE_REPLACEMENTS: chunks.append(self.ESCAPE_REPLACEMENTS[ch]) - self.reader.forward() + self.forward() elif ch in self.ESCAPE_CODES: length = self.ESCAPE_CODES[ch] - self.reader.forward() + self.forward() for k in range(length): - if self.reader.peek(k) not in u'0123456789ABCDEFabcdef': + if self.peek(k) not in u'0123456789ABCDEFabcdef': raise ScannerError("while scanning a double-quoted scalar", start_mark, "expected escape sequence of %d hexdecimal numbers, but found %r" % - (length, self.reader.peek(k).encode('utf-8')), self.reader.get_mark()) - code = int(self.reader.prefix(length), 16) + (length, self.peek(k).encode('utf-8')), self.get_mark()) + code = int(self.prefix(length), 16) chunks.append(unichr(code)) - self.reader.forward(length) + self.forward(length) elif ch in u'\r\n\x85\u2028\u2029': self.scan_line_break() chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) else: raise ScannerError("while scanning a double-quoted scalar", start_mark, - "found unknown escape character %r" % ch.encode('utf-8'), self.reader.get_mark()) + "found unknown escape character %r" % ch.encode('utf-8'), self.get_mark()) else: return chunks @@ -1242,14 +1237,14 @@ class Scanner: # See the specification for details. chunks = [] length = 0 - while self.reader.peek(length) in u' \t': + while self.peek(length) in u' \t': length += 1 - whitespaces = self.reader.prefix(length) - self.reader.forward(length) - ch = self.reader.peek() + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() if ch == u'\0': raise ScannerError("while scanning a quoted scalar", start_mark, - "found unexpected end of stream", self.reader.get_mark()) + "found unexpected end of stream", self.get_mark()) elif ch in u'\r\n\x85\u2028\u2029': line_break = self.scan_line_break() breaks = self.scan_flow_scalar_breaks(double, start_mark) @@ -1268,14 +1263,14 @@ class Scanner: while True: # Instead of checking indentation, we check for document # separators. - prefix = self.reader.prefix(3) + prefix = self.prefix(3) if (prefix == u'---' or prefix == u'...') \ - and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': raise ScannerError("while scanning a quoted scalar", start_mark, - "found unexpected document separator", self.reader.get_mark()) - while self.reader.peek() in u' \t': - self.reader.forward() - if self.reader.peek() in u'\r\n\x85\u2028\u2029': + "found unexpected document separator", self.get_mark()) + while self.peek() in u' \t': + self.forward() + if self.peek() in u'\r\n\x85\u2028\u2029': chunks.append(self.scan_line_break()) else: return chunks @@ -1287,7 +1282,7 @@ class Scanner: # We also keep track of the `allow_simple_key` flag here. # Indentation rules are loosed for the flow context. chunks = [] - start_mark = self.reader.get_mark() + start_mark = self.get_mark() end_mark = start_mark indent = self.indent+1 # We allow zero indentation for scalars, but then we need to check for @@ -1297,13 +1292,13 @@ class Scanner: spaces = [] while True: length = 0 - if self.reader.peek() == u'#': + if self.peek() == u'#': break while True: - ch = self.reader.peek(length) + ch = self.peek(length) if ch in u'\0 \t\r\n\x85\u2028\u2029' \ or (not self.flow_level and ch == u':' and - self.reader.peek(length+1) in u'\0 \t\r\n\x28\u2028\u2029') \ + self.peek(length+1) in u'\0 \t\r\n\x28\u2028\u2029') \ or (self.flow_level and ch in u',:?[]{}'): break length += 1 @@ -1311,12 +1306,12 @@ class Scanner: break self.allow_simple_key = False chunks.extend(spaces) - chunks.append(self.reader.prefix(length)) - self.reader.forward(length) - end_mark = self.reader.get_mark() + chunks.append(self.prefix(length)) + self.forward(length) + end_mark = self.get_mark() spaces = self.scan_plain_spaces(indent, start_mark) - if not spaces or self.reader.peek() == u'#' \ - or (not self.flow_level and self.reader.column < indent): + if not spaces or self.peek() == u'#' \ + or (not self.flow_level and self.column < indent): break return ScalarToken(u''.join(chunks), True, start_mark, end_mark) @@ -1326,27 +1321,27 @@ class Scanner: # We just forbid them completely. Do not use tabs in YAML! chunks = [] length = 0 - while self.reader.peek(length) in u' ': + while self.peek(length) in u' ': length += 1 - whitespaces = self.reader.prefix(length) - self.reader.forward(length) - ch = self.reader.peek() + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() if ch in u'\r\n\x85\u2028\u2029': line_break = self.scan_line_break() self.allow_simple_key = True - prefix = self.reader.prefix(3) + prefix = self.prefix(3) if (prefix == u'---' or prefix == u'...') \ - and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': return breaks = [] - while self.reader.peek() in u' \r\n\x85\u2028\u2029': - if self.reader.peek() == ' ': - self.reader.forward() + while self.peek() in u' \r\n\x85\u2028\u2029': + if self.peek() == ' ': + self.forward() else: breaks.append(self.scan_line_break()) - prefix = self.reader.prefix(3) + prefix = self.prefix(3) if (prefix == u'---' or prefix == u'...') \ - and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': return if line_break != u'\n': chunks.append(line_break) @@ -1361,26 +1356,26 @@ class Scanner: # See the specification for details. # For some strange reasons, the specification does not allow '_' in # tag handles. I have allowed it anyway. - ch = self.reader.peek() + ch = self.peek() if ch != u'!': raise ScannerError("while scanning a %s" % name, start_mark, "expected '!', but found %r" % ch.encode('utf-8'), - self.reader.get_mark()) + self.get_mark()) length = 1 - ch = self.reader.peek(length) + ch = self.peek(length) if ch != u' ': while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \ or ch in u'-_': length += 1 - ch = self.reader.peek(length) + ch = self.peek(length) if ch != u'!': - self.reader.forward(length) + self.forward(length) raise ScannerError("while scanning a %s" % name, start_mark, "expected '!', but found %r" % ch.encode('utf-8'), - self.reader.get_mark()) + self.get_mark()) length += 1 - value = self.reader.prefix(length) - self.reader.forward(length) + value = self.prefix(length) + self.forward(length) return value def scan_tag_uri(self, name, start_mark): @@ -1388,40 +1383,40 @@ class Scanner: # Note: we do not check if URI is well-formed. chunks = [] length = 0 - ch = self.reader.peek(length) + ch = self.peek(length) while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \ or ch in u'-;/?:@&=+$,_.!~*\'()[]%': if ch == u'%': - chunks.append(self.reader.prefix(length)) - self.reader.forward(length) + chunks.append(self.prefix(length)) + self.forward(length) length = 0 chunks.append(self.scan_uri_escapes(name, start_mark)) else: length += 1 - ch = self.reader.peek(length) + ch = self.peek(length) if length: - chunks.append(self.reader.prefix(length)) - self.reader.forward(length) + chunks.append(self.prefix(length)) + self.forward(length) length = 0 if not chunks: raise ScannerError("while parsing a %s" % name, start_mark, "expected URI, but found %r" % ch.encode('utf-8'), - self.reader.get_mark()) + self.get_mark()) return u''.join(chunks) def scan_uri_escapes(self, name, start_mark): # See the specification for details. bytes = [] - mark = self.reader.get_mark() - while self.reader.peek() == u'%': - self.reader.forward() + mark = self.get_mark() + while self.peek() == u'%': + self.forward() for k in range(2): - if self.reader.peek(k) not in u'0123456789ABCDEFabcdef': + if self.peek(k) not in u'0123456789ABCDEFabcdef': raise ScannerError("while scanning a %s" % name, start_mark, "expected URI escape sequence of 2 hexdecimal numbers, but found %r" % - (self.reader.peek(k).encode('utf-8')), self.reader.get_mark()) - bytes.append(chr(int(self.reader.prefix(2), 16))) - self.reader.forward(2) + (self.peek(k).encode('utf-8')), self.get_mark()) + bytes.append(chr(int(self.prefix(2), 16))) + self.forward(2) try: value = unicode(''.join(bytes), 'utf-8') except UnicodeDecodeError, exc: @@ -1437,15 +1432,15 @@ class Scanner: # '\u2028' : '\u2028' # '\u2029 : '\u2029' # default : '' - ch = self.reader.peek() + ch = self.peek() if ch in u'\r\n\x85': - if self.reader.prefix(2) == u'\r\n': - self.reader.forward(2) + if self.prefix(2) == u'\r\n': + self.forward(2) else: - self.reader.forward() + self.forward() return u'\n' elif ch in u'\u2028\u2029': - self.reader.forward() + self.forward() return ch return u'' diff --git a/lib/yaml/serializer.py b/lib/yaml/serializer.py index 5807a3a..bd79830 100644 --- a/lib/yaml/serializer.py +++ b/lib/yaml/serializer.py @@ -12,21 +12,21 @@ class Serializer: ANCHOR_TEMPLATE = u'id%03d' - def __init__(self, emitter, encoding=None, line_break=None, canonical=None, - indent=None, width=None, allow_unicode=None): - self.emitter = emitter + def __init__(self, encoding=None, + explicit_start=None, explicit_end=None, version=None, tags=None): + self.use_encoding = encoding + self.use_explicit_start = explicit_start + self.use_explicit_end = explicit_end + self.use_version = version + self.use_tags = tags self.serialized_nodes = {} self.anchors = {} self.last_anchor_id = 0 self.closed = None - self.open(encoding, line_break, canonical, indent, width, allow_unicode) - def open(self, encoding=None, line_break=None, canonical=None, - indent=None, width=None, allow_unicode=None): + def open(self): if self.closed is None: - self.emitter.emit(StreamStartEvent(encoding=encoding, - line_break=line_break, canonical=canonical, - indent=indent, width=width, allow_unicode=allow_unicode)) + self.emit(StreamStartEvent(encoding=self.use_encoding)) self.closed = False elif self.closed: raise SerializerError("serializer is closed") @@ -37,23 +37,22 @@ class Serializer: if self.closed is None: raise SerializerError("serializer is not opened") elif not self.closed: - self.emitter.emit(StreamEndEvent()) + self.emit(StreamEndEvent()) self.closed = True - def __del__(self): - self.close() + #def __del__(self): + # self.close() - def serialize(self, node, explicit_start=None, explicit_end=None, - version=None, tags=None): + def serialize(self, node): if self.closed is None: raise SerializerError("serializer is not opened") elif self.closed: raise SerializerError("serializer is closed") - self.emitter.emit(DocumentStartEvent(explicit=explicit_start, - version=version, tags=tags)) + self.emit(DocumentStartEvent(explicit=self.use_explicit_start, + version=self.use_version, tags=self.use_tags)) self.anchor_node(node) self.serialize_node(node) - self.emitter.emit(DocumentEndEvent(explicit=explicit_end)) + self.emit(DocumentEndEvent(explicit=self.use_explicit_end)) self.serialized_nodes = {} self.anchors = {} self.last_alias_id = 0 @@ -79,23 +78,42 @@ class Serializer: def serialize_node(self, node): alias = self.anchors[node] if node in self.serialized_nodes: - self.emitter.emit(AliasEvent(alias)) + self.emit(AliasEvent(alias)) else: self.serialized_nodes[node] = True if isinstance(node, ScalarNode): - self.emitter.emit(ScalarEvent(alias, node.tag, node.value, - implicit=node.implicit, style=node.style)) + detected_tag = self.detect(node.value) + implicit = (node.tag == self.detect(node.value) + or (node.tag == self.DEFAULT_SCALAR_TAG + and detected_tag is None)) + self.emit(ScalarEvent(alias, node.tag, implicit, node.value, + style=node.style)) elif isinstance(node, SequenceNode): - self.emitter.emit(SequenceStartEvent(alias, node.tag, + # TODO: + # 1) Check the current path in the Resolver. + # 2) Add the implicit flag to the SequenceStartEvent and + # MappingStartEvent. + tag = node.tag + if tag == self.DEFAULT_SEQUENCE_TAG and not self.canonical: + tag = None + self.emit(SequenceStartEvent(alias, tag, flow_style=node.flow_style)) for item in node.value: self.serialize_node(item) - self.emitter.emit(SequenceEndEvent()) + self.emit(SequenceEndEvent()) elif isinstance(node, MappingNode): - self.emitter.emit(MappingStartEvent(alias, node.tag, + tag = node.tag + if tag == self.DEFAULT_MAPPING_TAG and not self.canonical: + tag = None + self.emit(MappingStartEvent(alias, tag, flow_style=node.flow_style)) - for key in node.value: - self.serialize_node(key) - self.serialize_node(node.value[key]) - self.emitter.emit(MappingEndEvent()) + if hasattr(node.value, 'keys'): + for key in node.value.keys(): + self.serialize_node(key) + self.serialize_node(node.value[key]) + else: + for key, value in node.value: + self.serialize_node(key) + self.serialize_node(value) + self.emit(MappingEndEvent()) diff --git a/lib/yaml/yaml_object.py b/lib/yaml/yaml_object.py deleted file mode 100644 index b66bd1a..0000000 --- a/lib/yaml/yaml_object.py +++ /dev/null @@ -1,34 +0,0 @@ - -__all__ = ['YAMLObject', 'YAMLObjectMetaclass'] - -from constructor import * -from representer import * - -class YAMLObjectMetaclass(type): - - def __init__(cls, name, bases, kwds): - super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds) - if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None: - cls.yaml_constructor.add_constructor(cls.yaml_tag, cls.from_yaml) - cls.yaml_representer.add_representer(cls, cls.to_yaml) - -class YAMLObject(object): - - __metaclass__ = YAMLObjectMetaclass - - yaml_constructor = Constructor - yaml_representer = Representer - - yaml_tag = None - - def from_yaml(cls, constructor, node): - raise ConstructorError(None, None, - "found undefined constructor for the tag %r" - % node.tag.encode('utf-8'), node.start_mark) - from_yaml = classmethod(from_yaml) - - def to_yaml(cls, representer, native): - raise RepresenterError( - "found undefined representer for the object: %s" % native) - to_yaml = classmethod(to_yaml) - diff --git a/tests/data/construct-custom.code b/tests/data/construct-custom.code index bcc283f..2d5f063 100644 --- a/tests/data/construct-custom.code +++ b/tests/data/construct-custom.code @@ -6,4 +6,5 @@ MyTestClass3(x=1), MyTestClass3(x=1, y=2, z=3), MyTestClass3(x=1, y=2, z=3), + YAMLObject1(my_parameter='foo', my_another_parameter=[1,2,3]) ] diff --git a/tests/data/construct-custom.data b/tests/data/construct-custom.data index 053d028..9db0f64 100644 --- a/tests/data/construct-custom.data +++ b/tests/data/construct-custom.data @@ -21,3 +21,6 @@ =: 1 'y': 2 z: 3 +- !foo + my-parameter: foo + my-another-parameter: [1,2,3] diff --git a/tests/data/spec-08-07.canonical b/tests/data/spec-08-07.canonical index eda1adf..e2f43d9 100644 --- a/tests/data/spec-08-07.canonical +++ b/tests/data/spec-08-07.canonical @@ -3,5 +3,6 @@ !!seq [ !<tag:yaml.org,2002:str> "12", !<tag:yaml.org,2002:int> "12", - !<tag:yaml.org,2002:str> "12", +# !<tag:yaml.org,2002:str> "12", + !<tag:yaml.org,2002:int> "12", ] diff --git a/tests/data/tags.events b/tests/data/tags.events index 4271d47..bb93dce 100644 --- a/tests/data/tags.events +++ b/tests/data/tags.events @@ -2,7 +2,7 @@ - !DocumentStart - !SequenceStart - !Scalar { value: 'data' } -- !Scalar { tag: '!', value: 'data' } +#- !Scalar { tag: '!', value: 'data' } - !Scalar { tag: 'tag:yaml.org,2002:str', value: 'data' } - !Scalar { tag: '!myfunnytag', value: 'data' } - !Scalar { tag: '!my!ugly!tag', value: 'data' } diff --git a/tests/test_appliance.py b/tests/test_appliance.py index 010f06f..bf907f3 100644 --- a/tests/test_appliance.py +++ b/tests/test_appliance.py @@ -1,8 +1,7 @@ import unittest, os -from yaml.tokens import * -from yaml.events import * +from yaml import * class TestAppliance(unittest.TestCase): @@ -43,52 +42,74 @@ class CanonicalScanner: def __init__(self, data): self.data = unicode(data, 'utf-8')+u'\0' self.index = 0 + self.scan() + + def check_token(self, *choices): + if self.tokens: + if not choices: + return True + for choice in choices: + if isinstance(self.tokens[0], choice): + return True + return False + + def peek_token(self): + if self.tokens: + return self.tokens[0] + + def get_token(self, choice=None): + token = self.tokens.pop(0) + if choice and not isinstance(token, choice): + raise Error("unexpected token "+repr(token)) + return token + + def get_token_value(self): + token = self.get_token() + return token.value def scan(self): - #print self.data[self.index:] - tokens = [] - tokens.append(StreamStartToken(None, None)) + self.tokens = [] + self.tokens.append(StreamStartToken(None, None)) while True: self.find_token() ch = self.data[self.index] if ch == u'\0': - tokens.append(StreamEndToken(None, None)) + self.tokens.append(StreamEndToken(None, None)) break elif ch == u'%': - tokens.append(self.scan_directive()) + self.tokens.append(self.scan_directive()) elif ch == u'-' and self.data[self.index:self.index+3] == u'---': self.index += 3 - tokens.append(DocumentStartToken(None, None)) + self.tokens.append(DocumentStartToken(None, None)) elif ch == u'[': self.index += 1 - tokens.append(FlowSequenceStartToken(None, None)) + self.tokens.append(FlowSequenceStartToken(None, None)) elif ch == u'{': self.index += 1 - tokens.append(FlowMappingStartToken(None, None)) + self.tokens.append(FlowMappingStartToken(None, None)) elif ch == u']': self.index += 1 - tokens.append(FlowSequenceEndToken(None, None)) + self.tokens.append(FlowSequenceEndToken(None, None)) elif ch == u'}': self.index += 1 - tokens.append(FlowMappingEndToken(None, None)) + self.tokens.append(FlowMappingEndToken(None, None)) elif ch == u'?': self.index += 1 - tokens.append(KeyToken(None, None)) + self.tokens.append(KeyToken(None, None)) elif ch == u':': self.index += 1 - tokens.append(ValueToken(None, None)) + self.tokens.append(ValueToken(None, None)) elif ch == u',': self.index += 1 - tokens.append(FlowEntryToken(None, None)) + self.tokens.append(FlowEntryToken(None, None)) elif ch == u'*' or ch == u'&': - tokens.append(self.scan_alias()) + self.tokens.append(self.scan_alias()) elif ch == u'!': - tokens.append(self.scan_tag()) + self.tokens.append(self.scan_tag()) elif ch == u'"': - tokens.append(self.scan_scalar()) + self.tokens.append(self.scan_scalar()) else: raise Error("invalid token") - return tokens DIRECTIVE = u'%YAML 1.1' @@ -203,49 +224,49 @@ class CanonicalScanner: class CanonicalParser: - def __init__(self, data): - self.scanner = CanonicalScanner(data) + def __init__(self): self.events = [] + self.parse() # stream: STREAM-START document* STREAM-END def parse_stream(self): - self.consume_token(StreamStartToken) + self.get_token(StreamStartToken) self.events.append(StreamStartEvent(None, None)) - while not self.test_token(StreamEndToken): - if self.test_token(DirectiveToken, DocumentStartToken): + while not self.check_token(StreamEndToken): + if self.check_token(DirectiveToken, DocumentStartToken): self.parse_document() else: raise Error("document is expected, got "+repr(self.tokens[self.index])) - self.consume_token(StreamEndToken) + self.get_token(StreamEndToken) self.events.append(StreamEndEvent(None, None)) # document: DIRECTIVE? DOCUMENT-START node def parse_document(self): node = None - if self.test_token(DirectiveToken): - self.consume_token(DirectiveToken) - self.consume_token(DocumentStartToken) + if self.check_token(DirectiveToken): + self.get_token(DirectiveToken) + self.get_token(DocumentStartToken) self.events.append(DocumentStartEvent(None, None)) self.parse_node() self.events.append(DocumentEndEvent(None, None)) # node: ALIAS | ANCHOR? TAG? (SCALAR|sequence|mapping) def parse_node(self): - if self.test_token(AliasToken): - self.events.append(AliasEvent(self.get_value(), None, None)) + if self.check_token(AliasToken): + self.events.append(AliasEvent(self.get_token_value(), None, None)) else: anchor = None - if self.test_token(AnchorToken): - anchor = self.get_value() + if self.check_token(AnchorToken): + anchor = self.get_token_value() tag = None - if self.test_token(TagToken): - tag = self.get_value() - if self.test_token(ScalarToken): - self.events.append(ScalarEvent(anchor, tag, self.get_value(), None, None)) - elif self.test_token(FlowSequenceStartToken): + if self.check_token(TagToken): + tag = self.get_token_value() + if self.check_token(ScalarToken): + self.events.append(ScalarEvent(anchor, tag, False, self.get_token_value(), None, None)) + elif self.check_token(FlowSequenceStartToken): self.events.append(SequenceStartEvent(anchor, tag, None, None)) self.parse_sequence() - elif self.test_token(FlowMappingStartToken): + elif self.check_token(FlowMappingStartToken): self.events.append(MappingStartEvent(anchor, tag, None, None)) self.parse_mapping() else: @@ -253,66 +274,79 @@ class CanonicalParser: # sequence: SEQUENCE-START (node (ENTRY node)*)? ENTRY? SEQUENCE-END def parse_sequence(self): - self.consume_token(FlowSequenceStartToken) - if not self.test_token(FlowSequenceEndToken): + self.get_token(FlowSequenceStartToken) + if not self.check_token(FlowSequenceEndToken): self.parse_node() - while not self.test_token(FlowSequenceEndToken): - self.consume_token(FlowEntryToken) - if not self.test_token(FlowSequenceEndToken): + while not self.check_token(FlowSequenceEndToken): + self.get_token(FlowEntryToken) + if not self.check_token(FlowSequenceEndToken): self.parse_node() - self.consume_token(FlowSequenceEndToken) + self.get_token(FlowSequenceEndToken) self.events.append(SequenceEndEvent(None, None)) # mapping: MAPPING-START (map_entry (ENTRY map_entry)*)? ENTRY? MAPPING-END def parse_mapping(self): - self.consume_token(FlowMappingStartToken) - if not self.test_token(FlowMappingEndToken): + self.get_token(FlowMappingStartToken) + if not self.check_token(FlowMappingEndToken): self.parse_map_entry() - while not self.test_token(FlowMappingEndToken): - self.consume_token(FlowEntryToken) - if not self.test_token(FlowMappingEndToken): + while not self.check_token(FlowMappingEndToken): + self.get_token(FlowEntryToken) + if not self.check_token(FlowMappingEndToken): self.parse_map_entry() - self.consume_token(FlowMappingEndToken) + self.get_token(FlowMappingEndToken) self.events.append(MappingEndEvent(None, None)) # map_entry: KEY node VALUE node def parse_map_entry(self): - self.consume_token(KeyToken) + self.get_token(KeyToken) self.parse_node() - self.consume_token(ValueToken) + self.get_token(ValueToken) self.parse_node() - def test_token(self, *choices): - for choice in choices: - if isinstance(self.tokens[self.index], choice): - return True - return False - - def consume_token(self, cls): - if not isinstance(self.tokens[self.index], cls): - raise Error("unexpected token "+repr(self.tokens[self.index])) - self.index += 1 - - def get_value(self): - value = self.tokens[self.index].value - self.index += 1 - return value - def parse(self): - self.tokens = self.scanner.scan() - self.index = 0 self.parse_stream() - return self.events - def get(self): + def get_event(self): return self.events.pop(0) - def check(self, *choices): - for choice in choices: - if isinstance(self.events[0], choice): + def check_event(self, *choices): + if self.events: + if not choices: return True + for choice in choices: + if isinstance(self.events[0], choice): + return True return False - def peek(self): + def peek_event(self): return self.events[0] +class CanonicalLoader(CanonicalScanner, CanonicalParser, Composer, Constructor, Detector): + + def __init__(self, stream): + if hasattr(stream, 'read'): + stream = stream.read() + CanonicalScanner.__init__(self, stream) + CanonicalParser.__init__(self) + Composer.__init__(self) + Constructor.__init__(self) + Detector.__init__(self) + +def canonical_scan(stream): + return scan(stream, Loader=CanonicalLoader) + +def canonical_parse(stream): + return parse(stream, Loader=CanonicalLoader) + +def canonical_compose(stream): + return compose(stream, Loader=CanonicalLoader) + +def canonical_compose_all(stream): + return compose_all(stream, Loader=CanonicalLoader) + +def canonical_load(stream): + return load(stream, Loader=CanonicalLoader) + +def canonical_load_all(stream): + return load_all(stream, Loader=CanonicalLoader) + diff --git a/tests/test_canonical.py b/tests/test_canonical.py index 7fa85dc..4416902 100644 --- a/tests/test_canonical.py +++ b/tests/test_canonical.py @@ -5,15 +5,13 @@ class TestCanonicalAppliance(test_appliance.TestAppliance): def _testCanonicalScanner(self, test_name, canonical_filename): data = file(canonical_filename, 'rb').read() - scanner = test_appliance.CanonicalScanner(data) - tokens = scanner.scan() + tokens = list(test_appliance.canonical_scan(data)) #for token in tokens: # print token def _testCanonicalParser(self, test_name, canonical_filename): data = file(canonical_filename, 'rb').read() - parser = test_appliance.CanonicalParser(data) - events = parser.parse() + event = list(test_appliance.canonical_parse(data)) #for event in events: # print event diff --git a/tests/test_constructor.py b/tests/test_constructor.py index 794d3e4..1c666f1 100644 --- a/tests/test_constructor.py +++ b/tests/test_constructor.py @@ -11,7 +11,7 @@ except NameError: from yaml import * -class MyConstructor(Constructor): +class MyLoader(Loader): pass class MyTestClass1: @@ -28,11 +28,11 @@ def construct1(constructor, node): mapping = constructor.construct_mapping(node) return MyTestClass1(**mapping) -MyConstructor.add_constructor("!tag1", construct1) +MyLoader.add_constructor("!tag1", construct1) class MyTestClass2(MyTestClass1, YAMLObject): - yaml_constructor = MyConstructor + yaml_loader = MyLoader yaml_tag = "!tag2" def from_yaml(cls, constructor, node): @@ -53,28 +53,41 @@ class MyTestClass3(MyTestClass2): return cls(**mapping) from_yaml = classmethod(from_yaml) +class YAMLObject1(YAMLObject): + yaml_loader = MyLoader + yaml_tag = '!foo' + + def __init__(self, my_parameter=None, my_another_parameter=None): + self.my_parameter = my_parameter + self.my_another_parameter = my_another_parameter + + def __eq__(self, other): + if isinstance(other, YAMLObject1): + return self.__class__, self.__dict__ == other.__class__, other.__dict__ + else: + return False + class TestTypes(test_appliance.TestAppliance): def _testTypes(self, test_name, data_filename, code_filename): - natives1 = None - natives2 = None + data1 = None + data2 = None try: - constructor1 = MyConstructor(Resolver(Composer(Parser(Scanner(Reader(file(data_filename, 'rb'))))))) - natives1 = list(iter(constructor1)) - if len(natives1) == 1: - natives1 = natives1[0] - natives2 = eval(file(code_filename, 'rb').read()) + data1 = list(load_all(file(data_filename, 'rb'), Loader=MyLoader)) + if len(data1) == 1: + data1 = data1[0] + data2 = eval(file(code_filename, 'rb').read()) try: - self.failUnlessEqual(natives1, natives2) + self.failUnlessEqual(data1, data2) except AssertionError: - if isinstance(natives1, dict): - natives1 = natives1.items() - natives1.sort() - natives1 = repr(natives1) - natives2 = natives2.items() - natives2.sort() - natives2 = repr(natives2) - if natives1 != natives2: + if isinstance(data1, dict): + data1 = data1.items() + data1.sort() + data1 = repr(data1) + data2 = data2.items() + data2.sort() + data2 = repr(data2) + if data1 != data2: raise except: print @@ -82,8 +95,8 @@ class TestTypes(test_appliance.TestAppliance): print file(data_filename, 'rb').read() print "CODE:" print file(code_filename, 'rb').read() - print "NATIVES1:", natives1 - print "NATIVES2:", natives2 + print "NATIVES1:", data1 + print "NATIVES2:", data2 raise TestTypes.add_tests('testTypes', '.data', '.code') diff --git a/tests/test_detector.py b/tests/test_detector.py index 491929d..661b24b 100644 --- a/tests/test_detector.py +++ b/tests/test_detector.py @@ -1,12 +1,7 @@ import test_appliance -from yaml.reader import Reader -from yaml.scanner import Scanner -from yaml.parser import * -from yaml.composer import * -from yaml.resolver import * -from yaml.nodes import * +from yaml import * class TestDetector(test_appliance.TestAppliance): @@ -15,8 +10,7 @@ class TestDetector(test_appliance.TestAppliance): correct_tag = None try: correct_tag = file(detect_filename, 'rb').read().strip() - resolver = Resolver(Composer(Parser(Scanner(Reader(file(data_filename, 'rb')))))) - node = list(iter(resolver))[0] + node = compose(file(data_filename, 'rb')) self.failUnless(isinstance(node, SequenceNode)) for scalar in node.value: self.failUnless(isinstance(scalar, ScalarNode)) @@ -33,4 +27,3 @@ class TestDetector(test_appliance.TestAppliance): TestDetector.add_tests('testDetector', '.data', '.detect') - diff --git a/tests/test_emitter.py b/tests/test_emitter.py index eb0910d..0480924 100644 --- a/tests/test_emitter.py +++ b/tests/test_emitter.py @@ -16,15 +16,11 @@ class TestEmitter(test_appliance.TestAppliance): self._testEmitter(test_name, canonical_filename, True) def _testEmitter(self, test_name, filename, canonical=None): - events = list(iter(Parser(Scanner(Reader(file(filename, 'rb')))))) - if canonical is not None: - events[0].canonical = canonical - #self._dump(filename, events) - writer = StringIO.StringIO() - emitter = Emitter(writer) - for event in events: - emitter.emit(event) - data = writer.getvalue() + events = list(parse(file(filename, 'rb'))) + #self._dump(filename, events, canonical) + stream = StringIO.StringIO() + emit(events, stream, canonical=canonical) + data = stream.getvalue() new_events = list(parse(data)) for event, new_event in zip(events, new_events): self.failUnlessEqual(event.__class__, new_event.__class__) @@ -38,22 +34,19 @@ class TestEmitter(test_appliance.TestAppliance): self.failUnlessEqual(event.tag, new_event.tag) self.failUnlessEqual(event.value, new_event.value) - def _dump(self, filename, events): - writer = sys.stdout - emitter = Emitter(writer) + def _dump(self, filename, events, canonical): print "="*30 print "ORIGINAL DOCUMENT:" print file(filename, 'rb').read() print '-'*30 print "EMITTED DOCUMENT:" - for event in events: - emitter.emit(event) + emit(events, sys.stdout, canonical=canonical) TestEmitter.add_tests('testEmitterOnData', '.canonical', '.data') -#TestEmitter.add_tests('testEmitterOnCanonicalNormally', '.canonical') -#TestEmitter.add_tests('testEmitterOnCanonicalCanonically', '.canonical') +TestEmitter.add_tests('testEmitterOnCanonicalNormally', '.canonical') +TestEmitter.add_tests('testEmitterOnCanonicalCanonically', '.canonical') -class EventsConstructor(Constructor): +class EventsLoader(Loader): def construct_event(self, node): if isinstance(node, ScalarNode): @@ -66,22 +59,21 @@ class EventsConstructor(Constructor): if class_name in ['ScalarEvent', 'SequenceStartEvent', 'MappingStartEvent']: mapping.setdefault('tag', None) if class_name == 'ScalarEvent': + mapping.setdefault('implicit', False) mapping.setdefault('value', '') value = getattr(yaml, class_name)(**mapping) return value -EventsConstructor.add_constructor(None, EventsConstructor.construct_event) +EventsLoader.add_constructor(None, EventsLoader.construct_event) class TestEmitterEvents(test_appliance.TestAppliance): def _testEmitterEvents(self, test_name, events_filename): - events = list(load(file(events_filename, 'rb'), Constructor=EventsConstructor)) + events = list(load(file(events_filename, 'rb'), Loader=EventsLoader)) #self._dump(events_filename, events) - writer = StringIO.StringIO() - emitter = Emitter(writer) - for event in events: - emitter.emit(event) - data = writer.getvalue() + stream = StringIO.StringIO() + emit(events, stream) + data = stream.getvalue() new_events = list(parse(data)) self.failUnlessEqual(len(events), len(new_events)) for event, new_event in zip(events, new_events): @@ -96,15 +88,12 @@ class TestEmitterEvents(test_appliance.TestAppliance): self.failUnlessEqual(event.value, new_event.value) def _dump(self, events_filename, events): - writer = sys.stdout - emitter = Emitter(writer) print "="*30 print "EVENTS:" print file(events_filename, 'rb').read() print '-'*30 print "OUTPUT:" - for event in events: - emitter.emit(event) + emit(events, sys.stdout) TestEmitterEvents.add_tests('testEmitterEvents', '.events') diff --git a/tests/test_errors.py b/tests/test_errors.py index 626b4e4..898c5a1 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -15,13 +15,7 @@ class TestErrors(test_appliance.TestAppliance): def _load(self, filename): try: - reader = Reader(file(filename, 'rb')) - scanner = Scanner(reader) - parser = Parser(scanner) - composer = Composer(parser) - resolver = Resolver(composer) - constructor = Constructor(resolver) - return list(constructor) + return list(load_all(file(filename, 'rb'))) except YAMLError, exc: #except ScannerError, exc: #except ParserError, exc: @@ -33,13 +27,7 @@ class TestErrors(test_appliance.TestAppliance): def _load_string(self, filename): try: - reader = Reader(file(filename, 'rb').read()) - scanner = Scanner(reader) - parser = Parser(scanner) - composer = Composer(parser) - resolver = Resolver(composer) - constructor = Constructor(resolver) - return list(constructor) + return list(load_all(file(filename, 'rb').read())) except YAMLError, exc: #except ScannerError, exc: #except ParserError, exc: diff --git a/tests/test_representer.py b/tests/test_representer.py index ffee7aa..82ad46e 100644 --- a/tests/test_representer.py +++ b/tests/test_representer.py @@ -12,9 +12,9 @@ except NameError: from yaml import * -class MyConstructor(Constructor): +class MyLoader(Loader): pass -class MyRepresenter(Representer): +class MyDumper(Dumper): pass class MyTestClass1(object): @@ -38,7 +38,8 @@ def represent1(representer, native): class MyTestClass2(MyTestClass1, YAMLObject): - yaml_constructor = MyConstructor + yaml_loader = MyLoader + yaml_dumper = MyDumper yaml_tag = "!tag2" def from_yaml(cls, constructor, node): @@ -67,36 +68,52 @@ class MyTestClass3(MyTestClass2): return representer.represent_mapping(cls.yaml_tag, native.__dict__) to_yaml = classmethod(to_yaml) -MyConstructor.add_constructor("!tag1", construct1) -MyRepresenter.add_representer(MyTestClass1, represent1) +MyLoader.add_constructor("!tag1", construct1) +MyDumper.add_representer(MyTestClass1, represent1) + +class YAMLObject1(YAMLObject): + yaml_loader = MyLoader + yaml_dumper = MyDumper + yaml_tag = '!foo' + yaml_flow_style = True + + def __init__(self, my_parameter=None, my_another_parameter=None): + self.my_parameter = my_parameter + self.my_another_parameter = my_another_parameter + + def __eq__(self, other): + if isinstance(other, YAMLObject1): + return self.__class__, self.__dict__ == other.__class__, other.__dict__ + else: + return False class TestTypeRepresenter(test_appliance.TestAppliance): def _testTypes(self, test_name, data_filename, code_filename): - natives1 = eval(file(code_filename, 'rb').read()) - natives2 = None + data1 = eval(file(code_filename, 'rb').read()) + data2 = None output = None try: - output = dump(natives1, Representer=MyRepresenter) - natives2 = load(output, Constructor=MyConstructor) + output = dump(data1, Dumper=MyDumper) + data2 = load(output, Loader=MyLoader) try: - self.failUnlessEqual(natives1, natives2) + self.failUnlessEqual(data1, data2) except AssertionError: - if isinstance(natives1, dict): - natives1 = natives1.items() - natives1.sort() - natives1 = repr(natives1) - natives2 = natives2.items() - natives2.sort() - natives2 = repr(natives2) - if natives1 != natives2: + if isinstance(data1, dict): + data1 = data1.items() + data1.sort() + data1 = repr(data1) + data2 = data2.items() + data2.sort() + data2 = repr(data2) + if data1 != data2: raise except: print print "OUTPUT:" print output - print "NATIVES1:", natives1 - print "NATIVES2:", natives2 + print "NATIVES1:", data1 + print "NATIVES2:", data2 raise TestTypeRepresenter.add_tests('testTypes', '.data', '.code') diff --git a/tests/test_structure.py b/tests/test_structure.py index cb9a017..b1fd91c 100644 --- a/tests/test_structure.py +++ b/tests/test_structure.py @@ -9,14 +9,14 @@ class TestStructure(test_appliance.TestAppliance): node1 = None node2 = eval(file(structure_filename, 'rb').read()) try: - parser = Parser(Scanner(Reader(file(data_filename, 'rb')))) + loader = Loader(file(data_filename, 'rb')) node1 = [] - while not parser.check(StreamEndEvent): - if not parser.check(StreamStartEvent, DocumentStartEvent, DocumentEndEvent): - node1.append(self._convert(parser)) + while not loader.check_event(StreamEndEvent): + if not loader.check_event(StreamStartEvent, DocumentStartEvent, DocumentEndEvent): + node1.append(self._convert(loader)) else: - parser.get() - parser.get() + loader.get_event() + loader.get_event() if len(node1) == 1: node1 = node1[0] self.failUnlessEqual(node1, node2) @@ -28,34 +28,34 @@ class TestStructure(test_appliance.TestAppliance): print "NODE2:", node2 raise - def _convert(self, parser): - if parser.check(ScalarEvent): - event = parser.get() + def _convert(self, loader): + if loader.check_event(ScalarEvent): + event = loader.get_event() if event.tag or event.anchor or event.value: return True else: return None - elif parser.check(SequenceStartEvent): - parser.get() + elif loader.check_event(SequenceStartEvent): + loader.get_event() sequence = [] - while not parser.check(SequenceEndEvent): - sequence.append(self._convert(parser)) - parser.get() + while not loader.check_event(SequenceEndEvent): + sequence.append(self._convert(loader)) + loader.get_event() return sequence - elif parser.check(MappingStartEvent): - parser.get() + elif loader.check_event(MappingStartEvent): + loader.get_event() mapping = [] - while not parser.check(MappingEndEvent): - key = self._convert(parser) - value = self._convert(parser) + while not loader.check_event(MappingEndEvent): + key = self._convert(loader) + value = self._convert(loader) mapping.append((key, value)) - parser.get() + loader.get_event() return mapping - elif parser.check(AliasEvent): - parser.get() + elif loader.check_event(AliasEvent): + loader.get_event() return '*' else: - parser.get() + loader.get_event() return '?' TestStructure.add_tests('testStructure', '.data', '.structure') @@ -66,10 +66,8 @@ class TestParser(test_appliance.TestAppliance): events1 = None events2 = None try: - parser = Parser(Scanner(Reader(file(data_filename, 'rb')))) - events1 = list(iter(parser)) - canonical = test_appliance.CanonicalParser(file(canonical_filename, 'rb').read()) - events2 = canonical.parse() + events1 = list(parse(file(data_filename, 'rb'))) + events2 = list(test_appliance.canonical_parse(file(canonical_filename, 'rb'))) self._compare(events1, events2) except: print @@ -105,12 +103,8 @@ class TestResolver(test_appliance.TestAppliance): nodes1 = None nodes2 = None try: - resolver1 = Resolver(Composer(Parser(Scanner(Reader(file(data_filename, 'rb')))))) - nodes1 = list(iter(resolver1)) - canonical = test_appliance.CanonicalParser(file(canonical_filename, 'rb').read()) - canonical.parse() - resolver2 = Resolver(Composer(canonical)) - nodes2 = list(iter(resolver2)) + nodes1 = list(compose_all(file(data_filename, 'rb'))) + nodes2 = list(test_appliance.canonical_compose_all(file(canonical_filename, 'rb'))) self.failUnlessEqual(len(nodes1), len(nodes2)) for node1, node2 in zip(nodes1, nodes2): self._compare(node1, node2) @@ -147,7 +141,7 @@ class TestResolver(test_appliance.TestAppliance): TestResolver.add_tests('testResolver', '.data', '.canonical') -class MyConstructor(Constructor): +class MyConstructor: def construct_sequence(self, node): return tuple(Constructor.construct_sequence(self, node)) @@ -157,29 +151,34 @@ class MyConstructor(Constructor): pairs.sort() return pairs -MyConstructor.add_constructor(None, MyConstructor.construct_scalar) + def construct_undefined(self, node): + return self.construct_scalar(node) + +class MyLoader(MyConstructor, Loader): + pass +MyLoader.add_constructor(None, MyLoader.construct_undefined) + +class MyCanonicalLoader(MyConstructor, test_appliance.CanonicalLoader): + pass +MyCanonicalLoader.add_constructor(None, MyCanonicalLoader.construct_undefined) class TestConstructor(test_appliance.TestAppliance): def _testConstructor(self, test_name, data_filename, canonical_filename): - natives1 = None - natives2 = None + data1 = None + data2 = None try: - constructor1 = MyConstructor(Resolver(Composer(Parser(Scanner(Reader(file(data_filename, 'rb'))))))) - natives1 = list(iter(constructor1)) - canonical = test_appliance.CanonicalParser(file(canonical_filename, 'rb').read()) - canonical.parse() - constructor2 = MyConstructor(Resolver(Composer(canonical))) - natives2 = list(iter(constructor2)) - self.failUnlessEqual(natives1, natives2) + data1 = list(load_all(file(data_filename, 'rb'), Loader=MyLoader)) + data2 = list(load_all(file(canonical_filename, 'rb'), Loader=MyCanonicalLoader)) + self.failUnlessEqual(data1, data2) except: print print "DATA1:" print file(data_filename, 'rb').read() print "DATA2:" print file(canonical_filename, 'rb').read() - print "NATIVES1:", natives1 - print "NATIVES2:", natives2 + print "NATIVES1:", data1 + print "NATIVES2:", data2 raise TestConstructor.add_tests('testConstructor', '.data', '.canonical') @@ -190,10 +189,8 @@ class TestParserOnCanonical(test_appliance.TestAppliance): events1 = None events2 = None try: - parser = Parser(Scanner(Reader(file(canonical_filename, 'rb')))) - events1 = list(iter(parser)) - canonical = test_appliance.CanonicalParser(file(canonical_filename, 'rb').read()) - events2 = canonical.parse() + events1 = list(parse(file(canonical_filename, 'rb'))) + events2 = list(test_appliance.canonical_parse(file(canonical_filename, 'rb'))) self._compare(events1, events2) except: print diff --git a/tests/test_tokens.py b/tests/test_tokens.py index 38026d5..73d07b3 100644 --- a/tests/test_tokens.py +++ b/tests/test_tokens.py @@ -1,9 +1,7 @@ import test_appliance -from yaml.reader import * -from yaml.tokens import * -from yaml.scanner import * +from yaml import * class TestTokens(test_appliance.TestAppliance): @@ -51,9 +49,8 @@ class TestTokens(test_appliance.TestAppliance): tokens1 = None tokens2 = file(tokens_filename, 'rb').read().split() try: - scanner = Scanner(Reader(file(data_filename, 'rb'))) tokens1 = [] - for token in scanner: + for token in scan(file(data_filename, 'rb')): if not isinstance(token, (StreamStartToken, StreamEndToken)): tokens1.append(token) tokens1 = [self.replaces[t.__class__] for t in tokens1] @@ -74,9 +71,8 @@ class TestScanner(test_appliance.TestAppliance): for filename in [canonical_filename, data_filename]: tokens = None try: - scanner = Scanner(Reader(file(filename, 'rb'))) tokens = [] - for token in scanner: + for token in scan(file(filename, 'rb')): if not isinstance(token, (StreamStartToken, StreamEndToken)): tokens.append(token.__class__.__name__) except: |