diff options
author | xi <xi@18f92427-320e-0410-9341-c67f048884a3> | 2006-07-07 19:39:29 +0000 |
---|---|---|
committer | xi <xi@18f92427-320e-0410-9341-c67f048884a3> | 2006-07-07 19:39:29 +0000 |
commit | 7204fe315a26c04f467a17d7ec0cac79da435882 (patch) | |
tree | 031452296080b82b72d20666c4098139a486d948 | |
parent | b229670c6969be4a6f01de487138e54288d10f5a (diff) | |
download | pyyaml-7204fe315a26c04f467a17d7ec0cac79da435882.tar.gz |
Update PyRex based bindings to libyaml to include Parser functions.
git-svn-id: http://svn.pyyaml.org/pyyaml/trunk@205 18f92427-320e-0410-9341-c67f048884a3
-rw-r--r-- | ext/_yaml.h | 2 | ||||
-rw-r--r-- | ext/_yaml.pxd | 75 | ||||
-rw-r--r-- | ext/_yaml.pyx | 222 | ||||
-rw-r--r-- | tests/test_yaml_ext.py | 32 |
4 files changed, 315 insertions, 16 deletions
diff --git a/ext/_yaml.h b/ext/_yaml.h index 94068ef..d8070e5 100644 --- a/ext/_yaml.h +++ b/ext/_yaml.h @@ -1,3 +1,3 @@ -#include <yaml/yaml.h> +#include <yaml.h> diff --git a/ext/_yaml.pxd b/ext/_yaml.pxd index bc9af37..79823b4 100644 --- a/ext/_yaml.pxd +++ b/ext/_yaml.pxd @@ -57,6 +57,17 @@ cdef extern from "_yaml.h": YAML_ANCHOR_TOKEN YAML_TAG_TOKEN YAML_SCALAR_TOKEN + cdef enum yaml_event_type_t: + YAML_STREAM_START_EVENT + YAML_STREAM_END_EVENT + YAML_DOCUMENT_START_EVENT + YAML_DOCUMENT_END_EVENT + YAML_ALIAS_EVENT + YAML_SCALAR_EVENT + YAML_SEQUENCE_START_EVENT + YAML_SEQUENCE_END_EVENT + YAML_MAPPING_START_EVENT + YAML_MAPPING_END_EVENT ctypedef int yaml_read_handler_t(void *data, char *buffer, int size, int *size_read) @@ -65,6 +76,19 @@ cdef extern from "_yaml.h": int index int line int column + ctypedef struct yaml_version_directive_t: + int major + int minor + ctypedef struct yaml_tag_directive_t: + char *handle + char *prefix + + ctypedef struct _yaml_token_stream_start_data_t: + yaml_encoding_t encoding + ctypedef struct _yaml_token_alias_data_t: + char *value + ctypedef struct _yaml_token_anchor_data_t: + char *value ctypedef struct _yaml_token_tag_data_t: char *handle char *suffix @@ -79,8 +103,9 @@ cdef extern from "_yaml.h": char *handle char *prefix ctypedef union _yaml_token_data_t: - yaml_encoding_t encoding - char *anchor + _yaml_token_stream_start_data_t stream_start + _yaml_token_alias_data_t alias + _yaml_token_anchor_data_t anchor _yaml_token_tag_data_t tag _yaml_token_scalar_data_t scalar _yaml_token_version_directive_data_t version_directive @@ -90,6 +115,49 @@ cdef extern from "_yaml.h": _yaml_token_data_t data yaml_mark_t start_mark yaml_mark_t end_mark + + ctypedef struct _yaml_event_stream_start_data_t: + yaml_encoding_t encoding + ctypedef struct _yaml_event_document_start_data_t: + yaml_version_directive_t *version_directive + yaml_tag_directive_t **tag_directives + int implicit + ctypedef struct _yaml_event_document_end_data_t: + int implicit + ctypedef struct _yaml_event_alias_data_t: + char *anchor + ctypedef struct _yaml_event_scalar_data_t: + char *anchor + char *tag + char *value + int length + int plain_implicit + int quoted_implicit + yaml_scalar_style_t style + ctypedef struct _yaml_event_sequence_start_data_t: + char *anchor + char *tag + int implicit + yaml_sequence_style_t style + ctypedef struct _yaml_event_mapping_start_data_t: + char *anchor + char *tag + int implicit + yaml_mapping_style_t style + ctypedef union _yaml_event_data_t: + _yaml_event_stream_start_data_t stream_start + _yaml_event_document_start_data_t document_start + _yaml_event_document_end_data_t document_end + _yaml_event_alias_data_t alias + _yaml_event_scalar_data_t scalar + _yaml_event_sequence_start_data_t sequence_start + _yaml_event_mapping_start_data_t mapping_start + ctypedef struct yaml_event_t: + yaml_event_type_t type + _yaml_event_data_t data + yaml_mark_t start_mark + yaml_mark_t end_mark + ctypedef struct yaml_parser_t: yaml_error_type_t error char *problem @@ -102,6 +170,7 @@ cdef extern from "_yaml.h": char *yaml_get_version_string() void yaml_get_version(int *major, int *minor, int *patch) void yaml_token_delete(yaml_token_t *token) + void yaml_event_delete(yaml_event_t *event) yaml_parser_t *yaml_parser_new() void yaml_parser_delete(yaml_parser_t *parser) void yaml_parser_set_input_string(yaml_parser_t *parser, @@ -112,4 +181,6 @@ cdef extern from "_yaml.h": yaml_encoding_t encoding) yaml_token_t *yaml_parser_get_token(yaml_parser_t *parser) yaml_token_t *yaml_parser_peek_token(yaml_parser_t *parser) + yaml_event_t *yaml_parser_get_event(yaml_parser_t *parser) + yaml_event_t *yaml_parser_peek_event(yaml_parser_t *parser) diff --git a/ext/_yaml.pyx b/ext/_yaml.pyx index bfdafe3..65fecaa 100644 --- a/ext/_yaml.pyx +++ b/ext/_yaml.pyx @@ -29,12 +29,33 @@ def test_scanner(data): yaml_token_delete(token) yaml_parser_delete(parser) -cdef class Scanner: +def test_parser(data): + cdef yaml_parser_t *parser + cdef yaml_event_t *event + cdef int done + if PyString_CheckExact(data) == 0: + raise TypeError("string input required") + parser = yaml_parser_new() + if parser == NULL: + raise MemoryError + yaml_parser_set_input_string(parser, PyString_AS_STRING(data), PyString_GET_SIZE(data)) + done = 0 + while done == 0: + event = yaml_parser_get_event(parser) + if event == NULL: + raise MemoryError + if event.type == YAML_STREAM_END_EVENT: + done = 1 + yaml_event_delete(event) + yaml_parser_delete(parser) + +cdef class ScannerAndParser: cdef yaml_parser_t *parser cdef int eof cdef object stream cdef yaml_token_t *cached_token + cdef yaml_event_t *cached_event cdef object cached_obj def __init__(self, stream): @@ -58,7 +79,7 @@ cdef class Scanner: yaml_parser_delete(self.parser) self.parser = NULL - cdef object _convert(self, yaml_token_t *token): + cdef object _convert_token(self, yaml_token_t *token): if token == NULL: if self.parser.error == YAML_MEMORY_ERROR: raise MemoryError @@ -143,10 +164,10 @@ cdef class Scanner: elif token.type == YAML_VALUE_TOKEN: return yaml.ValueToken(start_mark, end_mark) elif token.type == YAML_ALIAS_TOKEN: - return yaml.AliasToken(token.data.anchor, + return yaml.AliasToken(token.data.alias.value, start_mark, end_mark) elif token.type == YAML_ANCHOR_TOKEN: - return yaml.AnchorToken(token.data.anchor, + return yaml.AnchorToken(token.data.anchor.value, start_mark, end_mark) elif token.type == YAML_TAG_TOKEN: handle = token.data.tag.handle @@ -162,6 +183,136 @@ cdef class Scanner: else: raise RuntimeError("unknown token type") + cdef object _convert_event(self, yaml_event_t *event): + if event == NULL: + if self.parser.error == YAML_MEMORY_ERROR: + raise MemoryError + elif self.parser.error == YAML_READER_ERROR: + raise yaml.reader.ReaderError("<input>", + self.parser.problem_offset, + self.parser.problem_value, + '?', self.parser.problem) + elif self.parser.error == YAML_SCANNER_ERROR: + if self.parser.context != NULL: + raise yaml.scanner.ScannerError( + self.parser.context, + yaml.Mark("<input>", + self.parser.context_mark.index, + self.parser.context_mark.line, + self.parser.context_mark.column, + None, None), + self.parser.problem, + yaml.Mark("<input>", + self.parser.problem_mark.index, + self.parser.problem_mark.line, + self.parser.problem_mark.column, + None, None)) + else: + raise yaml.scanner.ScannerError(None, None, + self.parser.problem, + yaml.Mark("<input>", + self.parser.problem_mark.index, + self.parser.problem_mark.line, + self.parser.problem_mark.column, + None, None)) + elif self.parser.error == YAML_PARSER_ERROR: + if self.parser.context != NULL: + raise yaml.parser.ParserError( + self.parser.context, + yaml.Mark("<input>", + self.parser.context_mark.index, + self.parser.context_mark.line, + self.parser.context_mark.column, + None, None), + self.parser.problem, + yaml.Mark("<input>", + self.parser.problem_mark.index, + self.parser.problem_mark.line, + self.parser.problem_mark.column, + None, None)) + else: + raise yaml.parser.ParserError(None, None, + self.parser.problem, + yaml.Mark("<input>", + self.parser.problem_mark.index, + self.parser.problem_mark.line, + self.parser.problem_mark.column, + None, None)) + else: + raise RuntimeError("neither error nor event produced") + start_mark = yaml.Mark("<input>", + event.start_mark.index, + event.start_mark.line, + event.start_mark.column, + None, None) + end_mark = yaml.Mark("<input>", + event.end_mark.index, + event.end_mark.line, + event.end_mark.column, + None, None) + if event.type == YAML_STREAM_START_EVENT: + return yaml.StreamStartEvent(start_mark, end_mark) + elif event.type == YAML_STREAM_END_EVENT: + return yaml.StreamEndEvent(start_mark, end_mark) + elif event.type == YAML_DOCUMENT_START_EVENT: + return yaml.DocumentStartEvent(start_mark, end_mark, + (event.data.document_start.implicit == 0)) + elif event.type == YAML_DOCUMENT_END_EVENT: + return yaml.DocumentEndEvent(start_mark, end_mark, + (event.data.document_end.implicit == 0)) + elif event.type == YAML_SCALAR_EVENT: + if event.data.scalar.anchor == NULL: + anchor = None + else: + anchor = event.data.scalar.anchor + if event.data.scalar.tag == NULL: + tag = None + else: + tag = event.data.scalar.tag + implicit = (event.data.scalar.plain_implicit == 1, event.data.scalar.quoted_implicit == 1) + flow_style = (event.data.sequence_start.style == YAML_FLOW_SEQUENCE_STYLE) + value = PyString_FromStringAndSize(event.data.scalar.value, event.data.scalar.length) + return yaml.ScalarEvent(anchor, tag, implicit, unicode(value, 'utf-8'), + start_mark, end_mark) + elif event.type == YAML_ALIAS_EVENT: + if event.data.alias.anchor == NULL: + anchor = None + else: + anchor = event.data.alias.anchor + return yaml.AliasEvent(anchor, start_mark, end_mark) + elif event.type == YAML_SEQUENCE_START_EVENT: + if event.data.sequence_start.anchor == NULL: + anchor = None + else: + anchor = event.data.sequence_start.anchor + if event.data.sequence_start.tag == NULL: + tag = None + else: + tag = event.data.sequence_start.tag + implicit = (event.data.sequence_start.implicit == 1) + flow_style = (event.data.sequence_start.style == YAML_FLOW_SEQUENCE_STYLE) + return yaml.SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style) + elif event.type == YAML_MAPPING_START_EVENT: + if event.data.mapping_start.anchor == NULL: + anchor = None + else: + anchor = event.data.mapping_start.anchor + if event.data.mapping_start.tag == NULL: + tag = None + else: + tag = event.data.mapping_start.tag + implicit = (event.data.mapping_start.implicit == 1) + flow_style = (event.data.mapping_start.style == YAML_FLOW_MAPPING_STYLE) + return yaml.MappingStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style) + elif event.type == YAML_SEQUENCE_END_EVENT: + return yaml.SequenceEndEvent(start_mark, end_mark) + elif event.type == YAML_MAPPING_END_EVENT: + return yaml.MappingEndEvent(start_mark, end_mark) + else: + raise RuntimeError("unknown event type") + def get_token(self): cdef yaml_token_t *token if self.cached_token != NULL: @@ -173,7 +324,7 @@ cdef class Scanner: if self.eof != 0: return None token = yaml_parser_get_token(self.parser) - obj = self._convert(token) + obj = self._convert_token(token) if token.type == YAML_STREAM_END_TOKEN: self.eof = 1 yaml_token_delete(token) @@ -186,7 +337,7 @@ cdef class Scanner: if self.eof != 0: return None token = yaml_parser_peek_token(self.parser) - obj = self._convert(token) + obj = self._convert_token(token) if token.type == YAML_STREAM_END_TOKEN: self.eof = 1 self.cached_token = token @@ -201,7 +352,7 @@ cdef class Scanner: return False else: token = yaml_parser_peek_token(self.parser) - obj = self._convert(token) + obj = self._convert_token(token) if token.type == YAML_STREAM_END_TOKEN: self.eof = 1 self.cached_token = token @@ -213,15 +364,64 @@ cdef class Scanner: return True return False -class Loader(Scanner, - yaml.parser.Parser, + def get_event(self): + cdef yaml_event_t *event + if self.cached_event != NULL: + yaml_event_delete(yaml_parser_get_event(self.parser)) + obj = self.cached_obj + self.cached_event = NULL + self.cached_obj = None + return obj + if self.eof != 0: + return None + event = yaml_parser_get_event(self.parser) + obj = self._convert_event(event) + if event.type == YAML_STREAM_END_EVENT: + self.eof = 1 + yaml_event_delete(event) + return obj + + def peek_event(self): + cdef yaml_event_t *event + if self.cached_event != NULL: + return self.cached_obj + if self.eof != 0: + return None + event = yaml_parser_peek_event(self.parser) + obj = self._convert_event(event) + if event.type == YAML_STREAM_END_EVENT: + self.eof = 1 + self.cached_event = event + self.cached_obj = obj + return obj + + def check_event(self, *choices): + cdef yaml_event_t *event + if self.cached_event != NULL: + obj = self.cached_obj + elif self.eof != 0: + return False + else: + event = yaml_parser_peek_event(self.parser) + obj = self._convert_event(event) + if event.type == YAML_STREAM_END_EVENT: + self.eof = 1 + self.cached_event = event + self.cached_obj = obj + if not choices: + return True + for choice in choices: + if isinstance(obj, choice): + return True + return False + +class Loader(ScannerAndParser, yaml.composer.Composer, yaml.constructor.Constructor, yaml.resolver.Resolver): def __init__(self, stream): - Scanner.__init__(self, stream) - yaml.parser.Parser.__init__(self) + ScannerAndParser.__init__(self, stream) yaml.composer.Composer.__init__(self) yaml.constructor.Constructor.__init__(self) yaml.resolver.Resolver.__init__(self) diff --git a/tests/test_yaml_ext.py b/tests/test_yaml_ext.py index ffd894f..cd3e204 100644 --- a/tests/test_yaml_ext.py +++ b/tests/test_yaml_ext.py @@ -8,7 +8,7 @@ class TestExtVersion(unittest.TestCase): def testExtVersion(self): self.failUnlessEqual("%s.%s.%s" % _yaml.get_version(), _yaml.get_version_string()) -class TestExtScanner(test_appliance.TestAppliance): +class TestExtLoader(test_appliance.TestAppliance): def _testExtScanner(self, test_name, data_filename, canonical_filename): data = file(data_filename, 'r').read() @@ -34,7 +34,35 @@ class TestExtScanner(test_appliance.TestAppliance): print "EXT_TOKENS:", ext_tokens raise -TestExtScanner.add_tests('testExtScanner', '.data', '.canonical') + def _testExtParser(self, test_name, data_filename, canonical_filename): + data = file(data_filename, 'r').read() + events = list(yaml.parse(data)) + ext_events = [] + try: + for event in yaml.parse(data, Loader=yaml.ExtLoader): + ext_events.append(event) + #print "EVENT:", event + self.failUnlessEqual(len(events), len(ext_events)) + for event, ext_event in zip(events, ext_events): + self.failUnlessEqual(event.__class__, ext_event.__class__) + if hasattr(event, 'anchor'): + self.failUnlessEqual(event.anchor, ext_event.anchor) + if hasattr(event, 'tag'): + self.failUnlessEqual(event.tag, ext_event.tag) + if hasattr(event, 'implicit'): + self.failUnlessEqual(event.implicit, ext_event.implicit) + if hasattr(event, 'value'): + self.failUnlessEqual(event.value, ext_event.value) + except: + print + print "DATA:" + print file(data_filename, 'rb').read() + print "EVENTS:", events + print "EXT_EVENTS:", ext_events + raise + +TestExtLoader.add_tests('testExtScanner', '.data', '.canonical') +TestExtLoader.add_tests('testExtParser', '.data', '.canonical') def main(module='__main__'): unittest.main(module) |