diff options
author | xi <xi@18f92427-320e-0410-9341-c67f048884a3> | 2006-07-21 13:52:03 +0000 |
---|---|---|
committer | xi <xi@18f92427-320e-0410-9341-c67f048884a3> | 2006-07-21 13:52:03 +0000 |
commit | 33cc8fd13df0c3f0495df2bb8b8ab98d0e9e171c (patch) | |
tree | 39d343ea0ae09f7b00116a54b5a8fafa418a2a33 | |
parent | b24bfb98d5ef5f796e22d53733d0ff63ce5fb821 (diff) | |
download | pyyaml-33cc8fd13df0c3f0495df2bb8b8ab98d0e9e171c.tar.gz |
Update libyaml bindings.
git-svn-id: http://svn.pyyaml.org/pyyaml/trunk@209 18f92427-320e-0410-9341-c67f048884a3
-rw-r--r-- | ext/_yaml.pxd | 16 | ||||
-rw-r--r-- | ext/_yaml.pyx | 692 | ||||
-rw-r--r-- | tests/test_yaml_ext.py | 13 |
3 files changed, 404 insertions, 317 deletions
diff --git a/ext/_yaml.pxd b/ext/_yaml.pxd index 79823b4..adf3ad2 100644 --- a/ext/_yaml.pxd +++ b/ext/_yaml.pxd @@ -1,11 +1,15 @@ cdef extern from "_yaml.h": + void memcpy(char *d, char *s, int l) + int strlen(char *s) int PyString_CheckExact(object o) int PyUnicode_CheckExact(object o) char *PyString_AS_STRING(object o) int PyString_GET_SIZE(object o) object PyString_FromStringAndSize(char *v, int l) + object PyUnicode_DecodeUTF8(char *s, int s, char *e) + object PyUnicode_AsUTF8String(object o) cdef enum yaml_encoding_t: YAML_ANY_ENCODING @@ -36,6 +40,7 @@ cdef extern from "_yaml.h": YAML_BLOCK_MAPPING_STYLE YAML_FLOW_MAPPING_STYLE cdef enum yaml_token_type_t: + YAML_NO_TOKEN YAML_STREAM_START_TOKEN YAML_STREAM_END_TOKEN YAML_VERSION_DIRECTIVE_TOKEN @@ -58,6 +63,7 @@ cdef extern from "_yaml.h": YAML_TAG_TOKEN YAML_SCALAR_TOKEN cdef enum yaml_event_type_t: + YAML_NO_EVENT YAML_STREAM_START_EVENT YAML_STREAM_END_EVENT YAML_DOCUMENT_START_EVENT @@ -70,7 +76,7 @@ cdef extern from "_yaml.h": YAML_MAPPING_END_EVENT ctypedef int yaml_read_handler_t(void *data, char *buffer, - int size, int *size_read) + int size, int *size_read) except 0 ctypedef struct yaml_mark_t: int index @@ -171,7 +177,7 @@ cdef extern from "_yaml.h": void yaml_get_version(int *major, int *minor, int *patch) void yaml_token_delete(yaml_token_t *token) void yaml_event_delete(yaml_event_t *event) - yaml_parser_t *yaml_parser_new() + int yaml_parser_initialize(yaml_parser_t *parser) void yaml_parser_delete(yaml_parser_t *parser) void yaml_parser_set_input_string(yaml_parser_t *parser, char *input, int size) @@ -179,8 +185,6 @@ cdef extern from "_yaml.h": yaml_read_handler_t *handler, void *data) void yaml_parser_set_encoding(yaml_parser_t *parser, yaml_encoding_t encoding) - yaml_token_t *yaml_parser_get_token(yaml_parser_t *parser) - yaml_token_t *yaml_parser_peek_token(yaml_parser_t *parser) - yaml_event_t *yaml_parser_get_event(yaml_parser_t *parser) - yaml_event_t *yaml_parser_peek_event(yaml_parser_t *parser) + int yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token) except * + int yaml_parser_parse(yaml_parser_t *parser, yaml_event_t *event) except * diff --git a/ext/_yaml.pyx b/ext/_yaml.pyx index 65fecaa..b540440 100644 --- a/ext/_yaml.pyx +++ b/ext/_yaml.pyx @@ -9,411 +9,485 @@ def get_version(): yaml_get_version(&major, &minor, &patch) return (major, minor, patch) -def test_scanner(data): - cdef yaml_parser_t *parser - cdef yaml_token_t *token +def test_scanner(stream): + cdef yaml_parser_t parser + cdef yaml_token_t token cdef int done - if PyString_CheckExact(data) == 0: - raise TypeError("string input required") - parser = yaml_parser_new() - if parser == NULL: - raise MemoryError - yaml_parser_set_input_string(parser, PyString_AS_STRING(data), PyString_GET_SIZE(data)) + cdef int count + if hasattr(stream, 'read'): + stream = stream.read() + if PyUnicode_CheckExact(stream) != 0: + stream = stream.encode('utf-8') + if PyString_CheckExact(stream) == 0: + raise TypeError("a string or stream input is required") + if yaml_parser_initialize(&parser) == 0: + raise RuntimeError("cannot initialize parser") + yaml_parser_set_input_string(&parser, PyString_AS_STRING(stream), PyString_GET_SIZE(stream)) done = 0 + count = 0 while done == 0: - token = yaml_parser_get_token(parser) - if token == NULL: - raise MemoryError - if token.type == YAML_STREAM_END_TOKEN: + if yaml_parser_scan(&parser, &token) == 0: + raise RuntimeError("cannot get next token: #%s" % count) + if token.type == YAML_NO_TOKEN: done = 1 - yaml_token_delete(token) - yaml_parser_delete(parser) + else: + count = count+1 + yaml_token_delete(&token) + yaml_parser_delete(&parser) + dummy = len(stream) + return count -def test_parser(data): - cdef yaml_parser_t *parser - cdef yaml_event_t *event +def test_parser(stream): + cdef yaml_parser_t parser + cdef yaml_event_t event cdef int done - if PyString_CheckExact(data) == 0: - raise TypeError("string input required") - parser = yaml_parser_new() - if parser == NULL: - raise MemoryError - yaml_parser_set_input_string(parser, PyString_AS_STRING(data), PyString_GET_SIZE(data)) + cdef int count + if hasattr(stream, 'read'): + stream = stream.read() + if PyUnicode_CheckExact(stream) != 0: + stream = stream.encode('utf-8') + if PyString_CheckExact(stream) == 0: + raise TypeError("a string or stream input is required") + if yaml_parser_initialize(&parser) == 0: + raise RuntimeError("cannot initialize parser") + yaml_parser_set_input_string(&parser, PyString_AS_STRING(stream), PyString_GET_SIZE(stream)) done = 0 + count = 0 while done == 0: - event = yaml_parser_get_event(parser) - if event == NULL: - raise MemoryError - if event.type == YAML_STREAM_END_EVENT: + if yaml_parser_parse(&parser, &event) == 0: + raise RuntimeError("cannot get next event: #%s" % count) + if event.type == YAML_NO_EVENT: done = 1 - yaml_event_delete(event) - yaml_parser_delete(parser) + else: + count = count+1 + yaml_event_delete(&event) + yaml_parser_delete(&parser) + dummy = len(stream) + return count cdef class ScannerAndParser: - cdef yaml_parser_t *parser - cdef int eof + cdef yaml_parser_t parser + cdef object stream - cdef yaml_token_t *cached_token - cdef yaml_event_t *cached_event - cdef object cached_obj + cdef object current_token + cdef object current_event + + cdef object cached_input + cdef object cached_YAML + cdef object cached_TAG + cdef object cached_question + cdef object cached_Mark + cdef object cached_ReaderError + cdef object cached_ScannerError + cdef object cached_ParserError + cdef object cached_StreamStartToken + cdef object cached_StreamEndToken + cdef object cached_DirectiveToken + cdef object cached_DocumentStartToken + cdef object cached_DocumentEndToken + cdef object cached_BlockSequenceStartToken + cdef object cached_BlockMappingStartToken + cdef object cached_BlockEndToken + cdef object cached_FlowSequenceStartToken + cdef object cached_FlowMappingStartToken + cdef object cached_FlowSequenceEndToken + cdef object cached_FlowMappingEndToken + cdef object cached_BlockEntryToken + cdef object cached_FlowEntryToken + cdef object cached_KeyToken + cdef object cached_ValueToken + cdef object cached_AliasToken + cdef object cached_AnchorToken + cdef object cached_TagToken + cdef object cached_ScalarToken + cdef object cached_StreamStartEvent + cdef object cached_StreamEndEvent + cdef object cached_DocumentStartEvent + cdef object cached_DocumentEndEvent + cdef object cached_AliasEvent + cdef object cached_SequenceStartEvent + cdef object cached_SequenceEndEvent + cdef object cached_MappingStartEvent + cdef object cached_MappingEndEvent def __init__(self, stream): - if hasattr(stream, 'read'): - stream = stream.read() - if PyUnicode_CheckExact(stream) != 0: - stream = stream.encode('utf-8') - if PyString_CheckExact(stream) == 0: - raise TypeError("a string or stream input is required") - self.parser = yaml_parser_new() - if self.parser == NULL: + if yaml_parser_initialize(&self.parser) == 0: raise MemoryError - yaml_parser_set_input_string(self.parser, PyString_AS_STRING(stream), PyString_GET_SIZE(stream)) - self.eof = 0 - self.stream = stream - self.cached_token = NULL - self.cached_obj = None + if hasattr(stream, 'read'): + self.stream = stream + yaml_parser_set_input(&self.parser, input_handler, <void *>self) + else: + if PyUnicode_CheckExact(stream) != 0: + stream = PyUnicode_AsUTF8String(stream) + if PyString_CheckExact(stream) == 0: + raise TypeError("a string or stream input is required") + self.stream = stream + yaml_parser_set_input_string(&self.parser, PyString_AS_STRING(stream), PyString_GET_SIZE(stream)) + self.current_token = None + self._cache_names() + + def get_token(self): + if self.current_token is not None: + value = self.current_token + self.current_token = None + else: + value = self._scan() + return value + + def peek_token(self): + if self.current_token is None: + self.current_token = self._scan() + return self.current_token + + def check_token(self, *choices): + if self.current_token is None: + self.current_token = self._scan() + if self.current_token is None: + return False + if not choices: + return True + token_class = self.current_token.__class__ + for choice in choices: + if token_class is choice: + return True + return False + + def get_event(self): + if self.current_event is not None: + value = self.current_event + self.current_event = None + else: + value = self._parse() + return value + + def peek_event(self): + if self.current_event is None: + self.current_event = self._parse() + return self.current_event + + def check_event(self, *choices): + if self.current_event is None: + self.current_event = self._parse() + if self.current_event is None: + return False + if not choices: + return True + event_class = self.current_event.__class__ + for choice in choices: + if event_class is choice: + return True + return False def __dealloc__(self): - if self.parser != NULL: - yaml_parser_delete(self.parser) - self.parser = NULL + yaml_parser_delete(&self.parser) - cdef object _convert_token(self, yaml_token_t *token): - if token == NULL: + cdef object _cache_names(self): + self.cached_input = '<input>' + self.cached_YAML = 'YAML' + self.cached_TAG = 'TAG' + self.cached_question = '?' + self.cached_Mark = yaml.Mark + self.cached_ReaderError = yaml.reader.ReaderError + self.cached_ScannerError = yaml.scanner.ScannerError + self.cached_ParserError = yaml.parser.ParserError + self.cached_StreamStartToken = yaml.StreamStartToken + self.cached_StreamEndToken = yaml.StreamEndToken + self.cached_DirectiveToken = yaml.DirectiveToken + self.cached_DocumentStartToken = yaml.DocumentStartToken + self.cached_DocumentEndToken = yaml.DocumentEndToken + self.cached_BlockSequenceStartToken = yaml.BlockSequenceStartToken + self.cached_BlockMappingStartToken = yaml.BlockMappingStartToken + self.cached_BlockEndToken = yaml.BlockEndToken + self.cached_FlowSequenceStartToken = yaml.FlowSequenceStartToken + self.cached_FlowMappingStartToken = yaml.FlowMappingStartToken + self.cached_FlowSequenceEndToken = yaml.FlowSequenceEndToken + self.cached_FlowMappingEndToken = yaml.FlowMappingEndToken + self.cached_BlockEntryToken = yaml.BlockEntryToken + self.cached_FlowEntryToken = yaml.FlowEntryToken + self.cached_KeyToken = yaml.KeyToken + self.cached_ValueToken = yaml.ValueToken + self.cached_AliasToken = yaml.AliasToken + self.cached_AnchorToken = yaml.AnchorToken + self.cached_TagToken = yaml.TagToken + self.cached_ScalarToken = yaml.ScalarToken + self.cached_StreamStartEvent = yaml.StreamStartEvent + self.cached_StreamEndEvent = yaml.StreamEndEvent + self.cached_DocumentStartEvent = yaml.DocumentStartEvent + self.cached_DocumentEndEvent = yaml.DocumentEndEvent + self.cached_AliasEvent = yaml.AliasEvent + self.cached_ScalarEvent = yaml.ScalarEvent + self.cached_SequenceStartEvent = yaml.SequenceStartEvent + self.cached_SequenceEndEvent = yaml.SequenceEndEvent + self.cached_MappingStartEvent = yaml.MappingStartEvent + self.cached_MappingEndEvent = yaml.MappingEndEvent + + cdef object _scan(self): + cdef yaml_token_t token + if yaml_parser_scan(&self.parser, &token) == 0: if self.parser.error == YAML_MEMORY_ERROR: raise MemoryError elif self.parser.error == YAML_READER_ERROR: - raise yaml.reader.ReaderError("<input>", + raise self.cached_ReaderError(self.cached_input, self.parser.problem_offset, self.parser.problem_value, - '?', self.parser.problem) + self.cached_question, self.parser.problem) elif self.parser.error == YAML_SCANNER_ERROR: + context_mark = None + problem_mark = None + if self.parser.context != NULL: + context_mark = self.cached_Mark(self.cached_input, + self.parser.context_mark.index, + self.parser.context_mark.line, + self.parser.context_mark.column, + None, None) + if self.parser.problem != NULL: + problem_mark = self.cached_Mark(self.cached_input, + self.parser.problem_mark.index, + self.parser.problem_mark.line, + self.parser.problem_mark.column, + None, None) if self.parser.context != NULL: - raise yaml.scanner.ScannerError( - self.parser.context, - yaml.Mark("<input>", - self.parser.context_mark.index, - self.parser.context_mark.line, - self.parser.context_mark.column, - None, None), - self.parser.problem, - yaml.Mark("<input>", - self.parser.problem_mark.index, - self.parser.problem_mark.line, - self.parser.problem_mark.column, - None, None)) + raise self.cached_ScannerError( + self.parser.context, context_mark, + self.parser.problem, problem_mark) else: raise yaml.scanner.ScannerError(None, None, - self.parser.problem, - yaml.Mark("<input>", - self.parser.problem_mark.index, - self.parser.problem_mark.line, - self.parser.problem_mark.column, - None, None)) - else: - raise RuntimeError("neither error nor token produced") - start_mark = yaml.Mark("<input>", + self.parser.problem, problem_mark) + start_mark = yaml.Mark(self.cached_input, token.start_mark.index, token.start_mark.line, token.start_mark.column, None, None) - end_mark = yaml.Mark("<input>", + end_mark = yaml.Mark(self.cached_input, token.end_mark.index, token.end_mark.line, token.end_mark.column, None, None) - if token.type == YAML_STREAM_START_TOKEN: - return yaml.StreamStartToken(start_mark, end_mark) + if token.type == YAML_NO_TOKEN: + return None + elif token.type == YAML_STREAM_START_TOKEN: + return self.cached_StreamStartToken(start_mark, end_mark) elif token.type == YAML_STREAM_END_TOKEN: - return yaml.StreamEndToken(start_mark, end_mark) + return self.cached_StreamEndToken(start_mark, end_mark) elif token.type == YAML_VERSION_DIRECTIVE_TOKEN: - return yaml.DirectiveToken('YAML', + return self.cached_DirectiveToken(self.cached_YAML, (token.data.version_directive.major, token.data.version_directive.minor), start_mark, end_mark) elif token.type == YAML_TAG_DIRECTIVE_TOKEN: - return yaml.DirectiveToken('TAG', + return self.cached_DirectiveToken(self.cached_TAG, (token.data.tag_directive.handle, token.data.tag_directive.prefix), start_mark, end_mark) elif token.type == YAML_DOCUMENT_START_TOKEN: - return yaml.DocumentStartToken(start_mark, end_mark) + return self.cached_DocumentStartToken(start_mark, end_mark) elif token.type == YAML_DOCUMENT_END_TOKEN: - return yaml.DocumentEndToken(start_mark, end_mark) + return self.cached_DocumentEndToken(start_mark, end_mark) elif token.type == YAML_BLOCK_SEQUENCE_START_TOKEN: - return yaml.BlockSequenceStartToken(start_mark, end_mark) + return self.cached_BlockSequenceStartToken(start_mark, end_mark) elif token.type == YAML_BLOCK_MAPPING_START_TOKEN: - return yaml.BlockMappingStartToken(start_mark, end_mark) + return self.cached_BlockMappingStartToken(start_mark, end_mark) elif token.type == YAML_BLOCK_END_TOKEN: - return yaml.BlockEndToken(start_mark, end_mark) + return self.cached_BlockEndToken(start_mark, end_mark) elif token.type == YAML_FLOW_SEQUENCE_START_TOKEN: - return yaml.FlowSequenceStartToken(start_mark, end_mark) + return self.cached_FlowSequenceStartToken(start_mark, end_mark) elif token.type == YAML_FLOW_SEQUENCE_END_TOKEN: - return yaml.FlowSequenceEndToken(start_mark, end_mark) + return self.cached_FlowSequenceEndToken(start_mark, end_mark) elif token.type == YAML_FLOW_MAPPING_START_TOKEN: - return yaml.FlowMappingStartToken(start_mark, end_mark) + return self.cached_FlowMappingStartToken(start_mark, end_mark) elif token.type == YAML_FLOW_MAPPING_END_TOKEN: - return yaml.FlowMappingEndToken(start_mark, end_mark) + return self.cached_FlowMappingEndToken(start_mark, end_mark) elif token.type == YAML_BLOCK_ENTRY_TOKEN: - return yaml.BlockEntryToken(start_mark, end_mark) + return self.cached_BlockEntryToken(start_mark, end_mark) elif token.type == YAML_FLOW_ENTRY_TOKEN: - return yaml.FlowEntryToken(start_mark, end_mark) + return self.cached_FlowEntryToken(start_mark, end_mark) elif token.type == YAML_KEY_TOKEN: - return yaml.KeyToken(start_mark, end_mark) + return self.cached_KeyToken(start_mark, end_mark) elif token.type == YAML_VALUE_TOKEN: - return yaml.ValueToken(start_mark, end_mark) + return self.cached_ValueToken(start_mark, end_mark) elif token.type == YAML_ALIAS_TOKEN: - return yaml.AliasToken(token.data.alias.value, - start_mark, end_mark) + value = PyUnicode_DecodeUTF8(token.data.alias.value, + strlen(token.data.alias.value), 'strict') + return self.cached_AliasToken(value, start_mark, end_mark) elif token.type == YAML_ANCHOR_TOKEN: - return yaml.AnchorToken(token.data.anchor.value, - start_mark, end_mark) + value = PyUnicode_DecodeUTF8(token.data.anchor.value, + strlen(token.data.anchor.value), 'strict') + return self.cached_AnchorToken(value, start_mark, end_mark) elif token.type == YAML_TAG_TOKEN: - handle = token.data.tag.handle - if handle == '': + handle = PyUnicode_DecodeUTF8(token.data.tag.handle, + strlen(token.data.tag.handle), 'strict') + suffix = PyUnicode_DecodeUTF8(token.data.tag.suffix, + strlen(token.data.tag.suffix), 'strict') + if not handle: handle = None - return yaml.TagToken((handle, token.data.tag.suffix), - start_mark, end_mark) + return self.cached_TagToken((handle, suffix), start_mark, end_mark) elif token.type == YAML_SCALAR_TOKEN: - value = PyString_FromStringAndSize(token.data.scalar.value, token.data.scalar.length) - return yaml.ScalarToken(unicode(value, 'utf-8'), - bool(token.data.scalar.style == YAML_PLAIN_SCALAR_STYLE), - start_mark, end_mark) + value = PyUnicode_DecodeUTF8(token.data.scalar.value, + token.data.scalar.length, 'strict') + plain = False + style = None + if token.data.scalar.style == YAML_PLAIN_SCALAR_STYLE: + plain = True + style = '' + elif token.data.scalar.style == YAML_SINGLE_QUOTED_SCALAR_STYLE: + style = '\'' + elif token.data.scalar.style == YAML_DOUBLE_QUOTED_SCALAR_STYLE: + style = '"' + elif token.data.scalar.style == YAML_LITERAL_SCALAR_STYLE: + style = '|' + elif token.data.scalar.style == YAML_FOLDED_SCALAR_STYLE: + style = '>' + return self.cached_ScalarToken(value, plain, + start_mark, end_mark, style) else: raise RuntimeError("unknown token type") - cdef object _convert_event(self, yaml_event_t *event): - if event == NULL: + cdef object _parse(self): + cdef yaml_event_t event + if yaml_parser_parse(&self.parser, &event) == 0: if self.parser.error == YAML_MEMORY_ERROR: raise MemoryError elif self.parser.error == YAML_READER_ERROR: - raise yaml.reader.ReaderError("<input>", + raise self.cached_ReaderError(self.cached_input, self.parser.problem_offset, self.parser.problem_value, - '?', self.parser.problem) - elif self.parser.error == YAML_SCANNER_ERROR: - if self.parser.context != NULL: - raise yaml.scanner.ScannerError( - self.parser.context, - yaml.Mark("<input>", - self.parser.context_mark.index, - self.parser.context_mark.line, - self.parser.context_mark.column, - None, None), - self.parser.problem, - yaml.Mark("<input>", - self.parser.problem_mark.index, - self.parser.problem_mark.line, - self.parser.problem_mark.column, - None, None)) - else: - raise yaml.scanner.ScannerError(None, None, - self.parser.problem, - yaml.Mark("<input>", - self.parser.problem_mark.index, - self.parser.problem_mark.line, - self.parser.problem_mark.column, - None, None)) - elif self.parser.error == YAML_PARSER_ERROR: + self.cached_question, self.parser.problem) + elif self.parser.error == YAML_SCANNER_ERROR \ + or self.parser.error == YAML_PARSER_ERROR: + context_mark = None + problem_mark = None if self.parser.context != NULL: - raise yaml.parser.ParserError( - self.parser.context, - yaml.Mark("<input>", - self.parser.context_mark.index, - self.parser.context_mark.line, - self.parser.context_mark.column, - None, None), - self.parser.problem, - yaml.Mark("<input>", - self.parser.problem_mark.index, - self.parser.problem_mark.line, - self.parser.problem_mark.column, - None, None)) + context_mark = self.cached_Mark(self.cached_input, + self.parser.context_mark.index, + self.parser.context_mark.line, + self.parser.context_mark.column, + None, None) + if self.parser.problem != NULL: + problem_mark = self.cached_Mark(self.cached_input, + self.parser.problem_mark.index, + self.parser.problem_mark.line, + self.parser.problem_mark.column, + None, None) + if self.parser.error == YAML_SCANNER_ERROR: + if self.parser.context != NULL: + raise self.cached_ScannerError( + self.parser.context, context_mark, + self.parser.problem, problem_mark) + else: + raise self.cached_ScannerError(None, None, + self.parser.problem, problem_mark) else: - raise yaml.parser.ParserError(None, None, - self.parser.problem, - yaml.Mark("<input>", - self.parser.problem_mark.index, - self.parser.problem_mark.line, - self.parser.problem_mark.column, - None, None)) - else: - raise RuntimeError("neither error nor event produced") - start_mark = yaml.Mark("<input>", + if self.parser.context != NULL: + raise self.cached_ParserError( + self.parser.context, context_mark, + self.parser.problem, problem_mark) + else: + raise self.cached_ParserError(None, None, + self.parser.problem, problem_mark) + start_mark = yaml.Mark(self.cached_input, event.start_mark.index, event.start_mark.line, event.start_mark.column, None, None) - end_mark = yaml.Mark("<input>", + end_mark = yaml.Mark(self.cached_input, event.end_mark.index, event.end_mark.line, event.end_mark.column, None, None) - if event.type == YAML_STREAM_START_EVENT: - return yaml.StreamStartEvent(start_mark, end_mark) + if event.type == YAML_NO_EVENT: + return None + elif event.type == YAML_STREAM_START_EVENT: + return self.cached_StreamStartEvent(start_mark, end_mark) elif event.type == YAML_STREAM_END_EVENT: - return yaml.StreamEndEvent(start_mark, end_mark) + return self.cached_StreamEndEvent(start_mark, end_mark) elif event.type == YAML_DOCUMENT_START_EVENT: - return yaml.DocumentStartEvent(start_mark, end_mark, - (event.data.document_start.implicit == 0)) + return self.cached_DocumentStartEvent(start_mark, end_mark) elif event.type == YAML_DOCUMENT_END_EVENT: - return yaml.DocumentEndEvent(start_mark, end_mark, - (event.data.document_end.implicit == 0)) - elif event.type == YAML_SCALAR_EVENT: - if event.data.scalar.anchor == NULL: - anchor = None - else: - anchor = event.data.scalar.anchor - if event.data.scalar.tag == NULL: - tag = None - else: - tag = event.data.scalar.tag - implicit = (event.data.scalar.plain_implicit == 1, event.data.scalar.quoted_implicit == 1) - flow_style = (event.data.sequence_start.style == YAML_FLOW_SEQUENCE_STYLE) - value = PyString_FromStringAndSize(event.data.scalar.value, event.data.scalar.length) - return yaml.ScalarEvent(anchor, tag, implicit, unicode(value, 'utf-8'), - start_mark, end_mark) + return self.cached_DocumentEndEvent(start_mark, end_mark) elif event.type == YAML_ALIAS_EVENT: - if event.data.alias.anchor == NULL: - anchor = None - else: - anchor = event.data.alias.anchor - return yaml.AliasEvent(anchor, start_mark, end_mark) + anchor = PyUnicode_DecodeUTF8(event.data.alias.anchor, + strlen(event.data.alias.anchor), 'strict') + return self.cached_AliasEvent(anchor, start_mark, end_mark) + elif event.type == YAML_SCALAR_EVENT: + anchor = None + if event.data.scalar.anchor != NULL: + anchor = PyUnicode_DecodeUTF8(event.data.scalar.anchor, + strlen(event.data.scalar.anchor), 'strict') + tag = None + if event.data.scalar.tag != NULL: + tag = PyUnicode_DecodeUTF8(event.data.scalar.tag, + strlen(event.data.scalar.tag), 'strict') + value = PyUnicode_DecodeUTF8(event.data.scalar.value, + event.data.scalar.length, 'strict') + plain_implicit = (event.data.scalar.plain_implicit == 1) + quoted_implicit = (event.data.scalar.quoted_implicit == 1) + style = None + if event.data.scalar.style == YAML_PLAIN_SCALAR_STYLE: + style = '' + elif event.data.scalar.style == YAML_SINGLE_QUOTED_SCALAR_STYLE: + style = '\'' + elif event.data.scalar.style == YAML_DOUBLE_QUOTED_SCALAR_STYLE: + style = '"' + elif event.data.scalar.style == YAML_LITERAL_SCALAR_STYLE: + style = '|' + elif event.data.scalar.style == YAML_FOLDED_SCALAR_STYLE: + style = '>' + return self.cached_ScalarEvent(anchor, tag, + (plain_implicit, quoted_implicit), + value, start_mark, end_mark, style) elif event.type == YAML_SEQUENCE_START_EVENT: - if event.data.sequence_start.anchor == NULL: - anchor = None - else: - anchor = event.data.sequence_start.anchor - if event.data.sequence_start.tag == NULL: - tag = None - else: - tag = event.data.sequence_start.tag + anchor = None + if event.data.sequence_start.anchor != NULL: + anchor = PyUnicode_DecodeUTF8(event.data.sequence_start.anchor, + strlen(event.data.sequence_start.anchor), 'strict') + tag = None + if event.data.sequence_start.tag != NULL: + tag = PyUnicode_DecodeUTF8(event.data.sequence_start.tag, + strlen(event.data.sequence_start.tag), 'strict') implicit = (event.data.sequence_start.implicit == 1) - flow_style = (event.data.sequence_start.style == YAML_FLOW_SEQUENCE_STYLE) - return yaml.SequenceStartEvent(anchor, tag, implicit, + flow_style = None + if event.data.sequence_start.style == YAML_FLOW_SEQUENCE_STYLE: + flow_style = True + elif event.data.sequence_start.style == YAML_BLOCK_SEQUENCE_STYLE: + flow_style = False + return self.cached_SequenceStartEvent(anchor, tag, implicit, start_mark, end_mark, flow_style) elif event.type == YAML_MAPPING_START_EVENT: - if event.data.mapping_start.anchor == NULL: - anchor = None - else: - anchor = event.data.mapping_start.anchor - if event.data.mapping_start.tag == NULL: - tag = None - else: - tag = event.data.mapping_start.tag + anchor = None + if event.data.mapping_start.anchor != NULL: + anchor = PyUnicode_DecodeUTF8(event.data.mapping_start.anchor, + strlen(event.data.mapping_start.anchor), 'strict') + tag = None + if event.data.mapping_start.tag != NULL: + tag = PyUnicode_DecodeUTF8(event.data.mapping_start.tag, + strlen(event.data.mapping_start.tag), 'strict') implicit = (event.data.mapping_start.implicit == 1) - flow_style = (event.data.mapping_start.style == YAML_FLOW_MAPPING_STYLE) - return yaml.MappingStartEvent(anchor, tag, implicit, + flow_style = None + if event.data.mapping_start.style == YAML_FLOW_SEQUENCE_STYLE: + flow_style = True + elif event.data.mapping_start.style == YAML_BLOCK_SEQUENCE_STYLE: + flow_style = False + return self.cached_MappingStartEvent(anchor, tag, implicit, start_mark, end_mark, flow_style) elif event.type == YAML_SEQUENCE_END_EVENT: - return yaml.SequenceEndEvent(start_mark, end_mark) + return self.cached_SequenceEndEvent(start_mark, end_mark) elif event.type == YAML_MAPPING_END_EVENT: - return yaml.MappingEndEvent(start_mark, end_mark) + return self.cached_MappingEndEvent(start_mark, end_mark) else: raise RuntimeError("unknown event type") - def get_token(self): - cdef yaml_token_t *token - if self.cached_token != NULL: - yaml_token_delete(yaml_parser_get_token(self.parser)) - obj = self.cached_obj - self.cached_token = NULL - self.cached_obj = None - return obj - if self.eof != 0: - return None - token = yaml_parser_get_token(self.parser) - obj = self._convert_token(token) - if token.type == YAML_STREAM_END_TOKEN: - self.eof = 1 - yaml_token_delete(token) - return obj - - def peek_token(self): - cdef yaml_token_t *token - if self.cached_token != NULL: - return self.cached_obj - if self.eof != 0: - return None - token = yaml_parser_peek_token(self.parser) - obj = self._convert_token(token) - if token.type == YAML_STREAM_END_TOKEN: - self.eof = 1 - self.cached_token = token - self.cached_obj = obj - return obj - - def check_token(self, *choices): - cdef yaml_token_t *token - if self.cached_token != NULL: - obj = self.cached_obj - elif self.eof != 0: - return False - else: - token = yaml_parser_peek_token(self.parser) - obj = self._convert_token(token) - if token.type == YAML_STREAM_END_TOKEN: - self.eof = 1 - self.cached_token = token - self.cached_obj = obj - if not choices: - return True - for choice in choices: - if isinstance(obj, choice): - return True - return False - - def get_event(self): - cdef yaml_event_t *event - if self.cached_event != NULL: - yaml_event_delete(yaml_parser_get_event(self.parser)) - obj = self.cached_obj - self.cached_event = NULL - self.cached_obj = None - return obj - if self.eof != 0: - return None - event = yaml_parser_get_event(self.parser) - obj = self._convert_event(event) - if event.type == YAML_STREAM_END_EVENT: - self.eof = 1 - yaml_event_delete(event) - return obj - - def peek_event(self): - cdef yaml_event_t *event - if self.cached_event != NULL: - return self.cached_obj - if self.eof != 0: - return None - event = yaml_parser_peek_event(self.parser) - obj = self._convert_event(event) - if event.type == YAML_STREAM_END_EVENT: - self.eof = 1 - self.cached_event = event - self.cached_obj = obj - return obj - - def check_event(self, *choices): - cdef yaml_event_t *event - if self.cached_event != NULL: - obj = self.cached_obj - elif self.eof != 0: - return False - else: - event = yaml_parser_peek_event(self.parser) - obj = self._convert_event(event) - if event.type == YAML_STREAM_END_EVENT: - self.eof = 1 - self.cached_event = event - self.cached_obj = obj - if not choices: - return True - for choice in choices: - if isinstance(obj, choice): - return True - return False +cdef int input_handler(void *data, char *buffer, int size, int *read) except 0: + cdef ScannerAndParser parser + parser = <ScannerAndParser>data + value = parser.stream.read(size) + if PyString_CheckExact(value) == 0: + raise TypeError("a string value is expected") + if PyString_GET_SIZE(value) > size: + raise ValueError("a string value it too long") + memcpy(buffer, PyString_AS_STRING(value), PyString_GET_SIZE(value)) + read[0] = PyString_GET_SIZE(value) + return 1 class Loader(ScannerAndParser, yaml.composer.Composer, diff --git a/tests/test_yaml_ext.py b/tests/test_yaml_ext.py index cd3e204..c92cc29 100644 --- a/tests/test_yaml_ext.py +++ b/tests/test_yaml_ext.py @@ -10,11 +10,19 @@ class TestExtVersion(unittest.TestCase): class TestExtLoader(test_appliance.TestAppliance): - def _testExtScanner(self, test_name, data_filename, canonical_filename): - data = file(data_filename, 'r').read() + def _testExtScannerFileInput(self, test_name, data_filename, canonical_filename): + self._testExtScanner(test_name, data_filename, canonical_filename, True) + + def _testExtScanner(self, test_name, data_filename, canonical_filename, file_input=False): + if file_input: + data = file(data_filename, 'r') + else: + data = file(data_filename, 'r').read() tokens = list(yaml.scan(data)) ext_tokens = [] try: + if file_input: + data = file(data_filename, 'r') for token in yaml.scan(data, Loader=yaml.ExtLoader): ext_tokens.append(token) self.failUnlessEqual(len(tokens), len(ext_tokens)) @@ -62,6 +70,7 @@ class TestExtLoader(test_appliance.TestAppliance): raise TestExtLoader.add_tests('testExtScanner', '.data', '.canonical') +TestExtLoader.add_tests('testExtScannerFileInput', '.data', '.canonical') TestExtLoader.add_tests('testExtParser', '.data', '.canonical') def main(module='__main__'): |