author     xi <xi@18f92427-320e-0410-9341-c67f048884a3>  2006-02-12 23:19:54 +0000
committer  xi <xi@18f92427-320e-0410-9341-c67f048884a3>  2006-02-12 23:19:54 +0000
commit     c65cc8317f9cd1a432b75c7c6366680af003b5e4
tree       b768743c175b9f7ac69926db6e8b4e3e20bd53bb
parent     2d5e94be47b74f34aac788f97fe28fc1fe12dd04
Working on the scanner.
git-svn-id: http://svn.pyyaml.org/branches/pyyaml3000@39 18f92427-320e-0410-9341-c67f048884a3
-rw-r--r--  Makefile                            22
-rw-r--r--  lib/yaml/__init__.py                 0
-rw-r--r--  lib/yaml/marker.py                  35
-rw-r--r--  lib/yaml/scanner.py               1007
-rw-r--r--  setup.py                            22
-rw-r--r--  tests/data/spec-02-01.tokens         1
-rw-r--r--  tests/data/spec-02-02.tokens         5
-rw-r--r--  tests/data/spec-02-03.tokens         4
-rw-r--r--  tests/data/spec-02-04.tokens         4
-rw-r--r--  tests/data/spec-02-05.tokens         5
-rw-r--r--  tests/data/spec-02-06.tokens         4
-rw-r--r--  tests/data/spec-02-07.tokens        12
-rw-r--r--  tests/data/spec-02-08.tokens        15
-rw-r--r--  tests/data/spec-02-09.tokens         5
-rw-r--r--  tests/data/spec-02-10.tokens         5
-rw-r--r--  tests/data/test_marker.markers      38
-rw-r--r--  tests/test_appliance.py             28
-rw-r--r--  tests/test_build.py                 12
-rw-r--r--  tests/test_marker.py                35
-rw-r--r--  tests/test_tokens.py                62
-rw-r--r--  tests/test_yaml.py                  12
21 files changed, 1333 insertions(+), 0 deletions(-)
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f8207b6
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,22 @@
+
+.PHONY: default build force install test clean
+
+PYTHON=/usr/bin/python
+TEST=
+PARAMETERS=
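+
+# Usage sketch (assumed): `make test` builds and then runs the whole test
+# suite, while `make test TEST=TestTokens` passes TestTokens through to
+# unittest so a single test case runs; PARAMETERS is forwarded to setup.py.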
+
+build:
+ ${PYTHON} setup.py build ${PARAMETERS}
+
+force:
+ ${PYTHON} setup.py build -f ${PARAMETERS}
+
+install: build
+ ${PYTHON} setup.py install ${PARAMETERS}
+
+test: build
+ ${PYTHON} tests/test_build.py -v ${TEST}
+
+clean:
+ ${PYTHON} setup.py clean -a
+
diff --git a/lib/yaml/__init__.py b/lib/yaml/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/lib/yaml/__init__.py
diff --git a/lib/yaml/marker.py b/lib/yaml/marker.py
new file mode 100644
index 0000000..6ec12e6
--- /dev/null
+++ b/lib/yaml/marker.py
@@ -0,0 +1,35 @@
+
+class Marker:
+
+ def __init__(self, source, data, index, row, column):
+ self.source = source
+ self.data = data
+ self.index = index
+ self.row = row
+ self.column = column
+
+ def get_snippet(self, max_length=79):
+ if not isinstance(self.data, basestring):
+ return None
+ head = ''
+ start = self.index
+ while start > 0 and self.data[start-1] not in '\r\n':
+ start -= 1
+ if self.index-start > max_length/2-1:
+ head = ' ... '
+ start += 5
+ break
+ tail = ''
+ end = self.index
+ while end < len(self.data) and self.data[end] not in '\r\n':
+ end += 1
+ if end-self.index > max_length/2-1:
+ tail = ' ... '
+ end -= 5
+ break
+ snippet = self.data[start:end]
+ if isinstance(snippet, unicode):
+ snippet = snippet.encode('utf-8')
+ return head + snippet + tail + '\n' \
+ + ' '*(self.index-start+len(head)) + '^' + '\n'
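+
+# A usage sketch (hypothetical values): for a marker pointing at the 'b' of
+# "foo: bar",
+#   marker = Marker('<string>', 'foo: bar', 5, 0, 5)
+#   print marker.get_snippet()
+# prints the offending line with a caret under the position:
+#   foo: bar
+#        ^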
+
diff --git a/lib/yaml/scanner.py b/lib/yaml/scanner.py
new file mode 100644
index 0000000..d871369
--- /dev/null
+++ b/lib/yaml/scanner.py
@@ -0,0 +1,1007 @@
+
+from marker import Marker
+from error import ParserError
+from stream import Stream
+
+class Scanner:
+
+ def __init__(self, source, data):
+ """Initialize the scanner."""
+        # The input stream. The Stream class does the dirty work of checking
+        # for a BOM and converting the input data to Unicode. It also adds an
+        # LF to the end if the data does not end with an EOL character.
+        #
+        # Stream supports the following methods:
+        #   self.stream.peek(k=1)     # peek the next k characters
+        #   self.stream.read(k=1)     # read the next k characters and move
+        #                             # the pointer
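+        #
+        # A hypothetical usage sketch (stream.py is not part of this commit,
+        # so the real interface may differ):
+        #   stream = Stream('<string>', u'foo: bar\n')
+        #   stream.peek()      # -> u'f'; the pointer does not move
+        #   stream.read(4)     # -> u'foo:'; the pointer advances by 4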
+ self.stream = Stream(source, data)
+
+        # Have we reached the end of the stream?
+ self.done = False
+
+ # The number of unclosed '{' and '['. `flow_level == 0` means block
+ # context.
+ self.flow_level = 0
+
+ # List of processed tokens that are not yet emitted.
+ self.tokens = []
+
+ # Number of tokens that were emitted through the `get_token` method.
+ self.tokens_taken = 0
+
+ # The current indentation level.
+ self.indent = -1
+
+ # Past indentation levels.
+ self.indents = []
+
+ # Variables related to simple key treatment.
+
+ # A simple key is a key that is not denoted by the '?' indicator.
+        # Examples of simple keys:
+ # ---
+ # block simple key: value
+ # ? not a simple key:
+ # : { flow simple key: value }
+ # We emit the KEY token before all keys, so when we find a potential
+ # simple key, we try to locate the corresponding ':' indicator.
+ # Simple keys should be limited to a single line and 1024 characters.
+
+ # Can a block collection start at the current position? A block
+ # collection may start:
+ # - at the beginning of the line (not counting spaces),
+ # - after the block sequence indicator '-'.
+ self.allow_block_collection = True
+
+ # Can a simple key in flow context start at the current position? A
+ # simple key may start after the '{', '[', and ',' indicators.
+ self.allow_flow_simple_keys = False
+
+ # Keep track of possible simple keys. This is a dictionary. The key
+        # is `flow_level`; there can be no more than one possible simple key
+ # for each level. The value is a record of
+ # (stream.index, stream.line, stream.column, token_number)
+ self.possible_simple_keys = {}
+
+ # Public methods:
+
+ def peek_token(self):
+ """Get the current token."""
+        while self.need_more_tokens():
+ self.fetch_more_tokens()
+ if self.tokens:
+ return self.tokens[0]
+
+ def get_token(self):
+ "Get the current token and remove it from the list."""
+ while self.need_more_tokens():
+ self.fetch_more_tokens()
+ if self.tokens:
+ self.tokens_taken += 1
+ return self.tokens.pop(0)
+
+ # Private methods:
+
+ def need_more_tokens(self):
+ if self.done:
+ return False
+ if not self.tokens:
+ return True
+ # The current token may be a potential simple key, so we
+ # need to look further.
+ if self.next_possible_simple_key() == self.tokens_taken:
+ return True
+
+ def fetch_more_tokens(self):
+
+ # Eat whitespaces and comments until we reach the next token.
+ self.find_next_token()
+
+ # Compare the current indentation and column. It may add some tokens
+ # and decrease the current indentation.
+        self.unwind_indents(self.stream.column)
+
+ # Peek the next character.
+ ch = self.stream.peek()
+
+ # Is it the end of stream?
+ if ch is None:
+ return self.fetch_end()
+
+ # Is it a directive?
+ if ch == u'%' and self.check_directive():
+ return self.fetch_directive()
+
+ # Is it the document start?
+ if ch == u'-' and self.check_document_start():
+ return self.fetch_document_start()
+
+ # Is it the document end?
+ if ch == u'.' and self.check_document_end():
+ return self.fetch_document_end()
+
+ # Note: the order of the following checks is NOT significant.
+
+        # Is it the entry indicator?
+ if ch in u'-,' and self.check_entry():
+ return self.fetch_entry()
+
+ # Is it the flow sequence start indicator?
+ if ch == u'[':
+ return self.fetch_flow_sequence_start()
+
+ # Is it the flow mapping start indicator?
+ if ch == u'{':
+ return self.fetch_flow_mapping_start()
+
+ # Is it the flow sequence end indicator?
+ if ch == u']':
+ return self.fetch_flow_sequence_end()
+
+ # Is it the flow mapping end indicator?
+ if ch == u'}':
+ return self.fetch_flow_mapping_end()
+
+ # Is it the key indicator?
+ if ch == u'?' and self.check_key():
+ return self.fetch_key()
+
+ # Is it the value indicator?
+ if ch == u':' and self.check_value():
+ return self.fetch_value()
+
+ # Is it an alias?
+ if ch == u'*':
+ return self.fetch_alias()
+
+ # Is it an anchor?
+ if ch == u'&':
+ return self.fetch_anchor()
+
+        # Is it a tag?
+ if ch == u'!':
+ return self.fetch_tag()
+
+        # Is it a literal scalar?
+ if ch == u'|':
+ return self.fetch_literal()
+
+ # Is it a folded scalar?
+ if ch == u'>':
+ return self.fetch_folded()
+
+ # Is it a single quoted scalar?
+ if ch == u'\'':
+ return self.fetch_single()
+
+ # Is it a double quoted scalar?
+ if ch == u'\"':
+ return self.fetch_double()
+
+ # It must be a plain scalar.
+ if self.check_plain():
+ return self.fetch_plain()
+
+ # No? It's an error then. Let's produce a nice error message.
+ self.invalid_token()
+
+ def fetch_end(self):
+
+        # Set the current indentation to -1.
+ self.unwind_indents(-1)
+
+ # Reset everything (not really needed).
+ self.allow_block_collection = False
+ self.allow_flow_simple_keys = False
+ self.possible_simple_keys = {}
+
+ # Add END.
+ marker = self.stream.get_marker()
+ self.tokens.append(EndToken(marker))
+
+ # The stream is ended.
+ self.done = True
+
+ def check_directive(self):
+
+ # Checking for
+ # /* The beginning of the line */ '%'
+ # The '%' indicator is already checked.
+ if self.stream.column == 0:
+ return True
+
+ def check_document_start(self):
+
+ # Checking for
+ # /* The beginning of the line */ '---' /* Space or EOL */
+ if self.stream.column == 0:
+ prefix = self.stream.peek(4)
+ if prefix[:3] == u'---' and prefix[3] in u' \t\r\n\x85\u2028\u2029':
+ return True
+
+ def fetch_document_start(self):
+
+        # Set the current indentation to -1.
+ self.unwind_indents(-1)
+
+ # No block collections after '---'.
+ self.allow_block_collection = False
+
+ # No flow simple keys (not needed -- we are in the block context).
+ self.allow_flow_simple_keys = False
+
+ # Reset possible simple keys (not needed -- EOL should have reset it).
+ self.possible_simple_keys = {}
+
+ start_marker = self.stream.get_marker()
+
+        # The characters are already checked; just move forward.
+ self.stream.read(3)
+
+ end_marker = self.stream.get_marker()
+
+ # Add DOCUMENT-START.
+ self.tokens.append(DocumentStartToken(start_marker, end_marker))
+
+
+ def check_document_end(self):
+ if self.stream.column == 0:
+ prefix = self.stream.peek(4)
+ if prefix[:3] == u'...' and prefix[3] in u' \t\r\n\x85\u2028\u2029':
+ return True
+
+ def fetch_document_end(self):
+ # The same code as `fetch_document_start`.
+
+        # Set the current indentation to -1.
+ self.unwind_indents(-1)
+
+ # Reset everything (not really needed).
+ self.allow_block_collection = False
+ self.allow_flow_simple_keys = False
+ self.possible_simple_keys = {}
+
+ start_marker = self.stream.get_marker()
+
+        # The characters are already checked; just move forward.
+ self.stream.read(3)
+
+ end_marker = self.stream.get_marker()
+
+ # Add DOCUMENT-END.
+ self.tokens.append(DocumentEndToken(start_marker, end_marker))
+
+
+
+# Tokens:
+# YAML_DIRECTIVE: ^ '%' YAML ' '+ (version: \d+ '.' \d+) s-l-comments
+# TAG_DIRECTIVE: ^ '%' TAG ' '+ (handle: '!' (word-char* '!')? ) (prefix: uri-char+) s-l-comments
+# RESERVED_DIRECTIVE: ^ '%' (directive-name: ns-char+) (' '+ (directive-parameter: ns-char+))* s-l-comments
+# DOCUMENT_START: ^ '---' (' ' | b-any)
+# DOCUMENT_END: ^ '...' (' ' | b-any)
+# TAG: '!' ( ('<' uri-char+ '>') | uri-char* ) (' ' | b-any)
+# ANCHOR: '&' ns-char+ <-- bug
+# ALIAS: * ns-char+ <-- bug
+# ENTRY(block): '-' (' ' | b-any)
+# KEY(block): '?' (' ' | b-any)
+# VALUE(block): ':' (' ' | b-any)
+# FLOW_SEQ_START: '['
+# FLOW_SEQ_END: ']'
+# FLOW_MAP_START: '{'
+# FLOW_MAP_END: '}'
+# KEY(flow): '?'
+# VALUE(flow): ':'
+# ENTRY(flow): ','
+# PLAIN: (plain-char - indicator) | ([-?:] plain-char) ... <-- bugs
+# DOUBLE_QUOTED: '"' ...
+# SINGLE_QUOTED: ''' ...
+# LITERAL: '|' ...
+# FOLDED: '>' ...
+# BLOCK_SEQ_START: indentation before '-'.
+# BLOCK_MAP_START: indentation before '?' or a simple key.
+# BLOCK_END: no indentation
+# LINE: end of line
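+
+# Example (illustrative): the document `key: [a, b]` should scan to
+#   BLOCK_MAP_START KEY SCALAR VALUE
+#   FLOW_SEQ_START SCALAR ENTRY SCALAR FLOW_SEQ_END
+#   BLOCK_END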
+
+# b-generic: \r \n | \r | \n | #x85
+# b-specific: #x2028 | #x2029
+# b-any: b-generic | b-specific
+# hex-digit: [0-9A-Fa-f]
+# word-char: [0-9A-Za-z-]
+# uri-char: word-char | % hex-digit hex-digit | [;/?:@&=+$,_.!~*'()[]]
+
+# Production rules:
+# stream :== implicit_document? explicit_document* END
+# explicit_document :== DIRECTIVE* DOCUMENT_START block_node? DOCUMENT_END?
+# implicit_document :== block_node DOCUMENT_END?
+# block_node :== ALIAS | properties? block_content
+# flow_node :== ALIAS | properties? flow_content
+# properties :== TAG ANCHOR? | ANCHOR TAG?
+# block_content :== block_collection | flow_collection | SCALAR
+# flow_content :== flow_collection | SCALAR
+# block_collection :== block_sequence | block_mapping
+# block_sequence :== BLOCK_SEQ_START (ENTRY block_node?)* BLOCK_END
+# block_mapping :== BLOCK_MAP_START ((KEY block_node_or_indentless_sequence?)? (VALUE block_node_or_indentless_sequence?)?)* BLOCK_END
+# block_node_or_indentless_sequence :== ALIAS | properties? (block_content | indentless_block_sequence)
+# indentless_block_sequence :== (ENTRY block_node?)+
+# flow_collection :== flow_sequence | flow_mapping
+# flow_sequence :== FLOW_SEQ_START (flow_sequence_entry ENTRY)* flow_sequence_entry? FLOW_SEQ_END
+# flow_sequence_entry :== flow_node | KEY flow_node (VALUE flow_node?)?
+# flow_mapping :== FLOW_MAP_START (flow_mapping_entry ENTRY)* flow_mapping_entry? FLOW_MAP_END
+# flow_mapping_entry :== flow_node | KEY flow_node (VALUE flow_node?)?
+
+# FIRST(rule) sets:
+# stream: {}
+# explicit_document: { DIRECTIVE DOCUMENT_START }
+# implicit_document: block_node
+# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK_SEQ_START BLOCK_MAP_START FLOW_SEQ_START FLOW_MAP_START }
+# flow_node: { ALIAS TAG ANCHOR SCALAR FLOW_SEQ_START FLOW_MAP_START }
+# block_content: { BLOCK_SEQ_START BLOCK_MAP_START FLOW_SEQ_START FLOW_MAP_START SCALAR }
+# flow_content: { FLOW_SEQ_START FLOW_MAP_START SCALAR }
+# block_collection: { BLOCK_SEQ_START BLOCK_MAP_START }
+# flow_collection: { FLOW_SEQ_START FLOW_MAP_START }
+# block_sequence: { BLOCK_SEQ_START }
+# block_mapping: { BLOCK_MAP_START }
+# block_node_or_indentless_sequence: { ALIAS TAG ANCHOR SCALAR BLOCK_SEQ_START BLOCK_MAP_START FLOW_SEQ_START FLOW_MAP_START ENTRY }
+# indentless_sequence: { ENTRY }
+# flow_collection: { FLOW_SEQ_START FLOW_MAP_START }
+# flow_sequence: { FLOW_SEQ_START }
+# flow_mapping: { FLOW_MAP_START }
+# flow_sequence_entry: { ALIAS TAG ANCHOR SCALAR FLOW_SEQ_START FLOW_MAP_START KEY }
+# flow_mapping_entry: { ALIAS TAG ANCHOR SCALAR FLOW_SEQ_START FLOW_MAP_START KEY }
+
+class Marker(object):
+
+ def __init__(self, source, data, index, length=0):
+ self.source = source
+ self.data = data
+ self.index = index
+ self.length = length
+ self._line = None
+ self._position = None
+
+ def line(self):
+ if not self._line:
+ self._make_line_position()
+ return self._line
+
+ def position(self):
+ if not self._position:
+ self._make_line_position()
+ return self._position
+
+    def _make_line_position(self):
+        line_start = self.data.rfind('\n', 0, self.index)+1
+        line_end = self.data.find('\n', self.index)+1
+        if line_end == 0:
+            line_end = len(self.data)
+        self._line = (line_start, line_end)
+        row = self.data.count('\n', 0, line_start)
+        col = self.index-line_start
+        self._position = (row, col)
+
+class Error(Exception):
+
+ def __init__(self, message=None, marker=None):
+ Exception.__init__(self)
+ self.message = message
+ self.marker = marker
+
+ def __str__(self):
+ if self.marker is not None:
+ row, col = self.marker.position()
+ start, end = self.marker.line()
+ error_position = "source \"%s\", line %s, column %s:\n%s\n" \
+ % (self.marker.source, row+1, col+1, self.marker.data[start:end].rstrip().encode('utf-8'))
+ error_pointer = " " * col + "^\n"
+ else:
+ error_position = ""
+ error_pointer = ""
+ if self.message is not None:
+ error_message = self.message
+ else:
+ error_message = "YAML error"
+ return error_position+error_pointer+error_message
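+
+    # An illustrative formatted message (assuming a marker at row 1, column
+    # 5 of the line "key: @bad" and the message "Invalid token"):
+    #   source "<string>", line 2, column 6:
+    #   key: @bad
+    #        ^
+    #   Invalid token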
+
+class _Scanner:
+
+ def scan(self, source, data):
+ self.source = source
+ self.data = data
+ self.flow_level = 0
+ self.indents = []
+ self.indent = -1
+ self.index = 0
+ self.line = 0
+ self.column = 0
+ self.allow_block_collection = True
+ self.guess_simple_key = False
+ self.guess_simple_key_token = None
+ self.guess_simple_key_indent = None
+ self.allow_flow_key = False
+ self.guess_flow_key_levels = []
+ self.guess_flow_key_tokens = []
+ self.tokens = []
+ while self.eat_ignored() or self.fetch_token():
+ pass
+ return self.tokens
+
+ def eat_ignored(self):
+ result = False
+ while self.eat_ignored_spaces() or self.eat_ignored_comment() or self.eat_ignored_newline():
+ result = True
+ return result
+
+ def eat_ignored_spaces(self):
+ result = False
+ while self.index < len(self.data) and self.data[self.index] == ' ':
+ self.index += 1
+ self.column += 1
+ result = True
+ return result
+
+    def eat_ignored_comment(self):
+        if self.index < len(self.data) and self.data[self.index] == '#':
+            return self.eat_line()
+        return False
+
+ def eat_line(self):
+ result = False
+ while self.index < len(self.data) and self.data[self.index] not in '\r\n':
+ self.index += 1
+ self.column += 1
+ result = True
+ return result
+
+ def eat_ignored_newline(self):
+ if self.index < len(self.data) and self.data[self.index] in '\r\n':
+ if self.data[self.index:self.index+2] == '\r\n':
+ self.index += 2
+ else:
+ self.index += 1
+ self.line += 1
+ self.column = 0
+ self.allow_block_collection = True
+ return True
+ return False
+
+ def eat_ns(self):
+ result = False
+ while self.index < len(self.data) and self.data[self.index] not in ' \t\r\n':
+ self.index += 1
+ self.column += 1
+ result = True
+ return result
+
+ def eat_indent(self, indent=0):
+ if indent < self.indent:
+ indent = self.indent
+ if self.column != 0:
+ return False
+ count = 0
+ while self.index < len(self.data) and self.data[self.index] == ' ' and count < indent:
+ self.index += 1
+ self.column += 1
+ count += 1
+ return count == indent
+
+ def eat_double_quoted(self):
+ if self.index < len(self.data) and self.data[self.index] == '"':
+ self.index += 1
+ self.column += 1
+ while self.index < len(self.data) and self.data[self.index] != '"':
+ if self.data[self.index:self.index+2] in ['\\\\', '\\"']:
+ self.index += 2
+ self.column += 2
+ elif self.data[self.index] in '\r\n':
+ self.eat_ignored_newline()
+ if not self.eat_indent(1):
+ self.error("Invalid indentation")
+ else:
+ self.index += 1
+ self.column += 1
+ if self.index < len(self.data) and self.data[self.index] == '"':
+ self.index += 1
+ self.column += 1
+ return True
+ else:
+ self.error("unclosed double quoted scalar")
+ else:
+ return False
+
+ def eat_single_quoted(self):
+ if self.index < len(self.data) and self.data[self.index] == '\'':
+ self.index += 1
+ self.column += 1
+ while self.index < len(self.data) and \
+ (self.data[self.index] != '\'' or self.data[self.index:self.index+2] == '\'\''):
+ if self.data[self.index:self.index+2] == '\'\'':
+ self.index += 2
+ self.column += 2
+ elif self.data[self.index] in '\r\n':
+ self.eat_ignored_newline()
+ if not self.eat_indent(1):
+ self.error("Invalid indentation")
+ else:
+ self.index += 1
+ self.column += 1
+ if self.index < len(self.data) and self.data[self.index] == '\'':
+ self.index += 1
+ self.column += 1
+ return True
+ else:
+ self.error("unclosed single quoted scalar")
+ else:
+ return False
+
+ def eat_folded(self):
+ self.eat_block_scalar()
+
+ def eat_literal(self):
+ self.eat_block_scalar()
+
+ def eat_block_scalar(self):
+ if self.index < len(self.data) and self.data[self.index] in '>|':
+ self.eat_line()
+ if not self.eat_ignored_newline():
+ return True
+ indent = self.indent+1
+ if indent < 1:
+ indent = 1
+ while (self.eat_indent(indent) and ((self.eat_line() and self.eat_ignored_newline()) or (self.eat_ignored_newline()))) or \
+ (self.eat_ignored_comment() and self.eat_ignored_newline()) or \
+ self.eat_ignored_newline():
+ pass
+ return True
+ return False
+
+ def eat_block_plain(self):
+ return self.eat_plain(block=True)
+
+ def eat_flow_plain(self):
+ return self.eat_plain(block=False)
+
+ def eat_plain(self, block):
+ indent = self.indent+1
+ if indent < 1:
+ indent = 1
+ if self.index < len(self.data):
+ if self.data[self.index] not in ' \t\r\n-?:,[]{}#&*!|>\'"%@`' or \
+ (block and self.data[self.index] == '-' and self.data[self.index:self.index+2] not in ['-', '- ', '-\r', '-\n']) or \
+ (block and self.data[self.index] == '?' and self.data[self.index:self.index+2] not in ['?', '? ', '?\r', '?\n']) or \
+ (block and self.data[self.index] == ':' and self.data[self.index:self.index+2] not in [':', ': ', ':\r', ':\n']):
+ if block and self.allow_block_collection:
+ self.guessing_simple_key()
+ if self.flow_level and self.allow_flow_key:
+ self.guess_flow_key_levels.append(self.flow_level)
+ self.guess_flow_key_tokens.append(len(self.tokens))
+ self.allow_flow_key = False
+ self.index += 1
+ self.column += 1
+ space = False
+ while True:
+ self.eat_ignored_spaces()
+ while self.index < len(self.data) and (
+ self.data[self.index] not in '\r\n?:,[]{}#' or
+ (not space and self.data[self.index] == '#') or
+ (block and self.data[self.index] in '?,[]{}') or
+ (block and self.data[self.index] == ':' and self.data[self.index:self.index+2] not in [':', ': ', ':\r', ':\n'])):
+ space = self.data[self.index] not in ' \t'
+ self.index += 1
+ self.column += 1
+ self.allow_block_collection = False
+ if not (self.eat_ignored_newline() and self.eat_indent(indent)):
+ break
+ space = True
+ return True
+ return False
+
+ def no_simple_key(self):
+ self.guess_simple_key = False
+ self.guess_simple_key_token = None
+ self.guess_simple_key_indent = None
+
+ def guessing_simple_key(self):
+ self.guess_simple_key = True
+ self.guess_simple_key_token = len(self.tokens)
+ self.guess_simple_key_indent = self.column
+
+ def unwind_indents(self, level):
+ while self.indent > level:
+ if self.flow_level:
+ self.error("Invalid indentation")
+ self.tokens.append('BLOCK_END')
+ self.indent = self.indents.pop()
+ self.no_simple_key()
+
+ def fetch_token(self):
+ self.unwind_indents(self.column)
+ if self.index < len(self.data):
+ if self.column == 0:
+ if self.data[self.index] == '%':
+ self.tokens.append('DIRECTIVE')
+ self.eat_line()
+ self.no_simple_key()
+ return True
+ if self.data[self.index:self.index+3] == '---' and \
+ (not self.data[self.index+3:self.index+4] or self.data[self.index+3:self.index+4] in ' \r\n'):
+ self.unwind_indents(-1)
+ self.tokens.append('DOCUMENT_START')
+ self.index += 3
+ self.column += 3
+ self.allow_block_collection = False
+ self.allow_flow_key = False
+                    self.guess_flow_key_levels = []
+                    self.guess_flow_key_tokens = []
+ self.no_simple_key()
+ return True
+ if self.data[self.index:self.index+3] == '...' and \
+ (not self.data[self.index+3:self.index+4] or self.data[self.index+3:self.index+4] in ' \r\n'):
+ self.unwind_indents(-1)
+ self.tokens.append('DOCUMENT_END')
+ self.index += 3
+ self.column += 3
+ self.allow_block_collection = False
+ self.allow_flow_key = False
+                    self.guess_flow_key_levels = []
+                    self.guess_flow_key_tokens = []
+ self.no_simple_key()
+ return True
+ if self.data[self.index] in '[]{}':
+ if self.data[self.index] == '[':
+ self.flow_level += 1
+ self.allow_flow_key = True
+ self.tokens.append('FLOW_SEQ_START')
+ elif self.data[self.index] == '{':
+ self.flow_level += 1
+ self.allow_flow_key = True
+ self.tokens.append('FLOW_MAP_START')
+ elif self.data[self.index] == ']':
+ if not self.flow_level:
+ self.error("Extra ]")
+ self.flow_level -= 1
+ self.allow_flow_key = False
+ self.tokens.append('FLOW_SEQ_END')
+ else:
+ if not self.flow_level:
+ self.error("Extra }")
+ self.flow_level -= 1
+ self.allow_flow_key = False
+ self.tokens.append('FLOW_MAP_END')
+ while self.guess_flow_key_levels and self.guess_flow_key_levels[-1] > self.flow_level:
+ self.guess_flow_key_levels.pop()
+ self.guess_flow_key_tokens.pop()
+ self.index += 1
+ self.column += 1
+ self.allow_block_collection = False
+ return True
+ if self.data[self.index] in '!&*':
+ if self.flow_level and self.allow_flow_key:
+ self.guess_flow_key_levels.append(self.flow_level)
+ self.guess_flow_key_tokens.append(len(self.tokens))
+ if not self.flow_level and self.allow_block_collection:
+ self.guessing_simple_key()
+ if self.data[self.index] == '!':
+ self.tokens.append('TAG')
+ elif self.data[self.index] == '&':
+ self.tokens.append('ANCHOR')
+ else:
+ self.tokens.append('ALIAS')
+ self.eat_ns()
+ self.allow_flow_key = False
+ self.allow_block_collection = False
+ return True
+ if self.data[self.index] == '"':
+ if self.flow_level and self.allow_flow_key:
+ self.guess_flow_key_levels.append(self.flow_level)
+ self.guess_flow_key_tokens.append(len(self.tokens))
+ if not self.flow_level and self.allow_block_collection:
+ self.guessing_simple_key()
+ self.tokens.append('SCALAR')
+ self.eat_double_quoted()
+ self.allow_flow_key = False
+ self.allow_block_collection = False
+ return True
+ if self.data[self.index] == '\'':
+ if self.flow_level and self.allow_flow_key:
+ self.guess_flow_key_levels.append(self.flow_level)
+ self.guess_flow_key_tokens.append(len(self.tokens))
+ if not self.flow_level and self.allow_block_collection:
+ self.guessing_simple_key()
+ self.tokens.append('SCALAR')
+ self.eat_single_quoted()
+ self.allow_flow_key = False
+ self.allow_block_collection = False
+ return True
+ if not self.flow_level:
+ if self.data[self.index] in '-?:' and \
+ (not self.data[self.index+1:self.index+2] or self.data[self.index+1:self.index+2] in ' \r\n'):
+ if self.guess_simple_key and self.data[self.index] == ':':
+ self.tokens.insert(self.guess_simple_key_token, 'KEY')
+ if self.guess_simple_key_indent > self.indent:
+ self.indents.append(self.indent)
+ self.indent = self.guess_simple_key_indent
+ self.tokens.insert(self.guess_simple_key_token, 'BLOCK_MAP_START')
+ self.tokens.append('VALUE')
+ self.no_simple_key()
+ self.index += 1
+ self.column += 1
+ self.allow_block_collection = False
+ return True
+ else:
+ if not self.allow_block_collection:
+ self.error("Block collection should start at the beginning of the line")
+ if self.column > self.indent:
+ self.indents.append(self.indent)
+ self.indent = self.column
+ if self.data[self.index] == '-':
+ self.tokens.append('BLOCK_SEQ_START')
+ else:
+ self.tokens.append('BLOCK_MAP_START')
+ if self.data[self.index] == '-':
+ self.tokens.append('ENTRY')
+ elif self.data[self.index] == '?':
+ self.tokens.append('KEY')
+ else:
+ self.tokens.append('VALUE')
+ self.index += 1
+ self.column += 1
+ #self.allow_block_collection = False
+ self.allow_block_collection = True
+ self.no_simple_key()
+ return True
+ if self.data[self.index] == '>':
+ self.no_simple_key()
+ self.tokens.append('SCALAR')
+ self.eat_folded()
+ self.allow_block_collection = True
+ return True
+ if self.data[self.index] == '|':
+ self.no_simple_key()
+ self.tokens.append('SCALAR')
+ self.eat_literal()
+ self.allow_block_collection = True
+ return True
+ if self.eat_block_plain():
+ self.tokens.append('SCALAR')
+ return True
+ else:
+ if self.data[self.index] in ',?:':
+ if self.data[self.index] == ',':
+ self.tokens.append('ENTRY')
+ while self.guess_flow_key_levels and self.guess_flow_key_levels[-1] >= self.flow_level:
+ self.guess_flow_key_levels.pop()
+ self.guess_flow_key_tokens.pop()
+ self.allow_flow_key = True
+ elif self.data[self.index] == '?':
+ self.tokens.append('KEY')
+ while self.guess_flow_key_levels and self.guess_flow_key_levels[-1] >= self.flow_level:
+ self.guess_flow_key_levels.pop()
+ self.guess_flow_key_tokens.pop()
+ self.allow_flow_key = False
+ else:
+ self.tokens.append('VALUE')
+ if self.guess_flow_key_levels and self.guess_flow_key_levels[-1] == self.flow_level:
+ self.guess_flow_key_levels.pop()
+ index = self.guess_flow_key_tokens.pop()
+ self.tokens.insert(index, 'KEY')
+                    self.allow_flow_key = False
+ self.index += 1
+ self.column += 1
+ return True
+ if self.eat_flow_plain():
+ self.tokens.append('SCALAR')
+ return True
+ self.error("Invalid token")
+ else:
+ self.unwind_indents(-1)
+
+ def error(self, message):
+ raise Error(message, Marker(self.source, self.data, self.index))
+
+class Parser:
+
+ def parse(self, source, data):
+        scanner = _Scanner()
+ self.tokens = scanner.scan(source, data)
+ self.tokens.append('END')
+ documents = self.parse_stream()
+ if len(documents) == 1:
+ return documents[0]
+ return documents
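+
+    # A usage sketch (illustrative): scalars are parsed to True, so only the
+    # document skeleton survives; e.g.
+    #   Parser().parse('<string>', "- foo\n- bar\n")   # -> [True, True]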
+
+ def parse_stream(self):
+ documents = []
+ if self.tokens[0] not in ['DIRECTIVE', 'DOCUMENT_START', 'END']:
+ documents.append(self.parse_block_node())
+ while self.tokens[0] != 'END':
+ while self.tokens[0] == 'DIRECTIVE':
+ self.tokens.pop(0)
+ if self.tokens[0] != 'DOCUMENT_START':
+ self.error('DOCUMENT_START is expected')
+ self.tokens.pop(0)
+ if self.tokens[0] in ['DIRECTIVE', 'DOCUMENT_START', 'DOCUMENT_END', 'END']:
+ documents.append(None)
+ else:
+ documents.append(self.parse_block_node())
+ while self.tokens[0] == 'DOCUMENT_END':
+ self.tokens.pop(0)
+ if self.tokens[0] != 'END':
+ self.error("END is expected")
+ return tuple(documents)
+
+ def parse_block_node(self):
+ if self.tokens[0] == 'ALIAS':
+ self.tokens.pop(0)
+ return '*'
+ if self.tokens[0] == 'TAG':
+ self.tokens.pop(0)
+ if self.tokens[0] == 'ANCHOR':
+ self.tokens.pop(0)
+ elif self.tokens[0] == 'ANCHOR':
+ self.tokens.pop(0)
+ if self.tokens[0] == 'TAG':
+ self.tokens.pop(0)
+ return self.parse_block_content()
+
+ def parse_flow_node(self):
+ if self.tokens[0] == 'ALIAS':
+ self.tokens.pop(0)
+ return '*'
+ if self.tokens[0] == 'TAG':
+ self.tokens.pop(0)
+ if self.tokens[0] == 'ANCHOR':
+ self.tokens.pop(0)
+ elif self.tokens[0] == 'ANCHOR':
+ self.tokens.pop(0)
+ if self.tokens[0] == 'TAG':
+ self.tokens.pop(0)
+ return self.parse_flow_content()
+
+ def parse_block_node_or_indentless_sequence(self):
+ if self.tokens[0] == 'ALIAS':
+ self.tokens.pop(0)
+ return '*'
+ if self.tokens[0] == 'TAG':
+ self.tokens.pop(0)
+ if self.tokens[0] == 'ANCHOR':
+ self.tokens.pop(0)
+ elif self.tokens[0] == 'ANCHOR':
+ self.tokens.pop(0)
+ if self.tokens[0] == 'TAG':
+ self.tokens.pop(0)
+ if self.tokens[0] == 'ENTRY':
+            return self.parse_indentless_sequence()
+ return self.parse_block_content()
+
+ def parse_block_content(self):
+ if self.tokens[0] == 'SCALAR':
+ self.tokens.pop(0)
+ return True
+ elif self.tokens[0] == 'BLOCK_SEQ_START':
+ return self.parse_block_sequence()
+ elif self.tokens[0] == 'BLOCK_MAP_START':
+ return self.parse_block_mapping()
+ elif self.tokens[0] == 'FLOW_SEQ_START':
+ return self.parse_flow_sequence()
+ elif self.tokens[0] == 'FLOW_MAP_START':
+ return self.parse_flow_mapping()
+ else:
+ self.error('block content is expected')
+
+ def parse_flow_content(self):
+ if self.tokens[0] == 'SCALAR':
+ self.tokens.pop(0)
+ return True
+ elif self.tokens[0] == 'FLOW_SEQ_START':
+ return self.parse_flow_sequence()
+ elif self.tokens[0] == 'FLOW_MAP_START':
+ return self.parse_flow_mapping()
+ else:
+ self.error('flow content is expected')
+
+ def parse_block_sequence(self):
+ sequence = []
+ if self.tokens[0] != 'BLOCK_SEQ_START':
+ self.error('BLOCK_SEQ_START is expected')
+ self.tokens.pop(0)
+ while self.tokens[0] == 'ENTRY':
+ self.tokens.pop(0)
+ if self.tokens[0] not in ['ENTRY', 'BLOCK_END']:
+ sequence.append(self.parse_block_node())
+ else:
+ sequence.append(None)
+ if self.tokens[0] != 'BLOCK_END':
+ self.error('BLOCK_END is expected')
+ self.tokens.pop(0)
+ return sequence
+
+ def parse_indentless_sequence(self):
+ sequence = []
+ while self.tokens[0] == 'ENTRY':
+ self.tokens.pop(0)
+ if self.tokens[0] not in ['ENTRY']:
+ sequence.append(self.parse_block_node())
+ else:
+ sequence.append(None)
+ return sequence
+
+ def parse_block_mapping(self):
+ mapping = []
+ if self.tokens[0] != 'BLOCK_MAP_START':
+ self.error('BLOCK_MAP_START is expected')
+ self.tokens.pop(0)
+ while self.tokens[0] in ['KEY', 'VALUE']:
+ key = None
+ value = None
+ if self.tokens[0] == 'KEY':
+ self.tokens.pop(0)
+ if self.tokens[0] not in ['KEY', 'VALUE', 'BLOCK_END']:
+ key = self.parse_block_node_or_indentless_sequence()
+ if self.tokens[0] == 'VALUE':
+ self.tokens.pop(0)
+ if self.tokens[0] not in ['KEY', 'VALUE', 'BLOCK_END']:
+ value = self.parse_block_node_or_indentless_sequence()
+ mapping.append((key, value))
+ if self.tokens[0] != 'BLOCK_END':
+ self.error('BLOCK_END is expected')
+ self.tokens.pop(0)
+ return mapping
+
+ def parse_flow_sequence(self):
+ sequence = []
+ if self.tokens[0] != 'FLOW_SEQ_START':
+ self.error('FLOW_SEQ_START is expected')
+ self.tokens.pop(0)
+ while self.tokens[0] != 'FLOW_SEQ_END':
+ if self.tokens[0] == 'KEY':
+ self.tokens.pop(0)
+ key = None
+ value = None
+ if self.tokens[0] != 'VALUE':
+ key = self.parse_flow_node()
+ if self.tokens[0] == 'VALUE':
+ self.tokens.pop(0)
+ if self.tokens[0] not in ['ENTRY', 'FLOW_SEQ_END']:
+ value = self.parse_flow_node()
+ sequence.append([(key, value)])
+ else:
+ sequence.append(self.parse_flow_node())
+ if self.tokens[0] not in ['ENTRY', 'FLOW_SEQ_END']:
+ self.error("ENTRY or FLOW_SEQ_END is expected")
+ if self.tokens[0] == 'ENTRY':
+ self.tokens.pop(0)
+ if self.tokens[0] != 'FLOW_SEQ_END':
+ self.error('FLOW_SEQ_END is expected')
+ self.tokens.pop(0)
+ return sequence
+
+ def parse_flow_mapping(self):
+ mapping = []
+ if self.tokens[0] != 'FLOW_MAP_START':
+ self.error('FLOW_MAP_START is expected')
+ self.tokens.pop(0)
+ while self.tokens[0] != 'FLOW_MAP_END':
+ if self.tokens[0] == 'KEY':
+ self.tokens.pop(0)
+ key = None
+ value = None
+ if self.tokens[0] != 'VALUE':
+ key = self.parse_flow_node()
+ if self.tokens[0] == 'VALUE':
+ self.tokens.pop(0)
+ if self.tokens[0] not in ['ENTRY', 'FLOW_MAP_END']:
+ value = self.parse_flow_node()
+ mapping.append((key, value))
+ else:
+ mapping.append((self.parse_flow_node(), None))
+ if self.tokens[0] not in ['ENTRY', 'FLOW_MAP_END']:
+ self.error("ENTRY or FLOW_MAP_END is expected")
+ if self.tokens[0] == 'ENTRY':
+ self.tokens.pop(0)
+ if self.tokens[0] != 'FLOW_MAP_END':
+ self.error('FLOW_MAP_END is expected')
+ self.tokens.pop(0)
+ return mapping
+
+ def error(self, message):
+ raise Error(message+': '+str(self.tokens))
+
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..e6c1652
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,22 @@
+
+NAME = 'PyYAML3000'
+VERSION = '0.1'
+DESCRIPTION = "YAML parser (and emitter)"
+AUTHOR = "Kirill Simonov"
+AUTHOR_EMAIL = 'xi@resolvent.net'
+LICENSE = "BSD"
+
+from distutils.core import setup
+
+setup(
+ name=NAME,
+ version=VERSION,
+ description=DESCRIPTION,
+ author=AUTHOR,
+ author_email=AUTHOR_EMAIL,
+ license=LICENSE,
+
+ package_dir={'': 'lib'},
+ packages=['yaml'],
+)
+
diff --git a/tests/data/spec-02-01.tokens b/tests/data/spec-02-01.tokens
new file mode 100644
index 0000000..ce44cac
--- /dev/null
+++ b/tests/data/spec-02-01.tokens
@@ -0,0 +1 @@
+[[ , _ , _ , _ ]}
diff --git a/tests/data/spec-02-02.tokens b/tests/data/spec-02-02.tokens
new file mode 100644
index 0000000..e4e381b
--- /dev/null
+++ b/tests/data/spec-02-02.tokens
@@ -0,0 +1,5 @@
+{{
+? _ : _
+? _ : _
+? _ : _
+]}
diff --git a/tests/data/spec-02-03.tokens b/tests/data/spec-02-03.tokens
new file mode 100644
index 0000000..89815f2
--- /dev/null
+++ b/tests/data/spec-02-03.tokens
@@ -0,0 +1,4 @@
+{{
+? _ : [[ , _ , _ , _ ]}
+? _ : [[ , _ , _ , _ ]}
+]}
diff --git a/tests/data/spec-02-04.tokens b/tests/data/spec-02-04.tokens
new file mode 100644
index 0000000..9cb9815
--- /dev/null
+++ b/tests/data/spec-02-04.tokens
@@ -0,0 +1,4 @@
+[[
+, {{ ? _ : _ ? _ : _ ? _ : _ ]}
+, {{ ? _ : _ ? _ : _ ? _ : _ ]}
+]}
diff --git a/tests/data/spec-02-05.tokens b/tests/data/spec-02-05.tokens
new file mode 100644
index 0000000..3f6f1ab
--- /dev/null
+++ b/tests/data/spec-02-05.tokens
@@ -0,0 +1,5 @@
+[[
+, [ _ , _ , _ ]
+, [ _ , _ , _ ]
+, [ _ , _ , _ ]
+]}
diff --git a/tests/data/spec-02-06.tokens b/tests/data/spec-02-06.tokens
new file mode 100644
index 0000000..a1a5eef
--- /dev/null
+++ b/tests/data/spec-02-06.tokens
@@ -0,0 +1,4 @@
+{{
+? _ : { ? _ : _ , ? _ : _ }
+? _ : { ? _ : _ , ? _ : _ }
+]}
diff --git a/tests/data/spec-02-07.tokens b/tests/data/spec-02-07.tokens
new file mode 100644
index 0000000..ed48883
--- /dev/null
+++ b/tests/data/spec-02-07.tokens
@@ -0,0 +1,12 @@
+---
+[[
+, _
+, _
+, _
+]}
+
+---
+[[
+, _
+, _
+]}
diff --git a/tests/data/spec-02-08.tokens b/tests/data/spec-02-08.tokens
new file mode 100644
index 0000000..7d2c03d
--- /dev/null
+++ b/tests/data/spec-02-08.tokens
@@ -0,0 +1,15 @@
+---
+{{
+? _ : _
+? _ : _
+? _ : _
+]}
+...
+
+---
+{{
+? _ : _
+? _ : _
+? _ : _
+]}
+...
diff --git a/tests/data/spec-02-09.tokens b/tests/data/spec-02-09.tokens
new file mode 100644
index 0000000..b2ec10e
--- /dev/null
+++ b/tests/data/spec-02-09.tokens
@@ -0,0 +1,5 @@
+---
+{{
+? _ : [[ , _ , _ ]}
+? _ : [[ , _ , _ ]}
+]}
diff --git a/tests/data/spec-02-10.tokens b/tests/data/spec-02-10.tokens
new file mode 100644
index 0000000..26caa2b
--- /dev/null
+++ b/tests/data/spec-02-10.tokens
@@ -0,0 +1,5 @@
+---
+{{
+? _ : [[ , _ , & _ ]}
+? _ : [[ , * , _ ]}
+]}
diff --git a/tests/data/test_marker.markers b/tests/data/test_marker.markers
new file mode 100644
index 0000000..7b08ee4
--- /dev/null
+++ b/tests/data/test_marker.markers
@@ -0,0 +1,38 @@
+---
+*The first line.
+The last line.
+---
+The first*line.
+The last line.
+---
+The first line.*
+The last line.
+---
+The first line.
+*The last line.
+---
+The first line.
+The last*line.
+---
+The first line.
+The last line.*
+---
+The first line.
+*The selected line.
+The last line.
+---
+The first line.
+The selected*line.
+The last line.
+---
+The first line.
+The selected line.*
+The last line.
+---
+*The only line.
+---
+The only*line.
+---
+The only line.*
+---
+Loooooooooooooooooooooooooooooooooooooooooooooong*Liiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiine
diff --git a/tests/test_appliance.py b/tests/test_appliance.py
new file mode 100644
index 0000000..372a68d
--- /dev/null
+++ b/tests/test_appliance.py
@@ -0,0 +1,28 @@
+
+import unittest, os
+
+class TestAppliance(unittest.TestCase):
+
+ DATA = 'tests/data'
+
+ tests = {}
+ for filename in os.listdir(DATA):
+ if os.path.isfile(os.path.join(DATA, filename)):
+ root, ext = os.path.splitext(filename)
+ tests.setdefault(root, []).append(ext)
+
+ def add_tests(cls, method_name, *extensions):
+ for test in cls.tests:
+ available_extensions = cls.tests[test]
+ for ext in extensions:
+ if ext not in available_extensions:
+ break
+ else:
+ filenames = [os.path.join(cls.DATA, test+ext) for ext in extensions]
+ def test_method(self, test=test, filenames=filenames):
+ getattr(self, '_'+method_name)(test, *filenames)
+ test = test.replace('-', '_')
+ test_method.__name__ = '%s_%s' % (method_name, test)
+ setattr(cls, test_method.__name__, test_method)
+ add_tests = classmethod(add_tests)
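+
+# Usage sketch: a subclass defines `_testFoo(self, test_name, *filenames)`
+# and calls `Subclass.add_tests('testFoo', '.data', '.ext')`; one generated
+# test method per matching group of data files is attached to the class
+# (see test_marker.py and test_tokens.py).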
+
diff --git a/tests/test_build.py b/tests/test_build.py
new file mode 100644
index 0000000..50f7437
--- /dev/null
+++ b/tests/test_build.py
@@ -0,0 +1,12 @@
+
+def main():
+ import sys, os, distutils.util
+ #build_lib = os.path.join('build', 'lib.%s-%s' % (distutils.util.get_platform(), sys.version[0:3]))
+ build_lib = 'build/lib'
+ sys.path.insert(0, build_lib)
+ import test_yaml
+ test_yaml.main('test_yaml')
+
+if __name__ == '__main__':
+ main()
+
diff --git a/tests/test_marker.py b/tests/test_marker.py
new file mode 100644
index 0000000..4570098
--- /dev/null
+++ b/tests/test_marker.py
@@ -0,0 +1,35 @@
+
+import test_appliance
+
+from yaml.marker import Marker
+
+class TestMarker(test_appliance.TestAppliance):
+
+ def _testMarkers(self, test_name, markers_filename):
+ inputs = file(markers_filename, 'rb').read().split('---\n')[1:]
+ for input in inputs:
+ index = 0
+ line = 0
+ column = 0
+ while input[index] != '*':
+ if input[index] == '\n':
+ line += 1
+ column = 0
+ else:
+ column += 1
+ index += 1
+ for str_type in [str, unicode]:
+ marker = Marker(test_name, str_type(input), index, line, column)
+ snippet = marker.get_snippet()
+ #print "INPUT:"
+ #print input
+ #print "SNIPPET:"
+ #print snippet
+ self.failUnless(isinstance(snippet, str))
+ self.failUnlessEqual(snippet.count('\n'), 2)
+ data, pointer, dummy = snippet.split('\n')
+ self.failUnless(len(data) < 80)
+ self.failUnlessEqual(data[len(pointer)-1], '*')
+
+TestMarker.add_tests('testMarkers', '.markers')
+
diff --git a/tests/test_tokens.py b/tests/test_tokens.py
new file mode 100644
index 0000000..f233589
--- /dev/null
+++ b/tests/test_tokens.py
@@ -0,0 +1,62 @@
+
+import test_appliance
+
+from yaml.scanner import Scanner
+
+class TestTokens(test_appliance.TestAppliance):
+
+ # Tokens mnemonic:
+ # directive: %
+ # document_start: ---
+ # document_end: ...
+ # alias: *
+ # anchor: &
+ # tag: !
+    # scalar: _
+ # block_sequence_start: [[
+ # block_mapping_start: {{
+ # block_end: ]}
+ # flow_sequence_start: [
+ # flow_sequence_end: ]
+ # flow_mapping_start: {
+ # flow_mapping_end: }
+ # entry: ,
+ # key: ?
+ # value: :
+
+ replaces = {
+ 'DIRECTIVE': '%',
+ 'DOCUMENT_START': '---',
+ 'DOCUMENT_END': '...',
+ 'ALIAS': '*',
+ 'ANCHOR': '&',
+ 'TAG': '!',
+ 'SCALAR': '_',
+ 'BLOCK_SEQ_START': '[[',
+ 'BLOCK_MAP_START': '{{',
+ 'BLOCK_END': ']}',
+ 'FLOW_SEQ_START': '[',
+ 'FLOW_SEQ_END': ']',
+ 'FLOW_MAP_START': '{',
+ 'FLOW_MAP_END': '}',
+ 'ENTRY': ',',
+ 'KEY': '?',
+ 'VALUE': ':',
+ }
+
+ def _testTokens(self, test_name, data_filename, tokens_filename):
+ tokens1 = None
+ tokens2 = file(tokens_filename, 'rb').read().split()
+ try:
+ scanner = Scanner()
+ tokens1 = scanner.scan(data_filename, file(data_filename, 'rb').read())
+ tokens1 = [self.replaces[t] for t in tokens1]
+ self.failUnlessEqual(tokens1, tokens2)
+ except:
+ print
+ print "TOKENS1:", tokens1
+ print "TOKENS2:", tokens2
+ raise
+
+TestTokens.add_tests('testTokens', '.data', '.tokens')
+
diff --git a/tests/test_yaml.py b/tests/test_yaml.py
new file mode 100644
index 0000000..54447b3
--- /dev/null
+++ b/tests/test_yaml.py
@@ -0,0 +1,12 @@
+
+import unittest
+
+from test_marker import *
+from test_tokens import *
+
+def main(module='__main__'):
+ unittest.main(module)
+
+if __name__ == '__main__':
+ main()
+