summaryrefslogtreecommitdiff
path: root/lib/yaml/scanner.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/yaml/scanner.py')
-rw-r--r--lib/yaml/scanner.py150
1 files changed, 82 insertions, 68 deletions
diff --git a/lib/yaml/scanner.py b/lib/yaml/scanner.py
index c83a551..220a99b 100644
--- a/lib/yaml/scanner.py
+++ b/lib/yaml/scanner.py
@@ -14,7 +14,6 @@ from error import YAMLError
from tokens import *
class ScannerError(YAMLError):
- # TODO:
# ScannerError: while reading a quoted string
# in '...', line 5, column 10:
# key: "valu\?e"
@@ -23,6 +22,7 @@ class ScannerError(YAMLError):
# in '...', line 5, column 15:
# key: "valu\?e"
# ^
+
def __init__(self, context=None, context_marker=None,
problem=None, problem_marker=None):
self.context = context
@@ -41,6 +41,8 @@ class ScannerError(YAMLError):
return '\n'.join(lines)
class SimpleKey:
+ # See below simple keys treatment.
+
def __init__(self, token_number, required, index, line, column, marker):
self.token_number = token_number
self.required = required
@@ -114,23 +116,43 @@ class Scanner:
# '[', or '{' tokens.
self.possible_simple_keys = {}
- # Two public methods.
+ # Public methods.
+
+ def check(self, *choices):
+ # Check if the next token is one of the given types.
+ while self.need_more_tokens():
+ self.fetch_more_tokens()
+ if self.tokens:
+ for choice in choices:
+ if isinstance(self.tokens[0], choice):
+ return True
+ return False
- def peek_token(self):
- """Get the current token."""
+ def peek(self):
+        # Return the next token, but do not delete it from the queue.
while self.need_more_tokens():
self.fetch_more_tokens()
if self.tokens:
return self.tokens[0]
- def get_token(self):
- "Get the current token and remove it from the list of pending tokens."""
+ def get(self):
+ # Return the next token.
while self.need_more_tokens():
self.fetch_more_tokens()
if self.tokens:
self.tokens_taken += 1
return self.tokens.pop(0)
+ def __iter__(self):
+ # Iterator protocol.
+ while self.need_more_tokens():
+ self.fetch_more_tokens()
+ while self.tokens:
+ self.tokens_taken += 1
+ yield self.tokens.pop(0)
+ while self.need_more_tokens():
+ self.fetch_more_tokens()
+
# Private methods.
def need_more_tokens(self):
@@ -163,10 +185,6 @@ class Scanner:
if ch == u'\0':
return self.fetch_stream_end()
- # Is it the byte order mark?
- if ch == u'\uFEFF':
- return self.fetch_bom()
-
# Is it a directive?
if ch == u'%' and self.check_directive():
return self.fetch_directive()
@@ -197,9 +215,13 @@ class Scanner:
if ch == u'}':
return self.fetch_flow_mapping_end()
- # Is it the entry indicator?
- if ch in u'-,' and self.check_entry():
- return self.fetch_entry()
+ # Is it the flow entry indicator?
+ if ch in u',':
+ return self.fetch_flow_entry()
+
+ # Is it the block entry indicator?
+ if ch in u'-' and self.check_block_entry():
+ return self.fetch_block_entry()
# Is it the key indicator?
if ch == u'?' and self.check_key():
@@ -364,33 +386,6 @@ class Scanner:
# The reader is ended.
self.done = True
- def fetch_bom(self):
- # We consider the BOM marker as a DOCUMENT-END indicator unless it's
- # the first character in the stream. It's a reasonable approximation
- # of the specification requirements. We can follow the specification
- # literally, but it will require a new token class. Probably later.
-
- # We ignore BOM if it is the first character in the stream.
- if self.reader.index == 0:
- slef.reader.forward()
-
- # Otherwise we issue DOCUMENT-END.
- else:
-
- # Set the current intendation to -1.
- self.unwind_indent(-1)
-
- # Reset simple keys. Note that there could not be a block
- # collection after BOM.
- self.remove_possible_simple_key()
- self.allow_simple_key = False
-
- # Add DOCUMENT-END.
- start_marker = self.reader.get_marker()
- self.reader.forward()
- end_marker = self.reader.get_marker()
- self.tokens.append(DocumentEndToken(start_marker, end_marker))
-
def fetch_directive(self):
        # Set the current indentation to -1.
@@ -471,7 +466,21 @@ class Scanner:
end_marker = self.reader.get_marker()
self.tokens.append(TokenClass(start_marker, end_marker))
- def fetch_entry(self):
+ def fetch_flow_entry(self):
+
+ # Simple keys are allowed after ','.
+ self.allow_simple_key = True
+
+ # Reset possible simple key on the current level.
+ self.remove_possible_simple_key()
+
+ # Add FLOW-ENTRY.
+ start_marker = self.reader.get_marker()
+ self.reader.forward()
+ end_marker = self.reader.get_marker()
+ self.tokens.append(FlowEntryToken(start_marker, end_marker))
+
+ def fetch_block_entry(self):
# Block context needs additional checks.
if not self.flow_level:
@@ -487,17 +496,22 @@ class Scanner:
marker = self.reader.get_marker()
self.tokens.append(BlockSequenceStartToken(marker, marker))
- # Simple keys are allowed after '-' and ','.
+ # It's an error for the block entry to occur in the flow context,
+ # but we let the parser detect this.
+ else:
+ pass
+
+ # Simple keys are allowed after '-'.
self.allow_simple_key = True
# Reset possible simple key on the current level.
self.remove_possible_simple_key()
- # Add ENTRY.
+ # Add BLOCK-ENTRY.
start_marker = self.reader.get_marker()
self.reader.forward()
end_marker = self.reader.get_marker()
- self.tokens.append(EntryToken(start_marker, end_marker))
+ self.tokens.append(BlockEntryToken(start_marker, end_marker))
def fetch_key(self):
@@ -681,16 +695,10 @@ class Scanner:
and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
return True
- def check_entry(self):
+ def check_block_entry(self):
- # ENTRY(flow context): ','
- if self.flow_level:
- return self.reader.peek() == u','
-
- # ENTRY(block context): '-' (' '|'\n')
- else:
- return self.reader.peek() == u'-' \
- and self.reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
+ # BLOCK-ENTRY: '-' (' '|'\n')
+ return self.reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
def check_key(self):
@@ -737,6 +745,12 @@ class Scanner:
# We ignore spaces, line breaks and comments.
# If we find a line break in the block context, we set the flag
# `allow_simple_key` on.
+ # The byte order mark is stripped if it's the first character in the
+ # stream. We do not yet support BOM inside the stream as the
+ # specification requires. Any such mark will be considered as a part
+ # of the document.
+ if self.reader.index == 0 and self.reader.peek() == u'\uFEFF':
+ self.reader.forward()
found = False
while not found:
while self.reader.peek() == u' ':
@@ -980,25 +994,25 @@ class Scanner:
# Unfortunately, folding rules are ambiguous.
#
# This is the folding according to the specification:
+
+ if folded and line_break == u'\n' \
+ and leading_non_space and self.reader.peek() not in u' \t':
+ if not breaks:
+ chunks.append(u' ')
+ else:
+ chunks.append(line_break)
+
+ # This is Clark Evans's interpretation (also in the spec
+ # examples):
#
- #if folded and line_break == u'\n' \
- # and leading_non_space and self.reader.peek() not in u' \t':
+ #if folded and line_break == u'\n':
# if not breaks:
- # chunks.append(u' ')
+ # if self.reader.peek() not in ' \t':
+ # chunks.append(u' ')
+ # else:
+ # chunks.append(line_break)
#else:
# chunks.append(line_break)
- #
- # This is Clark Evans's interpretation (also in the spec
- # examples):
- #
- if folded and line_break == u'\n':
- if not breaks:
- if self.reader.peek() not in ' \t':
- chunks.append(u' ')
- else:
- chunks.append(line_break)
- else:
- chunks.append(line_break)
else:
break