summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author    xi <xi@18f92427-320e-0410-9341-c67f048884a3>    2006-03-26 22:57:23 +0000
committer xi <xi@18f92427-320e-0410-9341-c67f048884a3>    2006-03-26 22:57:23 +0000
commit    c6f2fc9875595b01f4249fbe87b0b846d0a2adc9 (patch)
tree      07ef1572223ac0bfbb1d8d58c03e36c81c91e5a1
parent    066042294c0843c568addf69db0ea58c3abda162 (diff)
download  pyyaml-c6f2fc9875595b01f4249fbe87b0b846d0a2adc9.tar.gz
Parser now provides style information. Allow empty plain scalars if a tag or anchor is given.
git-svn-id: http://svn.pyyaml.org/pyyaml/branches/working-on-emitter@127 18f92427-320e-0410-9341-c67f048884a3
-rw-r--r--    lib/yaml/emitter.py                2
-rw-r--r--    lib/yaml/events.py                27
-rw-r--r--    lib/yaml/parser.py                58
-rw-r--r--    lib/yaml/reader.py                 4
-rw-r--r--    lib/yaml/scanner.py               45
-rw-r--r--    lib/yaml/tokens.py                24
-rw-r--r--    tests/data/spec-08-12.canonical    1
-rw-r--r--    tests/data/spec-08-12.data         2
-rw-r--r--    tests/test_emitter.py             11
9 files changed, 135 insertions, 39 deletions
diff --git a/lib/yaml/emitter.py b/lib/yaml/emitter.py
index 4f33cb8..d2b372f 100644
--- a/lib/yaml/emitter.py
+++ b/lib/yaml/emitter.py
@@ -175,6 +175,6 @@ class Emitter:
self.soft_space = True
def write_indent(self):
- self.writer.write("\n"+" "*(self.level*4))
+ self.writer.write("\n"+" "*(self.level*2))
self.soft_space = False
diff --git a/lib/yaml/events.py b/lib/yaml/events.py
index 97bccb3..0c27903 100644
--- a/lib/yaml/events.py
+++ b/lib/yaml/events.py
@@ -20,19 +20,25 @@ class AliasEvent(NodeEvent):
pass
class ScalarEvent(NodeEvent):
- def __init__(self, anchor, tag, value, start_mark=None, end_mark=None):
+ def __init__(self, anchor, tag, value, start_mark=None, end_mark=None,
+ implicit=None, style=None):
self.anchor = anchor
self.tag = tag
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
+ self.implicit = implicit
+ self.style = style
class CollectionEvent(NodeEvent):
- def __init__(self, anchor, tag, start_mark=None, end_mark=None):
+ def __init__(self, anchor, tag, start_mark=None, end_mark=None,
+ flow=None, compact=None):
self.anchor = anchor
self.tag = tag
self.start_mark = start_mark
self.end_mark = end_mark
+ self.flow = flow
+ self.compact = compact
class SequenceEvent(CollectionEvent):
pass
@@ -44,13 +50,26 @@ class CollectionEndEvent(Event):
pass
class DocumentStartEvent(Event):
- pass
+ def __init__(self, start_mark=None, end_mark=None,
+ indent=None, implicit=None, version=None, tags=None,
+ canonical=None):
+ self.start_mark = start_mark
+ self.end_mark = end_mark
+ self.indent = indent
+ self.implicit = implicit
+ self.version = version
+ self.tags = tags
+ self.canonical = canonical
class DocumentEndEvent(Event):
pass
class StreamStartEvent(Event):
- pass
+ def __init__(self, start_mark=None, end_mark=None,
+ encoding=None):
+ self.start_mark = start_mark
+ self.end_mark = end_mark
+ self.encoding = encoding
class StreamEndEvent(Event):
pass
diff --git a/lib/yaml/parser.py b/lib/yaml/parser.py
index 9870699..f245f8f 100644
--- a/lib/yaml/parser.py
+++ b/lib/yaml/parser.py
@@ -130,7 +130,8 @@ class Parser:
# Parse start of stream.
token = self.scanner.get()
- yield StreamStartEvent(token.start_mark, token.end_mark)
+ yield StreamStartEvent(token.start_mark, token.end_mark,
+ encoding=token.encoding)
# Parse implicit document.
if not self.scanner.check(DirectiveToken, DocumentStartToken,
@@ -138,7 +139,7 @@ class Parser:
self.tag_handles = self.DEFAULT_TAGS
token = self.scanner.peek()
start_mark = end_mark = token.start_mark
- yield DocumentStartEvent(start_mark, end_mark)
+ yield DocumentStartEvent(start_mark, end_mark, implicit=True)
for event in self.parse_block_node():
yield event
token = self.scanner.peek()
@@ -152,7 +153,7 @@ class Parser:
while not self.scanner.check(StreamEndToken):
token = self.scanner.peek()
start_mark = token.start_mark
- self.process_directives()
+ version, tags = self.process_directives()
if not self.scanner.check(DocumentStartToken):
raise ParserError(None, None,
"expected '<document start>', but found %r"
@@ -160,7 +161,8 @@ class Parser:
self.scanner.peek().start_mark)
token = self.scanner.get()
end_mark = token.end_mark
- yield DocumentStartEvent(start_mark, end_mark)
+ yield DocumentStartEvent(start_mark, end_mark,
+ implicit=False, version=version, tags=tags)
if self.scanner.check(DirectiveToken,
DocumentStartToken, DocumentEndToken, StreamEndToken):
yield self.process_empty_scalar(token.end_mark)
@@ -201,9 +203,14 @@ class Parser:
"duplicate tag handle %r" % handle.encode('utf-8'),
token.start_mark)
self.tag_handles[handle] = prefix
+ version_value = self.yaml_version
+ tags_value = None
+ if self.tag_handles:
+ tags_value = self.tag_handles.copy()
for key in self.DEFAULT_TAGS:
if key not in self.tag_handles:
self.tag_handles[key] = self.DEFAULT_TAGS[key]
+ return version_value, tags_value
def parse_block_node(self):
return self.parse_node(block=True)
@@ -232,19 +239,22 @@ class Parser:
start_mark = end_mark = tag_mark = None
if self.scanner.check(AnchorToken):
token = self.scanner.get()
- start_mark = end_mark = token.start_mark
+ start_mark = token.start_mark
+ end_mark = token.end_mark
anchor = token.value
if self.scanner.check(TagToken):
token = self.scanner.get()
- end_mark = tag_mark = token.start_mark
+ tag_mark = token.start_mark
+ end_mark = token.end_mark
tag = token.value
elif self.scanner.check(TagToken):
token = self.scanner.get()
- start_mark = end_mark = tag_mark = token.start_mark
+ start_mark = tag_mark = token.start_mark
+ end_mark = token.end_mark
tag = token.value
if self.scanner.check(AnchorToken):
token = self.scanner.get()
- end_mark = token.start_mark
+ end_mark = token.end_mark
anchor = token.value
if tag is not None:
handle, suffix = tag
@@ -261,35 +271,48 @@ class Parser:
self.scanner.peek().plain):
tag = u'!'
if start_mark is None:
- start_mark = self.scanner.peek().start_mark
+ start_mark = end_mark = self.scanner.peek().start_mark
event = None
collection_events = None
if indentless_sequence and self.scanner.check(BlockEntryToken):
end_mark = self.scanner.peek().end_mark
- event = SequenceEvent(anchor, tag, start_mark, end_mark)
+ event = SequenceEvent(anchor, tag, start_mark, end_mark,
+ flow=False, compact=False)
collection_events = self.parse_indentless_sequence()
else:
if self.scanner.check(ScalarToken):
token = self.scanner.get()
end_mark = token.end_mark
event = ScalarEvent(anchor, tag, token.value,
- start_mark, end_mark)
+ start_mark, end_mark,
+ implicit=(tag is None), style=token.style)
elif self.scanner.check(FlowSequenceStartToken):
end_mark = self.scanner.peek().end_mark
- event = SequenceEvent(anchor, tag, start_mark, end_mark)
+ event = SequenceEvent(anchor, tag, start_mark, end_mark,
+ flow=True)
collection_events = self.parse_flow_sequence()
elif self.scanner.check(FlowMappingStartToken):
end_mark = self.scanner.peek().end_mark
- event = MappingEvent(anchor, tag, start_mark, end_mark)
+ event = MappingEvent(anchor, tag, start_mark, end_mark,
+ flow=True)
collection_events = self.parse_flow_mapping()
elif block and self.scanner.check(BlockSequenceStartToken):
end_mark = self.scanner.peek().start_mark
- event = SequenceEvent(anchor, tag, start_mark, end_mark)
+ compact = self.scanner.peek().inline
+ event = SequenceEvent(anchor, tag, start_mark, end_mark,
+ flow=False, compact=compact)
collection_events = self.parse_block_sequence()
elif block and self.scanner.check(BlockMappingStartToken):
end_mark = self.scanner.peek().start_mark
- event = MappingEvent(anchor, tag, start_mark, end_mark)
+ compact = self.scanner.peek().inline
+ event = MappingEvent(anchor, tag, start_mark, end_mark,
+ flow=False, compact=compact)
collection_events = self.parse_block_mapping()
+ elif anchor is not None or tag is not None:
+ # Empty scalars are allowed even if a tag or an anchor is
+ # specified.
+ event = ScalarEvent(anchor, tag, u'', start_mark, end_mark,
+ implicit=False, style='')
else:
if block:
node = 'block'
@@ -384,7 +407,8 @@ class Parser:
if self.scanner.check(KeyToken):
token = self.scanner.get()
yield MappingEvent(None, u'!',
- token.start_mark, token.end_mark)
+ token.start_mark, token.end_mark,
+ flow=True, compact=True)
if not self.scanner.check(ValueToken,
FlowEntryToken, FlowSequenceEndToken):
for event in self.parse_flow_node():
@@ -460,5 +484,5 @@ class Parser:
yield CollectionEndEvent(token.start_mark, token.end_mark)
def process_empty_scalar(self, mark):
- return ScalarEvent(None, None, u'', mark, mark)
+ return ScalarEvent(None, None, u'', mark, mark, implicit=True)
diff --git a/lib/yaml/reader.py b/lib/yaml/reader.py
index 9778943..ab16a13 100644
--- a/lib/yaml/reader.py
+++ b/lib/yaml/reader.py
@@ -100,6 +100,7 @@ class Reader:
self.pointer = 0
self.raw_buffer = None
self.raw_decode = None
+ self.encoding = None
self.index = 0
self.line = 0
self.column = 0
@@ -156,10 +157,13 @@ class Reader:
if not isinstance(self.raw_buffer, unicode):
if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
self.raw_decode = utf_16_le_decode
+ self.encoding = 'utf-16-le'
elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
self.raw_decode = utf_16_be_decode
+ self.encoding = 'utf-16-be'
else:
self.raw_decode = utf_8_decode
+ self.encoding = 'utf-8'
self.update(1)
NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
diff --git a/lib/yaml/scanner.py b/lib/yaml/scanner.py
index 9c536b4..a665da9 100644
--- a/lib/yaml/scanner.py
+++ b/lib/yaml/scanner.py
@@ -35,13 +35,15 @@ class ScannerError(MarkedYAMLError):
class SimpleKey:
# See below simple keys treatment.
- def __init__(self, token_number, required, index, line, column, mark):
+ def __init__(self, token_number, required, index, line, column, mark=None,
+ inline=None):
self.token_number = token_number
self.required = required
self.index = index
self.line = line
self.column = column
self.mark = mark
+ self.inline = inline
class Scanner:
@@ -81,6 +83,10 @@ class Scanner:
# Past indentation levels.
self.indents = []
+ # Used for providing style information to the parser.
+ self.current_line = self.previous_line = self.reader.line
self.current_column = self.previous_column = self.reader.column
+
# Variables related to simple keys treatment.
# A simple key is a key that is not denoted by the '?' indicator.
@@ -321,8 +327,9 @@ class Scanner:
line = self.reader.line
column = self.reader.column
mark = self.reader.get_mark()
+ inline = (self.current_line == self.previous_line)
key = SimpleKey(token_number, required,
- index, line, column, mark)
+ index, line, column, mark, inline)
self.possible_simple_keys[self.flow_level] = key
def remove_possible_simple_key(self):
@@ -380,8 +387,8 @@ class Scanner:
mark = self.reader.get_mark()
# Add STREAM-END.
- self.tokens.append(StreamStartToken(mark, mark))
-
+ self.tokens.append(StreamStartToken(mark, mark,
+ encoding=self.reader.encoding))
def fetch_stream_end(self):
@@ -509,7 +516,8 @@ class Scanner:
# We may need to add BLOCK-SEQUENCE-START.
if self.add_indent(self.reader.column):
mark = self.reader.get_mark()
- self.tokens.append(BlockSequenceStartToken(mark, mark))
+ inline = (self.current_line == self.previous_line)
+ self.tokens.append(BlockSequenceStartToken(mark, mark, inline))
# It's an error for the block entry to occur in the flow context,
# but we let the parser detect this.
@@ -542,7 +550,8 @@ class Scanner:
# We may need to add BLOCK-MAPPING-START.
if self.add_indent(self.reader.column):
mark = self.reader.get_mark()
- self.tokens.append(BlockMappingStartToken(mark, mark))
+ inline = (self.current_line == self.previous_line)
+ self.tokens.append(BlockMappingStartToken(mark, mark, inline))
# Simple keys are allowed after '?' in the block context.
self.allow_simple_key = not self.flow_level
@@ -572,7 +581,8 @@ class Scanner:
if not self.flow_level:
if self.add_indent(key.column):
self.tokens.insert(key.token_number-self.tokens_taken,
- BlockMappingStartToken(key.mark, key.mark))
+ BlockMappingStartToken(key.mark, key.mark,
+ key.inline))
# There cannot be two simple keys one after another.
self.allow_simple_key = False
@@ -791,6 +801,11 @@ class Scanner:
else:
found = True
+ self.previous_line = self.current_line
+ self.previous_column = self.current_column
+ self.current_line = self.reader.line
+ self.current_column = self.reader.column
+
def scan_directive(self):
# See the specification for details.
start_mark = self.reader.get_mark()
@@ -1053,7 +1068,12 @@ class Scanner:
chunks.extend(breaks)
# We are done.
- return ScalarToken(u''.join(chunks), False, start_mark, end_mark)
+ if folded:
+ style = '>'
+ else:
+ style = '|'
+ return ScalarToken(u''.join(chunks), False, start_mark, end_mark,
+ style)
def scan_block_scalar_indicators(self, start_mark):
# See the specification for details.
@@ -1154,7 +1174,12 @@ class Scanner:
chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
self.reader.forward()
end_mark = self.reader.get_mark()
- return ScalarToken(u''.join(chunks), False, start_mark, end_mark)
+ if double:
+ style = '"'
+ else:
+ style = '\''
+ return ScalarToken(u''.join(chunks), False, start_mark, end_mark,
+ style)
ESCAPE_REPLACEMENTS = {
u'0': u'\0',
@@ -1305,7 +1330,7 @@ class Scanner:
if not spaces or self.reader.peek() == u'#' \
or (not self.flow_level and self.reader.column < indent):
break
- return ScalarToken(u''.join(chunks), True, start_mark, end_mark)
+ return ScalarToken(u''.join(chunks), True, start_mark, end_mark, '')
def scan_plain_spaces(self, indent, start_mark):
# See the specification for details.
diff --git a/lib/yaml/tokens.py b/lib/yaml/tokens.py
index 93c3005..e315374 100644
--- a/lib/yaml/tokens.py
+++ b/lib/yaml/tokens.py
@@ -24,21 +24,41 @@ class DirectiveToken(Token):
class DocumentStartToken(Token):
id = '<document start>'
+ def __init__(self, name, value, start_mark=None, end_mark=None):
+ self.name = name
+ self.value = value
+ self.start_mark = start_mark
+ self.end_mark = end_mark
class DocumentEndToken(Token):
id = '<document end>'
class StreamStartToken(Token):
id = '<stream start>'
+ def __init__(self, start_mark=None, end_mark=None,
+ encoding=None):
+ self.start_mark = start_mark
+ self.end_mark = end_mark
+ self.encoding = encoding
class StreamEndToken(Token):
id = '<stream end>'
class BlockSequenceStartToken(Token):
id = '<block sequence start>'
+ def __init__(self, start_mark=None, end_mark=None,
+ inline=None):
+ self.start_mark = start_mark
+ self.end_mark = end_mark
+ self.inline = inline
class BlockMappingStartToken(Token):
id = '<block mapping start>'
+ def __init__(self, start_mark=None, end_mark=None,
+ inline=None):
+ self.start_mark = start_mark
+ self.end_mark = end_mark
+ self.inline = inline
class BlockEndToken(Token):
id = '<block end>'
@@ -90,9 +110,11 @@ class TagToken(Token):
class ScalarToken(Token):
id = '<scalar>'
- def __init__(self, value, plain, start_mark=None, end_mark=None):
+ def __init__(self, value, plain, start_mark=None, end_mark=None,
+ style=None):
self.value = value
self.plain = plain
self.start_mark = start_mark
self.end_mark = end_mark
+ self.style = style
diff --git a/tests/data/spec-08-12.canonical b/tests/data/spec-08-12.canonical
index dc3b81a..93899f4 100644
--- a/tests/data/spec-08-12.canonical
+++ b/tests/data/spec-08-12.canonical
@@ -6,4 +6,5 @@
!!str "Tagged",
*A,
!!str "",
+ !!str "",
]
diff --git a/tests/data/spec-08-12.data b/tests/data/spec-08-12.data
index d936ba2..3d4c6b7 100644
--- a/tests/data/spec-08-12.data
+++ b/tests/data/spec-08-12.data
@@ -3,6 +3,6 @@
&anchor "Anchored",
!!str 'Tagged',
*anchor, # Alias node
-# !!str, # Empty plain scalar
+ !!str , # Empty plain scalar
'', # Empty plain scalar
]
diff --git a/tests/test_emitter.py b/tests/test_emitter.py
index 1ff0ecc..fed6953 100644
--- a/tests/test_emitter.py
+++ b/tests/test_emitter.py
@@ -1,5 +1,5 @@
-import test_appliance, sys
+import test_appliance, sys, StringIO
from yaml import *
@@ -7,11 +7,12 @@ class TestEmitterOnCanonical(test_appliance.TestAppliance):
def _testEmitterOnCanonical(self, test_name, canonical_filename):
events = list(iter(Parser(Scanner(Reader(file(canonical_filename, 'rb'))))))
- writer = sys.stdout
+ #writer = sys.stdout
+ writer = StringIO.StringIO()
emitter = Emitter(writer)
- print "-"*30
- print "ORIGINAL DATA:"
- print file(canonical_filename, 'rb').read()
+ #print "-"*30
+ #print "ORIGINAL DATA:"
+ #print file(canonical_filename, 'rb').read()
for event in events:
emitter.emit(event)