summaryrefslogtreecommitdiff
path: root/bzrlib/rio.py
diff options
context:
space:
mode:
Diffstat (limited to 'bzrlib/rio.py')
-rw-r--r--bzrlib/rio.py389
1 files changed, 389 insertions, 0 deletions
diff --git a/bzrlib/rio.py b/bzrlib/rio.py
new file mode 100644
index 0000000..adc05d2
--- /dev/null
+++ b/bzrlib/rio.py
@@ -0,0 +1,389 @@
+# Copyright (C) 2005 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+from __future__ import absolute_import
+
+# \subsection{\emph{rio} - simple text metaformat}
+#
+# \emph{r} stands for `restricted', `reproducible', or `rfc822-like'.
+#
+# The stored data consists of a series of \emph{stanzas}, each of which contains
+# \emph{fields} identified by an ascii name, with Unicode or string contents.
+# The field tag is constrained to alphanumeric characters.
+# There may be more than one field in a stanza with the same name.
+#
+# The format itself does not deal with character encoding issues, though
+# the result will normally be written in Unicode.
+#
+# The format is intended to be simple enough that there is exactly one character
+# stream representation of an object and vice versa, and that this relation
+# will continue to hold for future versions of bzr.
+
+import re
+
+from bzrlib import osutils
+from bzrlib.iterablefile import IterableFile
+
+# XXX: some redundancy is allowing to write stanzas in isolation as well as
+# through a writer object.
+
+class RioWriter(object):
+ def __init__(self, to_file):
+ self._soft_nl = False
+ self._to_file = to_file
+
+ def write_stanza(self, stanza):
+ if self._soft_nl:
+ self._to_file.write('\n')
+ stanza.write(self._to_file)
+ self._soft_nl = True
+
+
+class RioReader(object):
+ """Read stanzas from a file as a sequence
+
+ to_file can be anything that can be enumerated as a sequence of
+ lines (with newlines.)
+ """
+ def __init__(self, from_file):
+ self._from_file = from_file
+
+ def __iter__(self):
+ while True:
+ s = read_stanza(self._from_file)
+ if s is None:
+ break
+ else:
+ yield s
+
+
+def rio_file(stanzas, header=None):
+ """Produce a rio IterableFile from an iterable of stanzas"""
+ def str_iter():
+ if header is not None:
+ yield header + '\n'
+ first_stanza = True
+ for s in stanzas:
+ if first_stanza is not True:
+ yield '\n'
+ for line in s.to_lines():
+ yield line
+ first_stanza = False
+ return IterableFile(str_iter())
+
+
+def read_stanzas(from_file):
+ while True:
+ s = read_stanza(from_file)
+ if s is None:
+ break
+ else:
+ yield s
+
+class Stanza(object):
+ """One stanza for rio.
+
+ Each stanza contains a set of named fields.
+
+ Names must be non-empty ascii alphanumeric plus _. Names can be repeated
+ within a stanza. Names are case-sensitive. The ordering of fields is
+ preserved.
+
+ Each field value must be either an int or a string.
+ """
+
+ __slots__ = ['items']
+
+ def __init__(self, **kwargs):
+ """Construct a new Stanza.
+
+ The keyword arguments, if any, are added in sorted order to the stanza.
+ """
+ self.items = []
+ if kwargs:
+ for tag, value in sorted(kwargs.items()):
+ self.add(tag, value)
+
+ def add(self, tag, value):
+ """Append a name and value to the stanza."""
+ if not valid_tag(tag):
+ raise ValueError("invalid tag %r" % (tag,))
+ if isinstance(value, str):
+ value = unicode(value)
+ elif isinstance(value, unicode):
+ pass
+ ## elif isinstance(value, (int, long)):
+ ## value = str(value) # XXX: python2.4 without L-suffix
+ else:
+ raise TypeError("invalid type for rio value: %r of type %s"
+ % (value, type(value)))
+ self.items.append((tag, value))
+
+ @classmethod
+ def from_pairs(cls, pairs):
+ ret = cls()
+ ret.items = pairs
+ return ret
+
+ def __contains__(self, find_tag):
+ """True if there is any field in this stanza with the given tag."""
+ for tag, value in self.items:
+ if tag == find_tag:
+ return True
+ return False
+
+ def __len__(self):
+ """Return number of pairs in the stanza."""
+ return len(self.items)
+
+ def __eq__(self, other):
+ if not isinstance(other, Stanza):
+ return False
+ return self.items == other.items
+
+ def __ne__(self, other):
+ return not self.__eq__(other)
+
+ def __repr__(self):
+ return "Stanza(%r)" % self.items
+
+ def iter_pairs(self):
+ """Return iterator of tag, value pairs."""
+ return iter(self.items)
+
+ def to_lines(self):
+ """Generate sequence of lines for external version of this file.
+
+ The lines are always utf-8 encoded strings.
+ """
+ if not self.items:
+ # max() complains if sequence is empty
+ return []
+ result = []
+ for tag, value in self.items:
+ if value == '':
+ result.append(tag + ': \n')
+ elif '\n' in value:
+ # don't want splitlines behaviour on empty lines
+ val_lines = value.split('\n')
+ result.append(tag + ': ' + val_lines[0].encode('utf-8') + '\n')
+ for line in val_lines[1:]:
+ result.append('\t' + line.encode('utf-8') + '\n')
+ else:
+ result.append(tag + ': ' + value.encode('utf-8') + '\n')
+ return result
+
+ def to_string(self):
+ """Return stanza as a single string"""
+ return ''.join(self.to_lines())
+
+ def to_unicode(self):
+ """Return stanza as a single Unicode string.
+
+ This is most useful when adding a Stanza to a parent Stanza
+ """
+ if not self.items:
+ return u''
+
+ result = []
+ for tag, value in self.items:
+ if value == u'':
+ result.append(tag + u': \n')
+ elif u'\n' in value:
+ # don't want splitlines behaviour on empty lines
+ val_lines = value.split(u'\n')
+ result.append(tag + u': ' + val_lines[0] + u'\n')
+ for line in val_lines[1:]:
+ result.append(u'\t' + line + u'\n')
+ else:
+ result.append(tag + u': ' + value + u'\n')
+ return u''.join(result)
+
+ def write(self, to_file):
+ """Write stanza to a file"""
+ to_file.writelines(self.to_lines())
+
+ def get(self, tag):
+ """Return the value for a field wih given tag.
+
+ If there is more than one value, only the first is returned. If the
+ tag is not present, KeyError is raised.
+ """
+ for t, v in self.items:
+ if t == tag:
+ return v
+ else:
+ raise KeyError(tag)
+
+ __getitem__ = get
+
+ def get_all(self, tag):
+ r = []
+ for t, v in self.items:
+ if t == tag:
+ r.append(v)
+ return r
+
+ def as_dict(self):
+ """Return a dict containing the unique values of the stanza.
+ """
+ d = {}
+ for tag, value in self.items:
+ d[tag] = value
+ return d
+
+
+def valid_tag(tag):
+ return _valid_tag(tag)
+
+
+def read_stanza(line_iter):
+ """Return new Stanza read from list of lines or a file
+
+ Returns one Stanza that was read, or returns None at end of file. If a
+ blank line follows the stanza, it is consumed. It's not an error for
+ there to be no blank at end of file. If there is a blank file at the
+ start of the input this is really an empty stanza and that is returned.
+
+ Only the stanza lines and the trailing blank (if any) are consumed
+ from the line_iter.
+
+ The raw lines must be in utf-8 encoding.
+ """
+ return _read_stanza_utf8(line_iter)
+
+
+def read_stanza_unicode(unicode_iter):
+ """Read a Stanza from a list of lines or a file.
+
+ The lines should already be in unicode form. This returns a single
+ stanza that was read. If there is a blank line at the end of the Stanza,
+ it is consumed. It is not an error for there to be no blank line at
+ the end of the iterable. If there is a blank line at the beginning,
+ this is treated as an empty Stanza and None is returned.
+
+ Only the stanza lines and the trailing blank (if any) are consumed
+ from the unicode_iter
+
+ :param unicode_iter: A iterable, yeilding Unicode strings. See read_stanza
+ if you have a utf-8 encoded string.
+ :return: A Stanza object if there are any lines in the file.
+ None otherwise
+ """
+ return _read_stanza_unicode(unicode_iter)
+
+
+def to_patch_lines(stanza, max_width=72):
+ """Convert a stanza into RIO-Patch format lines.
+
+ RIO-Patch is a RIO variant designed to be e-mailed as part of a patch.
+ It resists common forms of damage such as newline conversion or the removal
+ of trailing whitespace, yet is also reasonably easy to read.
+
+ :param max_width: The maximum number of characters per physical line.
+ :return: a list of lines
+ """
+ if max_width <= 6:
+ raise ValueError(max_width)
+ max_rio_width = max_width - 4
+ lines = []
+ for pline in stanza.to_lines():
+ for line in pline.split('\n')[:-1]:
+ line = re.sub('\\\\', '\\\\\\\\', line)
+ while len(line) > 0:
+ partline = line[:max_rio_width]
+ line = line[max_rio_width:]
+ if len(line) > 0 and line[0] != [' ']:
+ break_index = -1
+ break_index = partline.rfind(' ', -20)
+ if break_index < 3:
+ break_index = partline.rfind('-', -20)
+ break_index += 1
+ if break_index < 3:
+ break_index = partline.rfind('/', -20)
+ if break_index >= 3:
+ line = partline[break_index:] + line
+ partline = partline[:break_index]
+ if len(line) > 0:
+ line = ' ' + line
+ partline = re.sub('\r', '\\\\r', partline)
+ blank_line = False
+ if len(line) > 0:
+ partline += '\\'
+ elif re.search(' $', partline):
+ partline += '\\'
+ blank_line = True
+ lines.append('# ' + partline + '\n')
+ if blank_line:
+ lines.append('# \n')
+ return lines
+
+
+def _patch_stanza_iter(line_iter):
+ map = {'\\\\': '\\',
+ '\\r' : '\r',
+ '\\\n': ''}
+ def mapget(match):
+ return map[match.group(0)]
+
+ last_line = None
+ for line in line_iter:
+ if line.startswith('# '):
+ line = line[2:]
+ elif line.startswith('#'):
+ line = line[1:]
+ else:
+ raise ValueError("bad line %r" % (line,))
+ if last_line is not None and len(line) > 2:
+ line = line[2:]
+ line = re.sub('\r', '', line)
+ line = re.sub('\\\\(.|\n)', mapget, line)
+ if last_line is None:
+ last_line = line
+ else:
+ last_line += line
+ if last_line[-1] == '\n':
+ yield last_line
+ last_line = None
+ if last_line is not None:
+ yield last_line
+
+
+def read_patch_stanza(line_iter):
+ """Convert an iterable of RIO-Patch format lines into a Stanza.
+
+ RIO-Patch is a RIO variant designed to be e-mailed as part of a patch.
+ It resists common forms of damage such as newline conversion or the removal
+ of trailing whitespace, yet is also reasonably easy to read.
+
+ :return: a Stanza
+ """
+ return read_stanza(_patch_stanza_iter(line_iter))
+
+
+try:
+ from bzrlib._rio_pyx import (
+ _read_stanza_utf8,
+ _read_stanza_unicode,
+ _valid_tag,
+ )
+except ImportError, e:
+ osutils.failed_to_load_extension(e)
+ from bzrlib._rio_py import (
+ _read_stanza_utf8,
+ _read_stanza_unicode,
+ _valid_tag,
+ )