1 files changed, 580 insertions, 0 deletions
diff --git a/docutils/utils.py b/docutils/utils.py
new file mode 100644
index 000000000..24c4986f0
--- /dev/null
+++ b/docutils/utils.py
@@ -0,0 +1,580 @@
+# Author: David Goodger
+# Contact: goodger@users.sourceforge.net
+# Revision: $Revision$
+# Date: $Date$
+# Copyright: This module has been placed in the public domain.
+
+"""
+Miscellaneous utilities for the documentation utilities.
+"""
+
+__docformat__ = 'reStructuredText'
+
+import sys
+import os
+import os.path
+import types
+import warnings
+import unicodedata
+from types import StringType, UnicodeType
+from docutils import ApplicationError, DataError
+from docutils import frontend, nodes
+
+
+class SystemMessage(ApplicationError):
+
+    def __init__(self, system_message, level):
+        Exception.__init__(self, system_message.astext())
+        self.level = level
+
+
+class SystemMessagePropagation(ApplicationError): pass
+
+
+class Reporter:
+
+    """
+    Info/warning/error reporter and ``system_message`` element generator.
+
+    Five levels of system messages are defined, along with corresponding
+    methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.
+
+    There is typically one Reporter object per process.  A Reporter object is
+    instantiated with thresholds for reporting (generating warnings) and
+    halting processing (raising exceptions), a switch to turn debug output on
+    or off, and an I/O stream for warnings.  These are stored as instance
+    attributes.
+
+    When a system message is generated, its level is compared to the stored
+    thresholds, and a warning or error is generated as appropriate.  Debug
+    messages are produced iff the stored debug switch is on, independently of
+    other thresholds.  Message output is sent to the stored warning stream if
+    not set to ''.
+
+    The Reporter class also employs a modified form of the "Observer" pattern
+    [GoF95]_ to track system messages generated.  The `attach_observer` method
+    should be called before parsing, with a bound method or function which
+    accepts system messages.  The observer can be removed with
+    `detach_observer`, and another added in its place.
+
+    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
+       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
+       1995.
+    """
+
+    levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
+    """List of names for system message levels, indexed by level."""
+
+    def __init__(self, source, report_level, halt_level, stream=None,
+                 debug=0, encoding='ascii', error_handler='replace'):
+        """
+        :Parameters:
+            - `source`: The path to or description of the source data.
+            - `report_level`: The level at or above which warning output will
+              be sent to `stream`.
+            - `halt_level`: The level at or above which `SystemMessage`
+              exceptions will be raised, halting execution.
+            - `debug`: Show debug (level=0) system messages?
+            - `stream`: Where warning output is sent.  Can be file-like (has a
+              ``.write`` method), a string (file name, opened for writing),
+              '' (empty string, for discarding all stream messages) or
+              `None` (implies `sys.stderr`; default).
+            - `encoding`: The encoding for stderr output.
+            - `error_handler`: The error handler for stderr output encoding.
+        """
+
+        self.source = source
+        """The path to or description of the source data."""
+
+        self.encoding = encoding
+        """The character encoding for the stderr output."""
+
+        self.error_handler = error_handler
+        """The character encoding error handler."""
+
+        self.debug_flag = debug
+        """Show debug (level=0) system messages?"""
+
+        self.report_level = report_level
+        """The level at or above which warning output will be sent
+        to `self.stream`."""
+
+        self.halt_level = halt_level
+        """The level at or above which `SystemMessage` exceptions
+        will be raised, halting execution."""
+
+        if stream is None:
+            stream = sys.stderr
+        elif type(stream) in (StringType, UnicodeType):
+            # Leave stream untouched if it's ''.
+            if stream != '':
+                if type(stream) == StringType:
+                    stream = open(stream, 'w')
+                elif type(stream) == UnicodeType:
+                    stream = open(stream.encode(), 'w')
+
+        self.stream = stream
+        """Where warning output is sent."""
+
+        self.observers = []
+        """List of bound methods or functions to call with each system_message
+        created."""
+
+        self.max_level = -1
+        """The highest level system message generated so far."""
+
+    def set_conditions(self, category, report_level, halt_level,
+                       stream=None, debug=0):
+        warnings.warn('docutils.utils.Reporter.set_conditions deprecated; '
+                      'set attributes via configuration settings or directly',
+                      DeprecationWarning, stacklevel=2)
+        self.report_level = report_level
+        self.halt_level = halt_level
+        if stream is None:
+            stream = sys.stderr
+        self.stream = stream
+        self.debug_flag = debug
+
+    def attach_observer(self, observer):
+        """
+        The `observer` parameter is a function or bound method which takes one
+        argument, a `nodes.system_message` instance.
+        """
+        self.observers.append(observer)
+
+    def detach_observer(self, observer):
+        self.observers.remove(observer)
+
+    def notify_observers(self, message):
+        for observer in self.observers:
+            observer(message)
+
+    def system_message(self, level, message, *children, **kwargs):
+        """
+        Return a system_message object.
+
+        Raise an exception or generate a warning if appropriate.
+        """
+        attributes = kwargs.copy()
+        if kwargs.has_key('base_node'):
+            source, line = get_source_line(kwargs['base_node'])
+            del attributes['base_node']
+            if source is not None:
+                attributes.setdefault('source', source)
+            if line is not None:
+                attributes.setdefault('line', line)
+        attributes.setdefault('source', self.source)
+        msg = nodes.system_message(message, level=level,
+                                   type=self.levels[level],
+                                   *children, **attributes)
+        if self.stream and (level >= self.report_level
+                            or self.debug_flag and level == 0):
+            msgtext = msg.astext().encode(self.encoding, self.error_handler)
+            print >>self.stream, msgtext
+        if level >= self.halt_level:
+            raise SystemMessage(msg, level)
+        if level > 0 or self.debug_flag:
+            self.notify_observers(msg)
+        self.max_level = max(level, self.max_level)
+        return msg
+
+    def debug(self, *args, **kwargs):
+        """
+        Level-0, "DEBUG": an internal reporting issue. Typically, there is no
+        effect on the processing. Level-0 system messages are handled
+        separately from the others.
+        """
+        if self.debug_flag:
+            return self.system_message(0, *args, **kwargs)
+
+    def info(self, *args, **kwargs):
+        """
+        Level-1, "INFO": a minor issue that can be ignored. Typically there is
+        no effect on processing, and level-1 system messages are not reported.
+        """
+        return self.system_message(1, *args, **kwargs)
+
+    def warning(self, *args, **kwargs):
+        """
+        Level-2, "WARNING": an issue that should be addressed. If ignored,
+        there may be unpredictable problems with the output.
+        """
+        return self.system_message(2, *args, **kwargs)
+
+    def error(self, *args, **kwargs):
+        """
+        Level-3, "ERROR": an error that should be addressed. If ignored, the
+        output will contain errors.
+        """
+        return self.system_message(3, *args, **kwargs)
+
+    def severe(self, *args, **kwargs):
+        """
+        Level-4, "SEVERE": a severe error that must be addressed. If ignored,
+        the output will contain severe errors. Typically level-4 system
+        messages are turned into exceptions which halt processing.
+        """
+        return self.system_message(4, *args, **kwargs)
+
+
+class ExtensionOptionError(DataError): pass
+class BadOptionError(ExtensionOptionError): pass
+class BadOptionDataError(ExtensionOptionError): pass
+class DuplicateOptionError(ExtensionOptionError): pass
+
+
+def extract_extension_options(field_list, options_spec):
+    """
+    Return a dictionary mapping extension option names to converted values.
+
+    :Parameters:
+        - `field_list`: A flat field list without field arguments, where each
+          field body consists of a single paragraph only.
+        - `options_spec`: Dictionary mapping known option names to a
+          conversion function such as `int` or `float`.
+
+    :Exceptions:
+        - `KeyError` for unknown option names.
+        - `ValueError` for invalid option values (raised by the conversion
+           function).
+        - `TypeError` for invalid option value types (raised by conversion
+           function).
+        - `DuplicateOptionError` for duplicate options.
+        - `BadOptionError` for invalid fields.
+        - `BadOptionDataError` for invalid option data (missing name,
+          missing data, bad quotes, etc.).
+    """
+    option_list = extract_options(field_list)
+    option_dict = assemble_option_dict(option_list, options_spec)
+    return option_dict
+
+def extract_options(field_list):
+    """
+    Return a list of option (name, value) pairs from field names & bodies.
+
+    :Parameter:
+        `field_list`: A flat field list, where each field name is a single
+        word and each field body consists of a single paragraph only.
+
+    :Exceptions:
+        - `BadOptionError` for invalid fields.
+        - `BadOptionDataError` for invalid option data (missing name,
+          missing data, bad quotes, etc.).
+    """
+    option_list = []
+    for field in field_list:
+        if len(field[0].astext().split()) != 1:
+            raise BadOptionError(
+                'extension option field name may not contain multiple words')
+        name = str(field[0].astext().lower())
+        body = field[1]
+        if len(body) == 0:
+            data = None
+        elif len(body) > 1 or not isinstance(body[0], nodes.paragraph) \
+              or len(body[0]) != 1 or not isinstance(body[0][0], nodes.Text):
+            raise BadOptionDataError(
+                  'extension option field body may contain\n'
+                  'a single paragraph only (option "%s")' % name)
+        else:
+            data = body[0][0].astext()
+        option_list.append((name, data))
+    return option_list
+
+def assemble_option_dict(option_list, options_spec):
+    """
+    Return a mapping of option names to values.
+
+    :Parameters:
+        - `option_list`: A list of (name, value) pairs (the output of
+          `extract_options()`).
+        - `options_spec`: Dictionary mapping known option names to a
+          conversion function such as `int` or `float`.
+
+    :Exceptions:
+        - `KeyError` for unknown option names.
+        - `DuplicateOptionError` for duplicate options.
+        - `ValueError` for invalid option values (raised by conversion
+           function).
+        - `TypeError` for invalid option value types (raised by conversion
+           function).
+    """
+    options = {}
+    for name, value in option_list:
+        convertor = options_spec[name]  # raises KeyError if unknown
+        if convertor is None:
+            raise KeyError(name)        # or if explicitly disabled
+        if options.has_key(name):
+            raise DuplicateOptionError('duplicate option "%s"' % name)
+        try:
+            options[name] = convertor(value)
+        except (ValueError, TypeError), detail:
+            raise detail.__class__('(option: "%s"; value: %r)\n%s'
+                                   % (name, value, ' '.join(detail.args)))
+    return options
+
+
+class NameValueError(DataError): pass
+
+
+def extract_name_value(line):
+    """
+    Return a list of (name, value) from a line of the form "name=value ...".
+
+    :Exception:
+        `NameValueError` for invalid input (missing name, missing data, bad
+        quotes, etc.).
+    """
+    attlist = []
+    while line:
+        equals = line.find('=')
+        if equals == -1:
+            raise NameValueError('missing "="')
+        attname = line[:equals].strip()
+        if equals == 0 or not attname:
+            raise NameValueError(
+                  'missing attribute name before "="')
+        line = line[equals+1:].lstrip()
+        if not line:
+            raise NameValueError(
+                  'missing value after "%s="' % attname)
+        if line[0] in '\'"':
+            endquote = line.find(line[0], 1)
+            if endquote == -1:
+                raise NameValueError(
+                      'attribute "%s" missing end quote (%s)'
+                      % (attname, line[0]))
+            if len(line) > endquote + 1 and line[endquote + 1].strip():
+                raise NameValueError(
+                      'attribute "%s" end quote (%s) not followed by '
+                      'whitespace' % (attname, line[0]))
+            data = line[1:endquote]
+            line = line[endquote+1:].lstrip()
+        else:
+            space = line.find(' ')
+            if space == -1:
+                data = line
+                line = ''
+            else:
+                data = line[:space]
+                line = line[space+1:].lstrip()
+        attlist.append((attname.lower(), data))
+    return attlist
+
+def new_reporter(source_path, settings):
+    """
+    Return a new Reporter object.
+
+    :Parameters:
+        `source` : string
+            The path to or description of the source text of the document.
+        `settings` : optparse.Values object
+            Runtime settings.
+    """
+    reporter = Reporter(
+        source_path, settings.report_level, settings.halt_level,
+        stream=settings.warning_stream, debug=settings.debug,
+        encoding=settings.error_encoding,
+        error_handler=settings.error_encoding_error_handler)
+    return reporter
+
+def new_document(source_path, settings=None):
+    """
+    Return a new empty document object.
+
+    :Parameters:
+        `source` : string
+            The path to or description of the source text of the document.
+        `settings` : optparse.Values object
+            Runtime settings.  If none provided, a default set will be used.
+    """
+    if settings is None:
+        settings = frontend.OptionParser().get_default_values()
+    reporter = new_reporter(source_path, settings)
+    document = nodes.document(settings, reporter, source=source_path)
+    document.note_source(source_path, -1)
+    return document
+
+def clean_rcs_keywords(paragraph, keyword_substitutions):
+    if len(paragraph) == 1 and isinstance(paragraph[0], nodes.Text):
+        textnode = paragraph[0]
+        for pattern, substitution in keyword_substitutions:
+            match = pattern.search(textnode.data)
+            if match:
+                textnode.data = pattern.sub(substitution, textnode.data)
+                return
+
+def relative_path(source, target):
+    """
+    Build and return a path to `target`, relative to `source` (both files).
+
+    If there is no common prefix, return the absolute path to `target`.
+    """
+    source_parts = os.path.abspath(source or 'dummy_file').split(os.sep)
+    target_parts = os.path.abspath(target).split(os.sep)
+    # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
+    if source_parts[:2] != target_parts[:2]:
+        # Nothing in common between paths.
+        # Return absolute path, using '/' for URLs:
+        return '/'.join(target_parts)
+    source_parts.reverse()
+    target_parts.reverse()
+    while (source_parts and target_parts
+           and source_parts[-1] == target_parts[-1]):
+        # Remove path components in common:
+        source_parts.pop()
+        target_parts.pop()
+    target_parts.reverse()
+    parts = ['..'] * (len(source_parts) - 1) + target_parts
+    return '/'.join(parts)
+
+def get_stylesheet_reference(settings, relative_to=None):
+    """
+    Retrieve a stylesheet reference from the settings object.
+    """
+    if settings.stylesheet_path:
+        assert not settings.stylesheet, \
+               'stylesheet and stylesheet_path are mutually exclusive.'
+        if relative_to == None:
+            relative_to = settings._destination
+        return relative_path(relative_to, settings.stylesheet_path)
+    else:
+        return settings.stylesheet
+
+def get_trim_footnote_ref_space(settings):
+    """
+    Return whether or not to trim footnote space.
+
+    If trim_footnote_reference_space is not None, return it.
+
+    If trim_footnote_reference_space is None, return False unless the
+    footnote reference style is 'superscript'.
+    """
+    if settings.trim_footnote_reference_space is None:
+        return hasattr(settings, 'footnote_references') and \
+               settings.footnote_references == 'superscript'
+    else:
+        return settings.trim_footnote_reference_space
+
+def get_source_line(node):
+    """
+    Return the "source" and "line" attributes from the `node` given or from
+    its closest ancestor.
+    """
+    while node:
+        if node.source or node.line:
+            return node.source, node.line
+        node = node.parent
+    return None, None
+
+def escape2null(text):
+    """Return a string with escape-backslashes converted to nulls."""
+    parts = []
+    start = 0
+    while 1:
+        found = text.find('\\', start)
+        if found == -1:
+            parts.append(text[start:])
+            return ''.join(parts)
+        parts.append(text[start:found])
+        parts.append('\x00' + text[found+1:found+2])
+        start = found + 2               # skip character after escape
+
+def unescape(text, restore_backslashes=0):
+    """
+    Return a string with nulls removed or restored to backslashes.
+    Backslash-escaped spaces are also removed.
+    """
+    if restore_backslashes:
+        return text.replace('\x00', '\\')
+    else:
+        for sep in ['\x00 ', '\x00\n', '\x00']:
+            text = ''.join(text.split(sep))
+        return text
+
+east_asian_widths = {'W': 2,   # Wide
+                     'F': 2,   # Full-width (wide)
+                     'Na': 1,  # Narrow
+                     'H': 1,   # Half-width (narrow)
+                     'N': 1,   # Neutral (not East Asian, treated as narrow)
+                     'A': 1}   # Ambiguous (s/b wide in East Asian context,
+                               # narrow otherwise, but that doesn't work)
+"""Mapping of result codes from `unicodedata.east_asian_width()` to character
+column widths."""
+
+def east_asian_column_width(text):
+    if isinstance(text, types.UnicodeType):
+        total = 0
+        for c in text:
+            total += east_asian_widths[unicodedata.east_asian_width(c)]
+        return total
+    else:
+        return len(text)
+
+if hasattr(unicodedata, 'east_asian_width'):
+    column_width = east_asian_column_width
+else:
+    column_width = len
+
+
+class DependencyList:
+
+    """
+    List of dependencies, with file recording support.
+
+    Note that the output file is not automatically closed.  You have
+    to explicitly call the close() method.
+    """
+
+    def __init__(self, output_file=None, dependencies=[]):
+        """
+        Initialize the dependency list, automatically setting the
+        output file to `output_file` (see `set_output()`) and adding
+        all supplied dependencies.
+        """
+        self.set_output(output_file)
+        for i in dependencies:
+            self.add(i)
+
+    def set_output(self, output_file):
+        """
+        Set the output file and clear the list of already added
+        dependencies.
+
+        `output_file` must be a string.  The specified file is
+        immediately overwritten.
+
+        If output_file is '-', the output will be written to stdout.
+        If it is None, no file output is done when calling add().
+        """
+        self.list = []
+        if output_file == '-':
+            self.file = sys.stdout
+        elif output_file:
+            self.file = open(output_file, 'w')
+        else:
+            self.file = None
+
+    def add(self, filename):
+        """
+        If the dependency `filename` has not already been added,
+        append it to self.list and print it to self.file if self.file
+        is not None.
+        """
+        if not filename in self.list:
+            self.list.append(filename)
+            if self.file is not None:
+                print >>self.file, filename
+
+    def close(self):
+        """
+        Close the output file.
+        """
+        self.file.close()
+        self.file = None
+
+    def __repr__(self):
+        if self.file:
+            output_file = self.file.name
+        else:
+            output_file = None
+        return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)