1 files changed, 603 insertions, 0 deletions
diff --git a/src/contrib/markdown/__init__.py b/src/contrib/markdown/__init__.py
new file mode 100644
index 0000000..af5a2c1
--- /dev/null
+++ b/src/contrib/markdown/__init__.py
@@ -0,0 +1,603 @@
+"""
+Python Markdown
+===============
+
+Python Markdown converts Markdown to HTML and can be used as a library or
+called from the command line.
+
+## Basic usage as a module:
+
+    import markdown
+    md = markdown.Markdown()
+    html = md.convert(your_text_string)
+
+## Basic use from the command line:
+
+    python markdown.py source.txt > destination.html
+
+Run "python markdown.py --help" to see more options.
+
+## Extensions
+
+See <http://www.freewisdom.org/projects/python-markdown/> for more
+information and instructions on how to extend the functionality of
+Python Markdown.  Read that before you try modifying this file.
+
+## Authors and License
+
+Started by [Manfred Stienstra](http://www.dwerg.net/).  Continued and
+maintained  by [Yuri Takhteyev](http://www.freewisdom.org), [Waylan
+Limberg](http://achinghead.com/) and [Artem Yunusov](http://blog.splyer.com).
+
+Contact: markdown@freewisdom.org
+
+Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
+Copyright 200? Django Software Foundation (OrderedDict implementation)
+Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+Copyright 2004 Manfred Stienstra (the original version)
+
+License: BSD (see docs/LICENSE for details).
+"""
+
+version = "2.0-rc2"              # release as a display string
+version_info = (2,0,0, "rc2")    # presumably the same release as a comparable tuple
+
+import re
+import codecs
+import sys
+import warnings
+import logging
+from logging import DEBUG, INFO, WARN, ERROR, CRITICAL
+
+
+"""
+CONSTANTS
+=============================================================================
+"""
+
+"""
+Constants you might want to modify
+-----------------------------------------------------------------------------
+"""
+
+# default logging level for command-line use
+COMMAND_LINE_LOGGING_LEVEL = CRITICAL
+TAB_LENGTH = 4               # expand tabs to this many spaces
+ENABLE_ATTRIBUTES = True     # @id = xyz -> <... id="xyz">
+SMART_EMPHASIS = True        # this_or_that does not become this<i>or</i>that
+DEFAULT_OUTPUT_FORMAT = 'xhtml1'     # xhtml or html4 output
+HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode
+BLOCK_LEVEL_ELEMENTS = re.compile("p|div|h[1-6]|blockquote|pre|table|dl|ol|ul"
+                                  "|script|noscript|form|fieldset|iframe|math"
+                                  "|ins|del|hr|hr/|style|li|dt|dd|thead|tbody"
+                                  "|tr|th|td")
+DOC_TAG = "div"     # Element used to wrap document - later removed
+
+# Placeholders
+STX = u'\u0002'  # Use STX ("Start of text") for start-of-placeholder
+ETX = u'\u0003'  # Use ETX ("End of text") for end-of-placeholder
+INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
+INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
+AMP_SUBSTITUTE = STX+"amp"+ETX
+
+
+"""
+Constants you probably do not need to change
+-----------------------------------------------------------------------------
+"""
+
+# (first, last) character pairs; judging by the name, right-to-left script ranges.
+RTL_BIDI_RANGES = ( (u'\u0590', u'\u07FF'),
+                     # Hebrew (0590-05FF), Arabic (0600-06FF), Syriac (0700-074F),
+                     # Arabic supplement (0750-077F), Thaana (0780-07BF), Nko (07C0-07FF).
+                    (u'\u2D30', u'\u2D7F'), # Tifinagh
+                    )
+
+
+"""
+AUXILIARY GLOBAL FUNCTIONS
+=============================================================================
+"""
+
+
+def message(level, text):
+    """ A wrapper method for logging debug messages. """
+    logger =  logging.getLogger('MARKDOWN')
+    if logger.handlers:
+        # The logger is configured
+        logger.log(level, text)
+        if level > WARN:
+            sys.exit(0)
+    elif level > WARN:
+        raise MarkdownException, text
+    else:
+        warnings.warn(text, MarkdownWarning)
+
+
+def isBlockLevel(tag):
+    """Check if the tag is a block level HTML tag."""
+    return BLOCK_LEVEL_ELEMENTS.match(tag)
+
+"""
+MISC AUXILIARY CLASSES
+=============================================================================
+"""
+
+class AtomicString(unicode):
+    """A string which should not be further processed."""
+    pass
+
+
+class MarkdownException(Exception):
+    """ A Markdown Exception. """
+    pass
+
+
+class MarkdownWarning(Warning):
+    """ A Markdown Warning. """
+    pass
+
+
+"""
+OVERALL DESIGN
+=============================================================================
+
+Markdown processing takes place in five steps:
+
+1. A bunch of "preprocessors" munge the input text.
+2. BlockParser() parses the high-level structural elements of the
+   pre-processed text into an ElementTree.
+3. A bunch of "treeprocessors" are run against the ElementTree. One such
+   treeprocessor runs InlinePatterns against the ElementTree, detecting inline
+   markup.
+4. Some post-processors are run against the text after the ElementTree has
+   been serialized into text.
+5. The output is written to a string.
+
+Those steps are put together by the Markdown() class.
+
+"""
+
+import preprocessors
+import blockprocessors
+import treeprocessors
+import inlinepatterns
+import postprocessors
+import blockparser
+import etree_loader
+import odict
+
+# Extensions should use "markdown.etree" instead of "etree" (or do `from
+# markdown import etree`) rather than importing an ElementTree themselves.
+
+etree = etree_loader.importETree()
+
+# Adds the ability to output html4
+import html4
+
+
+class Markdown:
+    """Convert Markdown to HTML."""
+
+    def __init__(self,
+                 extensions=[],
+                 extension_configs={},
+                 safe_mode = False, 
+                 output_format=DEFAULT_OUTPUT_FORMAT):
+        """
+        Creates a new Markdown instance.
+
+        Keyword arguments:
+
+        * extensions: A list of extensions.
+           If they are of type string, the module mdx_name.py will be loaded.
+           If they are a subclass of markdown.Extension, they will be used
+           as-is.
+        * extension-configs: Configuration setting for extensions.
+        * safe_mode: Disallow raw html. One of "remove", "replace" or "escape".
+        * output_format: Format of output. Supported formats are:
+            * "xhtml1": Outputs XHTML 1.x. Default.
+            * "xhtml": Outputs latest supported version of XHTML (currently XHTML 1.1).
+            * "html4": Outputs HTML 4
+            * "html": Outputs latest supported version of HTML (currently HTML 4).
+            Note that it is suggested that the more specific formats ("xhtml1" 
+            and "html4") be used as "xhtml" or "html" may change in the future
+            if it makes sense at that time. 
+
+        """
+        
+        self.safeMode = safe_mode
+        self.registeredExtensions = []
+        self.docType = ""
+        self.stripTopLevelTags = True
+
+        # Preprocessors
+        self.preprocessors = odict.OrderedDict()
+        self.preprocessors["html_block"] = \
+                preprocessors.HtmlBlockPreprocessor(self)
+        self.preprocessors["reference"] = \
+                preprocessors.ReferencePreprocessor(self)
+        # footnote preprocessor will be inserted with "<reference"
+
+        # Block processors - ran by the parser
+        self.parser = blockparser.BlockParser()
+        self.parser.blockprocessors['empty'] = \
+                blockprocessors.EmptyBlockProcessor(self.parser)
+        self.parser.blockprocessors['indent'] = \
+                blockprocessors.ListIndentProcessor(self.parser)
+        self.parser.blockprocessors['code'] = \
+                blockprocessors.CodeBlockProcessor(self.parser)
+        self.parser.blockprocessors['hashheader'] = \
+                blockprocessors.HashHeaderProcessor(self.parser)
+        self.parser.blockprocessors['setextheader'] = \
+                blockprocessors.SetextHeaderProcessor(self.parser)
+        self.parser.blockprocessors['hr'] = \
+                blockprocessors.HRProcessor(self.parser)
+        self.parser.blockprocessors['olist'] = \
+                blockprocessors.OListProcessor(self.parser)
+        self.parser.blockprocessors['ulist'] = \
+                blockprocessors.UListProcessor(self.parser)
+        self.parser.blockprocessors['quote'] = \
+                blockprocessors.BlockQuoteProcessor(self.parser)
+        self.parser.blockprocessors['paragraph'] = \
+                blockprocessors.ParagraphProcessor(self.parser)
+
+
+        #self.prePatterns = []
+
+        # Inline patterns - Run on the tree
+        self.inlinePatterns = odict.OrderedDict()
+        self.inlinePatterns["backtick"] = \
+                inlinepatterns.BacktickPattern(inlinepatterns.BACKTICK_RE)
+        self.inlinePatterns["escape"] = \
+                inlinepatterns.SimpleTextPattern(inlinepatterns.ESCAPE_RE)
+        self.inlinePatterns["reference"] = \
+            inlinepatterns.ReferencePattern(inlinepatterns.REFERENCE_RE, self)
+        self.inlinePatterns["link"] = \
+                inlinepatterns.LinkPattern(inlinepatterns.LINK_RE, self)
+        self.inlinePatterns["image_link"] = \
+                inlinepatterns.ImagePattern(inlinepatterns.IMAGE_LINK_RE, self)
+        self.inlinePatterns["image_reference"] = \
+            inlinepatterns.ImageReferencePattern(inlinepatterns.IMAGE_REFERENCE_RE, self)
+        self.inlinePatterns["autolink"] = \
+            inlinepatterns.AutolinkPattern(inlinepatterns.AUTOLINK_RE, self)
+        self.inlinePatterns["automail"] = \
+            inlinepatterns.AutomailPattern(inlinepatterns.AUTOMAIL_RE, self)
+        self.inlinePatterns["linebreak2"] = \
+            inlinepatterns.SubstituteTagPattern(inlinepatterns.LINE_BREAK_2_RE, 'br')
+        self.inlinePatterns["linebreak"] = \
+            inlinepatterns.SubstituteTagPattern(inlinepatterns.LINE_BREAK_RE, 'br')
+        self.inlinePatterns["html"] = \
+                inlinepatterns.HtmlPattern(inlinepatterns.HTML_RE, self)
+        self.inlinePatterns["entity"] = \
+                inlinepatterns.HtmlPattern(inlinepatterns.ENTITY_RE, self)
+        self.inlinePatterns["not_strong"] = \
+                inlinepatterns.SimpleTextPattern(inlinepatterns.NOT_STRONG_RE)
+        self.inlinePatterns["strong_em"] = \
+            inlinepatterns.DoubleTagPattern(inlinepatterns.STRONG_EM_RE, 'strong,em')
+        self.inlinePatterns["strong"] = \
+            inlinepatterns.SimpleTagPattern(inlinepatterns.STRONG_RE, 'strong')
+        self.inlinePatterns["emphasis"] = \
+            inlinepatterns.SimpleTagPattern(inlinepatterns.EMPHASIS_RE, 'em')
+        self.inlinePatterns["emphasis2"] = \
+            inlinepatterns.SimpleTagPattern(inlinepatterns.EMPHASIS_2_RE, 'em')
+        # The order of the handlers matters!!!
+
+
+        # Tree processors - run once we have a basic parse.
+        self.treeprocessors = odict.OrderedDict()
+        self.treeprocessors["inline"] = treeprocessors.InlineProcessor(self)
+        self.treeprocessors["prettify"] = \
+                treeprocessors.PrettifyTreeprocessor(self)
+
+        # Postprocessors - finishing touches.
+        self.postprocessors = odict.OrderedDict()
+        self.postprocessors["raw_html"] = \
+                postprocessors.RawHtmlPostprocessor(self)
+        self.postprocessors["amp_substitute"] = \
+                postprocessors.AndSubstitutePostprocessor()
+        # footnote postprocessor will be inserted with ">amp_substitute"
+
+        # Map format keys to serializers
+        self.output_formats = {
+            'html'  : html4.to_html_string, 
+            'html4' : html4.to_html_string,
+            'xhtml' : etree.tostring, 
+            'xhtml1': etree.tostring,
+        }
+
+        self.references = {}
+        self.htmlStash = preprocessors.HtmlStash()
+        self.registerExtensions(extensions = extensions,
+                                configs = extension_configs)
+        self.set_output_format(output_format)
+        self.reset()
+
+    def registerExtensions(self, extensions, configs):
+        """
+        Register extensions with this instance of Markdown.
+
+        Keyword aurguments:
+
+        * extensions: A list of extensions, which can either
+           be strings or objects.  See the docstring on Markdown.
+        * configs: A dictionary mapping module names to config options.
+
+        """
+        for ext in extensions:
+            if isinstance(ext, basestring):
+                ext = load_extension(ext, configs.get(ext, []))
+            try:
+                ext.extendMarkdown(self, globals())
+            except AttributeError:
+                message(ERROR, "Incorrect type! Extension '%s' is "
+                               "neither a string or an Extension." %(repr(ext)))
+            
+
+    def registerExtension(self, extension):
+        """ This gets called by the extension """
+        self.registeredExtensions.append(extension)
+
+    def reset(self):
+        """
+        Resets all state variables so that we can start with a new text.
+        """
+        self.htmlStash.reset()
+        self.references.clear()
+
+        for extension in self.registeredExtensions:
+            extension.reset()
+
+    def set_output_format(self, format):
+        """ Set the output format for the class instance. """
+        try:
+            self.serializer = self.output_formats[format.lower()]
+        except KeyError:
+            message(CRITICAL, 'Invalid Output Format: "%s". Use one of %s.' \
+                               % (format, self.output_formats.keys()))
+
+    def convert(self, source):
+        """
+        Convert markdown to serialized XHTML or HTML.
+
+        Keyword arguments:
+
+        * source: Source text as a Unicode string.
+
+        """
+
+        # Fixup the source text
+        if not source.strip():
+            return u""  # a blank unicode string
+        try:
+            source = unicode(source)
+        except UnicodeDecodeError:
+            message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.')
+            return u""
+
+        source = source.replace(STX, "").replace(ETX, "")
+        source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
+        source = re.sub(r'\n\s+\n', '\n\n', source)
+        source = source.expandtabs(TAB_LENGTH)
+
+        # Split into lines and run the line preprocessors.
+        self.lines = source.split("\n")
+        for prep in self.preprocessors.values():
+            self.lines = prep.run(self.lines)
+
+        # Parse the high-level elements.
+        root = self.parser.parseDocument(self.lines).getroot()
+
+        # Run the tree-processors
+        for treeprocessor in self.treeprocessors.values():
+            newRoot = treeprocessor.run(root)
+            if newRoot:
+                root = newRoot
+
+        # Serialize _properly_.  Strip top-level tags.
+        output, length = codecs.utf_8_decode(self.serializer(root, encoding="utf8"))
+        if self.stripTopLevelTags:
+            start = output.index('<%s>'%DOC_TAG)+len(DOC_TAG)+2
+            end = output.rindex('</%s>'%DOC_TAG)
+            output = output[start:end].strip()
+
+        # Run the text post-processors
+        for pp in self.postprocessors.values():
+            output = pp.run(output)
+
+        return output.strip()
+
+    def convertFile(self, input=None, output=None, encoding=None):
+        """Converts a markdown file and returns the HTML as a unicode string.
+
+        Decodes the file using the provided encoding (defaults to utf-8),
+        passes the file content to markdown, and outputs the html to either
+        the provided stream or the file with provided name, using the same
+        encoding as the source file.
+
+        **Note:** This is the only place that decoding and encoding of unicode
+        takes place in Python-Markdown.  (All other code is unicode-in /
+        unicode-out.)
+
+        Keyword arguments:
+
+        * input: Name of source text file.
+        * output: Name of output file. Writes to stdout if `None`.
+        * encoding: Encoding of input and output files. Defaults to utf-8.
+
+        """
+
+        encoding = encoding or "utf-8"
+
+        # Read the source
+        input_file = codecs.open(input, mode="r", encoding=encoding)
+        text = input_file.read()
+        input_file.close()
+        text = text.lstrip(u'\ufeff') # remove the byte-order mark
+
+        # Convert
+        html = self.convert(text)
+
+        # Write to file or stdout
+        if isinstance(output, (str, unicode)):
+            output_file = codecs.open(output, "w", encoding=encoding)
+            output_file.write(html)
+            output_file.close()
+        else:
+            output.write(html.encode(encoding))
+
+
+"""
+Extensions
+-----------------------------------------------------------------------------
+"""
+
+class Extension:
+    """ Base class for extensions to subclass. """
+    def __init__(self, configs = {}):
+        """Create an instance of an Extention.
+
+        Keyword arguments:
+
+        * configs: A dict of configuration setting used by an Extension.
+        """
+        self.config = configs
+
+    def getConfig(self, key):
+        """ Return a setting for the given key or an empty string. """
+        if key in self.config:
+            return self.config[key][0]
+        else:
+            return ""
+
+    def getConfigInfo(self):
+        """ Return all config settings as a list of tuples. """
+        return [(key, self.config[key][1]) for key in self.config.keys()]
+
+    def setConfig(self, key, value):
+        """ Set a config setting for `key` with the given `value`. """
+        self.config[key][0] = value
+
+    def extendMarkdown(self, md, md_globals):
+        """
+        Add the various proccesors and patterns to the Markdown Instance.
+
+        This method must be overriden by every extension.
+
+        Keyword arguments:
+
+        * md: The Markdown instance.
+
+        * md_globals: Global variables in the markdown module namespace.
+
+        """
+        pass
+
+
+def load_extension(ext_name, configs = []):
+    """Load extension by name, then return the module.
+
+    The extension name may contain arguments as part of the string in the
+    following format: "extname(key1=value1,key2=value2)"
+
+    """
+
+    # Parse extensions config params (ignore the order)
+    configs = dict(configs)
+    pos = ext_name.find("(") # find the first "("
+    if pos > 0:
+        ext_args = ext_name[pos+1:-1]
+        ext_name = ext_name[:pos]
+        pairs = [x.split("=") for x in ext_args.split(",")]
+        configs.update([(x.strip(), y.strip()) for (x, y) in pairs])
+
+    # Setup the module names
+    ext_module = 'markdown.extensions'
+    module_name_new_style = '.'.join([ext_module, ext_name])
+    module_name_old_style = '_'.join(['mdx', ext_name])
+
+    # Try loading the extention first from one place, then another
+    try: # New style (markdown.extensons.<extension>)
+        module = __import__(module_name_new_style, {}, {}, [ext_module])
+    except ImportError:
+        try: # Old style (mdx.<extension>)
+            module = __import__(module_name_old_style)
+        except ImportError:
+           message(WARN, "Failed loading extension '%s' from '%s' or '%s'"
+               % (ext_name, module_name_new_style, module_name_old_style))
+           # Return None so we don't try to initiate none-existant extension
+           return None
+
+    # If the module is loaded successfully, we expect it to define a
+    # function called makeExtension()
+    try:
+        return module.makeExtension(configs.items())
+    except AttributeError:
+        message(CRITICAL, "Failed to initiate extension '%s'" % ext_name)
+
+
+def load_extensions(ext_names):
+    """Loads multiple extensions"""
+    extensions = []
+    for ext_name in ext_names:
+        extension = load_extension(ext_name)
+        if extension:
+            extensions.append(extension)
+    return extensions
+
+
+"""
+EXPORTED FUNCTIONS
+=============================================================================
+
+Those are the two functions we really mean to export: markdown() and
+markdownFromFile().
+"""
+
+def markdown(text,
+             extensions = [],
+             safe_mode = False,
+             output_format = DEFAULT_OUTPUT_FORMAT):
+    """Convert a markdown string to HTML and return HTML as a unicode string.
+
+    This is a shortcut function for `Markdown` class to cover the most
+    basic use case.  It initializes an instance of Markdown, loads the
+    necessary extensions and runs the parser on the given text.
+
+    Keyword arguments:
+
+    * text: Markdown formatted text as Unicode or ASCII string.
+    * extensions: A list of extensions or extension names (may contain config args).
+    * safe_mode: Disallow raw html.  One of "remove", "replace" or "escape".
+    * output_format: Format of output. Supported formats are:
+        * "xhtml1": Outputs XHTML 1.x. Default.
+        * "xhtml": Outputs latest supported version of XHTML (currently XHTML 1.1).
+        * "html4": Outputs HTML 4
+        * "html": Outputs latest supported version of HTML (currently HTML 4).
+        Note that it is suggested that the more specific formats ("xhtml1" 
+        and "html4") be used as "xhtml" or "html" may change in the future
+        if it makes sense at that time. 
+
+    Returns: An HTML document as a string.
+
+    """
+    md = Markdown(extensions=load_extensions(extensions),
+                  safe_mode=safe_mode, 
+                  output_format=output_format)
+    return md.convert(text)
+
+
+def markdownFromFile(input = None,
+                     output = None,
+                     extensions = [],
+                     encoding = None,
+                     safe_mode = False,
+                     output_format = DEFAULT_OUTPUT_FORMAT):
+    """Read markdown code from a file and write it to a file or a stream."""
+    md = Markdown(extensions=load_extensions(extensions), 
+                  safe_mode=safe_mode,
+                  output_format=output_format)
+    md.convertFile(input, output, encoding)
+
+
+