1 files changed, 460 insertions, 0 deletions
diff --git a/src/contrib/markdown/blockprocessors.py b/src/contrib/markdown/blockprocessors.py
new file mode 100644
index 0000000..79f4db9
--- /dev/null
+++ b/src/contrib/markdown/blockprocessors.py
@@ -0,0 +1,460 @@
+"""
+CORE MARKDOWN BLOCKPARSER
+=============================================================================
+
+This parser handles basic parsing of Markdown blocks.  It doesn't concern itself
+with inline elements such as **bold** or *italics*, but rather just catches 
+blocks, lists, quotes, etc.
+
+The BlockParser is made up of a bunch of BlockProssors, each handling a 
+different type of block. Extensions may add/replace/remove BlockProcessors
+as they need to alter how markdown blocks are parsed.
+
+"""
+
+import re
+import markdown
+
+class BlockProcessor:
+    """ Base class for block processors. 
+    
+    Each subclass will provide the methods below to work with the source and
+    tree. Each processor will need to define it's own ``test`` and ``run``
+    methods. The ``test`` method should return True or False, to indicate
+    whether the current block should be processed by this processor. If the
+    test passes, the parser will call the processors ``run`` method.
+
+    """
+
+    def __init__(self, parser=None):
+        self.parser = parser
+
+    def lastChild(self, parent):
+        """ Return the last child of an etree element. """
+        if len(parent):
+            return parent[-1]
+        else:
+            return None
+
+    def detab(self, text):
+        """ Remove a tab from the front of each line of the given text. """
+        newtext = []
+        lines = text.split('\n')
+        for line in lines:
+            if line.startswith(' '*markdown.TAB_LENGTH):
+                newtext.append(line[markdown.TAB_LENGTH:])
+            elif not line.strip():
+                newtext.append('')
+            else:
+                break
+        return '\n'.join(newtext), '\n'.join(lines[len(newtext):])
+
+    def looseDetab(self, text, level=1):
+        """ Remove a tab from front of lines but allowing dedented lines. """
+        lines = text.split('\n')
+        for i in range(len(lines)):
+            if lines[i].startswith(' '*markdown.TAB_LENGTH*level):
+                lines[i] = lines[i][markdown.TAB_LENGTH*level:]
+        return '\n'.join(lines)
+
+    def test(self, parent, block):
+        """ Test for block type. Must be overridden by subclasses. 
+        
+        As the parser loops through processors, it will call the ``test`` method
+        on each to determine if the given block of text is of that type. This
+        method must return a boolean ``True`` or ``False``. The actual method of
+        testing is left to the needs of that particular block type. It could 
+        be as simple as ``block.startswith(some_string)`` or a complex regular
+        expression. As the block type may be different depending on the parent
+        of the block (i.e. inside a list), the parent etree element is also 
+        provided and may be used as part of the test.
+
+        Keywords:
+        
+        * ``parent``: A etree element which will be the parent of the block.
+        * ``block``: A block of text from the source which has been split at 
+            blank lines.
+        """
+        pass
+
+    def run(self, parent, blocks):
+        """ Run processor. Must be overridden by subclasses. 
+        
+        When the parser determines the appropriate type of a block, the parser
+        will call the corresponding processor's ``run`` method. This method
+        should parse the individual lines of the block and append them to
+        the etree. 
+
+        Note that both the ``parent`` and ``etree`` keywords are pointers
+        to instances of the objects which should be edited in place. Each
+        processor must make changes to the existing objects as there is no
+        mechanism to return new/different objects to replace them.
+
+        This means that this method should be adding SubElements or adding text
+        to the parent, and should remove (``pop``) or add (``insert``) items to
+        the list of blocks.
+
+        Keywords:
+
+        * ``parent``: A etree element which is the parent of the current block.
+        * ``blocks``: A list of all remaining blocks of the document.
+        """
+        pass
+
+
+class ListIndentProcessor(BlockProcessor):
+    """ Process children of list items. 
+    
+    Example:
+        * a list item
+            process this part
+
+            or this part
+
+    """
+
+    INDENT_RE = re.compile(r'^(([ ]{%s})+)'% markdown.TAB_LENGTH)
+    ITEM_TYPES = ['li']
+    LIST_TYPES = ['ul', 'ol']
+
+    def test(self, parent, block):
+        return block.startswith(' '*markdown.TAB_LENGTH) and \
+                not self.parser.state.isstate('detabbed') and  \
+                (parent.tag in self.ITEM_TYPES or \
+                    (len(parent) and parent[-1] and \
+                        (parent[-1].tag in self.LIST_TYPES)
+                    )
+                )
+
+    def run(self, parent, blocks):
+        block = blocks.pop(0)
+        level, sibling = self.get_level(parent, block)
+        block = self.looseDetab(block, level)
+
+        self.parser.state.set('detabbed')
+        if parent.tag in self.ITEM_TYPES:
+            # The parent is already a li. Just parse the child block.
+            self.parser.parseBlocks(parent, [block])
+        elif sibling.tag in self.ITEM_TYPES:
+            # The sibling is a li. Use it as parent.
+            self.parser.parseBlocks(sibling, [block])
+        elif len(sibling) and sibling[-1].tag in self.ITEM_TYPES:
+            # The parent is a list (``ol`` or ``ul``) which has children.
+            # Assume the last child li is the parent of this block.
+            if sibling[-1].text:
+                # If the parent li has text, that text needs to be moved to a p
+                block = '%s\n\n%s' % (sibling[-1].text, block)
+                sibling[-1].text = ''
+            self.parser.parseChunk(sibling[-1], block)
+        else:
+            self.create_item(sibling, block)
+        self.parser.state.reset()
+
+    def create_item(self, parent, block):
+        """ Create a new li and parse the block with it as the parent. """
+        li = markdown.etree.SubElement(parent, 'li')
+        self.parser.parseBlocks(li, [block])
+ 
+    def get_level(self, parent, block):
+        """ Get level of indent based on list level. """
+        # Get indent level
+        m = self.INDENT_RE.match(block)
+        if m:
+            indent_level = len(m.group(1))/markdown.TAB_LENGTH
+        else:
+            indent_level = 0
+        if self.parser.state.isstate('list'):
+            # We're in a tightlist - so we already are at correct parent.
+            level = 1
+        else:
+            # We're in a looselist - so we need to find parent.
+            level = 0
+        # Step through children of tree to find matching indent level.
+        while indent_level > level:
+            child = self.lastChild(parent)
+            if child and (child.tag in self.LIST_TYPES or child.tag in self.ITEM_TYPES):
+                if child.tag in self.LIST_TYPES:
+                    level += 1
+                parent = child
+            else:
+                # No more child levels. If we're short of indent_level,
+                # we have a code block. So we stop here.
+                break
+        return level, parent
+
+
+class CodeBlockProcessor(BlockProcessor):
+    """ Process code blocks. """
+
+    def test(self, parent, block):
+        return block.startswith(' '*markdown.TAB_LENGTH)
+    
+    def run(self, parent, blocks):
+        sibling = self.lastChild(parent)
+        block = blocks.pop(0)
+        theRest = ''
+        if sibling and sibling.tag == "pre" and len(sibling) \
+                    and sibling[0].tag == "code":
+            # The previous block was a code block. As blank lines do not start
+            # new code blocks, append this block to the previous, adding back
+            # linebreaks removed from the split into a list.
+            code = sibling[0]
+            block, theRest = self.detab(block)
+            code.text = markdown.AtomicString('%s\n%s\n' % (code.text, block.rstrip()))
+        else:
+            # This is a new codeblock. Create the elements and insert text.
+            pre = markdown.etree.SubElement(parent, 'pre')
+            code = markdown.etree.SubElement(pre, 'code')
+            block, theRest = self.detab(block)
+            code.text = markdown.AtomicString('%s\n' % block.rstrip())
+        if theRest:
+            # This block contained unindented line(s) after the first indented 
+            # line. Insert these lines as the first block of the master blocks
+            # list for future processing.
+            blocks.insert(0, theRest)
+
+
+class BlockQuoteProcessor(BlockProcessor):
+
+    RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)')
+
+    def test(self, parent, block):
+        return bool(self.RE.search(block))
+
+    def run(self, parent, blocks):
+        block = blocks.pop(0)
+        m = self.RE.search(block)
+        if m:
+            before = block[:m.start()] # Lines before blockquote
+            # Pass lines before blockquote in recursively for parsing forst.
+            self.parser.parseBlocks(parent, [before])
+            # Remove ``> `` from begining of each line.
+            block = '\n'.join([self.clean(line) for line in 
+                            block[m.start():].split('\n')])
+        sibling = self.lastChild(parent)
+        if sibling and sibling.tag == "blockquote":
+            # Previous block was a blockquote so set that as this blocks parent
+            quote = sibling
+        else:
+            # This is a new blockquote. Create a new parent element.
+            quote = markdown.etree.SubElement(parent, 'blockquote')
+        # Recursively parse block with blockquote as parent.
+        self.parser.parseChunk(quote, block)
+
+    def clean(self, line):
+        """ Remove ``>`` from beginning of a line. """
+        m = self.RE.match(line)
+        if line.strip() == ">":
+            return ""
+        elif m:
+            return m.group(2)
+        else:
+            return line
+
+class OListProcessor(BlockProcessor):
+    """ Process ordered list blocks. """
+
+    TAG = 'ol'
+    # Detect an item (``1. item``). ``group(1)`` contains contents of item.
+    RE = re.compile(r'^[ ]{0,3}\d+\.[ ](.*)')
+    # Detect items on secondary lines. they can be of either list type.
+    CHILD_RE = re.compile(r'^[ ]{0,3}((\d+\.)|[*+-])[ ](.*)')
+    # Detect indented (nested) items of either type
+    INDENT_RE = re.compile(r'^[ ]{4,7}((\d+\.)|[*+-])[ ].*')
+
+    def test(self, parent, block):
+        return bool(self.RE.match(block))
+
+    def run(self, parent, blocks):
+        # Check fr multiple items in one block.
+        items = self.get_items(blocks.pop(0))
+        sibling = self.lastChild(parent)
+        if sibling and sibling.tag in ['ol', 'ul']:
+            # Previous block was a list item, so set that as parent
+            lst = sibling
+            # make sure previous item is in a p.
+            if len(lst) and lst[-1].text and not len(lst[-1]):
+                p = markdown.etree.SubElement(lst[-1], 'p')
+                p.text = lst[-1].text
+                lst[-1].text = ''
+            # parse first block differently as it gets wrapped in a p.
+            li = markdown.etree.SubElement(lst, 'li')
+            self.parser.state.set('looselist')
+            firstitem = items.pop(0)
+            self.parser.parseBlocks(li, [firstitem])
+            self.parser.state.reset()
+        else:
+            # This is a new list so create parent with appropriate tag.
+            lst = markdown.etree.SubElement(parent, self.TAG)
+        self.parser.state.set('list')
+        # Loop through items in block, recursively parsing each with the
+        # appropriate parent.
+        for item in items:
+            if item.startswith(' '*markdown.TAB_LENGTH):
+                # Item is indented. Parse with last item as parent
+                self.parser.parseBlocks(lst[-1], [item])
+            else:
+                # New item. Create li and parse with it as parent
+                li = markdown.etree.SubElement(lst, 'li')
+                self.parser.parseBlocks(li, [item])
+        self.parser.state.reset()
+
+    def get_items(self, block):
+        """ Break a block into list items. """
+        items = []
+        for line in block.split('\n'):
+            m = self.CHILD_RE.match(line)
+            if m:
+                # This is a new item. Append
+                items.append(m.group(3))
+            elif self.INDENT_RE.match(line):
+                # This is an indented (possibly nested) item.
+                if items[-1].startswith(' '*markdown.TAB_LENGTH):
+                    # Previous item was indented. Append to that item.
+                    items[-1] = '%s\n%s' % (items[-1], line)
+                else:
+                    items.append(line)
+            else:
+                # This is another line of previous item. Append to that item.
+                items[-1] = '%s\n%s' % (items[-1], line)
+        return items
+
+
+class UListProcessor(OListProcessor):
+    """ Process unordered list blocks. """
+
+    TAG = 'ul'
+    RE = re.compile(r'^[ ]{0,3}[*+-][ ](.*)')
+
+
+class HashHeaderProcessor(BlockProcessor):
+    """ Process Hash Headers. """
+
+    # Detect a header at start of any line in block
+    RE = re.compile(r'(^|\n)(?P<level>#{1,6})(?P<header>.*?)#*(\n|$)')
+
+    def test(self, parent, block):
+        return bool(self.RE.search(block))
+
+    def run(self, parent, blocks):
+        block = blocks.pop(0)
+        m = self.RE.search(block)
+        if m:
+            before = block[:m.start()] # All lines before header
+            after = block[m.end():]    # All lines after header
+            if before:
+                # As the header was not the first line of the block and the
+                # lines before the header must be parsed first,
+                # recursively parse this lines as a block.
+                self.parser.parseBlocks(parent, [before])
+            # Create header using named groups from RE
+            h = markdown.etree.SubElement(parent, 'h%d' % len(m.group('level')))
+            h.text = m.group('header').strip()
+            if after:
+                # Insert remaining lines as first block for future parsing.
+                blocks.insert(0, after)
+        else:
+            # This should never happen, but just in case...
+            message(CRITICAL, "We've got a problem header!")
+
+
+class SetextHeaderProcessor(BlockProcessor):
+    """ Process Setext-style Headers. """
+
+    # Detect Setext-style header. Must be first 2 lines of block.
+    RE = re.compile(r'^.*?\n[=-]{3,}', re.MULTILINE)
+
+    def test(self, parent, block):
+        return bool(self.RE.match(block))
+
+    def run(self, parent, blocks):
+        lines = blocks.pop(0).split('\n')
+        # Determine level. ``=`` is 1 and ``-`` is 2.
+        if lines[1].startswith('='):
+            level = 1
+        else:
+            level = 2
+        h = markdown.etree.SubElement(parent, 'h%d' % level)
+        h.text = lines[0].strip()
+        if len(lines) > 2:
+            # Block contains additional lines. Add to  master blocks for later.
+            blocks.insert(0, '\n'.join(lines[2:]))
+
+
+class HRProcessor(BlockProcessor):
+    """ Process Horizontal Rules. """
+
+    RE = r'[ ]{0,3}(?P<ch>[*_-])[ ]?((?P=ch)[ ]?){2,}[ ]*'
+    # Detect hr on any line of a block.
+    SEARCH_RE = re.compile(r'(^|\n)%s(\n|$)' % RE)
+    # Match a hr on a single line of text.
+    MATCH_RE = re.compile(r'^%s$' % RE)
+
+    def test(self, parent, block):
+        return bool(self.SEARCH_RE.search(block))
+
+    def run(self, parent, blocks):
+        lines = blocks.pop(0).split('\n')
+        prelines = []
+        # Check for lines in block before hr.
+        for line in lines:
+            m = self.MATCH_RE.match(line)
+            if m:
+                break
+            else:
+                prelines.append(line)
+        if len(prelines):
+            # Recursively parse lines before hr so they get parsed first.
+            self.parser.parseBlocks(parent, ['\n'.join(prelines)])
+        # create hr
+        hr = markdown.etree.SubElement(parent, 'hr')
+        # check for lines in block after hr.
+        lines = lines[len(prelines)+1:]
+        if len(lines):
+            # Add lines after hr to master blocks for later parsing.
+            blocks.insert(0, '\n'.join(lines))
+
+
+class EmptyBlockProcessor(BlockProcessor):
+    """ Process blocks and start with an empty line. """
+
+    # Detect a block that only contains whitespace 
+    # or only whitespace on the first line.
+    RE = re.compile(r'^\s*\n')
+
+    def test(self, parent, block):
+        return bool(self.RE.match(block))
+
+    def run(self, parent, blocks):
+        block = blocks.pop(0)
+        m = self.RE.match(block)
+        if m:
+            # Add remaining line to master blocks for later.
+            blocks.insert(0, block[m.end():])
+            sibling = self.lastChild(parent)
+            if sibling and sibling.tag == 'pre' and sibling[0] and \
+                    sibling[0].tag == 'code':
+                # Last block is a codeblock. Append to preserve whitespace.
+                sibling[0].text = markdown.AtomicString('%s/n/n/n' % sibling[0].text )
+
+
+class ParagraphProcessor(BlockProcessor):
+    """ Process Paragraph blocks. """
+
+    def test(self, parent, block):
+        return True
+
+    def run(self, parent, blocks):
+        block = blocks.pop(0)
+        if block.strip():
+            # Not a blank block. Add to parent, otherwise throw it away.
+            if self.parser.state.isstate('list'):
+                # The parent is a tight-list. Append to parent.text
+                if parent.text:
+                    parent.text = '%s\n%s' % (parent.text, block)
+                else:
+                    parent.text = block.lstrip()
+            else:
+                # Create a regular paragraph
+                p = markdown.etree.SubElement(parent, 'p')
+                p.text = block.lstrip()