From de5c696f94e8dde242c29d4be50b7bbf3c17fedb Mon Sep 17 00:00:00 2001 From: Isaac Muse Date: Thu, 23 Nov 2017 07:56:38 -0700 Subject: Feature ancestry (#598) Ancestry exclusion for inline patterns. Adds the ability for an inline pattern to define a list of ancestor tag names that should be avoided. If a pattern would create a descendant of one of the listed tag names, the pattern will not match. Fixes #596. --- .spell-dict | 3 ++- docs/extensions/api.txt | 6 ++++- markdown/inlinepatterns.py | 2 ++ markdown/treeprocessors.py | 57 +++++++++++++++++++++++++++++++++++++--------- tests/test_apis.py | 52 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 107 insertions(+), 13 deletions(-) diff --git a/.spell-dict b/.spell-dict index 44d0a16..fb9ea28 100644 --- a/.spell-dict +++ b/.spell-dict @@ -103,6 +103,7 @@ traceback Tredinnick Treeprocessor Treeprocessors +tuple tuples unordered untrusted @@ -122,4 +123,4 @@ wiki JavaScript plugin plugins -configs \ No newline at end of file +configs diff --git a/docs/extensions/api.txt b/docs/extensions/api.txt index 9653883..246bb27 100644 --- a/docs/extensions/api.txt +++ b/docs/extensions/api.txt @@ -53,7 +53,7 @@ A pseudo example: Inline Patterns {: #inlinepatterns } ------------------------------------ -Inline Patterns implement the inline HTML element syntax for Markdown such as +Inline Patterns implement the inline HTML element syntax for Markdown such as `*emphasis*` or `[links](http://example.com)`. Pattern objects should be instances of classes that inherit from `markdown.inlinepatterns.Pattern` or one of its children. Each pattern object uses a single regular expression and @@ -68,6 +68,10 @@ must have the following methods: Accepts a match object and returns an ElementTree element of a plain Unicode string. +Also, Inline Patterns can define the property `ANCESTOR_EXCLUDES` with either +a list or tuple of undesirable ancestors. 
The pattern should not match if it +would cause the content to be a descendant of one of the defined tag names. + Note that any regular expression returned by `getCompiledRegExp` must capture the whole block. Therefore, they should all start with `r'^(.*?)'` and end with `r'(.*?)!'`. When using the default `getCompiledRegExp()` method diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index 3658ebd..2f00b3d 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -189,6 +189,8 @@ The pattern classes class Pattern(object): """Base class that inline patterns subclass. """ + ANCESTOR_EXCLUDES = tuple() + def __init__(self, pattern, markdown_instance=None): """ Create an instant of an inline pattern. diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py index bb76572..f159a8a 100644 --- a/markdown/treeprocessors.py +++ b/markdown/treeprocessors.py @@ -54,6 +54,7 @@ class InlineProcessor(Treeprocessor): self.__placeholder_re = util.INLINE_PLACEHOLDER_RE self.markdown = md self.inlinePatterns = md.inlinePatterns + self.ancestors = [] def __makePlaceholder(self, type): """ Generate a placeholder """ @@ -138,7 +139,7 @@ class InlineProcessor(Treeprocessor): childResult.reverse() for newChild in childResult: - node.insert(pos, newChild) + node.insert(pos, newChild[0]) def __processPlaceholders(self, data, parent, isText=True): """ @@ -155,10 +156,10 @@ class InlineProcessor(Treeprocessor): def linkText(text): if text: if result: - if result[-1].tail: - result[-1].tail += text + if result[-1][0].tail: + result[-1][0].tail += text else: - result[-1].tail = text + result[-1][0].tail = text elif not isText: if parent.tail: parent.tail += text @@ -199,7 +200,7 @@ class InlineProcessor(Treeprocessor): continue strartIndex = phEndIndex - result.append(node) + result.append((node, self.ancestors[:])) else: # wrong placeholder end = index + len(self.__placeholder_prefix) @@ -230,6 +231,11 @@ class InlineProcessor(Treeprocessor): 
Returns: String with placeholders instead of ElementTree elements. """ + + for exclude in pattern.ANCESTOR_EXCLUDES: + if exclude.lower() in self.ancestors: + return data, False, 0 + match = pattern.getCompiledRegExp().match(data[startIndex:]) leftData = data[:startIndex] @@ -247,9 +253,11 @@ class InlineProcessor(Treeprocessor): for child in [node] + list(node): if not isString(node): if child.text: + self.ancestors.append(child.tag.lower()) child.text = self.__handleInline( child.text, patternIndex + 1 ) + self.ancestors.pop() if child.tail: child.tail = self.__handleInline( child.tail, patternIndex @@ -261,7 +269,17 @@ class InlineProcessor(Treeprocessor): match.group(1), placeholder, match.groups()[-1]), True, 0 - def run(self, tree): + def __build_ancestors(self, parent, parents): + """Build the ancestor list.""" + ancestors = [] + while parent: + if parent: + ancestors.append(parent.tag.lower()) + parent = self.parent_map.get(parent) + ancestors.reverse() + parents.extend(ancestors) + + def run(self, tree, ancestors=None): """Apply inline patterns to a parsed Markdown tree. Iterate over ElementTree, find elements with inline tag, apply inline @@ -274,28 +292,42 @@ class InlineProcessor(Treeprocessor): Arguments: * tree: ElementTree object, representing Markdown tree. + * ancestors: List of parent tag names that precede the tree node (if needed). Returns: ElementTree object with applied inline patterns. """ self.stashed_nodes = {} - stack = [tree] + # Ensure a valid parent list, but copy passed in lists + # to ensure we don't have the user accidentally change it on us. 
+ tree_parents = [] if ancestors is None else ancestors[:] + + self.parent_map = dict((c, p) for p in tree.getiterator() for c in p) + stack = [(tree, tree_parents)] while stack: - currElement = stack.pop() + currElement, parents = stack.pop() + + self.ancestors = parents + self.__build_ancestors(currElement, self.ancestors) + insertQueue = [] for child in currElement: if child.text and not isinstance( child.text, util.AtomicString ): + self.ancestors.append(child.tag.lower()) text = child.text child.text = None lst = self.__processPlaceholders( self.__handleInline(text), child ) + for l in lst: + self.parent_map[l[0]] = child stack += lst insertQueue.append((child, lst)) + self.ancestors.pop() if child.tail: tail = self.__handleInline(child.tail) dumby = util.etree.Element('d') @@ -306,9 +338,11 @@ class InlineProcessor(Treeprocessor): pos = list(currElement).index(child) + 1 tailResult.reverse() for newChild in tailResult: - currElement.insert(pos, newChild) + self.parent_map[newChild[0]] = currElement + currElement.insert(pos, newChild[0]) if len(child): - stack.append(child) + self.parent_map[child] = currElement + stack.append((child, self.ancestors[:])) for element, lst in insertQueue: if self.markdown.enable_attributes: @@ -317,7 +351,8 @@ class InlineProcessor(Treeprocessor): element.text, element ) i = 0 - for newChild in lst: + for obj in lst: + newChild = obj[0] if self.markdown.enable_attributes: # Processing attributes if newChild.tail and isString(newChild.tail): diff --git a/tests/test_apis.py b/tests/test_apis.py index 7b1214f..48e79e8 100644 --- a/tests/test_apis.py +++ b/tests/test_apis.py @@ -770,3 +770,55 @@ class TestEscapeAppend(unittest.TestCase): self.assertEqual('|' in md.ESCAPED_CHARS, True) md2 = markdown.Markdown() self.assertEqual('|' not in md2.ESCAPED_CHARS, True) + + +class TestAncestorExclusion(unittest.TestCase): + """ Tests exclusion of tags in ancestor list. 
""" + + class AncestorExample(markdown.inlinepatterns.SimpleTagPattern): + """ Ancestor Test. """ + + ANCESTOR_EXCLUDES = ('a',) + + def handleMatch(self, m): + """ Handle match. """ + el = markdown.util.etree.Element(self.tag) + el.text = m.group(3) + return el + + class AncestorExtension(markdown.Extension): + + def __init__(self, *args, **kwargs): + """Initialize.""" + + self.config = {} + + def extendMarkdown(self, md, md_globals): + """Modify inline patterns.""" + + pattern = r'(\+)([^\+]+)\2' + md.inlinePatterns["ancestor-test"] = TestAncestorExclusion.AncestorExample(pattern, 'strong') + + def setUp(self): + """Setup markdown object.""" + self.md = markdown.Markdown(extensions=[TestAncestorExclusion.AncestorExtension()]) + + def test_ancestors(self): + """ Test that an extension can exclude parent tags. """ + test = """ +Some +test+ and a [+link+](http://test.com) +""" + result = """

<p>Some <strong>test</strong> and a <a href="http://test.com">+link+</a></p>

""" + + self.md.reset() + self.assertEqual(self.md.convert(test), result) + + def test_ancestors_tail(self): + """ Test that an extension can exclude parent tags when dealing with a tail. """ + test = """ +[***+em+*+strong+**](http://test.com) +""" + result = """

<p><a href="http://test.com"><strong><em>+em+</em>+strong+</strong></a></p>

""" + + self.md.reset() + self.assertEqual(self.md.convert(test), result) -- cgit v1.2.1