1 files changed, 197 insertions, 0 deletions
diff --git a/pystache/parser.py b/pystache/parser.py
new file mode 100644
index 0000000..d07ebf6
--- /dev/null
+++ b/pystache/parser.py
@@ -0,0 +1,197 @@
+# coding: utf-8
+
+"""
+Provides a class for parsing template strings.
+
+This module is only meant for internal use by the renderengine module.
+
+"""
+
+import re
+
+from parsed import ParsedTemplate
+
+
+DEFAULT_DELIMITERS = ('{{', '}}')
+END_OF_LINE_CHARACTERS = ['\r', '\n']
+NON_BLANK_RE = re.compile(r'^(.)', re.M)
+
+
+def _compile_template_re(delimiters):
+
+    # The possible tag type characters following the opening tag,
+    # excluding "=" and "{".
+    tag_types = "!>&/#^"
+
+    # TODO: are we following this in the spec?
+    #
+    #   The tag's content MUST be a non-whitespace character sequence
+    #   NOT containing the current closing delimiter.
+    #
+    tag = r"""
+        (?P<whitespace>[\ \t]*)
+        %(otag)s \s*
+        (?:
+          (?P<change>=) \s* (?P<delims>.+?)   \s* = |
+          (?P<raw>{)    \s* (?P<raw_name>.+?) \s* } |
+          (?P<tag>[%(tag_types)s]?)  \s* (?P<tag_key>[\s\S]+?)
+        )
+        \s* %(ctag)s
+    """ % {'tag_types': tag_types, 'otag': re.escape(delimiters[0]), 'ctag': re.escape(delimiters[1])}
+
+    return re.compile(tag, re.VERBOSE)
+
+
+class ParsingError(Exception):
+
+    pass
+
+
+class Parser(object):
+
+    _delimiters = None
+    _template_re = None
+
+    def __init__(self, engine, delimiters=None):
+        """
+        Construct an instance.
+
+        Arguments:
+
+          engine: a RenderEngine instance.
+
+        """
+        if delimiters is None:
+            delimiters = DEFAULT_DELIMITERS
+
+        self._delimiters = delimiters
+        self.engine = engine
+
+    def compile_template_re(self):
+        self._template_re = _compile_template_re(self._delimiters)
+
+    def _change_delimiters(self, delimiters):
+        self._delimiters = delimiters
+        self.compile_template_re()
+
+    def parse(self, template, index=0, section_key=None):
+        """
+        Parse a template string into a ParsedTemplate instance.
+
+        This method uses the current tag delimiter.
+
+        Arguments:
+
+          template: a template string of type unicode.
+
+        """
+        parse_tree = []
+        start_index = index
+
+        while True:
+            match = self._template_re.search(template, index)
+
+            if match is None:
+                break
+
+            match_index = match.start()
+            end_index = match.end()
+
+            before_tag = template[index : match_index]
+
+            parse_tree.append(before_tag)
+
+            matches = match.groupdict()
+
+            # Normalize the matches dictionary.
+            if matches['change'] is not None:
+                matches.update(tag='=', tag_key=matches['delims'])
+            elif matches['raw'] is not None:
+                matches.update(tag='&', tag_key=matches['raw_name'])
+
+            tag_type = matches['tag']
+            tag_key = matches['tag_key']
+            leading_whitespace = matches['whitespace']
+
+            # Standalone (non-interpolation) tags consume the entire line,
+            # both leading whitespace and trailing newline.
+            did_tag_begin_line = match_index == 0 or template[match_index - 1] in END_OF_LINE_CHARACTERS
+            did_tag_end_line = end_index == len(template) or template[end_index] in END_OF_LINE_CHARACTERS
+            is_tag_interpolating = tag_type in ['', '&']
+
+            if did_tag_begin_line and did_tag_end_line and not is_tag_interpolating:
+                if end_index < len(template):
+                    end_index += template[end_index] == '\r' and 1 or 0
+                if end_index < len(template):
+                    end_index += template[end_index] == '\n' and 1 or 0
+            elif leading_whitespace:
+                parse_tree.append(leading_whitespace)
+                match_index += len(leading_whitespace)
+                leading_whitespace = ''
+
+            if tag_type == '/':
+                if tag_key != section_key:
+                    raise ParsingError("Section end tag mismatch: %s != %s" % (repr(tag_key), repr(section_key)))
+
+                return ParsedTemplate(parse_tree), template[start_index:match_index], end_index
+
+            index = self._handle_tag_type(template, parse_tree, tag_type, tag_key, leading_whitespace, end_index)
+
+        # Save the rest of the template.
+        parse_tree.append(template[index:])
+
+        return ParsedTemplate(parse_tree)
+
+    def _parse_section(self, template, index_start, section_key):
+        parsed_template, template, index_end = self.parse(template=template, index=index_start, section_key=section_key)
+
+        return parsed_template, template, index_end
+
+    def _handle_tag_type(self, template, parse_tree, tag_type, tag_key, leading_whitespace, end_index):
+
+        # TODO: switch to using a dictionary instead of a bunch of ifs and elifs.
+        if tag_type == '!':
+            return end_index
+
+        if tag_type == '=':
+            delimiters = tag_key.split()
+            self._change_delimiters(delimiters)
+            return end_index
+
+        engine = self.engine
+
+        if tag_type == '':
+
+            func = engine._make_get_escaped(tag_key)
+
+        elif tag_type == '&':
+
+            func = engine._make_get_literal(tag_key)
+
+        elif tag_type == '#':
+
+            parsed_section, template, end_index = self._parse_section(template, end_index, tag_key)
+            func = engine._make_get_section(tag_key, parsed_section, template, self._delimiters)
+
+        elif tag_type == '^':
+
+            parsed_section, template, end_index = self._parse_section(template, end_index, tag_key)
+            func = engine._make_get_inverse(tag_key, parsed_section)
+
+        elif tag_type == '>':
+
+            template = engine.load_partial(tag_key)
+
+            # Indent before rendering.
+            template = re.sub(NON_BLANK_RE, leading_whitespace + r'\1', template)
+
+            func = engine._make_get_partial(template)
+
+        else:
+
+            raise Exception("Unrecognized tag type: %s" % repr(tag_type))
+
+        parse_tree.append(func)
+
+        return end_index
+