# coding: utf-8

"""
Provides a class for parsing template strings.

This module is only meant for internal use by the renderengine module.

"""

import re

from pystache.common import TemplateNotFoundError
from pystache.parsed import ParsedTemplate


DEFAULT_DELIMITERS = (u'{{', u'}}')
END_OF_LINE_CHARACTERS = [u'\r', u'\n']

# Captures the first character of every line that has one ("." does not
# match a newline); used to indent each such line of a partial.
# Written as a plain u'' literal (it contains no backslashes) because the
# ur'' prefix is a syntax error on Python 3.
NON_BLANK_RE = re.compile(u'^(.)', re.M)


def _compile_template_re(delimiters=None):
    """
    Return a regular expression object (re.RegexObject) instance.

    Arguments:

      delimiters: a pair (opening delimiter, closing delimiter).  Defaults
        to DEFAULT_DELIMITERS when None.

    The compiled pattern exposes these named groups, which Parser.parse()
    reads through match.groupdict():

      whitespace: spaces and tabs immediately preceding the opening delimiter.
      change, delims: set for a delimiter-change tag, e.g. "{{=<% %>=}}".
      raw, raw_name: set for a triple-mustache tag, e.g. "{{{name}}}".
      tag, tag_key: the tag type character (possibly empty) and the tag
        contents for every other tag.

    """
    if delimiters is None:
        delimiters = DEFAULT_DELIMITERS

    # The possible tag type characters following the opening tag,
    # excluding "=" and "{".
    tag_types = "!>&/#^"

    # TODO: are we following this in the spec?
    #
    #   The tag's content MUST be a non-whitespace character sequence
    #   NOT containing the current closing delimiter.
    #
    tag = r"""
        (?P<whitespace>[\ \t]*)
        %(otag)s \s*
        (?:
          (?P<change>=) \s* (?P<delims>.+?)   \s* = |
          (?P<raw>{)    \s* (?P<raw_name>.+?) \s* } |
          (?P<tag>[%(tag_types)s]?)  \s* (?P<tag_key>[\s\S]+?)
        )
        \s* %(ctag)s
    """ % {'tag_types': tag_types,
           'otag': re.escape(delimiters[0]),
           'ctag': re.escape(delimiters[1])}

    return re.compile(tag, re.VERBOSE)


class ParsingError(Exception):
    """Raised when a template cannot be parsed."""

    pass


class Parser(object):
    """
    Parses a template string into a ParsedTemplate.

    The parser keeps the current tag delimiters and the regular expression
    compiled for them as instance state; a delimiter-change tag encountered
    while parsing replaces both.

    """

    _delimiters = None
    _template_re = None

    def __init__(self, engine, delimiters=None):
        """
        Construct an instance.

        Arguments:

          engine: a RenderEngine instance.

          delimiters: the initial pair of tag delimiters.  Defaults to
            DEFAULT_DELIMITERS when None.

        """
        if delimiters is None:
            delimiters = DEFAULT_DELIMITERS

        self._delimiters = delimiters
        self.engine = engine

    def compile_template_re(self):
        """Compile and store the tag pattern for the current delimiters."""
        self._template_re = _compile_template_re(self._delimiters)

    def _change_delimiters(self, delimiters):
        """Switch to new delimiters and recompile the tag pattern."""
        self._delimiters = delimiters
        self.compile_template_re()

    def parse(self, template, start_index=0, section_key=None):
        """
        Parse a template string starting at some index.

        This method uses the current tag delimiter.

        Arguments:

          template: a unicode string that is the template to parse.

          start_index: the index at which to start parsing.

          section_key: the key of the section whose contents are being
            parsed, or None when parsing at the top level.

        Returns:

          a ParsedTemplate instance when the end of the template is
          reached.  When a section end tag matching section_key is found
          first, returns instead a 3-tuple of the ParsedTemplate built so
          far, the index at which the section contents end, and the index
          just after the end tag (see _parse_section()).

        Raises:

          ParsingError: if a section end tag does not match section_key.

        """
        # Compile lazily so that parse() also works when the caller has not
        # called compile_template_re() first.
        if self._template_re is None:
            self.compile_template_re()

        parse_tree = []
        index = start_index

        while True:
            # Look the pattern up on every iteration: a delimiter-change
            # tag replaces self._template_re while parsing.
            match = self._template_re.search(template, index)

            if match is None:
                break

            match_index = match.start()
            end_index = match.end()

            before_tag = template[index:match_index]

            parse_tree.append(before_tag)

            matches = match.groupdict()

            # Normalize the matches dictionary.
            if matches['change'] is not None:
                matches.update(tag='=', tag_key=matches['delims'])
            elif matches['raw'] is not None:
                matches.update(tag='&', tag_key=matches['raw_name'])

            tag_type = matches['tag']
            tag_key = matches['tag_key']
            leading_whitespace = matches['whitespace']

            # Standalone (non-interpolation) tags consume the entire line,
            # both leading whitespace and trailing newline.
            did_tag_begin_line = (match_index == 0 or
                                  template[match_index - 1] in END_OF_LINE_CHARACTERS)
            did_tag_end_line = (end_index == len(template) or
                                template[end_index] in END_OF_LINE_CHARACTERS)
            is_tag_interpolating = tag_type in ['', '&']

            if did_tag_begin_line and did_tag_end_line and not is_tag_interpolating:
                # Swallow the line ending after the tag: "\r", "\n" or "\r\n".
                if end_index < len(template) and template[end_index] == '\r':
                    end_index += 1
                if end_index < len(template) and template[end_index] == '\n':
                    end_index += 1
            elif leading_whitespace:
                # Not standalone: the whitespace before the tag is ordinary
                # template text, so emit it and do not pass it on.
                parse_tree.append(leading_whitespace)
                match_index += len(leading_whitespace)
                leading_whitespace = ''

            if tag_type == '/':
                if tag_key != section_key:
                    raise ParsingError("Section end tag mismatch: %s != %s" % (tag_key, section_key))

                return ParsedTemplate(parse_tree), match_index, end_index

            index = self._handle_tag_type(template, parse_tree, tag_type, tag_key,
                                          leading_whitespace, end_index)

        # Save the rest of the template.
        parse_tree.append(template[index:])

        return ParsedTemplate(parse_tree)

    def _parse_section(self, template, start_index, section_key):
        """
        Parse the contents of a template section.

        Arguments:

          template: a unicode template string.

          start_index: the string index at which the section contents begin.

          section_key: the tag key of the section.

        Returns: a 3-tuple:

          parsed_section: the section contents parsed as a ParsedTemplate
            instance.

          section_contents: the unparsed section contents, i.e. the slice
            of the template from start_index up to the closing section tag.

          end_index: the string index after the closing section tag (and
            including any trailing newlines).

        Raises:

          ParsingError: if the template ends before the section is closed.

        """
        result = self.parse(template=template, start_index=start_index,
                            section_key=section_key)

        # parse() returns a bare ParsedTemplate, not a 3-tuple, when it runs
        # out of template without meeting the closing tag.
        if isinstance(result, ParsedTemplate):
            raise ParsingError("Section not closed: %s" % section_key)

        parsed_section, content_end_index, end_index = result

        return parsed_section, template[start_index:content_end_index], end_index

    def _handle_tag_type(self, template, parse_tree, tag_type, tag_key, leading_whitespace, end_index):
        """
        Process one tag and return the index at which parsing should resume.

        Comment tags are dropped and delimiter-change tags reconfigure the
        parser; for every other tag type a callable obtained from the engine
        is appended to parse_tree.

        Arguments:

          template: the template string being parsed.

          parse_tree: the list to which the callable for the tag is appended.

          tag_type: the tag type character ('' for a plain variable tag).

          tag_key: the contents of the tag.

          leading_whitespace: the whitespace preceding the tag; used to
            indent the lines of a partial.

          end_index: the string index just after the tag.

        Raises:

          ParsingError: if tag_type is not a recognized tag type, or if a
            section opened by the tag is never closed.

        """
        # TODO: switch to using a dictionary instead of a bunch of ifs and elifs.
        if tag_type == '!':
            return end_index

        if tag_type == '=':
            delimiters = tag_key.split()
            self._change_delimiters(delimiters)
            return end_index

        engine = self.engine

        if tag_type == '':

            func = engine._make_get_escaped(tag_key)

        elif tag_type == '&':

            func = engine._make_get_literal(tag_key)

        elif tag_type == '#':

            parsed_section, section_contents, end_index = self._parse_section(template, end_index, tag_key)
            func = engine._make_get_section(tag_key, parsed_section, section_contents, self._delimiters)

        elif tag_type == '^':

            parsed_section, section_contents, end_index = self._parse_section(template, end_index, tag_key)
            func = engine._make_get_inverse(tag_key, parsed_section)

        elif tag_type == '>':

            try:
                # TODO: make engine.load() and test it separately.
                partial = engine.load_partial(tag_key)
            except TemplateNotFoundError:
                partial = u''

            # Indent before rendering.  leading_whitespace holds only spaces
            # and tabs, so it is safe inside the replacement string.
            partial = NON_BLANK_RE.sub(leading_whitespace + u'\\1', partial)

            func = engine._make_get_partial(partial)

        else:

            # ParsingError derives from Exception, so callers catching
            # Exception are unaffected.
            raise ParsingError("Unrecognized tag type: %s" % repr(tag_type))

        parse_tree.append(func)

        return end_index