diff options
author | R. Tyler Ballance <tyler@monkeypox.org> | 2009-08-14 15:11:24 -0700 |
---|---|---|
committer | R. Tyler Ballance <tyler@monkeypox.org> | 2009-08-14 15:11:24 -0700 |
commit | 988f6da0fc7c211f654352ec1453f0ef168da7cf (patch) | |
tree | 50ab6734ef1cacb71ff2ba30da76a3cfce51365e /cheetah/Filters.py | |
parent | f8c53e6fdaf28e1461456bf844c59a10e15bee68 (diff) | |
parent | 53144fffd7754476b8b866b7b52fa9faab1548e4 (diff) | |
download | python-cheetah-988f6da0fc7c211f654352ec1453f0ef168da7cf.tar.gz |
Merge branch 'next' into performance
Left some rename conflicts partially unresolved, need to move src/c/ to
cheetah/c/ in the next commit
Conflicts:
SetupConfig.py
cheetah/_namemapper.c
src/_namemapper.c
src/c/_namemapper.c
Diffstat (limited to 'cheetah/Filters.py')
-rw-r--r-- | cheetah/Filters.py | 233 |
1 files changed, 233 insertions, 0 deletions
diff --git a/cheetah/Filters.py b/cheetah/Filters.py new file mode 100644 index 0000000..dd65f28 --- /dev/null +++ b/cheetah/Filters.py @@ -0,0 +1,233 @@ +''' + Filters for the #filter directive as well as #transform + + #filter results in output filters Cheetah's $placeholders . + #transform results in a filter on the entirety of the output +''' +import sys + +# Additional entities WebSafe knows how to transform. No need to include +# '<', '>' or '&' since those will have been done already. +webSafeEntities = {' ': ' ', '"': '"'} + +class Filter(object): + """A baseclass for the Cheetah Filters.""" + + def __init__(self, template=None): + """Setup a reference to the template that is using the filter instance. + This reference isn't used by any of the standard filters, but is + available to Filter subclasses, should they need it. + + Subclasses should call this method. + """ + self.template = template + + def filter(self, val, encoding=None, str=str, **kw): + ''' + Pass Unicode strings through unmolested, unless an encoding is specified. + ''' + if val is None: + return u'' + if isinstance(val, unicode): + if encoding: + return val.encode(encoding) + else: + return val + else: + try: + return str(val) + except UnicodeEncodeError: + return unicode(val) + return u'' + +RawOrEncodedUnicode = Filter + +class EncodeUnicode(Filter): + def filter(self, val, + encoding='utf8', + str=str, + **kw): + """Encode Unicode strings, by default in UTF-8. + + >>> import Cheetah.Template + >>> t = Cheetah.Template.Template(''' + ... $myvar + ... ${myvar, encoding='utf16'} + ... ''', searchList=[{'myvar': u'Asni\xe8res'}], + ... filter='EncodeUnicode') + >>> print t + """ + if isinstance(val, unicode): + return val.encode(encoding) + if val is None: + return '' + return str(val) + + +class Markdown(EncodeUnicode): + ''' + Markdown will change regular strings to Markdown + (http://daringfireball.net/projects/markdown/) + + Such that: + My Header + ========= + Becaomes: + <h1>My Header</h1> + + and so on. + + Markdown is meant to be used with the #transform + tag, as it's usefulness with #filter is marginal at + best + ''' + def filter(self, value, **kwargs): + # This is a bit of a hack to allow outright embedding of the markdown module + try: + import markdown + except ImportError: + print '>>> Exception raised importing the "markdown" module' + print '>>> Are you sure you have the ElementTree module installed?' + print ' http://effbot.org/downloads/#elementtree' + raise + + encoded = super(Markdown, self).filter(value, **kwargs) + return markdown.markdown(encoded) + +class CodeHighlighter(EncodeUnicode): + ''' + The CodeHighlighter filter depends on the "pygments" module which you can + download and install from: http://pygments.org + + What the CodeHighlighter assumes the string that it's receiving is source + code and uses pygments.lexers.guess_lexer() to try to guess which parser + to use when highlighting it. + + CodeHighlighter will return the HTML and CSS to render the code block, syntax + highlighted, in a browser + + NOTE: I had an issue installing pygments on Linux/amd64/Python 2.6 dealing with + importing of pygments.lexers, I was able to correct the failure by adding: + raise ImportError + to line 39 of pygments/plugin.py (since importing pkg_resources was causing issues) + ''' + def filter(self, source, **kwargs): + encoded = super(CodeHighlighter, self).filter(source, **kwargs) + try: + from pygments import highlight + from pygments import lexers + from pygments import formatters + except ImportError, ex: + print '<%s> - Failed to import pygments! (%s)' % (self.__class__.__name__, ex) + print '-- You may need to install it from: http://pygments.org' + return encoded + + lexer = None + try: + lexer = lexers.guess_lexer(source) + except lexers.ClassNotFound: + lexer = lexers.PythonLexer() + + formatter = formatters.HtmlFormatter(cssclass='code_highlighter') + encoded = highlight(encoded, lexer, formatter) + css = formatter.get_style_defs('.code_highlighter') + return '''<style type="text/css"><!-- + %(css)s + --></style>%(source)s''' % {'css' : css, 'source' : encoded} + + + +class MaxLen(Filter): + def filter(self, val, **kw): + """Replace None with '' and cut off at maxlen.""" + + output = super(MaxLen, self).filter(val, **kw) + if kw.has_key('maxlen') and len(output) > kw['maxlen']: + return output[:kw['maxlen']] + return output + +class WebSafe(Filter): + """Escape HTML entities in $placeholders. + """ + def filter(self, val, **kw): + s = super(WebSafe, self).filter(val, **kw) + # These substitutions are copied from cgi.escape(). + s = s.replace("&", "&") # Must be done first! + s = s.replace("<", "<") + s = s.replace(">", ">") + # Process the additional transformations if any. + if kw.has_key('also'): + also = kw['also'] + entities = webSafeEntities # Global variable. + for k in also: + if k in entities: + v = entities[k] + else: + v = "&#%s;" % ord(k) + s = s.replace(k, v) + return s + + +class Strip(Filter): + """Strip leading/trailing whitespace but preserve newlines. + + This filter goes through the value line by line, removing leading and + trailing whitespace on each line. It does not strip newlines, so every + input line corresponds to one output line, with its trailing newline intact. + + We do not use val.split('\n') because that would squeeze out consecutive + blank lines. Instead, we search for each newline individually. This + makes us unable to use the fast C .split method, but it makes the filter + much more widely useful. + + This filter is intended to be usable both with the #filter directive and + with the proposed #sed directive (which has not been ratified yet.) + """ + def filter(self, val, **kw): + s = super(Strip, self).filter(val, **kw) + result = [] + start = 0 # The current line will be s[start:end]. + while 1: # Loop through each line. + end = s.find('\n', start) # Find next newline. + if end == -1: # If no more newlines. + break + chunk = s[start:end].strip() + result.append(chunk) + result.append('\n') + start = end + 1 + # Write the unfinished portion after the last newline, if any. + chunk = s[start:].strip() + result.append(chunk) + return "".join(result) + +class StripSqueeze(Filter): + """Canonicalizes every chunk of whitespace to a single space. + + Strips leading/trailing whitespace. Removes all newlines, so multi-line + input is joined into one ling line with NO trailing newline. + """ + def filter(self, val, **kw): + s = super(StripSqueeze, self).filter(val, **kw) + s = s.split() + return " ".join(s) + +################################################## +## MAIN ROUTINE -- testing + +def test(): + s1 = "abc <=> &" + s2 = " asdf \n\t 1 2 3\n" + print "WebSafe INPUT:", `s1` + print " WebSafe:", `WebSafe().filter(s1)` + + print + print " Strip INPUT:", `s2` + print " Strip:", `Strip().filter(s2)` + print "StripSqueeze:", `StripSqueeze().filter(s2)` + + print "Unicode:", `EncodeUnicode().filter(u'aoeu12345\u1234')` + +if __name__ == "__main__": + test() + +# vim: shiftwidth=4 tabstop=4 expandtab |