summaryrefslogtreecommitdiff
path: root/cheetah/Filters.py
diff options
context:
space:
mode:
authorR. Tyler Ballance <tyler@monkeypox.org>2009-08-14 15:11:24 -0700
committerR. Tyler Ballance <tyler@monkeypox.org>2009-08-14 15:11:24 -0700
commit988f6da0fc7c211f654352ec1453f0ef168da7cf (patch)
tree50ab6734ef1cacb71ff2ba30da76a3cfce51365e /cheetah/Filters.py
parentf8c53e6fdaf28e1461456bf844c59a10e15bee68 (diff)
parent53144fffd7754476b8b866b7b52fa9faab1548e4 (diff)
downloadpython-cheetah-988f6da0fc7c211f654352ec1453f0ef168da7cf.tar.gz
Merge branch 'next' into performance
Left some rename conflicts partially unresolved, need to move src/c/ to cheetah/c/ in the next commit Conflicts: SetupConfig.py cheetah/_namemapper.c src/_namemapper.c src/c/_namemapper.c
Diffstat (limited to 'cheetah/Filters.py')
-rw-r--r--cheetah/Filters.py233
1 files changed, 233 insertions, 0 deletions
diff --git a/cheetah/Filters.py b/cheetah/Filters.py
new file mode 100644
index 0000000..dd65f28
--- /dev/null
+++ b/cheetah/Filters.py
@@ -0,0 +1,233 @@
+'''
+ Filters for the #filter directive as well as #transform
+
+ #filter applies an output filter to each of Cheetah's $placeholders.
+ #transform applies a filter to the entirety of the output.
+'''
+import sys
+
+# Additional entities WebSafe knows how to transform; looked up for each character passed via its `also` keyword.
+# No need to include '<', '>' or '&' since WebSafe.filter has already escaped those by then.
+webSafeEntities = {' ': '&nbsp;', '"': '&quot;'}
+
+class Filter(object):
+ """A baseclass for the Cheetah Filters."""
+
+ def __init__(self, template=None):
+ """Setup a reference to the template that is using the filter instance.
+ This reference isn't used by any of the standard filters, but is
+ available to Filter subclasses, should they need it.
+
+ Subclasses should call this method.
+ """
+ self.template = template
+
+ def filter(self, val, encoding=None, str=str, **kw):
+ '''
+ Pass Unicode strings through unmolested, unless an encoding is specified.
+ '''
+ if val is None:
+ return u''
+ if isinstance(val, unicode):
+ if encoding:
+ return val.encode(encoding)
+ else:
+ return val
+ else:
+ try:
+ return str(val)
+ except UnicodeEncodeError:
+ return unicode(val)
+ return u''
+
+RawOrEncodedUnicode = Filter  # alias: a second name bound to the same Filter class
+
+class EncodeUnicode(Filter):
+ def filter(self, val,
+ encoding='utf8',
+ str=str,
+ **kw):
+ """Encode Unicode strings, by default in UTF-8.
+
+ >>> import Cheetah.Template
+ >>> t = Cheetah.Template.Template('''
+ ... $myvar
+ ... ${myvar, encoding='utf16'}
+ ... ''', searchList=[{'myvar': u'Asni\xe8res'}],
+ ... filter='EncodeUnicode')
+ >>> print t
+ """
+ if isinstance(val, unicode):
+ return val.encode(encoding)
+ if val is None:
+ return ''
+ return str(val)
+
+
+class Markdown(EncodeUnicode):
+ '''
+ Markdown will change regular strings to Markdown
+ (http://daringfireball.net/projects/markdown/)
+
+ Such that:
+ My Header
+ =========
+ Becaomes:
+ <h1>My Header</h1>
+
+ and so on.
+
+ Markdown is meant to be used with the #transform
+ tag, as it's usefulness with #filter is marginal at
+ best
+ '''
+ def filter(self, value, **kwargs):
+ # This is a bit of a hack to allow outright embedding of the markdown module
+ try:
+ import markdown
+ except ImportError:
+ print '>>> Exception raised importing the "markdown" module'
+ print '>>> Are you sure you have the ElementTree module installed?'
+ print ' http://effbot.org/downloads/#elementtree'
+ raise
+
+ encoded = super(Markdown, self).filter(value, **kwargs)
+ return markdown.markdown(encoded)
+
+class CodeHighlighter(EncodeUnicode):
+ '''
+ The CodeHighlighter filter depends on the "pygments" module which you can
+ download and install from: http://pygments.org
+
+ What the CodeHighlighter assumes the string that it's receiving is source
+ code and uses pygments.lexers.guess_lexer() to try to guess which parser
+ to use when highlighting it.
+
+ CodeHighlighter will return the HTML and CSS to render the code block, syntax
+ highlighted, in a browser
+
+ NOTE: I had an issue installing pygments on Linux/amd64/Python 2.6 dealing with
+ importing of pygments.lexers, I was able to correct the failure by adding:
+ raise ImportError
+ to line 39 of pygments/plugin.py (since importing pkg_resources was causing issues)
+ '''
+ def filter(self, source, **kwargs):
+ encoded = super(CodeHighlighter, self).filter(source, **kwargs)
+ try:
+ from pygments import highlight
+ from pygments import lexers
+ from pygments import formatters
+ except ImportError, ex:
+ print '<%s> - Failed to import pygments! (%s)' % (self.__class__.__name__, ex)
+ print '-- You may need to install it from: http://pygments.org'
+ return encoded
+
+ lexer = None
+ try:
+ lexer = lexers.guess_lexer(source)
+ except lexers.ClassNotFound:
+ lexer = lexers.PythonLexer()
+
+ formatter = formatters.HtmlFormatter(cssclass='code_highlighter')
+ encoded = highlight(encoded, lexer, formatter)
+ css = formatter.get_style_defs('.code_highlighter')
+ return '''<style type="text/css"><!--
+ %(css)s
+ --></style>%(source)s''' % {'css' : css, 'source' : encoded}
+
+
+
+class MaxLen(Filter):
+ def filter(self, val, **kw):
+ """Replace None with '' and cut off at maxlen."""
+
+ output = super(MaxLen, self).filter(val, **kw)
+ if kw.has_key('maxlen') and len(output) > kw['maxlen']:
+ return output[:kw['maxlen']]
+ return output
+
+class WebSafe(Filter):
+ """Escape HTML entities in $placeholders.
+ """
+ def filter(self, val, **kw):
+ s = super(WebSafe, self).filter(val, **kw)
+ # These substitutions are copied from cgi.escape().
+ s = s.replace("&", "&amp;") # Must be done first!
+ s = s.replace("<", "&lt;")
+ s = s.replace(">", "&gt;")
+ # Process the additional transformations if any.
+ if kw.has_key('also'):
+ also = kw['also']
+ entities = webSafeEntities # Global variable.
+ for k in also:
+ if k in entities:
+ v = entities[k]
+ else:
+ v = "&#%s;" % ord(k)
+ s = s.replace(k, v)
+ return s
+
+
+class Strip(Filter):
+ """Strip leading/trailing whitespace but preserve newlines.
+
+ This filter goes through the value line by line, removing leading and
+ trailing whitespace on each line. It does not strip newlines, so every
+ input line corresponds to one output line, with its trailing newline intact.
+
+ We do not use val.split('\n') because that would squeeze out consecutive
+ blank lines. Instead, we search for each newline individually. This
+ makes us unable to use the fast C .split method, but it makes the filter
+ much more widely useful.
+
+ This filter is intended to be usable both with the #filter directive and
+ with the proposed #sed directive (which has not been ratified yet.)
+ """
+ def filter(self, val, **kw):
+ s = super(Strip, self).filter(val, **kw)
+ result = []
+ start = 0 # The current line will be s[start:end].
+ while 1: # Loop through each line.
+ end = s.find('\n', start) # Find next newline.
+ if end == -1: # If no more newlines.
+ break
+ chunk = s[start:end].strip()
+ result.append(chunk)
+ result.append('\n')
+ start = end + 1
+ # Write the unfinished portion after the last newline, if any.
+ chunk = s[start:].strip()
+ result.append(chunk)
+ return "".join(result)
+
+class StripSqueeze(Filter):
+ """Canonicalizes every chunk of whitespace to a single space.
+
+ Strips leading/trailing whitespace. Removes all newlines, so multi-line
+ input is joined into one ling line with NO trailing newline.
+ """
+ def filter(self, val, **kw):
+ s = super(StripSqueeze, self).filter(val, **kw)
+ s = s.split()
+ return " ".join(s)
+
+##################################################
+## MAIN ROUTINE -- testing
+
+def test():
+ s1 = "abc <=> &"
+ s2 = " asdf \n\t 1 2 3\n"
+ print "WebSafe INPUT:", `s1`
+ print " WebSafe:", `WebSafe().filter(s1)`
+
+ print
+ print " Strip INPUT:", `s2`
+ print " Strip:", `Strip().filter(s2)`
+ print "StripSqueeze:", `StripSqueeze().filter(s2)`
+
+ print "Unicode:", `EncodeUnicode().filter(u'aoeu12345\u1234')`
+
+if __name__ == "__main__":  # run the smoke test only when executed as a script
+ test()
+
+# vim: shiftwidth=4 tabstop=4 expandtab