Added lexer for new Hy language, a variant of Lisp running on Python. See: http://hylang.org

Note that this shares a file extension with the Hybris language, but the analyse_text() function does a good job of making the distinction. This did, however, require one change to the tests: they now pass the code of the test file to get_lexer_for_filename() so that the tests can differentiate Hy from Hybris. And while this is a Lisp and shares some syntax with Clojure, it has been added to the agile.py file so that it can share the lists of keywords and builtins with the PythonLexer.
author: Bob Tolbert <bob@tolbert.org> 2013-09-08 15:58:19 -0600
committer: Bob Tolbert <bob@tolbert.org> 2013-09-08 15:58:19 -0600
commit: 846195f0fb24e724a611d601494fa5056936b5bc (patch)
tree: 65df35ce1d501c24d46d46f19e424ad3ee25f428 /pygments/lexers/agile.py
parent: 1ea0fa53d253eae501f0a48611dd01493240b34d (diff)
download: pygments-846195f0fb24e724a611d601494fa5056936b5bc.tar.gz
1 file changed, 111 insertions, 1 deletion
diff --git a/pygments/lexers/agile.py b/pygments/lexers/agile.py
index 1f81365e..bc0cf4c0 100644
--- a/pygments/lexers/agile.py
+++ b/pygments/lexers/agile.py
@@ -23,7 +23,7 @@ __all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer',
            'Python3Lexer', 'Python3TracebackLexer', 'RubyLexer',
            'RubyConsoleLexer', 'PerlLexer', 'LuaLexer', 'MoonScriptLexer',
            'CrocLexer', 'MiniDLexer', 'IoLexer', 'TclLexer', 'FactorLexer',
-           'FancyLexer', 'DgLexer', 'Perl6Lexer']
+           'FancyLexer', 'DgLexer', 'Perl6Lexer', 'HyLexer']
 
 # b/w compatibility
 from pygments.lexers.functional import SchemeLexer
@@ -2288,3 +2288,113 @@ class Perl6Lexer(ExtendedRegexLexer):
     def __init__(self, **options):
         super(Perl6Lexer, self).__init__(**options)
         self.encoding = options.get('encoding', 'utf-8')
+
+
+class HyLexer(RegexLexer):
+    """
+    Lexer for `Hy <http://hylang.org/>`_ source code.
+    """
+    name = 'Hy'
+    aliases = ['hy']
+    filenames = ['*.hy']
+    mimetypes = ['text/x-hy', 'application/x-hy']
+    
+    special_forms = [
+        'cond', 'for', '->', '->>', 'car',
+        'cdr', 'first', 'rest', 'let', 'when', 'unless',
+        'import', 'do', 'progn', 'get', 'slice', 'assoc', 'with-decorator',
+        ',', 'list_comp', 'kwapply', '~', 'is', 'in', 'is-not', 'not-in', 
+        'quasiquote', 'unquote', 'unquote-splice', 'quote', '|', '<<=', '>>=',
+        'foreach', 'while', 
+        'eval-and-compile', 'eval-when-compile'
+    ]
+    
+    declarations = [
+        'def' 'defn', 'defun', 'defmacro', 'defclass', 'lambda', 'fn', 'setv'
+    ]
+
+    hy_builtins = []
+
+    hy_core = [
+        'cycle', 'dec', 'distinct', 'drop', 'even?', 'filter', 'inc',
+        'instance?', 'iterable?', 'iterate', 'iterator?', 'neg?',
+        'none?', 'nth', 'numeric?', 'odd?', 'pos?', 'remove', 'repeat',
+        'repeatedly', 'take', 'take_nth', 'take_while', 'zero?'
+    ]
+
+    builtins = hy_builtins + hy_core
+
+    # valid names for identifiers
+    # well, names can only not consist fully of numbers
+    # but this should be good enough for now
+    valid_name = r'(?!#)[\w!$%*+<=>?/.#-]+'
+
+    def _multi_escape(entries):
+        return '(%s)' % ('|'.join(re.escape(entry) + ' ' for entry in entries))
+
+    tokens = {
+        'root': [
+            # the comments - always starting with semicolon
+            # and going to the end of the line
+            (r';.*$', Comment.Single),
+
+            # whitespaces - usually not relevant
+            (r'[,\s]+', Text),
+
+            # numbers
+            (r'-?\d+\.\d+', Number.Float),
+            (r'-?\d+', Number.Integer),
+            (r'0[0-7]+j?', Number.Oct),
+            (r'0[xX][a-fA-F0-9]+', Number.Hex),
+
+            # strings, symbols and characters
+            (r'"(\\\\|\\"|[^"])*"', String),
+            (r"'" + valid_name, String.Symbol),
+            (r"\\(.|[a-z]+)", String.Char),
+            (r'^(\s*)([rRuU]{,2}"""(?:.|\n)*?""")', bygroups(Text, String.Doc)),
+            (r"^(\s*)([rRuU]{,2}'''(?:.|\n)*?''')", bygroups(Text, String.Doc)),
+
+            # keywords
+            (r'::?' + valid_name, String.Symbol),
+
+            # special operators
+            (r'~@|[`\'#^~&@]', Operator),
+
+            include('py-keywords'),
+            include('py-builtins'),
+
+            # highlight the special forms
+            (_multi_escape(special_forms), Keyword),
+
+            # Technically, only the special forms are 'keywords'. The problem
+            # is that only treating them as keywords means that things like
+            # 'defn' and 'ns' need to be highlighted as builtins. This is ugly
+            # and weird for most styles. So, as a compromise we're going to
+            # highlight them as Keyword.Declarations.
+            (_multi_escape(declarations), Keyword.Declaration),
+
+            # highlight the builtins
+            (_multi_escape(builtins), Name.Builtin),
+
+            # the remaining functions
+            (r'(?<=\()' + valid_name, Name.Function),
+
+            # find the remaining variables
+            (valid_name, Name.Variable),
+
+            # Hy accepts vector notation
+            (r'(\[|\])', Punctuation),
+
+            # Hy accepts map notation
+            (r'(\{|\})', Punctuation),
+
+            # the famous parentheses!
+            (r'(\(|\))', Punctuation),
+
+        ],
+        'py-keywords': PythonLexer.tokens['keywords'],
+        'py-builtins': PythonLexer.tokens['builtins'],
+    }
+
+    def analyse_text(text):
+        return '(import' in text or '(defn' in text
author	Bob Tolbert <bob@tolbert.org>	2013-09-08 15:58:19 -0600
committer	Bob Tolbert <bob@tolbert.org>	2013-09-08 15:58:19 -0600
commit	846195f0fb24e724a611d601494fa5056936b5bc (patch)
tree	65df35ce1d501c24d46d46f19e424ad3ee25f428 /pygments/lexers/agile.py
parent	1ea0fa53d253eae501f0a48611dd01493240b34d (diff)
download	pygments-846195f0fb24e724a611d601494fa5056936b5bc.tar.gz