summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--AUTHORS1
-rw-r--r--CHANGES1
-rw-r--r--pygments/lexers/_mapping.py2
-rw-r--r--pygments/lexers/jvm.py121
-rw-r--r--pygments/lexers/kotlin.py138
5 files changed, 123 insertions, 140 deletions
diff --git a/AUTHORS b/AUTHORS
index df51a66a..83bbee57 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -96,6 +96,7 @@ Other contributors, listed alphabetically, are:
* Kirill Simonov -- YAML lexer
* Steve Spigarelli -- XQuery lexer
* Jerome St-Louis -- eC lexer
+* James Strachan -- Kotlin lexer
* Tiberius Teng -- default style overhaul
* Jeremy Thurgood -- Erlang, Squid config lexers
* Erick Tryzelaar -- Felix lexer
diff --git a/CHANGES b/CHANGES
index 3755b3af..2cb8396d 100644
--- a/CHANGES
+++ b/CHANGES
@@ -43,6 +43,7 @@ Version 1.5
* Scilab (#740)
* Elixir (PR#57)
* Tea (PR#56)
+ * Kotlin (PR#58)
- Fix Python 3 terminal highlighting with pygmentize (#691).
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index 91161556..7c745a89 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -131,7 +131,7 @@ LEXERS = {
'JavascriptPhpLexer': ('pygments.lexers.templates', 'JavaScript+PHP', ('js+php', 'javascript+php'), (), ('application/x-javascript+php', 'text/x-javascript+php', 'text/javascript+php')),
'JavascriptSmartyLexer': ('pygments.lexers.templates', 'JavaScript+Smarty', ('js+smarty', 'javascript+smarty'), (), ('application/x-javascript+smarty', 'text/x-javascript+smarty', 'text/javascript+smarty')),
'JspLexer': ('pygments.lexers.templates', 'Java Server Page', ('jsp',), ('*.jsp',), ('application/x-jsp',)),
- 'KotlinLexer': ('pygments.lexers.kotlin', 'Kotlin', ('kotlin',), ('*.kt',), ('application/x-kotlin',)),
+ 'KotlinLexer': ('pygments.lexers.jvm', 'Kotlin', ('kotlin',), ('*.kt',), ('text/x-kotlin',)),
'LighttpdConfLexer': ('pygments.lexers.text', 'Lighttpd configuration file', ('lighty', 'lighttpd'), (), ('text/x-lighttpd-conf',)),
'LiterateHaskellLexer': ('pygments.lexers.functional', 'Literate Haskell', ('lhs', 'literate-haskell'), ('*.lhs',), ('text/x-literate-haskell',)),
'LlvmLexer': ('pygments.lexers.asm', 'LLVM', ('llvm',), ('*.ll',), ('text/x-llvm',)),
diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py
index 6e3e103f..b56d4582 100644
--- a/pygments/lexers/jvm.py
+++ b/pygments/lexers/jvm.py
@@ -15,10 +15,12 @@ from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \
this
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
Number, Punctuation
+from pygments.util import get_choice_opt
+from pygments import unistring as uni
__all__ = ['JavaLexer', 'ScalaLexer', 'GosuLexer', 'GosuTemplateLexer',
- 'GroovyLexer', 'IokeLexer', 'ClojureLexer']
+ 'GroovyLexer', 'IokeLexer', 'ClojureLexer', 'KotlinLexer']
class JavaLexer(RegexLexer):
@@ -726,3 +728,120 @@ class TeaLangLexer(RegexLexer):
(r'[a-zA-Z0-9_.]+\*?', Name.Namespace, '#pop')
],
}
+
+
+class KotlinLexer(RegexLexer):
+ """
+ For `Kotlin <http://confluence.jetbrains.net/display/Kotlin/>`_
+ source code.
+
+ Additional options accepted:
+
+ `unicodelevel`
+ Determines which Unicode characters this lexer allows for identifiers.
+ The possible values are:
+
+ * ``none`` -- only the ASCII letters and numbers are allowed. This
+ is the fastest selection.
+ * ``basic`` -- all Unicode characters from the specification except
+ category ``Lo`` are allowed.
+ * ``full`` -- all Unicode characters as specified in the C# specs
+ are allowed. Note that this means a considerable slowdown since the
+ ``Lo`` category has more than 40,000 characters in it!
+
+ The default value is ``basic``.
+
+ *New in Pygments 1.5.*
+ """
+
+ name = 'Kotlin'
+ aliases = ['kotlin']
+ filenames = ['*.kt']
+ mimetypes = ['text/x-kotlin'] # inferred
+
+ flags = re.MULTILINE | re.DOTALL | re.UNICODE
+
+ # for the range of allowed unicode characters in identifiers,
+ # see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf
+
+ def _escape(st):
+ return st.replace(u'\\', ur'\\').replace(u'-', ur'\-').\
+ replace(u'[', ur'\[').replace(u']', ur'\]')
+
+ levels = {
+ 'none': '@?[_a-zA-Z][a-zA-Z0-9_]*',
+ 'basic': ('@?[_' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + ']' +
+ '[' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl +
+ uni.Nd + uni.Pc + uni.Cf + uni.Mn + uni.Mc + ']*'),
+ 'full': ('@?(?:_|[^' +
+ _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl')) + '])'
+ + '[^' + _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo',
+ 'Nl', 'Nd', 'Pc', 'Cf', 'Mn',
+ 'Mc')) + ']*'),
+ }
+
+ tokens = {}
+ token_variants = True
+
+ for levelname, cs_ident in levels.items():
+ tokens[levelname] = {
+ 'root': [
+ # method names
+ (r'^([ \t]*(?:' + cs_ident + r'(?:\[\])?\s+)+?)' # return type
+ r'(' + cs_ident + ')' # method name
+ r'(\s*)(\()', # signature start
+ bygroups(using(this), Name.Function, Text, Punctuation)),
+ (r'^\s*\[.*?\]', Name.Attribute),
+ (r'[^\S\n]+', Text),
+ (r'\\\n', Text), # line continuation
+ (r'//.*?\n', Comment.Single),
+ (r'/[*](.|\n)*?[*]/', Comment.Multiline),
+ (r'\n', Text),
+ (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
+ (r'[{}]', Punctuation),
+ (r'@"(""|[^"])*"', String),
+ (r'"(\\\\|\\"|[^"\n])*["\n]', String),
+ (r"'\\.'|'[^\\]'", String.Char),
+ (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?"
+ r"[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?", Number),
+ (r'#[ \t]*(if|endif|else|elif|define|undef|'
+ r'line|error|warning|region|endregion|pragma)\b.*?\n',
+ Comment.Preproc),
+ (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text,
+ Keyword)),
+ (r'(abstract|as|break|catch|'
+ r'fun|continue|default|delegate|'
+ r'do|else|enum|extern|false|finally|'
+ r'fixed|for|goto|if|implicit|in|interface|'
+ r'internal|is|lock|null|'
+ r'out|override|private|protected|public|readonly|'
+ r'ref|return|sealed|sizeof|'
+ r'when|this|throw|true|try|typeof|'
+ r'unchecked|unsafe|virtual|void|while|'
+ r'get|set|new|partial|yield|val|var)\b', Keyword),
+ (r'(global)(::)', bygroups(Keyword, Punctuation)),
+ (r'(bool|byte|char|decimal|double|dynamic|float|int|long|'
+ r'short)\b\??', Keyword.Type),
+ (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'class'),
+ (r'(package|using)(\s+)', bygroups(Keyword, Text), 'package'),
+ (cs_ident, Name),
+ ],
+ 'class': [
+ (cs_ident, Name.Class, '#pop')
+ ],
+ 'package': [
+ (r'(?=\()', Text, '#pop'), # using (resource)
+ ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop')
+ ]
+ }
+
+ def __init__(self, **options):
+ level = get_choice_opt(options, 'unicodelevel', self.tokens.keys(),
+ 'basic')
+ if level not in self._all_tokens:
+ # compile the regexes now
+ self._tokens = self.__class__.process_tokendef(level)
+ else:
+ self._tokens = self._all_tokens[level]
+
+ RegexLexer.__init__(self, **options)
diff --git a/pygments/lexers/kotlin.py b/pygments/lexers/kotlin.py
deleted file mode 100644
index d8a31138..00000000
--- a/pygments/lexers/kotlin.py
+++ /dev/null
@@ -1,138 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
- pygments.lexers.kotlin
- ~~~~~~~~~~~~~~~~~~~~~~
-
- Lexer for the Kotlin programming language.
- http://confluence.jetbrains.net/display/Kotlin/
-
- :copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
-"""
-import re
-
-from pygments.lexer import RegexLexer, DelegatingLexer, bygroups, include, \
- using, this
-from pygments.token import Punctuation, \
- Text, Comment, Operator, Keyword, Name, String, Number, Literal, Other
-from pygments.util import get_choice_opt
-from pygments import unistring as uni
-
-from pygments.lexers.web import XmlLexer
-
-__all__ = ['KotlinLexer']
-
-
-def _escape(st):
- return st.replace(u'\\', ur'\\').replace(u'-', ur'\-').\
- replace(u'[', ur'\[').replace(u']', ur'\]')
-
-class KotlinLexer(RegexLexer):
- """
- For `Kotlin <http://confluence.jetbrains.net/display/Kotlin/>`_
- source code.
-
- Additional options accepted:
-
- `unicodelevel`
- Determines which Unicode characters this lexer allows for identifiers.
- The possible values are:
-
- * ``none`` -- only the ASCII letters and numbers are allowed. This
- is the fastest selection.
- * ``basic`` -- all Unicode characters from the specification except
- category ``Lo`` are allowed.
- * ``full`` -- all Unicode characters as specified in the C# specs
- are allowed. Note that this means a considerable slowdown since the
- ``Lo`` category has more than 40,000 characters in it!
-
- The default value is ``basic``.
-
- *New in Pygments 0.8.*
- """
-
- name = 'Kotlin'
- aliases = ['kotlin']
- filenames = ['*.kt']
- mimetypes = ['text/x-kotlin'] # inferred
-
- flags = re.MULTILINE | re.DOTALL | re.UNICODE
-
- # for the range of allowed unicode characters in identifiers,
- # see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf
-
- levels = {
- 'none': '@?[_a-zA-Z][a-zA-Z0-9_]*',
- 'basic': ('@?[_' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + ']' +
- '[' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl +
- uni.Nd + uni.Pc + uni.Cf + uni.Mn + uni.Mc + ']*'),
- 'full': ('@?(?:_|[^' +
- _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl')) + '])'
- + '[^' + _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo',
- 'Nl', 'Nd', 'Pc', 'Cf', 'Mn',
- 'Mc')) + ']*'),
- }
-
- tokens = {}
- token_variants = True
-
- for levelname, cs_ident in levels.items():
- tokens[levelname] = {
- 'root': [
- # method names
- (r'^([ \t]*(?:' + cs_ident + r'(?:\[\])?\s+)+?)' # return type
- r'(' + cs_ident + ')' # method name
- r'(\s*)(\()', # signature start
- bygroups(using(this), Name.Function, Text, Punctuation)),
- (r'^\s*\[.*?\]', Name.Attribute),
- (r'[^\S\n]+', Text),
- (r'\\\n', Text), # line continuation
- (r'//.*?\n', Comment.Single),
- (r'/[*](.|\n)*?[*]/', Comment.Multiline),
- (r'\n', Text),
- (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
- (r'[{}]', Punctuation),
- (r'@"(""|[^"])*"', String),
- (r'"(\\\\|\\"|[^"\n])*["\n]', String),
- (r"'\\.'|'[^\\]'", String.Char),
- (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?"
- r"[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?", Number),
- (r'#[ \t]*(if|endif|else|elif|define|undef|'
- r'line|error|warning|region|endregion|pragma)\b.*?\n',
- Comment.Preproc),
- (r'\b(extern)(\s+)(alias)\b', bygroups(Keyword, Text,
- Keyword)),
- (r'(abstract|as|break|catch|'
- r'fun|continue|default|delegate|'
- r'do|else|enum|extern|false|finally|'
- r'fixed|for|goto|if|implicit|in|interface|'
- r'internal|is|lock|null|'
- r'out|override|private|protected|public|readonly|'
- r'ref|return|sealed|sizeof|'
- r'when|this|throw|true|try|typeof|'
- r'unchecked|unsafe|virtual|void|while|'
- r'get|set|new|partial|yield|val|var)\b', Keyword),
- (r'(global)(::)', bygroups(Keyword, Punctuation)),
- (r'(bool|byte|char|decimal|double|dynamic|float|int|long|short)\b\??', Keyword.Type),
- (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'class'),
- (r'(package|using)(\s+)', bygroups(Keyword, Text), 'package'),
- (cs_ident, Name),
- ],
- 'class': [
- (cs_ident, Name.Class, '#pop')
- ],
- 'package': [
- (r'(?=\()', Text, '#pop'), # using (resource)
- ('(' + cs_ident + r'|\.)+', Name.Namespace, '#pop')
- ]
- }
-
- def __init__(self, **options):
- level = get_choice_opt(options, 'unicodelevel', self.tokens.keys(), 'basic')
- if level not in self._all_tokens:
- # compile the regexes now
- self._tokens = self.__class__.process_tokendef(level)
- else:
- self._tokens = self._all_tokens[level]
-
- RegexLexer.__init__(self, **options)