summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authornimmajbb <devnull@localhost>2019-04-01 17:15:14 +0100
committernimmajbb <devnull@localhost>2019-04-01 17:15:14 +0100
commit0de93e092cd1e7d14a5d2c360b680f2724d98185 (patch)
tree733900bf2891daa361ca2a87ff9977ac6a7c732a
parenta5329233321fc0c557448963a747428af413ce8f (diff)
downloadpygments-0de93e092cd1e7d14a5d2c360b680f2724d98185.tar.gz
some fixes to the Kotlin lexer to work with the Corda Kotlin codebase
-rw-r--r--pygments/lexers/jvm.py33
-rw-r--r--tests/test_kotlin.py131
2 files changed, 161 insertions, 3 deletions
diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py
index 5a9a74a9..8de6e9f2 100644
--- a/pygments/lexers/jvm.py
+++ b/pygments/lexers/jvm.py
@@ -1006,7 +1006,7 @@ class KotlinLexer(RegexLexer):
.. versionadded:: 1.5
"""
-
+
name = 'Kotlin'
aliases = ['kotlin']
filenames = ['*.kt']
@@ -1017,15 +1017,22 @@ class KotlinLexer(RegexLexer):
kt_name = ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' +
'[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc', 'Cf',
'Mn', 'Mc') + ']*')
- kt_id = '(' + kt_name + '|`' + kt_name + '`)'
+
+ kt_space_name = ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' +
+ '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc', 'Cf',
+ 'Mn', 'Mc', 'Zs') + ',-]*')
+
+ kt_id = '(' + kt_name + '|`' + kt_space_name + '`)'
tokens = {
'root': [
(r'^\s*\[.*?\]', Name.Attribute),
(r'[^\S\n]+', Text),
+ (r'\s+', Text),
(r'\\\n', Text), # line continuation
(r'//.*?\n', Comment.Single),
(r'/[*].*?[*]/', Comment.Multiline),
+ (r'""".*?"""', String),
(r'\n', Text),
(r'::|!!|\?[:.]', Operator),
(r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
@@ -1035,11 +1042,14 @@ class KotlinLexer(RegexLexer):
(r"'\\.'|'[^\\]'", String.Char),
(r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?[flFL]?|"
r"0[xX][0-9a-fA-F]+[Ll]?", Number),
- (r'(class)(\s+)(object)', bygroups(Keyword, Text, Keyword)),
+ (r'(object)(\s+)(:)(\s+)', bygroups(Keyword, Text, Punctuation, Text), 'class'),
+ (r'(companion)(\s+)(object)', bygroups(Keyword, Text, Keyword)),
(r'(class|interface|object)(\s+)', bygroups(Keyword, Text), 'class'),
(r'(package|import)(\s+)', bygroups(Keyword, Text), 'package'),
+ (r'(val|var)(\s+)([(])', bygroups(Keyword, Text, Punctuation), 'property_dec'),
(r'(val|var)(\s+)', bygroups(Keyword, Text), 'property'),
(r'(fun)(\s+)', bygroups(Keyword, Text), 'function'),
+ (r'(inline fun)(\s+)', bygroups(Keyword, Text), 'function'),
(r'(abstract|annotation|as|break|by|catch|class|companion|const|'
r'constructor|continue|crossinline|data|do|dynamic|else|enum|'
r'external|false|final|finally|for|fun|get|if|import|in|infix|'
@@ -1058,9 +1068,26 @@ class KotlinLexer(RegexLexer):
'property': [
(kt_id, Name.Property, '#pop')
],
+ 'property_dec': [
+ (r'(,)(\s*)', bygroups(Punctuation, Text)),
+ (r'(:)(\s*)', bygroups(Punctuation, Text)),
+ (r'<', Punctuation, 'generic'),
+ (r'([)])', Punctuation, '#pop'),
+ (kt_id, Name.Property)
+ ],
'function': [
+ (r'<', Punctuation, 'generic'),
+ (r''+kt_id+'([.])'+kt_id, bygroups(Name.Class, Punctuation, Name.Function), '#pop'),
(kt_id, Name.Function, '#pop')
],
+ 'generic': [
+ (r'(>)(\s*)', bygroups(Punctuation, Text), '#pop'),
+ (r':',Punctuation),
+ (r'(reified|out|in)\b', Keyword),
+ (r',',Text),
+ (r'\s+',Text),
+ (kt_id,Name)
+ ]
}
diff --git a/tests/test_kotlin.py b/tests/test_kotlin.py
new file mode 100644
index 00000000..7c733ad9
--- /dev/null
+++ b/tests/test_kotlin.py
@@ -0,0 +1,131 @@
+# -*- coding: utf-8 -*-
+"""
+    Basic KotlinLexer Test
+    ~~~~~~~~~~~~~~~~~~~~~~
+
+ :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import unittest
+
+from pygments.token import Text, Name, Operator, Keyword, Number, Punctuation, String
+from pygments.lexers import KotlinLexer
+
+class KotlinTest(unittest.TestCase):
+
+ def setUp(self):
+ self.lexer = KotlinLexer()
+ self.maxDiff = None
+
+ def testCanCopeWithBackTickNamesInFunctions(self):
+ fragment = u'fun `wo bble`'
+ tokens = [
+ (Keyword, u'fun'),
+ (Text, u' '),
+ (Name.Function, u'`wo bble`'),
+ (Text, u'\n')
+ ]
+ self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
+
+ def testCanCopeWithCommasAndDashesInBackTickNames(self):
+ fragment = u'fun `wo,-bble`'
+ tokens = [
+ (Keyword, u'fun'),
+ (Text, u' '),
+ (Name.Function, u'`wo,-bble`'),
+ (Text, u'\n')
+ ]
+ self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
+
+ def testCanCopeWithDestructuring(self):
+ fragment = u'val (a, b) = '
+ tokens = [
+ (Keyword, u'val'),
+ (Text, u' '),
+ (Punctuation, u'('),
+ (Name.Property, u'a'),
+ (Punctuation, u','),
+ (Text, u' '),
+ (Name.Property, u'b'),
+ (Punctuation, u')'),
+ (Text, u' '),
+ (Punctuation, u'='),
+ (Text, u' '),
+ (Text, u'\n')
+ ]
+ self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
+
+ def testCanCopeGenericsInDestructuring(self):
+ fragment = u'val (a: List<Something>, b: Set<Wobble>) ='
+ tokens = [
+ (Keyword, u'val'),
+ (Text, u' '),
+ (Punctuation, u'('),
+ (Name.Property, u'a'),
+ (Punctuation, u':'),
+ (Text, u' '),
+ (Name.Property, u'List'),
+ (Punctuation, u'<'),
+ (Name, u'Something'),
+ (Punctuation, u'>'),
+ (Punctuation, u','),
+ (Text, u' '),
+ (Name.Property, u'b'),
+ (Punctuation, u':'),
+ (Text, u' '),
+ (Name.Property, u'Set'),
+ (Punctuation, u'<'),
+ (Name, u'Wobble'),
+ (Punctuation, u'>'),
+ (Punctuation, u')'),
+ (Text, u' '),
+ (Punctuation, u'='),
+ (Text, u'\n')
+ ]
+ self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
+
+ def testCanCopeWithGenerics(self):
+ fragment = u'inline fun <reified T : ContractState> VaultService.queryBy(): Vault.Page<T> {'
+ tokens = [
+ (Keyword, u'inline fun'),
+ (Text, u' '),
+ (Punctuation, u'<'),
+ (Keyword, u'reified'),
+ (Text, u' '),
+ (Name, u'T'),
+ (Text, u' '),
+ (Punctuation, u':'),
+ (Text, u' '),
+ (Name, u'ContractState'),
+ (Punctuation, u'>'),
+ (Text, u' '),
+ (Name.Class, u'VaultService'),
+ (Punctuation, u'.'),
+ (Name.Function, u'queryBy'),
+ (Punctuation, u'('),
+ (Punctuation, u')'),
+ (Punctuation, u':'),
+ (Text, u' '),
+ (Name, u'Vault'),
+ (Punctuation, u'.'),
+ (Name, u'Page'),
+ (Punctuation, u'<'),
+ (Name, u'T'),
+ (Punctuation, u'>'),
+ (Text, u' '),
+ (Punctuation, u'{'),
+ (Text, u'\n')
+ ]
+ self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
+
+ def testShouldCopeWithMultilineComments(self):
+ fragment = u'"""\nthis\nis\na\ncomment"""'
+ tokens = [
+ (String, u'"""\nthis\nis\na\ncomment"""'),
+ (Text, u'\n')
+ ]
+ self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
+
+if __name__ == '__main__':
+ unittest.main()