From 0de93e092cd1e7d14a5d2c360b680f2724d98185 Mon Sep 17 00:00:00 2001
From: nimmajbb
Date: Mon, 1 Apr 2019 17:15:14 +0100
Subject: some fixes to the kotlin lexer to work with the corda kotlin codebase

---
 pygments/lexers/jvm.py |  33 +++++++++++--
 tests/test_kotlin.py   | 131 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 161 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_kotlin.py

diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py
index 5a9a74a9..8de6e9f2 100644
--- a/pygments/lexers/jvm.py
+++ b/pygments/lexers/jvm.py
@@ -1006,7 +1006,7 @@ class KotlinLexer(RegexLexer):
 
     .. versionadded:: 1.5
     """
-    
+
     name = 'Kotlin'
     aliases = ['kotlin']
     filenames = ['*.kt']
@@ -1017,15 +1017,22 @@ class KotlinLexer(RegexLexer):
     kt_name = ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' +
                '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc', 'Cf',
                                  'Mn', 'Mc') + ']*')
-    kt_id = '(' + kt_name + '|`' + kt_name + '`)'
+
+    kt_space_name = ('@?[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' +
+                     '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc', 'Cf',
+                                       'Mn', 'Mc', 'Zs') + ',-]*')
+
+    kt_id = '(' + kt_name + '|`' + kt_space_name + '`)'
 
     tokens = {
         'root': [
             (r'^\s*\[.*?\]', Name.Attribute),
             (r'[^\S\n]+', Text),
+            (r'\s+', Text),
             (r'\\\n', Text),  # line continuation
             (r'//.*?\n', Comment.Single),
             (r'/[*].*?[*]/', Comment.Multiline),
+            (r'""".*?"""', String),
             (r'\n', Text),
             (r'::|!!|\?[:.]', Operator),
             (r'[~!%^&*()+=|\[\]:;,.<>/?-]', Punctuation),
@@ -1035,11 +1042,14 @@ class KotlinLexer(RegexLexer):
             (r"'\\.'|'[^\\]'", String.Char),
             (r"[0-9](\.[0-9]*)?([eE][+-][0-9]+)?[flFL]?|"
              r"0[xX][0-9a-fA-F]+[Ll]?", Number),
-            (r'(class)(\s+)(object)', bygroups(Keyword, Text, Keyword)),
+            (r'(object)(\s+)(:)(\s+)', bygroups(Keyword, Text, Punctuation, Text), 'class'),
+            (r'(companion)(\s+)(object)', bygroups(Keyword, Text, Keyword)),
             (r'(class|interface|object)(\s+)', bygroups(Keyword, Text), 'class'),
             (r'(package|import)(\s+)', bygroups(Keyword, Text), 'package'),
+            (r'(val|var)(\s+)([(])', bygroups(Keyword, Text, Punctuation), 'property_dec'),
             (r'(val|var)(\s+)', bygroups(Keyword, Text), 'property'),
             (r'(fun)(\s+)', bygroups(Keyword, Text), 'function'),
+            (r'(inline fun)(\s+)', bygroups(Keyword, Text), 'function'),
             (r'(abstract|annotation|as|break|by|catch|class|companion|const|'
              r'constructor|continue|crossinline|data|do|dynamic|else|enum|'
              r'external|false|final|finally|for|fun|get|if|import|in|infix|'
@@ -1058,9 +1068,26 @@ class KotlinLexer(RegexLexer):
         'property': [
             (kt_id, Name.Property, '#pop')
         ],
+        'property_dec': [
+            (r'(,)(\s*)', bygroups(Punctuation, Text)),
+            (r'(:)(\s*)', bygroups(Punctuation, Text)),
+            (r'<', Punctuation, 'generic'),
+            (r'([)])', Punctuation, '#pop'),
+            (kt_id, Name.Property)
+        ],
         'function': [
+            (r'<', Punctuation, 'generic'),
+            (kt_id + r'([.])' + kt_id, bygroups(Name.Class, Punctuation, Name.Function), '#pop'),
             (kt_id, Name.Function, '#pop')
         ],
+        'generic': [
+            (r'(>)(\s*)', bygroups(Punctuation, Text), '#pop'),
+            (r':', Punctuation),
+            (r'(reified|out|in)\b', Keyword),
+            (r',', Text),
+            (r'\s+', Text),
+            (kt_id, Name)
+        ]
     }
 
 
diff --git a/tests/test_kotlin.py b/tests/test_kotlin.py
new file mode 100644
index 00000000..7c733ad9
--- /dev/null
+++ b/tests/test_kotlin.py
@@ -0,0 +1,131 @@
+# -*- coding: utf-8 -*-
+"""
+    Basic KotlinLexer Test
+    ~~~~~~~~~~~~~~~~~~~~~~
+
+    :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+""" + +import unittest + +from pygments.token import Text, Name, Operator, Keyword, Number, Punctuation, String +from pygments.lexers import KotlinLexer + +class KotlinTest(unittest.TestCase): + + def setUp(self): + self.lexer = KotlinLexer() + self.maxDiff = None + + def testCanCopeWithBackTickNamesInFunctions(self): + fragment = u'fun `wo bble`' + tokens = [ + (Keyword, u'fun'), + (Text, u' '), + (Name.Function, u'`wo bble`'), + (Text, u'\n') + ] + self.assertEqual(tokens, list(self.lexer.get_tokens(fragment))) + + def testCanCopeWithCommasAndDashesInBackTickNames(self): + fragment = u'fun `wo,-bble`' + tokens = [ + (Keyword, u'fun'), + (Text, u' '), + (Name.Function, u'`wo,-bble`'), + (Text, u'\n') + ] + self.assertEqual(tokens, list(self.lexer.get_tokens(fragment))) + + def testCanCopeWithDestructuring(self): + fragment = u'val (a, b) = ' + tokens = [ + (Keyword, u'val'), + (Text, u' '), + (Punctuation, u'('), + (Name.Property, u'a'), + (Punctuation, u','), + (Text, u' '), + (Name.Property, u'b'), + (Punctuation, u')'), + (Text, u' '), + (Punctuation, u'='), + (Text, u' '), + (Text, u'\n') + ] + self.assertEqual(tokens, list(self.lexer.get_tokens(fragment))) + + def testCanCopeGenericsInDestructuring(self): + fragment = u'val (a: List, b: Set) =' + tokens = [ + (Keyword, u'val'), + (Text, u' '), + (Punctuation, u'('), + (Name.Property, u'a'), + (Punctuation, u':'), + (Text, u' '), + (Name.Property, u'List'), + (Punctuation, u'<'), + (Name, u'Something'), + (Punctuation, u'>'), + (Punctuation, u','), + (Text, u' '), + (Name.Property, u'b'), + (Punctuation, u':'), + (Text, u' '), + (Name.Property, u'Set'), + (Punctuation, u'<'), + (Name, u'Wobble'), + (Punctuation, u'>'), + (Punctuation, u')'), + (Text, u' '), + (Punctuation, u'='), + (Text, u'\n') + ] + self.assertEqual(tokens, list(self.lexer.get_tokens(fragment))) + + def testCanCopeWithGenerics(self): + fragment = u'inline fun VaultService.queryBy(): Vault.Page {' + tokens = [ + (Keyword, u'inline fun'), + (Text, u' '), + (Punctuation, u'<'), + (Keyword, u'reified'), + (Text, u' '), + (Name, u'T'), + (Text, u' '), + (Punctuation, u':'), + (Text, u' '), + (Name, u'ContractState'), + (Punctuation, u'>'), + (Text, u' '), + (Name.Class, u'VaultService'), + (Punctuation, u'.'), + (Name.Function, u'queryBy'), + (Punctuation, u'('), + (Punctuation, u')'), + (Punctuation, u':'), + (Text, u' '), + (Name, u'Vault'), + (Punctuation, u'.'), + (Name, u'Page'), + (Punctuation, u'<'), + (Name, u'T'), + (Punctuation, u'>'), + (Text, u' '), + (Punctuation, u'{'), + (Text, u'\n') + ] + self.assertEqual(tokens, list(self.lexer.get_tokens(fragment))) + + def testShouldCopeWithMultilineComments(self): + fragment = u'"""\nthis\nis\na\ncomment"""' + tokens = [ + (String, u'"""\nthis\nis\na\ncomment"""'), + (Text, u'\n') + ] + self.assertEqual(tokens, list(self.lexer.get_tokens(fragment))) + +if __name__ == '__main__': + unittest.main() -- cgit v1.2.1