Matlab class properties (#1466)

* WIP: Add failing test for a matlab class with properties.
* Add some missing keywords
* Add leading \s* matchers to things above the command form regex, as it tends to swallow keywords otherwise.
* Add support for the special 'properties' block syntax.
* Fix apparent infinite loop when given garbage input.
* Use includes to clean up some of my copypasta.
* Fix negative lookahead when there's more than one space between operators.
* Use Whitespace not Text for spaces; combine adjacent whitespace.
* Add support for declarative property constraints.
author: Dan <drmoose@users.noreply.github.com> 2021-01-18 12:01:21 -0500
committer: GitHub <noreply@github.com> 2021-01-18 18:01:21 +0100
commit: 423c44a451db7e5f63147b1c1519661d745fc43a (patch)
tree: 770ac1cc23c93e6cb4ee9e4f0ea3dbf705b34a80
parent: 9647d2ae506b8e05ebabe9243df707bac901a6a3 (diff)
download: pygments-git-423c44a451db7e5f63147b1c1519661d745fc43a.tar.gz
2 files changed, 201 insertions, 68 deletions
diff --git a/pygments/lexers/matlab.py b/pygments/lexers/matlab.py
index 0654cc0c..8b33e5ee 100644
--- a/pygments/lexers/matlab.py
+++ b/pygments/lexers/matlab.py
@@ -11,7 +11,7 @@
 import re
 
 from pygments.lexer import Lexer, RegexLexer, bygroups, default, words, \
-    do_insertions
+    do_insertions, include
 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
     Number, Punctuation, Generic, Whitespace
 
@@ -20,6 +20,7 @@ from pygments.lexers import _scilab_builtins
 __all__ = ['MatlabLexer', 'MatlabSessionLexer', 'OctaveLexer', 'ScilabLexer']
 
 
+
 class MatlabLexer(RegexLexer):
     """
     For Matlab source code.
@@ -75,24 +76,54 @@ class MatlabLexer(RegexLexer):
     _operators = r'-|==|~=|<=|>=|<|>|&&|&|~|\|\|?|\.\*|\*|\+|\.\^|\.\\|\./|/|\\'
 
     tokens = {
+        'expressions': [
+            # operators:
+            (_operators, Operator),
+
+            # numbers (must come before punctuation to handle `.5`; cannot use
+            # `\b` due to e.g. `5. + .5`).
+            (r'(?<!\w)((\d+\.\d*)|(\d*\.\d+))([eEf][+-]?\d+)?(?!\w)', Number.Float),
+            (r'\b\d+[eEf][+-]?[0-9]+\b', Number.Float),
+            (r'\b\d+\b', Number.Integer),
+
+            # punctuation:
+            (r'\[|\]|\(|\)|\{|\}|:|@|\.|,', Punctuation),
+            (r'=|:|;', Punctuation),
+
+            # quote can be transpose, instead of string:
+            # (not great, but handles common cases...)
+            (r'(?<=[\w)\].])\'+', Operator),
+
+            (r'"(""|[^"])*"', String),
+
+            (r'(?<![\w)\].])\'', String, 'string'),
+            (r'[a-zA-Z_]\w*', Name),
+            (r'\s+', Whitespace),
+            (r'.', Text),
+        ],
         'root': [
             # line starting with '!' is sent as a system command.  not sure what
             # label to use...
             (r'^!.*', String.Other),
             (r'%\{\s*\n', Comment.Multiline, 'blockcomment'),
             (r'%.*$', Comment),
-            (r'^\s*function\b', Keyword, 'deffunc'),
+            (r'(\s*^\s*)(function)\b', bygroups(Whitespace, Keyword), 'deffunc'),
+            (r'(\s*^\s*)(properties)(\s+)(\()',
+             bygroups(Whitespace, Keyword, Whitespace, Punctuation),
+             ('defprops', 'propattrs')),
+            (r'(\s*^\s*)(properties)\b',
+             bygroups(Whitespace, Keyword), 'defprops'),
 
             # from 'iskeyword' on version 9.4 (R2018a):
             # Check that there is no preceding dot, as keywords are valid field
             # names.
-            (words(('break', 'case', 'catch', 'classdef', 'continue', 'else',
-                    'elseif', 'end', 'for', 'function',
-                    'global', 'if', 'otherwise', 'parfor',
+            (words(('break', 'case', 'catch', 'classdef', 'continue',
+                    'dynamicprops', 'else', 'elseif', 'end', 'for', 'function',
+                    'global', 'if', 'methods', 'otherwise', 'parfor',
                     'persistent', 'return', 'spmd', 'switch',
                     'try', 'while'),
-                   prefix=r'(?<!\.)', suffix=r'\b'),
-             Keyword),
+                   prefix=r'(?<!\.)(\s*)(', suffix=r')\b'),
+             bygroups(Whitespace, Keyword)),
 
             ("(" + "|".join(elfun + specfun + elmat) + r')\b',  Name.Builtin),
 
@@ -104,31 +135,10 @@ class MatlabLexer(RegexLexer):
             # is recognized if it is either surrounded by spaces or by no
             # spaces on both sides; only the former case matters for us.  (This
             # allows distinguishing `cd ./foo` from `cd ./ foo`.)
-            (r'(?:^|(?<=;))(\s*)(\w+)(\s+)(?!=|\(|(?:%s)\s+)' % _operators,
-             bygroups(Text, Name, Text), 'commandargs'),
+            (r'(?:^|(?<=;))(\s*)(\w+)(\s+)(?!=|\(|(?:%s)\s+|\s)' % _operators,
+             bygroups(Whitespace, Name, Whitespace), 'commandargs'),
 
-            # operators:
-            (_operators, Operator),
-
-            # numbers (must come before punctuation to handle `.5`; cannot use
-            # `\b` due to e.g. `5. + .5`).
-            (r'(?<!\w)((\d+\.\d*)|(\d*\.\d+))([eEf][+-]?\d+)?(?!\w)', Number.Float),
-            (r'\b\d+[eEf][+-]?[0-9]+\b', Number.Float),
-            (r'\b\d+\b', Number.Integer),
-
-            # punctuation:
-            (r'\[|\]|\(|\)|\{|\}|:|@|\.|,', Punctuation),
-            (r'=|:|;', Punctuation),
-
-            # quote can be transpose, instead of string:
-            # (not great, but handles common cases...)
-            (r'(?<=[\w)\].])\'+', Operator),
-
-            (r'"(""|[^"])*"', String),
-
-            (r'(?<![\w)\].])\'', String, 'string'),
-            (r'[a-zA-Z_]\w*', Name),
-            (r'.', Text),
+            include('expressions')
         ],
         'blockcomment': [
             (r'^\s*%\}', Comment.Multiline, '#pop'),
@@ -141,7 +151,26 @@ class MatlabLexer(RegexLexer):
                       Whitespace, Name.Function, Punctuation, Text,
                       Punctuation, Whitespace), '#pop'),
             # function with no args
-            (r'(\s*)([a-zA-Z_]\w*)', bygroups(Text, Name.Function), '#pop'),
+            (r'(\s*)([a-zA-Z_]\w*)',
+             bygroups(Whitespace, Name.Function), '#pop'),
+        ],
+        'propattrs': [
+            (r'(\w+)(\s*)(=)(\s*)(\d+)',
+             bygroups(Name.Builtin, Whitespace, Punctuation, Whitespace,
+                      Number)),
+            (r'(\w+)(\s*)(=)(\s*)([a-zA-Z]\w*)',
+             bygroups(Name.Builtin, Whitespace, Punctuation, Whitespace,
+                      Keyword)),
+            (r',', Punctuation),
+            (r'\)', Punctuation, '#pop'),
+            (r'\s+', Whitespace),
+            (r'.', Text),
+        ],
+        'defprops': [
+            (r'%\{\s*\n', Comment.Multiline, 'blockcomment'),
+            (r'%.*$', Comment),
+            (r'(?<!\.)end\b', Keyword, '#pop'),
+            include('expressions'),
         ],
         'string': [
             (r"[^']*'", String, '#pop'),
@@ -153,7 +182,7 @@ class MatlabLexer(RegexLexer):
             # equal sign or operator
             (r"=", Punctuation, '#pop'),
             (_operators, Operator, '#pop'),
-            (r"[ \t]+", Text),
+            (r"[ \t]+", Whitespace),
             ("'[^']*'", String),
             (r"[^';\s]+", String),
             (";", Punctuation, '#pop'),
@@ -642,7 +671,8 @@ class OctaveLexer(RegexLexer):
                       Whitespace, Name.Function, Punctuation, Text,
                       Punctuation, Whitespace), '#pop'),
             # function with no args
-            (r'(\s*)([a-zA-Z_]\w*)', bygroups(Text, Name.Function), '#pop'),
+            (r'(\s*)([a-zA-Z_]\w*)',
+             bygroups(Whitespace, Name.Function), '#pop'),
         ],
     }
 
diff --git a/tests/test_matlab.py b/tests/test_matlab.py
index 945a3434..b375c8da 100644
--- a/tests/test_matlab.py
+++ b/tests/test_matlab.py
@@ -36,7 +36,7 @@ def test_single_line(lexer):
         (Token.Literal.Number.Integer, '101325'),
         (Token.Punctuation, ')'),
         (Token.Punctuation, ';'),
-        (Token.Text, '\n'),
+        (Token.Text.Whitespace, '\n'),
     ]
     assert list(lexer.get_tokens(fragment)) == tokens
 
@@ -56,14 +56,14 @@ def test_line_continuation(lexer):
         (Token.Literal.Number.Integer, '300'),
         (Token.Punctuation, ','),
         (Token.Keyword, '...'),
-        (Token.Text, '\n'),
+        (Token.Text.Whitespace, '\n'),
         (Token.Literal.String, "'"),
         (Token.Literal.String, "P'"),
         (Token.Punctuation, ','),
         (Token.Literal.Number.Integer, '101325'),
         (Token.Punctuation, ')'),
         (Token.Punctuation, ';'),
-        (Token.Text, '\n'),
+        (Token.Text.Whitespace, '\n'),
     ]
     assert list(lexer.get_tokens(fragment)) == tokens
 
@@ -73,37 +73,29 @@ def test_keywords_ended_by_newline(lexer):
     fragment = "if x > 100\n    disp('x > 100')\nelse\n    disp('x < 100')\nend\n"
     tokens = [
         (Token.Keyword, 'if'),
-        (Token.Text, ' '),
+        (Token.Text.Whitespace, ' '),
         (Token.Name, 'x'),
-        (Token.Text, ' '),
+        (Token.Text.Whitespace, ' '),
         (Token.Operator, '>'),
-        (Token.Text, ' '),
+        (Token.Text.Whitespace, ' '),
         (Token.Literal.Number.Integer, '100'),
-        (Token.Text, '\n'),
-        (Token.Text, ' '),
-        (Token.Text, ' '),
-        (Token.Text, ' '),
-        (Token.Text, ' '),
+        (Token.Text.Whitespace, '\n    '),
         (Token.Name.Builtin, 'disp'),
         (Token.Punctuation, '('),
         (Token.Literal.String, "'"),
         (Token.Literal.String, "x > 100'"),
         (Token.Punctuation, ')'),
-        (Token.Text, '\n'),
+        (Token.Text.Whitespace, '\n'),
         (Token.Keyword, 'else'),
-        (Token.Text, '\n'),
-        (Token.Text, ' '),
-        (Token.Text, ' '),
-        (Token.Text, ' '),
-        (Token.Text, ' '),
+        (Token.Text.Whitespace, '\n    '),
         (Token.Name.Builtin, 'disp'),
         (Token.Punctuation, '('),
         (Token.Literal.String, "'"),
         (Token.Literal.String, "x < 100'"),
         (Token.Punctuation, ')'),
-        (Token.Text, '\n'),
+        (Token.Text.Whitespace, '\n'),
         (Token.Keyword, 'end'),
-        (Token.Text, '\n'),
+        (Token.Text.Whitespace, '\n'),
     ]
     assert list(lexer.get_tokens(fragment)) == tokens
 
@@ -123,14 +115,14 @@ def test_comment_after_continuation(lexer):
         (Token.Punctuation, ','),
         (Token.Keyword, '...'),
         (Token.Comment, ' a comment'),
-        (Token.Text, '\n'),
+        (Token.Text.Whitespace, '\n'),
         (Token.Literal.String, "'"),
         (Token.Literal.String, "P'"),
         (Token.Punctuation, ','),
         (Token.Literal.Number.Integer, '101325'),
         (Token.Punctuation, ')'),
         (Token.Punctuation, ';'),
-        (Token.Text, '\n'),
+        (Token.Text.Whitespace, '\n'),
     ]
     assert list(lexer.get_tokens(fragment)) == tokens
 
@@ -142,13 +134,12 @@ def test_multiple_spaces_variable_assignment(lexer):
     fragment = 'x  = 100;\n'
     tokens = [
         (Token.Name, 'x'),
-        (Token.Text, ' '),
-        (Token.Text, ' '),
+        (Token.Text.Whitespace, '  '),
         (Token.Punctuation, '='),
-        (Token.Text, ' '),
+        (Token.Text.Whitespace, ' '),
         (Token.Literal.Number.Integer, '100'),
         (Token.Punctuation, ';'),
-        (Token.Text, '\n'),
+        (Token.Text.Whitespace, '\n'),
     ]
     assert list(lexer.get_tokens(fragment)) == tokens
 
@@ -160,13 +151,12 @@ def test_operator_multiple_space(lexer):
     fragment = 'x  > 100;\n'
     tokens = [
         (Token.Name, 'x'),
-        (Token.Text, ' '),
-        (Token.Text, ' '),
+        (Token.Text.Whitespace, '  '),
         (Token.Operator, '>'),
-        (Token.Text, ' '),
+        (Token.Text.Whitespace, ' '),
         (Token.Literal.Number.Integer, '100'),
         (Token.Punctuation, ';'),
-        (Token.Text, '\n'),
+        (Token.Text.Whitespace, '\n'),
     ]
     assert list(lexer.get_tokens(fragment)) == tokens
 
@@ -176,12 +166,12 @@ def test_one_space_assignment(lexer):
     fragment = 'x = 100;\n'
     tokens = [
         (Token.Name, 'x'),
-        (Token.Text, ' '),
+        (Token.Text.Whitespace, ' '),
         (Token.Punctuation, '='),
-        (Token.Text, ' '),
+        (Token.Text.Whitespace, ' '),
         (Token.Literal.Number.Integer, '100'),
         (Token.Punctuation, ';'),
-        (Token.Text, '\n'),
+        (Token.Text.Whitespace, '\n'),
     ]
     assert list(lexer.get_tokens(fragment)) == tokens
 
@@ -195,8 +185,121 @@ def test_command_mode(lexer):
     fragment = 'help sin\n'
     tokens = [
         (Token.Name, 'help'),
-        (Token.Text, ' '),
+        (Token.Text.Whitespace, ' '),
         (Token.Literal.String, 'sin'),
-        (Token.Text, '\n'),
+        (Token.Text.Whitespace, '\n'),
     ]
     assert list(lexer.get_tokens(fragment)) == tokens
+
+
+
+MATLAB_SAMPLE_CLASS = """
+classdef Name < dynamicprops
+    properties
+        % i am a comment
+        name1
+        name2
+    end
+    properties (Constant = true, SetAccess = protected)
+        % i too am a comment
+        matrix = [0, 1, 2];
+        string = 'i am a string'
+    end
+    methods
+        % i am also a comment
+        function self = Name()
+            % i am a comment inside a constructor
+        end
+    end
+end
+""".strip()
+
+def test_classes_with_properties(lexer):
+    whitespace = Token.Text.Whitespace
+    tokens = [
+        (Token.Keyword, 'classdef'),
+        (whitespace, ' '),
+        (Token.Name, 'Name'),
+        (whitespace, ' '),
+        (Token.Operator, '<'),
+        (whitespace, ' '),
+        (Token.Keyword, 'dynamicprops'),
+        (whitespace, '\n    '),
+        (Token.Keyword, 'properties'),
+        (whitespace, '\n        '),
+        (Token.Comment, '% i am a comment'),
+        (whitespace, '\n        '),
+        (Token.Name, 'name1'),
+        (whitespace, '\n        '),
+        (Token.Name, 'name2'),
+        (whitespace, '\n    '),
+        (Token.Keyword, 'end'),
+        (whitespace, '\n    '),
+        (Token.Keyword, 'properties'),
+        (whitespace, ' '),
+        (Token.Punctuation, '('),
+        (Token.Name.Builtin, 'Constant'),
+        (whitespace, ' '),
+        (Token.Punctuation, '='),
+        (whitespace, ' '),
+        (Token.Keyword, 'true'),
+        (Token.Punctuation, ','),
+        (whitespace, ' '),
+        (Token.Name.Builtin, 'SetAccess'),
+        (whitespace, ' '),
+        (Token.Punctuation, '='),
+        (whitespace, ' '),
+        (Token.Keyword, 'protected'),
+        (Token.Punctuation, ')'),
+        (whitespace, "\n        "),
+        (Token.Comment, '% i too am a comment'),
+        (whitespace, '\n        '),
+        (Token.Name, 'matrix'),
+        (whitespace, ' '),
+        (Token.Punctuation, '='),
+        (whitespace, ' '),
+        (Token.Punctuation, '['),
+        (Token.Literal.Number.Integer, '0'),
+        (Token.Punctuation, ','),
+        (whitespace, ' '),
+        (Token.Literal.Number.Integer, '1'),
+        (Token.Punctuation, ','),
+        (whitespace, ' '),
+        (Token.Literal.Number.Integer, '2'),
+        (Token.Punctuation, ']'),
+        (Token.Punctuation, ';'),
+        (whitespace, '\n        '),
+        (Token.Name, 'string'),
+        (whitespace, ' '),
+        (Token.Punctuation, '='),
+        (whitespace, ' '),
+        (Token.Literal.String, "'"),
+        (Token.Literal.String, "i am a string'"),
+        (whitespace, '\n    '),
+        (Token.Keyword, 'end'),
+        (whitespace, '\n    '),
+        (Token.Keyword, 'methods'),
+        (whitespace, '\n        '),
+        (Token.Comment, '% i am also a comment'),
+        (whitespace, '\n        '),
+        (Token.Keyword, 'function'),
+        (whitespace, ' '),
+        (Token.Text, 'self'),
+        (whitespace, ' '),
+        (Token.Punctuation, '='),
+        (whitespace, ' '),
+        (Token.Name.Function, 'Name'),
+        (Token.Punctuation, '('),
+        (Token.Punctuation, ')'),
+        (whitespace, '\n            '),
+        (Token.Comment, '% i am a comment inside a constructor'),
+        (whitespace, '\n        '),
+        (Token.Keyword, 'end'),
+        (whitespace, '\n    '),
+        (Token.Keyword, 'end'),
+        (whitespace, '\n'),
+        (Token.Keyword, 'end'),
+        (whitespace, '\n'),
+    ]
+    assert list(lexer.get_tokens(MATLAB_SAMPLE_CLASS)) == tokens
+
author	Dan <drmoose@users.noreply.github.com>	2021-01-18 12:01:21 -0500
committer	GitHub <noreply@github.com>	2021-01-18 18:01:21 +0100
commit	423c44a451db7e5f63147b1c1519661d745fc43a (patch)
tree	770ac1cc23c93e6cb4ee9e4f0ea3dbf705b34a80
parent	9647d2ae506b8e05ebabe9243df707bac901a6a3 (diff)
download	pygments-git-423c44a451db7e5f63147b1c1519661d745fc43a.tar.gz