diff options
author | Dan <drmoose@users.noreply.github.com> | 2021-01-18 12:01:21 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-18 18:01:21 +0100 |
commit | 423c44a451db7e5f63147b1c1519661d745fc43a (patch) | |
tree | 770ac1cc23c93e6cb4ee9e4f0ea3dbf705b34a80 | |
parent | 9647d2ae506b8e05ebabe9243df707bac901a6a3 (diff) | |
download | pygments-git-423c44a451db7e5f63147b1c1519661d745fc43a.tar.gz |
Matlab class properties (#1466)
* WIP: Add a failing test for a Matlab class with properties.
* Add some missing keywords
* Add leading \s* matchers to the rules that precede the command-form regex, since that regex otherwise tends to swallow keywords.
* Add support for the special 'properties' block syntax.
* Fix apparent infinite loop when given garbage input.
* Use includes to clean up some of my copypasta.
* Fix the command-form negative lookahead when there's more than one space between a name and the following operator.
* Use the Whitespace token instead of Text for spaces; combine adjacent whitespace into a single token.
* Add support for declarative property constraints.
-rw-r--r-- | pygments/lexers/matlab.py | 98 | ||||
-rw-r--r-- | tests/test_matlab.py | 171 |
2 files changed, 201 insertions, 68 deletions
diff --git a/pygments/lexers/matlab.py b/pygments/lexers/matlab.py index 0654cc0c..8b33e5ee 100644 --- a/pygments/lexers/matlab.py +++ b/pygments/lexers/matlab.py @@ -11,7 +11,7 @@ import re from pygments.lexer import Lexer, RegexLexer, bygroups, default, words, \ - do_insertions + do_insertions, include from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Generic, Whitespace @@ -20,6 +20,7 @@ from pygments.lexers import _scilab_builtins __all__ = ['MatlabLexer', 'MatlabSessionLexer', 'OctaveLexer', 'ScilabLexer'] + class MatlabLexer(RegexLexer): """ For Matlab source code. @@ -75,24 +76,54 @@ class MatlabLexer(RegexLexer): _operators = r'-|==|~=|<=|>=|<|>|&&|&|~|\|\|?|\.\*|\*|\+|\.\^|\.\\|\./|/|\\' tokens = { + 'expressions': [ + # operators: + (_operators, Operator), + + # numbers (must come before punctuation to handle `.5`; cannot use + # `\b` due to e.g. `5. + .5`). + (r'(?<!\w)((\d+\.\d*)|(\d*\.\d+))([eEf][+-]?\d+)?(?!\w)', Number.Float), + (r'\b\d+[eEf][+-]?[0-9]+\b', Number.Float), + (r'\b\d+\b', Number.Integer), + + # punctuation: + (r'\[|\]|\(|\)|\{|\}|:|@|\.|,', Punctuation), + (r'=|:|;', Punctuation), + + # quote can be transpose, instead of string: + # (not great, but handles common cases...) + (r'(?<=[\w)\].])\'+', Operator), + + (r'"(""|[^"])*"', String), + + (r'(?<![\w)\].])\'', String, 'string'), + (r'[a-zA-Z_]\w*', Name), + (r'\s+', Whitespace), + (r'.', Text), + ], 'root': [ # line starting with '!' is sent as a system command. not sure what # label to use... 
(r'^!.*', String.Other), (r'%\{\s*\n', Comment.Multiline, 'blockcomment'), (r'%.*$', Comment), - (r'^\s*function\b', Keyword, 'deffunc'), + (r'(\s*^\s*)(function)\b', bygroups(Whitespace, Keyword), 'deffunc'), + (r'(\s*^\s*)(properties)(\s+)(\()', + bygroups(Whitespace, Keyword, Whitespace, Punctuation), + ('defprops', 'propattrs')), + (r'(\s*^\s*)(properties)\b', + bygroups(Whitespace, Keyword), 'defprops'), # from 'iskeyword' on version 9.4 (R2018a): # Check that there is no preceding dot, as keywords are valid field # names. - (words(('break', 'case', 'catch', 'classdef', 'continue', 'else', - 'elseif', 'end', 'for', 'function', - 'global', 'if', 'otherwise', 'parfor', + (words(('break', 'case', 'catch', 'classdef', 'continue', + 'dynamicprops', 'else', 'elseif', 'end', 'for', 'function', + 'global', 'if', 'methods', 'otherwise', 'parfor', 'persistent', 'return', 'spmd', 'switch', 'try', 'while'), - prefix=r'(?<!\.)', suffix=r'\b'), - Keyword), + prefix=r'(?<!\.)(\s*)(', suffix=r')\b'), + bygroups(Whitespace, Keyword)), ("(" + "|".join(elfun + specfun + elmat) + r')\b', Name.Builtin), @@ -104,31 +135,10 @@ class MatlabLexer(RegexLexer): # is recognized if it is either surrounded by spaces or by no # spaces on both sides; only the former case matters for us. (This # allows distinguishing `cd ./foo` from `cd ./ foo`.) - (r'(?:^|(?<=;))(\s*)(\w+)(\s+)(?!=|\(|(?:%s)\s+)' % _operators, - bygroups(Text, Name, Text), 'commandargs'), + (r'(?:^|(?<=;))(\s*)(\w+)(\s+)(?!=|\(|(?:%s)\s+|\s)' % _operators, + bygroups(Whitespace, Name, Whitespace), 'commandargs'), - # operators: - (_operators, Operator), - - # numbers (must come before punctuation to handle `.5`; cannot use - # `\b` due to e.g. `5. + .5`). 
- (r'(?<!\w)((\d+\.\d*)|(\d*\.\d+))([eEf][+-]?\d+)?(?!\w)', Number.Float), - (r'\b\d+[eEf][+-]?[0-9]+\b', Number.Float), - (r'\b\d+\b', Number.Integer), - - # punctuation: - (r'\[|\]|\(|\)|\{|\}|:|@|\.|,', Punctuation), - (r'=|:|;', Punctuation), - - # quote can be transpose, instead of string: - # (not great, but handles common cases...) - (r'(?<=[\w)\].])\'+', Operator), - - (r'"(""|[^"])*"', String), - - (r'(?<![\w)\].])\'', String, 'string'), - (r'[a-zA-Z_]\w*', Name), - (r'.', Text), + include('expressions') ], 'blockcomment': [ (r'^\s*%\}', Comment.Multiline, '#pop'), @@ -141,7 +151,26 @@ class MatlabLexer(RegexLexer): Whitespace, Name.Function, Punctuation, Text, Punctuation, Whitespace), '#pop'), # function with no args - (r'(\s*)([a-zA-Z_]\w*)', bygroups(Text, Name.Function), '#pop'), + (r'(\s*)([a-zA-Z_]\w*)', + bygroups(Whitespace, Name.Function), '#pop'), + ], + 'propattrs': [ + (r'(\w+)(\s*)(=)(\s*)(\d+)', + bygroups(Name.Builtin, Whitespace, Punctuation, Whitespace, + Number)), + (r'(\w+)(\s*)(=)(\s*)([a-zA-Z]\w*)', + bygroups(Name.Builtin, Whitespace, Punctuation, Whitespace, + Keyword)), + (r',', Punctuation), + (r'\)', Punctuation, '#pop'), + (r'\s+', Whitespace), + (r'.', Text), + ], + 'defprops': [ + (r'%\{\s*\n', Comment.Multiline, 'blockcomment'), + (r'%.*$', Comment), + (r'(?<!\.)end\b', Keyword, '#pop'), + include('expressions'), ], 'string': [ (r"[^']*'", String, '#pop'), @@ -153,7 +182,7 @@ class MatlabLexer(RegexLexer): # equal sign or operator (r"=", Punctuation, '#pop'), (_operators, Operator, '#pop'), - (r"[ \t]+", Text), + (r"[ \t]+", Whitespace), ("'[^']*'", String), (r"[^';\s]+", String), (";", Punctuation, '#pop'), @@ -642,7 +671,8 @@ class OctaveLexer(RegexLexer): Whitespace, Name.Function, Punctuation, Text, Punctuation, Whitespace), '#pop'), # function with no args - (r'(\s*)([a-zA-Z_]\w*)', bygroups(Text, Name.Function), '#pop'), + (r'(\s*)([a-zA-Z_]\w*)', + bygroups(Whitespace, Name.Function), '#pop'), ], } diff --git 
a/tests/test_matlab.py b/tests/test_matlab.py index 945a3434..b375c8da 100644 --- a/tests/test_matlab.py +++ b/tests/test_matlab.py @@ -36,7 +36,7 @@ def test_single_line(lexer): (Token.Literal.Number.Integer, '101325'), (Token.Punctuation, ')'), (Token.Punctuation, ';'), - (Token.Text, '\n'), + (Token.Text.Whitespace, '\n'), ] assert list(lexer.get_tokens(fragment)) == tokens @@ -56,14 +56,14 @@ def test_line_continuation(lexer): (Token.Literal.Number.Integer, '300'), (Token.Punctuation, ','), (Token.Keyword, '...'), - (Token.Text, '\n'), + (Token.Text.Whitespace, '\n'), (Token.Literal.String, "'"), (Token.Literal.String, "P'"), (Token.Punctuation, ','), (Token.Literal.Number.Integer, '101325'), (Token.Punctuation, ')'), (Token.Punctuation, ';'), - (Token.Text, '\n'), + (Token.Text.Whitespace, '\n'), ] assert list(lexer.get_tokens(fragment)) == tokens @@ -73,37 +73,29 @@ def test_keywords_ended_by_newline(lexer): fragment = "if x > 100\n disp('x > 100')\nelse\n disp('x < 100')\nend\n" tokens = [ (Token.Keyword, 'if'), - (Token.Text, ' '), + (Token.Text.Whitespace, ' '), (Token.Name, 'x'), - (Token.Text, ' '), + (Token.Text.Whitespace, ' '), (Token.Operator, '>'), - (Token.Text, ' '), + (Token.Text.Whitespace, ' '), (Token.Literal.Number.Integer, '100'), - (Token.Text, '\n'), - (Token.Text, ' '), - (Token.Text, ' '), - (Token.Text, ' '), - (Token.Text, ' '), + (Token.Text.Whitespace, '\n '), (Token.Name.Builtin, 'disp'), (Token.Punctuation, '('), (Token.Literal.String, "'"), (Token.Literal.String, "x > 100'"), (Token.Punctuation, ')'), - (Token.Text, '\n'), + (Token.Text.Whitespace, '\n'), (Token.Keyword, 'else'), - (Token.Text, '\n'), - (Token.Text, ' '), - (Token.Text, ' '), - (Token.Text, ' '), - (Token.Text, ' '), + (Token.Text.Whitespace, '\n '), (Token.Name.Builtin, 'disp'), (Token.Punctuation, '('), (Token.Literal.String, "'"), (Token.Literal.String, "x < 100'"), (Token.Punctuation, ')'), - (Token.Text, '\n'), + (Token.Text.Whitespace, '\n'), (Token.Keyword, 
'end'), - (Token.Text, '\n'), + (Token.Text.Whitespace, '\n'), ] assert list(lexer.get_tokens(fragment)) == tokens @@ -123,14 +115,14 @@ def test_comment_after_continuation(lexer): (Token.Punctuation, ','), (Token.Keyword, '...'), (Token.Comment, ' a comment'), - (Token.Text, '\n'), + (Token.Text.Whitespace, '\n'), (Token.Literal.String, "'"), (Token.Literal.String, "P'"), (Token.Punctuation, ','), (Token.Literal.Number.Integer, '101325'), (Token.Punctuation, ')'), (Token.Punctuation, ';'), - (Token.Text, '\n'), + (Token.Text.Whitespace, '\n'), ] assert list(lexer.get_tokens(fragment)) == tokens @@ -142,13 +134,12 @@ def test_multiple_spaces_variable_assignment(lexer): fragment = 'x = 100;\n' tokens = [ (Token.Name, 'x'), - (Token.Text, ' '), - (Token.Text, ' '), + (Token.Text.Whitespace, ' '), (Token.Punctuation, '='), - (Token.Text, ' '), + (Token.Text.Whitespace, ' '), (Token.Literal.Number.Integer, '100'), (Token.Punctuation, ';'), - (Token.Text, '\n'), + (Token.Text.Whitespace, '\n'), ] assert list(lexer.get_tokens(fragment)) == tokens @@ -160,13 +151,12 @@ def test_operator_multiple_space(lexer): fragment = 'x > 100;\n' tokens = [ (Token.Name, 'x'), - (Token.Text, ' '), - (Token.Text, ' '), + (Token.Text.Whitespace, ' '), (Token.Operator, '>'), - (Token.Text, ' '), + (Token.Text.Whitespace, ' '), (Token.Literal.Number.Integer, '100'), (Token.Punctuation, ';'), - (Token.Text, '\n'), + (Token.Text.Whitespace, '\n'), ] assert list(lexer.get_tokens(fragment)) == tokens @@ -176,12 +166,12 @@ def test_one_space_assignment(lexer): fragment = 'x = 100;\n' tokens = [ (Token.Name, 'x'), - (Token.Text, ' '), + (Token.Text.Whitespace, ' '), (Token.Punctuation, '='), - (Token.Text, ' '), + (Token.Text.Whitespace, ' '), (Token.Literal.Number.Integer, '100'), (Token.Punctuation, ';'), - (Token.Text, '\n'), + (Token.Text.Whitespace, '\n'), ] assert list(lexer.get_tokens(fragment)) == tokens @@ -195,8 +185,121 @@ def test_command_mode(lexer): fragment = 'help sin\n' tokens = 
[ (Token.Name, 'help'), - (Token.Text, ' '), + (Token.Text.Whitespace, ' '), (Token.Literal.String, 'sin'), - (Token.Text, '\n'), + (Token.Text.Whitespace, '\n'), ] assert list(lexer.get_tokens(fragment)) == tokens + + + +MATLAB_SAMPLE_CLASS = """ +classdef Name < dynamicprops + properties + % i am a comment + name1 + name2 + end + properties (Constant = true, SetAccess = protected) + % i too am a comment + matrix = [0, 1, 2]; + string = 'i am a string' + end + methods + % i am also a comment + function self = Name() + % i am a comment inside a constructor + end + end +end +""".strip() + +def test_classes_with_properties(lexer): + whitespace = Token.Text.Whitespace + tokens = [ + (Token.Keyword, 'classdef'), + (whitespace, ' '), + (Token.Name, 'Name'), + (whitespace, ' '), + (Token.Operator, '<'), + (whitespace, ' '), + (Token.Keyword, 'dynamicprops'), + (whitespace, '\n '), + (Token.Keyword, 'properties'), + (whitespace, '\n '), + (Token.Comment, '% i am a comment'), + (whitespace, '\n '), + (Token.Name, 'name1'), + (whitespace, '\n '), + (Token.Name, 'name2'), + (whitespace, '\n '), + (Token.Keyword, 'end'), + (whitespace, '\n '), + (Token.Keyword, 'properties'), + (whitespace, ' '), + (Token.Punctuation, '('), + (Token.Name.Builtin, 'Constant'), + (whitespace, ' '), + (Token.Punctuation, '='), + (whitespace, ' '), + (Token.Keyword, 'true'), + (Token.Punctuation, ','), + (whitespace, ' '), + (Token.Name.Builtin, 'SetAccess'), + (whitespace, ' '), + (Token.Punctuation, '='), + (whitespace, ' '), + (Token.Keyword, 'protected'), + (Token.Punctuation, ')'), + (whitespace, "\n "), + (Token.Comment, '% i too am a comment'), + (whitespace, '\n '), + (Token.Name, 'matrix'), + (whitespace, ' '), + (Token.Punctuation, '='), + (whitespace, ' '), + (Token.Punctuation, '['), + (Token.Literal.Number.Integer, '0'), + (Token.Punctuation, ','), + (whitespace, ' '), + (Token.Literal.Number.Integer, '1'), + (Token.Punctuation, ','), + (whitespace, ' '), + 
(Token.Literal.Number.Integer, '2'), + (Token.Punctuation, ']'), + (Token.Punctuation, ';'), + (whitespace, '\n '), + (Token.Name, 'string'), + (whitespace, ' '), + (Token.Punctuation, '='), + (whitespace, ' '), + (Token.Literal.String, "'"), + (Token.Literal.String, "i am a string'"), + (whitespace, '\n '), + (Token.Keyword, 'end'), + (whitespace, '\n '), + (Token.Keyword, 'methods'), + (whitespace, '\n '), + (Token.Comment, '% i am also a comment'), + (whitespace, '\n '), + (Token.Keyword, 'function'), + (whitespace, ' '), + (Token.Text, 'self'), + (whitespace, ' '), + (Token.Punctuation, '='), + (whitespace, ' '), + (Token.Name.Function, 'Name'), + (Token.Punctuation, '('), + (Token.Punctuation, ')'), + (whitespace, '\n '), + (Token.Comment, '% i am a comment inside a constructor'), + (whitespace, '\n '), + (Token.Keyword, 'end'), + (whitespace, '\n '), + (Token.Keyword, 'end'), + (whitespace, '\n'), + (Token.Keyword, 'end'), + (whitespace, '\n'), + ] + assert list(lexer.get_tokens(MATLAB_SAMPLE_CLASS)) == tokens + |