author    Dan <drmoose@users.noreply.github.com>    2021-01-18 12:01:21 -0500
committer GitHub <noreply@github.com>               2021-01-18 18:01:21 +0100
commit    423c44a451db7e5f63147b1c1519661d745fc43a (patch)
tree      770ac1cc23c93e6cb4ee9e4f0ea3dbf705b34a80
parent    9647d2ae506b8e05ebabe9243df707bac901a6a3 (diff)
download  pygments-git-423c44a451db7e5f63147b1c1519661d745fc43a.tar.gz
Matlab class properties (#1466)
* WIP: Add failing test for a matlab class with properties.
* Add some missing keywords.
* Add leading \s* matchers to things above the command form regex, as it tends to swallow keywords otherwise.
* Add support for the special 'properties' block syntax.
* Fix apparent infinite loop when given garbage input.
* Use includes to clean up some of my copypasta.
* Fix negative lookahead when there's more than one space between operators.
* Use Whitespace, not Text, for spaces; combine adjacent whitespace.
* Add support for declarative property constraints.
-rw-r--r--  pygments/lexers/matlab.py    98
-rw-r--r--  tests/test_matlab.py        171
2 files changed, 201 insertions, 68 deletions
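
The commit message mentions the special 'properties' block syntax and declarative property constraints. As a minimal sketch of what that covers, the snippet below feeds a made-up MATLAB fragment (class and member names like Sensor and MaxGain are purely illustrative, not taken from this commit) through the updated lexer; only MatlabLexer and get_tokens are real Pygments API.

    from pygments.lexers import MatlabLexer
    from pygments.token import Keyword

    # MATLAB fragment exercising what this commit adds: a properties block
    # with attributes in parentheses, and a declarative property constraint.
    MATLAB_PROPS_SAMPLE = """
    classdef Sensor < handle
        properties (Constant = true, SetAccess = protected)
            MaxGain = 2.5
        end
        properties
            Offset (1,1) double {mustBeFinite} = 0
        end
    end
    """.strip()

    tokens = list(MatlabLexer().get_tokens(MATLAB_PROPS_SAMPLE))
    # With this change, 'properties' and the closing 'end' lex as keywords.
    assert (Keyword, 'properties') in tokens
    assert (Keyword, 'end') in tokens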
diff --git a/pygments/lexers/matlab.py b/pygments/lexers/matlab.py
index 0654cc0c..8b33e5ee 100644
--- a/pygments/lexers/matlab.py
+++ b/pygments/lexers/matlab.py
@@ -11,7 +11,7 @@
import re
from pygments.lexer import Lexer, RegexLexer, bygroups, default, words, \
- do_insertions
+ do_insertions, include
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
Number, Punctuation, Generic, Whitespace
@@ -20,6 +20,7 @@ from pygments.lexers import _scilab_builtins
__all__ = ['MatlabLexer', 'MatlabSessionLexer', 'OctaveLexer', 'ScilabLexer']
+
class MatlabLexer(RegexLexer):
"""
For Matlab source code.
@@ -75,24 +76,54 @@ class MatlabLexer(RegexLexer):
_operators = r'-|==|~=|<=|>=|<|>|&&|&|~|\|\|?|\.\*|\*|\+|\.\^|\.\\|\./|/|\\'
tokens = {
+ 'expressions': [
+ # operators:
+ (_operators, Operator),
+
+ # numbers (must come before punctuation to handle `.5`; cannot use
+ # `\b` due to e.g. `5. + .5`).
+ (r'(?<!\w)((\d+\.\d*)|(\d*\.\d+))([eEf][+-]?\d+)?(?!\w)', Number.Float),
+ (r'\b\d+[eEf][+-]?[0-9]+\b', Number.Float),
+ (r'\b\d+\b', Number.Integer),
+
+ # punctuation:
+ (r'\[|\]|\(|\)|\{|\}|:|@|\.|,', Punctuation),
+ (r'=|:|;', Punctuation),
+
+ # quote can be transpose, instead of string:
+ # (not great, but handles common cases...)
+ (r'(?<=[\w)\].])\'+', Operator),
+
+ (r'"(""|[^"])*"', String),
+
+ (r'(?<![\w)\].])\'', String, 'string'),
+ (r'[a-zA-Z_]\w*', Name),
+ (r'\s+', Whitespace),
+ (r'.', Text),
+ ],
'root': [
# line starting with '!' is sent as a system command. not sure what
# label to use...
(r'^!.*', String.Other),
(r'%\{\s*\n', Comment.Multiline, 'blockcomment'),
(r'%.*$', Comment),
- (r'^\s*function\b', Keyword, 'deffunc'),
+ (r'(\s*^\s*)(function)\b', bygroups(Whitespace, Keyword), 'deffunc'),
+ (r'(\s*^\s*)(properties)(\s+)(\()',
+ bygroups(Whitespace, Keyword, Whitespace, Punctuation),
+ ('defprops', 'propattrs')),
+ (r'(\s*^\s*)(properties)\b',
+ bygroups(Whitespace, Keyword), 'defprops'),
# from 'iskeyword' on version 9.4 (R2018a):
# Check that there is no preceding dot, as keywords are valid field
# names.
- (words(('break', 'case', 'catch', 'classdef', 'continue', 'else',
- 'elseif', 'end', 'for', 'function',
- 'global', 'if', 'otherwise', 'parfor',
+ (words(('break', 'case', 'catch', 'classdef', 'continue',
+ 'dynamicprops', 'else', 'elseif', 'end', 'for', 'function',
+ 'global', 'if', 'methods', 'otherwise', 'parfor',
'persistent', 'return', 'spmd', 'switch',
'try', 'while'),
- prefix=r'(?<!\.)', suffix=r'\b'),
- Keyword),
+ prefix=r'(?<!\.)(\s*)(', suffix=r')\b'),
+ bygroups(Whitespace, Keyword)),
("(" + "|".join(elfun + specfun + elmat) + r')\b', Name.Builtin),
@@ -104,31 +135,10 @@ class MatlabLexer(RegexLexer):
# is recognized if it is either surrounded by spaces or by no
# spaces on both sides; only the former case matters for us. (This
# allows distinguishing `cd ./foo` from `cd ./ foo`.)
- (r'(?:^|(?<=;))(\s*)(\w+)(\s+)(?!=|\(|(?:%s)\s+)' % _operators,
- bygroups(Text, Name, Text), 'commandargs'),
+ (r'(?:^|(?<=;))(\s*)(\w+)(\s+)(?!=|\(|(?:%s)\s+|\s)' % _operators,
+ bygroups(Whitespace, Name, Whitespace), 'commandargs'),
- # operators:
- (_operators, Operator),
-
- # numbers (must come before punctuation to handle `.5`; cannot use
- # `\b` due to e.g. `5. + .5`).
- (r'(?<!\w)((\d+\.\d*)|(\d*\.\d+))([eEf][+-]?\d+)?(?!\w)', Number.Float),
- (r'\b\d+[eEf][+-]?[0-9]+\b', Number.Float),
- (r'\b\d+\b', Number.Integer),
-
- # punctuation:
- (r'\[|\]|\(|\)|\{|\}|:|@|\.|,', Punctuation),
- (r'=|:|;', Punctuation),
-
- # quote can be transpose, instead of string:
- # (not great, but handles common cases...)
- (r'(?<=[\w)\].])\'+', Operator),
-
- (r'"(""|[^"])*"', String),
-
- (r'(?<![\w)\].])\'', String, 'string'),
- (r'[a-zA-Z_]\w*', Name),
- (r'.', Text),
+ include('expressions')
],
'blockcomment': [
(r'^\s*%\}', Comment.Multiline, '#pop'),
@@ -141,7 +151,26 @@ class MatlabLexer(RegexLexer):
Whitespace, Name.Function, Punctuation, Text,
Punctuation, Whitespace), '#pop'),
# function with no args
- (r'(\s*)([a-zA-Z_]\w*)', bygroups(Text, Name.Function), '#pop'),
+ (r'(\s*)([a-zA-Z_]\w*)',
+ bygroups(Whitespace, Name.Function), '#pop'),
+ ],
+ 'propattrs': [
+ (r'(\w+)(\s*)(=)(\s*)(\d+)',
+ bygroups(Name.Builtin, Whitespace, Punctuation, Whitespace,
+ Number)),
+ (r'(\w+)(\s*)(=)(\s*)([a-zA-Z]\w*)',
+ bygroups(Name.Builtin, Whitespace, Punctuation, Whitespace,
+ Keyword)),
+ (r',', Punctuation),
+ (r'\)', Punctuation, '#pop'),
+ (r'\s+', Whitespace),
+ (r'.', Text),
+ ],
+ 'defprops': [
+ (r'%\{\s*\n', Comment.Multiline, 'blockcomment'),
+ (r'%.*$', Comment),
+ (r'(?<!\.)end\b', Keyword, '#pop'),
+ include('expressions'),
],
'string': [
(r"[^']*'", String, '#pop'),
@@ -153,7 +182,7 @@ class MatlabLexer(RegexLexer):
# equal sign or operator
(r"=", Punctuation, '#pop'),
(_operators, Operator, '#pop'),
- (r"[ \t]+", Text),
+ (r"[ \t]+", Whitespace),
("'[^']*'", String),
(r"[^';\s]+", String),
(";", Punctuation, '#pop'),
@@ -642,7 +671,8 @@ class OctaveLexer(RegexLexer):
Whitespace, Name.Function, Punctuation, Text,
Punctuation, Whitespace), '#pop'),
# function with no args
- (r'(\s*)([a-zA-Z_]\w*)', bygroups(Text, Name.Function), '#pop'),
+ (r'(\s*)([a-zA-Z_]\w*)',
+ bygroups(Whitespace, Name.Function), '#pop'),
],
}
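
The matlab.py changes above factor the shared operator, number, punctuation and string rules into an 'expressions' state that 'root' and 'defprops' pull in with include(). A self-contained sketch of that pattern follows; TinyLexer is a toy example, not code from this commit.

    from pygments.lexer import RegexLexer, include
    from pygments.token import Name, Number, Text, Whitespace

    class TinyLexer(RegexLexer):
        """Toy lexer showing how include() shares rules between states."""
        tokens = {
            # Rules used by several states are defined once here...
            'expressions': [
                (r'\d+', Number.Integer),
                (r'[a-zA-Z_]\w*', Name),
                (r'\s+', Whitespace),
            ],
            # ...and spliced in with include(), so a block state like
            # 'defprops' can reuse them without copy-paste.
            'root': [
                include('expressions'),
                (r'.', Text),
            ],
        }

For example, list(TinyLexer().get_tokens("x 42")) yields Name, Whitespace and Number tokens, all produced by the shared 'expressions' rules.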
diff --git a/tests/test_matlab.py b/tests/test_matlab.py
index 945a3434..b375c8da 100644
--- a/tests/test_matlab.py
+++ b/tests/test_matlab.py
@@ -36,7 +36,7 @@ def test_single_line(lexer):
(Token.Literal.Number.Integer, '101325'),
(Token.Punctuation, ')'),
(Token.Punctuation, ';'),
- (Token.Text, '\n'),
+ (Token.Text.Whitespace, '\n'),
]
assert list(lexer.get_tokens(fragment)) == tokens
@@ -56,14 +56,14 @@ def test_line_continuation(lexer):
(Token.Literal.Number.Integer, '300'),
(Token.Punctuation, ','),
(Token.Keyword, '...'),
- (Token.Text, '\n'),
+ (Token.Text.Whitespace, '\n'),
(Token.Literal.String, "'"),
(Token.Literal.String, "P'"),
(Token.Punctuation, ','),
(Token.Literal.Number.Integer, '101325'),
(Token.Punctuation, ')'),
(Token.Punctuation, ';'),
- (Token.Text, '\n'),
+ (Token.Text.Whitespace, '\n'),
]
assert list(lexer.get_tokens(fragment)) == tokens
@@ -73,37 +73,29 @@ def test_keywords_ended_by_newline(lexer):
fragment = "if x > 100\n disp('x > 100')\nelse\n disp('x < 100')\nend\n"
tokens = [
(Token.Keyword, 'if'),
- (Token.Text, ' '),
+ (Token.Text.Whitespace, ' '),
(Token.Name, 'x'),
- (Token.Text, ' '),
+ (Token.Text.Whitespace, ' '),
(Token.Operator, '>'),
- (Token.Text, ' '),
+ (Token.Text.Whitespace, ' '),
(Token.Literal.Number.Integer, '100'),
- (Token.Text, '\n'),
- (Token.Text, ' '),
- (Token.Text, ' '),
- (Token.Text, ' '),
- (Token.Text, ' '),
+ (Token.Text.Whitespace, '\n '),
(Token.Name.Builtin, 'disp'),
(Token.Punctuation, '('),
(Token.Literal.String, "'"),
(Token.Literal.String, "x > 100'"),
(Token.Punctuation, ')'),
- (Token.Text, '\n'),
+ (Token.Text.Whitespace, '\n'),
(Token.Keyword, 'else'),
- (Token.Text, '\n'),
- (Token.Text, ' '),
- (Token.Text, ' '),
- (Token.Text, ' '),
- (Token.Text, ' '),
+ (Token.Text.Whitespace, '\n '),
(Token.Name.Builtin, 'disp'),
(Token.Punctuation, '('),
(Token.Literal.String, "'"),
(Token.Literal.String, "x < 100'"),
(Token.Punctuation, ')'),
- (Token.Text, '\n'),
+ (Token.Text.Whitespace, '\n'),
(Token.Keyword, 'end'),
- (Token.Text, '\n'),
+ (Token.Text.Whitespace, '\n'),
]
assert list(lexer.get_tokens(fragment)) == tokens
@@ -123,14 +115,14 @@ def test_comment_after_continuation(lexer):
(Token.Punctuation, ','),
(Token.Keyword, '...'),
(Token.Comment, ' a comment'),
- (Token.Text, '\n'),
+ (Token.Text.Whitespace, '\n'),
(Token.Literal.String, "'"),
(Token.Literal.String, "P'"),
(Token.Punctuation, ','),
(Token.Literal.Number.Integer, '101325'),
(Token.Punctuation, ')'),
(Token.Punctuation, ';'),
- (Token.Text, '\n'),
+ (Token.Text.Whitespace, '\n'),
]
assert list(lexer.get_tokens(fragment)) == tokens
@@ -142,13 +134,12 @@ def test_multiple_spaces_variable_assignment(lexer):
fragment = 'x = 100;\n'
tokens = [
(Token.Name, 'x'),
- (Token.Text, ' '),
- (Token.Text, ' '),
+ (Token.Text.Whitespace, ' '),
(Token.Punctuation, '='),
- (Token.Text, ' '),
+ (Token.Text.Whitespace, ' '),
(Token.Literal.Number.Integer, '100'),
(Token.Punctuation, ';'),
- (Token.Text, '\n'),
+ (Token.Text.Whitespace, '\n'),
]
assert list(lexer.get_tokens(fragment)) == tokens
@@ -160,13 +151,12 @@ def test_operator_multiple_space(lexer):
fragment = 'x > 100;\n'
tokens = [
(Token.Name, 'x'),
- (Token.Text, ' '),
- (Token.Text, ' '),
+ (Token.Text.Whitespace, ' '),
(Token.Operator, '>'),
- (Token.Text, ' '),
+ (Token.Text.Whitespace, ' '),
(Token.Literal.Number.Integer, '100'),
(Token.Punctuation, ';'),
- (Token.Text, '\n'),
+ (Token.Text.Whitespace, '\n'),
]
assert list(lexer.get_tokens(fragment)) == tokens
@@ -176,12 +166,12 @@ def test_one_space_assignment(lexer):
fragment = 'x = 100;\n'
tokens = [
(Token.Name, 'x'),
- (Token.Text, ' '),
+ (Token.Text.Whitespace, ' '),
(Token.Punctuation, '='),
- (Token.Text, ' '),
+ (Token.Text.Whitespace, ' '),
(Token.Literal.Number.Integer, '100'),
(Token.Punctuation, ';'),
- (Token.Text, '\n'),
+ (Token.Text.Whitespace, '\n'),
]
assert list(lexer.get_tokens(fragment)) == tokens
@@ -195,8 +185,121 @@ def test_command_mode(lexer):
fragment = 'help sin\n'
tokens = [
(Token.Name, 'help'),
- (Token.Text, ' '),
+ (Token.Text.Whitespace, ' '),
(Token.Literal.String, 'sin'),
- (Token.Text, '\n'),
+ (Token.Text.Whitespace, '\n'),
]
assert list(lexer.get_tokens(fragment)) == tokens
+
+
+
+MATLAB_SAMPLE_CLASS = """
+classdef Name < dynamicprops
+ properties
+ % i am a comment
+ name1
+ name2
+ end
+ properties (Constant = true, SetAccess = protected)
+ % i too am a comment
+ matrix = [0, 1, 2];
+ string = 'i am a string'
+ end
+ methods
+ % i am also a comment
+ function self = Name()
+ % i am a comment inside a constructor
+ end
+ end
+end
+""".strip()
+
+def test_classes_with_properties(lexer):
+ whitespace = Token.Text.Whitespace
+ tokens = [
+ (Token.Keyword, 'classdef'),
+ (whitespace, ' '),
+ (Token.Name, 'Name'),
+ (whitespace, ' '),
+ (Token.Operator, '<'),
+ (whitespace, ' '),
+ (Token.Keyword, 'dynamicprops'),
+ (whitespace, '\n '),
+ (Token.Keyword, 'properties'),
+ (whitespace, '\n '),
+ (Token.Comment, '% i am a comment'),
+ (whitespace, '\n '),
+ (Token.Name, 'name1'),
+ (whitespace, '\n '),
+ (Token.Name, 'name2'),
+ (whitespace, '\n '),
+ (Token.Keyword, 'end'),
+ (whitespace, '\n '),
+ (Token.Keyword, 'properties'),
+ (whitespace, ' '),
+ (Token.Punctuation, '('),
+ (Token.Name.Builtin, 'Constant'),
+ (whitespace, ' '),
+ (Token.Punctuation, '='),
+ (whitespace, ' '),
+ (Token.Keyword, 'true'),
+ (Token.Punctuation, ','),
+ (whitespace, ' '),
+ (Token.Name.Builtin, 'SetAccess'),
+ (whitespace, ' '),
+ (Token.Punctuation, '='),
+ (whitespace, ' '),
+ (Token.Keyword, 'protected'),
+ (Token.Punctuation, ')'),
+ (whitespace, "\n "),
+ (Token.Comment, '% i too am a comment'),
+ (whitespace, '\n '),
+ (Token.Name, 'matrix'),
+ (whitespace, ' '),
+ (Token.Punctuation, '='),
+ (whitespace, ' '),
+ (Token.Punctuation, '['),
+ (Token.Literal.Number.Integer, '0'),
+ (Token.Punctuation, ','),
+ (whitespace, ' '),
+ (Token.Literal.Number.Integer, '1'),
+ (Token.Punctuation, ','),
+ (whitespace, ' '),
+ (Token.Literal.Number.Integer, '2'),
+ (Token.Punctuation, ']'),
+ (Token.Punctuation, ';'),
+ (whitespace, '\n '),
+ (Token.Name, 'string'),
+ (whitespace, ' '),
+ (Token.Punctuation, '='),
+ (whitespace, ' '),
+ (Token.Literal.String, "'"),
+ (Token.Literal.String, "i am a string'"),
+ (whitespace, '\n '),
+ (Token.Keyword, 'end'),
+ (whitespace, '\n '),
+ (Token.Keyword, 'methods'),
+ (whitespace, '\n '),
+ (Token.Comment, '% i am also a comment'),
+ (whitespace, '\n '),
+ (Token.Keyword, 'function'),
+ (whitespace, ' '),
+ (Token.Text, 'self'),
+ (whitespace, ' '),
+ (Token.Punctuation, '='),
+ (whitespace, ' '),
+ (Token.Name.Function, 'Name'),
+ (Token.Punctuation, '('),
+ (Token.Punctuation, ')'),
+ (whitespace, '\n '),
+ (Token.Comment, '% i am a comment inside a constructor'),
+ (whitespace, '\n '),
+ (Token.Keyword, 'end'),
+ (whitespace, '\n '),
+ (Token.Keyword, 'end'),
+ (whitespace, '\n'),
+ (Token.Keyword, 'end'),
+ (whitespace, '\n'),
+ ]
+ assert list(lexer.get_tokens(MATLAB_SAMPLE_CLASS)) == tokens
+
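
A hypothetical follow-up test, in the same style as the test file above, could pin down the declarative-constraint case directly; the test name and fragment below are illustrative and not part of this commit, though the expected tokens follow from the rules in this diff.

    import pytest
    from pygments.lexers import MatlabLexer
    from pygments.token import Error, Name

    @pytest.fixture(scope='module')
    def lexer():
        yield MatlabLexer()

    def test_declarative_property_constraint(lexer):
        fragment = "properties\n    x (1,1) double {mustBePositive} = 1\nend\n"
        tokens = list(lexer.get_tokens(fragment))
        # The validator name should lex as a plain Name, with no Error tokens.
        assert (Name, 'mustBePositive') in tokens
        assert all(ttype is not Error for ttype, _ in tokens)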