diff options
author | Bryan W. Weber <bryan.w.weber@gmail.com> | 2020-05-06 14:04:30 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-05-06 20:04:30 +0200 |
commit | 78886ba1b8eda31d3a4092e7bdad2764d0ce00b8 (patch) | |
tree | 4b55eced17c71acaf866240e7d9f512ed7cbc0c8 | |
parent | d090c0be255cc2eef02637e2bebeaab4b5fa9ddd (diff) | |
download | pygments-git-78886ba1b8eda31d3a4092e7bdad2764d0ce00b8.tar.gz |
Matlabsession line continuation (#1399)
* Add explicit line continuation for Matlab session
Matlab lines can be explicitly continued with the ... syntax at the end
of a line. In the Session lexer, this requires continuing to the next
line to add more text. Otherwise, the next line is marked as output.
* The ellipsis in Matlab should be a Keyword
The built-in Matlab syntax highlighter highlights ... with the same
formatting as a keyword. Everything after that on the line should be a
comment.
* Update Matlab functions and keywords from R2018a
* Fix many spaces in assignment formatted as string
In command mode, MATLAB allows multiple space-separated arguments to a
function which are interpreted as char arrays, and are formatted as
Strings. This check was also catching cases where there were multiple
spaces before an assignment or comparison operator and formatting
the rest of the line as a string. Now, if an = or other operator is found, the
commandargs state is popped and control returns to the root state.
* Add tests for MATLAB formatting
-rw-r--r-- | pygments/lexers/matlab.py | 56 | ||||
-rw-r--r-- | tests/test_matlab.py | 204 |
2 files changed, 244 insertions, 16 deletions
diff --git a/pygments/lexers/matlab.py b/pygments/lexers/matlab.py index d39f1ff1..f7f77ac9 100644 --- a/pygments/lexers/matlab.py +++ b/pygments/lexers/matlab.py @@ -45,30 +45,30 @@ class MatlabLexer(RegexLexer): # specfun: Special Math functions # elmat: Elementary matrices and matrix manipulation # - # taken from Matlab version 7.4.0.336 (R2007a) + # taken from Matlab version 9.4 (R2018a) # elfun = ("sin", "sind", "sinh", "asin", "asind", "asinh", "cos", "cosd", "cosh", "acos", "acosd", "acosh", "tan", "tand", "tanh", "atan", "atand", "atan2", - "atanh", "sec", "secd", "sech", "asec", "asecd", "asech", "csc", "cscd", + "atan2d", "atanh", "sec", "secd", "sech", "asec", "asecd", "asech", "csc", "cscd", "csch", "acsc", "acscd", "acsch", "cot", "cotd", "coth", "acot", "acotd", - "acoth", "hypot", "exp", "expm1", "log", "log1p", "log10", "log2", "pow2", + "acoth", "hypot", "deg2rad", "rad2deg", "exp", "expm1", "log", "log1p", "log10", "log2", "pow2", "realpow", "reallog", "realsqrt", "sqrt", "nthroot", "nextpow2", "abs", "angle", "complex", "conj", "imag", "real", "unwrap", "isreal", "cplxpair", "fix", "floor", "ceil", "round", "mod", "rem", "sign") specfun = ("airy", "besselj", "bessely", "besselh", "besseli", "besselk", "beta", - "betainc", "betaln", "ellipj", "ellipke", "erf", "erfc", "erfcx", - "erfinv", "expint", "gamma", "gammainc", "gammaln", "psi", "legendre", + "betainc", "betaincinv", "betaln", "ellipj", "ellipke", "erf", "erfc", "erfcx", + "erfinv", "erfcinv", "expint", "gamma", "gammainc", "gammaincinv", "gammaln", "psi", "legendre", "cross", "dot", "factor", "isprime", "primes", "gcd", "lcm", "rat", "rats", "perms", "nchoosek", "factorial", "cart2sph", "cart2pol", "pol2cart", "sph2cart", "hsv2rgb", "rgb2hsv") - elmat = ("zeros", "ones", "eye", "repmat", "rand", "randn", "linspace", "logspace", + elmat = ("zeros", "ones", "eye", "repmat", "repelem", "linspace", "logspace", "freqspace", "meshgrid", "accumarray", "size", "length", "ndims", "numel", - 
"disp", "isempty", "isequal", "isequalwithequalnans", "cat", "reshape", - "diag", "blkdiag", "tril", "triu", "fliplr", "flipud", "flipdim", "rot90", + "disp", "isempty", "isequal", "isequaln", "cat", "reshape", + "diag", "blkdiag", "tril", "triu", "fliplr", "flipud", "flip", "rot90", "find", "end", "sub2ind", "ind2sub", "bsxfun", "ndgrid", "permute", "ipermute", "shiftdim", "circshift", "squeeze", "isscalar", "isvector", - "ans", "eps", "realmax", "realmin", "pi", "i", "inf", "nan", "isnan", - "isinf", "isfinite", "j", "why", "compan", "gallery", "hadamard", "hankel", + "isrow", "iscolumn", "ismatrix", "eps", "realmax", "realmin", "intmax", "intmin", "flintmax", "pi", "i", "inf", "nan", "isnan", + "isinf", "isfinite", "j", "true", "false", "compan", "gallery", "hadamard", "hankel", "hilb", "invhilb", "magic", "pascal", "rosser", "toeplitz", "vander", "wilkinson") @@ -83,13 +83,13 @@ class MatlabLexer(RegexLexer): (r'%.*$', Comment), (r'^\s*function\b', Keyword, 'deffunc'), - # from 'iskeyword' on version 7.11 (R2010): + # from 'iskeyword' on version 9.4 (R2018a): # Check that there is no preceding dot, as keywords are valid field # names. 
(words(('break', 'case', 'catch', 'classdef', 'continue', 'else', - 'elseif', 'end', 'enumerated', 'events', 'for', 'function', - 'global', 'if', 'methods', 'otherwise', 'parfor', - 'persistent', 'properties', 'return', 'spmd', 'switch', + 'elseif', 'end', 'for', 'function', + 'global', 'if', 'otherwise', 'parfor', + 'persistent', 'return', 'spmd', 'switch', 'try', 'while'), prefix=r'(?<!\.)', suffix=r'\b'), Keyword), @@ -97,7 +97,7 @@ class MatlabLexer(RegexLexer): ("(" + "|".join(elfun + specfun + elmat) + r')\b', Name.Builtin), # line continuation with following comment: - (r'\.\.\..*$', Comment), + (r'(\.\.\.)(.*)$', bygroups(Keyword, Comment)), # command form: # "How MATLAB Recognizes Command Syntax" specifies that an operator @@ -147,6 +147,12 @@ class MatlabLexer(RegexLexer): (r"[^']*'", String, '#pop'), ], 'commandargs': [ + # If an equal sign or other operator is encountered, this + # isn't a command. It might be a variable assignment or + # comparison operation with multiple spaces before the + # equal sign or operator + (r"=", Punctuation, '#pop'), + (_operators, Operator, '#pop'), (r"[ \t]+", Text), ("'[^']*'", String), (r"[^';\s]+", String), @@ -187,6 +193,7 @@ class MatlabSessionLexer(Lexer): curcode = '' insertions = [] + continuation = False for match in line_re.finditer(text): line = match.group() @@ -209,7 +216,17 @@ class MatlabSessionLexer(Lexer): # line = "\n" + line token = (0, Generic.Traceback, line) insertions.append((idx, [token])) - + elif continuation: + # line_start is the length of the most recent prompt symbol + line_start = len(insertions[-1][-1][-1]) + # Set leading spaces with the length of the prompt to be a generic prompt + # This keeps code aligned when prompts are removed, say with some Javascript + if line.startswith(' '*line_start): + insertions.append((len(curcode), + [(0, Generic.Prompt, line[:line_start])])) + curcode += line[line_start:] + else: + curcode += line else: if curcode: for item in do_insertions( @@ -220,6 
+237,13 @@ class MatlabSessionLexer(Lexer): yield match.start(), Generic.Output, line + # Does not allow continuation if a comment is included after the ellipses. + # Continues any line that ends with ..., even comments (lines that start with %) + if line.strip().endswith('...'): + continuation = True + else: + continuation = False + if curcode: # or item: for item in do_insertions( insertions, mlexer.get_tokens_unprocessed(curcode)): diff --git a/tests/test_matlab.py b/tests/test_matlab.py new file mode 100644 index 00000000..0ac1df95 --- /dev/null +++ b/tests/test_matlab.py @@ -0,0 +1,204 @@ +# -*- coding: utf-8 -*- +""" + MATLAB Tests + ~~~~~~~~~~~ + + :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import pytest + +from pygments.token import Token +from pygments.lexers import MatlabLexer + + +@pytest.fixture(scope='module') +def lexer(): + yield MatlabLexer() + + +def test_single_line(lexer): + """ + Test that a single line with strings, a method, and numbers is parsed correctly. + """ + fragment = "set('T',300,'P',101325);\n" + tokens = [ + (Token.Name, 'set'), + (Token.Punctuation, '('), + (Token.Literal.String, "'"), + (Token.Literal.String, "T'"), + (Token.Punctuation, ','), + (Token.Literal.Number.Integer, '300'), + (Token.Punctuation, ','), + (Token.Literal.String, "'"), + (Token.Literal.String, "P'"), + (Token.Punctuation, ','), + (Token.Literal.Number.Integer, '101325'), + (Token.Punctuation, ')'), + (Token.Punctuation, ';'), + (Token.Text, '\n'), + ] + assert list(lexer.get_tokens(fragment)) == tokens + + +def test_line_continuation(lexer): + """ + Test that line continuation by ellipses does not produce generic + output on the second line. 
+ """ + fragment = "set('T',300,...\n'P',101325);\n" + tokens = [ + (Token.Name, 'set'), + (Token.Punctuation, '('), + (Token.Literal.String, "'"), + (Token.Literal.String, "T'"), + (Token.Punctuation, ','), + (Token.Literal.Number.Integer, '300'), + (Token.Punctuation, ','), + (Token.Keyword, '...'), + (Token.Text, '\n'), + (Token.Literal.String, "'"), + (Token.Literal.String, "P'"), + (Token.Punctuation, ','), + (Token.Literal.Number.Integer, '101325'), + (Token.Punctuation, ')'), + (Token.Punctuation, ';'), + (Token.Text, '\n'), + ] + assert list(lexer.get_tokens(fragment)) == tokens + + +def test_keywords_ended_by_newline(lexer): + """Test that keywords on their own line are marked as keywords.""" + fragment = "if x > 100\n disp('x > 100')\nelse\n disp('x < 100')\nend\n" + tokens = [ + (Token.Keyword, 'if'), + (Token.Text, ' '), + (Token.Name, 'x'), + (Token.Text, ' '), + (Token.Operator, '>'), + (Token.Text, ' '), + (Token.Literal.Number.Integer, '100'), + (Token.Text, '\n'), + (Token.Text, ' '), + (Token.Text, ' '), + (Token.Text, ' '), + (Token.Text, ' '), + (Token.Name.Builtin, 'disp'), + (Token.Punctuation, '('), + (Token.Literal.String, "'"), + (Token.Literal.String, "x > 100'"), + (Token.Punctuation, ')'), + (Token.Text, '\n'), + (Token.Keyword, 'else'), + (Token.Text, '\n'), + (Token.Text, ' '), + (Token.Text, ' '), + (Token.Text, ' '), + (Token.Text, ' '), + (Token.Name.Builtin, 'disp'), + (Token.Punctuation, '('), + (Token.Literal.String, "'"), + (Token.Literal.String, "x < 100'"), + (Token.Punctuation, ')'), + (Token.Text, '\n'), + (Token.Keyword, 'end'), + (Token.Text, '\n'), + ] + assert list(lexer.get_tokens(fragment)) == tokens + + +def test_comment_after_continuation(lexer): + """ + Test that text after the line continuation ellipses is marked as a comment. + """ + fragment = "set('T',300,... 
a comment\n'P',101325);\n" + tokens = [ + (Token.Name, 'set'), + (Token.Punctuation, '('), + (Token.Literal.String, "'"), + (Token.Literal.String, "T'"), + (Token.Punctuation, ','), + (Token.Literal.Number.Integer, '300'), + (Token.Punctuation, ','), + (Token.Keyword, '...'), + (Token.Comment, ' a comment'), + (Token.Text, '\n'), + (Token.Literal.String, "'"), + (Token.Literal.String, "P'"), + (Token.Punctuation, ','), + (Token.Literal.Number.Integer, '101325'), + (Token.Punctuation, ')'), + (Token.Punctuation, ';'), + (Token.Text, '\n'), + ] + assert list(lexer.get_tokens(fragment)) == tokens + + +def test_multiple_spaces_variable_assignment(lexer): + """ + Test that multiple spaces with an equal sign doesn't get formatted to a string. + """ + fragment = 'x = 100;\n' + tokens = [ + (Token.Name, 'x'), + (Token.Text, ' '), + (Token.Text, ' '), + (Token.Punctuation, '='), + (Token.Text, ' '), + (Token.Literal.Number.Integer, '100'), + (Token.Punctuation, ';'), + (Token.Text, '\n'), + ] + assert list(lexer.get_tokens(fragment)) == tokens + + +def test_operator_multiple_space(lexer): + """ + Test that multiple spaces with an operator doesn't get formatted to a string. 
+ """ + fragment = 'x > 100;\n' + tokens = [ + (Token.Name, 'x'), + (Token.Text, ' '), + (Token.Text, ' '), + (Token.Operator, '>'), + (Token.Text, ' '), + (Token.Literal.Number.Integer, '100'), + (Token.Punctuation, ';'), + (Token.Text, '\n'), + ] + assert list(lexer.get_tokens(fragment)) == tokens + + +def test_one_space_assignment(lexer): + """Test that one space before an equal sign is formatted correctly.""" + fragment = 'x = 100;\n' + tokens = [ + (Token.Name, 'x'), + (Token.Text, ' '), + (Token.Punctuation, '='), + (Token.Text, ' '), + (Token.Literal.Number.Integer, '100'), + (Token.Punctuation, ';'), + (Token.Text, '\n'), + ] + assert list(lexer.get_tokens(fragment)) == tokens + + +def test_command_mode(lexer): + """ + MATLAB allows char function arguments to not be enclosed by parentheses + or contain quote characters, as long as they are space separated. Test + that one common such function is formatted appropriately. + """ + fragment = 'help sin\n' + tokens = [ + (Token.Name, 'help'), + (Token.Text, ' '), + (Token.Literal.String, 'sin'), + (Token.Punctuation, ''), + (Token.Text, '\n'), + ] + assert list(lexer.get_tokens(fragment)) == tokens |