Matlabsession line continuation (#1399)

* Add explicit line continuation for Matlab session: Matlab lines can be explicitly continued with the ... syntax at the end of a line. In the Session lexer, this requires continuing to the next line to add more text; otherwise, the next line is marked as output. * The ellipsis in Matlab should be a Keyword: the built-in Matlab syntax highlighter highlights ... with the same formatting as a keyword. Everything after that on the line should be a comment. * Update Matlab functions and keywords from R2018a * Fix many spaces in assignment formatted as string: in command mode, MATLAB allows multiple space-separated arguments to a function, which are interpreted as char arrays and formatted as Strings. This check was also catching cases where there were multiple spaces preceding an assignment or comparison operator and formatting the rest of the line as a string. Now, if an = or other operator is found, the commandargs state is popped and control returns to the root state. * Add tests for MATLAB formatting
author: Bryan W. Weber <bryan.w.weber@gmail.com> 2020-05-06 14:04:30 -0400
committer: GitHub <noreply@github.com> 2020-05-06 20:04:30 +0200
commit: 78886ba1b8eda31d3a4092e7bdad2764d0ce00b8 (patch)
tree: 4b55eced17c71acaf866240e7d9f512ed7cbc0c8
parent: d090c0be255cc2eef02637e2bebeaab4b5fa9ddd (diff)
download: pygments-git-78886ba1b8eda31d3a4092e7bdad2764d0ce00b8.tar.gz
2 files changed, 244 insertions, 16 deletions
diff --git a/pygments/lexers/matlab.py b/pygments/lexers/matlab.py
index d39f1ff1..f7f77ac9 100644
--- a/pygments/lexers/matlab.py
+++ b/pygments/lexers/matlab.py
@@ -45,30 +45,30 @@ class MatlabLexer(RegexLexer):
     # specfun: Special Math functions
     # elmat: Elementary matrices and matrix manipulation
     #
-    # taken from Matlab version 7.4.0.336 (R2007a)
+    # taken from Matlab version 9.4 (R2018a)
     #
     elfun = ("sin", "sind", "sinh", "asin", "asind", "asinh", "cos", "cosd", "cosh",
              "acos", "acosd", "acosh", "tan", "tand", "tanh", "atan", "atand", "atan2",
-             "atanh", "sec", "secd", "sech", "asec", "asecd", "asech", "csc", "cscd",
+             "atan2d", "atanh", "sec", "secd", "sech", "asec", "asecd", "asech", "csc", "cscd",
              "csch", "acsc", "acscd", "acsch", "cot", "cotd", "coth", "acot", "acotd",
-             "acoth", "hypot", "exp", "expm1", "log", "log1p", "log10", "log2", "pow2",
+             "acoth", "hypot", "deg2rad", "rad2deg", "exp", "expm1", "log", "log1p", "log10", "log2", "pow2",
              "realpow", "reallog", "realsqrt", "sqrt", "nthroot", "nextpow2", "abs",
              "angle", "complex", "conj", "imag", "real", "unwrap", "isreal", "cplxpair",
              "fix", "floor", "ceil", "round", "mod", "rem", "sign")
     specfun = ("airy", "besselj", "bessely", "besselh", "besseli", "besselk", "beta",
-               "betainc", "betaln", "ellipj", "ellipke", "erf", "erfc", "erfcx",
-               "erfinv", "expint", "gamma", "gammainc", "gammaln", "psi", "legendre",
+               "betainc", "betaincinv", "betaln", "ellipj", "ellipke", "erf", "erfc", "erfcx",
+               "erfinv", "erfcinv", "expint", "gamma", "gammainc", "gammaincinv", "gammaln", "psi", "legendre",
                "cross", "dot", "factor", "isprime", "primes", "gcd", "lcm", "rat",
                "rats", "perms", "nchoosek", "factorial", "cart2sph", "cart2pol",
                "pol2cart", "sph2cart", "hsv2rgb", "rgb2hsv")
-    elmat = ("zeros", "ones", "eye", "repmat", "rand", "randn", "linspace", "logspace",
+    elmat = ("zeros", "ones", "eye", "repmat", "repelem", "linspace", "logspace",
              "freqspace", "meshgrid", "accumarray", "size", "length", "ndims", "numel",
-             "disp", "isempty", "isequal", "isequalwithequalnans", "cat", "reshape",
-             "diag", "blkdiag", "tril", "triu", "fliplr", "flipud", "flipdim", "rot90",
+             "disp", "isempty", "isequal", "isequaln", "cat", "reshape",
+             "diag", "blkdiag", "tril", "triu", "fliplr", "flipud", "flip", "rot90",
              "find", "end", "sub2ind", "ind2sub", "bsxfun", "ndgrid", "permute",
              "ipermute", "shiftdim", "circshift", "squeeze", "isscalar", "isvector",
-             "ans", "eps", "realmax", "realmin", "pi", "i", "inf", "nan", "isnan",
-             "isinf", "isfinite", "j", "why", "compan", "gallery", "hadamard", "hankel",
+             "isrow", "iscolumn", "ismatrix", "eps", "realmax", "realmin", "intmax", "intmin", "flintmax", "pi", "i", "inf", "nan", "isnan",
+             "isinf", "isfinite", "j", "true", "false", "compan", "gallery", "hadamard", "hankel",
              "hilb", "invhilb", "magic", "pascal", "rosser", "toeplitz", "vander",
              "wilkinson")
 
@@ -83,13 +83,13 @@ class MatlabLexer(RegexLexer):
             (r'%.*$', Comment),
             (r'^\s*function\b', Keyword, 'deffunc'),
 
-            # from 'iskeyword' on version 7.11 (R2010):
+            # from 'iskeyword' on version 9.4 (R2018a):
             # Check that there is no preceding dot, as keywords are valid field
             # names.
             (words(('break', 'case', 'catch', 'classdef', 'continue', 'else',
-                    'elseif', 'end', 'enumerated', 'events', 'for', 'function',
-                    'global', 'if', 'methods', 'otherwise', 'parfor',
-                    'persistent', 'properties', 'return', 'spmd', 'switch',
+                    'elseif', 'end', 'for', 'function',
+                    'global', 'if', 'otherwise', 'parfor',
+                    'persistent', 'return', 'spmd', 'switch',
                     'try', 'while'),
                    prefix=r'(?<!\.)', suffix=r'\b'),
              Keyword),
@@ -97,7 +97,7 @@ class MatlabLexer(RegexLexer):
             ("(" + "|".join(elfun + specfun + elmat) + r')\b',  Name.Builtin),
 
             # line continuation with following comment:
-            (r'\.\.\..*$', Comment),
+            (r'(\.\.\.)(.*)$', bygroups(Keyword, Comment)),
 
             # command form:
             # "How MATLAB Recognizes Command Syntax" specifies that an operator
@@ -147,6 +147,12 @@ class MatlabLexer(RegexLexer):
             (r"[^']*'", String, '#pop'),
         ],
         'commandargs': [
+            # If an equal sign or other operator is encountered, this
+            # isn't a command. It might be a variable assignment or
+            # comparison operation with multiple spaces before the
+            # equal sign or operator
+            (r"=", Punctuation, '#pop'),
+            (_operators, Operator, '#pop'),
             (r"[ \t]+", Text),
             ("'[^']*'", String),
             (r"[^';\s]+", String),
@@ -187,6 +193,7 @@ class MatlabSessionLexer(Lexer):
 
         curcode = ''
         insertions = []
+        continuation = False
 
         for match in line_re.finditer(text):
             line = match.group()
@@ -209,7 +216,17 @@ class MatlabSessionLexer(Lexer):
                 # line = "\n" + line
                 token = (0, Generic.Traceback, line)
                 insertions.append((idx, [token]))
-
+            elif continuation:
+                # line_start is the length of the most recent prompt symbol
+                line_start = len(insertions[-1][-1][-1])
+                # Set leading spaces with the length of the prompt to be a generic prompt
+                # This keeps code aligned when prompts are removed, say with some Javascript
+                if line.startswith(' '*line_start):
+                    insertions.append((len(curcode),
+                                    [(0, Generic.Prompt, line[:line_start])]))
+                    curcode += line[line_start:]
+                else:
+                    curcode += line
             else:
                 if curcode:
                     for item in do_insertions(
@@ -220,6 +237,13 @@ class MatlabSessionLexer(Lexer):
 
                 yield match.start(), Generic.Output, line
 
+            # Continuation requires the line to end with ...; a comment after the
+            # ellipsis prevents it. Any such line continues, even one starting with %.
+            if line.strip().endswith('...'):
+                continuation = True
+            else:
+                continuation = False
+
         if curcode:  # or item:
             for item in do_insertions(
                     insertions, mlexer.get_tokens_unprocessed(curcode)):
diff --git a/tests/test_matlab.py b/tests/test_matlab.py
new file mode 100644
index 00000000..0ac1df95
--- /dev/null
+++ b/tests/test_matlab.py
@@ -0,0 +1,204 @@
+# -*- coding: utf-8 -*-
+"""
+    MATLAB Tests
+    ~~~~~~~~~~~~
+
+    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import pytest
+
+from pygments.token import Token
+from pygments.lexers import MatlabLexer
+
+
+@pytest.fixture(scope='module')
+def lexer():
+    yield MatlabLexer()
+
+
+def test_single_line(lexer):
+    """
+    Test that a single line with strings, a method, and numbers is parsed correctly.
+    """
+    fragment = "set('T',300,'P',101325);\n"
+    tokens = [
+        (Token.Name, 'set'),
+        (Token.Punctuation, '('),
+        (Token.Literal.String, "'"),
+        (Token.Literal.String, "T'"),
+        (Token.Punctuation, ','),
+        (Token.Literal.Number.Integer, '300'),
+        (Token.Punctuation, ','),
+        (Token.Literal.String, "'"),
+        (Token.Literal.String, "P'"),
+        (Token.Punctuation, ','),
+        (Token.Literal.Number.Integer, '101325'),
+        (Token.Punctuation, ')'),
+        (Token.Punctuation, ';'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_line_continuation(lexer):
+    """
+    Test that line continuation by ellipses does not produce generic
+    output on the second line.
+    """
+    fragment = "set('T',300,...\n'P',101325);\n"
+    tokens = [
+        (Token.Name, 'set'),
+        (Token.Punctuation, '('),
+        (Token.Literal.String, "'"),
+        (Token.Literal.String, "T'"),
+        (Token.Punctuation, ','),
+        (Token.Literal.Number.Integer, '300'),
+        (Token.Punctuation, ','),
+        (Token.Keyword, '...'),
+        (Token.Text, '\n'),
+        (Token.Literal.String, "'"),
+        (Token.Literal.String, "P'"),
+        (Token.Punctuation, ','),
+        (Token.Literal.Number.Integer, '101325'),
+        (Token.Punctuation, ')'),
+        (Token.Punctuation, ';'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_keywords_ended_by_newline(lexer):
+    """Test that keywords on their own line are marked as keywords."""
+    fragment = "if x > 100\n    disp('x > 100')\nelse\n    disp('x < 100')\nend\n"
+    tokens = [
+        (Token.Keyword, 'if'),
+        (Token.Text, ' '),
+        (Token.Name, 'x'),
+        (Token.Text, ' '),
+        (Token.Operator, '>'),
+        (Token.Text, ' '),
+        (Token.Literal.Number.Integer, '100'),
+        (Token.Text, '\n'),
+        (Token.Text, ' '),
+        (Token.Text, ' '),
+        (Token.Text, ' '),
+        (Token.Text, ' '),
+        (Token.Name.Builtin, 'disp'),
+        (Token.Punctuation, '('),
+        (Token.Literal.String, "'"),
+        (Token.Literal.String, "x > 100'"),
+        (Token.Punctuation, ')'),
+        (Token.Text, '\n'),
+        (Token.Keyword, 'else'),
+        (Token.Text, '\n'),
+        (Token.Text, ' '),
+        (Token.Text, ' '),
+        (Token.Text, ' '),
+        (Token.Text, ' '),
+        (Token.Name.Builtin, 'disp'),
+        (Token.Punctuation, '('),
+        (Token.Literal.String, "'"),
+        (Token.Literal.String, "x < 100'"),
+        (Token.Punctuation, ')'),
+        (Token.Text, '\n'),
+        (Token.Keyword, 'end'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_comment_after_continuation(lexer):
+    """
+    Test that text after the line continuation ellipses is marked as a comment.
+    """
+    fragment = "set('T',300,... a comment\n'P',101325);\n"
+    tokens = [
+        (Token.Name, 'set'),
+        (Token.Punctuation, '('),
+        (Token.Literal.String, "'"),
+        (Token.Literal.String, "T'"),
+        (Token.Punctuation, ','),
+        (Token.Literal.Number.Integer, '300'),
+        (Token.Punctuation, ','),
+        (Token.Keyword, '...'),
+        (Token.Comment, ' a comment'),
+        (Token.Text, '\n'),
+        (Token.Literal.String, "'"),
+        (Token.Literal.String, "P'"),
+        (Token.Punctuation, ','),
+        (Token.Literal.Number.Integer, '101325'),
+        (Token.Punctuation, ')'),
+        (Token.Punctuation, ';'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_multiple_spaces_variable_assignment(lexer):
+    """
+    Test that multiple spaces with an equal sign doesn't get formatted to a string.
+    """
+    fragment = 'x  = 100;\n'
+    tokens = [
+        (Token.Name, 'x'),
+        (Token.Text, ' '),
+        (Token.Text, ' '),
+        (Token.Punctuation, '='),
+        (Token.Text, ' '),
+        (Token.Literal.Number.Integer, '100'),
+        (Token.Punctuation, ';'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_operator_multiple_space(lexer):
+    """
+    Test that multiple spaces with an operator doesn't get formatted to a string.
+    """
+    fragment = 'x  > 100;\n'
+    tokens = [
+        (Token.Name, 'x'),
+        (Token.Text, ' '),
+        (Token.Text, ' '),
+        (Token.Operator, '>'),
+        (Token.Text, ' '),
+        (Token.Literal.Number.Integer, '100'),
+        (Token.Punctuation, ';'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_one_space_assignment(lexer):
+    """Test that one space before an equal sign is formatted correctly."""
+    fragment = 'x = 100;\n'
+    tokens = [
+        (Token.Name, 'x'),
+        (Token.Text, ' '),
+        (Token.Punctuation, '='),
+        (Token.Text, ' '),
+        (Token.Literal.Number.Integer, '100'),
+        (Token.Punctuation, ';'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_command_mode(lexer):
+    """
+    MATLAB allows char function arguments to not be enclosed by parentheses
+    or contain quote characters, as long as they are space separated. Test
+    that one common such function is formatted appropriately.
+    """
+    fragment = 'help sin\n'
+    tokens = [
+        (Token.Name, 'help'),
+        (Token.Text, ' '),
+        (Token.Literal.String, 'sin'),
+        (Token.Punctuation, ''),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
author	Bryan W. Weber <bryan.w.weber@gmail.com>	2020-05-06 14:04:30 -0400
committer	GitHub <noreply@github.com>	2020-05-06 20:04:30 +0200
commit	78886ba1b8eda31d3a4092e7bdad2764d0ce00b8 (patch)
tree	4b55eced17c71acaf866240e7d9f512ed7cbc0c8
parent	d090c0be255cc2eef02637e2bebeaab4b5fa9ddd (diff)
download	pygments-git-78886ba1b8eda31d3a4092e7bdad2764d0ce00b8.tar.gz