diff options
author | Paweł Fertyk <pfertyk@users.noreply.github.com> | 2020-06-01 14:48:05 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-06-01 14:48:05 +0200 |
commit | e5dc231aa0d780395436e55c621e22dacfaf97de (patch) | |
tree | 55adfa89291b5695b24652d86f9a85730032a14d | |
parent | 231919b4ec7d6d0cb23940d414dd03d262d6a048 (diff) | |
download | pygments-git-e5dc231aa0d780395436e55c621e22dacfaf97de.tar.gz |
Add GDScript lexer (#1457)
* Added GDScript lexer
* Fix regular expressions in GDScript lexer
* Update GDScript lexer with the current version from Godot docs
* Add tests for GDScript lexer
* Update authors
* Add an example file for GDScript
* Implement analyze_text for GAP and GDScript
* Fix example file name in tests
* Update license
Co-authored-by: Daniel J. Ramirez <djrmuv@gmail.com>
-rw-r--r-- | AUTHORS | 2 | ||||
-rw-r--r-- | pygments/lexers/_mapping.py | 1 | ||||
-rw-r--r-- | pygments/lexers/algebra.py | 19 | ||||
-rw-r--r-- | pygments/lexers/gdscript.py | 359 | ||||
-rw-r--r-- | tests/examplefiles/gdscript_example.gd | 77 | ||||
-rw-r--r-- | tests/test_gdscript.py | 167 | ||||
-rw-r--r-- | tests/test_lexers_other.py | 16 |
7 files changed, 639 insertions, 2 deletions
def analyse_text(text):
    """Content-sniffing hook for GAP source (Pygments ``analyse_text``).

    Scores 0.7 if declaration-style keywords appear in *text*, another
    0.7 if implementation-style keywords appear, capped at 1.0.
    """
    # Declaration part of a GAP package (``.gd`` files).
    declaration = re.compile(
        r"(InstallTrueMethod|Declare(Attribute|Category|Filter|Operation"
        r"|GlobalFunction|Synonym|SynonymAttr|Property))"
    )
    # Implementation part (``.gi`` files).
    implementation = re.compile(
        r"(DeclareRepresentation|Install(GlobalFunction|Method|"
        r"ImmediateMethod|OtherMethod)|New(Family|Type)|Objectify)"
    )
    score = sum(
        0.7 for pattern in (declaration, implementation) if pattern.search(text)
    )
    return min(score, 1.0)
r"|GlobalFunction|Synonym|SynonymAttr|Property))", text + ): + score += 0.7 + + # Implementation part + if re.search( + r"(DeclareRepresentation|Install(GlobalFunction|Method|" + + r"ImmediateMethod|OtherMethod)|New(Family|Type)|Objectify)", text + ): + score += 0.7 + + return min(score, 1.0) + class MathematicaLexer(RegexLexer): """ diff --git a/pygments/lexers/gdscript.py b/pygments/lexers/gdscript.py new file mode 100644 index 00000000..8dec78fb --- /dev/null +++ b/pygments/lexers/gdscript.py @@ -0,0 +1,359 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.gdscript + ~~~~~~~~~~~~~~~~~~~~~~ + + Lexer for GDScript. + + :copyright: Copyright 2xxx by The Godot Engine Community + :license: BSD, see LICENSE for details. + + modified by Daniel J. Ramirez <djrmuv@gmail.com> based on the original python.py pygment +""" + +import re + +from pygments.lexer import ( + RegexLexer, + include, + bygroups, + default, + words, + combined, +) +from pygments.token import ( + Text, + Comment, + Operator, + Keyword, + Name, + String, + Number, + Punctuation, +) + +__all__ = ["GDScriptLexer"] + +line_re = re.compile(".*?\n") + + +class GDScriptLexer(RegexLexer): + """ + For `GDScript source code <https://www.godotengine.org>`_. + """ + + name = "GDScript" + aliases = ["gdscript", "gd"] + filenames = ["*.gd"] + mimetypes = ["text/x-gdscript", "application/x-gdscript"] + + def innerstring_rules(ttype): + return [ + # the old style '%s' % (...) string formatting + ( + r"%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?" 
+ "[hlL]?[E-GXc-giorsux%]", + String.Interpol, + ), + # backslashes, quotes and formatting signs must be parsed one at a time + (r'[^\\\'"%\n]+', ttype), + (r'[\'"\\]', ttype), + # unhandled string formatting sign + (r"%", ttype), + # newlines are an error (use "nl" state) + ] + + tokens = { + "root": [ + (r"\n", Text), + ( + r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")', + bygroups(Text, String.Affix, String.Doc), + ), + ( + r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", + bygroups(Text, String.Affix, String.Doc), + ), + (r"[^\S\n]+", Text), + (r"#.*$", Comment.Single), + (r"[]{}:(),;[]", Punctuation), + (r"\\\n", Text), + (r"\\", Text), + (r"(in|and|or|not)\b", Operator.Word), + ( + r"!=|==|<<|>>|&&|\+=|-=|\*=|/=|%=|&=|\|=|\|\||[-~+/*%=<>&^.!|$]", + Operator, + ), + include("keywords"), + (r"(func)((?:\s|\\\s)+)", bygroups(Keyword, Text), "funcname"), + (r"(class)((?:\s|\\\s)+)", bygroups(Keyword, Text), "classname"), + include("builtins"), + ( + '([rR]|[uUbB][rR]|[rR][uUbB])(""")', + bygroups(String.Affix, String.Double), + "tdqs", + ), + ( + "([rR]|[uUbB][rR]|[rR][uUbB])(''')", + bygroups(String.Affix, String.Single), + "tsqs", + ), + ( + '([rR]|[uUbB][rR]|[rR][uUbB])(")', + bygroups(String.Affix, String.Double), + "dqs", + ), + ( + "([rR]|[uUbB][rR]|[rR][uUbB])(')", + bygroups(String.Affix, String.Single), + "sqs", + ), + ( + '([uUbB]?)(""")', + bygroups(String.Affix, String.Double), + combined("stringescape", "tdqs"), + ), + ( + "([uUbB]?)(''')", + bygroups(String.Affix, String.Single), + combined("stringescape", "tsqs"), + ), + ( + '([uUbB]?)(")', + bygroups(String.Affix, String.Double), + combined("stringescape", "dqs"), + ), + ( + "([uUbB]?)(')", + bygroups(String.Affix, String.Single), + combined("stringescape", "sqs"), + ), + include("name"), + include("numbers"), + ], + "keywords": [ + ( + words( + ( + "and", + "in", + "not", + "or", + "as", + "breakpoint", + "class", + "class_name", + "extends", + "is", + "func", + "setget", + "signal", + "tool", + "const", + 
"enum", + "export", + "onready", + "static", + "var", + "break", + "continue", + "if", + "elif", + "else", + "for", + "pass", + "return", + "match", + "while", + "remote", + "master", + "puppet", + "remotesync", + "mastersync", + "puppetsync", + ), + suffix=r"\b", + ), + Keyword, + ), + ], + "builtins": [ + ( + words( + ( + "Color8", + "ColorN", + "abs", + "acos", + "asin", + "assert", + "atan", + "atan2", + "bytes2var", + "ceil", + "char", + "clamp", + "convert", + "cos", + "cosh", + "db2linear", + "decimals", + "dectime", + "deg2rad", + "dict2inst", + "ease", + "exp", + "floor", + "fmod", + "fposmod", + "funcref", + "hash", + "inst2dict", + "instance_from_id", + "is_inf", + "is_nan", + "lerp", + "linear2db", + "load", + "log", + "max", + "min", + "nearest_po2", + "pow", + "preload", + "print", + "print_stack", + "printerr", + "printraw", + "prints", + "printt", + "rad2deg", + "rand_range", + "rand_seed", + "randf", + "randi", + "randomize", + "range", + "round", + "seed", + "sign", + "sin", + "sinh", + "sqrt", + "stepify", + "str", + "str2var", + "tan", + "tan", + "tanh", + "type_exist", + "typeof", + "var2bytes", + "var2str", + "weakref", + "yield", + ), + prefix=r"(?<!\.)", + suffix=r"\b", + ), + Name.Builtin, + ), + (r"((?<!\.)(self|false|true)|(PI|TAU|NAN|INF)" r")\b", Name.Builtin.Pseudo), + ( + words( + ( + "bool", + "int", + "float", + "String", + "NodePath", + "Vector2", + "Rect2", + "Transform2D", + "Vector3", + "Rect3", + "Plane", + "Quat", + "Basis", + "Transform", + "Color", + "RID", + "Object", + "NodePath", + "Dictionary", + "Array", + "PackedByteArray", + "PackedInt32Array", + "PackedInt64Array", + "PackedFloat32Array", + "PackedFloat64Array", + "PackedStringArray", + "PackedVector2Array", + "PackedVector3Array", + "PackedColorArray", + "null", + ), + prefix=r"(?<!\.)", + suffix=r"\b", + ), + Name.Builtin.Type, + ), + ], + "numbers": [ + (r"(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?", Number.Float), + (r"\d+[eE][+-]?[0-9]+j?", Number.Float), + 
(r"0[xX][a-fA-F0-9]+", Number.Hex), + (r"\d+j?", Number.Integer), + ], + "name": [(r"[a-zA-Z_]\w*", Name)], + "funcname": [(r"[a-zA-Z_]\w*", Name.Function, "#pop"), default("#pop")], + "classname": [(r"[a-zA-Z_]\w*", Name.Class, "#pop")], + "stringescape": [ + ( + r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|' + r"U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})", + String.Escape, + ) + ], + "strings-single": innerstring_rules(String.Single), + "strings-double": innerstring_rules(String.Double), + "dqs": [ + (r'"', String.Double, "#pop"), + (r'\\\\|\\"|\\\n', String.Escape), # included here for raw strings + include("strings-double"), + ], + "sqs": [ + (r"'", String.Single, "#pop"), + (r"\\\\|\\'|\\\n", String.Escape), # included here for raw strings + include("strings-single"), + ], + "tdqs": [ + (r'"""', String.Double, "#pop"), + include("strings-double"), + (r"\n", String.Double), + ], + "tsqs": [ + (r"'''", String.Single, "#pop"), + include("strings-single"), + (r"\n", String.Single), + ], + } + + def analyse_text(text): + score = 0.0 + + if re.search( + r"func (_ready|_init|_input|_process|_unhandled_input)", text + ): + score += 0.8 + + if re.search( + r"(extends |class_name |onready |preload|load|setget|func [^_])", + text + ): + score += 0.4 + + if re.search(r"(var|const|enum|export|signal|tool)", text): + score += 0.2 + + return min(score, 1.0) diff --git a/tests/examplefiles/gdscript_example.gd b/tests/examplefiles/gdscript_example.gd new file mode 100644 index 00000000..d1043fc3 --- /dev/null +++ b/tests/examplefiles/gdscript_example.gd @@ -0,0 +1,77 @@ +# A file is a class! 
# Inheritance

extends BaseClass

# (optional) class definition with a custom icon

class_name MyClass, "res://path/to/optional/icon.svg"


# Member variables

var a = 5
var s = "Hello"
var arr = [1, 2, 3]
var dict = {"key": "value", 2: 3}
var typed_var: int
var inferred_type := "String"

# Constants

const ANSWER = 42
const THE_NAME = "Charly"

# Enums

enum {UNIT_NEUTRAL, UNIT_ENEMY, UNIT_ALLY}
enum Named {THING_1, THING_2, ANOTHER_THING = -1}

# Built-in vector types

var v2 = Vector2(1, 2)
var v3 = Vector3(1, 2, 3)


# Function

func some_function(param1, param2):
    var local_var = 5

    if param1 < local_var:
        print(param1)
    elif param2 > 5:
        print(param2)
    else:
        print("Fail!")

    for i in range(20):
        print(i)

    while param2 != 0:
        param2 -= 1

    var local_var2 = param1 + 3
    return local_var2


# Functions override functions with the same name on the base/parent class.
# If you still want to call them, use '.' (like 'super' in other languages).

func something(p1, p2):
    .something(p1, p2)


# Inner class

class Something:
    var a = 10


# Constructor

func _init():
    print("Constructed!")
    var lv = Something.new()
    print(lv.a)
+""" + +import pytest + +from pygments.lexers import GDScriptLexer +from pygments.token import Token + + +@pytest.fixture(scope="module") +def lexer(): + yield GDScriptLexer() + + +def test_variable_declaration_and_assigment(lexer): + fragment = "var abc = 5.4" + tokens = [ + (Token.Keyword, "var"), + (Token.Text, " "), + (Token.Name, "abc"), + (Token.Text, " "), + (Token.Operator, "="), + (Token.Text, " "), + (Token.Number.Float, "5.4"), + (Token.Text, "\n"), + ] + assert list(lexer.get_tokens(fragment)) == tokens + + +def test_simple_function(lexer): + fragment = "func abc(arg):\n\tprint(\"Hello, World!\")" + tokens = [ + (Token.Keyword, "func"), + (Token.Text, " "), + (Token.Name, "abc"), + (Token.Punctuation, "("), + (Token.Name, "arg"), + (Token.Punctuation, ")"), + (Token.Punctuation, ":"), + (Token.Text, "\n"), + (Token.Text, "\t"), + (Token.Name.Builtin, "print"), + (Token.Punctuation, "("), + (Token.Literal.String.Double, "\""), + (Token.Literal.String.Double, "Hello, World!"), + (Token.Literal.String.Double, "\""), + (Token.Punctuation, ")"), + (Token.Text, "\n"), + ] + assert list(lexer.get_tokens(fragment)) == tokens + + +def test_function_with_types(lexer): + fragment = "func abc(arg: String) -> void:\n\tprint(\"Hello\", arg)" + tokens = [ + (Token.Keyword, "func"), + (Token.Text, " "), + (Token.Name, "abc"), + (Token.Punctuation, "("), + (Token.Name, "arg"), + (Token.Punctuation, ":"), + (Token.Text, " "), + (Token.Name.Builtin.Type, "String"), + (Token.Punctuation, ")"), + (Token.Text, " "), + (Token.Operator, "-"), + (Token.Operator, ">"), + (Token.Text, " "), + (Token.Name, "void"), + (Token.Punctuation, ":"), + (Token.Text, "\n"), + (Token.Text, "\t"), + (Token.Name.Builtin, "print"), + (Token.Punctuation, "("), + (Token.Literal.String.Double, "\""), + (Token.Literal.String.Double, "Hello"), + (Token.Literal.String.Double, "\""), + (Token.Punctuation, ","), + (Token.Text, " "), + (Token.Name, "arg"), + (Token.Punctuation, ")"), + (Token.Text, 
"\n"), + ] + assert list(lexer.get_tokens(fragment)) == tokens + + +def test_signal(lexer): + fragment = "signal sig (arg1, arg2)" + tokens = [ + (Token.Keyword, "signal"), + (Token.Text, " "), + (Token.Name, "sig"), + (Token.Text, " "), + (Token.Punctuation, "("), + (Token.Name, "arg1"), + (Token.Punctuation, ","), + (Token.Text, " "), + (Token.Name, "arg2"), + (Token.Punctuation, ")"), + (Token.Text, "\n"), + ] + assert list(lexer.get_tokens(fragment)) == tokens + + +def test_export_array(lexer): + fragment = "export (Array, AudioStream) var streams" + tokens = [ + (Token.Keyword, "export"), + (Token.Text, " "), + (Token.Punctuation, "("), + (Token.Name.Builtin.Type, "Array"), + (Token.Punctuation, ","), + (Token.Text, " "), + (Token.Name, "AudioStream"), + (Token.Punctuation, ")"), + (Token.Text, " "), + (Token.Keyword, "var"), + (Token.Text, " "), + (Token.Name, "streams"), + (Token.Text, "\n"), + ] + assert list(lexer.get_tokens(fragment)) == tokens + + +def test_inner_class(lexer): + fragment = "class InnerClass:\n\tvar a = 5" + tokens = [ + (Token.Keyword, "class"), + (Token.Text, " "), + (Token.Name, "InnerClass"), + (Token.Punctuation, ":"), + (Token.Text, "\n"), + (Token.Text, "\t"), + (Token.Keyword, "var"), + (Token.Text, " "), + (Token.Name, "a"), + (Token.Text, " "), + (Token.Operator, "="), + (Token.Text, " "), + (Token.Literal.Number.Integer, "5"), + (Token.Text, "\n"), + ] + assert list(lexer.get_tokens(fragment)) == tokens + + +def test_comment(lexer): + fragment = "# Comment" + tokens = [ + (Token.Comment.Single, "# Comment"), + (Token.Text, "\n"), + ] + assert list(lexer.get_tokens(fragment)) == tokens + + +def test_multiline_string(lexer): + fragment = '"""\nMultiline\n"""' + tokens = [ + (Token.Literal.String.Doc, '"""\nMultiline\n"""'), + (Token.Text, "\n"), + ] + assert list(lexer.get_tokens(fragment)) == tokens diff --git a/tests/test_lexers_other.py b/tests/test_lexers_other.py index 3e8d3fc1..70ffba17 100644 --- 
from pygments.lexers import (
    EasytrieveLexer, GAPLexer, GDScriptLexer, JclLexer, RexxLexer, guess_lexer
)


@pytest.mark.parametrize("file_path, lexer", [
    ("gdscript_example.gd", GDScriptLexer),
    ("example.gd", GAPLexer),
])
def test_chooses_correct_lexer_for_example_files(file_path, lexer):
    """guess_lexer must pick the expected lexer for each .gd example file."""
    with open(_example_file_path(file_path), "rb") as handle:
        source = handle.read().decode("utf-8")
    guessed = guess_lexer(source)
    assert guessed.name == lexer.name