""" pygments.lexers.julia ~~~~~~~~~~~~~~~~~~~~~ Lexers for the Julia language. :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \ words, include from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Generic, Whitespace from pygments.util import shebang_matches from pygments.lexers._julia_builtins import OPERATORS_LIST, DOTTED_OPERATORS_LIST, \ KEYWORD_LIST, BUILTIN_LIST, LITERAL_LIST __all__ = ['JuliaLexer', 'JuliaConsoleLexer'] # see https://docs.julialang.org/en/v1/manual/variables/#Allowed-Variable-Names allowed_variable = \ '(?:[a-zA-Z_\u00A1-\U0010ffff][a-zA-Z_0-9!\u00A1-\U0010ffff]*)' # see https://github.com/JuliaLang/julia/blob/master/src/flisp/julia_opsuffs.h operator_suffixes = r'[²³¹ʰʲʳʷʸˡˢˣᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂᵃᵇᵈᵉᵍᵏᵐᵒᵖᵗᵘᵛᵝᵞᵟᵠᵡᵢᵣᵤᵥᵦᵧᵨᵩᵪᶜᶠᶥᶦᶫᶰᶸᶻᶿ′″‴‵‶‷⁗⁰ⁱ⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ⁿ₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎ₐₑₒₓₕₖₗₘₙₚₛₜⱼⱽ]*' class JuliaLexer(RegexLexer): """ For Julia source code. .. versionadded:: 1.6 """ name = 'Julia' url = 'https://julialang.org/' aliases = ['julia', 'jl'] filenames = ['*.jl'] mimetypes = ['text/x-julia', 'application/x-julia'] tokens = { 'root': [ (r'\n', Whitespace), (r'[^\S\n]+', Whitespace), (r'#=', Comment.Multiline, "blockcomment"), (r'#.*$', Comment), (r'[\[\](),;]', Punctuation), # symbols # intercept range expressions first (r'(' + allowed_variable + r')(\s*)(:)(' + allowed_variable + ')', bygroups(Name, Whitespace, Operator, Name)), # then match :name which does not follow closing brackets, digits, or the # ::, <:, and :> operators (r'(?\d.])(:' + allowed_variable + ')', String.Symbol), # type assertions - excludes expressions like ::typeof(sin) and ::avec[1] (r'(?<=::)(\s*)(' + allowed_variable + r')\b(?![(\[])', bygroups(Whitespace, Keyword.Type)), # type comparisons # - MyType <: A or MyType >: A ('(' + allowed_variable + r')(\s*)([<>]:)(\s*)(' + allowed_variable + r')\b(?![(\[])', bygroups(Keyword.Type, Whitespace, Operator, Whitespace, Keyword.Type)), # - <: B or >: B (r'([<>]:)(\s*)(' + allowed_variable + r')\b(?![(\[])', bygroups(Operator, Whitespace, Keyword.Type)), # - A <: or A >: (r'\b(' + allowed_variable + r')(\s*)([<>]:)', bygroups(Keyword.Type, Whitespace, Operator)), # operators # Suffixes aren't actually allowed on all operators, but we'll ignore that # since those cases are invalid Julia code. (words([*OPERATORS_LIST, *DOTTED_OPERATORS_LIST], suffix=operator_suffixes), Operator), (words(['.' + o for o in DOTTED_OPERATORS_LIST], suffix=operator_suffixes), Operator), (words(['...', '..']), Operator), # NOTE # Patterns below work only for definition sites and thus hardly reliable. 
            #
            # functions
            # (r'(function)(\s+)(' + allowed_variable + ')',
            #  bygroups(Keyword, Text, Name.Function)),

            # chars
            (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,3}|\\u[a-fA-F0-9]{1,4}|"
             r"\\U[a-fA-F0-9]{1,6}|[^\\\'\n])'", String.Char),

            # try to match trailing transpose
            (r'(?<=[.\w)\]])(\'' + operator_suffixes + ')+', Operator),

            # raw strings
            (r'(raw)(""")', bygroups(String.Affix, String), 'tqrawstring'),
            (r'(raw)(")', bygroups(String.Affix, String), 'rawstring'),
            # regular expressions
            (r'(r)(""")', bygroups(String.Affix, String.Regex), 'tqregex'),
            (r'(r)(")', bygroups(String.Affix, String.Regex), 'regex'),
            # other strings
            (r'(' + allowed_variable + ')?(""")',
             bygroups(String.Affix, String), 'tqstring'),
            (r'(' + allowed_variable + ')?(")',
             bygroups(String.Affix, String), 'string'),

            # backticks
            (r'(' + allowed_variable + ')?(```)',
             bygroups(String.Affix, String.Backtick), 'tqcommand'),
            (r'(' + allowed_variable + ')?(`)',
             bygroups(String.Affix, String.Backtick), 'command'),

            # type names
            # - names that begin a curly expression
            ('(' + allowed_variable + r')(\{)',
             bygroups(Keyword.Type, Punctuation), 'curly'),
            # - names as part of bare 'where'
            (r'(where)(\s+)(' + allowed_variable + ')',
             bygroups(Keyword, Whitespace, Keyword.Type)),
            # - curly expressions in general
            (r'(\{)', Punctuation, 'curly'),
            # - names as part of type declaration
            (r'(abstract|primitive)([ \t]+)(type\b)([\s()]+)(' +
             allowed_variable + r')',
             bygroups(Keyword, Whitespace, Keyword, Text, Keyword.Type)),
            (r'(mutable(?=[ \t]))?([ \t]+)?(struct\b)([\s()]+)(' +
             allowed_variable + r')',
             bygroups(Keyword, Whitespace, Keyword, Text, Keyword.Type)),

            # macros
            (r'@' + allowed_variable, Name.Decorator),
            (words([*OPERATORS_LIST, '..', '.', *DOTTED_OPERATORS_LIST],
                   prefix='@', suffix=operator_suffixes), Name.Decorator),

            # keywords
            (words(KEYWORD_LIST, suffix=r'\b'), Keyword),
            # builtin types
            (words(BUILTIN_LIST, suffix=r'\b'), Keyword.Type),
            # builtin literals
            (words(LITERAL_LIST, suffix=r'\b'), Name.Builtin),

            # names
            (allowed_variable, Name),

            # numbers
            (r'(\d+((_\d+)+)?\.(?!\.)(\d+((_\d+)+)?)?|\.\d+((_\d+)+)?)([eEf][+-]?[0-9]+)?',
             Number.Float),
            (r'\d+((_\d+)+)?[eEf][+-]?[0-9]+', Number.Float),
            (r'0x[a-fA-F0-9]+((_[a-fA-F0-9]+)+)?(\.([a-fA-F0-9]+((_[a-fA-F0-9]+)+)?)?)?p[+-]?\d+',
             Number.Float),
            (r'0b[01]+((_[01]+)+)?', Number.Bin),
            (r'0o[0-7]+((_[0-7]+)+)?', Number.Oct),
            (r'0x[a-fA-F0-9]+((_[a-fA-F0-9]+)+)?', Number.Hex),
            (r'\d+((_\d+)+)?', Number.Integer),

            # single dot operator matched last to permit e.g. ".1" as a float
            (words(['.']), Operator),
        ],

        "blockcomment": [
            (r'[^=#]', Comment.Multiline),
            (r'#=', Comment.Multiline, '#push'),
            (r'=#', Comment.Multiline, '#pop'),
            (r'[=#]', Comment.Multiline),
        ],

        'curly': [
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
            (allowed_variable, Keyword.Type),
            include('root'),
        ],

        'tqrawstring': [
            (r'"""', String, '#pop'),
            (r'([^"]|"[^"][^"])+', String),
        ],
        'rawstring': [
            (r'"', String, '#pop'),
            (r'\\"', String.Escape),
            (r'([^"\\]|\\[^"])+', String),
        ],

        # Interpolation is defined as "$" followed by the shortest full
        # expression, which is something we can't parse. Include the most
        # common cases here: $word, and $(paren'd expr).
        'interp': [
            (r'\$' + allowed_variable, String.Interpol),
            (r'(\$)(\()', bygroups(String.Interpol, Punctuation), 'in-intp'),
        ],
        'in-intp': [
            (r'\(', Punctuation, '#push'),
            (r'\)', Punctuation, '#pop'),
            include('root'),
        ],

        'string': [
            (r'(")(' + allowed_variable + r'|\d+)?',
             bygroups(String, String.Affix), '#pop'),
            # FIXME: This escape pattern is not perfect.
(r'\\([\\"\'$nrbtfav]|(x|u|U)[a-fA-F0-9]+|\d+)', String.Escape), include('interp'), # @printf and @sprintf formats (r'%[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[E-GXc-giorsux%]', String.Interpol), (r'[^"$%\\]+', String), (r'.', String), ], 'tqstring': [ (r'(""")(' + allowed_variable + r'|\d+)?', bygroups(String, String.Affix), '#pop'), (r'\\([\\"\'$nrbtfav]|(x|u|U)[a-fA-F0-9]+|\d+)', String.Escape), include('interp'), (r'[^"$%\\]+', String), (r'.', String), ], 'regex': [ (r'(")([imsxa]*)?', bygroups(String.Regex, String.Affix), '#pop'), (r'\\"', String.Regex), (r'[^\\"]+', String.Regex), ], 'tqregex': [ (r'(""")([imsxa]*)?', bygroups(String.Regex, String.Affix), '#pop'), (r'[^"]+', String.Regex), ], 'command': [ (r'(`)(' + allowed_variable + r'|\d+)?', bygroups(String.Backtick, String.Affix), '#pop'), (r'\\[`$]', String.Escape), include('interp'), (r'[^\\`$]+', String.Backtick), (r'.', String.Backtick), ], 'tqcommand': [ (r'(```)(' + allowed_variable + r'|\d+)?', bygroups(String.Backtick, String.Affix), '#pop'), (r'\\\$', String.Escape), include('interp'), (r'[^\\`$]+', String.Backtick), (r'.', String.Backtick), ], } def analyse_text(text): return shebang_matches(text, r'julia') class JuliaConsoleLexer(Lexer): """ For Julia console sessions. Modeled after MatlabSessionLexer. .. versionadded:: 1.6 """ name = 'Julia console' aliases = ['jlcon', 'julia-repl'] def get_tokens_unprocessed(self, text): jllexer = JuliaLexer(**self.options) start = 0 curcode = '' insertions = [] output = False error = False for line in text.splitlines(keepends=True): if line.startswith('julia>'): insertions.append((len(curcode), [(0, Generic.Prompt, line[:6])])) curcode += line[6:] output = False error = False elif line.startswith('help?>') or line.startswith('shell>'): yield start, Generic.Prompt, line[:6] yield start + 6, Text, line[6:] output = False error = False elif line.startswith(' ') and not output: insertions.append((len(curcode), [(0, Whitespace, line[:6])])) curcode += line[6:] else: if curcode: yield from do_insertions( insertions, jllexer.get_tokens_unprocessed(curcode)) curcode = '' insertions = [] if line.startswith('ERROR: ') or error: yield start, Generic.Error, line error = True else: yield start, Generic.Output, line output = True start += len(line) if curcode: yield from do_insertions( insertions, jllexer.get_tokens_unprocessed(curcode))