summary refs log tree commit diff
diff options
context:
space:
mode:
author    Oleh Prypin <oleh@pryp.in>  2021-01-04 18:12:07 +0100
committer GitHub <noreply@github.com>  2021-01-04 18:12:07 +0100
commit    f8b097c61cf86616012b616b0b873384cf8df236 (patch)
tree      21cc9391319e6f284d7248f1a49f269f6a31b2b3
parent    c01ccfead679b1b38a989b674d02771ff6ccbe23 (diff)
download  pygments-git-f8b097c61cf86616012b616b0b873384cf8df236.tar.gz
Update Crystal lexer (#1650)
* crystal: drop all classes from builtins; these aren't normally highlighted ("normally" meaning all other highlighter tools)

* crystal: fix percent-strings, drop Ruby-specific arbitrary delimiters

  It seems that Ruby supports strings such as `%*text*` where `*` can be anything. But Crystal never had anything like that. It does, however, keep `%|text|`, so add a case for that.

* crystal: update keywords and builtins

* crystal: fix string literals and escape sequences

  Update list of escapes. Support Unicode escape sequences. Also remove the Ruby-specific `:@foo` symbol syntax, Crystal doesn't have it.

* crystal: uppercase identifiers aren't always constants

  Make `FOO::Bar` be highlighted like `Foo::Bar` would be, rather than like `FOO`

* crystal: annotations can be namespaced

  Highlight the entire inside part of `@[Foo::Bar]`, not just the `Foo` part (these used to be named 'attributes' but the official name is 'annotations' now, so I also change that)

* fixup! crystal: fix percent-strings, drop Ruby-specific arbitrary delimiters
-rw-r--r--pygments/lexers/crystal.py119
-rw-r--r--tests/examplefiles/test.cr16
-rw-r--r--tests/test_crystal.py237
3 files changed, 282 insertions, 90 deletions
diff --git a/pygments/lexers/crystal.py b/pygments/lexers/crystal.py
index 8c648e3f..e16a01f7 100644
--- a/pygments/lexers/crystal.py
+++ b/pygments/lexers/crystal.py
@@ -89,26 +89,10 @@ class CrystalLexer(ExtendedRegexLexer):
del heredocstack[:]
def gen_crystalstrings_rules():
- def intp_regex_callback(self, match, ctx):
- yield match.start(1), String.Regex, match.group(1) # begin
- nctx = LexerContext(match.group(3), 0, ['interpolated-regex'])
- for i, t, v in self.get_tokens_unprocessed(context=nctx):
- yield match.start(3)+i, t, v
- yield match.start(4), String.Regex, match.group(4) # end[imsx]*
- ctx.pos = match.end()
-
- def intp_string_callback(self, match, ctx):
- yield match.start(1), String.Other, match.group(1)
- nctx = LexerContext(match.group(3), 0, ['interpolated-string'])
- for i, t, v in self.get_tokens_unprocessed(context=nctx):
- yield match.start(3)+i, t, v
- yield match.start(4), String.Other, match.group(4) # end
- ctx.pos = match.end()
-
states = {}
states['strings'] = [
- (r'\:@{0,2}[a-zA-Z_]\w*[!?]?', String.Symbol),
- (words(CRYSTAL_OPERATORS, prefix=r'\:@{0,2}'), String.Symbol),
+ (r'\:\w+[!?]?', String.Symbol),
+ (words(CRYSTAL_OPERATORS, prefix=r'\:'), String.Symbol),
(r":'(\\\\|\\[^\\]|[^'\\])*'", String.Symbol),
# This allows arbitrary text after '\ for simplicity
(r"'(\\\\|\\'|[^']|\\[^'\\]+)'", String.Char),
@@ -130,35 +114,42 @@ class CrystalLexer(ExtendedRegexLexer):
(end, ttype, '#pop'),
]
- # braced quoted strings
+ # https://crystal-lang.org/docs/syntax_and_semantics/literals/string.html#percent-string-literals
for lbrace, rbrace, bracecc, name in \
('\\{', '\\}', '{}', 'cb'), \
('\\[', '\\]', '\\[\\]', 'sb'), \
('\\(', '\\)', '()', 'pa'), \
- ('<', '>', '<>', 'ab'):
+ ('<', '>', '<>', 'ab'), \
+ ('\\|', '\\|', '\\|', 'pi'):
states[name+'-intp-string'] = [
(r'\\' + lbrace, String.Other),
+ ] + (lbrace != rbrace) * [
(lbrace, String.Other, '#push'),
+ ] + [
(rbrace, String.Other, '#pop'),
include('string-intp-escaped'),
(r'[\\#' + bracecc + ']', String.Other),
(r'[^\\#' + bracecc + ']+', String.Other),
]
- states['strings'].append((r'%' + lbrace, String.Other,
+ states['strings'].append((r'%Q?' + lbrace, String.Other,
name+'-intp-string'))
states[name+'-string'] = [
(r'\\[\\' + bracecc + ']', String.Other),
+ ] + (lbrace != rbrace) * [
(lbrace, String.Other, '#push'),
+ ] + [
(rbrace, String.Other, '#pop'),
(r'[\\#' + bracecc + ']', String.Other),
(r'[^\\#' + bracecc + ']+', String.Other),
]
- # http://crystal-lang.org/docs/syntax_and_semantics/literals/array.html
- states['strings'].append((r'%[wi]' + lbrace, String.Other,
+ # https://crystal-lang.org/docs/syntax_and_semantics/literals/array.html#percent-array-literals
+ states['strings'].append((r'%[qwi]' + lbrace, String.Other,
name+'-string'))
states[name+'-regex'] = [
(r'\\[\\' + bracecc + ']', String.Regex),
+ ] + (lbrace != rbrace) * [
(lbrace, String.Regex, '#push'),
+ ] + [
(rbrace + '[imsx]*', String.Regex, '#pop'),
include('string-intp'),
(r'[\\#' + bracecc + ']', String.Regex),
@@ -167,27 +158,6 @@ class CrystalLexer(ExtendedRegexLexer):
states['strings'].append((r'%r' + lbrace, String.Regex,
name+'-regex'))
- # these must come after %<brace>!
- states['strings'] += [
- # %r regex
- (r'(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[imsx]*)',
- intp_regex_callback),
- # regular fancy strings with qsw
- (r'(%[wi]([\W_]))((?:\\\2|(?!\2).)*)(\2)',
- intp_string_callback),
- # special forms of fancy strings after operators or
- # in method calls with braces
- (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
- bygroups(Text, String.Other, None)),
- # and because of fixed width lookbehinds the whole thing a
- # second time for line startings...
- (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
- bygroups(Text, String.Other, None)),
- # all regular fancy strings without qsw
- (r'(%([\[{(<]))((?:\\\2|(?!\2).)*)(\2)',
- intp_string_callback),
- ]
-
return states
tokens = {
@@ -195,10 +165,15 @@ class CrystalLexer(ExtendedRegexLexer):
(r'#.*?$', Comment.Single),
# keywords
(words('''
- abstract asm as begin break case do else elsif end ensure extend ifdef if
- include instance_sizeof next of pointerof private protected rescue return
- require sizeof super then typeof unless until when while with yield
+ abstract asm begin break case do else elsif end ensure extend if in
+ include next of private protected require rescue return select self super
+ then unless until when while with yield
'''.split(), suffix=r'\b'), Keyword),
+ (words('''
+ previous_def forall out uninitialized __DIR__ __FILE__ __LINE__
+ '''.split(), prefix=r'(?<!\.)', suffix=r'\b'), Keyword.Pseudo),
+ # https://crystal-lang.org/docs/syntax_and_semantics/is_a.html
+ (r'\.(is_a\?|nil\?|responds_to\?|as\?|as\b)', Keyword.Pseudo),
(words(['true', 'false', 'nil'], suffix=r'\b'), Keyword.Constant),
# start of function, class and module names
(r'(module|lib)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
@@ -206,29 +181,23 @@ class CrystalLexer(ExtendedRegexLexer):
(r'(def|fun|macro)(\s+)((?:[a-zA-Z_]\w*::)*)',
bygroups(Keyword, Text, Name.Namespace), 'funcname'),
(r'def(?=[*%&^`~+-/\[<>=])', Keyword, 'funcname'),
- (r'(class|struct|union|type|alias|enum)(\s+)((?:[a-zA-Z_]\w*::)*)',
+ (r'(annotation|class|struct|union|type|alias|enum)(\s+)((?:[a-zA-Z_]\w*::)*)',
bygroups(Keyword, Text, Name.Namespace), 'classname'),
- (r'(self|out|uninitialized)\b|(is_a|responds_to)\?', Keyword.Pseudo),
- # macros
+ # https://crystal-lang.org/api/toplevel.html
(words('''
- debugger record pp assert_responds_to spawn parallel
- getter setter property delegate def_hash def_equals def_equals_and_hash
- forward_missing_to
- '''.split(), suffix=r'\b'), Name.Builtin.Pseudo),
- (r'getter[!?]|property[!?]|__(DIR|FILE|LINE)__\b', Name.Builtin.Pseudo),
+ instance_sizeof offsetof pointerof sizeof typeof
+ '''.split(), prefix=r'(?<!\.)', suffix=r'\b'), Keyword.Pseudo),
+ # macros
+ (r'(?<!\.)(debugger\b|p!|pp!|record\b|spawn\b)', Name.Builtin.Pseudo),
# builtins
- # http://crystal-lang.org/api/toplevel.html
(words('''
- Object Value Struct Reference Proc Class Nil Symbol Enum Void
- Bool Number Int Int8 Int16 Int32 Int64 UInt8 UInt16 UInt32 UInt64
- Float Float32 Float64 Char String
- Pointer Slice Range Exception Regex
- Mutex StaticArray Array Hash Set Tuple Deque Box Process File
- Dir Time Channel Concurrent Scheduler
- abort at_exit caller delay exit fork future get_stack_top gets
- lazy loop main p print printf puts
- raise rand read_line sleep sprintf system with_color
+ abort at_exit caller exit gets loop main p pp print printf puts
+ raise rand read_line sleep spawn sprintf system
'''.split(), prefix=r'(?<!\.)', suffix=r'\b'), Name.Builtin),
+ # https://crystal-lang.org/api/Object.html#macro-summary
+ (r'(?<!\.)(((class_)?((getter|property)\b[!?]?|setter\b))|'
+ r'(def_(clone|equals|equals_and_hash|hash)|delegate|forward_missing_to)\b)',
+ Name.Builtin.Pseudo),
# normal heredocs
(r'(?<!\w)(<<-?)(["`\']?)([a-zA-Z_]\w*)(\2)(.*?\n)',
heredoc_callback),
@@ -300,18 +269,18 @@ class CrystalLexer(ExtendedRegexLexer):
(r'\$-[0adFiIlpvw]', Name.Variable.Global),
(r'::', Operator),
include('strings'),
- # chars
+ # https://crystal-lang.org/reference/syntax_and_semantics/literals/char.html
(r'\?(\\[MC]-)*' # modifiers
- r'(\\([\\befnrtv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)'
+ r'(\\([\\abefnrtv#"\']|[0-7]{1,3}|x[a-fA-F0-9]{2}|u[a-fA-F0-9]{4}|u\{[a-fA-F0-9 ]+\})|\S)'
r'(?!\w)',
String.Char),
- (r'[A-Z][A-Z_]+\b', Name.Constant),
+ (r'[A-Z][A-Z_]+\b(?!::|\.)', Name.Constant),
# macro expansion
(r'\{%', String.Interpol, 'in-macro-control'),
(r'\{\{', String.Interpol, 'in-macro-expr'),
- # attributes
- (r'(@\[)(\s*)([A-Z]\w*)',
- bygroups(Operator, Text, Name.Decorator), 'in-attr'),
+ # annotations
+ (r'(@\[)(\s*)([A-Z]\w*(::[A-Z]\w*)*)',
+ bygroups(Operator, Text, Name.Decorator), 'in-annot'),
# this is needed because Crystal attributes can look
# like keywords (class) or like this: ` ?!?
(words(CRYSTAL_OPERATORS, prefix=r'(\.|::)'),
@@ -348,7 +317,9 @@ class CrystalLexer(ExtendedRegexLexer):
(r'#\{', String.Interpol, 'in-intp'),
],
'string-escaped': [
- (r'\\([\\befnstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})', String.Escape)
+ # https://crystal-lang.org/reference/syntax_and_semantics/literals/string.html
+ (r'\\([\\abefnrtv#"\']|[0-7]{1,3}|x[a-fA-F0-9]{2}|u[a-fA-F0-9]{4}|u\{[a-fA-F0-9 ]+\})',
+ String.Escape)
],
'string-intp-escaped': [
include('string-intp'),
@@ -378,7 +349,7 @@ class CrystalLexer(ExtendedRegexLexer):
'in-macro-control': [
(r'\{%', String.Interpol, '#push'),
(r'%\}', String.Interpol, '#pop'),
- (r'for\b|in\b', Keyword),
+ (r'(for|verbatim)\b', Keyword),
include('root'),
],
'in-macro-expr': [
@@ -386,7 +357,7 @@ class CrystalLexer(ExtendedRegexLexer):
(r'\}\}', String.Interpol, '#pop'),
include('root'),
],
- 'in-attr': [
+ 'in-annot': [
(r'\[', Operator, '#push'),
(r'\]', Operator, '#pop'),
include('root'),
diff --git a/tests/examplefiles/test.cr b/tests/examplefiles/test.cr
index baa52d98..3af9040a 100644
--- a/tests/examplefiles/test.cr
+++ b/tests/examplefiles/test.cr
@@ -1954,17 +1954,11 @@ r = %r(regex with slash: /)
world, \
no newlines" # same as "hello world, no newlines"
-# Supports double quotes and nested parenthesis
-%(hello ("world")) # same as "hello (\"world\")"
-
-# Supports double quotes and nested brackets
-%[hello ["world"]] # same as "hello [\"world\"]"
-
-# Supports double quotes and nested curlies
-%{hello {"world"}} # same as "hello {\"world\"}"
-
-# Supports double quotes and nested angles
-%<hello <"world">> # same as "hello <\"world\">"
+%(hello ("world")) # => "hello (\"world\")"
+%[hello ["world"]] # => "hello [\"world\"]"
+%{hello {"world"}} # => "hello {\"world\"}"
+%<hello <"world">> # => "hello <\"world\">"
+%|hello "world"| # => "hello \"world\""
<<-XML
<parent>
diff --git a/tests/test_crystal.py b/tests/test_crystal.py
index fb2503aa..ae0dd7ff 100644
--- a/tests/test_crystal.py
+++ b/tests/test_crystal.py
@@ -104,6 +104,19 @@ def test_interpolation_nested_curly(lexer):
assert list(lexer.get_tokens(fragment)) == tokens
+def test_escaped_interpolation(lexer):
+ fragment = '"\\#{a + b}"\n'
+ # i.e. no actual interpolation
+ tokens = [
+ (String.Double, '"'),
+ (String.Escape, '\\#'),
+ (String.Double, '{a + b}'),
+ (String.Double, '"'),
+ (Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
def test_operator_methods(lexer):
fragment = '([] of Int32).[]?(5)\n'
tokens = [
@@ -113,7 +126,7 @@ def test_operator_methods(lexer):
(Text, ' '),
(Keyword, 'of'),
(Text, ' '),
- (Name.Builtin, 'Int32'),
+ (Name, 'Int32'),
(Punctuation, ')'),
(Operator, '.'),
(Name.Operator, '[]?'),
@@ -155,6 +168,21 @@ def test_numbers(lexer):
assert next(lexer.get_tokens(fragment + '\n'))[0] == Error
+def test_symbols(lexer):
+ for fragment in [':sym_bol', ':\u3042', ':question?']:
+ assert list(lexer.get_tokens(fragment + '\n')) == \
+ [(String.Symbol, fragment), (Text, '\n')]
+
+ fragment = ':"sym bol"\n'
+ tokens = [
+ (String.Symbol, ':"'),
+ (String.Symbol, 'sym bol'),
+ (String.Symbol, '"'),
+ (Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
def test_chars(lexer):
for fragment in ["'a'", "'я'", "'\\u{1234}'", "'\n'"]:
assert list(lexer.get_tokens(fragment + '\n')) == \
@@ -162,6 +190,187 @@ def test_chars(lexer):
assert next(lexer.get_tokens("'abc'"))[0] == Error
+def test_string_escapes(lexer):
+ for body in ['\\n', '\\a', '\\xff', '\\u1234', '\\000', '\\u{0}', '\\u{10AfF9}']:
+ fragment = '"a' + body + 'z"\n'
+ assert list(lexer.get_tokens(fragment)) == [
+ (String.Double, '"'),
+ (String.Double, 'a'),
+ (String.Escape, body),
+ (String.Double, 'z'),
+ (String.Double, '"'),
+ (Text, '\n'),
+ ]
+
+
+def test_empty_percent_strings(lexer):
+ for body in ['%()', '%[]', '%{}', '%<>', '%||']:
+ fragment = '(' + body + ')\n'
+ assert list(lexer.get_tokens(fragment)) == [
+ (Punctuation, '('),
+ (String.Other, body[:-1]),
+ (String.Other, body[-1]),
+ (Punctuation, ')'),
+ (Text, '\n'),
+ ]
+
+
+def test_percent_strings(lexer):
+ fragment = (
+ '%(hello ("world"))\n'
+ '%[hello ["world"]]\n'
+ '%{hello "world"}\n'
+ '%<hello <"world">>\n'
+ '%|hello "world"|\n')
+ tokens = [
+ (String.Other, '%('),
+ (String.Other, 'hello '),
+ (String.Other, '('),
+ (String.Other, '"world"'),
+ (String.Other, ')'),
+ (String.Other, ')'),
+ (Text, '\n'),
+ (String.Other, '%['),
+ (String.Other, 'hello '),
+ (String.Other, '['),
+ (String.Other, '"world"'),
+ (String.Other, ']'),
+ (String.Other, ']'),
+ (Text, '\n'),
+ (String.Other, '%{'),
+ (String.Other, 'hello "world"'),
+ (String.Other, '}'),
+ (Text, '\n'),
+ (String.Other, '%<'),
+ (String.Other, 'hello '),
+ (String.Other, '<'),
+ (String.Other, '"world"'),
+ (String.Other, '>'),
+ (String.Other, '>'),
+ (Text, '\n'),
+ (String.Other, '%|'),
+ (String.Other, 'hello "world"'),
+ (String.Other, '|'),
+ (Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_special_percent_strings(lexer):
+ fragment = '%Q(hello \\n #{name})\n%q(hello \\n #{name})\n%w(foo\\nbar baz)\n'
+ tokens = [
+ (String.Other, '%Q('),
+ (String.Other, 'hello '),
+ (String.Escape, '\\n'),
+ (String.Other, ' '),
+ (String.Interpol, '#{'),
+ (Name, 'name'),
+ (String.Interpol, '}'),
+ (String.Other, ')'),
+ (Text, '\n'),
+ # The ones below have no interpolation.
+ (String.Other, '%q('),
+ (String.Other, 'hello '),
+ (String.Other, '\\'),
+ (String.Other, 'n '),
+ (String.Other, '#'),
+ (String.Other, '{name}'),
+ (String.Other, ')'),
+ (Text, '\n'),
+ (String.Other, '%w('),
+ (String.Other, 'foo'),
+ (String.Other, '\\'),
+ (String.Other, 'nbar baz'),
+ (String.Other, ')'),
+ (Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_pseudo_keywords(lexer):
+ fragment = (
+ 'def f(x : T, line = __LINE__) forall T\n'
+ 'if x.is_a?(String)\n'
+ 'pp! x\n'
+ 'end\n'
+ 'end\n')
+ tokens = [
+ (Keyword, 'def'),
+ (Text, ' '),
+ (Name.Function, 'f'),
+ (Punctuation, '('),
+ (Name, 'x'),
+ (Text, ' '),
+ (Punctuation, ':'),
+ (Text, ' '),
+ (Name, 'T'),
+ (Punctuation, ','),
+ (Text, ' '),
+ (Name, 'line'),
+ (Text, ' '),
+ (Operator, '='),
+ (Text, ' '),
+ (Keyword.Pseudo, '__LINE__'),
+ (Punctuation, ')'),
+ (Text, ' '),
+ (Keyword.Pseudo, 'forall'),
+ (Text, ' '),
+ (Name, 'T'),
+ (Text, '\n'),
+ (Keyword, 'if'),
+ (Text, ' '),
+ (Name, 'x'),
+ (Keyword.Pseudo, '.is_a?'),
+ (Punctuation, '('),
+ (Name, 'String'),
+ (Punctuation, ')'),
+ (Text, '\n'),
+ (Name.Builtin.Pseudo, 'pp!'),
+ (Text, ' '),
+ (Name, 'x'),
+ (Text, '\n'),
+ (Keyword, 'end'),
+ (Text, '\n'),
+ (Keyword, 'end'),
+ (Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_pseudo_builtins(lexer):
+ fragment = 'record Cls do\ndef_equals s\nend\n'
+ tokens = [
+ (Name.Builtin.Pseudo, 'record'),
+ (Text, ' '),
+ (Name, 'Cls'),
+ (Text, ' '),
+ (Keyword, 'do'),
+ (Text, '\n'),
+ (Name.Builtin.Pseudo, 'def_equals'),
+ (Text, ' '),
+ (Name, 's'),
+ (Text, '\n'),
+ (Keyword, 'end'),
+ (Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_constant_and_module(lexer):
+ fragment = 'HTTP\nHTTP::Server.new\n'
+ tokens = [
+ (Name.Constant, 'HTTP'),
+ (Text, '\n'),
+ (Name, 'HTTP'),
+ (Operator, '::'),
+ (Name, 'Server'),
+ (Operator, '.'),
+ (Name, 'new'),
+ (Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
def test_macro(lexer):
fragment = (
'def<=>(other : self) : Int\n'
@@ -179,12 +388,12 @@ def test_macro(lexer):
(Text, ' '),
(Punctuation, ':'),
(Text, ' '),
- (Keyword.Pseudo, 'self'),
+ (Keyword, 'self'),
(Punctuation, ')'),
(Text, ' '),
(Punctuation, ':'),
(Text, ' '),
- (Name.Builtin, 'Int'),
+ (Name, 'Int'),
(Text, '\n'),
(String.Interpol, '{%'),
(Keyword, 'for'),
@@ -276,7 +485,7 @@ def test_lib(lexer):
(Text, ' '),
(Punctuation, ':'),
(Text, ' '),
- (Name.Builtin, 'Void'),
+ (Name, 'Void'),
(Operator, '*'),
(Punctuation, ')'),
(Text, ' '),
@@ -284,7 +493,7 @@ def test_lib(lexer):
(Text, ' '),
(Name, 'LibC'),
(Operator, '::'),
- (Name.Builtin, 'Int'),
+ (Name, 'Int'),
(Text, '\n'),
(Keyword, 'end'),
(Text, '\n')
@@ -312,3 +521,21 @@ def test_escaped_bracestring(lexer):
(Text, '\n'),
]
assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_annotation(lexer):
+ fragment = '@[FOO::Bar::Baz(opt: "xx")]\n'
+ tokens = [
+ (Operator, '@['),
+ (Name.Decorator, 'FOO::Bar::Baz'),
+ (Punctuation, '('),
+ (String.Symbol, 'opt'),
+ (Punctuation, ':'),
+ (Text, ' '),
+ (String.Double, '"'),
+ (String.Double, 'xx'),
+ (String.Double, '"'),
+ (Punctuation, ')'),
+ (Operator, ']'),
+ (Text, '\n'),
+ ]