diff options
author | Matthäus G. Chajdas <dev@anteru.net> | 2021-02-14 09:07:11 +0100 |
---|---|---|
committer | Matthäus G. Chajdas <dev@anteru.net> | 2021-02-14 09:07:11 +0100 |
commit | a2579d3ab55857c1881c172698498ed088b51607 (patch) | |
tree | bac341ca20b335af8ff991625a66455c926a42dd | |
parent | 87bc368e02bacb9d2333592a3ea2e619c5b94578 (diff) | |
parent | 221bf885d8231d94e66ea8cc90eca8433b61805e (diff) | |
download | pygments-git-a2579d3ab55857c1881c172698498ed088b51607.tar.gz |
Merge branch 'issue-253-ruby-unicode' of https://github.com/kurtmckee/pygments into kurtmckee-issue-253-ruby-unicode
-rw-r--r-- | pygments/lexers/ruby.py | 10 |
-rw-r--r-- | tests/test_ruby.py | 185 |
2 files changed, 192 insertions, 3 deletions
```diff
diff --git a/pygments/lexers/ruby.py b/pygments/lexers/ruby.py
index d2244cf8..71b5a89b 100644
--- a/pygments/lexers/ruby.py
+++ b/pygments/lexers/ruby.py
@@ -329,9 +329,13 @@ class RubyLexer(ExtendedRegexLexer):
         ],
         'funcname': [
             (r'\(', Punctuation, 'defexpr'),
-            (r'(?:([a-zA-Z_]\w*)(\.))?'
-             r'([a-zA-Z_]\w*[!?]?|\*\*?|[-+]@?|'
-             r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)',
+            (r'(?:([a-zA-Z_]\w*)(\.))?'  # optional scope name, like "self."
+             r'('
+                r'[a-zA-Z\u0080-\uffff][a-zA-Z0-9_\u0080-\uffff]*[!?=]?'  # method name
+                r'|!=|!~|=~|\*\*?|[-+!~]@?|[/%&|^]|<=>|<[<=]?|>[>=]?|===?'  # or operator override
+                r'|\[\]=?'  # or element reference/assignment override
+                r'|`'  # or the undocumented backtick override
+             r')',
              bygroups(Name.Class, Operator, Name.Function), '#pop'),
             default('#pop')
         ],
diff --git a/tests/test_ruby.py b/tests/test_ruby.py
new file mode 100644
index 00000000..9697bea8
--- /dev/null
+++ b/tests/test_ruby.py
@@ -0,0 +1,185 @@
+# -*- coding: utf-8 -*-
+"""
+    Basic RubyLexer Test
+    ~~~~~~~~~~~~~~~~~~~~
+
+    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import pytest
+
+from pygments.token import Name, Number, Operator, Text, Token
+from pygments.lexers.ruby import RubyLexer
+
+
+@pytest.fixture(scope='module')
+def lexer():
+    yield RubyLexer()
+
+
+def test_range_syntax1(lexer):
+    fragment = u'1..3\n'
+    tokens = [
+        (Number.Integer, u'1'),
+        (Operator, u'..'),
+        (Number.Integer, u'3'),
+        (Text, u'\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_range_syntax2(lexer):
+    fragment = u'1...3\n'
+    tokens = [
+        (Number.Integer, u'1'),
+        (Operator, u'...'),
+        (Number.Integer, u'3'),
+        (Text, u'\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_range_syntax3(lexer):
+    fragment = u'1 .. 3\n'
+    tokens = [
+        (Number.Integer, u'1'),
+        (Text, u' '),
+        (Operator, u'..'),
+        (Text, u' '),
+        (Number.Integer, u'3'),
+        (Text, u'\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_interpolation_nested_curly(lexer):
+    fragment = (
+        u'"A#{ (3..5).group_by { |x| x/2}.map '
+        u'do |k,v| "#{k}" end.join }" + "Z"\n')
+
+    tokens = [
+        (Token.Literal.String.Double, u'"'),
+        (Token.Literal.String.Double, u'A'),
+        (Token.Literal.String.Interpol, u'#{'),
+        (Token.Text, u' '),
+        (Token.Punctuation, u'('),
+        (Token.Literal.Number.Integer, u'3'),
+        (Token.Operator, u'..'),
+        (Token.Literal.Number.Integer, u'5'),
+        (Token.Punctuation, u')'),
+        (Token.Operator, u'.'),
+        (Token.Name, u'group_by'),
+        (Token.Text, u' '),
+        (Token.Literal.String.Interpol, u'{'),
+        (Token.Text, u' '),
+        (Token.Operator, u'|'),
+        (Token.Name, u'x'),
+        (Token.Operator, u'|'),
+        (Token.Text, u' '),
+        (Token.Name, u'x'),
+        (Token.Operator, u'/'),
+        (Token.Literal.Number.Integer, u'2'),
+        (Token.Literal.String.Interpol, u'}'),
+        (Token.Operator, u'.'),
+        (Token.Name, u'map'),
+        (Token.Text, u' '),
+        (Token.Keyword, u'do'),
+        (Token.Text, u' '),
+        (Token.Operator, u'|'),
+        (Token.Name, u'k'),
+        (Token.Punctuation, u','),
+        (Token.Name, u'v'),
+        (Token.Operator, u'|'),
+        (Token.Text, u' '),
+        (Token.Literal.String.Double, u'"'),
+        (Token.Literal.String.Interpol, u'#{'),
+        (Token.Name, u'k'),
+        (Token.Literal.String.Interpol, u'}'),
+        (Token.Literal.String.Double, u'"'),
+        (Token.Text, u' '),
+        (Token.Keyword, u'end'),
+        (Token.Operator, u'.'),
+        (Token.Name, u'join'),
+        (Token.Text, u' '),
+        (Token.Literal.String.Interpol, u'}'),
+        (Token.Literal.String.Double, u'"'),
+        (Token.Text, u' '),
+        (Token.Operator, u'+'),
+        (Token.Text, u' '),
+        (Token.Literal.String.Double, u'"'),
+        (Token.Literal.String.Double, u'Z'),
+        (Token.Literal.String.Double, u'"'),
+        (Token.Text, u'\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_operator_methods(lexer):
+    fragment = u'x.==4\n'
+    tokens = [
+        (Token.Name, u'x'),
+        (Token.Operator, u'.'),
+        (Token.Name.Operator, u'=='),
+        (Token.Literal.Number.Integer, u'4'),
+        (Token.Text, u'\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_escaped_bracestring(lexer):
+    fragment = u'str.gsub(%r{\\\\\\\\}, "/")\n'
+    tokens = [
+        (Token.Name, u'str'),
+        (Token.Operator, u'.'),
+        (Token.Name, u'gsub'),
+        (Token.Punctuation, u'('),
+        (Token.Literal.String.Regex, u'%r{'),
+        (Token.Literal.String.Regex, u'\\\\'),
+        (Token.Literal.String.Regex, u'\\\\'),
+        (Token.Literal.String.Regex, u'}'),
+        (Token.Punctuation, u','),
+        (Token.Text, u' '),
+        (Token.Literal.String.Double, u'"'),
+        (Token.Literal.String.Double, u'/'),
+        (Token.Literal.String.Double, u'"'),
+        (Token.Punctuation, u')'),
+        (Token.Text, u'\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+@pytest.mark.parametrize(
+    'method_name',
+    (
+        # Bare, un-scoped method names
+        'a', 'A', 'z', 'Z', 'は', '\u0080', '\uffff',
+        'aは0_', 'はA__9', '\u0080はa0_', '\uffff__99Z',
+
+        # Method names with trailing characters
+        'aは!', 'はz?', 'はa=',
+
+        # Scoped method names
+        'self.a', 'String.は_', 'example.AZ09_!',
+
+        # Operator overrides
+        '+', '+@', '-', '-@', '!', '!@', '~', '~@',
+        '*', '**', '/', '%', '&', '^', '`',
+        '<=>', '<', '<<', '<=', '>', '>>', '>=',
+        '==', '!=', '===', '=~', '!~',
+        '[]', '[]=',
+    )
+)
+def test_positive_method_names(lexer, method_name):
+    """Validate positive method name parsing."""
+
+    text = 'def ' + method_name
+    assert list(lexer.get_tokens(text))[-2] == (Name.Function, method_name.rpartition('.')[2])
+
+
+@pytest.mark.parametrize('method_name', ('1', '_', '<>', '<<=', '>>=', '&&', '||', '==?', '==!', '===='))
+def test_negative_method_names(lexer, method_name):
+    """Validate negative method name parsing."""
+
+    text = 'def ' + method_name
+    assert list(lexer.get_tokens(text))[-2] != (Name.Function, method_name)
```