summary refs log tree commit diff
diff options
context:
space:
mode:
author Matthäus G. Chajdas <dev@anteru.net> 2021-02-14 09:07:11 +0100
committer Matthäus G. Chajdas <dev@anteru.net> 2021-02-14 09:07:11 +0100
commit a2579d3ab55857c1881c172698498ed088b51607 (patch)
tree bac341ca20b335af8ff991625a66455c926a42dd
parent 87bc368e02bacb9d2333592a3ea2e619c5b94578 (diff)
parent 221bf885d8231d94e66ea8cc90eca8433b61805e (diff)
download pygments-git-a2579d3ab55857c1881c172698498ed088b51607.tar.gz
Merge branch 'issue-253-ruby-unicode' of https://github.com/kurtmckee/pygments into kurtmckee-issue-253-ruby-unicode
-rw-r--r-- pygments/lexers/ruby.py 10
-rw-r--r-- tests/test_ruby.py 185
2 files changed, 192 insertions, 3 deletions
diff --git a/pygments/lexers/ruby.py b/pygments/lexers/ruby.py
index d2244cf8..71b5a89b 100644
--- a/pygments/lexers/ruby.py
+++ b/pygments/lexers/ruby.py
@@ -329,9 +329,13 @@ class RubyLexer(ExtendedRegexLexer):
],
'funcname': [
(r'\(', Punctuation, 'defexpr'),
- (r'(?:([a-zA-Z_]\w*)(\.))?'
- r'([a-zA-Z_]\w*[!?]?|\*\*?|[-+]@?|'
- r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)',
+ (r'(?:([a-zA-Z_]\w*)(\.))?' # optional scope name, like "self."
+ r'('
+ r'[a-zA-Z\u0080-\uffff][a-zA-Z0-9_\u0080-\uffff]*[!?=]?' # method name
+ r'|!=|!~|=~|\*\*?|[-+!~]@?|[/%&|^]|<=>|<[<=]?|>[>=]?|===?' # or operator override
+ r'|\[\]=?' # or element reference/assignment override
+ r'|`' # or the undocumented backtick override
+ r')',
bygroups(Name.Class, Operator, Name.Function), '#pop'),
default('#pop')
],
diff --git a/tests/test_ruby.py b/tests/test_ruby.py
new file mode 100644
index 00000000..9697bea8
--- /dev/null
+++ b/tests/test_ruby.py
@@ -0,0 +1,185 @@
+# -*- coding: utf-8 -*-
+"""
+ Basic RubyLexer Test
+ ~~~~~~~~~~~~~~~~~~~~
+
+ :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import pytest
+
+from pygments.token import Name, Number, Operator, Text, Token
+from pygments.lexers.ruby import RubyLexer
+
+
+@pytest.fixture(scope='module')
+def lexer():
+    """Provide one RubyLexer instance for the tests in this module."""
+    ruby_lexer = RubyLexer()
+    yield ruby_lexer
+
+
+def test_range_syntax1(lexer):
+    """Check the tokens for a two-dot range written without spaces."""
+    source = '1..3\n'
+    expected = [
+        (Token.Literal.Number.Integer, '1'),
+        (Token.Operator, '..'),
+        (Token.Literal.Number.Integer, '3'),
+        (Token.Text, '\n'),
+    ]
+    actual = list(lexer.get_tokens(source))
+    assert actual == expected
+
+
+def test_range_syntax2(lexer):
+    """Check the tokens for a three-dot range written without spaces."""
+    source = '1...3\n'
+    expected = [
+        (Token.Literal.Number.Integer, '1'),
+        (Token.Operator, '...'),
+        (Token.Literal.Number.Integer, '3'),
+        (Token.Text, '\n'),
+    ]
+    actual = list(lexer.get_tokens(source))
+    assert actual == expected
+
+
+def test_range_syntax3(lexer):
+    """Check the tokens for a two-dot range with spaces around the operator."""
+    source = '1 .. 3\n'
+    expected = [
+        (Token.Literal.Number.Integer, '1'),
+        (Token.Text, ' '),
+        (Token.Operator, '..'),
+        (Token.Text, ' '),
+        (Token.Literal.Number.Integer, '3'),
+        (Token.Text, '\n'),
+    ]
+    actual = list(lexer.get_tokens(source))
+    assert actual == expected
+
+
+def test_interpolation_nested_curly(lexer):
+    """Check the token stream for an interpolation holding a brace block
+    and a nested interpolated string.
+    """
+    source = (
+        '"A#{ (3..5).group_by { |x| x/2}.map '
+        'do |k,v| "#{k}" end.join }" + "Z"\n'
+    )
+
+    # Short local aliases keep the expected-token table readable.
+    dq = Token.Literal.String.Double
+    interpol = Token.Literal.String.Interpol
+    integer = Number.Integer
+    punct = Token.Punctuation
+    keyword = Token.Keyword
+
+    expected = [
+        (dq, '"'),
+        (dq, 'A'),
+        (interpol, '#{'),
+        (Text, ' '),
+        (punct, '('),
+        (integer, '3'),
+        (Operator, '..'),
+        (integer, '5'),
+        (punct, ')'),
+        (Operator, '.'),
+        (Name, 'group_by'),
+        (Text, ' '),
+        (interpol, '{'),
+        (Text, ' '),
+        (Operator, '|'),
+        (Name, 'x'),
+        (Operator, '|'),
+        (Text, ' '),
+        (Name, 'x'),
+        (Operator, '/'),
+        (integer, '2'),
+        (interpol, '}'),
+        (Operator, '.'),
+        (Name, 'map'),
+        (Text, ' '),
+        (keyword, 'do'),
+        (Text, ' '),
+        (Operator, '|'),
+        (Name, 'k'),
+        (punct, ','),
+        (Name, 'v'),
+        (Operator, '|'),
+        (Text, ' '),
+        (dq, '"'),
+        (interpol, '#{'),
+        (Name, 'k'),
+        (interpol, '}'),
+        (dq, '"'),
+        (Text, ' '),
+        (keyword, 'end'),
+        (Operator, '.'),
+        (Name, 'join'),
+        (Text, ' '),
+        (interpol, '}'),
+        (dq, '"'),
+        (Text, ' '),
+        (Operator, '+'),
+        (Text, ' '),
+        (dq, '"'),
+        (dq, 'Z'),
+        (dq, '"'),
+        (Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(source)) == expected
+
+
+def test_operator_methods(lexer):
+    """Check that ``==`` after a dot is reported as a Name.Operator token."""
+    source = 'x.==4\n'
+    expected = [
+        (Name, 'x'),
+        (Operator, '.'),
+        (Name.Operator, '=='),
+        (Number.Integer, '4'),
+        (Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(source)) == expected
+
+
+def test_escaped_bracestring(lexer):
+    """Check the tokens for a ``%r{...}`` regex whose body is escaped backslashes."""
+    source = 'str.gsub(%r{\\\\\\\\}, "/")\n'
+
+    # Local aliases for the token types that repeat in the table below.
+    regex = Token.Literal.String.Regex
+    dq = Token.Literal.String.Double
+    punct = Token.Punctuation
+
+    expected = [
+        (Name, 'str'),
+        (Operator, '.'),
+        (Name, 'gsub'),
+        (punct, '('),
+        (regex, '%r{'),
+        (regex, '\\\\'),
+        (regex, '\\\\'),
+        (regex, '}'),
+        (punct, ','),
+        (Text, ' '),
+        (dq, '"'),
+        (dq, '/'),
+        (dq, '"'),
+        (punct, ')'),
+        (Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(source)) == expected
+
+
+@pytest.mark.parametrize(
+    'method_name',
+    (
+        # Plain names with no receiver in front
+        'a', 'A', 'z', 'Z',
+        'は', '\u0080', '\uffff',
+        'aは0_', 'はA__9',
+        '\u0080はa0_', '\uffff__99Z',
+
+        # Names ending in a !, ? or = suffix
+        'aは!', 'はz?', 'はa=',
+
+        # Names qualified with a receiver
+        'self.a', 'String.は_', 'example.AZ09_!',
+
+        # Operators that can be overridden
+        '+', '+@', '-', '-@',
+        '!', '!@', '~', '~@',
+        '*', '**', '/', '%', '&', '^', '`',
+        '<=>', '<', '<<', '<=', '>', '>>', '>=',
+        '==', '!=', '===', '=~', '!~',
+        '[]', '[]=',
+    ),
+)
+def test_positive_method_names(lexer, method_name):
+    """Check that a valid name after ``def`` is lexed as Name.Function."""
+    source = 'def ' + method_name
+    lexed = list(lexer.get_tokens(source))
+
+    # For a scoped name only the part after the last dot is expected in the
+    # function token. The token checked is the second-to-last one; presumably
+    # the last is a trailing newline emitted by the lexer -- TODO confirm.
+    _scope, _dot, bare_name = method_name.rpartition('.')
+    assert lexed[-2] == (Name.Function, bare_name)
+
+
+@pytest.mark.parametrize(
+    'method_name',
+    ('1', '_', '<>', '<<=', '>>=', '&&', '||', '==?', '==!', '===='),
+)
+def test_negative_method_names(lexer, method_name):
+    """Check that an invalid name after ``def`` is not lexed whole as Name.Function."""
+    source = 'def ' + method_name
+    lexed = list(lexer.get_tokens(source))
+
+    # Only the second-to-last token is inspected, mirroring the positive test.
+    assert lexed[-2] != (Name.Function, method_name)