author    Matthäus G. Chajdas <dev@anteru.net>    2019-07-20 11:37:16 +0200
committer Matthäus G. Chajdas <dev@anteru.net>    2019-07-20 11:37:16 +0200
commit    8b54ce8837c3982a0ca0d2edff321cda4dd088f3 (patch)
tree      d1027d759d6952c65c83ae7fa06034fea2888021
parent    8275050f16ac7e673bdb33b753f4b604335e51fa (diff)
download  pygments-8b54ce8837c3982a0ca0d2edff321cda4dd088f3.tar.gz
Fix #1528 -- Yaml gets confused when a comment contains a key:value pair.
-rw-r--r--  CHANGES                    1
-rw-r--r--  pygments/lexers/data.py    2
-rw-r--r--  tests/test_data.py        19
3 files changed, 20 insertions, 2 deletions
diff --git a/CHANGES b/CHANGES
index eac1a3bb..ac233630 100644
--- a/CHANGES
+++ b/CHANGES
@@ -13,6 +13,7 @@ Version 2.4.2
- Updated lexers:
* Erlang, Elixir shells (PR#823, #1521)
+ * YAML (#1528)
- Fix encoding error when guessing lexer with given ``encoding`` option
(#1438)
diff --git a/pygments/lexers/data.py b/pygments/lexers/data.py
index b3253542..46ca7340 100644
--- a/pygments/lexers/data.py
+++ b/pygments/lexers/data.py
@@ -233,7 +233,7 @@ class YamlLexer(ExtendedRegexLexer):
# whitespaces separating tokens
(r'[ ]+', Text),
# key with colon
- (r'''([^,:?\[\]{}"'\n]+)(:)(?=[ ]|$)''',
+ (r'''([^#,:?\[\]{}"'\n]+)(:)(?=[ ]|$)''',
bygroups(Name.Tag, set_indent(Punctuation, implicit=True))),
# tags, anchors and aliases,
include('descriptors'),
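
For context, here is a minimal sketch (not part of the commit) of why excluding '#' from the negated character class fixes the false key match. The two patterns are copied from the hunk above; the variable names are made up for illustration.

    import re

    # Key-with-colon rule before and after the fix, reduced to bare re patterns.
    OLD_KEY = re.compile(r'''([^,:?\[\]{}"'\n]+)(:)(?=[ ]|$)''')
    NEW_KEY = re.compile(r'''([^#,:?\[\]{}"'\n]+)(:)(?=[ ]|$)''')

    # What the lexer sees after consuming "here: " in the reported input.
    rest = "token # innocent: comment"

    # Old pattern: '#' is allowed inside the key group, so the match runs into
    # the comment and reaches the colon of "innocent:", treating that whole
    # span as a key (Name.Tag).
    print(OLD_KEY.match(rest).group(0))   # 'token # innocent:'

    # New pattern: the key group stops at '#', so no colon can be reached and
    # the rule fails; the plain-scalar and comment rules then tokenize the line.
    print(NEW_KEY.match(rest))            # None
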
diff --git a/tests/test_data.py b/tests/test_data.py
index be371419..20e74be0 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -9,7 +9,7 @@
import unittest
-from pygments.lexers import JsonLexer, JsonBareObjectLexer
+from pygments.lexers import JsonLexer, JsonBareObjectLexer, YamlLexer
from pygments.token import Token
@@ -98,3 +98,20 @@ class JsonBareObjectTest(unittest.TestCase):
]
self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
+class YamlTest(unittest.TestCase):
+ def setUp(self):
+ self.lexer = YamlLexer()
+
+ def testColonInComment(self):
+ # Bug #1528: This previously parsed 'token # innocent' as a tag
+ fragment = u'here: token # innocent: comment\n'
+ tokens = [
+ (Token.Name.Tag, u'here'),
+ (Token.Punctuation, u':'),
+ (Token.Text, u' '),
+ (Token.Literal.Scalar.Plain, u'token'),
+ (Token.Text, u' '),
+ (Token.Comment.Single, u'# innocent: comment'),
+ (Token.Text, u'\n'),
+ ]
+ self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))