diff options
Diffstat (limited to 'pygments/lexers/data.py')
-rw-r--r-- | pygments/lexers/data.py | 87 |
1 files changed, 75 insertions, 12 deletions
diff --git a/pygments/lexers/data.py b/pygments/lexers/data.py index ab7c0297..56cfc56d 100644 --- a/pygments/lexers/data.py +++ b/pygments/lexers/data.py @@ -10,8 +10,8 @@ from pygments.lexer import Lexer, ExtendedRegexLexer, LexerContext, \ include, bygroups -from pygments.token import Text, Comment, Keyword, Name, String, Number, \ - Punctuation, Literal, Error, Whitespace +from pygments.token import Comment, Error, Keyword, Literal, Name, Number, \ + Punctuation, String, Whitespace __all__ = ['YamlLexer', 'JsonLexer', 'JsonBareObjectLexer', 'JsonLdLexer'] @@ -437,6 +437,12 @@ class JsonLexer(Lexer): """ For JSON data structures. + Javascript-style comments are supported (like ``/* */`` and ``//``), + though comments are not part of the JSON specification. + This allows users to highlight JSON as it is used in the wild. + + No validation is performed on the input JSON document. + .. versionadded:: 1.5 """ @@ -471,6 +477,10 @@ class JsonLexer(Lexer): in_number = False in_float = False in_punctuation = False + in_comment_single = False + in_comment_multiline = False + expecting_second_comment_opener = False # // or /* + expecting_second_comment_closer = False # */ start = 0 @@ -564,6 +574,49 @@ class JsonLexer(Lexer): in_punctuation = False # Fall through so the new character can be evaluated. + elif in_comment_single: + if character != '\n': + continue + + if queue: + queue.append((start, Comment.Single, text[start:stop])) + else: + yield start, Comment.Single, text[start:stop] + + in_comment_single = False + # Fall through so the new character can be evaluated. + + elif in_comment_multiline: + if character == '*': + expecting_second_comment_closer = True + elif expecting_second_comment_closer: + expecting_second_comment_closer = False + if character == '/': + if queue: + queue.append((start, Comment.Multiline, text[start:stop + 1])) + else: + yield start, Comment.Multiline, text[start:stop + 1] + + in_comment_multiline = False + + continue + + elif expecting_second_comment_opener: + expecting_second_comment_opener = False + if character == '/': + in_comment_single = True + continue + elif character == '*': + in_comment_multiline = True + continue + + # Exhaust the queue. Accept the existing token types. + yield from queue + queue.clear() + + yield start, Error, text[start:stop] + # Fall through so the new character can be evaluated. + start = stop if character == '"': @@ -589,18 +642,18 @@ class JsonLexer(Lexer): elif character == ':': # Yield from the queue. Replace string token types. for _start, _token, _text in queue: - # There can be only two types of tokens before a ':': - # Whitespace, or a quoted string. If it's a quoted string - # we emit Name.Tag, otherwise, we yield the whitespace - # tokens. In all other cases this is invalid JSON. This - # allows for things like '"foo" "bar": "baz"' but we're not - # a validating JSON lexer so it's acceptable - if _token is Whitespace: - yield _start, _token, _text - elif _token is String.Double: + # There can be only three types of tokens before a ':': + # Whitespace, Comment, or a quoted string. + # + # If it's a quoted string we emit Name.Tag. + # Otherwise, we yield the original token. + # + # In all other cases this would be invalid JSON, + # but this is not a validating JSON lexer, so it's OK. + if _token is String.Double: yield _start, Name.Tag, _text else: - yield _start, Error, _text + yield _start, _token, _text queue.clear() in_punctuation = True @@ -612,6 +665,10 @@ class JsonLexer(Lexer): in_punctuation = True + elif character == '/': + # This is the beginning of a comment. + expecting_second_comment_opener = True + else: # Exhaust the queue. Accept the existing token types. yield from queue @@ -633,6 +690,12 @@ class JsonLexer(Lexer): yield start, Whitespace, text[start:] elif in_punctuation: yield start, Punctuation, text[start:] + elif in_comment_single: + yield start, Comment.Single, text[start:] + elif in_comment_multiline: + yield start, Error, text[start:] + elif expecting_second_comment_opener: + yield start, Error, text[start:] class JsonBareObjectLexer(JsonLexer): |