summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoshua Huber <joshua@mrhuber.com>2020-05-18 08:13:37 -0400
committerGitHub <noreply@github.com>2020-05-18 14:13:37 +0200
commitc40bbcca8fa5a99146d6677204e1837f6d2f4aea (patch)
treeb5ba4a9718c33c1fc48692afc4823764620e931a
parent65562a9624863476722dcc8c319d4579583edf8a (diff)
downloadpygments-git-c40bbcca8fa5a99146d6677204e1837f6d2f4aea.tar.gz
Make HttpLexer compatible with RFC 7230 (section 3.1.2). Specifically, this addresses the case where only a numeric HTTP status code is returned (e.g. 200) with no textual reason phrase (e.g. OK). Strictly according to RFC 7230, the whitespace just after the status code number is NOT optional, and in fact Tomcat 8.5 behaves this way, emitting status lines like "HTTP/1.1 200 \n" (note the whitespace after the 200). (#1432)
-rw-r--r--pygments/lexers/textfmts.py2
-rw-r--r--tests/test_textfmts.py14
2 files changed, 15 insertions, 1 deletions
diff --git a/pygments/lexers/textfmts.py b/pygments/lexers/textfmts.py
index bfd053d5..63d1da39 100644
--- a/pygments/lexers/textfmts.py
+++ b/pygments/lexers/textfmts.py
@@ -179,7 +179,7 @@ class HttpLexer(RegexLexer):
bygroups(Name.Function, Text, Name.Namespace, Text,
Keyword.Reserved, Operator, Number, Text),
'headers'),
- (r'(HTTP)(/)(1\.[01]|2|3)( +)(\d{3})(?:( +)([^\r\n]+))?(\r?\n|\Z)',
+ (r'(HTTP)(/)(1\.[01]|2|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)',
bygroups(Keyword.Reserved, Operator, Number, Text, Number, Text,
Name.Exception, Text),
'headers'),
diff --git a/tests/test_textfmts.py b/tests/test_textfmts.py
index 6d382e8a..5a4f56c4 100644
--- a/tests/test_textfmts.py
+++ b/tests/test_textfmts.py
@@ -46,6 +46,20 @@ def test_http_status_line_without_reason_phrase(lexer):
assert list(lexer.get_tokens(fragment)) == tokens
+def test_http_status_line_without_reason_phrase_rfc_7230(lexer):
+ fragment = u'HTTP/1.1 200 \n'
+ tokens = [
+ (Token.Keyword.Reserved, u'HTTP'),
+ (Token.Operator, u'/'),
+ (Token.Number, u'1.1'),
+ (Token.Text, u' '),
+ (Token.Number, u'200'),
+ (Token.Text, u' '),
+ (Token.Text, u'\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
def test_application_xml(lexer):
fragment = u'GET / HTTP/1.0\nContent-Type: application/xml\n\n<foo>\n'
tokens = [