diff options
author | Joshua Huber <joshua@mrhuber.com> | 2020-05-18 08:13:37 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-05-18 14:13:37 +0200 |
commit | c40bbcca8fa5a99146d6677204e1837f6d2f4aea (patch) | |
tree | b5ba4a9718c33c1fc48692afc4823764620e931a | |
parent | 65562a9624863476722dcc8c319d4579583edf8a (diff) | |
download | pygments-git-c40bbcca8fa5a99146d6677204e1837f6d2f4aea.tar.gz |
Make HttpLexer compatible with RFC 7230 (section 3.1.2). Specifically, this addresses the case where only a numeric HTTP status code is returned (e.g. 200) with no textual reason phrase (e.g. OK). Strictly according to RFC 7230, the whitespace just after the status code is NOT optional, and in fact Tomcat 8.5 behaves this way, emitting status lines like "HTTP/1.1 200 \n" (note the whitespace after the 200). (#1432)
-rw-r--r-- | pygments/lexers/textfmts.py | 2 | ||||
-rw-r--r-- | tests/test_textfmts.py | 14 |
2 files changed, 15 insertions, 1 deletions
diff --git a/pygments/lexers/textfmts.py b/pygments/lexers/textfmts.py index bfd053d5..63d1da39 100644 --- a/pygments/lexers/textfmts.py +++ b/pygments/lexers/textfmts.py @@ -179,7 +179,7 @@ class HttpLexer(RegexLexer): bygroups(Name.Function, Text, Name.Namespace, Text, Keyword.Reserved, Operator, Number, Text), 'headers'), - (r'(HTTP)(/)(1\.[01]|2|3)( +)(\d{3})(?:( +)([^\r\n]+))?(\r?\n|\Z)', + (r'(HTTP)(/)(1\.[01]|2|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)', bygroups(Keyword.Reserved, Operator, Number, Text, Number, Text, Name.Exception, Text), 'headers'), diff --git a/tests/test_textfmts.py b/tests/test_textfmts.py index 6d382e8a..5a4f56c4 100644 --- a/tests/test_textfmts.py +++ b/tests/test_textfmts.py @@ -46,6 +46,20 @@ def test_http_status_line_without_reason_phrase(lexer): assert list(lexer.get_tokens(fragment)) == tokens +def test_http_status_line_without_reason_phrase_rfc_7230(lexer): + fragment = u'HTTP/1.1 200 \n' + tokens = [ + (Token.Keyword.Reserved, u'HTTP'), + (Token.Operator, u'/'), + (Token.Number, u'1.1'), + (Token.Text, u' '), + (Token.Number, u'200'), + (Token.Text, u' '), + (Token.Text, u'\n'), + ] + assert list(lexer.get_tokens(fragment)) == tokens + + def test_application_xml(lexer): fragment = u'GET / HTTP/1.0\nContent-Type: application/xml\n\n<foo>\n' tokens = [ |