diff options
-rw-r--r-- | pygments/lexers/_mapping.py | 2 | ||||
-rw-r--r-- | pygments/lexers/grammar_notation.py | 131 | ||||
-rw-r--r-- | tests/examplefiles/abnf_example1.abnf | 22 | ||||
-rw-r--r-- | tests/examplefiles/abnf_example2.abnf | 9 | ||||
-rw-r--r-- | tests/examplefiles/bnf_example1.bnf | 15 |
5 files changed, 179 insertions, 0 deletions
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index 6542aa10..4bcd3460 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -17,6 +17,7 @@ from __future__ import print_function LEXERS = { 'ABAPLexer': ('pygments.lexers.business', 'ABAP', ('abap',), ('*.abap',), ('text/x-abap',)), + 'AbnfLexer': ('pygments.lexers.grammar_notation', 'ABNF', ('abnf',), ('*.abnf',), ('text/x-abnf',)), 'APLLexer': ('pygments.lexers.apl', 'APL', ('apl',), ('*.apl',), ()), 'ActionScript3Lexer': ('pygments.lexers.actionscript', 'ActionScript 3', ('as3', 'actionscript3'), ('*.as',), ('application/x-actionscript3', 'text/x-actionscript3', 'text/actionscript3')), 'ActionScriptLexer': ('pygments.lexers.actionscript', 'ActionScript', ('as', 'actionscript'), ('*.as',), ('application/x-actionscript', 'text/x-actionscript', 'text/actionscript')), @@ -51,6 +52,7 @@ LEXERS = { 'BefungeLexer': ('pygments.lexers.esoteric', 'Befunge', ('befunge',), ('*.befunge',), ('application/x-befunge',)), 'BlitzBasicLexer': ('pygments.lexers.basic', 'BlitzBasic', ('blitzbasic', 'b3d', 'bplus'), ('*.bb', '*.decls'), ('text/x-bb',)), 'BlitzMaxLexer': ('pygments.lexers.basic', 'BlitzMax', ('blitzmax', 'bmax'), ('*.bmx',), ('text/x-bmx',)), + 'BnfLexer': ('pygments.lexers.grammar_notation', 'BNF', ('bnf',), ('*.bnf',), ('text/x-bnf',)), 'BooLexer': ('pygments.lexers.dotnet', 'Boo', ('boo',), ('*.boo',), ('text/x-boo',)), 'BoogieLexer': ('pygments.lexers.esoteric', 'Boogie', ('boogie',), ('*.bpl',), ()), 'BrainfuckLexer': ('pygments.lexers.esoteric', 'Brainfuck', ('brainfuck', 'bf'), ('*.bf', '*.b'), ('application/x-brainfuck',)), diff --git a/pygments/lexers/grammar_notation.py b/pygments/lexers/grammar_notation.py new file mode 100644 index 00000000..460914f4 --- /dev/null +++ b/pygments/lexers/grammar_notation.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.grammar_notation + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Lexers for grammar notations like 
BNF. + + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from pygments.lexer import RegexLexer, bygroups, words +from pygments.token import Punctuation, Text, Comment, Operator, \ + Keyword, Name, Literal + +__all__ = ['BnfLexer', 'AbnfLexer'] + + +class BnfLexer(RegexLexer): + """ + This lexer is for grammar notations which are similar to + original BNF. + + In order to maximize the number of targets of this lexer, + let's make some design decisions: + + * We don't distinguish `Terminal Symbol`. + + * We do assume that `NonTerminal Symbol` are always enclosed + with angle brackets. + + * We do assume that `NonTerminal Symbol` may include + any printable characters except angle brackets and ASCII 0x20. + This assumption is for `RBNF <http://www.rfc-base.org/txt/rfc-5511.txt>`_. + + * We do assume that the target notation doesn't support comments. + + * We don't distinguish any operators and punctuation except + `::=`. + + Though these decisions might result in rather minimal highlighting + and you might be disappointed, they are reasonable for us. + + .. versionadded:: 2.1 + """ + + name = 'BNF' + aliases = ['bnf'] + filenames = ['*.bnf'] + mimetypes = ['text/x-bnf'] + + tokens = { + 'root': [ + (r'(<)([ -;=?-~]+)(>)', + bygroups(Punctuation, Name.Class, Punctuation)), + + # the only operator + (r'::=', Operator), + + # fallback + (r'[^<>:]+', Text), # for performance + (r'.', Text), + ], + } + + +class AbnfLexer(RegexLexer): + """ + Lexer for `IETF 7405 ABNF + <http://www.ietf.org/rfc/rfc7405.txt>`_ + (Updates `5234 <http://www.ietf.org/rfc/rfc5234.txt>`_) + grammars. + + .. 
versionadded:: 2.1 + """ + + name = 'ABNF' + aliases = ['abnf'] + filenames = ['*.abnf'] + mimetypes = ['text/x-abnf'] + + _core_rules = ( + 'ALPHA', 'BIT', 'CHAR', 'CR', 'CRLF', 'CTL', 'DIGIT', + 'DQUOTE', 'HEXDIG', 'HTAB', 'LF', 'LWSP', 'OCTET', + 'SP', 'VCHAR', 'WSP') + + tokens = { + 'root': [ + # comment + (r';.*$', Comment.Single), + + # quoted + # double quote itself in this state, it is as '%x22'. + (r'(%[si])?"[^"]*"', Literal), + + # binary (but i have never seen...) + (r'%b[01]+\-[01]+\b', Literal), # range + (r'%b[01]+(\.[01]+)*\b', Literal), # concat + + # decimal + (r'%d[0-9]+\-[0-9]+\b', Literal), # range + (r'%d[0-9]+(\.[0-9]+)*\b', Literal), # concat + + # hexadecimal + (r'%x[0-9a-fA-F]+\-[0-9a-fA-F]+\b', Literal), # range + (r'%x[0-9a-fA-F]+(\.[0-9a-fA-F]+)*\b', Literal), # concat + + # repetition (<a>*<b>element) including nRule + (r'\b[0-9]+\*[0-9]+', Operator), + (r'\b[0-9]+\*', Operator), + (r'\b[0-9]+', Operator), + (r'\*', Operator), + + # Strictly speaking, these are not keyword but + # are called `Core Rule'. + (words(_core_rules, suffix=r'\b'), Keyword), + + # nonterminals (ALPHA *(ALPHA / DIGIT / "-")) + (r'[a-zA-Z][a-zA-Z0-9-]+\b', Name.Class), + + # operators + (r'(=/|=|/)', Operator), + + # punctuation + (r'[\[\]()]', Punctuation), + + # fallback + (r'\s+', Text), + (r'.', Text), + ], + } diff --git a/tests/examplefiles/abnf_example1.abnf b/tests/examplefiles/abnf_example1.abnf new file mode 100644 index 00000000..5cd9cd25 --- /dev/null +++ b/tests/examplefiles/abnf_example1.abnf @@ -0,0 +1,22 @@ +; This examples from WikiPedia <https://en.wikipedia.org/wiki/Augmented_Backus%E2%80%93Naur_Form>. + + postal-address = name-part street zip-part + + name-part = *(personal-part SP) last-name [SP suffix] CRLF + name-part =/ personal-part CRLF + + personal-part = first-name / (initial ".") + first-name = *ALPHA + initial = ALPHA + last-name = *ALPHA + suffix = ("Jr." / "Sr." 
/ 1*("I" / "V" / "X")) + + street = [apt SP] house-num SP street-name CRLF + apt = 1*4DIGIT + house-num = 1*8(DIGIT / ALPHA) + street-name = 1*VCHAR + + zip-part = town-name "," SP state 1*2SP zip-code CRLF + town-name = 1*(ALPHA / SP) + state = 2ALPHA + zip-code = 5DIGIT ["-" 4DIGIT] diff --git a/tests/examplefiles/abnf_example2.abnf b/tests/examplefiles/abnf_example2.abnf new file mode 100644 index 00000000..8781adfb --- /dev/null +++ b/tests/examplefiles/abnf_example2.abnf @@ -0,0 +1,9 @@ +crlf = %d13.10 + +command = "command string" + +char-line = %x0D.0A *(%x20-7E) %x0D.0A + +without-ws-and-ctl = %d1-8 / %d11 / %d12 / %d14-31 / %d127 + +three-blank-lines = %x0D.0A.0D.0A.0D.0A diff --git a/tests/examplefiles/bnf_example1.bnf b/tests/examplefiles/bnf_example1.bnf new file mode 100644 index 00000000..fe041a6e --- /dev/null +++ b/tests/examplefiles/bnf_example1.bnf @@ -0,0 +1,15 @@ +; This examples from WikiPedia <https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form>. + + <postal-address> ::= <name-part> <street-address> <zip-part> + + <name-part> ::= <personal-part> <last-name> <opt-suffix-part> <EOL> + | <personal-part> <name-part> + + <personal-part> ::= <initial> "." | <first-name> + + <street-address> ::= <house-num> <street-name> <opt-apt-num> <EOL> + + <zip-part> ::= <town-name> "," <state-code> <ZIP-code> <EOL> + + <opt-suffix-part> ::= "Sr." | "Jr." | <roman-numeral> | "" + <opt-apt-num> ::= <apt-num> | "" |