summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--pygments/lexers/_mapping.py1
-rw-r--r--pygments/lexers/text.py73
-rw-r--r--tests/examplefiles/example.ebnf193
-rw-r--r--tests/examplefiles/test.ebnf31
4 files changed, 297 insertions, 1 deletions
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index b45c56cd..d491c03b 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -94,6 +94,7 @@ LEXERS = {
'DylanLidLexer': ('pygments.lexers.compiled', 'DylanLID', ('dylan-lid', 'lid'), ('*.lid', '*.hdp'), ('text/x-dylan-lid',)),
'ECLLexer': ('pygments.lexers.other', 'ECL', ('ecl',), ('*.ecl',), ('application/x-ecl',)),
'ECLexer': ('pygments.lexers.compiled', 'eC', ('ec',), ('*.ec', '*.eh'), ('text/x-echdr', 'text/x-ecsrc')),
+ 'EbnfLexer': ('pygments.lexers.text', 'EBNF', ('ebnf',), ('*.ebnf',), ('text/x-ebnf',)),
'ElixirConsoleLexer': ('pygments.lexers.functional', 'Elixir iex session', ('iex',), (), ('text/x-elixir-shellsession',)),
'ElixirLexer': ('pygments.lexers.functional', 'Elixir', ('elixir', 'ex', 'exs'), ('*.ex', '*.exs'), ('text/x-elixir',)),
'ErbLexer': ('pygments.lexers.templates', 'ERB', ('erb',), (), ('application/x-ruby-templating',)),
diff --git a/pygments/lexers/text.py b/pygments/lexers/text.py
index 5e340893..f33febaf 100644
--- a/pygments/lexers/text.py
+++ b/pygments/lexers/text.py
@@ -25,7 +25,7 @@ __all__ = ['IniLexer', 'PropertiesLexer', 'SourcesListLexer', 'BaseMakefileLexer
'RstLexer', 'VimLexer', 'GettextLexer', 'SquidConfLexer',
'DebianControlLexer', 'DarcsPatchLexer', 'YamlLexer',
'LighttpdConfLexer', 'NginxConfLexer', 'CMakeLexer', 'HttpLexer',
- 'PyPyLogLexer', 'RegeditLexer', 'HxmlLexer']
+ 'PyPyLogLexer', 'RegeditLexer', 'HxmlLexer', 'EbnfLexer']
class IniLexer(RegexLexer):
@@ -1841,3 +1841,74 @@ class HxmlLexer(RegexLexer):
(r'#.*', Comment.Single)
]
}
+
+
+class EbnfLexer(RegexLexer):
+    """
+    Lexer for ISO/IEC 14977 EBNF grammars.
+
+    ``root`` lexes a rule's left-hand side up to ``=``; ``production`` lexes
+    the right-hand side until a terminator symbol (``;`` or ``.``).
+    """
+
+    name = 'EBNF'
+    aliases = ['ebnf']
+    filenames = ['*.ebnf']
+    mimetypes = ['text/x-ebnf']
+
+    tokens = {
+        'root': [
+            include('whitespace'),
+            include('comment_start'),
+            include('identifier'),
+            # The defining symbol starts the right-hand side of a rule.
+            (r'=', Operator, 'production'),
+        ],
+        'production': [
+            include('whitespace'),
+            include('comment_start'),
+            include('identifier'),
+            include('strings'),
+            # Special sequence: free-form text delimited by question marks.
+            (r'(\?[^?]*\?)', Name.Entity),
+            # Grouping brackets, the concatenate symbol and all three
+            # definition separator symbols (``|``, ``/`` and ``!``).
+            (r'[\[\]{}(),|/!]', Punctuation),
+            # Except symbol and repetition symbol.
+            (r'[-*]', Operator),
+            # Either terminator symbol ends the rule.
+            (r'[;.]', Punctuation, '#pop'),
+        ],
+        'whitespace': [
+            (r'\s+', Text),
+        ],
+        'comment_start': [
+            (r'\(\*', Comment.Multiline, 'comment'),
+        ],
+        'comment': [
+            # Plain text must stop at '(' as well as '*' and ')'; otherwise
+            # the opening bracket of a nested comment is consumed here and
+            # the ``comment_start`` rule below can never match.
+            (r'[^*()]+', Comment.Multiline),
+            include('comment_start'),
+            (r'\*\)', Comment.Multiline, '#pop'),
+            # A lone bracket or asterisk that is not part of a delimiter.
+            (r'[*()]', Comment.Multiline),
+        ],
+        'identifier': [
+            # A letter, then any run of letters, digits, spaces or hyphens.
+            (r'([a-zA-Z][a-zA-Z0-9 \-]*)', Keyword),
+        ],
+        'strings': [
+            (r'"', String.Double, 'dq_string'),
+            (r"'", String.Single, 'sq_string'),
+        ],
+        'dq_string': [
+            (r'[^"]+', String.Double),
+            (r'"', String.Double, '#pop'),
+        ],
+        'sq_string': [
+            (r"[^']+", String.Single),
+            (r"'", String.Single, '#pop'),
+        ],
+    }
diff --git a/tests/examplefiles/example.ebnf b/tests/examplefiles/example.ebnf
new file mode 100644
index 00000000..7a917405
--- /dev/null
+++ b/tests/examplefiles/example.ebnf
@@ -0,0 +1,193 @@
+(*
+ The syntax of Extended BNF can be defined using itself. There are four
+ parts in this example, the first part names the characters, the second
+ part defines the removal of unnecessary non-printing characters, the
+ third part defines the removal of textual comments, and the final part
+ defines the structure of Extended BNF itself.
+
+ Each syntax rule in this example starts with a comment that identifies
+ the corresponding clause in the standard.
+
+ The meaning of special-sequences is not defined in the standard. In
+ this example (see the reference to 7.6) they represent control
+ functions defined by ISO/IEC 6429:1992. Another special-sequence
+ defines a syntactic-exception (see the reference to 4.7).
+*)
+
+(*
+ The first part of the lexical syntax defines the characters in the
+ 7-bit character set (ISO/IEC 646:1991) that represent each
+ terminal-character and gap-separator in Extended BNF.
+*)
+
+(* see 7.2 *) letter
+ = 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l'
+ | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x'
+ | 'y' | 'z'
+ | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L'
+ | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X'
+ | 'Y' | 'Z';
+(* see 7.2 *) decimal digit
+ = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9';
+(*
+ The representation of the following terminal-characters is defined in
+ clauses 7.3, 7.4 and tables 1, 2.
+*)
+concatenate symbol = ',';
+defining symbol = '=';
+definition separator symbol = '|' | '/' | '!';
+end comment symbol = '*)';
+end group symbol = ')';
+end option symbol = ']' | '/)';
+end repeat symbol = '}' | ':)';
+except symbol = '-';
+first quote symbol = "'";
+repetition symbol = '*';
+second quote symbol = '"';
+special sequence symbol = '?';
+start comment symbol = '(*';
+start group symbol = '(';
+start option symbol = '[' | '(/';
+start repeat symbol = '{' | '(:';
+terminator symbol = ';' | '.';
+(* see 7.5 *) other character
+ = ' ' | ':' | '+' | '_' | '%' | '@' | '&' | '#' | '$' | '<' | '>'
+ | '\' | '^' | "'" | '~';
+(* see 7.6 *) space character = ' ';
+horizontal tabulation character
+ = ? ISO 6429 character Horizontal Tabulation ? ;
+new line
+ = { ? ISO 6429 character Carriage Return ? },
+ ? ISO 6429 character Line Feed ?,
+ { ? ISO 6429 character Carriage Return ? };
+vertical tabulation character
+ = ? ISO 6429 character Vertical Tabulation ? ;
+form feed
+ = ? ISO 6429 character Form Feed ? ;
+
+(*
+ The second part of the syntax defines the removal of unnecessary
+ non-printing characters from a syntax.
+*)
+
+(* see 6.2 *) terminal character
+ = letter
+ | decimal digit
+ | concatenate symbol
+ | defining symbol
+ | definition separator symbol
+ | end comment symbol
+ | end group symbol
+ | end option symbol
+ | end repeat symbol
+ | except symbol
+ | first quote symbol
+ | repetition symbol
+ | second quote symbol
+ | special sequence symbol
+ | start comment symbol
+ | start group symbol
+ | start option symbol
+ | start repeat symbol
+ | terminator symbol
+ | other character;
+(* see 6.3 *) gap free symbol
+ = terminal character - (first quote symbol | second quote symbol)
+ | terminal string;
+(* see 4.16 *) terminal string
+ = first quote symbol, first terminal character,
+ {first terminal character}, first quote symbol
+ | second quote symbol, second terminal character,
+ {second terminal character}, second quote symbol;
+(* see 4.17 *) first terminal character
+ = terminal character - first quote symbol;
+(* see 4.18 *) second terminal character
+ = terminal character - second quote symbol;
+(* see 6.4 *) gap separator
+ = space character
+ | horizontal tabulation character
+ | new line
+ | vertical tabulation character
+ | form feed;
+(* see 6.5 *) syntax
+ = {gap separator}, gap free symbol, {gap separator},
+ {gap free symbol, {gap separator}};
+
+(*
+ The third part of the syntax defines the removal of
+ bracketed-textual-comments from gap-free-symbols that form a syntax.
+*)
+
+(* see 6.6 *) commentless symbol
+ = terminal character
+ - (letter
+ | decimal digit
+ | first quote symbol
+ | second quote symbol
+ | start comment symbol
+ | end comment symbol
+ | special sequence symbol
+ | other character)
+ | meta identifier
+ | integer
+ | terminal string
+ | special sequence;
+(* see 4.9 *) integer
+ = decimal digit, {decimal digit};
+(* see 4.14 *) meta identifier
+ = letter, {meta identifier character};
+(* see 4.15 *) meta identifier character
+ = letter
+ | decimal digit;
+(* see 4.19 *) special sequence
+ = special sequence symbol,
+ {special sequence character},
+ special sequence symbol;
+(* see 4.20 *) special sequence character
+ = terminal character - special sequence symbol;
+(* see 6.7 *) comment symbol
+ = bracketed textual comment | other character | commentless symbol;
+(* see 6.8 *) bracketed textual comment
+ = start comment symbol, {comment symbol}, end comment symbol;
+(* see 6.9 *) syntax
+ = {bracketed textual comment}, commentless symbol,
+ {bracketed textual comment},
+ {commentless symbol, {bracketed textual comment}};
+
+(*
+ The final part of the syntax defines the abstract syntax of Extended
+ BNF, i.e. the structure in terms of the commentless symbols.
+*)
+
+(* see 4.2 *) syntax
+ = syntax rule, {syntax rule};
+(* see 4.3 *) syntax rule
+ = meta identifier, defining symbol,
+ definitions list, terminator symbol;
+(* see 4.4 *) definitions list
+ = single definition, {definition separator symbol, single definition};
+(* see 4.5 *) single definition
+ = syntactic term, {concatenate symbol, syntactic term};
+(* see 4.6 *) syntactic term
+ = syntactic factor, [except symbol, syntactic exception];
+(* see 4.7 *) syntactic exception
+ = ? a syntactic-factor that could be replaced by a syntactic-factor
+ containing no meta-identifiers
+ ? ;
+(* see 4.8 *) syntactic factor
+ = [integer, repetition symbol], syntactic primary;
+(* see 4.10 *) syntactic primary
+ = optional sequence
+ | repeated sequence
+ | grouped sequence
+ | meta identifier
+ | terminal string
+ | special sequence
+ | empty sequence;
+(* see 4.11 *) optional sequence
+ = start option symbol, definitions list, end option symbol;
+(* see 4.12 *) repeated sequence
+ = start repeat symbol, definitions list, end repeat symbol;
+(* see 4.13 *) grouped sequence
+ = start group symbol, definitions list, end group symbol;
+(* see 4.21 *) empty sequence = ;
diff --git a/tests/examplefiles/test.ebnf b/tests/examplefiles/test.ebnf
new file mode 100644
index 00000000..a96171b0
--- /dev/null
+++ b/tests/examplefiles/test.ebnf
@@ -0,0 +1,31 @@
+letter = "A" | "B" | "C" | "D" | "E" | "F" | "G"
+ | "H" | "I" | "J" | "K" | "L" | "M" | "N"
+ | "O" | "P" | "Q" | "R" | "S" | "T" | "U"
+ | "V" | "W" | "X" | "Y" | "Z" ;
+digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
+symbol = "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">"
+ | "'" | '"' | "=" | "|" | "." | "," | ";" ;
+character = letter | digit | symbol | " " ;
+
+identifier = letter , { letter | digit | " " } ;
+terminal = "'" , character , { character } , "'"
+ | '"' , character , { character } , '"' ;
+
+special = "?" , any , "?" ;
+
+comment = (* this is a comment "" *) "(*" , any-symbol , "*)" ;
+any-symbol = ? any visible character ? ; (* ? ... ? *)
+
+lhs = identifier ;
+rhs = identifier
+ | terminal
+ | comment , rhs
+ | rhs , comment
+ | "[" , rhs , "]"
+ | "{" , rhs , "}"
+ | "(" , rhs , ")"
+ | rhs , "|" , rhs
+ | rhs , "," , rhs ;
+
+rule = lhs , "=" , rhs , ";" | comment ;
+grammar = { rule } ;