4 files changed, 297 insertions, 1 deletions
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index b45c56cd..d491c03b 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -94,6 +94,7 @@ LEXERS = {
     'DylanLidLexer': ('pygments.lexers.compiled', 'DylanLID', ('dylan-lid', 'lid'), ('*.lid', '*.hdp'), ('text/x-dylan-lid',)),
     'ECLLexer': ('pygments.lexers.other', 'ECL', ('ecl',), ('*.ecl',), ('application/x-ecl',)),
     'ECLexer': ('pygments.lexers.compiled', 'eC', ('ec',), ('*.ec', '*.eh'), ('text/x-echdr', 'text/x-ecsrc')),
+    'EbnfLexer': ('pygments.lexers.text', 'EBNF', ('ebnf',), ('*.ebnf',), ('text/x-ebnf',)),
     'ElixirConsoleLexer': ('pygments.lexers.functional', 'Elixir iex session', ('iex',), (), ('text/x-elixir-shellsession',)),
     'ElixirLexer': ('pygments.lexers.functional', 'Elixir', ('elixir', 'ex', 'exs'), ('*.ex', '*.exs'), ('text/x-elixir',)),
     'ErbLexer': ('pygments.lexers.templates', 'ERB', ('erb',), (), ('application/x-ruby-templating',)),
diff --git a/pygments/lexers/text.py b/pygments/lexers/text.py
index 5e340893..f33febaf 100644
--- a/pygments/lexers/text.py
+++ b/pygments/lexers/text.py
@@ -25,7 +25,7 @@ __all__ = ['IniLexer', 'PropertiesLexer', 'SourcesListLexer', 'BaseMakefileLexer
            'RstLexer', 'VimLexer', 'GettextLexer', 'SquidConfLexer',
            'DebianControlLexer', 'DarcsPatchLexer', 'YamlLexer',
            'LighttpdConfLexer', 'NginxConfLexer', 'CMakeLexer', 'HttpLexer',
-           'PyPyLogLexer', 'RegeditLexer', 'HxmlLexer']
+           'PyPyLogLexer', 'RegeditLexer', 'HxmlLexer', 'EbnfLexer']
 
 
 class IniLexer(RegexLexer):
@@ -1841,3 +1841,74 @@ class HxmlLexer(RegexLexer):
             (r'#.*', Comment.Single)
         ]
     }
+
+
+class EbnfLexer(RegexLexer):
+    """
+    Lexer for ISO/IEC 14977 EBNF grammars.
+
+    The ``root`` state matches the left-hand side of a rule; the defining
+    symbol ``=`` enters ``production``, which is left again at a
+    terminator symbol (``;`` or ``.``).  Comments ``(* ... *)`` are
+    recognised in both states and may nest.
+    """
+
+    name = 'EBNF'
+    aliases = ['ebnf']
+    filenames = ['*.ebnf']
+    mimetypes = ['text/x-ebnf']
+
+    tokens = {
+        'root': [
+            include('whitespace'),
+            include('comment_start'),
+            include('identifier'),
+            # The defining symbol opens the right-hand side of a rule.
+            (r'=', Operator, 'production'),
+        ],
+        'production': [
+            include('whitespace'),
+            include('comment_start'),
+            include('identifier'),
+            include('strings'),
+            # Special sequence: free-form text between two question marks.
+            (r'(\?[^?]*\?)', Name.Entity),
+            (r'[\[\]{}(),|]', Punctuation),
+            (r'-', Operator),
+            # Either terminator symbol ends the rule and returns to 'root'.
+            (r'[;.]', Punctuation, '#pop'),
+        ],
+        'whitespace': [
+            (r'\s+', Text),
+        ],
+        'comment_start': [
+            (r'\(\*', Comment.Multiline, 'comment'),
+        ],
+        'comment': [
+            # Must precede the single-character rule below: that rule also
+            # matches '(' and would otherwise hide every nested '(*'.
+            include('comment_start'),
+            (r'[^*)]', Comment.Multiline),
+            (r'\*\)', Comment.Multiline, '#pop'),
+            # A lone '*' or ')' that is not part of a closing '*)'.
+            (r'[*)]', Comment.Multiline),
+        ],
+        'identifier': [
+            # The character class admits blanks and '-', so a match also
+            # absorbs trailing blanks and a '-' that follows an identifier.
+            (r'([a-zA-Z][a-zA-Z0-9 \-]*)', Keyword),
+        ],
+        'strings': [
+            # Terminal strings have no escapes; each ends at its own quote.
+            (r'"', String.Double, 'dq_string'),
+            (r"'", String.Single, 'sq_string'),
+        ],
+        'dq_string': [
+            (r'[^"]', String.Double),
+            (r'"', String.Double, '#pop'),
+        ],
+        'sq_string': [
+            (r"[^']", String.Single),
+            (r"'", String.Single, '#pop'),
+        ],
+    }
diff --git a/tests/examplefiles/example.ebnf b/tests/examplefiles/example.ebnf
new file mode 100644
index 00000000..7a917405
--- /dev/null
+++ b/tests/examplefiles/example.ebnf
@@ -0,0 +1,193 @@
+(*
+  The syntax of Extended BNF can be defined using itself. There are four
+  parts in this example, the first part names the characters, the second
+  part defines the removal of unnecessary non-printing characters, the
+  third part defines the removal of textual comments, and the final part
+  defines the structure of Extended BNF itself.
+
+  Each syntax rule in this example starts with a comment that identifies
+  the corresponding clause in the standard.
+
+  The meaning of special-sequences is not defined in the standard. In
+  this example (see the reference to 7.6) they represent control
+  functions defined by ISO/IEC 6429:1992. Another special-sequence
+  defines a syntactic-exception (see the reference to 4.7).
+*)
+
+(*
+  The first part of the lexical syntax defines the characters in the
+  7-bit character set (ISO/IEC 646:1991) that represent each
+  terminal-character and gap-separator in Extended BNF.
+*)
+
+(* see 7.2 *) letter
+  = 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l'
+  | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x'
+  | 'y' | 'z'
+  | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L'
+  | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X'
+  | 'Y' | 'Z';
+(* see 7.2 *) decimal digit
+  = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9';
+(*
+  The representation of the following terminal-characters is defined in
+  clauses 7.3, 7.4 and tables 1, 2.
+*)
+concatenate symbol = ',';
+defining symbol = '=';
+definition separator symbol = '|' | '/' | '!';
+end comment symbol = '*)';
+end group symbol = ')';
+end option symbol = ']' | '/)';
+end repeat symbol = '}' | ':)';
+except symbol = '-';
+first quote symbol = "'";
+repetition symbol = '*';
+second quote symbol = '"';
+special sequence symbol = '?';
+start comment symbol = '(*';
+start group symbol = '(';
+start option symbol = '[' | '(/';
+start repeat symbol = '{' | '(:';
+terminator symbol = ';' | '.';
+(* see 7.5 *) other character
+  = ' ' | ':' | '+' | '_' | '%' | '@' | '&' | '#' | '$' | '<' | '>'
+  | '\' | '^' | "'" | '~';
+(* see 7.6 *) space character = ' ';
+horizontal tabulation character
+  = ? ISO 6429 character Horizontal Tabulation ? ;
+new line
+  = { ? ISO 6429 character Carriage Return ? },
+  ? ISO 6429 character Line Feed ?,
+  { ? ISO 6429 character Carriage Return ? };
+vertical tabulation character
+  = ? ISO 6429 character Vertical Tabulation ? ;
+form feed
+  = ? ISO 6429 character Form Feed ? ;
+
+(*
+  The second part of the syntax defines the removal of unnecessary
+  non-printing characters from a syntax.
+*)
+
+(* see 6.2 *) terminal character
+  = letter
+  | decimal digit
+  | concatenate symbol
+  | defining symbol
+  | definition separator symbol
+  | end comment symbol
+  | end group symbol
+  | end option symbol
+  | end repeat symbol
+  | except symbol
+  | first quote symbol
+  | repetition symbol
+  | second quote symbol
+  | special sequence symbol
+  | start comment symbol
+  | start group symbol
+  | start option symbol
+  | start repeat symbol
+  | terminator symbol
+  | other character;
+(* see 6.3 *) gap free symbol
+  = terminal character - (first quote symbol | second quote symbol)
+  | terminal string;
+(* see 4.16 *) terminal string
+  = first quote symbol, first terminal character,
+    {first terminal character}, first quote symbol
+  | second quote symbol, second terminal character,
+    {second terminal character}, second quote symbol;
+(* see 4.17 *) first terminal character
+  = terminal character - first quote symbol;
+(* see 4.18 *) second terminal character
+  = terminal character - second quote symbol;
+(* see 6.4 *) gap separator
+  = space character
+  | horizontal tabulation character
+  | new line
+  | vertical tabulation character
+  | form feed;
+(* see 6.5 *) syntax
+  = {gap separator}, gap free symbol, {gap separator},
+    {gap free symbol, {gap separator}};
+
+(*
+  The third part of the syntax defines the removal of
+  bracketed-textual-comments from gap-free-symbols that form a syntax.
+*)
+
+(* see 6.6 *) commentless symbol
+  = terminal character
+    - (letter
+      | decimal digit
+      | first quote symbol
+      | second quote symbol
+      | start comment symbol
+      | end comment symbol
+      | special sequence symbol
+      | other character)
+  | meta identifier
+  | integer
+  | terminal string
+  | special sequence;
+(* see 4.9 *) integer
+  = decimal digit, {decimal digit};
+(* see 4.14 *) meta identifier
+  = letter, {meta identifier character};
+(* see 4.15 *) meta identifier character
+  = letter
+  | decimal digit;
+(* see 4.19 *) special sequence
+  = special sequence symbol,
+    {special sequence character},
+    special sequence symbol;
+(* see 4.20 *) special sequence character
+  = terminal character - special sequence symbol;
+(* see 6.7 *) comment symbol
+  = bracketed textual comment | other character | commentless symbol;
+(* see 6.8 *) bracketed textual comment
+  = start comment symbol, {comment symbol}, end comment symbol;
+(* see 6.9 *) syntax
+  = {bracketed textual comment}, commentless symbol,
+    {bracketed textual comment},
+    {commentless symbol, {bracketed textual comment}};
+
+(*
+  The final part of the syntax defines the abstract syntax of Extended
+  BNF, i.e. the structure in terms of the commentless symbols.
+*)
+
+(* see 4.2 *) syntax
+  = syntax rule, {syntax rule};
+(* see 4.3 *) syntax rule
+  = meta identifier, defining symbol,
+    definitions list, terminator symbol;
+(* see 4.4 *) definitions list
+  = single definition, {definition separator symbol, single definition};
+(* see 4.5 *) single definition
+  = syntactic term, {concatenate symbol, syntactic term};
+(* see 4.6 *) syntactic term
+  = syntactic factor, [except symbol, syntactic exception];
+(* see 4.7 *) syntactic exception
+  = ? a syntactic-factor that could be replaced by a syntactic-factor
+      containing no meta-identifiers
+    ? ;
+(* see 4.8 *) syntactic factor
+  = [integer, repetition symbol], syntactic primary;
+(* see 4.10 *) syntactic primary
+  = optional sequence
+  | repeated sequence
+  | grouped sequence
+  | meta identifier
+  | terminal string
+  | special sequence
+  | empty sequence;
+(* see 4.11 *) optional sequence
+  = start option symbol, definitions list, end option symbol;
+(* see 4.12 *) repeated sequence
+  = start repeat symbol, definitions list, end repeat symbol;
+(* see 4.13 *) grouped sequence
+  = start group symbol, definitions list, end group symbol;
+(* see 4.21 *) empty sequence = ;
diff --git a/tests/examplefiles/test.ebnf b/tests/examplefiles/test.ebnf
new file mode 100644
index 00000000..a96171b0
--- /dev/null
+++ b/tests/examplefiles/test.ebnf
@@ -0,0 +1,31 @@
+letter = "A" | "B" | "C" | "D" | "E" | "F" | "G"
+       | "H" | "I" | "J" | "K" | "L" | "M" | "N"
+       | "O" | "P" | "Q" | "R" | "S" | "T" | "U"
+       | "V" | "W" | "X" | "Y" | "Z" ;
+digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
+symbol = "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">"
+       | "'" | '"' | "=" | "|" | "." | "," | ";" ;
+character = letter | digit | symbol | " " ;
+
+identifier = letter , { letter | digit | " " } ;
+terminal = "'" , character , { character } , "'"
+         | '"' , character , { character } , '"' ;
+
+special = "?" , any , "?" ;
+
+comment = (* this is a comment "" *) "(*" , any-symbol , "*)" ;
+any-symbol = ? any visible character ? ; (* ? ... ? *)
+
+lhs = identifier ;
+rhs = identifier
+     | terminal
+     | comment , rhs
+     | rhs , comment
+     | "[" , rhs , "]"
+     | "{" , rhs , "}"
+     | "(" , rhs , ")"
+     | rhs , "|" , rhs
+     | rhs , "," , rhs ;
+
+rule = lhs , "=" , rhs , ";" | comment ;
+grammar = { rule } ;