summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBryan Davis <bd808@bd808.com>2013-05-06 22:57:33 -0600
committerBryan Davis <bd808@bd808.com>2013-05-06 22:57:33 -0600
commitee21fc5d24c38f2d2ace98f87089971d6a6a8fe1 (patch)
treeab7ab9769d175a0f3a490023c802e06662fa2bcd
parent3eae99b73eba30b37c703f545d0af2cf444e45a5 (diff)
downloadpygments-ee21fc5d24c38f2d2ace98f87089971d6a6a8fe1.tar.gz
Code review changes.
- Fix indentation - Remove overly complicated string parsing rules - Add link to Wikipedia article in docstring - Remove example using grammar from pre-published version of ISO spec which has questionable licensing constraints.
-rw-r--r--pygments/lexers/text.py27
-rw-r--r--tests/examplefiles/example.ebnf193
2 files changed, 9 insertions, 211 deletions
diff --git a/pygments/lexers/text.py b/pygments/lexers/text.py
index 9a28ffab..47c575c0 100644
--- a/pygments/lexers/text.py
+++ b/pygments/lexers/text.py
@@ -1845,7 +1845,9 @@ class HxmlLexer(RegexLexer):
class EbnfLexer(RegexLexer):
"""
- Lexer for ISO/IEC 14977 EBNF grammars.
+ Lexer for `ISO/IEC 14977 EBNF
+ <http://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form>`_
+ grammars.
"""
name = 'EBNF'
@@ -1864,7 +1866,8 @@ class EbnfLexer(RegexLexer):
include('whitespace'),
include('comment_start'),
include('identifier'),
- include('strings'),
+ (r'"[^"]*"', String.Double),
+ (r"'[^']*'", String.Single),
(r'(\?[^?]*\?)', Name.Entity),
(r'[\[\]{}(),|]', Punctuation),
(r'-', Operator),
@@ -1877,24 +1880,12 @@ class EbnfLexer(RegexLexer):
(r'\(\*', Comment.Multiline, 'comment'),
],
'comment': [
- (r'[^*)]', Comment.Multiline),
- include('comment_start'),
- (r'\*\)', Comment.Multiline, '#pop'),
- (r'[*)]', Comment.Multiline),
+ (r'[^*)]', Comment.Multiline),
+ include('comment_start'),
+ (r'\*\)', Comment.Multiline, '#pop'),
+ (r'[*)]', Comment.Multiline),
],
'identifier': [
(r'([a-zA-Z][a-zA-Z0-9 \-]*)', Keyword),
],
- 'strings': [
- (r'"', String.Double, 'dq_string'),
- (r"'", String.Single, 'sq_string'),
- ],
- 'dq_string': [
- (r'[^"]', String.Double),
- (r'"', String.Double, '#pop'),
- ],
- 'sq_string': [
- (r"[^']", String.Single),
- (r"'", String.Single, '#pop'),
- ],
}
diff --git a/tests/examplefiles/example.ebnf b/tests/examplefiles/example.ebnf
deleted file mode 100644
index 7a917405..00000000
--- a/tests/examplefiles/example.ebnf
+++ /dev/null
@@ -1,193 +0,0 @@
-(*
- The syntax of Extended BNF can be defined using itself. There are four
- parts in this example, the first part names the characters, the second
- part defines the removal of unnecessary non- printing characters, the
- third part defines the removal of textual comments, and the final part
- defines the structure of Extended BNF itself.
-
- Each syntax rule in this example starts with a comment that identifies
- the corresponding clause in the standard.
-
- The meaning of special-sequences is not defined in the standard. In
- this example (see the reference to 7.6) they represent control
- functions defined by ISO/IEC 6429:1992. Another special-sequence
- defines a syntactic-exception (see the reference to 4.7).
-*)
-
-(*
- The first part of the lexical syntax defines the characters in the
- 7-bit character set (ISO/IEC 646:1991) that represent each
- terminal-character and gap-separator in Extended BNF.
-*)
-
-(* see 7.2 *) letter
- = 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l'
- | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x'
- | 'y' | 'z'
- | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L'
- | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X'
- | 'Y' | 'Z';
-(* see 7.2 *) decimal digit
- = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9';
-(*
- The representation of the following terminal-characters is defined in
- clauses 7.3, 7.4 and tables 1, 2.
-*)
-concatenate symbol = ',';
-defining symbol = '=';
-definition separator symbol = '|' | '/' | '!';
-end comment symbol = '*)';
-end group symbol = ')';
-end option symbol = ']' | '/)';
-end repeat symbol = '}' | ':)';
-except symbol = '-';
-first quote symbol = "'";
-repetition symbol = '*';
-second quote symbol = '"';
-special sequence symbol = '?';
-start comment symbol = '(*';
-start group symbol = '(';
-start option symbol = '[' | '(/';
-start repeat symbol = '{' | '(:';
-terminator symbol = ';' | '.';
-(* see 7.5 *) other character
- = ' ' | ':' | '+' | '_' | '%' | '@' | '&' | '#' | '$' | '<' | '>'
- | '\' | '^' | "'" | '~';
-(* see 7.6 *) space character = ' ';
-horizontal tabulation character
- = ? ISO 6429 character Horizontal Tabulation ? ;
-new line
- = { ? ISO 6429 character Carriage Return ? },
- ? ISO 6429 character Line Feed ?,
- { ? ISO 6429 character Carriage Return ? };
-vertical tabulation character
- = ? ISO 6429 character Vertical Tabulation ? ;
-form feed
- = ? ISO 6429 character Form Feed ? ;
-
-(*
- The second part of the syntax defines the removal of unnecessary
- non-printing characters from a syntax.
-*)
-
-(* see 6.2 *) terminal character
- = letter
- | decimal digit
- | concatenate symbol
- | defining symbol
- | definition separator symbol
- | end comment symbol
- | end group symbol
- | end option symbol
- | end repeat symbol
- | except symbol
- | first quote symbol
- | repetition symbol
- | second quote symbol
- | special sequence symbol
- | start comment symbol
- | start group symbol
- | start option symbol
- | start repeat symbol
- | terminator symbol
- | other character;
-(* see 6.3 *) gap free symbol
- = terminal character - (first quote symbol | second quote symbol)
- | terminal string;
-(* see 4.16 *) terminal string
- = first quote symbol, first terminal character,
- {first terminal character}, first quote symbol
- | second quote symbol, second terminal character,
- {second terminal character}, second quote symbol;
-(* see 4.17 *) first terminal character
- = terminal character - first quote symbol;
-(* see 4.18 *) second terminal character
- = terminal character - second quote symbol;
-(* see 6.4 *) gap separator
- = space character
- | horizontal tabulation character
- | new line
- | vertical tabulation character
- | form feed;
-(* see 6.5 *) syntax
- = {gap separator}, gap free symbol, {gap separator},
- {gap free symbol, {gap separator}};
-
-(*
- The third part of the syntax defines the removal of
- bracketed-textual-comments from gap-free-symbols that form a syntax.
-*)
-
-(* see 6.6 *) commentless symbol
- = terminal character
- - (letter
- | decimal digit
- | first quote symbol
- | second quote symbol
- | start comment symbol
- | end comment symbol
- | special sequence symbol
- | other character)
- | meta identifier
- | integer
- | terminal string
- | special sequence;
-(* see 4.9 *) integer
- = decimal digit, {decimal digit};
-(* see 4.14 *) meta identifier
- = letter, {meta identifier character};
-(* see 4.15 *) meta identifier character
- = letter
- | decimal digit;
-(* see 4.19 *) special sequence
- = special sequence symbol,
- {special sequence character},
- special sequence symbol;
-(* see 4.20 *) special sequence character
- = terminal character - special sequence symbol;
-(* see 6.7 *) comment symbol
- = bracketed textual comment | other character | commentless symbol;
-(* see 6.8 *) bracketed textual comment
- = start comment symbol, {comment symbol}, end comment symbol;
-(* see 6.9 *) syntax
- = {bracketed textual comment}, commentless symbol,
- {bracketed textual comment},
- {commentless symbol, {bracketed textual comment}};
-
-(*
- The final part of the syntax defines the abstract syntax of Extended
- BNF, i.e. the structure in terms of the commentless symbols.
-*)
-
-(* see 4.2 *) syntax
- = syntax rule, {syntax rule};
-(* see 4.3 *) syntax rule
- = meta identifier, defining symbol,
- definitions list, terminator symbol;
-(* see 4.4 *) definitions list
- = single definition, {definition separator symbol, single definition};
-(* see 4.5 *) single definition
- = syntactic term, {concatenate symbol, syntactic term};
-(* see 4.6 *) syntactic term
- = syntactic factor, [except symbol, syntactic exception];
-(* see 4.7 *) syntactic exception
- = ? a syntactic-factor that could be replaced by a syntactic-factor
- containing no meta-identifiers
- ? ;
-(* see 4.8 *) syntactic factor
- = [integer, repetition symbol], syntactic primary;
-(* see 4.10 *) syntactic primary
- = optional sequence
- | repeated sequence
- | grouped sequence
- | meta identifier
- | terminal string
- | special sequence
- | empty sequence;
-(* see 4.11 *) optional sequence
- = start option symbol, definitions list, end option symbol;
-(* see 4.12 *) repeated sequence
- = start repeat symbol, definitions list, end repeat symbol;
-(* see 4.13 *) grouped sequence
- = start group symbol, definitions list, end group symbol;
-(* see 4.21 *) empty sequence = ;