diff options
author | Bryan Davis <bd808@bd808.com> | 2013-05-06 22:57:33 -0600 |
---|---|---|
committer | Bryan Davis <bd808@bd808.com> | 2013-05-06 22:57:33 -0600 |
commit | ee21fc5d24c38f2d2ace98f87089971d6a6a8fe1 (patch) | |
tree | ab7ab9769d175a0f3a490023c802e06662fa2bcd | |
parent | 3eae99b73eba30b37c703f545d0af2cf444e45a5 (diff) | |
download | pygments-ee21fc5d24c38f2d2ace98f87089971d6a6a8fe1.tar.gz |
Code review changes.
- Fix indentation
- Remove overly complicated string parsing rules
- Add link to wikipedia article in docstring
- Remove example using grammar from pre-published version of ISO spec which
has questionble licensing constraints.
-rw-r--r-- | pygments/lexers/text.py | 27 | ||||
-rw-r--r-- | tests/examplefiles/example.ebnf | 193 |
2 files changed, 9 insertions, 211 deletions
diff --git a/pygments/lexers/text.py b/pygments/lexers/text.py index 9a28ffab..47c575c0 100644 --- a/pygments/lexers/text.py +++ b/pygments/lexers/text.py @@ -1845,7 +1845,9 @@ class HxmlLexer(RegexLexer): class EbnfLexer(RegexLexer): """ - Lexer for ISO/IEC 14977 EBNF grammars. + Lexer for `ISO/IEC 14977 EBNF + <http://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form>`_ + grammars. """ name = 'EBNF' @@ -1864,7 +1866,8 @@ class EbnfLexer(RegexLexer): include('whitespace'), include('comment_start'), include('identifier'), - include('strings'), + (r'"[^"]*"', String.Double), + (r"'[^']*'", String.Single), (r'(\?[^?]*\?)', Name.Entity), (r'[\[\]{}(),|]', Punctuation), (r'-', Operator), @@ -1877,24 +1880,12 @@ class EbnfLexer(RegexLexer): (r'\(\*', Comment.Multiline, 'comment'), ], 'comment': [ - (r'[^*)]', Comment.Multiline), - include('comment_start'), - (r'\*\)', Comment.Multiline, '#pop'), - (r'[*)]', Comment.Multiline), + (r'[^*)]', Comment.Multiline), + include('comment_start'), + (r'\*\)', Comment.Multiline, '#pop'), + (r'[*)]', Comment.Multiline), ], 'identifier': [ (r'([a-zA-Z][a-zA-Z0-9 \-]*)', Keyword), ], - 'strings': [ - (r'"', String.Double, 'dq_string'), - (r"'", String.Single, 'sq_string'), - ], - 'dq_string': [ - (r'[^"]', String.Double), - (r'"', String.Double, '#pop'), - ], - 'sq_string': [ - (r"[^']", String.Single), - (r"'", String.Single, '#pop'), - ], } diff --git a/tests/examplefiles/example.ebnf b/tests/examplefiles/example.ebnf deleted file mode 100644 index 7a917405..00000000 --- a/tests/examplefiles/example.ebnf +++ /dev/null @@ -1,193 +0,0 @@ -(* - The syntax of Extended BNF can be defined using itself. There are four - parts in this example, the first part names the characters, the second - part defines the removal of unnecessary non- printing characters, the - third part defines the removal of textual comments, and the final part - defines the structure of Extended BNF itself. - - Each syntax rule in this example starts with a comment that identifies - the corresponding clause in the standard. - - The meaning of special-sequences is not defined in the standard. In - this example (see the reference to 7.6) they represent control - functions defined by ISO/IEC 6429:1992. Another special-sequence - defines a syntactic-exception (see the reference to 4.7). -*) - -(* - The first part of the lexical syntax defines the characters in the - 7-bit character set (ISO/IEC 646:1991) that represent each - terminal-character and gap-separator in Extended BNF. -*) - -(* see 7.2 *) letter - = 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' - | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' - | 'y' | 'z' - | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' - | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' - | 'Y' | 'Z'; -(* see 7.2 *) decimal digit - = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'; -(* - The representation of the following terminal-characters is defined in - clauses 7.3, 7.4 and tables 1, 2. -*) -concatenate symbol = ','; -defining symbol = '='; -definition separator symbol = '|' | '/' | '!'; -end comment symbol = '*)'; -end group symbol = ')'; -end option symbol = ']' | '/)'; -end repeat symbol = '}' | ':)'; -except symbol = '-'; -first quote symbol = "'"; -repetition symbol = '*'; -second quote symbol = '"'; -special sequence symbol = '?'; -start comment symbol = '(*'; -start group symbol = '('; -start option symbol = '[' | '(/'; -start repeat symbol = '{' | '(:'; -terminator symbol = ';' | '.'; -(* see 7.5 *) other character - = ' ' | ':' | '+' | '_' | '%' | '@' | '&' | '#' | '$' | '<' | '>' - | '\' | '^' | "'" | '~'; -(* see 7.6 *) space character = ' '; -horizontal tabulation character - = ? ISO 6429 character Horizontal Tabulation ? ; -new line - = { ? ISO 6429 character Carriage Return ? }, - ? ISO 6429 character Line Feed ?, - { ? ISO 6429 character Carriage Return ? }; -vertical tabulation character - = ? ISO 6429 character Vertical Tabulation ? ; -form feed - = ? ISO 6429 character Form Feed ? ; - -(* - The second part of the syntax defines the removal of unnecessary - non-printing characters from a syntax. -*) - -(* see 6.2 *) terminal character - = letter - | decimal digit - | concatenate symbol - | defining symbol - | definition separator symbol - | end comment symbol - | end group symbol - | end option symbol - | end repeat symbol - | except symbol - | first quote symbol - | repetition symbol - | second quote symbol - | special sequence symbol - | start comment symbol - | start group symbol - | start option symbol - | start repeat symbol - | terminator symbol - | other character; -(* see 6.3 *) gap free symbol - = terminal character - (first quote symbol | second quote symbol) - | terminal string; -(* see 4.16 *) terminal string - = first quote symbol, first terminal character, - {first terminal character}, first quote symbol - | second quote symbol, second terminal character, - {second terminal character}, second quote symbol; -(* see 4.17 *) first terminal character - = terminal character - first quote symbol; -(* see 4.18 *) second terminal character - = terminal character - second quote symbol; -(* see 6.4 *) gap separator - = space character - | horizontal tabulation character - | new line - | vertical tabulation character - | form feed; -(* see 6.5 *) syntax - = {gap separator}, gap free symbol, {gap separator}, - {gap free symbol, {gap separator}}; - -(* - The third part of the syntax defines the removal of - bracketed-textual-comments from gap-free-symbols that form a syntax. -*) - -(* see 6.6 *) commentless symbol - = terminal character - - (letter - | decimal digit - | first quote symbol - | second quote symbol - | start comment symbol - | end comment symbol - | special sequence symbol - | other character) - | meta identifier - | integer - | terminal string - | special sequence; -(* see 4.9 *) integer - = decimal digit, {decimal digit}; -(* see 4.14 *) meta identifier - = letter, {meta identifier character}; -(* see 4.15 *) meta identifier character - = letter - | decimal digit; -(* see 4.19 *) special sequence - = special sequence symbol, - {special sequence character}, - special sequence symbol; -(* see 4.20 *) special sequence character - = terminal character - special sequence symbol; -(* see 6.7 *) comment symbol - = bracketed textual comment | other character | commentless symbol; -(* see 6.8 *) bracketed textual comment - = start comment symbol, {comment symbol}, end comment symbol; -(* see 6.9 *) syntax - = {bracketed textual comment}, commentless symbol, - {bracketed textual comment}, - {commentless symbol, {bracketed textual comment}}; - -(* - The final part of the syntax defines the abstract syntax of Extended - BNF, i.e. the structure in terms of the commentless symbols. -*) - -(* see 4.2 *) syntax - = syntax rule, {syntax rule}; -(* see 4.3 *) syntax rule - = meta identifier, defining symbol, - definitions list, terminator symbol; -(* see 4.4 *) definitions list - = single definition, {definition separator symbol, single definition}; -(* see 4.5 *) single definition - = syntactic term, {concatenate symbol, syntactic term}; -(* see 4.6 *) syntactic term - = syntactic factor, [except symbol, syntactic exception]; -(* see 4.7 *) syntactic exception - = ? a syntactic-factor that could be replaced by a syntactic-factor - containing no meta-identifiers - ? ; -(* see 4.8 *) syntactic factor - = [integer, repetition symbol], syntactic primary; -(* see 4.10 *) syntactic primary - = optional sequence - | repeated sequence - | grouped sequence - | meta identifier - | terminal string - | special sequence - | empty sequence; -(* see 4.11 *) optional sequence - = start option symbol, definitions list, end option symbol; -(* see 4.12 *) repeated sequence - = start repeat symbol, definitions list, end repeat symbol; -(* see 4.13 *) grouped sequence - = start group symbol, definitions list, end group symbol; -(* see 4.21 *) empty sequence = ; |