diff options
author | Bryan Davis <bd808@bd808.com> | 2013-04-25 22:54:55 -0600 |
---|---|---|
committer | Bryan Davis <bd808@bd808.com> | 2013-04-25 22:54:55 -0600 |
commit | 4641268f11100113420b27d0fd7465c9c43e814b (patch) | |
tree | 88e169210f9c2a4a4ef7c0cb25b74256fef7f902 /tests/examplefiles | |
parent | 690efaf0304427ab5ab023c67b27459beccc4b15 (diff) | |
download | pygments-4641268f11100113420b27d0fd7465c9c43e814b.tar.gz |
Lexer for ISO/IEC 14977 EBNF grammars.
A regex-based lexer for EBNF as defined in
http://www.cl.cam.ac.uk/~mgk25/iso-14977.pdf
Related to issue #805.
Diffstat (limited to 'tests/examplefiles')
-rw-r--r-- | tests/examplefiles/example.ebnf | 193 |
-rw-r--r-- | tests/examplefiles/test.ebnf | 31 |
2 files changed, 224 insertions, 0 deletions
diff --git a/tests/examplefiles/example.ebnf b/tests/examplefiles/example.ebnf new file mode 100644 index 00000000..7a917405 --- /dev/null +++ b/tests/examplefiles/example.ebnf @@ -0,0 +1,193 @@ +(* + The syntax of Extended BNF can be defined using itself. There are four + parts in this example, the first part names the characters, the second + part defines the removal of unnecessary non- printing characters, the + third part defines the removal of textual comments, and the final part + defines the structure of Extended BNF itself. + + Each syntax rule in this example starts with a comment that identifies + the corresponding clause in the standard. + + The meaning of special-sequences is not defined in the standard. In + this example (see the reference to 7.6) they represent control + functions defined by ISO/IEC 6429:1992. Another special-sequence + defines a syntactic-exception (see the reference to 4.7). +*) + +(* + The first part of the lexical syntax defines the characters in the + 7-bit character set (ISO/IEC 646:1991) that represent each + terminal-character and gap-separator in Extended BNF. +*) + +(* see 7.2 *) letter + = 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' + | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' + | 'y' | 'z' + | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' + | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' + | 'Y' | 'Z'; +(* see 7.2 *) decimal digit + = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'; +(* + The representation of the following terminal-characters is defined in + clauses 7.3, 7.4 and tables 1, 2. 
+*) +concatenate symbol = ','; +defining symbol = '='; +definition separator symbol = '|' | '/' | '!'; +end comment symbol = '*)'; +end group symbol = ')'; +end option symbol = ']' | '/)'; +end repeat symbol = '}' | ':)'; +except symbol = '-'; +first quote symbol = "'"; +repetition symbol = '*'; +second quote symbol = '"'; +special sequence symbol = '?'; +start comment symbol = '(*'; +start group symbol = '('; +start option symbol = '[' | '(/'; +start repeat symbol = '{' | '(:'; +terminator symbol = ';' | '.'; +(* see 7.5 *) other character + = ' ' | ':' | '+' | '_' | '%' | '@' | '&' | '#' | '$' | '<' | '>' + | '\' | '^' | "'" | '~'; +(* see 7.6 *) space character = ' '; +horizontal tabulation character + = ? ISO 6429 character Horizontal Tabulation ? ; +new line + = { ? ISO 6429 character Carriage Return ? }, + ? ISO 6429 character Line Feed ?, + { ? ISO 6429 character Carriage Return ? }; +vertical tabulation character + = ? ISO 6429 character Vertical Tabulation ? ; +form feed + = ? ISO 6429 character Form Feed ? ; + +(* + The second part of the syntax defines the removal of unnecessary + non-printing characters from a syntax. 
+*) + +(* see 6.2 *) terminal character + = letter + | decimal digit + | concatenate symbol + | defining symbol + | definition separator symbol + | end comment symbol + | end group symbol + | end option symbol + | end repeat symbol + | except symbol + | first quote symbol + | repetition symbol + | second quote symbol + | special sequence symbol + | start comment symbol + | start group symbol + | start option symbol + | start repeat symbol + | terminator symbol + | other character; +(* see 6.3 *) gap free symbol + = terminal character - (first quote symbol | second quote symbol) + | terminal string; +(* see 4.16 *) terminal string + = first quote symbol, first terminal character, + {first terminal character}, first quote symbol + | second quote symbol, second terminal character, + {second terminal character}, second quote symbol; +(* see 4.17 *) first terminal character + = terminal character - first quote symbol; +(* see 4.18 *) second terminal character + = terminal character - second quote symbol; +(* see 6.4 *) gap separator + = space character + | horizontal tabulation character + | new line + | vertical tabulation character + | form feed; +(* see 6.5 *) syntax + = {gap separator}, gap free symbol, {gap separator}, + {gap free symbol, {gap separator}}; + +(* + The third part of the syntax defines the removal of + bracketed-textual-comments from gap-free-symbols that form a syntax. 
+*) + +(* see 6.6 *) commentless symbol + = terminal character + - (letter + | decimal digit + | first quote symbol + | second quote symbol + | start comment symbol + | end comment symbol + | special sequence symbol + | other character) + | meta identifier + | integer + | terminal string + | special sequence; +(* see 4.9 *) integer + = decimal digit, {decimal digit}; +(* see 4.14 *) meta identifier + = letter, {meta identifier character}; +(* see 4.15 *) meta identifier character + = letter + | decimal digit; +(* see 4.19 *) special sequence + = special sequence symbol, + {special sequence character}, + special sequence symbol; +(* see 4.20 *) special sequence character + = terminal character - special sequence symbol; +(* see 6.7 *) comment symbol + = bracketed textual comment | other character | commentless symbol; +(* see 6.8 *) bracketed textual comment + = start comment symbol, {comment symbol}, end comment symbol; +(* see 6.9 *) syntax + = {bracketed textual comment}, commentless symbol, + {bracketed textual comment}, + {commentless symbol, {bracketed textual comment}}; + +(* + The final part of the syntax defines the abstract syntax of Extended + BNF, i.e. the structure in terms of the commentless symbols. +*) + +(* see 4.2 *) syntax + = syntax rule, {syntax rule}; +(* see 4.3 *) syntax rule + = meta identifier, defining symbol, + definitions list, terminator symbol; +(* see 4.4 *) definitions list + = single definition, {definition separator symbol, single definition}; +(* see 4.5 *) single definition + = syntactic term, {concatenate symbol, syntactic term}; +(* see 4.6 *) syntactic term + = syntactic factor, [except symbol, syntactic exception]; +(* see 4.7 *) syntactic exception + = ? a syntactic-factor that could be replaced by a syntactic-factor + containing no meta-identifiers + ? 
; +(* see 4.8 *) syntactic factor + = [integer, repetition symbol], syntactic primary; +(* see 4.10 *) syntactic primary + = optional sequence + | repeated sequence + | grouped sequence + | meta identifier + | terminal string + | special sequence + | empty sequence; +(* see 4.11 *) optional sequence + = start option symbol, definitions list, end option symbol; +(* see 4.12 *) repeated sequence + = start repeat symbol, definitions list, end repeat symbol; +(* see 4.13 *) grouped sequence + = start group symbol, definitions list, end group symbol; +(* see 4.21 *) empty sequence = ; diff --git a/tests/examplefiles/test.ebnf b/tests/examplefiles/test.ebnf new file mode 100644 index 00000000..a96171b0 --- /dev/null +++ b/tests/examplefiles/test.ebnf @@ -0,0 +1,31 @@ +letter = "A" | "B" | "C" | "D" | "E" | "F" | "G" + | "H" | "I" | "J" | "K" | "L" | "M" | "N" + | "O" | "P" | "Q" | "R" | "S" | "T" | "U" + | "V" | "W" | "X" | "Y" | "Z" ; +digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; +symbol = "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">" + | "'" | '"' | "=" | "|" | "." | "," | ";" ; +character = letter | digit | symbol | " " ; + +identifier = letter , { letter | digit | " " } ; +terminal = "'" , character , { character } , "'" + | '"' , character , { character } , '"' ; + +special = "?" , any , "?" ; + +comment = (* this is a comment "" *) "(*" , any-symbol , "*)" ; +any-symbol = ? any visible character ? ; (* ? ... ? *) + +lhs = identifier ; +rhs = identifier + | terminal + | comment , rhs + | rhs , comment + | "[" , rhs , "]" + | "{" , rhs , "}" + | "(" , rhs , ")" + | rhs , "|" , rhs + | rhs , "," , rhs ; + +rule = lhs , "=" , rhs , ";" | comment ; +grammar = { rule } ; |