summaryrefslogtreecommitdiff
path: root/tests/examplefiles
diff options
context:
space:
mode:
authorBryan Davis <bd808@bd808.com>2013-04-25 22:54:55 -0600
committerBryan Davis <bd808@bd808.com>2013-04-25 22:54:55 -0600
commit4641268f11100113420b27d0fd7465c9c43e814b (patch)
tree88e169210f9c2a4a4ef7c0cb25b74256fef7f902 /tests/examplefiles
parent690efaf0304427ab5ab023c67b27459beccc4b15 (diff)
downloadpygments-4641268f11100113420b27d0fd7465c9c43e814b.tar.gz
Lexer for ISO/IEC 14977 EBNF grammars.
A regex based lexer for EBNF as defined in http://www.cl.cam.ac.uk/~mgk25/iso-14977.pdf Related to issue #805.
Diffstat (limited to 'tests/examplefiles')
-rw-r--r--tests/examplefiles/example.ebnf193
-rw-r--r--tests/examplefiles/test.ebnf31
2 files changed, 224 insertions, 0 deletions
diff --git a/tests/examplefiles/example.ebnf b/tests/examplefiles/example.ebnf
new file mode 100644
index 00000000..7a917405
--- /dev/null
+++ b/tests/examplefiles/example.ebnf
@@ -0,0 +1,193 @@
+(*
+ The syntax of Extended BNF can be defined using itself. There are four
+ parts in this example, the first part names the characters, the second
+ part defines the removal of unnecessary non- printing characters, the
+ third part defines the removal of textual comments, and the final part
+ defines the structure of Extended BNF itself.
+
+ Each syntax rule in this example starts with a comment that identifies
+ the corresponding clause in the standard.
+
+ The meaning of special-sequences is not defined in the standard. In
+ this example (see the reference to 7.6) they represent control
+ functions defined by ISO/IEC 6429:1992. Another special-sequence
+ defines a syntactic-exception (see the reference to 4.7).
+*)
+
+(*
+ The first part of the lexical syntax defines the characters in the
+ 7-bit character set (ISO/IEC 646:1991) that represent each
+ terminal-character and gap-separator in Extended BNF.
+*)
+
+(* see 7.2 *) letter
+ = 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l'
+ | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x'
+ | 'y' | 'z'
+ | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L'
+ | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X'
+ | 'Y' | 'Z';
+(* see 7.2 *) decimal digit
+ = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9';
+(*
+ The representation of the following terminal-characters is defined in
+ clauses 7.3, 7.4 and tables 1, 2.
+*)
+concatenate symbol = ',';
+defining symbol = '=';
+definition separator symbol = '|' | '/' | '!';
+end comment symbol = '*)';
+end group symbol = ')';
+end option symbol = ']' | '/)';
+end repeat symbol = '}' | ':)';
+except symbol = '-';
+first quote symbol = "'";
+repetition symbol = '*';
+second quote symbol = '"';
+special sequence symbol = '?';
+start comment symbol = '(*';
+start group symbol = '(';
+start option symbol = '[' | '(/';
+start repeat symbol = '{' | '(:';
+terminator symbol = ';' | '.';
+(* see 7.5 *) other character
+ = ' ' | ':' | '+' | '_' | '%' | '@' | '&' | '#' | '$' | '<' | '>'
+ | '\' | '^' | "'" | '~';
+(* see 7.6 *) space character = ' ';
+horizontal tabulation character
+ = ? ISO 6429 character Horizontal Tabulation ? ;
+new line
+ = { ? ISO 6429 character Carriage Return ? },
+ ? ISO 6429 character Line Feed ?,
+ { ? ISO 6429 character Carriage Return ? };
+vertical tabulation character
+ = ? ISO 6429 character Vertical Tabulation ? ;
+form feed
+ = ? ISO 6429 character Form Feed ? ;
+
+(*
+ The second part of the syntax defines the removal of unnecessary
+ non-printing characters from a syntax.
+*)
+
+(* see 6.2 *) terminal character
+ = letter
+ | decimal digit
+ | concatenate symbol
+ | defining symbol
+ | definition separator symbol
+ | end comment symbol
+ | end group symbol
+ | end option symbol
+ | end repeat symbol
+ | except symbol
+ | first quote symbol
+ | repetition symbol
+ | second quote symbol
+ | special sequence symbol
+ | start comment symbol
+ | start group symbol
+ | start option symbol
+ | start repeat symbol
+ | terminator symbol
+ | other character;
+(* see 6.3 *) gap free symbol
+ = terminal character - (first quote symbol | second quote symbol)
+ | terminal string;
+(* see 4.16 *) terminal string
+ = first quote symbol, first terminal character,
+ {first terminal character}, first quote symbol
+ | second quote symbol, second terminal character,
+ {second terminal character}, second quote symbol;
+(* see 4.17 *) first terminal character
+ = terminal character - first quote symbol;
+(* see 4.18 *) second terminal character
+ = terminal character - second quote symbol;
+(* see 6.4 *) gap separator
+ = space character
+ | horizontal tabulation character
+ | new line
+ | vertical tabulation character
+ | form feed;
+(* see 6.5 *) syntax
+ = {gap separator}, gap free symbol, {gap separator},
+ {gap free symbol, {gap separator}};
+
+(*
+ The third part of the syntax defines the removal of
+ bracketed-textual-comments from gap-free-symbols that form a syntax.
+*)
+
+(* see 6.6 *) commentless symbol
+ = terminal character
+ - (letter
+ | decimal digit
+ | first quote symbol
+ | second quote symbol
+ | start comment symbol
+ | end comment symbol
+ | special sequence symbol
+ | other character)
+ | meta identifier
+ | integer
+ | terminal string
+ | special sequence;
+(* see 4.9 *) integer
+ = decimal digit, {decimal digit};
+(* see 4.14 *) meta identifier
+ = letter, {meta identifier character};
+(* see 4.15 *) meta identifier character
+ = letter
+ | decimal digit;
+(* see 4.19 *) special sequence
+ = special sequence symbol,
+ {special sequence character},
+ special sequence symbol;
+(* see 4.20 *) special sequence character
+ = terminal character - special sequence symbol;
+(* see 6.7 *) comment symbol
+ = bracketed textual comment | other character | commentless symbol;
+(* see 6.8 *) bracketed textual comment
+ = start comment symbol, {comment symbol}, end comment symbol;
+(* see 6.9 *) syntax
+ = {bracketed textual comment}, commentless symbol,
+ {bracketed textual comment},
+ {commentless symbol, {bracketed textual comment}};
+
+(*
+ The final part of the syntax defines the abstract syntax of Extended
+ BNF, i.e. the structure in terms of the commentless symbols.
+*)
+
+(* see 4.2 *) syntax
+ = syntax rule, {syntax rule};
+(* see 4.3 *) syntax rule
+ = meta identifier, defining symbol,
+ definitions list, terminator symbol;
+(* see 4.4 *) definitions list
+ = single definition, {definition separator symbol, single definition};
+(* see 4.5 *) single definition
+ = syntactic term, {concatenate symbol, syntactic term};
+(* see 4.6 *) syntactic term
+ = syntactic factor, [except symbol, syntactic exception];
+(* see 4.7 *) syntactic exception
+ = ? a syntactic-factor that could be replaced by a syntactic-factor
+ containing no meta-identifiers
+ ? ;
+(* see 4.8 *) syntactic factor
+ = [integer, repetition symbol], syntactic primary;
+(* see 4.10 *) syntactic primary
+ = optional sequence
+ | repeated sequence
+ | grouped sequence
+ | meta identifier
+ | terminal string
+ | special sequence
+ | empty sequence;
+(* see 4.11 *) optional sequence
+ = start option symbol, definitions list, end option symbol;
+(* see 4.12 *) repeated sequence
+ = start repeat symbol, definitions list, end repeat symbol;
+(* see 4.13 *) grouped sequence
+ = start group symbol, definitions list, end group symbol;
+(* see 4.21 *) empty sequence = ;
diff --git a/tests/examplefiles/test.ebnf b/tests/examplefiles/test.ebnf
new file mode 100644
index 00000000..a96171b0
--- /dev/null
+++ b/tests/examplefiles/test.ebnf
@@ -0,0 +1,31 @@
+letter = "A" | "B" | "C" | "D" | "E" | "F" | "G"
+ | "H" | "I" | "J" | "K" | "L" | "M" | "N"
+ | "O" | "P" | "Q" | "R" | "S" | "T" | "U"
+ | "V" | "W" | "X" | "Y" | "Z" ;
+digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
+symbol = "[" | "]" | "{" | "}" | "(" | ")" | "<" | ">"
+ | "'" | '"' | "=" | "|" | "." | "," | ";" ;
+character = letter | digit | symbol | " " ;
+
+identifier = letter , { letter | digit | " " } ;
+terminal = "'" , character , { character } , "'"
+ | '"' , character , { character } , '"' ;
+
+special = "?" , any , "?" ;
+
+comment = (* this is a comment "" *) "(*" , any-symbol , "*)" ;
+any-symbol = ? any visible character ? ; (* ? ... ? *)
+
+lhs = identifier ;
+rhs = identifier
+ | terminal
+ | comment , rhs
+ | rhs , comment
+ | "[" , rhs , "]"
+ | "{" , rhs , "}"
+ | "(" , rhs , ")"
+ | rhs , "|" , rhs
+ | rhs , "," , rhs ;
+
+rule = lhs , "=" , rhs , ";" | comment ;
+grammar = { rule } ;