From a52ee9df900b5a77ffbfa9ef15cdf20461979957 Mon Sep 17 00:00:00 2001 From: hhsprings Date: Thu, 5 Nov 2015 14:50:08 +0900 Subject: Add the lexer for IETF 7405 ABNF . --- pygments/lexers/_mapping.py | 1 + pygments/lexers/grammar_notation.py | 96 +++++++++ tests/examplefiles/abnf_example1.abnf | 58 ++++++ tests/examplefiles/abnf_example2.abnf | 58 ++++++ tests/examplefiles/abnf_example3.abnf | 382 ++++++++++++++++++++++++++++++++++ tests/examplefiles/abnf_example4.abnf | 382 ++++++++++++++++++++++++++++++++++ tests/examplefiles/abnf_example5.abnf | 7 + 7 files changed, 984 insertions(+) create mode 100644 pygments/lexers/grammar_notation.py create mode 100644 tests/examplefiles/abnf_example1.abnf create mode 100644 tests/examplefiles/abnf_example2.abnf create mode 100644 tests/examplefiles/abnf_example3.abnf create mode 100644 tests/examplefiles/abnf_example4.abnf create mode 100644 tests/examplefiles/abnf_example5.abnf diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index af7eec36..adf58313 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -17,6 +17,7 @@ from __future__ import print_function LEXERS = { 'ABAPLexer': ('pygments.lexers.business', 'ABAP', ('abap',), ('*.abap',), ('text/x-abap',)), + 'AbnfLexer': ('pygments.lexers.grammar_notation', 'ABNF', ('abnf',), ('*.abnf',), ('text/x-abnf',)), 'APLLexer': ('pygments.lexers.apl', 'APL', ('apl',), ('*.apl',), ()), 'ActionScript3Lexer': ('pygments.lexers.actionscript', 'ActionScript 3', ('as3', 'actionscript3'), ('*.as',), ('application/x-actionscript3', 'text/x-actionscript3', 'text/actionscript3')), 'ActionScriptLexer': ('pygments.lexers.actionscript', 'ActionScript', ('as', 'actionscript'), ('*.as',), ('application/x-actionscript', 'text/x-actionscript', 'text/actionscript')), diff --git a/pygments/lexers/grammar_notation.py b/pygments/lexers/grammar_notation.py new file mode 100644 index 00000000..65475646 --- /dev/null +++ b/pygments/lexers/grammar_notation.py 
@@ -0,0 +1,104 @@
+# -*- coding: utf-8 -*-
+"""
+    pygments.lexers.grammar_notation
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    Lexers for grammar notations such as ABNF.
+
+    :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import re
+
+from pygments.lexer import RegexLexer, DelegatingLexer, \
+    include, bygroups, using, words
+from pygments.token import Punctuation, Other, Text, Comment, Operator, \
+    Keyword, Name, String, Number, Whitespace, Literal
+
+__all__ = ['AbnfLexer']
+
+
+# The EBNF lexer should probably be moved here as well.
+
+class AbnfLexer(RegexLexer):
+    """
+    Lexer for `IETF 7405 ABNF
+    <http://www.ietf.org/rfc/rfc7405.txt>`_
+    (Updates `5234 <http://www.ietf.org/rfc/rfc5234.txt>`_)
+    grammars.
+
+    .. versionadded:: 2.1
+    """
+
+    name = 'ABNF'
+    aliases = ['abnf']
+    filenames = ['*.abnf']
+    mimetypes = ['text/x-abnf']
+
+    # Rule names predefined by RFC 5234, Appendix B.1 ("Core Rules").
+    _core_rules = (
+        'ALPHA', 'BIT', 'CHAR', 'CR', 'CRLF', 'CTL', 'DIGIT',
+        'DQUOTE', 'HEXDIG', 'HTAB', 'LF', 'LWSP', 'OCTET',
+        'SP', 'VCHAR', 'WSP',)
+
+    def nonterminal_cb(self, match):
+        """Yield one token for a matched rule name.
+
+        Core rule names are emitted as ``Keyword``; every other rule
+        name is emitted as ``Name.Class``.
+        """
+        txt = match.group(0)
+        if txt in self._core_rules:
+            # Strictly speaking these are not keywords; the RFC
+            # calls them "Core Rules".
+            yield match.start(), Keyword, txt
+        else:
+            yield match.start(), Name.Class, txt
+
+    tokens = {
+        'root': [
+            # comment
+            (r';.*$', Comment.Single),
+
+            # quoted string, optionally prefixed with %s / %i (RFC 7405)
+            (r'(%[si])?"', Literal, 'quoted-termination'),
+
+            # binary (rarely seen in practice)
+            (r'%b[01]+\-[01]+\b', Literal),  # range
+            (r'%b[01]+(\.[01]+)*\b', Literal),  # concat
+
+            # decimal
+            (r'%d[0-9]+\-[0-9]+\b', Literal),  # range
+            (r'%d[0-9]+(\.[0-9]+)*\b', Literal),  # concat
+
+            # hexadecimal
+            (r'%x[0-9a-fA-F]+\-[0-9a-fA-F]+\b', Literal),  # range
+            (r'%x[0-9a-fA-F]+(\.[0-9a-fA-F]+)*\b', Literal),  # concat
+
+            # repetition (*element) including nRule
+            (r'\b[0-9]+\*[0-9]+', Operator),
+            (r'\b[0-9]+\*', Operator),
+            (r'\b[0-9]+', Operator),
+            (r'\*', Operator),
+
+            # nonterminals (ALPHA *(ALPHA / DIGIT / "-")); the tail is
+            # optional so that one-letter rule names are matched too
+            (r'[a-zA-Z][a-zA-Z0-9-]*\b', nonterminal_cb),
+
+            # operators
+            (r'(=/|=|/)', Operator),
+
+            # punctuation
+            (r'[\[\]()]', Punctuation),
+
+            # fallback
+            (r'.', Text),
+        ],
+        'quoted-termination': [
+            # A double quote cannot appear inside a quoted string (it
+            # has to be written as %x22), so the next one ends it.
+            (r'"', Literal, '#pop'),
+            (r'.', Literal),
+        ]
+    }
diff --git a/tests/examplefiles/abnf_example1.abnf b/tests/examplefiles/abnf_example1.abnf
new file mode 100644
index 00000000..c5bbe221
--- /dev/null
+++ b/tests/examplefiles/abnf_example1.abnf
@@ -0,0 +1,58 @@
+rulelist = 1*( rule / (*c-wsp c-nl) )
+
+rule = rulename defined-as elements c-nl
+ ; continues if next line starts
+ ; with white space
+
+rulename = ALPHA *(ALPHA / DIGIT / "-")
+defined-as = *c-wsp ("=" / "=/") *c-wsp
+ ; basic rules definition and
+ ; incremental alternatives
+
+elements = alternation *c-wsp
+
+c-wsp = WSP / (c-nl WSP)
+
+c-nl = comment / CRLF
+ ; comment or newline
+
+comment = ";" *(WSP / VCHAR) CRLF
+
+alternation = concatenation
+ *(*c-wsp "/" *c-wsp concatenation)
+
+concatenation = repetition *(1*c-wsp repetition)
+
+repetition = [repeat] element
+
+repeat = 1*DIGIT / (*DIGIT "*" *DIGIT)
+
+element = rulename / group / option /
+ char-val / num-val / prose-val
+
+group = "(" *c-wsp alternation *c-wsp ")"
+
+option = "[" *c-wsp alternation *c-wsp "]"
+
+char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
+ ; quoted string of SP and VCHAR
+ ; without DQUOTE
+
+num-val = "%" (bin-val / dec-val / hex-val)
+
+bin-val = "b" 1*BIT + [ 1*("." 1*BIT) / ("-" 1*BIT) ] + ; series of concatenated bit values + ; or single ONEOF range + +dec-val = "d" 1*DIGIT + [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ] + +hex-val = "x" 1*HEXDIG + [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ] + +prose-val = "<" *(%x20-3D / %x3F-7E) ">" + ; bracketed string of SP and VCHAR + ; without angles + ; prose description, to be used as + ; last resort diff --git a/tests/examplefiles/abnf_example2.abnf b/tests/examplefiles/abnf_example2.abnf new file mode 100644 index 00000000..77c79cb4 --- /dev/null +++ b/tests/examplefiles/abnf_example2.abnf @@ -0,0 +1,58 @@ + rulelist = 1*( rule / (*c-wsp c-nl) ) + + rule = rulename defined-as elements c-nl + ; continues if next line starts + ; with white space + + rulename = ALPHA *(ALPHA / DIGIT / "-") + defined-as = *c-wsp ("=" / "=/") *c-wsp + ; basic rules definition and + ; incremental alternatives + + elements = alternation *c-wsp + + c-wsp = WSP / (c-nl WSP) + + c-nl = comment / CRLF + ; comment or newline + + comment = ";" *(WSP / VCHAR) CRLF + + alternation = concatenation + *(*c-wsp "/" *c-wsp concatenation) + + concatenation = repetition *(1*c-wsp repetition) + + repetition = [repeat] element + + repeat = 1*DIGIT / (*DIGIT "*" *DIGIT) + + element = rulename / group / option / + char-val / num-val / prose-val + + group = "(" *c-wsp alternation *c-wsp ")" + + option = "[" *c-wsp alternation *c-wsp "]" + + char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE + ; quoted string of SP and VCHAR + ; without DQUOTE + + num-val = "%" (bin-val / dec-val / hex-val) + + bin-val = "b" 1*BIT + [ 1*("." 1*BIT) / ("-" 1*BIT) ] + ; series of concatenated bit values + ; or single ONEOF range + + dec-val = "d" 1*DIGIT + [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ] + + hex-val = "x" 1*HEXDIG + [ 1*("." 
1*HEXDIG) / ("-" 1*HEXDIG) ] + + prose-val = "<" *(%x20-3D / %x3F-7E) ">" + ; bracketed string of SP and VCHAR + ; without angles + ; prose description, to be used as + ; last resort diff --git a/tests/examplefiles/abnf_example3.abnf b/tests/examplefiles/abnf_example3.abnf new file mode 100644 index 00000000..51690f54 --- /dev/null +++ b/tests/examplefiles/abnf_example3.abnf @@ -0,0 +1,382 @@ +NO-WS-CTL = %d1-8 / ; US-ASCII control characters + %d11 / ; that do not include the + %d12 / ; carriage return, line feed, + %d14-31 / ; and white space characters + %d127 + +text = %d1-9 / ; Characters excluding CR and LF + %d11 / + %d12 / + %d14-127 / + obs-text + +specials = "(" / ")" / ; Special characters used in + "<" / ">" / ; other parts of the syntax + "[" / "]" / + ":" / ";" / + "@" / "\" / + "," / "." / + DQUOTE + +quoted-pair = ("\" text) / obs-qp + +FWS = ([*WSP CRLF] 1*WSP) / ; Folding white space + obs-FWS + +ctext = NO-WS-CTL / ; Non white space controls + + %d33-39 / ; The rest of the US-ASCII + %d42-91 / ; characters not including "(", + %d93-126 ; ")", or "\" + +ccontent = ctext / quoted-pair / comment + +comment = "(" *([FWS] ccontent) [FWS] ")" + +CFWS = *([FWS] comment) (([FWS] comment) / FWS) + + +atext = ALPHA / DIGIT / ; Any character except controls, + "!" / "#" / ; SP, and specials. + "$" / "%" / ; Used for atoms + "&" / "'" / + "*" / "+" / + "-" / "/" / + "=" / "?" / + "^" / "_" / + "`" / "{" / + "|" / "}" / + "~" + +atom = [CFWS] 1*atext [CFWS] + +dot-atom = [CFWS] dot-atom-text [CFWS] + +dot-atom-text = 1*atext *("." 
1*atext) + +qtext = NO-WS-CTL / ; Non white space controls + + %d33 / ; The rest of the US-ASCII + %d35-91 / ; characters not including "\" + %d93-126 ; or the quote character + +qcontent = qtext / quoted-pair + +quoted-string = [CFWS] + DQUOTE *([FWS] qcontent) [FWS] DQUOTE + [CFWS] + +word = atom / quoted-string + +phrase = 1*word / obs-phrase + +utext = NO-WS-CTL / ; Non white space controls + %d33-126 / ; The rest of US-ASCII + obs-utext + +unstructured = *([FWS] utext) [FWS] + +date-time = [ day-of-week "," ] date FWS time [CFWS] + +day-of-week = ([FWS] day-name) / obs-day-of-week + +day-name = "Mon" / "Tue" / "Wed" / "Thu" / + "Fri" / "Sat" / "Sun" + +date = day month year + +year = 4*DIGIT / obs-year + +month = (FWS month-name FWS) / obs-month + +month-name = "Jan" / "Feb" / "Mar" / "Apr" / + "May" / "Jun" / "Jul" / "Aug" / + "Sep" / "Oct" / "Nov" / "Dec" + +day = ([FWS] 1*2DIGIT) / obs-day + +time = time-of-day FWS zone + +time-of-day = hour ":" minute [ ":" second ] + +hour = 2DIGIT / obs-hour + +minute = 2DIGIT / obs-minute + +second = 2DIGIT / obs-second + +zone = (( "+" / "-" ) 4DIGIT) / obs-zone + +address = mailbox / group + +mailbox = name-addr / addr-spec + +name-addr = [display-name] angle-addr + +angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr + +group = display-name ":" [mailbox-list / CFWS] ";" + [CFWS] + +display-name = phrase + +mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list + +address-list = (address *("," address)) / obs-addr-list + +addr-spec = local-part "@" domain + +local-part = dot-atom / quoted-string / obs-local-part + +domain = dot-atom / domain-literal / obs-domain + +domain-literal = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS] + +dcontent = dtext / quoted-pair + +dtext = NO-WS-CTL / ; Non white space controls + + %d33-90 / ; The rest of the US-ASCII + %d94-126 ; characters not including "[", + ; "]", or "\" + +message = (fields / obs-fields) + [CRLF body] + +body = *(*998text CRLF) *998text + +fields = 
*(trace + *(resent-date / + resent-from / + resent-sender / + resent-to / + resent-cc / + resent-bcc / + resent-msg-id)) + *(orig-date / + from / + sender / + reply-to / + to / + cc / + bcc / + message-id / + in-reply-to / + references / + subject / + comments / + keywords / + optional-field) + +orig-date = "Date:" date-time CRLF + +from = "From:" mailbox-list CRLF + +sender = "Sender:" mailbox CRLF + +reply-to = "Reply-To:" address-list CRLF + +to = "To:" address-list CRLF + +cc = "Cc:" address-list CRLF + +bcc = "Bcc:" (address-list / [CFWS]) CRLF + +message-id = "Message-ID:" msg-id CRLF + +in-reply-to = "In-Reply-To:" 1*msg-id CRLF + +references = "References:" 1*msg-id CRLF + +msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS] + +id-left = dot-atom-text / no-fold-quote / obs-id-left + +id-right = dot-atom-text / no-fold-literal / obs-id-right + +no-fold-quote = DQUOTE *(qtext / quoted-pair) DQUOTE + +no-fold-literal = "[" *(dtext / quoted-pair) "]" + +subject = "Subject:" unstructured CRLF + +comments = "Comments:" unstructured CRLF + +keywords = "Keywords:" phrase *("," phrase) CRLF + +resent-date = "Resent-Date:" date-time CRLF + +resent-from = "Resent-From:" mailbox-list CRLF + +resent-sender = "Resent-Sender:" mailbox CRLF + +resent-to = "Resent-To:" address-list CRLF + +resent-cc = "Resent-Cc:" address-list CRLF + +resent-bcc = "Resent-Bcc:" (address-list / [CFWS]) CRLF + +resent-msg-id = "Resent-Message-ID:" msg-id CRLF + +trace = [return] + 1*received + +return = "Return-Path:" path CRLF + +path = ([CFWS] "<" ([CFWS] / addr-spec) ">" [CFWS]) / + obs-path + +received = "Received:" name-val-list ";" date-time CRLF + +name-val-list = [CFWS] [name-val-pair *(CFWS name-val-pair)] + +name-val-pair = item-name CFWS item-value + +item-name = ALPHA *(["-"] (ALPHA / DIGIT)) + +item-value = 1*angle-addr / addr-spec / + atom / domain / msg-id + +optional-field = field-name ":" unstructured CRLF + +field-name = 1*ftext + +ftext = %d33-57 / ; Any character except + 
%d59-126 ; controls, SP, and + ; ":". + +obs-qp = "\" (%d0-127) + +obs-text = *LF *CR *(obs-char *LF *CR) + +obs-char = %d0-9 / %d11 / ; %d0-127 except CR and + %d12 / %d14-127 ; LF + +obs-utext = obs-text + +obs-phrase = word *(word / "." / CFWS) + +obs-phrase-list = phrase / 1*([phrase] [CFWS] "," [CFWS]) [phrase] + +obs-FWS = 1*WSP *(CRLF 1*WSP) + +obs-day-of-week = [CFWS] day-name [CFWS] + +obs-year = [CFWS] 2*DIGIT [CFWS] + +obs-month = CFWS month-name CFWS + +obs-day = [CFWS] 1*2DIGIT [CFWS] + +obs-hour = [CFWS] 2DIGIT [CFWS] + +obs-minute = [CFWS] 2DIGIT [CFWS] + +obs-second = [CFWS] 2DIGIT [CFWS] + +obs-zone = "UT" / "GMT" / ; Universal Time + ; North American UT + ; offsets + "EST" / "EDT" / ; Eastern: - 5/ - 4 + "CST" / "CDT" / ; Central: - 6/ - 5 + "MST" / "MDT" / ; Mountain: - 7/ - 6 + "PST" / "PDT" / ; Pacific: - 8/ - 7 + + %d65-73 / ; Military zones - "A" + %d75-90 / ; through "I" and "K" + %d97-105 / ; through "Z", both + %d107-122 ; upper and lower case + +obs-angle-addr = [CFWS] "<" [obs-route] addr-spec ">" [CFWS] + +obs-route = [CFWS] obs-domain-list ":" [CFWS] + +obs-domain-list = "@" domain *(*(CFWS / "," ) [CFWS] "@" domain) + +obs-local-part = word *("." word) + +obs-domain = atom *("." 
atom) + +obs-mbox-list = 1*([mailbox] [CFWS] "," [CFWS]) [mailbox] + +obs-addr-list = 1*([address] [CFWS] "," [CFWS]) [address] + +obs-fields = *(obs-return / + obs-received / + obs-orig-date / + obs-from / + obs-sender / + obs-reply-to / + obs-to / + obs-cc / + obs-bcc / + obs-message-id / + obs-in-reply-to / + obs-references / + obs-subject / + obs-comments / + obs-keywords / + obs-resent-date / + obs-resent-from / + obs-resent-send / + obs-resent-rply / + obs-resent-to / + obs-resent-cc / + obs-resent-bcc / + obs-resent-mid / + obs-optional) + +obs-orig-date = "Date" *WSP ":" date-time CRLF + +obs-from = "From" *WSP ":" mailbox-list CRLF + +obs-sender = "Sender" *WSP ":" mailbox CRLF + +obs-reply-to = "Reply-To" *WSP ":" mailbox-list CRLF + +obs-to = "To" *WSP ":" address-list CRLF + +obs-cc = "Cc" *WSP ":" address-list CRLF + +obs-bcc = "Bcc" *WSP ":" (address-list / [CFWS]) CRLF + +obs-message-id = "Message-ID" *WSP ":" msg-id CRLF + +obs-in-reply-to = "In-Reply-To" *WSP ":" *(phrase / msg-id) CRLF + +obs-references = "References" *WSP ":" *(phrase / msg-id) CRLF + +obs-id-left = local-part + +obs-id-right = domain + +obs-subject = "Subject" *WSP ":" unstructured CRLF + +obs-comments = "Comments" *WSP ":" unstructured CRLF + +obs-keywords = "Keywords" *WSP ":" obs-phrase-list CRLF + +obs-resent-from = "Resent-From" *WSP ":" mailbox-list CRLF + +obs-resent-send = "Resent-Sender" *WSP ":" mailbox CRLF + +obs-resent-date = "Resent-Date" *WSP ":" date-time CRLF + +obs-resent-to = "Resent-To" *WSP ":" address-list CRLF + +obs-resent-cc = "Resent-Cc" *WSP ":" address-list CRLF + +obs-resent-bcc = "Resent-Bcc" *WSP ":" + (address-list / [CFWS]) CRLF + +obs-resent-mid = "Resent-Message-ID" *WSP ":" msg-id CRLF + +obs-resent-rply = "Resent-Reply-To" *WSP ":" address-list CRLF + +obs-return = "Return-Path" *WSP ":" path CRLF + +obs-received = "Received" *WSP ":" name-val-list CRLF + +obs-path = obs-angle-addr + +obs-optional = field-name *WSP ":" unstructured CRLF diff 
--git a/tests/examplefiles/abnf_example4.abnf b/tests/examplefiles/abnf_example4.abnf new file mode 100644 index 00000000..78dc38cb --- /dev/null +++ b/tests/examplefiles/abnf_example4.abnf @@ -0,0 +1,382 @@ +NO-WS-CTL = %d1-8 / ; US-ASCII control characters + %d11 / ; that do not include the + %d12 / ; carriage return, line feed, + %d14-31 / ; and white space characters + %d127 + +text = %d1-9 / ; Characters excluding CR and LF + %d11 / + %d12 / + %d14-127 / + obs-text + +specials = "(" / ")" / ; Special characters used in + "<" / ">" / ; other parts of the syntax + "[" / "]" / + ":" / ";" / + "@" / "\" / + "," / "." / + DQUOTE + +quoted-pair = ("\" text) / obs-qp + +FWS = ([*WSP CRLF] 1*WSP) / ; Folding white space + obs-FWS + +ctext = NO-WS-CTL / ; Non white space controls + + %d33-39 / ; The rest of the US-ASCII + %d42-91 / ; characters not including "(", + %d93-126 ; ")", or "\" + +ccontent = ctext / quoted-pair / comment + +comment = "(" *([FWS] ccontent) [FWS] ")" + +CFWS = *([FWS] comment) (([FWS] comment) / FWS) + + +atext = ALPHA / DIGIT / ; Any character except controls, + "!" / "#" / ; SP, and specials. + "$" / "%" / ; Used for atoms + "&" / "'" / + "*" / "+" / + "-" / "/" / + "=" / "?" / + "^" / "_" / + "`" / "{" / + "|" / "}" / + "~" + +atom = [CFWS] 1*atext [CFWS] + +dot-atom = [CFWS] dot-atom-text [CFWS] + +dot-atom-text = 1*atext *("." 
1*atext) + +qtext = NO-WS-CTL / ; Non white space controls + + %d33 / ; The rest of the US-ASCII + %d35-91 / ; characters not including "\" + %d93-126 ; or the quote character + +qcontent = qtext / quoted-pair + +quoted-string = [CFWS] + DQUOTE *([FWS] qcontent) [FWS] DQUOTE + [CFWS] + +word = atom / quoted-string + +phrase = 1*word / obs-phrase + +utext = NO-WS-CTL / ; Non white space controls + %d33-126 / ; The rest of US-ASCII + obs-utext + +unstructured = *([FWS] utext) [FWS] + +date-time = [ day-of-week "," ] date FWS time [CFWS] + +day-of-week = ([FWS] day-name) / obs-day-of-week + +day-name = %i"Mon" / %i"Tue" / %i"Wed" / %i"Thu" / + %i"Fri" / %i"Sat" / %i"Sun" + +date = day month year + +year = 4*DIGIT / obs-year + +month = (FWS month-name FWS) / obs-month + +month-name = %i"Jan" / %i"Feb" / %i"Mar" / %i"Apr" / + %i"May" / %i"Jun" / %i"Jul" / %i"Aug" / + %i"Sep" / %i"Oct" / %i"Nov" / %i"Dec" + +day = ([FWS] 1*2DIGIT) / obs-day + +time = time-of-day FWS zone + +time-of-day = hour ":" minute [ ":" second ] + +hour = 2DIGIT / obs-hour + +minute = 2DIGIT / obs-minute + +second = 2DIGIT / obs-second + +zone = (( "+" / "-" ) 4DIGIT) / obs-zone + +address = mailbox / group + +mailbox = name-addr / addr-spec + +name-addr = [display-name] angle-addr + +angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr + +group = display-name ":" [mailbox-list / CFWS] ";" + [CFWS] + +display-name = phrase + +mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list + +address-list = (address *("," address)) / obs-addr-list + +addr-spec = local-part "@" domain + +local-part = dot-atom / quoted-string / obs-local-part + +domain = dot-atom / domain-literal / obs-domain + +domain-literal = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS] + +dcontent = dtext / quoted-pair + +dtext = NO-WS-CTL / ; Non white space controls + + %d33-90 / ; The rest of the US-ASCII + %d94-126 ; characters not including "[", + ; "]", or "\" + +message = (fields / obs-fields) + [CRLF body] + +body = 
*(*998text CRLF) *998text + +fields = *(trace + *(resent-date / + resent-from / + resent-sender / + resent-to / + resent-cc / + resent-bcc / + resent-msg-id)) + *(orig-date / + from / + sender / + reply-to / + to / + cc / + bcc / + message-id / + in-reply-to / + references / + subject / + comments / + keywords / + optional-field) + +orig-date = %i"Date:" date-time CRLF + +from = %i"From:" mailbox-list CRLF + +sender = %i"Sender:" mailbox CRLF + +reply-to = %i"Reply-To:" address-list CRLF + +to = %i"To:" address-list CRLF + +cc = %i"Cc:" address-list CRLF + +bcc = %i"Bcc:" (address-list / [CFWS]) CRLF + +message-id = %i"Message-ID:" msg-id CRLF + +in-reply-to = %i"In-Reply-To:" 1*msg-id CRLF + +references = %i"References:" 1*msg-id CRLF + +msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS] + +id-left = dot-atom-text / no-fold-quote / obs-id-left + +id-right = dot-atom-text / no-fold-literal / obs-id-right + +no-fold-quote = DQUOTE *(qtext / quoted-pair) DQUOTE + +no-fold-literal = "[" *(dtext / quoted-pair) "]" + +subject = %i"Subject:" unstructured CRLF + +comments = %i"Comments:" unstructured CRLF + +keywords = %i"Keywords:" phrase *("," phrase) CRLF + +resent-date = %i"Resent-Date:" date-time CRLF + +resent-from = %i"Resent-From:" mailbox-list CRLF + +resent-sender = %i"Resent-Sender:" mailbox CRLF + +resent-to = %i"Resent-To:" address-list CRLF + +resent-cc = %i"Resent-Cc:" address-list CRLF + +resent-bcc = %i"Resent-Bcc:" (address-list / [CFWS]) CRLF + +resent-msg-id = %i"Resent-Message-ID:" msg-id CRLF + +trace = [return] + 1*received + +return = %i"Return-Path:" path CRLF + +path = ([CFWS] "<" ([CFWS] / addr-spec) ">" [CFWS]) / + obs-path + +received = %i"Received:" name-val-list ";" date-time CRLF + +name-val-list = [CFWS] [name-val-pair *(CFWS name-val-pair)] + +name-val-pair = item-name CFWS item-value + +item-name = ALPHA *(["-"] (ALPHA / DIGIT)) + +item-value = 1*angle-addr / addr-spec / + atom / domain / msg-id + +optional-field = field-name ":" 
unstructured CRLF + +field-name = 1*ftext + +ftext = %d33-57 / ; Any character except + %d59-126 ; controls, SP, and + ; ":". + +obs-qp = "\" (%d0-127) + +obs-text = *LF *CR *(obs-char *LF *CR) + +obs-char = %d0-9 / %d11 / ; %d0-127 except CR and + %d12 / %d14-127 ; LF + +obs-utext = obs-text + +obs-phrase = word *(word / "." / CFWS) + +obs-phrase-list = phrase / 1*([phrase] [CFWS] "," [CFWS]) [phrase] + +obs-FWS = 1*WSP *(CRLF 1*WSP) + +obs-day-of-week = [CFWS] day-name [CFWS] + +obs-year = [CFWS] 2*DIGIT [CFWS] + +obs-month = CFWS month-name CFWS + +obs-day = [CFWS] 1*2DIGIT [CFWS] + +obs-hour = [CFWS] 2DIGIT [CFWS] + +obs-minute = [CFWS] 2DIGIT [CFWS] + +obs-second = [CFWS] 2DIGIT [CFWS] + +obs-zone = %i"UT" / %i"GMT" / ; Universal Time + ; North American UT + ; offsets + %i"EST" / %i"EDT" / ; Eastern: - 5/ - 4 + %i"CST" / %i"CDT" / ; Central: - 6/ - 5 + %i"MST" / %i"MDT" / ; Mountain: - 7/ - 6 + %i"PST" / %i"PDT" / ; Pacific: - 8/ - 7 + + %d65-73 / ; Military zones - "A" + %d75-90 / ; through "I" and "K" + %d97-105 / ; through "Z", both + %d107-122 ; upper and lower case + +obs-angle-addr = [CFWS] "<" [obs-route] addr-spec ">" [CFWS] + +obs-route = [CFWS] obs-domain-list ":" [CFWS] + +obs-domain-list = "@" domain *(*(CFWS / "," ) [CFWS] "@" domain) + +obs-local-part = word *("." word) + +obs-domain = atom *("." 
atom) + +obs-mbox-list = 1*([mailbox] [CFWS] "," [CFWS]) [mailbox] + +obs-addr-list = 1*([address] [CFWS] "," [CFWS]) [address] + +obs-fields = *(obs-return / + obs-received / + obs-orig-date / + obs-from / + obs-sender / + obs-reply-to / + obs-to / + obs-cc / + obs-bcc / + obs-message-id / + obs-in-reply-to / + obs-references / + obs-subject / + obs-comments / + obs-keywords / + obs-resent-date / + obs-resent-from / + obs-resent-send / + obs-resent-rply / + obs-resent-to / + obs-resent-cc / + obs-resent-bcc / + obs-resent-mid / + obs-optional) + +obs-orig-date = %i"Date" *WSP ":" date-time CRLF + +obs-from = %i"From" *WSP ":" mailbox-list CRLF + +obs-sender = %i"Sender" *WSP ":" mailbox CRLF + +obs-reply-to = %i"Reply-To" *WSP ":" mailbox-list CRLF + +obs-to = %i"To" *WSP ":" address-list CRLF + +obs-cc = %i"Cc" *WSP ":" address-list CRLF + +obs-bcc = %i"Bcc" *WSP ":" (address-list / [CFWS]) CRLF + +obs-message-id = %i"Message-ID" *WSP ":" msg-id CRLF + +obs-in-reply-to = %i"In-Reply-To" *WSP ":" *(phrase / msg-id) CRLF + +obs-references = %i"References" *WSP ":" *(phrase / msg-id) CRLF + +obs-id-left = local-part + +obs-id-right = domain + +obs-subject = %i"Subject" *WSP ":" unstructured CRLF + +obs-comments = %i"Comments" *WSP ":" unstructured CRLF + +obs-keywords = %i"Keywords" *WSP ":" obs-phrase-list CRLF + +obs-resent-from = %i"Resent-From" *WSP ":" mailbox-list CRLF + +obs-resent-send = %i"Resent-Sender" *WSP ":" mailbox CRLF + +obs-resent-date = %i"Resent-Date" *WSP ":" date-time CRLF + +obs-resent-to = %i"Resent-To" *WSP ":" address-list CRLF + +obs-resent-cc = %i"Resent-Cc" *WSP ":" address-list CRLF + +obs-resent-bcc = %i"Resent-Bcc" *WSP ":" + (address-list / [CFWS]) CRLF + +obs-resent-mid = %i"Resent-Message-ID" *WSP ":" msg-id CRLF + +obs-resent-rply = %i"Resent-Reply-To" *WSP ":" address-list CRLF + +obs-return = %i"Return-Path" *WSP ":" path CRLF + +obs-received = %i"Received" *WSP ":" name-val-list CRLF + +obs-path = obs-angle-addr + +obs-optional 
= field-name *WSP ":" unstructured CRLF diff --git a/tests/examplefiles/abnf_example5.abnf b/tests/examplefiles/abnf_example5.abnf new file mode 100644 index 00000000..6ef65125 --- /dev/null +++ b/tests/examplefiles/abnf_example5.abnf @@ -0,0 +1,7 @@ +crlf = %d13.10 + +command = "command string" + +char-line = %x0D.0A *(%x20-7E) %x0D.0A + +oldrule =/ additional-alternatives -- cgit v1.2.1 From 7e380ac8b6880e08471ffea5ee855dd573a68099 Mon Sep 17 00:00:00 2001 From: hhsprings Date: Thu, 5 Nov 2015 15:44:10 +0900 Subject: Delete examples1~4 because these examples may have license issue. --- tests/examplefiles/abnf_example1.abnf | 58 ------ tests/examplefiles/abnf_example2.abnf | 58 ------ tests/examplefiles/abnf_example3.abnf | 382 ---------------------------------- tests/examplefiles/abnf_example4.abnf | 382 ---------------------------------- 4 files changed, 880 deletions(-) delete mode 100644 tests/examplefiles/abnf_example1.abnf delete mode 100644 tests/examplefiles/abnf_example2.abnf delete mode 100644 tests/examplefiles/abnf_example3.abnf delete mode 100644 tests/examplefiles/abnf_example4.abnf diff --git a/tests/examplefiles/abnf_example1.abnf b/tests/examplefiles/abnf_example1.abnf deleted file mode 100644 index c5bbe221..00000000 --- a/tests/examplefiles/abnf_example1.abnf +++ /dev/null @@ -1,58 +0,0 @@ -rulelist = 1*( rule / (*c-wsp c-nl) ) - -rule = rulename defined-as elements c-nl - ; continues if next line starts - ; with white space - -rulename = ALPHA *(ALPHA / DIGIT / "-") -defined-as = *c-wsp ("=" / "=/") *c-wsp - ; basic rules definition and - ; incremental alternatives - -elements = alternation *c-wsp - -c-wsp = WSP / (c-nl WSP) - -c-nl = comment / CRLF - ; comment or newline - -comment = ";" *(WSP / VCHAR) CRLF - -alternation = concatenation - *(*c-wsp "/" *c-wsp concatenation) - -concatenation = repetition *(1*c-wsp repetition) - -repetition = [repeat] element - -repeat = 1*DIGIT / (*DIGIT "*" *DIGIT) - -element = rulename / group / option / - 
char-val / num-val / prose-val - -group = "(" *c-wsp alternation *c-wsp ")" - -option = "[" *c-wsp alternation *c-wsp "]" - -char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE - ; quoted string of SP and VCHAR - ; without DQUOTE - -num-val = "%" (bin-val / dec-val / hex-val) - -bin-val = "b" 1*BIT - [ 1*("." 1*BIT) / ("-" 1*BIT) ] - ; series of concatenated bit values - ; or single ONEOF range - -dec-val = "d" 1*DIGIT - [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ] - -hex-val = "x" 1*HEXDIG - [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ] - -prose-val = "<" *(%x20-3D / %x3F-7E) ">" - ; bracketed string of SP and VCHAR - ; without angles - ; prose description, to be used as - ; last resort diff --git a/tests/examplefiles/abnf_example2.abnf b/tests/examplefiles/abnf_example2.abnf deleted file mode 100644 index 77c79cb4..00000000 --- a/tests/examplefiles/abnf_example2.abnf +++ /dev/null @@ -1,58 +0,0 @@ - rulelist = 1*( rule / (*c-wsp c-nl) ) - - rule = rulename defined-as elements c-nl - ; continues if next line starts - ; with white space - - rulename = ALPHA *(ALPHA / DIGIT / "-") - defined-as = *c-wsp ("=" / "=/") *c-wsp - ; basic rules definition and - ; incremental alternatives - - elements = alternation *c-wsp - - c-wsp = WSP / (c-nl WSP) - - c-nl = comment / CRLF - ; comment or newline - - comment = ";" *(WSP / VCHAR) CRLF - - alternation = concatenation - *(*c-wsp "/" *c-wsp concatenation) - - concatenation = repetition *(1*c-wsp repetition) - - repetition = [repeat] element - - repeat = 1*DIGIT / (*DIGIT "*" *DIGIT) - - element = rulename / group / option / - char-val / num-val / prose-val - - group = "(" *c-wsp alternation *c-wsp ")" - - option = "[" *c-wsp alternation *c-wsp "]" - - char-val = DQUOTE *(%x20-21 / %x23-7E) DQUOTE - ; quoted string of SP and VCHAR - ; without DQUOTE - - num-val = "%" (bin-val / dec-val / hex-val) - - bin-val = "b" 1*BIT - [ 1*("." 
1*BIT) / ("-" 1*BIT) ] - ; series of concatenated bit values - ; or single ONEOF range - - dec-val = "d" 1*DIGIT - [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ] - - hex-val = "x" 1*HEXDIG - [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ] - - prose-val = "<" *(%x20-3D / %x3F-7E) ">" - ; bracketed string of SP and VCHAR - ; without angles - ; prose description, to be used as - ; last resort diff --git a/tests/examplefiles/abnf_example3.abnf b/tests/examplefiles/abnf_example3.abnf deleted file mode 100644 index 51690f54..00000000 --- a/tests/examplefiles/abnf_example3.abnf +++ /dev/null @@ -1,382 +0,0 @@ -NO-WS-CTL = %d1-8 / ; US-ASCII control characters - %d11 / ; that do not include the - %d12 / ; carriage return, line feed, - %d14-31 / ; and white space characters - %d127 - -text = %d1-9 / ; Characters excluding CR and LF - %d11 / - %d12 / - %d14-127 / - obs-text - -specials = "(" / ")" / ; Special characters used in - "<" / ">" / ; other parts of the syntax - "[" / "]" / - ":" / ";" / - "@" / "\" / - "," / "." / - DQUOTE - -quoted-pair = ("\" text) / obs-qp - -FWS = ([*WSP CRLF] 1*WSP) / ; Folding white space - obs-FWS - -ctext = NO-WS-CTL / ; Non white space controls - - %d33-39 / ; The rest of the US-ASCII - %d42-91 / ; characters not including "(", - %d93-126 ; ")", or "\" - -ccontent = ctext / quoted-pair / comment - -comment = "(" *([FWS] ccontent) [FWS] ")" - -CFWS = *([FWS] comment) (([FWS] comment) / FWS) - - -atext = ALPHA / DIGIT / ; Any character except controls, - "!" / "#" / ; SP, and specials. - "$" / "%" / ; Used for atoms - "&" / "'" / - "*" / "+" / - "-" / "/" / - "=" / "?" / - "^" / "_" / - "`" / "{" / - "|" / "}" / - "~" - -atom = [CFWS] 1*atext [CFWS] - -dot-atom = [CFWS] dot-atom-text [CFWS] - -dot-atom-text = 1*atext *("." 
1*atext) - -qtext = NO-WS-CTL / ; Non white space controls - - %d33 / ; The rest of the US-ASCII - %d35-91 / ; characters not including "\" - %d93-126 ; or the quote character - -qcontent = qtext / quoted-pair - -quoted-string = [CFWS] - DQUOTE *([FWS] qcontent) [FWS] DQUOTE - [CFWS] - -word = atom / quoted-string - -phrase = 1*word / obs-phrase - -utext = NO-WS-CTL / ; Non white space controls - %d33-126 / ; The rest of US-ASCII - obs-utext - -unstructured = *([FWS] utext) [FWS] - -date-time = [ day-of-week "," ] date FWS time [CFWS] - -day-of-week = ([FWS] day-name) / obs-day-of-week - -day-name = "Mon" / "Tue" / "Wed" / "Thu" / - "Fri" / "Sat" / "Sun" - -date = day month year - -year = 4*DIGIT / obs-year - -month = (FWS month-name FWS) / obs-month - -month-name = "Jan" / "Feb" / "Mar" / "Apr" / - "May" / "Jun" / "Jul" / "Aug" / - "Sep" / "Oct" / "Nov" / "Dec" - -day = ([FWS] 1*2DIGIT) / obs-day - -time = time-of-day FWS zone - -time-of-day = hour ":" minute [ ":" second ] - -hour = 2DIGIT / obs-hour - -minute = 2DIGIT / obs-minute - -second = 2DIGIT / obs-second - -zone = (( "+" / "-" ) 4DIGIT) / obs-zone - -address = mailbox / group - -mailbox = name-addr / addr-spec - -name-addr = [display-name] angle-addr - -angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr - -group = display-name ":" [mailbox-list / CFWS] ";" - [CFWS] - -display-name = phrase - -mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list - -address-list = (address *("," address)) / obs-addr-list - -addr-spec = local-part "@" domain - -local-part = dot-atom / quoted-string / obs-local-part - -domain = dot-atom / domain-literal / obs-domain - -domain-literal = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS] - -dcontent = dtext / quoted-pair - -dtext = NO-WS-CTL / ; Non white space controls - - %d33-90 / ; The rest of the US-ASCII - %d94-126 ; characters not including "[", - ; "]", or "\" - -message = (fields / obs-fields) - [CRLF body] - -body = *(*998text CRLF) *998text - -fields = 
*(trace - *(resent-date / - resent-from / - resent-sender / - resent-to / - resent-cc / - resent-bcc / - resent-msg-id)) - *(orig-date / - from / - sender / - reply-to / - to / - cc / - bcc / - message-id / - in-reply-to / - references / - subject / - comments / - keywords / - optional-field) - -orig-date = "Date:" date-time CRLF - -from = "From:" mailbox-list CRLF - -sender = "Sender:" mailbox CRLF - -reply-to = "Reply-To:" address-list CRLF - -to = "To:" address-list CRLF - -cc = "Cc:" address-list CRLF - -bcc = "Bcc:" (address-list / [CFWS]) CRLF - -message-id = "Message-ID:" msg-id CRLF - -in-reply-to = "In-Reply-To:" 1*msg-id CRLF - -references = "References:" 1*msg-id CRLF - -msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS] - -id-left = dot-atom-text / no-fold-quote / obs-id-left - -id-right = dot-atom-text / no-fold-literal / obs-id-right - -no-fold-quote = DQUOTE *(qtext / quoted-pair) DQUOTE - -no-fold-literal = "[" *(dtext / quoted-pair) "]" - -subject = "Subject:" unstructured CRLF - -comments = "Comments:" unstructured CRLF - -keywords = "Keywords:" phrase *("," phrase) CRLF - -resent-date = "Resent-Date:" date-time CRLF - -resent-from = "Resent-From:" mailbox-list CRLF - -resent-sender = "Resent-Sender:" mailbox CRLF - -resent-to = "Resent-To:" address-list CRLF - -resent-cc = "Resent-Cc:" address-list CRLF - -resent-bcc = "Resent-Bcc:" (address-list / [CFWS]) CRLF - -resent-msg-id = "Resent-Message-ID:" msg-id CRLF - -trace = [return] - 1*received - -return = "Return-Path:" path CRLF - -path = ([CFWS] "<" ([CFWS] / addr-spec) ">" [CFWS]) / - obs-path - -received = "Received:" name-val-list ";" date-time CRLF - -name-val-list = [CFWS] [name-val-pair *(CFWS name-val-pair)] - -name-val-pair = item-name CFWS item-value - -item-name = ALPHA *(["-"] (ALPHA / DIGIT)) - -item-value = 1*angle-addr / addr-spec / - atom / domain / msg-id - -optional-field = field-name ":" unstructured CRLF - -field-name = 1*ftext - -ftext = %d33-57 / ; Any character except - 
%d59-126 ; controls, SP, and - ; ":". - -obs-qp = "\" (%d0-127) - -obs-text = *LF *CR *(obs-char *LF *CR) - -obs-char = %d0-9 / %d11 / ; %d0-127 except CR and - %d12 / %d14-127 ; LF - -obs-utext = obs-text - -obs-phrase = word *(word / "." / CFWS) - -obs-phrase-list = phrase / 1*([phrase] [CFWS] "," [CFWS]) [phrase] - -obs-FWS = 1*WSP *(CRLF 1*WSP) - -obs-day-of-week = [CFWS] day-name [CFWS] - -obs-year = [CFWS] 2*DIGIT [CFWS] - -obs-month = CFWS month-name CFWS - -obs-day = [CFWS] 1*2DIGIT [CFWS] - -obs-hour = [CFWS] 2DIGIT [CFWS] - -obs-minute = [CFWS] 2DIGIT [CFWS] - -obs-second = [CFWS] 2DIGIT [CFWS] - -obs-zone = "UT" / "GMT" / ; Universal Time - ; North American UT - ; offsets - "EST" / "EDT" / ; Eastern: - 5/ - 4 - "CST" / "CDT" / ; Central: - 6/ - 5 - "MST" / "MDT" / ; Mountain: - 7/ - 6 - "PST" / "PDT" / ; Pacific: - 8/ - 7 - - %d65-73 / ; Military zones - "A" - %d75-90 / ; through "I" and "K" - %d97-105 / ; through "Z", both - %d107-122 ; upper and lower case - -obs-angle-addr = [CFWS] "<" [obs-route] addr-spec ">" [CFWS] - -obs-route = [CFWS] obs-domain-list ":" [CFWS] - -obs-domain-list = "@" domain *(*(CFWS / "," ) [CFWS] "@" domain) - -obs-local-part = word *("." word) - -obs-domain = atom *("." 
atom) - -obs-mbox-list = 1*([mailbox] [CFWS] "," [CFWS]) [mailbox] - -obs-addr-list = 1*([address] [CFWS] "," [CFWS]) [address] - -obs-fields = *(obs-return / - obs-received / - obs-orig-date / - obs-from / - obs-sender / - obs-reply-to / - obs-to / - obs-cc / - obs-bcc / - obs-message-id / - obs-in-reply-to / - obs-references / - obs-subject / - obs-comments / - obs-keywords / - obs-resent-date / - obs-resent-from / - obs-resent-send / - obs-resent-rply / - obs-resent-to / - obs-resent-cc / - obs-resent-bcc / - obs-resent-mid / - obs-optional) - -obs-orig-date = "Date" *WSP ":" date-time CRLF - -obs-from = "From" *WSP ":" mailbox-list CRLF - -obs-sender = "Sender" *WSP ":" mailbox CRLF - -obs-reply-to = "Reply-To" *WSP ":" mailbox-list CRLF - -obs-to = "To" *WSP ":" address-list CRLF - -obs-cc = "Cc" *WSP ":" address-list CRLF - -obs-bcc = "Bcc" *WSP ":" (address-list / [CFWS]) CRLF - -obs-message-id = "Message-ID" *WSP ":" msg-id CRLF - -obs-in-reply-to = "In-Reply-To" *WSP ":" *(phrase / msg-id) CRLF - -obs-references = "References" *WSP ":" *(phrase / msg-id) CRLF - -obs-id-left = local-part - -obs-id-right = domain - -obs-subject = "Subject" *WSP ":" unstructured CRLF - -obs-comments = "Comments" *WSP ":" unstructured CRLF - -obs-keywords = "Keywords" *WSP ":" obs-phrase-list CRLF - -obs-resent-from = "Resent-From" *WSP ":" mailbox-list CRLF - -obs-resent-send = "Resent-Sender" *WSP ":" mailbox CRLF - -obs-resent-date = "Resent-Date" *WSP ":" date-time CRLF - -obs-resent-to = "Resent-To" *WSP ":" address-list CRLF - -obs-resent-cc = "Resent-Cc" *WSP ":" address-list CRLF - -obs-resent-bcc = "Resent-Bcc" *WSP ":" - (address-list / [CFWS]) CRLF - -obs-resent-mid = "Resent-Message-ID" *WSP ":" msg-id CRLF - -obs-resent-rply = "Resent-Reply-To" *WSP ":" address-list CRLF - -obs-return = "Return-Path" *WSP ":" path CRLF - -obs-received = "Received" *WSP ":" name-val-list CRLF - -obs-path = obs-angle-addr - -obs-optional = field-name *WSP ":" unstructured CRLF diff 
--git a/tests/examplefiles/abnf_example4.abnf b/tests/examplefiles/abnf_example4.abnf deleted file mode 100644 index 78dc38cb..00000000 --- a/tests/examplefiles/abnf_example4.abnf +++ /dev/null @@ -1,382 +0,0 @@ -NO-WS-CTL = %d1-8 / ; US-ASCII control characters - %d11 / ; that do not include the - %d12 / ; carriage return, line feed, - %d14-31 / ; and white space characters - %d127 - -text = %d1-9 / ; Characters excluding CR and LF - %d11 / - %d12 / - %d14-127 / - obs-text - -specials = "(" / ")" / ; Special characters used in - "<" / ">" / ; other parts of the syntax - "[" / "]" / - ":" / ";" / - "@" / "\" / - "," / "." / - DQUOTE - -quoted-pair = ("\" text) / obs-qp - -FWS = ([*WSP CRLF] 1*WSP) / ; Folding white space - obs-FWS - -ctext = NO-WS-CTL / ; Non white space controls - - %d33-39 / ; The rest of the US-ASCII - %d42-91 / ; characters not including "(", - %d93-126 ; ")", or "\" - -ccontent = ctext / quoted-pair / comment - -comment = "(" *([FWS] ccontent) [FWS] ")" - -CFWS = *([FWS] comment) (([FWS] comment) / FWS) - - -atext = ALPHA / DIGIT / ; Any character except controls, - "!" / "#" / ; SP, and specials. - "$" / "%" / ; Used for atoms - "&" / "'" / - "*" / "+" / - "-" / "/" / - "=" / "?" / - "^" / "_" / - "`" / "{" / - "|" / "}" / - "~" - -atom = [CFWS] 1*atext [CFWS] - -dot-atom = [CFWS] dot-atom-text [CFWS] - -dot-atom-text = 1*atext *("." 
1*atext) - -qtext = NO-WS-CTL / ; Non white space controls - - %d33 / ; The rest of the US-ASCII - %d35-91 / ; characters not including "\" - %d93-126 ; or the quote character - -qcontent = qtext / quoted-pair - -quoted-string = [CFWS] - DQUOTE *([FWS] qcontent) [FWS] DQUOTE - [CFWS] - -word = atom / quoted-string - -phrase = 1*word / obs-phrase - -utext = NO-WS-CTL / ; Non white space controls - %d33-126 / ; The rest of US-ASCII - obs-utext - -unstructured = *([FWS] utext) [FWS] - -date-time = [ day-of-week "," ] date FWS time [CFWS] - -day-of-week = ([FWS] day-name) / obs-day-of-week - -day-name = %i"Mon" / %i"Tue" / %i"Wed" / %i"Thu" / - %i"Fri" / %i"Sat" / %i"Sun" - -date = day month year - -year = 4*DIGIT / obs-year - -month = (FWS month-name FWS) / obs-month - -month-name = %i"Jan" / %i"Feb" / %i"Mar" / %i"Apr" / - %i"May" / %i"Jun" / %i"Jul" / %i"Aug" / - %i"Sep" / %i"Oct" / %i"Nov" / %i"Dec" - -day = ([FWS] 1*2DIGIT) / obs-day - -time = time-of-day FWS zone - -time-of-day = hour ":" minute [ ":" second ] - -hour = 2DIGIT / obs-hour - -minute = 2DIGIT / obs-minute - -second = 2DIGIT / obs-second - -zone = (( "+" / "-" ) 4DIGIT) / obs-zone - -address = mailbox / group - -mailbox = name-addr / addr-spec - -name-addr = [display-name] angle-addr - -angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr - -group = display-name ":" [mailbox-list / CFWS] ";" - [CFWS] - -display-name = phrase - -mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list - -address-list = (address *("," address)) / obs-addr-list - -addr-spec = local-part "@" domain - -local-part = dot-atom / quoted-string / obs-local-part - -domain = dot-atom / domain-literal / obs-domain - -domain-literal = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS] - -dcontent = dtext / quoted-pair - -dtext = NO-WS-CTL / ; Non white space controls - - %d33-90 / ; The rest of the US-ASCII - %d94-126 ; characters not including "[", - ; "]", or "\" - -message = (fields / obs-fields) - [CRLF body] - -body = 
*(*998text CRLF) *998text - -fields = *(trace - *(resent-date / - resent-from / - resent-sender / - resent-to / - resent-cc / - resent-bcc / - resent-msg-id)) - *(orig-date / - from / - sender / - reply-to / - to / - cc / - bcc / - message-id / - in-reply-to / - references / - subject / - comments / - keywords / - optional-field) - -orig-date = %i"Date:" date-time CRLF - -from = %i"From:" mailbox-list CRLF - -sender = %i"Sender:" mailbox CRLF - -reply-to = %i"Reply-To:" address-list CRLF - -to = %i"To:" address-list CRLF - -cc = %i"Cc:" address-list CRLF - -bcc = %i"Bcc:" (address-list / [CFWS]) CRLF - -message-id = %i"Message-ID:" msg-id CRLF - -in-reply-to = %i"In-Reply-To:" 1*msg-id CRLF - -references = %i"References:" 1*msg-id CRLF - -msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS] - -id-left = dot-atom-text / no-fold-quote / obs-id-left - -id-right = dot-atom-text / no-fold-literal / obs-id-right - -no-fold-quote = DQUOTE *(qtext / quoted-pair) DQUOTE - -no-fold-literal = "[" *(dtext / quoted-pair) "]" - -subject = %i"Subject:" unstructured CRLF - -comments = %i"Comments:" unstructured CRLF - -keywords = %i"Keywords:" phrase *("," phrase) CRLF - -resent-date = %i"Resent-Date:" date-time CRLF - -resent-from = %i"Resent-From:" mailbox-list CRLF - -resent-sender = %i"Resent-Sender:" mailbox CRLF - -resent-to = %i"Resent-To:" address-list CRLF - -resent-cc = %i"Resent-Cc:" address-list CRLF - -resent-bcc = %i"Resent-Bcc:" (address-list / [CFWS]) CRLF - -resent-msg-id = %i"Resent-Message-ID:" msg-id CRLF - -trace = [return] - 1*received - -return = %i"Return-Path:" path CRLF - -path = ([CFWS] "<" ([CFWS] / addr-spec) ">" [CFWS]) / - obs-path - -received = %i"Received:" name-val-list ";" date-time CRLF - -name-val-list = [CFWS] [name-val-pair *(CFWS name-val-pair)] - -name-val-pair = item-name CFWS item-value - -item-name = ALPHA *(["-"] (ALPHA / DIGIT)) - -item-value = 1*angle-addr / addr-spec / - atom / domain / msg-id - -optional-field = field-name ":" 
unstructured CRLF - -field-name = 1*ftext - -ftext = %d33-57 / ; Any character except - %d59-126 ; controls, SP, and - ; ":". - -obs-qp = "\" (%d0-127) - -obs-text = *LF *CR *(obs-char *LF *CR) - -obs-char = %d0-9 / %d11 / ; %d0-127 except CR and - %d12 / %d14-127 ; LF - -obs-utext = obs-text - -obs-phrase = word *(word / "." / CFWS) - -obs-phrase-list = phrase / 1*([phrase] [CFWS] "," [CFWS]) [phrase] - -obs-FWS = 1*WSP *(CRLF 1*WSP) - -obs-day-of-week = [CFWS] day-name [CFWS] - -obs-year = [CFWS] 2*DIGIT [CFWS] - -obs-month = CFWS month-name CFWS - -obs-day = [CFWS] 1*2DIGIT [CFWS] - -obs-hour = [CFWS] 2DIGIT [CFWS] - -obs-minute = [CFWS] 2DIGIT [CFWS] - -obs-second = [CFWS] 2DIGIT [CFWS] - -obs-zone = %i"UT" / %i"GMT" / ; Universal Time - ; North American UT - ; offsets - %i"EST" / %i"EDT" / ; Eastern: - 5/ - 4 - %i"CST" / %i"CDT" / ; Central: - 6/ - 5 - %i"MST" / %i"MDT" / ; Mountain: - 7/ - 6 - %i"PST" / %i"PDT" / ; Pacific: - 8/ - 7 - - %d65-73 / ; Military zones - "A" - %d75-90 / ; through "I" and "K" - %d97-105 / ; through "Z", both - %d107-122 ; upper and lower case - -obs-angle-addr = [CFWS] "<" [obs-route] addr-spec ">" [CFWS] - -obs-route = [CFWS] obs-domain-list ":" [CFWS] - -obs-domain-list = "@" domain *(*(CFWS / "," ) [CFWS] "@" domain) - -obs-local-part = word *("." word) - -obs-domain = atom *("." 
atom) - -obs-mbox-list = 1*([mailbox] [CFWS] "," [CFWS]) [mailbox] - -obs-addr-list = 1*([address] [CFWS] "," [CFWS]) [address] - -obs-fields = *(obs-return / - obs-received / - obs-orig-date / - obs-from / - obs-sender / - obs-reply-to / - obs-to / - obs-cc / - obs-bcc / - obs-message-id / - obs-in-reply-to / - obs-references / - obs-subject / - obs-comments / - obs-keywords / - obs-resent-date / - obs-resent-from / - obs-resent-send / - obs-resent-rply / - obs-resent-to / - obs-resent-cc / - obs-resent-bcc / - obs-resent-mid / - obs-optional) - -obs-orig-date = %i"Date" *WSP ":" date-time CRLF - -obs-from = %i"From" *WSP ":" mailbox-list CRLF - -obs-sender = %i"Sender" *WSP ":" mailbox CRLF - -obs-reply-to = %i"Reply-To" *WSP ":" mailbox-list CRLF - -obs-to = %i"To" *WSP ":" address-list CRLF - -obs-cc = %i"Cc" *WSP ":" address-list CRLF - -obs-bcc = %i"Bcc" *WSP ":" (address-list / [CFWS]) CRLF - -obs-message-id = %i"Message-ID" *WSP ":" msg-id CRLF - -obs-in-reply-to = %i"In-Reply-To" *WSP ":" *(phrase / msg-id) CRLF - -obs-references = %i"References" *WSP ":" *(phrase / msg-id) CRLF - -obs-id-left = local-part - -obs-id-right = domain - -obs-subject = %i"Subject" *WSP ":" unstructured CRLF - -obs-comments = %i"Comments" *WSP ":" unstructured CRLF - -obs-keywords = %i"Keywords" *WSP ":" obs-phrase-list CRLF - -obs-resent-from = %i"Resent-From" *WSP ":" mailbox-list CRLF - -obs-resent-send = %i"Resent-Sender" *WSP ":" mailbox CRLF - -obs-resent-date = %i"Resent-Date" *WSP ":" date-time CRLF - -obs-resent-to = %i"Resent-To" *WSP ":" address-list CRLF - -obs-resent-cc = %i"Resent-Cc" *WSP ":" address-list CRLF - -obs-resent-bcc = %i"Resent-Bcc" *WSP ":" - (address-list / [CFWS]) CRLF - -obs-resent-mid = %i"Resent-Message-ID" *WSP ":" msg-id CRLF - -obs-resent-rply = %i"Resent-Reply-To" *WSP ":" address-list CRLF - -obs-return = %i"Return-Path" *WSP ":" path CRLF - -obs-received = %i"Received" *WSP ":" name-val-list CRLF - -obs-path = obs-angle-addr - -obs-optional 
= field-name *WSP ":" unstructured CRLF -- cgit v1.2.1 From 2d469850efe9b5ad590a0c686c968d7810d1b104 Mon Sep 17 00:00:00 2001 From: hhsprings Date: Thu, 5 Nov 2015 15:50:33 +0900 Subject: re-create examples. --- tests/examplefiles/abnf_example1.abnf | 22 ++++++++++++++++++++++ tests/examplefiles/abnf_example2.abnf | 7 +++++++ tests/examplefiles/abnf_example5.abnf | 7 ------- 3 files changed, 29 insertions(+), 7 deletions(-) create mode 100644 tests/examplefiles/abnf_example1.abnf create mode 100644 tests/examplefiles/abnf_example2.abnf delete mode 100644 tests/examplefiles/abnf_example5.abnf diff --git a/tests/examplefiles/abnf_example1.abnf b/tests/examplefiles/abnf_example1.abnf new file mode 100644 index 00000000..5cd9cd25 --- /dev/null +++ b/tests/examplefiles/abnf_example1.abnf @@ -0,0 +1,22 @@ +; This examples from WikiPedia . + + postal-address = name-part street zip-part + + name-part = *(personal-part SP) last-name [SP suffix] CRLF + name-part =/ personal-part CRLF + + personal-part = first-name / (initial ".") + first-name = *ALPHA + initial = ALPHA + last-name = *ALPHA + suffix = ("Jr." / "Sr." 
/ 1*("I" / "V" / "X")) + + street = [apt SP] house-num SP street-name CRLF + apt = 1*4DIGIT + house-num = 1*8(DIGIT / ALPHA) + street-name = 1*VCHAR + + zip-part = town-name "," SP state 1*2SP zip-code CRLF + town-name = 1*(ALPHA / SP) + state = 2ALPHA + zip-code = 5DIGIT ["-" 4DIGIT] diff --git a/tests/examplefiles/abnf_example2.abnf b/tests/examplefiles/abnf_example2.abnf new file mode 100644 index 00000000..19ccd856 --- /dev/null +++ b/tests/examplefiles/abnf_example2.abnf @@ -0,0 +1,7 @@ +crlf = %d13.10 + +command = "command string" + +char-line = %x0D.0A *(%x20-7E) %x0D.0A + +oldrule =/ additional-alternatives diff --git a/tests/examplefiles/abnf_example5.abnf b/tests/examplefiles/abnf_example5.abnf deleted file mode 100644 index 6ef65125..00000000 --- a/tests/examplefiles/abnf_example5.abnf +++ /dev/null @@ -1,7 +0,0 @@ -crlf = %d13.10 - -command = "command string" - -char-line = %x0D.0A *(%x20-7E) %x0D.0A - -oldrule =/ additional-alternatives -- cgit v1.2.1 From bb42aa11ad8c91127e9b87d196e8088cf90f4c26 Mon Sep 17 00:00:00 2001 From: hhsprings Date: Thu, 5 Nov 2015 15:54:55 +0900 Subject: update example2. --- tests/examplefiles/abnf_example2.abnf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/examplefiles/abnf_example2.abnf b/tests/examplefiles/abnf_example2.abnf index 19ccd856..07e28a1e 100644 --- a/tests/examplefiles/abnf_example2.abnf +++ b/tests/examplefiles/abnf_example2.abnf @@ -4,4 +4,4 @@ command = "command string" char-line = %x0D.0A *(%x20-7E) %x0D.0A -oldrule =/ additional-alternatives +without-ws-and-ctl = %d1-8 / %d11 / %d12 / %d14-31 / %d127 -- cgit v1.2.1 From 666244c83ca89846209c6ae3ad5917b900423c8d Mon Sep 17 00:00:00 2001 From: hhsprings Date: Thu, 5 Nov 2015 18:12:54 +0900 Subject: Add the lexer for original BNF. 
--- pygments/lexers/_mapping.py | 1 + pygments/lexers/grammar_notation.py | 57 ++++++++++++++++++++++++++++++++----- tests/examplefiles/bnf_example1.bnf | 15 ++++++++++ 3 files changed, 66 insertions(+), 7 deletions(-) create mode 100644 tests/examplefiles/bnf_example1.bnf diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index adf58313..c9845733 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -51,6 +51,7 @@ LEXERS = { 'BefungeLexer': ('pygments.lexers.esoteric', 'Befunge', ('befunge',), ('*.befunge',), ('application/x-befunge',)), 'BlitzBasicLexer': ('pygments.lexers.basic', 'BlitzBasic', ('blitzbasic', 'b3d', 'bplus'), ('*.bb', '*.decls'), ('text/x-bb',)), 'BlitzMaxLexer': ('pygments.lexers.basic', 'BlitzMax', ('blitzmax', 'bmax'), ('*.bmx',), ('text/x-bmx',)), + 'BnfLexer': ('pygments.lexers.grammar_notation', 'BNF', ('bnf',), ('*.bnf',), ('text/x-bnf',)), 'BooLexer': ('pygments.lexers.dotnet', 'Boo', ('boo',), ('*.boo',), ('text/x-boo',)), 'BoogieLexer': ('pygments.lexers.esoteric', 'Boogie', ('boogie',), ('*.bpl',), ()), 'BrainfuckLexer': ('pygments.lexers.esoteric', 'Brainfuck', ('brainfuck', 'bf'), ('*.bf', '*.b'), ('application/x-brainfuck',)), diff --git a/pygments/lexers/grammar_notation.py b/pygments/lexers/grammar_notation.py index 65475646..43171387 100644 --- a/pygments/lexers/grammar_notation.py +++ b/pygments/lexers/grammar_notation.py @@ -9,17 +9,60 @@ :license: BSD, see LICENSE for details. 
""" -import re +from pygments.lexer import RegexLexer, bygroups +from pygments.token import Punctuation, Text, Comment, Operator, \ + Keyword, Name, Literal -from pygments.lexer import RegexLexer, DelegatingLexer, \ - include, bygroups, using, words -from pygments.token import Punctuation, Other, Text, Comment, Operator, \ - Keyword, Name, String, Number, Whitespace, Literal +__all__ = ['BnfLexer', 'AbnfLexer'] -__all__ = ['AbnfLexer'] +class BnfLexer(RegexLexer): + """ + This lexer is for grammer notations which are similar to + original BNF. + + In order to maximize a number of targets of this lexer, + let's decide some designs: + + * We don't distinct `Terminal Symbol`. + + * We do assume that `NonTerminal Symbol` are always enclosed + with arrow brackets. + + * We do assume that `NonTerminal Symbol` may include + any printable characters except arrow brackets and + space (no `spaces`, just space, i.e., ASCII \x020). + This assumption is for `RBNF `_. + + * We do assume that target notation doesn't support comment. + + * We don't distinct any operators and punctuation except + `::=`. + + Though these desision making might cause too minimal highlighting + and you might be disappointed, but it is reasonable for us. + + .. versionadded:: 2.1 + """ + + name = 'BNF' + aliases = ['bnf'] + filenames = ['*.bnf'] + mimetypes = ['text/x-bnf'] + + tokens = { + 'root': [ + (r'(<)([ -;=?-~]+)(>)', + bygroups(Punctuation, Name.Class, Punctuation)), + + # an only operator + (r'::=', Operator), + + # fallback + (r'.', Text), + ], + } -# EBNF shold be moved here, i think. class AbnfLexer(RegexLexer): """ diff --git a/tests/examplefiles/bnf_example1.bnf b/tests/examplefiles/bnf_example1.bnf new file mode 100644 index 00000000..fe041a6e --- /dev/null +++ b/tests/examplefiles/bnf_example1.bnf @@ -0,0 +1,15 @@ +; This examples from WikiPedia . + + ::= + + ::= + | + + ::= "." | + + ::= + + ::= "," + + ::= "Sr." | "Jr." 
| | "" + ::= | "" -- cgit v1.2.1 From e6927f0e2184adc0cc07f709fcacfe96a18e08dc Mon Sep 17 00:00:00 2001 From: Hiroaki Itoh Date: Fri, 6 Nov 2015 08:15:53 +0000 Subject: Correct docstring of module. --- pygments/lexers/grammar_notation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygments/lexers/grammar_notation.py b/pygments/lexers/grammar_notation.py index 43171387..2dc9aad9 100644 --- a/pygments/lexers/grammar_notation.py +++ b/pygments/lexers/grammar_notation.py @@ -3,7 +3,7 @@ pygments.lexers.grammar_notation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Lexers for parser generators. + Lexers for grammer notations like BNF. :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. -- cgit v1.2.1 From 92b38ef6e43ea87f525bd753f0f9c4e823f3f7e7 Mon Sep 17 00:00:00 2001 From: hhsprings Date: Fri, 6 Nov 2015 18:26:28 +0900 Subject: fix japanglish... --- pygments/lexers/grammar_notation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygments/lexers/grammar_notation.py b/pygments/lexers/grammar_notation.py index 2dc9aad9..60466def 100644 --- a/pygments/lexers/grammar_notation.py +++ b/pygments/lexers/grammar_notation.py @@ -88,7 +88,7 @@ class AbnfLexer(RegexLexer): txt = match.group(0) if txt in self._core_rules: # Strictly speaking, these are not keyword but - # is called `Core Rule'. + # are called `Core Rule'. yield match.start(), Keyword, txt else: yield match.start(), Name.Class, txt -- cgit v1.2.1 From d95d22cae75d2d60ce9c4553b92013b86ae7ceb8 Mon Sep 17 00:00:00 2001 From: hhsprings Date: Sat, 7 Nov 2015 22:54:45 +0900 Subject: See `#1164 `_. 
Before: 810 bytes, 4.6200 [ms] / 0.005704 [ms/byte] 156 bytes, 1.1100 [ms] / 0.007115 [ms/byte] 580 bytes, 2.1100 [ms] / 0.003638 [ms/byte] 810 bytes, 4.5600 [ms] / 0.005630 [ms/byte] 156 bytes, 0.7300 [ms] / 0.004679 [ms/byte] 580 bytes, 1.5600 [ms] / 0.002690 [ms/byte] 810 bytes, 3.5000 [ms] / 0.004321 [ms/byte] 156 bytes, 0.6800 [ms] / 0.004359 [ms/byte] 580 bytes, 1.1900 [ms] / 0.002052 [ms/byte] After: 810 bytes, 1.9700 [ms] / 0.002432 [ms/byte] 156 bytes, 0.4300 [ms] / 0.002756 [ms/byte] 580 bytes, 0.7300 [ms] / 0.001259 [ms/byte] 810 bytes, 1.9400 [ms] / 0.002395 [ms/byte] 156 bytes, 0.4500 [ms] / 0.002885 [ms/byte] 580 bytes, 0.6700 [ms] / 0.001155 [ms/byte] 810 bytes, 1.9300 [ms] / 0.002383 [ms/byte] 156 bytes, 0.4600 [ms] / 0.002949 [ms/byte] 580 bytes, 0.6700 [ms] / 0.001155 [ms/byte] --- pygments/lexers/grammar_notation.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pygments/lexers/grammar_notation.py b/pygments/lexers/grammar_notation.py index 60466def..1c5f1163 100644 --- a/pygments/lexers/grammar_notation.py +++ b/pygments/lexers/grammar_notation.py @@ -59,6 +59,7 @@ class BnfLexer(RegexLexer): (r'::=', Operator), # fallback + (r'[^<>:=]+', Text), # for performance (r'.', Text), ], } @@ -99,7 +100,8 @@ class AbnfLexer(RegexLexer): (r';.*$', Comment.Single), # quoted - (r'(%[si])?"', Literal, 'quoted-termination'), + # double quote itself in this state, it is as '%x22'. + (r'(%[si])?"[^"]*"', Literal), # binary (but i have never seen...) (r'%b[01]+\-[01]+\b', Literal), # range @@ -129,11 +131,7 @@ class AbnfLexer(RegexLexer): (r'[\[\]()]', Punctuation), # fallback + (r'\s+', Text), (r'.', Text), ], - 'quoted-termination': [ - # double quote itself in this state, it is as '%x22'. 
- (r'"', Literal, '#pop'), - (r'.', Literal), - ] } -- cgit v1.2.1 From b9e6386e3ef2d19f0472e6aca0b8880e7e3626bf Mon Sep 17 00:00:00 2001 From: hhsprings Date: Mon, 9 Nov 2015 18:13:46 +0900 Subject: Fix regarding to Tim's review except `can you confirm that range and concat can't be used together in the same literal?'. --- pygments/lexers/grammar_notation.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/pygments/lexers/grammar_notation.py b/pygments/lexers/grammar_notation.py index 1c5f1163..8923bbaa 100644 --- a/pygments/lexers/grammar_notation.py +++ b/pygments/lexers/grammar_notation.py @@ -9,7 +9,7 @@ :license: BSD, see LICENSE for details. """ -from pygments.lexer import RegexLexer, bygroups +from pygments.lexer import RegexLexer, bygroups, words from pygments.token import Punctuation, Text, Comment, Operator, \ Keyword, Name, Literal @@ -24,14 +24,13 @@ class BnfLexer(RegexLexer): In order to maximize a number of targets of this lexer, let's decide some designs: - * We don't distinct `Terminal Symbol`. + * We don't distinguish `Terminal Symbol`. * We do assume that `NonTerminal Symbol` are always enclosed with arrow brackets. * We do assume that `NonTerminal Symbol` may include - any printable characters except arrow brackets and - space (no `spaces`, just space, i.e., ASCII \x020). + any printable characters except arrow brackets and ASCII 0x20. This assumption is for `RBNF `_. * We do assume that target notation doesn't support comment. 
@@ -59,7 +58,7 @@ class BnfLexer(RegexLexer): (r'::=', Operator), # fallback - (r'[^<>:=]+', Text), # for performance + (r'[^<>:]+', Text), # for performance (r'.', Text), ], } @@ -83,16 +82,7 @@ class AbnfLexer(RegexLexer): _core_rules = ( 'ALPHA', 'BIT', 'CHAR', 'CR', 'CRLF', 'CTL', 'DIGIT', 'DQUOTE', 'HEXDIG', 'HTAB', 'LF', 'LWSP', 'OCTET', - 'SP', 'VCHAR', 'WSP',) - - def nonterminal_cb(self, match): - txt = match.group(0) - if txt in self._core_rules: - # Strictly speaking, these are not keyword but - # are called `Core Rule'. - yield match.start(), Keyword, txt - else: - yield match.start(), Name.Class, txt + 'SP', 'VCHAR', 'WSP') tokens = { 'root': [ @@ -121,8 +111,12 @@ class AbnfLexer(RegexLexer): (r'\b[0-9]+', Operator), (r'\*', Operator), + # Strictly speaking, these are not keyword but + # are called `Core Rule'. + (words(_core_rules, suffix=r'\b'), Keyword), + # nonterminals (ALPHA *(ALPHA / DIGIT / "-")) - (r'[a-zA-Z][a-zA-Z0-9-]+\b', nonterminal_cb), + (r'[a-zA-Z][a-zA-Z0-9-]+\b', Name.Class), # operators (r'(=/|=|/)', Operator), -- cgit v1.2.1 From 1e63d6299c70143063037b4e64360dc574a642ee Mon Sep 17 00:00:00 2001 From: hhsprings Date: Mon, 9 Nov 2015 18:39:39 +0900 Subject: update example. --- tests/examplefiles/abnf_example2.abnf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/examplefiles/abnf_example2.abnf b/tests/examplefiles/abnf_example2.abnf index 07e28a1e..8781adfb 100644 --- a/tests/examplefiles/abnf_example2.abnf +++ b/tests/examplefiles/abnf_example2.abnf @@ -5,3 +5,5 @@ command = "command string" char-line = %x0D.0A *(%x20-7E) %x0D.0A without-ws-and-ctl = %d1-8 / %d11 / %d12 / %d14-31 / %d127 + +three-blank-lines = %x0D.0A.0D.0A.0D.0A -- cgit v1.2.1 From f3c27772939937acbce4fc5fd4fd589ebccfacbe Mon Sep 17 00:00:00 2001 From: hhsprings Date: Tue, 10 Nov 2015 01:37:30 +0900 Subject: Sorry... 
--- pygments/lexers/grammar_notation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygments/lexers/grammar_notation.py b/pygments/lexers/grammar_notation.py index 8923bbaa..460914f4 100644 --- a/pygments/lexers/grammar_notation.py +++ b/pygments/lexers/grammar_notation.py @@ -35,7 +35,7 @@ class BnfLexer(RegexLexer): * We do assume that target notation doesn't support comment. - * We don't distinct any operators and punctuation except + * We don't distinguish any operators and punctuation except `::=`. Though these desision making might cause too minimal highlighting -- cgit v1.2.1