summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jon Siwek <jsiwek@corelight.com> 2019-11-24 09:02:28 -0800
committer: Georg Brandl <georg@python.org> 2019-11-25 20:39:24 +0100
commit: 71835d83fbfe004608609b4d2ae29aa8ddf31359 (patch)
tree: 18a8101d5c2ec6bff4d0e17831f5b7b3adfc9f7a
parent: ed97a09b19cb256c6d0081bfb2f96f9d54e6c404 (diff)
download: pygments-git-71835d83fbfe004608609b4d2ae29aa8ddf31359.tar.gz
Add Zeek lexer based on the Bro lexer
Bro has been renamed to Zeek, but the language is essentially the same without any different treatment of .zeek files from .bro files. This change also adds general improvements to the lexer.
-rw-r--r-- doc/languages.rst 1
-rw-r--r-- pygments/lexers/_mapping.py 1
-rw-r--r-- pygments/lexers/dsls.py 198
-rw-r--r-- pygments/lexers/other.py 4
-rw-r--r-- tests/examplefiles/test.zeek 181
5 files changed, 329 insertions, 56 deletions
diff --git a/doc/languages.rst b/doc/languages.rst
index be886683..e365ef2e 100644
--- a/doc/languages.rst
+++ b/doc/languages.rst
@@ -104,6 +104,7 @@ Programming languages
* Visual Basic.NET
* Visual FoxPro
* XQuery
+* `Zeek <https://www.zeek.org>`_
* Zephir
Template languages
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index 7dfb2775..2533f651 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -475,6 +475,7 @@ LEXERS = {
'XtlangLexer': ('pygments.lexers.lisp', 'xtlang', ('extempore',), ('*.xtm',), ()),
'YamlJinjaLexer': ('pygments.lexers.templates', 'YAML+Jinja', ('yaml+jinja', 'salt', 'sls'), ('*.sls',), ('text/x-yaml+jinja', 'text/x-sls')),
'YamlLexer': ('pygments.lexers.data', 'YAML', ('yaml',), ('*.yaml', '*.yml'), ('text/x-yaml',)),
+ 'ZeekLexer': ('pygments.lexers.dsls', 'Zeek', ('zeek',), ('*.zeek',), ()),
'ZephirLexer': ('pygments.lexers.php', 'Zephir', ('zephir',), ('*.zep',), ()),
'ZigLexer': ('pygments.lexers.zig', 'Zig', ('zig',), ('*.zig',), ('text/zig',)),
}
diff --git a/pygments/lexers/dsls.py b/pygments/lexers/dsls.py
index eb338d79..23fc9a97 100644
--- a/pygments/lexers/dsls.py
+++ b/pygments/lexers/dsls.py
@@ -16,7 +16,7 @@ from pygments.lexer import ExtendedRegexLexer, RegexLexer, bygroups, words, \
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
Number, Punctuation, Literal, Whitespace
-__all__ = ['ProtoBufLexer', 'BroLexer', 'PuppetLexer', 'RslLexer',
+__all__ = ['ProtoBufLexer', 'ZeekLexer', 'BroLexer', 'PuppetLexer', 'RslLexer',
'MscgenLexer', 'VGLLexer', 'AlloyLexer', 'PanLexer',
'CrmshLexer', 'ThriftLexer', 'FlatlineLexer', 'SnowballLexer']
@@ -188,84 +188,174 @@ class ThriftLexer(RegexLexer):
}
-class BroLexer(RegexLexer):
+class ZeekLexer(RegexLexer):
"""
- For `Bro <http://bro-ids.org/>`_ scripts.
+ For `Zeek <https://www.zeek.org/>`_ scripts.
- .. versionadded:: 1.5
+ .. versionadded:: 2.5
"""
- name = 'Bro'
- aliases = ['bro']
- filenames = ['*.bro']
+ name = 'Zeek'
+ aliases = ['zeek']
+ filenames = ['*.zeek']
- _hex = r'[0-9a-fA-F_]'
+ _hex = r'[0-9a-fA-F]'
_float = r'((\d*\.?\d+)|(\d+\.?\d*))([eE][-+]?\d+)?'
_h = r'[A-Za-z0-9][-A-Za-z0-9]*'
tokens = {
'root': [
- # Whitespace
- (r'^@.*?\n', Comment.Preproc),
- (r'#.*?\n', Comment.Single),
+ include('whitespace'),
+ include('comments'),
+ include('directives'),
+ include('attributes'),
+ include('types'),
+ include('keywords'),
+ include('literals'),
+ include('operators'),
+ include('punctuation'),
+ (r'\b((?:[A-Za-z_][A-Za-z_0-9]*)(?:::(?:[A-Za-z_][A-Za-z_0-9]*))*)(?=\s*\()',
+ Name.Function),
+ include('identifiers'),
+ ],
+
+ 'whitespace': [
(r'\n', Text),
(r'\s+', Text),
(r'\\\n', Text),
- # Keywords
- (r'(add|alarm|break|case|const|continue|delete|do|else|enum|event'
- r'|export|for|function|if|global|hook|local|module|next'
- r'|of|print|redef|return|schedule|switch|type|when|while)\b', Keyword),
- (r'(addr|any|bool|count|counter|double|file|int|interval|net'
- r'|pattern|port|record|set|string|subnet|table|time|timer'
- r'|vector)\b', Keyword.Type),
- (r'(T|F)\b', Keyword.Constant),
- (r'(&)((?:add|delete|expire)_func|attr|(?:create|read|write)_expire'
- r'|default|disable_print_hook|raw_output|encrypt|group|log'
- r'|mergeable|optional|persistent|priority|redef'
- r'|rotate_(?:interval|size)|synchronized)\b',
- bygroups(Punctuation, Keyword)),
- (r'\s+module\b', Keyword.Namespace),
- # Addresses, ports and networks
- (r'\d+/(tcp|udp|icmp|unknown)\b', Number),
- (r'(\d+\.){3}\d+', Number),
- (r'(' + _hex + r'){7}' + _hex, Number),
- (r'0x' + _hex + r'(' + _hex + r'|:)*::(' + _hex + r'|:)*', Number),
- (r'((\d+|:)(' + _hex + r'|:)*)?::(' + _hex + r'|:)*', Number),
- (r'(\d+\.\d+\.|(\d+\.){2}\d+)', Number),
+ ],
+
+ 'comments': [
+ (r'#.*$', Comment),
+ ],
+
+ 'directives': [
+ (r'(@(load-plugin|load-sigs|load|unload))\b.*$', Comment.Preproc),
+ (r'(@(DEBUG|DIR|FILENAME|deprecated|if|ifdef|ifndef|else|endif))\b', Comment.Preproc),
+ (r'(@prefixes)\s*(\+?=).*$', Comment.Preproc),
+ ],
+
+ 'attributes': [
+ (words(('redef', 'priority', 'log', 'optional', 'default', 'add_func',
+ 'delete_func', 'expire_func', 'read_expire', 'write_expire',
+ 'create_expire', 'synchronized', 'persistent', 'rotate_interval',
+ 'rotate_size', 'encrypt', 'raw_output', 'mergeable', 'error_handler',
+ 'type_column', 'deprecated'),
+ prefix=r'&', suffix=r'\b'),
+ Keyword.Pseudo),
+ ],
+
+ 'types': [
+ (words(('any',
+ 'enum', 'record', 'set', 'table', 'vector',
+ 'function', 'hook', 'event',
+ 'addr', 'bool', 'count', 'double', 'file', 'int', 'interval',
+ 'pattern', 'port', 'string', 'subnet', 'time'),
+ prefix=r'\b', suffix=r'\b'),
+ Keyword.Type),
+
+ (r'\b(opaque)(\s+)(of)(\s+)((?:[A-Za-z_][A-Za-z_0-9]*)(?:::(?:[A-Za-z_][A-Za-z_0-9]*))*)\b',
+ bygroups(Keyword.Type, Text, Operator.Word, Text, Keyword.Type)),
+
+ (r'\b(type)(\s+)((?:[A-Za-z_][A-Za-z_0-9]*)(?:::(?:[A-Za-z_][A-Za-z_0-9]*))*)(\s*)(:)(\s*)\b(record|enum)\b',
+ bygroups(Keyword, Text, Name.Class, Text, Operator, Text, Keyword.Type)),
+
+ (r'\b(type)(\s+)((?:[A-Za-z_][A-Za-z_0-9]*)(?:::(?:[A-Za-z_][A-Za-z_0-9]*))*)(\s*)(:)',
+ bygroups(Keyword, Text, Name, Text, Operator)),
+
+ (r'\b(redef)(\s+)(record|enum)(\s+)((?:[A-Za-z_][A-Za-z_0-9]*)(?:::(?:[A-Za-z_][A-Za-z_0-9]*))*)\b',
+ bygroups(Keyword, Text, Keyword.Type, Text, Name.Class)),
+ ],
+
+ 'keywords': [
+ (words(('redef', 'export', 'if', 'else', 'for', 'while',
+ 'return', 'break', 'next', 'continue', 'fallthrough',
+ 'switch', 'default', 'case',
+ 'add', 'delete',
+ 'when', 'timeout', 'schedule'),
+ prefix=r'\b', suffix=r'\b'),
+ Keyword),
+ (r'\b(print)\b', Keyword),
+ (r'\b(global|local|const|option)\b', Keyword.Declaration),
+ (r'\b(module)(\s+)(([A-Za-z_][A-Za-z_0-9]*)(?:::([A-Za-z_][A-Za-z_0-9]*))*)\b',
+ bygroups(Keyword.Namespace, Text, Name.Namespace)),
+ ],
+
+ 'literals': [
+ (r'"', String, 'string'),
+
+ # Not the greatest match for patterns, but generally helps
+ # disambiguate between start of a pattern and just a division
+ # operator.
+ (r'/(?=.*/)', String.Regex, 'regex'),
+
+ (r'\b(T|F)\b', Keyword.Constant),
+
+ # Port
+ (r'\b\d{1,5}/(udp|tcp|icmp|unknown)\b', Number),
+
+ # IPv4 Address
+ (r'\b(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\b', Number),
+
+ # IPv6 Address (not 100% correct: that takes more effort)
+ (r'\[([0-9a-fA-F]{0,4}:){2,7}([0-9a-fA-F]{0,4})?((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2})\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[0-9]{1,2}))?\]', Number),
+
+ # Numeric
+ (r'\b0[xX]' + _hex + r'+\b', Number.Hex),
+ (r'\b' + _float + r'\s*(day|hr|min|sec|msec|usec)s?\b', Number.Float),
+ (r'\b' + _float + r'\b', Number.Float),
+ (r'\b(\d+)\b', Number.Integer),
+
# Hostnames
(_h + r'(\.' + _h + r')+', String),
- # Numeric
- (_float + r'\s+(day|hr|min|sec|msec|usec)s?\b', Literal.Date),
- (r'0[xX]' + _hex, Number.Hex),
- (_float, Number.Float),
- (r'\d+', Number.Integer),
- (r'/', String.Regex, 'regex'),
- (r'"', String, 'string'),
- # Operators
- (r'[!%*/+:<=>?~|-]', Operator),
+ ],
+
+ 'operators': [
+ (r'[!%*/+<=>~|&^-]', Operator),
(r'([-+=&|]{2}|[+=!><-]=)', Operator),
- (r'(in|match)\b', Operator.Word),
- (r'[{}()\[\]$.,;]', Punctuation),
- # Identfier
- (r'([_a-zA-Z]\w*)(::)', bygroups(Name, Name.Namespace)),
+ (r'\b(in|as|is|of)\b', Operator.Word),
+ (r'\??\$', Operator),
+ # Technically, colons are often used for punctuation/separation.
+ # E.g. field name/type separation.
+ (r'[?:]', Operator),
+ ],
+
+ 'punctuation': [
+ (r'\?\$', Punctuation),
+ (r'[{}()\[\],;:.]', Punctuation),
+ ],
+
+ 'identifiers': [
+ (r'([a-zA-Z_]\w*)(::)', bygroups(Name, Punctuation)),
(r'[a-zA-Z_]\w*', Name)
],
+
'string': [
+ (r'\\.', String.Escape),
+ (r'%-?[0-9]*(\.[0-9]+)?[DTdxsefg]', String.Escape),
(r'"', String, '#pop'),
- (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
- (r'[^\\"\n]+', String),
- (r'\\\n', String),
- (r'\\', String)
+ (r'.', String),
],
+
'regex': [
+ (r'\\.', String.Escape),
(r'/', String.Regex, '#pop'),
- (r'\\[\\nt/]', String.Regex), # String.Escape is too intense here.
- (r'[^\\/\n]+', String.Regex),
- (r'\\\n', String.Regex),
- (r'\\', String.Regex)
- ]
+ (r'.', String.Regex),
+ ],
}
+class BroLexer(ZeekLexer):
+ """
+ For `Zeek <https://www.zeek.org/>`_ scripts.
+ Note, this is equivalent to ZeekLexer (Bro is the old name for Zeek).
+
+ .. versionadded:: 1.5
+ """
+ name = 'Bro'
+ aliases = ['bro']
+ filenames = ['*.bro']
+
+
class PuppetLexer(RegexLexer):
"""
For `Puppet <http://puppetlabs.com/>`__ configuration DSL.
diff --git a/pygments/lexers/other.py b/pygments/lexers/other.py
index c3a60cef..441ffe26 100644
--- a/pygments/lexers/other.py
+++ b/pygments/lexers/other.py
@@ -27,8 +27,8 @@ from pygments.lexers.graphics import PostScriptLexer, GnuplotLexer, \
from pygments.lexers.business import ABAPLexer, OpenEdgeLexer, \
GoodDataCLLexer, MaqlLexer
from pygments.lexers.automation import AutoItLexer, AutohotkeyLexer
-from pygments.lexers.dsls import ProtoBufLexer, BroLexer, PuppetLexer, \
- MscgenLexer, VGLLexer
+from pygments.lexers.dsls import ProtoBufLexer, ZeekLexer, BroLexer, \
+ PuppetLexer, MscgenLexer, VGLLexer
from pygments.lexers.basic import CbmBasicV2Lexer
from pygments.lexers.pawn import SourcePawnLexer, PawnLexer
from pygments.lexers.ecl import ECLLexer
diff --git a/tests/examplefiles/test.zeek b/tests/examplefiles/test.zeek
new file mode 100644
index 00000000..ee6bad8d
--- /dev/null
+++ b/tests/examplefiles/test.zeek
@@ -0,0 +1,181 @@
+# An example of the Zeek scripting language.
+
+##! A Zeekygen-style summary comment.
+
+# TODO: just an example of a todo-indicator
+
+@load base/frameworks/notice
+
+@if ( F )
+@endif
+
+module Example;
+
+export {
+
+ type mycount: count;
+
+ type SimpleEnum: enum { ONE, TWO, THREE };
+
+ redef enum SimpleEnum += {
+
+ ## A Zeekygen-style comment.
+ FOUR,
+ FIVE, ##< A Zeekygen-style comment.
+ };
+
+ type SimpleRecord: record {
+ field1: count;
+ field2: bool;
+ } &redef;
+
+ redef record SimpleRecord += {
+
+ field3: string &optional;
+
+ field4: string &default="blah";
+ };
+
+ const init_option: bool = T;
+
+ option runtime_option: bool = F;
+
+ global test_opaque: opaque of md5;
+
+ global test_vector: vector of count;
+
+ global myfunction: function(msg: string, c: count &default=0): count;
+
+ global myhook: hook(tag: string);
+
+ global myevent: event(tag: string);
+}
+
+function myfunction(msg: string, c: count): count
+ {
+ print "in myfunction", msg, c;
+ return 0;
+ }
+
+event myevent(msg: string) &priority=1
+ {
+ print "in myevent";
+ }
+
+hook myhook(msg: string)
+ {
+ print "in myevent";
+ }
+
+event zeek_init()
+ {
+ local b = T;
+ local s = "\xff\xaf\"and more after the escaped quote";
+ local p = /foo|bar\xbe\/and more after the escaped slash/;
+ local c = 10;
+
+ local sr = SimpleRecord($field1 = 0, $field2 = T, $field3 = "hi");
+
+ print sr?$field3, sr$field1;
+
+ local myset: set[string] = set("one", "two", "three");
+
+ add myset["four"];
+ delete myset["one"];
+
+ for ( ms in myset )
+ {
+ print ms is string, s as string;
+
+ print s[1:3];
+
+ local tern: count = s == "two" ? 2 : 0;
+
+ if ( s !in myset )
+ print fmt("error %4.2f: %s", 3.14159, "wtf?");
+ }
+
+ switch ( c ) {
+ case 1:
+ break;
+ case 2:
+ fallthrough;
+ default:
+ break;
+ }
+
+ if ( ! b )
+ print "here";
+ else
+ print "there";
+
+ while ( c != 0 )
+ {
+ if ( c >= 5 )
+ c += 0;
+ else if ( c == 8 )
+ c -= 0;
+
+ c = c / 1;
+ c = c / 1;
+ c = c - 1;
+ }
+
+ print |myset|;
+ print ~5;
+ print 1 & 0xff;
+ print 2 ^ 5;
+
+ myfunction ("hello function");
+ hook myhook("hell hook");
+ event myevent("hello event");
+ schedule 1sec { myevent("hello scheduled event") };
+
+ print 0, 7;
+ print 0xff, 0xdeadbeef;
+
+ print 3.14159;
+ print 1234.0;
+ print 1234e0;
+ print .003E-23;
+ print .003E+23;
+
+ print 123/udp;
+ print 8000/tcp;
+ print 13/icmp;
+ print 42/unknown;
+
+ print google.com;
+ print 192.168.50.1;
+ print 255.255.255.255;
+ print 0.0.0.0;
+
+ print 10.0.0.0/16;
+
+ print [2001:0db8:85a3:0000:0000:8a2e:0370:7334];
+ # test for case insensitivity
+ print [2001:0DB8:85A3:0000:0000:8A2E:0370:7334];
+ # any case mixture is allowed
+ print [2001:0dB8:85a3:0000:0000:8A2E:0370:7334];
+ # leading zeroes of a 16-bit group may be omitted
+ print [2001:db8:85a3:0:0:8a2e:370:7334];
+ # a single occurrence of consecutive groups of zeroes may be replaced by ::
+ print [2001:db8:85a3::8a2e:370:7334];
+ # all zeroes should work
+ print [0:0:0:0:0:0:0:0];
+ # all zeroes condensed should work
+ print [::];
+ # hybrid ipv6-ipv4 address should work
+ print [2001:db8:0:0:0:FFFF:192.168.0.5];
+ # hybrid ipv6-ipv4 address with zero omission should work
+ print [2001:db8::FFFF:192.168.0.5];
+
+ print [2001:0db8:85a3:0000:0000:8a2e:0370:7334]/64;
+
+ print 1day, 1days, 1.0day, 1.0days;
+ print 1hr, 1hrs, 1.0hr, 1.0hrs;
+ print 1min, 1mins, 1.0min, 1.0mins;
+ print 1sec, 1secs, 1.0sec, 1.0secs;
+ print 1msec, 1msecs, 1.0msec, 1.0msecs;
+ print 1usec, 1usecs, 1.0usec, 1.0usecs;
+ }