Diffstat (limited to 'pygments/lexers/text.py')
-rw-r--r-- pygments/lexers/text.py | 745
1 file changed, 578 insertions, 167 deletions
diff --git a/pygments/lexers/text.py b/pygments/lexers/text.py
index 5a63e503..6c2a4119 100644
--- a/pygments/lexers/text.py
+++ b/pygments/lexers/text.py
@@ -5,7 +5,7 @@
     Lexers for non-source code file types.
 
-    :copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS.
+    :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
 
@@ -16,7 +16,7 @@ from pygments.lexer import Lexer, LexerContext, RegexLexer, ExtendedRegexLexer,
     bygroups, include, using, this, do_insertions
 from pygments.token import Punctuation, Text, Comment, Keyword, Name, String, \
     Generic, Operator, Number, Whitespace, Literal
-from pygments.util import get_bool_opt
+from pygments.util import get_bool_opt, ClassNotFound
 from pygments.lexers.other import BashLexer
 
 __all__ = ['IniLexer', 'PropertiesLexer', 'SourcesListLexer', 'BaseMakefileLexer',
@@ -24,7 +24,9 @@ __all__ = ['IniLexer', 'PropertiesLexer', 'SourcesListLexer', 'BaseMakefileLexer
            'GroffLexer', 'ApacheConfLexer', 'BBCodeLexer', 'MoinWikiLexer',
            'RstLexer', 'VimLexer', 'GettextLexer', 'SquidConfLexer',
            'DebianControlLexer', 'DarcsPatchLexer', 'YamlLexer',
-           'LighttpdConfLexer', 'NginxConfLexer', 'CMakeLexer']
+           'LighttpdConfLexer', 'NginxConfLexer', 'CMakeLexer', 'HttpLexer',
+           'PyPyLogLexer', 'RegeditLexer', 'HxmlLexer', 'EbnfLexer',
+           'TodotxtLexer']
 
 
 class IniLexer(RegexLexer):
@@ -33,14 +35,14 @@ class IniLexer(RegexLexer):
     """
 
     name = 'INI'
-    aliases = ['ini', 'cfg']
+    aliases = ['ini', 'cfg', 'dosini']
     filenames = ['*.ini', '*.cfg']
     mimetypes = ['text/x-ini']
 
     tokens = {
         'root': [
             (r'\s+', Text),
-            (r'[;#].*?$', Comment),
+            (r'[;#].*', Comment.Single),
             (r'\[.*?\]$', Keyword),
             (r'(.*?)([ \t]*)(=)([ \t]*)(.*(?:\n[ \t].+)*)',
              bygroups(Name.Attribute, Text, Operator, Text, String))
@@ -54,15 +56,58 @@ class IniLexer(RegexLexer):
         return text[0] == '[' and text[npos-1] == ']'
 
 
+class RegeditLexer(RegexLexer):
+    """
+    Lexer for `Windows Registry
+    <http://en.wikipedia.org/wiki/Windows_Registry#.REG_files>`_ files produced
+    by regedit.
+
+    .. versionadded:: 1.6
+    """
+
+    name = 'reg'
+    aliases = ['registry']
+    filenames = ['*.reg']
+    mimetypes = ['text/x-windows-registry']
+
+    tokens = {
+        'root': [
+            (r'Windows Registry Editor.*', Text),
+            (r'\s+', Text),
+            (r'[;#].*', Comment.Single),
+            (r'(\[)(-?)(HKEY_[A-Z_]+)(.*?\])$',
+             bygroups(Keyword, Operator, Name.Builtin, Keyword)),
+            # String keys, which obey somewhat normal escaping
+            (r'("(?:\\"|\\\\|[^"])+")([ \t]*)(=)([ \t]*)',
+             bygroups(Name.Attribute, Text, Operator, Text),
+             'value'),
+            # Bare keys (includes @)
+            (r'(.*?)([ \t]*)(=)([ \t]*)',
+             bygroups(Name.Attribute, Text, Operator, Text),
+             'value'),
+        ],
+        'value': [
+            (r'-', Operator, '#pop'),  # delete value
+            (r'(dword|hex(?:\([0-9a-fA-F]\))?)(:)([0-9a-fA-F,]+)',
+             bygroups(Name.Variable, Punctuation, Number), '#pop'),
+            # As far as I know, .reg files do not support line continuation.
+            (r'.*', String, '#pop'),
+        ]
+    }
+
+    def analyse_text(text):
+        return text.startswith('Windows Registry Editor')
+
+
 class PropertiesLexer(RegexLexer):
     """
     Lexer for configuration files in Java's properties format.
 
-    *New in Pygments 1.4.*
+    .. versionadded:: 1.4
     """
 
     name = 'Properties'
-    aliases = ['properties']
+    aliases = ['properties', 'jproperties']
     filenames = ['*.properties']
     mimetypes = ['text/x-java-properties']
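The new RegeditLexer can be exercised like any other Pygments lexer once this change is applied; a minimal sketch (the sample .reg snippet and key names are invented for illustration):

    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    from pygments.lexers.text import RegeditLexer

    # A tiny regedit export: banner, a key, a string value, a deleted value
    sample = ('Windows Registry Editor Version 5.00\n'
              '\n'
              '[HKEY_CURRENT_USER\\Software\\Example]\n'
              '"Path"="C:\\\\Tools"\n'
              '"Legacy"=-\n')
    print(highlight(sample, RegeditLexer(), TerminalFormatter()))

Since analyse_text keys on the 'Windows Registry Editor' banner, pygments.lexers.guess_lexer should also pick this lexer for exported .reg files.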
@@ -80,11 +125,11 @@ class SourcesListLexer(RegexLexer):
     """
     Lexer that highlights debian sources.list files.
 
-    *New in Pygments 0.7.*
+    .. versionadded:: 0.7
     """
 
     name = 'Debian Sourcelist'
-    aliases = ['sourceslist', 'sources.list']
+    aliases = ['sourceslist', 'sources.list', 'debsources']
     filenames = ['sources.list']
     mimetype = ['application/x-debian-sourceslist']
 
@@ -136,7 +181,7 @@ class MakefileLexer(Lexer):
 
     name = 'Makefile'
     aliases = ['make', 'makefile', 'mf', 'bsdmake']
-    filenames = ['*.mak', 'Makefile', 'makefile', 'Makefile.*', 'GNUmakefile']
+    filenames = ['*.mak', '*.mk', 'Makefile', 'makefile', 'Makefile.*', 'GNUmakefile']
     mimetypes = ['text/x-makefile']
 
     r_special = re.compile(r'^(?:'
@@ -163,30 +208,37 @@ class MakefileLexer(Lexer):
         for item in do_insertions(ins, lex.get_tokens_unprocessed(done)):
             yield item
 
+    def analyse_text(text):
+        # Many makefiles have $(BIG_CAPS) style variables
+        if re.search(r'\$\([A-Z_]+\)', text):
+            return 0.1
+
 
 class BaseMakefileLexer(RegexLexer):
     """
     Lexer for simple Makefiles (no preprocessing).
 
-    *New in Pygments 0.10.*
+    .. versionadded:: 0.10
     """
 
-    name = 'Makefile'
+    name = 'Base Makefile'
     aliases = ['basemake']
     filenames = []
     mimetypes = []
 
     tokens = {
         'root': [
+            # recipes (need to allow spaces because of expandtabs)
             (r'^(?:[\t ]+.*\n|\n)+', using(BashLexer)),
-            (r'\$\((?:.*\\\n|.*\n)+', using(BashLexer)),
+            # special variables
+            (r'\$[<@$+%?|*]', Keyword),
             (r'\s+', Text),
             (r'#.*?\n', Comment),
             (r'(export)(\s+)(?=[a-zA-Z0-9_${}\t -]+\n)',
             bygroups(Keyword, Text), 'export'),
             (r'export\s+', Keyword),
             # assignment
-            (r'([a-zA-Z0-9_${}.-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n|.*\n)+)',
+            (r'([a-zA-Z0-9_${}.-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)',
             bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))),
             # strings
             (r'(?s)"(\\\\|\\.|[^"\\])*"', String.Double),
@@ -194,7 +246,15 @@ class BaseMakefileLexer(RegexLexer):
             # targets
             (r'([^\n:]+)(:+)([ \t]*)', bygroups(Name.Function, Operator, Text),
             'block-header'),
-            # TODO: add paren handling (grr)
+            # expansions
+            (r'\$\(', Keyword, 'expansion'),
+        ],
+        'expansion': [
+            (r'[^$a-zA-Z_)]+', Text),
+            (r'[a-zA-Z_]+', Name.Variable),
+            (r'\$', Keyword),
+            (r'\(', Keyword, '#push'),
+            (r'\)', Keyword, '#pop'),
         ],
         'export': [
            (r'[a-zA-Z0-9_${}-]+', Name.Variable),
@@ -202,12 +262,13 @@ class BaseMakefileLexer(RegexLexer):
            (r'\s+', Text),
        ],
        'block-header': [
-            (r'[^,\\\n#]+', Number),
-            (r',', Punctuation),
-            (r'#.*?\n', Comment),
+            (r'[,|]', Punctuation),
+            (r'#.*?\n', Comment, '#pop'),
            (r'\\\n', Text),  # line continuation
-            (r'\\.', Text),
-            (r'(?:[\t ]+.*\n|\n)+', using(BashLexer), '#pop'),
+            (r'\$\(', Keyword, 'expansion'),
+            (r'[a-zA-Z_]+', Name),
+            (r'\n', Text, '#pop'),
+            (r'.', Text),
        ],
    }
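The new analyse_text hook gives MakefileLexer a small boost during content sniffing. It can be probed directly; a sketch (the sample makefile text is invented, and in guess_lexer other lexers' scores may still win):

    from pygments.lexers.text import MakefileLexer

    # $(CFLAGS) triggers the $(BIG_CAPS) heuristic, so this reports 0.1
    print(MakefileLexer.analyse_text('CFLAGS = -O2\nall:\n\tcc $(CFLAGS) main.c\n'))
    # No make-style variables here, so this reports 0.0
    print(MakefileLexer.analyse_text('no make variables here\n'))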
@@ -253,7 +314,7 @@ class DarcsPatchLexer(RegexLexer):
     format. Examples of this format are derived by commands such as
     ``darcs annotate --patch`` and ``darcs send``.
 
-    *New in Pygments 0.10.*
+    .. versionadded:: 0.10
     """
     name = 'Darcs Patch'
     aliases = ['dpatch']
@@ -291,12 +352,12 @@ class DarcsPatchLexer(RegexLexer):
         'insert': [
             include('specialText'),
             (r'\[', Generic.Inserted),
-            (r'[^\n\[]*', Generic.Inserted),
+            (r'[^\n\[]+', Generic.Inserted),
         ],
         'delete': [
             include('specialText'),
             (r'\[', Generic.Deleted),
-            (r'[^\n\[]*', Generic.Deleted),
+            (r'[^\n\[]+', Generic.Deleted),
         ],
     }
 
@@ -345,18 +406,18 @@ class IrcLogsLexer(RegexLexer):
             # /me msgs
             ("^" + timestamp + r"""
                 (\s*[*]\s+)              # Star
-                ([^\s]+\s+.*?\n)         # Nick + rest of message """,
+                (\S+\s+.*?\n)            # Nick + rest of message """,
              bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
             # join/part msgs
             ("^" + timestamp + r"""
                 (\s*(?:\*{3}|<?-[!@=P]?->?)\s*)  # Star(s) or symbols
-                ([^\s]+\s+)                     # Nick + Space
+                (\S+\s+)                        # Nick + Space
                 (.*?\n)                         # Rest of message """,
              bygroups(Comment.Preproc, Keyword, String, Comment)),
             (r"^.*?\n", Text),
         ],
         'msg': [
-            (r"[^\s]+:(?!//)", Name.Attribute),  # Prefix
+            (r"\S+:(?!//)", Name.Attribute),  # Prefix
             (r".*\n", Text, '#pop'),
         ],
     }
 
@@ -366,7 +427,7 @@ class BBCodeLexer(RegexLexer):
     """
     A lexer that highlights BBCode(-like) syntax.
 
-    *New in Pygments 0.6.*
+    .. versionadded:: 0.6
     """
 
     name = 'BBCode'
@@ -457,7 +518,7 @@ class GroffLexer(RegexLexer):
     Lexer for the (g)roff typesetting language, supporting groff
     extensions. Mainly useful for highlighting manpage sources.
 
-    *New in Pygments 0.6.*
+    .. versionadded:: 0.6
     """
 
     name = 'Groff'
@@ -467,7 +528,7 @@
 
     tokens = {
         'root': [
-            (r'(?i)(\.)(\w+)', bygroups(Text, Keyword), 'request'),
+            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
             (r'\.', Punctuation, 'request'),
             # Regular characters, slurp till we find a backslash or newline
             (r'[^\\\n]*', Text, 'textline'),
@@ -481,7 +542,7 @@
             # groff has many ways to write escapes.
             (r'\\"[^\n]*', Comment),
             (r'\\[fn]\w', String.Escape),
-            (r'\\\(..', String.Escape),
+            (r'\\\(.{2}', String.Escape),
             (r'\\.\[.*\]', String.Escape),
             (r'\\.', String.Escape),
             (r'\\\n', Text, 'request'),
@@ -512,7 +573,7 @@ class ApacheConfLexer(RegexLexer):
     Lexer for configuration files following the Apache config file
     format.
 
-    *New in Pygments 0.6.*
+    .. versionadded:: 0.6
     """
 
     name = 'ApacheConf'
@@ -527,7 +588,7 @@
             (r'(#.*?)$', Comment),
             (r'(<[^\s>]+)(?:(\s+)(.*?))?(>)',
              bygroups(Name.Tag, Text, String, Name.Tag)),
-            (r'([a-zA-Z][a-zA-Z0-9]*)(\s+)',
+            (r'([a-zA-Z][a-zA-Z0-9_]*)(\s+)',
              bygroups(Name.Builtin, Text), 'value'),
             (r'\.+', Text),
         ],
@@ -551,7 +612,7 @@ class MoinWikiLexer(RegexLexer):
     """
     For MoinMoin (and Trac) Wiki markup.
 
-    *New in Pygments 0.7.*
+    .. versionadded:: 0.7
     """
 
     name = 'MoinMoin/Trac Wiki markup'
@@ -572,7 +633,7 @@
             (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # Formatting
             # Lists
             (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
-            (r'^( +)([a-zivx]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
+            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
             # Other Formatting
             (r'\[\[\w+.*?\]\]', Keyword),  # Macro
             (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
@@ -596,14 +657,17 @@ class RstLexer(RegexLexer):
     """
     For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup.
 
-    *New in Pygments 0.7.*
+    .. versionadded:: 0.7
 
     Additional options accepted:
 
     `handlecodeblocks`
-        Highlight the contents of ``.. sourcecode:: langauge`` and
-        ``.. code:: language`` directives with a lexer for the given
-        language (default: ``True``). *New in Pygments 0.8.*
+        Highlight the contents of ``.. sourcecode:: language``,
+        ``.. code:: language`` and ``.. code-block:: language``
+        directives with a lexer for the given language (default:
+        ``True``).
+
+        .. versionadded:: 0.8
     """
     name = 'reStructuredText'
     aliases = ['rst', 'rest', 'restructuredtext']
@@ -613,7 +677,6 @@ class RstLexer(RegexLexer):
 
     def _handle_sourcecode(self, match):
         from pygments.lexers import get_lexer_by_name
-        from pygments.util import ClassNotFound
 
         # section header
         yield match.start(1), Punctuation, match.group(1)
@@ -653,6 +716,13 @@ class RstLexer(RegexLexer):
         for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
             yield item
 
+    # from docutils.parsers.rst.states
+    closers = u'\'")]}>\u2019\u201d\xbb!?'
+    unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0'
+    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
+                         % (re.escape(unicode_delimiters),
+                            re.escape(closers)))
+
     tokens = {
         'root': [
             # Heading with overline
@@ -681,7 +751,7 @@ class RstLexer(RegexLexer):
             (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)',
              bygroups(Text, Operator, using(this, state='inline'))),
             # Sourcecode directives
-            (r'^( *\.\.)(\s*)((?:source)?code)(::)([ \t]*)([^\n]+)'
+            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
              r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)',
              _handle_sourcecode),
             # A directive
@@ -689,9 +759,9 @@ class RstLexer(RegexLexer):
              bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                       using(this, state='inline'))),
             # A reference target
-            (r'^( *\.\.)(\s*)([\w\t ]+:)(.*?)$',
+            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
-            # A footnote target
+            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A substitution def
@@ -705,7 +775,7 @@ class RstLexer(RegexLexer):
            (r'^( *)(:.*?:)([ \t]+)(.*?)$',
             bygroups(Text, Name.Class, Text, Name.Function)),
            # Definition list
-            (r'^([^ ].*(?<!::)\n)((?:(?: +.*)\n)+)',
+            (r'^([^\s].*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)',
@@ -730,10 +800,9 @@ class RstLexer(RegexLexer):
             (r'.', Text),
         ],
         'literal': [
-            (r'[^`\\]+', String),
-            (r'\\.', String),
-            (r'``', String, '#pop'),
-            (r'[`\\]', String),
+            (r'[^`]+', String),
+            (r'``' + end_string_suffix, String, '#pop'),
+            (r'`', String),
         ]
     }
 
@@ -757,25 +826,27 @@ class VimLexer(RegexLexer):
     """
     Lexer for VimL script files.
 
-    *New in Pygments 0.8.*
+    .. versionadded:: 0.8
     """
 
     name = 'VimL'
     aliases = ['vim']
-    filenames = ['*.vim', '.vimrc']
+    filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc',
+                 '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc']
     mimetypes = ['text/x-vim']
     flags = re.MULTILINE
 
     tokens = {
         'root': [
-            # Who decided that doublequote was a good comment character??
             (r'^\s*".*', Comment),
-            (r'(?<=\s)"[^\-:.%#=*].*', Comment),
 
             (r'[ \t]+', Text),
             # TODO: regexes can have other delims
             (r'/(\\\\|\\/|[^\n/])*/', String.Regex),
             (r'"(\\\\|\\"|[^\n"])*"', String.Double),
-            (r"'(\\\\|\\'|[^\n'])*'", String.Single),
+            (r"'(''|[^\n'])*'", String.Single),
+
+            # Who decided that doublequote was a good comment character??
+            (r'(?<=\s)"[^\-:.%#=*].*', Comment),
+
             (r'-?\d+', Number),
             (r'#[0-9a-f]{6}', Number.Hex),
             (r'^:', Punctuation),
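With ``code-block`` added to the directive regex, Sphinx-style blocks now get nested highlighting whenever `handlecodeblocks` is on (the default). A sketch (the trailing blank line lets the directive regex terminate):

    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    from pygments.lexers.text import RstLexer

    rst = ('.. code-block:: python\n'
           '\n'
           "    print('lexed as Python')\n"
           '\n')
    # The directive body is re-lexed with the Python lexer via
    # _handle_sourcecode and do_insertions
    print(highlight(rst, RstLexer(), HtmlFormatter()))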
@@ -839,7 +910,7 @@ class GettextLexer(RegexLexer):
     """
     Lexer for Gettext catalog files.
 
-    *New in Pygments 0.9.*
+    .. versionadded:: 0.9
     """
     name = 'Gettext Catalog'
     aliases = ['pot', 'po']
@@ -867,7 +938,7 @@ class SquidConfLexer(RegexLexer):
     """
     Lexer for `squid <http://www.squid-cache.org/>`_ configuration files.
 
-    *New in Pygments 0.9.*
+    .. versionadded:: 0.9
     """
 
     name = 'SquidConf'
@@ -876,106 +947,106 @@
     mimetypes = ['text/x-squidconf']
     flags = re.IGNORECASE
 
-    keywords = [ "acl", "always_direct", "announce_host",
-                 "announce_period", "announce_port", "announce_to",
-                 "anonymize_headers", "append_domain", "as_whois_server",
-                 "auth_param_basic", "authenticate_children",
-                 "authenticate_program", "authenticate_ttl", "broken_posts",
-                 "buffered_logs", "cache_access_log", "cache_announce",
-                 "cache_dir", "cache_dns_program", "cache_effective_group",
-                 "cache_effective_user", "cache_host", "cache_host_acl",
-                 "cache_host_domain", "cache_log", "cache_mem",
-                 "cache_mem_high", "cache_mem_low", "cache_mgr",
-                 "cachemgr_passwd", "cache_peer", "cache_peer_access",
-                 "cahce_replacement_policy", "cache_stoplist",
-                 "cache_stoplist_pattern", "cache_store_log", "cache_swap",
-                 "cache_swap_high", "cache_swap_log", "cache_swap_low",
-                 "client_db", "client_lifetime", "client_netmask",
-                 "connect_timeout", "coredump_dir", "dead_peer_timeout",
-                 "debug_options", "delay_access", "delay_class",
-                 "delay_initial_bucket_level", "delay_parameters",
-                 "delay_pools", "deny_info", "dns_children", "dns_defnames",
-                 "dns_nameservers", "dns_testnames", "emulate_httpd_log",
-                 "err_html_text", "fake_user_agent", "firewall_ip",
-                 "forwarded_for", "forward_snmpd_port", "fqdncache_size",
-                 "ftpget_options", "ftpget_program", "ftp_list_width",
-                 "ftp_passive", "ftp_user", "half_closed_clients",
-                 "header_access", "header_replace", "hierarchy_stoplist",
-                 "high_response_time_warning", "high_page_fault_warning",
-                 "htcp_port", "http_access", "http_anonymizer", "httpd_accel",
-                 "httpd_accel_host", "httpd_accel_port",
-                 "httpd_accel_uses_host_header", "httpd_accel_with_proxy",
-                 "http_port", "http_reply_access", "icp_access",
-                 "icp_hit_stale", "icp_port", "icp_query_timeout",
-                 "ident_lookup", "ident_lookup_access", "ident_timeout",
-                 "incoming_http_average", "incoming_icp_average",
-                 "inside_firewall", "ipcache_high", "ipcache_low",
-                 "ipcache_size", "local_domain", "local_ip", "logfile_rotate",
-                 "log_fqdn", "log_icp_queries", "log_mime_hdrs",
-                 "maximum_object_size", "maximum_single_addr_tries",
-                 "mcast_groups", "mcast_icp_query_timeout", "mcast_miss_addr",
-                 "mcast_miss_encode_key", "mcast_miss_port", "memory_pools",
-                 "memory_pools_limit", "memory_replacement_policy",
-                 "mime_table", "min_http_poll_cnt", "min_icp_poll_cnt",
-                 "minimum_direct_hops", "minimum_object_size",
-                 "minimum_retry_timeout", "miss_access", "negative_dns_ttl",
-                 "negative_ttl", "neighbor_timeout", "neighbor_type_domain",
-                 "netdb_high", "netdb_low", "netdb_ping_period",
-                 "netdb_ping_rate", "never_direct", "no_cache",
-                 "passthrough_proxy", "pconn_timeout", "pid_filename",
-                 "pinger_program", "positive_dns_ttl", "prefer_direct",
-                 "proxy_auth", "proxy_auth_realm", "query_icmp", "quick_abort",
-                 "quick_abort", "quick_abort_max", "quick_abort_min",
-                 "quick_abort_pct", "range_offset_limit", "read_timeout",
-                 "redirect_children", "redirect_program",
-                 "redirect_rewrites_host_header", "reference_age",
-                 "reference_age", "refresh_pattern", "reload_into_ims",
-                 "request_body_max_size", "request_size", "request_timeout",
-                 "shutdown_lifetime", "single_parent_bypass",
-                 "siteselect_timeout", "snmp_access", "snmp_incoming_address",
"snmp_port", "source_ping", "ssl_proxy", - "store_avg_object_size", "store_objects_per_bucket", - "strip_query_terms", "swap_level1_dirs", "swap_level2_dirs", - "tcp_incoming_address", "tcp_outgoing_address", - "tcp_recv_bufsize", "test_reachability", "udp_hit_obj", - "udp_hit_obj_size", "udp_incoming_address", - "udp_outgoing_address", "unique_hostname", "unlinkd_program", - "uri_whitespace", "useragent_log", "visible_hostname", - "wais_relay", "wais_relay_host", "wais_relay_port", - ] - - opts = [ "proxy-only", "weight", "ttl", "no-query", "default", - "round-robin", "multicast-responder", "on", "off", "all", - "deny", "allow", "via", "parent", "no-digest", "heap", "lru", - "realm", "children", "credentialsttl", "none", "disable", - "offline_toggle", "diskd", "q1", "q2", - ] - - actions = [ "shutdown", "info", "parameter", "server_list", - "client_list", r'squid\.conf', - ] - - actions_stats = [ "objects", "vm_objects", "utilization", - "ipcache", "fqdncache", "dns", "redirector", "io", - "reply_headers", "filedescriptors", "netdb", - ] - - actions_log = [ "status", "enable", "disable", "clear"] - - acls = [ "url_regex", "urlpath_regex", "referer_regex", "port", - "proto", "req_mime_type", "rep_mime_type", "method", - "browser", "user", "src", "dst", "time", "dstdomain", "ident", - "snmp_community", - ] - - ip_re = r'\b(?:\d{1,3}\.){3}\d{1,3}\b' + keywords = [ + "access_log", "acl", "always_direct", "announce_host", + "announce_period", "announce_port", "announce_to", "anonymize_headers", + "append_domain", "as_whois_server", "auth_param_basic", + "authenticate_children", "authenticate_program", "authenticate_ttl", + "broken_posts", "buffered_logs", "cache_access_log", "cache_announce", + "cache_dir", "cache_dns_program", "cache_effective_group", + "cache_effective_user", "cache_host", "cache_host_acl", + "cache_host_domain", "cache_log", "cache_mem", "cache_mem_high", + "cache_mem_low", "cache_mgr", "cachemgr_passwd", "cache_peer", + "cache_peer_access", "cahce_replacement_policy", "cache_stoplist", + "cache_stoplist_pattern", "cache_store_log", "cache_swap", + "cache_swap_high", "cache_swap_log", "cache_swap_low", "client_db", + "client_lifetime", "client_netmask", "connect_timeout", "coredump_dir", + "dead_peer_timeout", "debug_options", "delay_access", "delay_class", + "delay_initial_bucket_level", "delay_parameters", "delay_pools", + "deny_info", "dns_children", "dns_defnames", "dns_nameservers", + "dns_testnames", "emulate_httpd_log", "err_html_text", + "fake_user_agent", "firewall_ip", "forwarded_for", "forward_snmpd_port", + "fqdncache_size", "ftpget_options", "ftpget_program", "ftp_list_width", + "ftp_passive", "ftp_user", "half_closed_clients", "header_access", + "header_replace", "hierarchy_stoplist", "high_response_time_warning", + "high_page_fault_warning", "hosts_file", "htcp_port", "http_access", + "http_anonymizer", "httpd_accel", "httpd_accel_host", + "httpd_accel_port", "httpd_accel_uses_host_header", + "httpd_accel_with_proxy", "http_port", "http_reply_access", + "icp_access", "icp_hit_stale", "icp_port", "icp_query_timeout", + "ident_lookup", "ident_lookup_access", "ident_timeout", + "incoming_http_average", "incoming_icp_average", "inside_firewall", + "ipcache_high", "ipcache_low", "ipcache_size", "local_domain", + "local_ip", "logfile_rotate", "log_fqdn", "log_icp_queries", + "log_mime_hdrs", "maximum_object_size", "maximum_single_addr_tries", + "mcast_groups", "mcast_icp_query_timeout", "mcast_miss_addr", + "mcast_miss_encode_key", "mcast_miss_port", 
"memory_pools", + "memory_pools_limit", "memory_replacement_policy", "mime_table", + "min_http_poll_cnt", "min_icp_poll_cnt", "minimum_direct_hops", + "minimum_object_size", "minimum_retry_timeout", "miss_access", + "negative_dns_ttl", "negative_ttl", "neighbor_timeout", + "neighbor_type_domain", "netdb_high", "netdb_low", "netdb_ping_period", + "netdb_ping_rate", "never_direct", "no_cache", "passthrough_proxy", + "pconn_timeout", "pid_filename", "pinger_program", "positive_dns_ttl", + "prefer_direct", "proxy_auth", "proxy_auth_realm", "query_icmp", + "quick_abort", "quick_abort", "quick_abort_max", "quick_abort_min", + "quick_abort_pct", "range_offset_limit", "read_timeout", + "redirect_children", "redirect_program", + "redirect_rewrites_host_header", "reference_age", "reference_age", + "refresh_pattern", "reload_into_ims", "request_body_max_size", + "request_size", "request_timeout", "shutdown_lifetime", + "single_parent_bypass", "siteselect_timeout", "snmp_access", + "snmp_incoming_address", "snmp_port", "source_ping", "ssl_proxy", + "store_avg_object_size", "store_objects_per_bucket", + "strip_query_terms", "swap_level1_dirs", "swap_level2_dirs", + "tcp_incoming_address", "tcp_outgoing_address", "tcp_recv_bufsize", + "test_reachability", "udp_hit_obj", "udp_hit_obj_size", + "udp_incoming_address", "udp_outgoing_address", "unique_hostname", + "unlinkd_program", "uri_whitespace", "useragent_log", + "visible_hostname", "wais_relay", "wais_relay_host", "wais_relay_port", + ] + + opts = [ + "proxy-only", "weight", "ttl", "no-query", "default", "round-robin", + "multicast-responder", "on", "off", "all", "deny", "allow", "via", + "parent", "no-digest", "heap", "lru", "realm", "children", "q1", "q2", + "credentialsttl", "none", "disable", "offline_toggle", "diskd", + ] + + actions = [ + "shutdown", "info", "parameter", "server_list", "client_list", + r'squid\.conf', + ] + + actions_stats = [ + "objects", "vm_objects", "utilization", "ipcache", "fqdncache", "dns", + "redirector", "io", "reply_headers", "filedescriptors", "netdb", + ] + + actions_log = ["status", "enable", "disable", "clear"] + + acls = [ + "url_regex", "urlpath_regex", "referer_regex", "port", "proto", + "req_mime_type", "rep_mime_type", "method", "browser", "user", "src", + "dst", "time", "dstdomain", "ident", "snmp_community", + ] + + ip_re = ( + r'(?:(?:(?:[3-9]\d?|2(?:5[0-5]|[0-4]?\d)?|1\d{0,2}|0x0*[0-9a-f]{1,2}|' + r'0+[1-3]?[0-7]{0,2})(?:\.(?:[3-9]\d?|2(?:5[0-5]|[0-4]?\d)?|1\d{0,2}|' + r'0x0*[0-9a-f]{1,2}|0+[1-3]?[0-7]{0,2})){3})|(?!.*::.*::)(?:(?!:)|' + r':(?=:))(?:[0-9a-f]{0,4}(?:(?<=::)|(?<!::):)){6}(?:[0-9a-f]{0,4}' + r'(?:(?<=::)|(?<!::):)[0-9a-f]{0,4}(?:(?<=::)|(?<!:)|(?<=:)(?<!::):)|' + r'(?:25[0-4]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-4]|2[0-4]\d|1\d\d|' + r'[1-9]?\d)){3}))' + ) def makelistre(list): - return r'\b(?:'+'|'.join(list)+r')\b' + return r'\b(?:' + '|'.join(list) + r')\b' tokens = { 'root': [ - (r'\s+', Text), + (r'\s+', Whitespace), (r'#', Comment, 'comment'), (makelistre(keywords), Keyword), (makelistre(opts), Name.Constant), @@ -984,8 +1055,8 @@ class SquidConfLexer(RegexLexer): (r'stats/'+makelistre(actions), String), (r'log/'+makelistre(actions)+r'=', String), (makelistre(acls), Keyword), - (ip_re+r'(?:/(?:'+ip_re+r')|\d+)?', Number), - (r'\b\d+\b', Number), + (ip_re + r'(?:/(?:' + ip_re + r'|\b\d+\b))?', Number.Float), + (r'(?:\b\d+\b(?:-\b\d+|%)?)', Number), (r'\S+', Text), ], 'comment': [ @@ -999,10 +1070,10 @@ class DebianControlLexer(RegexLexer): """ Lexer for Debian ``control`` files and 
@@ -999,10 +1070,10 @@ class DebianControlLexer(RegexLexer):
     """
     Lexer for Debian ``control`` files and ``apt-cache show <pkg>`` outputs.
 
-    *New in Pygments 0.9.*
+    .. versionadded:: 0.9
     """
     name = 'Debian Control file'
-    aliases = ['control']
+    aliases = ['control', 'debcontrol']
     filenames = ['control']
 
     tokens = {
@@ -1010,11 +1081,11 @@
         (r'^(Description)', Keyword, 'description'),
         (r'^(Maintainer)(:\s*)', bygroups(Keyword, Text), 'maintainer'),
         (r'^((Build-)?Depends)', Keyword, 'depends'),
-        (r'^((?:Python-)?Version)(:\s*)([^\s]+)$',
+        (r'^((?:Python-)?Version)(:\s*)(\S+)$',
         bygroups(Keyword, Text, Number)),
-        (r'^((?:Installed-)?Size)(:\s*)([^\s]+)$',
+        (r'^((?:Installed-)?Size)(:\s*)(\S+)$',
         bygroups(Keyword, Text, Number)),
-        (r'^(MD5Sum|SHA1|SHA256)(:\s*)([^\s]+)$',
+        (r'^(MD5Sum|SHA1|SHA256)(:\s*)(\S+)$',
         bygroups(Keyword, Text, Number)),
        (r'^([a-zA-Z\-0-9\.]*?)(:\s*)(.*?)$',
         bygroups(Keyword, Whitespace, String)),
@@ -1026,7 +1097,8 @@
             (r'.', Text),
         ],
         'description': [
-            (r'(.*)(Homepage)(: )([^\s]+)', bygroups(Text, String, Name, Name.Class)),
+            (r'(.*)(Homepage)(: )(\S+)',
+             bygroups(Text, String, Name, Name.Class)),
             (r':.*\n', Generic.Strong),
             (r' .*\n', Text),
             ('', Text, '#pop'),
@@ -1039,9 +1111,9 @@
             (r'\|', Operator),
             (r'[\s]+', Text),
             (r'[}\)]\s*$', Text, '#pop'),
-            (r'[}]', Text),
+            (r'}', Text),
             (r'[^,]$', Name.Function, '#pop'),
-            (r'([\+\.a-zA-Z0-9-][\s\n]*)', Name.Function),
+            (r'([\+\.a-zA-Z0-9-])(\s*)', bygroups(Name.Function, Text)),
             (r'\[.*?\]', Name.Entity),
         ],
         'depend_vers': [
@@ -1068,7 +1140,7 @@ class YamlLexer(ExtendedRegexLexer):
     Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization
     language.
 
-    *New in Pygments 0.11.*
+    .. versionadded:: 0.11
     """
 
     name = 'YAML'
@@ -1378,7 +1450,8 @@ class YamlLexer(ExtendedRegexLexer):
         # ignored and regular whitespaces in quoted scalars
         'quoted-scalar-whitespaces': [
             # leading and trailing whitespaces are ignored
-            (r'^[ ]+|[ ]+$', Text),
+            (r'^[ ]+', Text),
+            (r'[ ]+$', Text),
             # line breaks are ignored
             (r'\n+', Text),
             # other whitespaces are a part of the value
@@ -1447,7 +1520,8 @@ class YamlLexer(ExtendedRegexLexer):
             # the scalar ends with a comment
             (r'[ ]+(?=#)', Text, '#pop'),
             # leading and trailing whitespaces are ignored
-            (r'^[ ]+|[ ]+$', Text),
+            (r'^[ ]+', Text),
+            (r'[ ]+$', Text),
             # line breaks are ignored
             (r'\n+', Text),
             # other whitespaces are a part of the value
@@ -1468,7 +1542,7 @@ class LighttpdConfLexer(RegexLexer):
     """
     Lexer for `Lighttpd <http://lighttpd.net/>`_ configuration files.
 
-    *New in Pygments 0.11.*
+    .. versionadded:: 0.11
     """
     name = 'Lighttpd configuration file'
     aliases = ['lighty', 'lighttpd']
@@ -1496,7 +1570,7 @@ class NginxConfLexer(RegexLexer):
     """
     Lexer for `Nginx <http://nginx.net/>`_ configuration files.
 
-    *New in Pygments 0.11.*
+    .. versionadded:: 0.11
     """
     name = 'Nginx configuration file'
     aliases = ['nginx']
@@ -1542,7 +1616,7 @@ class CMakeLexer(RegexLexer):
     """
     Lexer for `CMake <http://cmake.org/Wiki/CMake>`_ files.
 
-    *New in Pygments 1.2.*
+    .. versionadded:: 1.2
     """
     name = 'CMake'
     aliases = ['cmake']
@@ -1577,7 +1651,7 @@
             # r'VTK_MAKE_INSTANTIATOR|VTK_WRAP_JAVA|VTK_WRAP_PYTHON|'
             # r'VTK_WRAP_TCL|WHILE|WRITE_FILE|'
             # r'COUNTARGS)\b', Name.Builtin, 'args'),
-            (r'\b([A-Za-z_]+)([ \t]*)(\()', bygroups(Name.Builtin, Text,
+            (r'\b(\w+)([ \t]*)(\()', bygroups(Name.Builtin, Text,
                                               Punctuation), 'args'),
             include('keywords'),
             include('ws')
@@ -1586,6 +1660,7 @@
             (r'\(', Punctuation, '#push'),
             (r'\)', Punctuation, '#pop'),
             (r'(\${)(.+?)(})', bygroups(Operator, Name.Variable, Operator)),
+            (r'(\$<)(.+?)(>)', bygroups(Operator, Name.Variable, Operator)),
             (r'(?s)".*?"', String.Double),
             (r'\\\S+', String),
             (r'[^\)$"# \t\n]+', String),
@@ -1602,7 +1677,343 @@
         ],
         'ws': [
             (r'[ \t]+', Text),
-            (r'#.+\n', Comment),
+            (r'#.*\n', Comment),
         ]
     }
 
+    def analyse_text(text):
+        exp = r'^ *CMAKE_MINIMUM_REQUIRED *\( *VERSION *\d(\.\d)* *( FATAL_ERROR)? *\) *$'
+        if re.search(exp, text, flags=re.MULTILINE | re.IGNORECASE):
+            return 0.8
+        return 0.0
+
+
+class HttpLexer(RegexLexer):
+    """
+    Lexer for HTTP sessions.
+
+    .. versionadded:: 1.5
+    """
+
+    name = 'HTTP'
+    aliases = ['http']
+
+    flags = re.DOTALL
+
+    def header_callback(self, match):
+        if match.group(1).lower() == 'content-type':
+            content_type = match.group(5).strip()
+            if ';' in content_type:
+                content_type = content_type[:content_type.find(';')].strip()
+            self.content_type = content_type
+        yield match.start(1), Name.Attribute, match.group(1)
+        yield match.start(2), Text, match.group(2)
+        yield match.start(3), Operator, match.group(3)
+        yield match.start(4), Text, match.group(4)
+        yield match.start(5), Literal, match.group(5)
+        yield match.start(6), Text, match.group(6)
+
+    def continuous_header_callback(self, match):
+        yield match.start(1), Text, match.group(1)
+        yield match.start(2), Literal, match.group(2)
+        yield match.start(3), Text, match.group(3)
+
+    def content_callback(self, match):
+        content_type = getattr(self, 'content_type', None)
+        content = match.group()
+        offset = match.start()
+        if content_type:
+            from pygments.lexers import get_lexer_for_mimetype
+            try:
+                lexer = get_lexer_for_mimetype(content_type)
+            except ClassNotFound:
+                pass
+            else:
+                for idx, token, value in lexer.get_tokens_unprocessed(content):
+                    yield offset + idx, token, value
+                return
+        yield offset, Text, content
+
+    tokens = {
+        'root': [
+            (r'(GET|POST|PUT|DELETE|HEAD|OPTIONS|TRACE|PATCH)( +)([^ ]+)( +)'
+             r'(HTTP)(/)(1\.[01])(\r?\n|$)',
+             bygroups(Name.Function, Text, Name.Namespace, Text,
+                      Keyword.Reserved, Operator, Number, Text),
+             'headers'),
+            (r'(HTTP)(/)(1\.[01])( +)(\d{3})( +)([^\r\n]+)(\r?\n|$)',
+             bygroups(Keyword.Reserved, Operator, Number, Text, Number,
+                      Text, Name.Exception, Text),
+             'headers'),
+        ],
+        'headers': [
+            (r'([^\s:]+)( *)(:)( *)([^\r\n]+)(\r?\n|$)', header_callback),
+            (r'([\t ]+)([^\r\n]+)(\r?\n|$)', continuous_header_callback),
+            (r'\r?\n', Text, 'content')
+        ],
+        'content': [
+            (r'.+', content_callback)
+        ]
+    }
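Because content_callback re-dispatches the message body on the Content-Type header, a response carrying JSON gets its body highlighted with the JSON lexer (assuming one is registered for that mimetype). A sketch with an invented session:

    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    from pygments.lexers.text import HttpLexer

    session = ('HTTP/1.1 200 OK\r\n'
               'Content-Type: application/json\r\n'
               '\r\n'
               '{"status": "ok"}')
    # Headers come out as HTTP tokens; the body is re-lexed as JSON
    print(highlight(session, HttpLexer(), TerminalFormatter()))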
+
+
+class PyPyLogLexer(RegexLexer):
+    """
+    Lexer for PyPy log files.
+
+    .. versionadded:: 1.5
+    """
+    name = "PyPy Log"
+    aliases = ["pypylog", "pypy"]
+    filenames = ["*.pypylog"]
+    mimetypes = ['application/x-pypylog']
+
+    tokens = {
+        "root": [
+            (r"\[\w+\] {jit-log-.*?$", Keyword, "jit-log"),
+            (r"\[\w+\] {jit-backend-counts$", Keyword, "jit-backend-counts"),
+            include("extra-stuff"),
+        ],
+        "jit-log": [
+            (r"\[\w+\] jit-log-.*?}$", Keyword, "#pop"),
+            (r"^\+\d+: ", Comment),
+            (r"--end of the loop--", Comment),
+            (r"[ifp]\d+", Name),
+            (r"ptr\d+", Name),
+            (r"(\()(\w+(?:\.\w+)?)(\))",
+             bygroups(Punctuation, Name.Builtin, Punctuation)),
+            (r"[\[\]=,()]", Punctuation),
+            (r"(\d+\.\d+|inf|-inf)", Number.Float),
+            (r"-?\d+", Number.Integer),
+            (r"'.*'", String),
+            (r"(None|descr|ConstClass|ConstPtr|TargetToken)", Name),
+            (r"<.*?>+", Name.Builtin),
+            (r"(label|debug_merge_point|jump|finish)", Name.Class),
+            (r"(int_add_ovf|int_add|int_sub_ovf|int_sub|int_mul_ovf|int_mul|"
+             r"int_floordiv|int_mod|int_lshift|int_rshift|int_and|int_or|"
+             r"int_xor|int_eq|int_ne|int_ge|int_gt|int_le|int_lt|int_is_zero|"
+             r"int_is_true|"
+             r"uint_floordiv|uint_ge|uint_lt|"
+             r"float_add|float_sub|float_mul|float_truediv|float_neg|"
+             r"float_eq|float_ne|float_ge|float_gt|float_le|float_lt|float_abs|"
+             r"ptr_eq|ptr_ne|instance_ptr_eq|instance_ptr_ne|"
+             r"cast_int_to_float|cast_float_to_int|"
+             r"force_token|quasiimmut_field|same_as|virtual_ref_finish|"
+             r"virtual_ref|mark_opaque_ptr|"
+             r"call_may_force|call_assembler|call_loopinvariant|"
+             r"call_release_gil|call_pure|call|"
+             r"new_with_vtable|new_array|newstr|newunicode|new|"
+             r"arraylen_gc|"
+             r"getarrayitem_gc_pure|getarrayitem_gc|setarrayitem_gc|"
+             r"getarrayitem_raw|setarrayitem_raw|getfield_gc_pure|"
+             r"getfield_gc|getinteriorfield_gc|setinteriorfield_gc|"
+             r"getfield_raw|setfield_gc|setfield_raw|"
+             r"strgetitem|strsetitem|strlen|copystrcontent|"
+             r"unicodegetitem|unicodesetitem|unicodelen|"
+             r"guard_true|guard_false|guard_value|guard_isnull|"
+             r"guard_nonnull_class|guard_nonnull|guard_class|guard_no_overflow|"
+             r"guard_not_forced|guard_no_exception|guard_not_invalidated)",
+             Name.Builtin),
+            include("extra-stuff"),
+        ],
+        "jit-backend-counts": [
+            (r"\[\w+\] jit-backend-counts}$", Keyword, "#pop"),
+            (r":", Punctuation),
+            (r"\d+", Number),
+            include("extra-stuff"),
+        ],
+        "extra-stuff": [
+            (r"\s+", Text),
+            (r"#.*?$", Comment),
+        ],
+    }
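A sketch of the new PyPyLogLexer on a fabricated jit-log fragment (the timestamps and operations are made up, but follow the patterns the states expect):

    from pygments.lexers.text import PyPyLogLexer

    log = ('[2f1dd6c3b7bb] {jit-log-opt-loop\n'
           'i2 = int_add(i0, i1)\n'
           'guard_true(i2, descr=<Guard3>)\n'
           '[2f1dd6c3b8a0] jit-log-opt-loop}\n')
    # The opening "{jit-log-" line pushes the jit-log state; the closing
    # "jit-log-...}" line pops back to root
    for token, value in PyPyLogLexer().get_tokens(log):
        if value.strip():
            print(token, repr(value))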
+
+
+class HxmlLexer(RegexLexer):
+    """
+    Lexer for `haXe build <http://haxe.org/doc/compiler>`_ files.
+
+    .. versionadded:: 1.6
+    """
+    name = 'Hxml'
+    aliases = ['haxeml', 'hxml']
+    filenames = ['*.hxml']
+
+    tokens = {
+        'root': [
+            # Separator
+            (r'(--)(next)', bygroups(Punctuation, Generic.Heading)),
+            # Compiler switches with one dash
+            (r'(-)(prompt|debug|v)', bygroups(Punctuation, Keyword.Keyword)),
+            # Compiler switches with two dashes
+            (r'(--)(neko-source|flash-strict|flash-use-stage|no-opt|no-traces|'
+             r'no-inline|times|no-output)', bygroups(Punctuation, Keyword)),
+            # Targets and other options that take an argument
+            (r'(-)(cpp|js|neko|x|as3|swf9?|swf-lib|php|xml|main|lib|D|resource|'
+             r'cp|cmd)( +)(.+)',
+             bygroups(Punctuation, Keyword, Whitespace, String)),
+            # Options that take only numerical arguments
+            (r'(-)(swf-version)( +)(\d+)',
+             bygroups(Punctuation, Keyword, Whitespace, Number.Integer)),
+            # An option that defines the size, the fps and the background
+            # color of a flash movie
+            (r'(-)(swf-header)( +)(\d+)(:)(\d+)(:)(\d+)(:)([A-Fa-f0-9]{6})',
+             bygroups(Punctuation, Keyword, Whitespace, Number.Integer,
+                      Punctuation, Number.Integer, Punctuation, Number.Integer,
+                      Punctuation, Number.Hex)),
+            # Options with two dashes that take arguments
+            (r'(--)(js-namespace|php-front|php-lib|remap|gen-hx-classes)( +)'
+             r'(.+)', bygroups(Punctuation, Keyword, Whitespace, String)),
+            # Single line comment, multiline ones are not allowed.
+            (r'#.*', Comment.Single)
+        ]
+    }
+
+
+class EbnfLexer(RegexLexer):
+    """
+    Lexer for `ISO/IEC 14977 EBNF
+    <http://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form>`_
+    grammars.
+
+    .. versionadded:: 2.0
+    """
+
+    name = 'EBNF'
+    aliases = ['ebnf']
+    filenames = ['*.ebnf']
+    mimetypes = ['text/x-ebnf']
+
+    tokens = {
+        'root': [
+            include('whitespace'),
+            include('comment_start'),
+            include('identifier'),
+            (r'=', Operator, 'production'),
+        ],
+        'production': [
+            include('whitespace'),
+            include('comment_start'),
+            include('identifier'),
+            (r'"[^"]*"', String.Double),
+            (r"'[^']*'", String.Single),
+            (r'(\?[^?]*\?)', Name.Entity),
+            (r'[\[\]{}(),|]', Punctuation),
+            (r'-', Operator),
+            (r';', Punctuation, '#pop'),
+        ],
+        'whitespace': [
+            (r'\s+', Text),
+        ],
+        'comment_start': [
+            (r'\(\*', Comment.Multiline, 'comment'),
+        ],
+        'comment': [
+            (r'[^*)]', Comment.Multiline),
+            include('comment_start'),
+            (r'\*\)', Comment.Multiline, '#pop'),
+            (r'[*)]', Comment.Multiline),
+        ],
+        'identifier': [
+            (r'([a-zA-Z][a-zA-Z0-9 \-]*)', Keyword),
+        ],
+    }
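A small grammar run through the new EbnfLexer (a sketch; the grammar itself is made up):

    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    from pygments.lexers.text import EbnfLexer

    grammar = ('(* binary numbers *)\n'
               'digit = "0" | "1" ;\n'
               'number = digit, {digit} ;\n')
    # "=" pushes the production state; ";" pops back to root
    print(highlight(grammar, EbnfLexer(), TerminalFormatter()))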
+
+
+class TodotxtLexer(RegexLexer):
+    """
+    Lexer for `Todo.txt <http://todotxt.com/>`_ todo list format.
+
+    .. versionadded:: 2.0
+    """
+
+    name = 'Todotxt'
+    aliases = ['todotxt']
+    # *.todotxt is not a standard extension for Todo.txt files; including it
+    # makes testing easier, and also makes autodetecting file type easier.
+    filenames = ['todo.txt', '*.todotxt']
+    mimetypes = ['text/x-todo']
+
+    # Aliases mapping Todo.txt format concepts to standard token types
+    CompleteTaskText = Operator  # Chosen to de-emphasize complete tasks
+    IncompleteTaskText = Text    # Incomplete tasks should look like plain text
+
+    # Priority should have most emphasis to indicate importance of tasks
+    Priority = Generic.Heading
+    # Dates should have next most emphasis because time is important
+    Date = Generic.Subheading
+
+    # Project and context should have equal weight, and be in different colors
+    Project = Generic.Error
+    Context = String
+
+    # If tag functionality is added, it should have the same weight as Project
+    # and Context, and a different color. Generic.Traceback would work well.
+
+    # Regex patterns for building up rules; dates, priorities, projects, and
+    # contexts are all atomic
+    # TODO: Make date regex more ISO 8601 compliant
+    date_regex = r'\d{4,}-\d{2}-\d{2}'
+    priority_regex = r'\([A-Z]\)'
+    project_regex = r'\+\S+'
+    context_regex = r'@\S+'
+
+    # Compound regex expressions
+    complete_one_date_regex = r'(x )(' + date_regex + r')'
+    complete_two_date_regex = (complete_one_date_regex + r'( )(' +
+                               date_regex + r')')
+    priority_date_regex = r'(' + priority_regex + r')( )(' + date_regex + r')'
+
+    tokens = {
+        # Should parse starting at beginning of line; each line is a task
+        'root': [
+            # Complete task entry points: two total:
+            # 1. Complete task with two dates
+            (complete_two_date_regex, bygroups(CompleteTaskText, Date,
+                                               CompleteTaskText, Date),
+             'complete'),
+            # 2. Complete task with one date
+            (complete_one_date_regex, bygroups(CompleteTaskText, Date),
+             'complete'),
+
+            # Incomplete task entry points: six total:
+            # 1. Priority plus date
+            (priority_date_regex, bygroups(Priority, IncompleteTaskText, Date),
+             'incomplete'),
+            # 2. Priority only
+            (priority_regex, Priority, 'incomplete'),
+            # 3. Leading date
+            (date_regex, Date, 'incomplete'),
+            # 4. Leading context
+            (context_regex, Context, 'incomplete'),
+            # 5. Leading project
+            (project_regex, Project, 'incomplete'),
+            # 6. Non-whitespace catch-all
+            (r'\S+', IncompleteTaskText, 'incomplete'),
+        ],
+
+        # Parse a complete task
+        'complete': [
+            # Newline indicates end of task, should return to root
+            (r'\s*\n', CompleteTaskText, '#pop'),
+            # Tokenize contexts and projects
+            (context_regex, Context),
+            (project_regex, Project),
+            # Tokenize non-whitespace text
+            (r'\S+', CompleteTaskText),
+            # Tokenize whitespace not containing a newline
+            (r'\s+', CompleteTaskText),
+        ],
+
+        # Parse an incomplete task
+        'incomplete': [
+            # Newline indicates end of task, should return to root
+            (r'\s*\n', IncompleteTaskText, '#pop'),
+            # Tokenize contexts and projects
+            (context_regex, Context),
+            (project_regex, Project),
+            # Tokenize non-whitespace text
+            (r'\S+', IncompleteTaskText),
+            # Tokenize whitespace not containing a newline
+            (r'\s+', IncompleteTaskText),
+        ],
+    }
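A sketch of TodotxtLexer on a two-task list (the tasks are invented); the first line exercises the priority-plus-date entry point, the second the completed-task path:

    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    from pygments.lexers.text import TodotxtLexer

    todo = ('(A) 2014-01-15 Call mom @phone +family\n'
            'x 2014-01-10 2014-01-09 Review patch @computer +pygments\n')
    # Priorities render as Generic.Heading, dates as Generic.Subheading;
    # @contexts and +projects are tokenized inside both task states
    print(highlight(todo, TodotxtLexer(), TerminalFormatter()))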