Add Notmuch lexer

author: Daniel Santana <daniel@santana.tech> 2019-11-24 13:04:39 -0300
committer: Georg Brandl <georg@python.org> 2019-11-26 06:10:12 +0100
commit: 176f1ac9efbbbd6b011f1537b20ab379cef2d9be (patch)
tree: 05417092f584684b5c1e7d2579702faae9aa366f
parent: dff6629557c3bac8324bdbe4ef9da5f001a863b4 (diff)
download: pygments-git-176f1ac9efbbbd6b011f1537b20ab379cef2d9be.tar.gz
3 files changed, 104 insertions, 3 deletions
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index 81e03c37..c6fc606c 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -305,6 +305,7 @@ LEXERS = {
     'NimrodLexer': ('pygments.lexers.nimrod', 'Nimrod', ('nim', 'nimrod'), ('*.nim', '*.nimrod'), ('text/x-nim',)),
     'NitLexer': ('pygments.lexers.nit', 'Nit', ('nit',), ('*.nit',), ()),
     'NixLexer': ('pygments.lexers.nix', 'Nix', ('nixos', 'nix'), ('*.nix',), ('text/x-nix',)),
+    'NotmuchLexer': ('pygments.lexers.textfmts', 'Notmuch', ('notmuch',), (), ()),
     'NuSMVLexer': ('pygments.lexers.smv', 'NuSMV', ('nusmv',), ('*.smv',), ()),
     'NumPyLexer': ('pygments.lexers.python', 'NumPy', ('numpy',), (), ()),
     'ObjdumpLexer': ('pygments.lexers.asm', 'objdump', ('objdump',), ('*.objdump',), ('text/x-objdump',)),
diff --git a/pygments/lexers/textfmts.py b/pygments/lexers/textfmts.py
index a3aed0c0..d3a191b0 100644
--- a/pygments/lexers/textfmts.py
+++ b/pygments/lexers/textfmts.py
@@ -11,12 +11,14 @@
 
 import re
 
-from pygments.lexer import RegexLexer, bygroups
+from pygments.lexers import guess_lexer, get_lexer_by_name
+from pygments.lexer import RegexLexer, bygroups, default, do_insertions
 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
-    Number, Generic, Literal
+    Number, Generic, Literal, Punctuation
 from pygments.util import ClassNotFound
 
-__all__ = ['IrcLogsLexer', 'TodotxtLexer', 'HttpLexer', 'GettextLexer']
+__all__ = ['IrcLogsLexer', 'TodotxtLexer', 'HttpLexer', 'GettextLexer',
+           'NotmuchLexer']
 
 
 class IrcLogsLexer(RegexLexer):
@@ -295,3 +297,86 @@ class TodotxtLexer(RegexLexer):
             (r'\s+', IncompleteTaskText),
         ],
     }
+
+
+class NotmuchLexer(RegexLexer):
+    """
+    For `Notmuch <https://notmuchmail.org/>`_ email text format.
+
+    .. versionadded:: 2.5
+
+    Additional options accepted:
+
+    `body_lexer`
+        If given, highlight the contents of the message body with the specified
+        lexer, else guess it according to the body content (default: ``None``).
+    """
+
+    name = 'Notmuch'
+    aliases = ['notmuch']
+
+    def _highlight_code(self, match):
+        """Lex a part's content with ``body_lexer``, a guessed lexer or 'text'."""
+        code, offset = match.group(1), match.start(1)
+        try:
+            if self.body_lexer:
+                lexer = get_lexer_by_name(self.body_lexer)
+            else:
+                lexer = guess_lexer(code.strip())
+        except ClassNotFound:
+            lexer = get_lexer_by_name('text')
+        # The sub-lexer counts from 0; shift to positions in the whole input.
+        for index, token, value in lexer.get_tokens_unprocessed(code):
+            yield offset + index, token, value
+
+    tokens = {
+        'root': [
+            (r'\fmessage{\s*', Keyword, ('message', 'message-attr')),
+        ],
+        'message-attr': [
+            (r'(\s*id:\s*)([^\s]+)', bygroups(Name.Attribute, String)),
+            (r'(\s*(?:depth|match|excluded):\s*)(\d+)',
+             bygroups(Name.Attribute, Number.Integer)),
+            (r'(\s*filename:\s*)(.+\n)',
+             bygroups(Name.Attribute, String)),
+            default('#pop'),
+        ],
+        'message': [
+            (r'\fmessage}\n', Keyword, '#pop'),
+            (r'\fheader{\n', Keyword, 'header'),
+            (r'\fbody{\n', Keyword, 'body'),
+        ],
+        'header': [
+            (r'\fheader}\n', Keyword, '#pop'),
+            (r'((?:Subject|From|To|Cc|Date):\s*)(.*\n)',
+             bygroups(Name.Attribute, String)),
+            (r'(.*)(\s*\(.*\))(\s*\(.*\)\n)',
+             bygroups(Generic.Strong, Literal, Name.Tag)),
+        ],
+        'body': [
+            (r'\fpart{\n', Keyword, 'part'),
+            (r'\f(part|attachment){\s*', Keyword, ('part', 'part-attr')),
+            (r'\fbody}\n', Keyword, '#pop'),
+        ],
+        'part-attr': [
+            (r'(ID:\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)),
+            (r'(,\s*)((?:Filename|Content-id):\s*)([^,]+)',
+             bygroups(Punctuation, Name.Attribute, String)),
+            (r'(,\s*)(Content-type:\s*)(.+\n)',
+             bygroups(Punctuation, Name.Attribute, String)),
+            default('#pop'),
+        ],
+        'part': [
+            (r'\f(?:part|attachment)}\n', Keyword, '#pop'),
+            (r'\f(?:part|attachment){\s*', Keyword, ('#push', 'part-attr')),
+            (r'^Non-text part: .*\n', Comment),
+            (r'(?s)(.*?(?=\f(?:part|attachment)}\n))', _highlight_code),
+        ],
+    }
+
+    def analyse_text(text):
+        return 1.0 if text.startswith('\fmessage{') else 0.0
+
+    def __init__(self, **options):
+        self.body_lexer = options.get('body_lexer', None)
+        RegexLexer.__init__(self, **options)
diff --git a/tests/examplefiles/notmuch_example b/tests/examplefiles/notmuch_example
new file mode 100644
index 00000000..61be8c6a
--- /dev/null
+++ b/tests/examplefiles/notmuch_example
@@ -0,0 +1,15 @@
+message{ id:5d0693e2.1c69fb81.d5fc9.1f6e@mx.google.com depth:0 match:1 excluded:0 filename:/home/user/mail/INBOX/new/1560712171_0.11014.blue,U=20254,FMD5=7e33429f656f1e6e9d79b29c3f82c57e:2,
+header{
+John Doe <john.doe@example.com> (1 mins. ago) (inbox unread)
+Subject: Hello world!
+From: john.doe@example.com
+Date: Sun, 16 Jun 2019 16:00:00 -0300
+header}
+body{
+part{ ID: 1, Content-type: text/plain
+#!/bin/sh
+
+echo 'Hello world!'
+part}
+body}
+message}
author	Daniel Santana <daniel@santana.tech>	2019-11-24 13:04:39 -0300
committer	Georg Brandl <georg@python.org>	2019-11-26 06:10:12 +0100
commit	176f1ac9efbbbd6b011f1537b20ab379cef2d9be (patch)
tree	05417092f584684b5c1e7d2579702faae9aa366f
parent	dff6629557c3bac8324bdbe4ef9da5f001a863b4 (diff)
download	pygments-git-176f1ac9efbbbd6b011f1537b20ab379cef2d9be.tar.gz