summaryrefslogtreecommitdiff
path: root/pygments/lexers/hexdump.py
blob: da28543270cf3152c030385541ecd0a077a1bd3a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# -*- coding: utf-8 -*-
"""
    pygments.lexers.hexdump
    ~~~~~~~~~~~~~~~~~~~~~~~

    Lexers for hexadecimal dumps.

    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

from pygments.lexer import RegexLexer, bygroups, include
from pygments.token import Text, Name, Number, String, Punctuation

__all__ = ['HexdumpLexer']


class HexdumpLexer(RegexLexer):
    """
    For typical hex dump output formats by the UNIX and GNU/Linux tools ``hexdump``,
    ``hd``, ``hexcat``, ``od`` and ``xxd``, and the DOS tool ``DEBUG``. For example:

    .. sourcecode:: hexdump

        00000000  7f 45 4c 46 02 01 01 00  00 00 00 00 00 00 00 00  |.ELF............|
        00000010  02 00 3e 00 01 00 00 00  c5 48 40 00 00 00 00 00  |..>......H@.....|

    The specific supported formats are the outputs of:

    * ``hexdump FILE``
    * ``hexdump -C FILE`` -- the `canonical` format used in the example.
    * ``hd FILE`` -- same as ``hexdump -C FILE``.
    * ``hexcat FILE``
    * ``od -t x1z FILE``
    * ``xxd FILE``
    * ``DEBUG.EXE FILE.COM`` and entering ``d`` at the prompt.

    .. versionadded:: 2.1
    """
    name = 'Hexdump'
    aliases = ['hexdump']

    # A single hexadecimal digit, in either case.  Only 0-9 and A-F are valid
    # hex digits; every offset and byte rule below is built from this class.
    hd = r'[0-9A-Fa-f]'

    tokens = {
        # Initial state.  The first full-width text column seen on a line
        # decides which dedicated state handles the remaining lines; those
        # states are pushed here and contain no '#pop' rule.
        'root': [
            (r'\n', Text),
            include('offset'),
            # Two bytes joined by a dash, e.g. "00-00".
            (r'('+hd+r'{2})(\-)('+hd+r'{2})',
             bygroups(Number.Hex, Punctuation, Number.Hex)),
            # A single byte written as two hex digits.
            (hd+r'{2}', Number.Hex),
            # Exactly 16 characters between ">" and "<" at end of line:
            # switch to the bracket-delimited state.
            (r'(\s{2,3})(\>)(.{16})(\<)$',
             bygroups(Text, Punctuation, String, Punctuation), 'bracket-strings'),
            # Exactly 16 characters between "|" and "|" at end of line:
            # switch to the pipe-delimited state.
            (r'(\s{2,3})(\|)(.{16})(\|)$',
             bygroups(Text, Punctuation, String, Punctuation), 'piped-strings'),
            # Shorter (1-15 character) delimited columns are highlighted
            # without changing state.
            (r'(\s{2,3})(\>)(.{1,15})(\<)$',
             bygroups(Text, Punctuation, String, Punctuation)),
            (r'(\s{2,3})(\|)(.{1,15})(\|)$',
             bygroups(Text, Punctuation, String, Punctuation)),
            # Undelimited text column: a short one stays in 'root', a 16- or
            # 20-character one switches to the undelimited state.
            (r'(\s{2,3})(.{1,15})$', bygroups(Text, String)),
            (r'(\s{2,3})(.{16}|.{20})$', bygroups(Text, String), 'nonpiped-strings'),
            (r'\s', Text),
            # A "*" at the start of a line.
            (r'^\*', Punctuation),
        ],
        # Address column at the start of a line.
        'offset': [
            # Hex digits followed by ":"; anything up to the next whitespace
            # is then handled by 'offset-mode'.
            (r'^('+hd+'+)(:)', bygroups(Name.Label, Punctuation), 'offset-mode'),
            # Hex digits with no colon.
            (r'^'+hd+'+', Name.Label),
        ],
        # Entered after "<hex>:"; further hex runs and colons still count as
        # part of the label until the first whitespace character pops back.
        'offset-mode': [
            (r'\s', Text, '#pop'),
            (hd+'+', Name.Label),
            (r':', Punctuation)
        ],
        # Remaining lines of a dump whose text column is "|...|".
        'piped-strings': [
            (r'\n', Text),
            include('offset'),
            (hd+r'{2}', Number.Hex),
            (r'(\s{2,3})(\|)(.{1,16})(\|)$',
             bygroups(Text, Punctuation, String, Punctuation)),
            (r'\s', Text),
            (r'^\*', Punctuation),
        ],
        # Remaining lines of a dump whose text column is ">...<".
        'bracket-strings': [
            (r'\n', Text),
            include('offset'),
            (hd+r'{2}', Number.Hex),
            (r'(\s{2,3})(\>)(.{1,16})(\<)$',
             bygroups(Text, Punctuation, String, Punctuation)),
            (r'\s', Text),
            (r'^\*', Punctuation),
        ],
        # Remaining lines of a dump whose text column has no delimiters.
        'nonpiped-strings': [
            (r'\n', Text),
            include('offset'),
            (r'('+hd+r'{2})(\-)('+hd+r'{2})',
             bygroups(Number.Hex, Punctuation, Number.Hex)),
            (hd+r'{2}', Number.Hex),
            # A run of 19 or more whitespace characters before the text
            # column; the text is matched lazily.
            (r'(\s{19,})(.{1,20}?)$', bygroups(Text, String)),
            (r'(\s{2,3})(.{1,20})$', bygroups(Text, String)),
            (r'\s', Text),
            (r'^\*', Punctuation),
        ],
    }