summaryrefslogtreecommitdiff
path: root/external/autopygmentize
blob: 85d236681ef2b3795b6e5c83b3cc50c294e48093 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/bin/bash
# Best effort auto-pygmentization with transparent decompression
# by Reuben Thomas 2008-2022
# This program is in the public domain.

# Usage: autopygmentize [PYGMENTIZE-OPTION...] FILE
#
# Strategy: ask file(1) for the MIME type of FILE (looking inside compressed
# files) and map it to a lexer; for plain text, check a few special file names
# and otherwise let pygmentize guess from the file name; if no lexer is found, fail.
# Set the environment variable PYGMENTIZE_OPTS or pass options before the file path to configure pygments.

# This program can be used as a .lessfilter for the less pager to auto-color less's output

# Options passed to every invocation of file(1)
file_common_opts=(--brief --dereference)

# Print the name of the pygments lexer to use for the file $1.
# Prints an empty line when no lexer is known for the file's type.
detect_lexer() {
    local path=$1
    local lexer=

    case $(file --mime-type --uncompress "${file_common_opts[@]}" "$path") in
        application/xml|image/svg+xml) lexer=xml;;
        application/javascript) lexer=javascript;;
        application/json) lexer=json;;
        text/html) lexer=html;;
        text/troff) lexer=nroff;;
        text/x-asm) lexer=nasm;;
        text/x-awk) lexer=awk;;
        text/x-c) lexer=c;;
        text/x-c++) lexer=cpp;;
        text/x-clojure) lexer=clojure;;
        text/x-crystal) lexer=crystal;;
        text/x-diff) lexer=diff;;
        text/x-execline) lexer=execline;;
        text/x-forth) lexer=forth;;
        text/x-fortran) lexer=fortran;;
        text/x-gawk) lexer=gawk;;
        text/x-java) lexer=java;;
        text/x-lisp) lexer=common-lisp;;
        text/x-lua|text/x-luatex) lexer=lua;;
        text/x-makefile) lexer=make;;
        text/x-msdos-batch) lexer=bat;;
        text/x-nawk) lexer=nawk;;
        text/x-objective-c) lexer=objective-c;;
        text/x-pascal) lexer=pascal;;
        text/x-perl) lexer=perl;;
        text/x-php) lexer=php;;
        text/x-po) lexer=po;;
        text/x-python) lexer=python;;
        text/x-ruby) lexer=ruby;;
        text/x-script.python) lexer=python;;
        text/x-shellscript) lexer=sh;;
        text/x-tcl) lexer=tcl;;
        text/x-tex|text/x-texinfo) lexer=latex;; # FIXME: texinfo really needs its own lexer
        text/xml) lexer=xml;;
        text/vnd.graphviz) lexer=graphviz;;

        # Types that file outputs which pygmentize didn't support as of file 5.41, pygments 2.11.2
        # text/binary
        # text/calendar
        # text/PGP
        # text/prs.lines.tag
        # text/rtf
        # text/spreadsheet
        # text/texmacs
        # text/vcard
        # text/vnd.sosi
        # text/x-Algol68
        # text/x-bcpl
        # text/x-dmtf-mif
        # text/x-gimp-curve
        # text/x-gimp-ggr
        # text/x-gimp-gpl
        # text/x-info
        # text/x-installshield-lid
        # text/x-m4
        # text/x-modulefile
        # text/x-ms-adm
        # text/x-ms-cpx
        # text/x-ms-regedit
        # text/x-ms-tag
        # text/x-systemtap
        # text/x-vcard
        # text/x-wine-extension-reg
        # text/x-xmcd

        text/plain)  # special filenames. TODO: insert more
            case ${path##*/} in
                .zshrc) lexer=sh;;
            esac
            # Only ask pygmentize when the file name was not a special case,
            # so that the special case is not overwritten.
            # pygmentize -N is much cheaper than file, but makes some bad guesses (e.g.
            # it guesses ".pl" is Prolog, not Perl)
            if [[ -z $lexer ]]; then
                lexer=$(pygmentize -N "$path")
            fi
            ;;
    esac

    printf '%s\n' "$lexer"
}

# Set the global array `concat` to the command (program plus options) that
# writes the contents of the file $1 to standard output, decompressing it if
# file(1) reports a known compressed type. Falls back to plain cat.
select_concat() {
    local path=$1

    concat=()
    case $(file "${file_common_opts[@]}" --mime-type "$path") in
        # TODO: add support
        # application/x-rzip (does not decompress to stdout)
        # application/x-dzip (Windows only)
        application/gzip|application/x-gzip)  concat=(zcat);;
        application/x-bzip)                   concat=(bzip -dc);;
        application/x-bzip2)                  concat=(bzcat);;
        application/x-lz4)                    concat=(lz4 -dc);;
        application/x-lzh-compressed)         concat=(p7zip -dc);;
        application/x-lzma)                   concat=(lzcat);;
        application/x-lzip)                   concat=(lzip -dc);;
        application/x-xz)                     concat=(xzcat);;
        application/x-zoo)                    concat=(zoo fu);;
    esac

    # If no concatenator was chosen or it isn't installed, use cat instead.
    # The output of command -v is discarded: it must not leak into our stdout.
    if (( ${#concat[@]} == 0 )) || ! command -v "${concat[0]}" >/dev/null; then
        concat=(cat)
    fi
}

# Entry point. The last argument is the file to show; all preceding arguments
# are passed through to pygmentize unchanged.
# Returns 2 when called without arguments, 1 when no lexer could be found,
# and otherwise the exit status of the display pipeline.
main() {
    if (( $# == 0 )); then
        printf 'Usage: %s [PYGMENTIZE-OPTION...] FILE\n' "${0##*/}" >&2
        return 2
    fi

    local file="${!#}"                  # last argument
    local -a options=("${@:1:$#-1}")    # other args are options to pass to pygmentize
    local lexer encoding
    local -a concat=() prereader=() reader=(cat)

    lexer=$(detect_lexer "$file")

    # Find a concatenator for compressed files
    select_concat "$file"

    # Find a suitable reader, preceded by a hex dump for binary files,
    # or fmt for text with very long lines
    encoding=$(file --mime-encoding --uncompress "${file_common_opts[@]}" "$file")
    # FIXME: need a way to switch between hex and text view, as file often
    # misdiagnoses files when they contain a few control characters
    # if [[ $encoding == "binary" ]]; then
    #     prereader=(od -x) # POSIX fallback
    #     if command -v hd >/dev/null; then
    #         prereader=(hd) # preferred
    #     fi
    #     lexer=hexdump
    #     encoding=latin1
    # fi
    # FIXME: Using fmt does not work well for system logs
    # if [[ "$lexer" == "text" ]]; then
    #    if file "$file" | grep -q "text, with very long lines"; then
    #        reader=(fmt)
    #    fi
    # fi

    # Fail explicitly rather than running "pygmentize -l" with no lexer name
    if [[ -z $lexer ]]; then
        printf '%s: no lexer found for %s\n' "${0##*/}" "$file" >&2
        return 1
    fi

    if [[ $lexer != "text" ]]; then
        # PYGMENTIZE_OPTS is deliberately left unquoted so that it is split
        # into separate options.
        # shellcheck disable=SC2086
        reader=(pygmentize -O "inencoding=$encoding" $PYGMENTIZE_OPTS "${options[@]}" -l "$lexer")
    fi

    # Run the reader
    if (( ${#prereader[@]} > 0 )); then
        "${concat[@]}" "$file" | "${prereader[@]}" | "${reader[@]}"
    else
        "${concat[@]}" "$file" | "${reader[@]}"
    fi
}

main "$@"