# path: root/pygments/lexers/sas.py, blob: 55749ebefbeedf8b19a90a1e5abba383bd34ea99
# -*- coding: utf-8 -*-

"""
    pygments.lexers.sas
    ~~~~~~~~~~~~~~~~~~~

    Lexer for SAS.
"""

import re
from pygments.lexer import RegexLexer, include, words
from pygments.token import Comment, Keyword, Name, Number, String, Text, \
    Other, Generic

# Names exported by ``from pygments.lexers.sas import *``: only the lexer class.
__all__ = ['SASLexer']

class SASLexer(RegexLexer):
    """
    For `SAS <http://www.sas.com/>`_ files.

    Syntax from syntax/sas.vim by James Kidd <james.kidd@covance.com>

    The lexer is a plain state machine: ``root`` tries comments, step
    boundaries (``proc``/``data``/``run``/``quit``), in-stream data blocks,
    ``put`` log messages and finally the general keyword/string/number rules.
    Any character none of those rules claim is emitted as ``Text``.
    """

    name = 'SAS'
    aliases = ['SAS', 'sas']
    filenames = ['*.SAS', '*.sas']
    mimetypes = ['text/x-sas', 'text/sas', 'application/x-sas']
    # Every pattern below matches case-insensitively, and ``^``/``$`` anchor
    # at each line rather than only at the start/end of the input.
    flags = re.IGNORECASE | re.MULTILINE

    # Macro-language words; matched only when prefixed with ``%``.
    builtins_macros = (
        "bquote", "nrbquote", "cmpres", "qcmpres", "compstor", "datatyp",
        "display", "do", "else", "end", "eval", "global", "goto", "if",
        "index", "input", "keydef", "label", "left", "length", "let",
        "local", "lowcase", "macro", "mend", "nrquote",
        "nrstr", "put", "qleft", "qlowcase", "qscan",
        "qsubstr", "qsysfunc", "qtrim", "quote", "qupcase", "scan",
        "str", "substr", "superq", "syscall", "sysevalf", "sysexec",
        "sysfunc", "sysget", "syslput", "sysprod", "sysrc", "sysrput",
        "then", "to", "trim", "unquote", "until", "upcase", "verify",
        "while", "window"
    )

    # Control-flow words; matched as whole words.
    builtins_conditionals = (
        "do", "if", "then", "else", "end", "until", "while"
    )

    # Statement names; matched as whole words.
    builtins_statements = (
        "abort", "array", "attrib", "by", "call", "cards", "cards4",
        "catname", "continue", "datalines", "datalines4", "delete", "delim",
        "delimiter", "display", "dm", "drop", "endsas", "error", "file",
        "filename", "footnote", "format", "goto", "in", "infile", "informat",
        "input", "keep", "label", "leave", "length", "libname", "link",
        "list", "lostcard", "merge", "missing", "modify", "options", "output",
        "out", "page", "put", "redirect", "remove", "rename", "replace",
        "retain", "return", "select", "set", "skip", "startsas", "stop",
        "title", "update", "waitsas", "where", "window", "x", "systask"
    )

    # SQL words; matched as whole words.
    builtins_sql = (
        "add", "and", "alter", "as", "cascade", "check", "create",
        "delete", "describe", "distinct", "drop", "foreign", "from",
        "group", "having", "index", "insert", "into", "in", "key", "like",
        "message", "modify", "msgtype", "not", "null", "on", "or",
        "order", "primary", "references", "reset", "restrict", "select",
        "set", "table", "unique", "update", "validate", "view", "where"
    )

    # Function names; matched only when immediately followed by ``(``.
    builtins_functions = (
        "abs", "addr", "airy", "arcos", "arsin", "atan", "attrc",
        "attrn", "band", "betainv", "blshift", "bnot", "bor",
        "brshift", "bxor", "byte", "cdf", "ceil", "cexist", "cinv",
        "close", "cnonct", "collate", "compbl", "compound",
        "compress", "cos", "cosh", "css", "curobs", "cv", "daccdb",
        "daccdbsl", "daccsl", "daccsyd", "dacctab", "dairy", "date",
        "datejul", "datepart", "datetime", "day", "dclose", "depdb",
        "depdbsl", "depsl", "depsyd", "deptab",
        "dequote", "dhms", "dif", "digamma",
        "dim", "dinfo", "dnum", "dopen", "doptname", "doptnum",
        "dread", "dropnote", "dsname", "erf", "erfc", "exist", "exp",
        "fappend", "fclose", "fcol", "fdelete", "fetch", "fetchobs",
        "fexist", "fget", "fileexist", "filename", "fileref",
        "finfo", "finv", "fipname", "fipnamel", "fipstate", "floor",
        "fnonct", "fnote", "fopen", "foptname", "foptnum", "fpoint",
        "fpos", "fput", "fread", "frewind", "frlen", "fsep", "fuzz",
        "fwrite", "gaminv", "gamma", "getoption", "getvarc", "getvarn",
        "hbound", "hms", "hosthelp", "hour", "ibessel", "index",
        "indexc", "indexw", "input", "inputc", "inputn", "int",
        "intck", "intnx", "intrr", "irr", "jbessel", "juldate",
        "kurtosis", "lag", "lbound", "left", "length", "lgamma",
        "libname", "libref", "log", "log10", "log2", "logpdf", "logpmf",
        "logsdf", "lowcase", "max", "mdy", "mean", "min", "minute",
        "mod", "month", "mopen", "mort", "n", "netpv", "nmiss",
        "normal", "note", "npv", "open", "ordinal", "pathname",
        "pdf", "peek", "peekc", "pmf", "point", "poisson", "poke",
        "probbeta", "probbnml", "probchi", "probf", "probgam",
        "probhypr", "probit", "probnegb", "probnorm", "probt",
        "put", "putc", "putn", "qtr", "quote", "ranbin", "rancau",
        "ranexp", "rangam", "range", "rank", "rannor", "ranpoi",
        "rantbl", "rantri", "ranuni", "repeat", "resolve", "reverse",
        "rewind", "right", "round", "saving", "scan", "sdf", "second",
        "sign", "sin", "sinh", "skewness", "soundex", "spedis",
        "sqrt", "std", "stderr", "stfips", "stname", "stnamel",
        "substr", "sum", "symget", "sysget", "sysmsg", "sysprod",
        "sysrc", "system", "tan", "tanh", "time", "timepart", "tinv",
        "tnonct", "today", "translate", "tranwrd", "trigamma",
        "trim", "trimn", "trunc", "uniform", "upcase", "uss", "var",
        "varfmt", "varinfmt", "varlabel", "varlen", "varname",
        "varnum", "varray", "varrayx", "vartype", "verify", "vformat",
        "vformatd", "vformatdx", "vformatn", "vformatnx", "vformatw",
        "vformatwx", "vformatx", "vinarray", "vinarrayx", "vinformat",
        "vinformatd", "vinformatdx", "vinformatn", "vinformatnx",
        "vinformatw", "vinformatwx", "vinformatx", "vlabel",
        "vlabelx", "vlength", "vlengthx", "vname", "vnamex", "vtype",
        "vtypex", "weekday", "year", "yyq", "zipfips", "zipname",
        "zipnamel", "zipstate"
    )

    tokens = {
        'root': [
            include('comments'),
            include('proc-data'),
            include('cards-datalines'),
            include('logs'),
            include('general'),
            # Fallback: consume one character so the lexer always advances.
            (r'.', Text),
        ],
        # SAS is multi-line regardless, but * is ended by ;
        'comments': [
            (r'^\s*\*.*?;', Comment),
            (r'/\*.*?\*/', Comment),
            (r'^\s*\*(.|\n)*?;', Comment.Multiline),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
        ],
        # Special highlight for proc, data, quit, run
        'proc-data': [
            (r'(^|;)\s*(proc [a-zA-Z0-9_]+|data|run|quit)[\s;\n]',
             Keyword.Reserved),
        ],
        # Special highlight cards and datalines
        'cards-datalines': [
            (r'^\s*(datalines|cards)\s*;\s*$', Keyword, 'data'),
        ],
        'data': [
            # The raw data block ends at the FIRST line holding only ';'.
            # The repetition must be lazy: a greedy match would run on to the
            # last such line in the input and swallow any code in between.
            (r'(.|\n)*?^\s*;\s*$', Other, '#pop'),
        ],
        # Special highlight for put NOTE|ERROR|WARNING (order matters)
        'logs': [
            (r'\n?^\s*%?put ', Keyword, 'log-messages'),
        ],
        'log-messages': [
            (r'NOTE(:|-).*', Generic, '#pop'),
            (r'WARNING(:|-).*', Generic.Emph, '#pop'),
            (r'ERROR(:|-).*', Generic.Error, '#pop'),
            # Not a log message: return to the previous state without
            # consuming anything so the rest of the statement is lexed by the
            # normal rules.  This must stay last and must always match,
            # otherwise text such as ``put error_flag;`` would match no rule
            # here and produce Error tokens.
            (r'', Text, '#pop'),
        ],
        'general': [
            include('keywords'),
            include('vars-strings'),
            include('special'),
            include('numbers'),
        ],
        # Keywords, statements, functions, macros
        'keywords': [
            (words(builtins_statements, prefix=r'\b', suffix=r'\b'),
             Keyword),
            (words(builtins_sql, prefix=r'\b', suffix=r'\b'),
             Keyword),
            (words(builtins_conditionals, prefix=r'\b', suffix=r'\b'),
             Keyword),
            (words(builtins_macros, prefix=r'%', suffix=r'\b'),
             Name.Builtin),
            # The opening parenthesis is part of the match (and of the token).
            (words(builtins_functions, prefix=r'\b', suffix=r'\('),
             Name.Builtin),
        ],
        # Strings and user-defined variables and macros (order matters)
        'vars-strings': [
            (r'&[a-zA-Z_][a-zA-Z0-9_]{0,31}\.?', Name.Variable),
            (r'%[a-zA-Z_][a-zA-Z0-9_]{0,31}', Name.Function),
            (r'\'', String, 'string_squote'),
            (r'"', String, 'string_dquote'),
        ],
        'string_squote': [
            ('\'', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),
            # AFAIK, macro variables are not evaluated in single quotes, so
            # '&' gets no special treatment in this state.
            (r'[^$\'\\]+', String),
            (r'[$\'\\]', String),
        ],
        'string_dquote': [
            (r'"', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),
            # Enter 'validvar' only when a name really follows the '&';
            # that state has a single rule and would otherwise get stuck
            # emitting Error tokens until it found an identifier.
            (r'&(?=[a-zA-Z_])', Name.Variable, 'validvar'),
            # A bare ampersand is ordinary string content.
            (r'&', String),
            (r'[^$&"\\]+', String),
            (r'[$"\\]', String),
        ],
        'validvar': [
            (r'[a-zA-Z_][a-zA-Z0-9_]{0,31}\.?', Name.Variable, '#pop'),
        ],
        # SAS numbers and special variables
        'numbers': [
            (r'\b[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[i]?\b',
             Number),
        ],
        'special': [
            # Word boundaries keep these from matching inside longer
            # identifiers (e.g. the 'null' in 'annulled').
            (r'\b(null|missing|_all_|_automatic_|_character_|_n_|'
             r'_infile_|_name_|_null_|_numeric_|_user_|_webout_)\b',
             Keyword.Constant),
        ],
        # TODO: operators are not highlighted yet.
    }