summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- pygments/lexers/_stata_builtins.py 10
-rw-r--r-- pygments/lexers/stata.py 143
-rw-r--r-- pygments/styles/__init__.py 3
-rw-r--r-- pygments/styles/stata_dark.py 41
-rw-r--r-- pygments/styles/stata_light.py (renamed from pygments/styles/stata.py) 27
5 files changed, 166 insertions, 58 deletions
diff --git a/pygments/lexers/_stata_builtins.py b/pygments/lexers/_stata_builtins.py
index 5f5f72a9..13a3dacf 100644
--- a/pygments/lexers/_stata_builtins.py
+++ b/pygments/lexers/_stata_builtins.py
@@ -10,8 +10,12 @@
"""
+builtins_special = (
+ "if", "in", "using", "replace", "by", "gen", "generate"
+)
+
builtins_base = (
- "if", "else", "in", "foreach", "for", "forv", "forva",
+ "if", "else", "else\s+if", "in", "foreach", "for", "forv", "forva",
"forval", "forvalu", "forvalue", "forvalues", "by", "bys",
"bysort", "quietly", "qui", "about", "ac",
"ac_7", "acprplot", "acprplot_7", "adjust", "ado", "adopath",
@@ -66,7 +70,7 @@ builtins_base = (
"doedit", "dotplot", "dotplot_7", "dprobit", "drawnorm",
"drop", "ds", "ds_util", "dstdize", "duplicates", "durbina",
"dwstat", "dydx", "e", "ed", "edi", "edit", "egen",
- "eivreg", "emdef", "en", "enc", "enco", "encod", "encode",
+ "eivreg", "emdef", "end", "en", "enc", "enco", "encod", "encode",
"eq", "erase", "ereg", "ereg_lf", "ereg_p", "ereg_sw",
"ereghet", "ereghet_glf", "ereghet_glf_sh", "ereghet_gp",
"ereghet_ilf", "ereghet_ilf_sh", "ereghet_ip", "eret",
@@ -415,5 +419,3 @@ builtins_functions = (
"weekly", "wofd", "word", "wordcount", "year", "yearly",
"yh", "ym", "yofd", "yq", "yw"
)
-
-
diff --git a/pygments/lexers/stata.py b/pygments/lexers/stata.py
index a015a23e..8955a05c 100644
--- a/pygments/lexers/stata.py
+++ b/pygments/lexers/stata.py
@@ -9,6 +9,7 @@
:license: BSD, see LICENSE for details.
"""
+import re
from pygments.lexer import RegexLexer, include, words
from pygments.token import Comment, Keyword, Name, Number, \
String, Text, Operator
@@ -33,56 +34,118 @@ class StataLexer(RegexLexer):
aliases = ['stata', 'do']
filenames = ['*.do', '*.ado']
mimetypes = ['text/x-stata', 'text/stata', 'application/x-stata']
+ flags = re.MULTILINE | re.DOTALL
tokens = {
'root': [
include('comments'),
- include('vars-strings'),
+ include('strings'),
+ include('macros'),
include('numbers'),
include('keywords'),
+ include('operators'),
+ include('format'),
(r'.', Text),
],
- # Global and local macros; regular and special strings
- 'vars-strings': [
- (r'\$[\w{]', Name.Variable.Global, 'var_validglobal'),
- (r'`\w{0,31}\'', Name.Variable),
- (r'"', String, 'string_dquote'),
- (r'`"', String, 'string_mquote'),
- ],
- # For either string type, highlight macros as macros
- 'string_dquote': [
- (r'"', String, '#pop'),
- (r'\\\\|\\"|\\\n', String.Escape),
- (r'\$', Name.Variable.Global, 'var_validglobal'),
- (r'`', Name.Variable, 'var_validlocal'),
- (r'[^$`"\\]+', String),
- (r'[$"\\]', String),
- ],
- 'string_mquote': [
+ # Comments are a complicated beast in Stata because they can be
+ # nested and there are a few corner cases with that. See:
+ # - github.com/kylebarron/language-stata/issues/90
+ # - statalist.org/forums/forum/general-stata-discussion/general/1448244
+ 'comments': [
+ (r'(^//|(?<=\s)//)(?!/)', Comment.Single, 'comments-double-slash'),
+ (r'^\s*\*', Comment.Single, 'comments-star'),
+ (r'/\*', Comment.Multiline, 'comments-block'),
+ (r'(^///|(?<=\s)///)', Comment.Special, 'comments-triple-slash')
+ ],
+ 'comments-block': [
+ (r'/\*', Comment.Multiline, '#push'),
+ # This ends and restarts a comment block, but we need to catch it so
+ # that it doesn't start _another_ level of comment blocks.
+ (r'\*/\*', Comment.Multiline),
+ (r'(\*/\s+\*(?!/)[^\n]*)|(\*/)', Comment.Multiline, '#pop'),
+ # Match anything else as a character inside the comment
+ (r'.', Comment.Multiline),
+ ],
+ 'comments-star': [
+ (r'///.*?\n', Comment.Single,
+ ('#pop', 'comments-triple-slash')),
+ (r'(^//|(?<=\s)//)(?!/)', Comment.Single,
+ ('#pop', 'comments-double-slash')),
+ (r'/\*', Comment.Multiline, 'comments-block'),
+ (r'.(?=\n)', Comment.Single, '#pop'),
+ (r'.', Comment.Single),
+ ],
+ 'comments-triple-slash': [
+ (r'\n', Comment.Special, '#pop'),
+ # A // breaks out of a comment for the rest of the line
+ (r'//.*?(?=\n)', Comment.Single, '#pop'),
+ (r'.', Comment.Special),
+ ],
+ 'comments-double-slash': [
+ (r'\n', Text, '#pop'),
+ (r'.', Comment.Single),
+ ],
+ # `"compound string"' and regular "string"; note the former are
+ # nested.
+ 'strings': [
+ (r'`"', String, 'string-compound'),
+ (r'(?<!`)"', String, 'string-regular'),
+ ],
+ 'string-compound': [
+ (r'`"', String, '#push'),
(r'"\'', String, '#pop'),
- (r'\\\\|\\"|\\\n', String.Escape),
- (r'\$', Name.Variable.Global, 'var_validglobal'),
- (r'`', Name.Variable, 'var_validlocal'),
- (r'[^$`"\\]+', String),
- (r'[$"\\]', String),
- ],
- 'var_validglobal': [
- (r'\{\w{0,32}\}', Name.Variable.Global, '#pop'),
- (r'\w{1,32}', Name.Variable.Global, '#pop'),
+ (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape),
+ include('macros'),
+ (r'.', String)
],
- 'var_validlocal': [
- (r'\w{0,31}\'', Name.Variable, '#pop'),
+ 'string-regular': [
+ (r'(")(?!\')|(?=\n)', String, '#pop'),
+ (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape),
+ include('macros'),
+ (r'.', String)
],
- # * only OK at line start, // OK anywhere
- 'comments': [
- (r'^\s*\*.*$', Comment),
- (r'//.*', Comment.Single),
- (r'/\*.*?\*/', Comment.Multiline),
- (r'/[*](.|\n)*?[*]/', Comment.Multiline),
+ # A local is usually
+ # `\w{0,31}'
+ # `:extended macro'
+ # `=expression'
+ # `[rsen](results)'
+ # `(++--)scalar(++--)'
+ #
+ # However, there are all sorts of weird rules wrt edge
+ # cases. Instead of writing 27 exceptions, anything inside
+ # `' is a local.
+ #
+ # A global is more restricted, so we do follow rules. Note only
+ # locals explicitly enclosed in ${} can be nested.
+ 'macros': [
+ (r'\$(\{|(?=[\$`]))', Name.Variable.Global, 'macro-global-nested'),
+ (r'\$', Name.Variable.Global, 'macro-global-name'),
+ (r'`', Name.Variable, 'macro-local'),
+ ],
+ 'macro-local': [
+ (r'`', Name.Variable, '#push'),
+ (r"'", Name.Variable, '#pop'),
+ (r'\$(\{|(?=[\$`]))', Name.Variable.Global, 'macro-global-nested'),
+ (r'\$', Name.Variable.Global, 'macro-global-name'),
+ (r'.', Name.Variable), # fallback
+ ],
+ 'macro-global-nested': [
+ (r'\$(\{|(?=[\$`]))', Name.Variable.Global, '#push'),
+ (r'\}', Name.Variable.Global, '#pop'),
+ (r'\$', Name.Variable.Global, 'macro-global-name'),
+ (r'`', Name.Variable, 'macro-local'),
+ (r'\w', Name.Variable.Global), # fallback
+ (r'(?!\w)', Name.Variable.Global, '#pop'),
+ ],
+ 'macro-global-name': [
+ (r'\$(\{|(?=[\$`]))', Name.Variable.Global, 'macro-global-nested', '#pop'),
+ (r'\$', Name.Variable.Global, 'macro-global-name', '#pop'),
+ (r'`', Name.Variable, 'macro-local', '#pop'),
+ (r'\w{1,32}', Name.Variable.Global, '#pop'),
],
# Built in functions and statements
'keywords': [
- (words(builtins_functions, prefix = r'\b', suffix = r'\('),
+ (words(builtins_functions, prefix = r'\b', suffix = r'(?=\()'),
Name.Function),
(words(builtins_base, prefix = r'(^\s*|\s)', suffix = r'\b'),
Keyword),
@@ -100,9 +163,9 @@ class StataLexer(RegexLexer):
],
# Stata formats
'format': [
- (r'%-?\d{1,2}(\.\d{1,2})?[gfe]c?', Name.Variable),
- (r'%(21x|16H|16L|8H|8L)', Name.Variable),
- (r'%-?(tc|tC|td|tw|tm|tq|th|ty|tg).{0,32}', Name.Variable),
- (r'%[-~]?\d{1,4}s', Name.Variable),
+ (r'%-?\d{1,2}(\.\d{1,2})?[gfe]c?', Name.Format),
+ (r'%(21x|16H|16L|8H|8L)', Name.Format),
+ (r'%-?(tc|tC|td|tw|tm|tq|th|ty|tg).{0,32}', Name.Format),
+ (r'%[-~]?\d{1,4}s', Name.Format),
]
}
diff --git a/pygments/styles/__init__.py b/pygments/styles/__init__.py
index 1f39c692..0f331e4c 100644
--- a/pygments/styles/__init__.py
+++ b/pygments/styles/__init__.py
@@ -46,6 +46,9 @@ STYLE_MAP = {
'abap': 'abap::AbapStyle',
'solarized-dark': 'solarized::SolarizedDarkStyle',
'solarized-light': 'solarized::SolarizedLightStyle',
+ 'sas': 'sas::SasStyle',
+ 'stata-light': 'stata_light::StataLightStyle',
+ 'stata-dark': 'stata_dark::StataDarkStyle',
}
diff --git a/pygments/styles/stata_dark.py b/pygments/styles/stata_dark.py
new file mode 100644
index 00000000..7610ba0f
--- /dev/null
+++ b/pygments/styles/stata_dark.py
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+"""
+ pygments.styles.stata_dark
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Dark style inspired by Stata's do-file editor. Note this is not
+ meant to be a complete style, just for Stata's file formats.
+
+
+ :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+from pygments.style import Style
+from pygments.token import Keyword, Name, Comment, String, Error, \
+ Number, Operator, Whitespace, Generic, Text
+
+
+class StataDarkStyle(Style):
+
+ default_style = ''
+
+ background_color = "#232629"
+ highlight_color = "#49483e"
+
+ styles = {
+ Whitespace: '#bbbbbb',
+ Error: 'bg:#e3d2d2 #a61717',
+ Text: '#cccccc',
+ String: '#51cc99',
+ Number: '#4FB8CC',
+ Operator: '',
+ Name.Function: '#6a6aff',
+ Name.Format: '#e2828e',
+ Keyword: 'bold #7686bb',
+ Keyword.Constant: '',
+ Comment: 'italic #777777',
+ Name.Variable: 'bold #7AB4DB',
+ Name.Variable.Global: 'bold #BE646C',
+ Generic.Prompt: '#ffffff',
+ }
diff --git a/pygments/styles/stata.py b/pygments/styles/stata_light.py
index 2b5f5edd..86802ef7 100644
--- a/pygments/styles/stata.py
+++ b/pygments/styles/stata_light.py
@@ -1,11 +1,10 @@
# -*- coding: utf-8 -*-
"""
- pygments.styles.stata
- ~~~~~~~~~~~~~~~~~~~~~
+ pygments.styles.stata_light
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Style inspired by Stata's do-file editor. Note this is not meant
- to be a complete style. It's merely meant to mimic Stata's do file
- editor syntax highlighting.
+ Light style inspired by Stata's do-file editor. Note this is not
+ meant to be a complete style, just for Stata's file formats.
:copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
@@ -13,28 +12,28 @@
from pygments.style import Style
from pygments.token import Keyword, Name, Comment, String, Error, \
- Number, Operator, Whitespace
+ Number, Operator, Whitespace, Text
-class StataStyle(Style):
+class StataLightStyle(Style):
"""
- Style inspired by Stata's do-file editor. Note this is not meant
- to be a complete style. It's merely meant to mimic Stata's do file
- editor syntax highlighting.
+ Light mode style inspired by Stata's do-file editor. This is not
+ meant to be a complete style, just for use with Stata.
"""
default_style = ''
-
styles = {
+ Text: '#111111',
Whitespace: '#bbbbbb',
- Comment: 'italic #008800',
+ Error: 'bg:#e3d2d2 #a61717',
String: '#7a2424',
Number: '#2c2cff',
Operator: '',
+ Name.Function: '#2c2cff',
+ Name.Format: '#be646c',
Keyword: 'bold #353580',
Keyword.Constant: '',
- Name.Function: '#2c2cff',
+ Comment: 'italic #008800',
Name.Variable: 'bold #35baba',
Name.Variable.Global: 'bold #b5565e',
- Error: 'bg:#e3d2d2 #a61717'
}