diff options
-rw-r--r-- | pygments/lexers/_stata_builtins.py | 10 | ||||
-rw-r--r-- | pygments/lexers/stata.py | 143 | ||||
-rw-r--r-- | pygments/styles/__init__.py | 3 | ||||
-rw-r--r-- | pygments/styles/stata_dark.py | 41 | ||||
-rw-r--r-- | pygments/styles/stata_light.py (renamed from pygments/styles/stata.py) | 27 |
5 files changed, 166 insertions, 58 deletions
diff --git a/pygments/lexers/_stata_builtins.py b/pygments/lexers/_stata_builtins.py index 5f5f72a9..13a3dacf 100644 --- a/pygments/lexers/_stata_builtins.py +++ b/pygments/lexers/_stata_builtins.py @@ -10,8 +10,12 @@ """ +builtins_special = ( + "if", "in", "using", "replace", "by", "gen", "generate" +) + builtins_base = ( - "if", "else", "in", "foreach", "for", "forv", "forva", + "if", "else", "else\s+if", "in", "foreach", "for", "forv", "forva", "forval", "forvalu", "forvalue", "forvalues", "by", "bys", "bysort", "quietly", "qui", "about", "ac", "ac_7", "acprplot", "acprplot_7", "adjust", "ado", "adopath", @@ -66,7 +70,7 @@ builtins_base = ( "doedit", "dotplot", "dotplot_7", "dprobit", "drawnorm", "drop", "ds", "ds_util", "dstdize", "duplicates", "durbina", "dwstat", "dydx", "e", "ed", "edi", "edit", "egen", - "eivreg", "emdef", "en", "enc", "enco", "encod", "encode", + "eivreg", "emdef", "end", "en", "enc", "enco", "encod", "encode", "eq", "erase", "ereg", "ereg_lf", "ereg_p", "ereg_sw", "ereghet", "ereghet_glf", "ereghet_glf_sh", "ereghet_gp", "ereghet_ilf", "ereghet_ilf_sh", "ereghet_ip", "eret", @@ -415,5 +419,3 @@ builtins_functions = ( "weekly", "wofd", "word", "wordcount", "year", "yearly", "yh", "ym", "yofd", "yq", "yw" ) - - diff --git a/pygments/lexers/stata.py b/pygments/lexers/stata.py index a015a23e..8955a05c 100644 --- a/pygments/lexers/stata.py +++ b/pygments/lexers/stata.py @@ -9,6 +9,7 @@ :license: BSD, see LICENSE for details. """ +import re from pygments.lexer import RegexLexer, include, words from pygments.token import Comment, Keyword, Name, Number, \ String, Text, Operator @@ -33,56 +34,118 @@ class StataLexer(RegexLexer): aliases = ['stata', 'do'] filenames = ['*.do', '*.ado'] mimetypes = ['text/x-stata', 'text/stata', 'application/x-stata'] + flags = re.MULTILINE | re.DOTALL tokens = { 'root': [ include('comments'), - include('vars-strings'), + include('strings'), + include('macros'), include('numbers'), include('keywords'), + include('operators'), + include('format'), (r'.', Text), ], - # Global and local macros; regular and special strings - 'vars-strings': [ - (r'\$[\w{]', Name.Variable.Global, 'var_validglobal'), - (r'`\w{0,31}\'', Name.Variable), - (r'"', String, 'string_dquote'), - (r'`"', String, 'string_mquote'), - ], - # For either string type, highlight macros as macros - 'string_dquote': [ - (r'"', String, '#pop'), - (r'\\\\|\\"|\\\n', String.Escape), - (r'\$', Name.Variable.Global, 'var_validglobal'), - (r'`', Name.Variable, 'var_validlocal'), - (r'[^$`"\\]+', String), - (r'[$"\\]', String), - ], - 'string_mquote': [ + # Comments are a complicated beast in Stata because they can be + # nested and there are a few corner cases with that. See: + # - github.com/kylebarron/language-stata/issues/90 + # - statalist.org/forums/forum/general-stata-discussion/general/1448244 + 'comments': [ + (r'(^//|(?<=\s)//)(?!/)', Comment.Single, 'comments-double-slash'), + (r'^\s*\*', Comment.Single, 'comments-star'), + (r'/\*', Comment.Multiline, 'comments-block'), + (r'(^///|(?<=\s)///)', Comment.Special, 'comments-triple-slash') + ], + 'comments-block': [ + (r'/\*', Comment.Multiline, '#push'), + # this ends and restarts a comment block. but need to catch this so + # that it doesn\'t start _another_ level of comment blocks + (r'\*/\*', Comment.Multiline), + (r'(\*/\s+\*(?!/)[^\n]*)|(\*/)', Comment.Multiline, '#pop'), + # Match anything else as a character inside the comment + (r'.', Comment.Multiline), + ], + 'comments-star': [ + (r'///.*?\n', Comment.Single, + ('#pop', 'comments-triple-slash')), + (r'(^//|(?<=\s)//)(?!/)', Comment.Single, + ('#pop', 'comments-double-slash')), + (r'/\*', Comment.Multiline, 'comments-block'), + (r'.(?=\n)', Comment.Single, '#pop'), + (r'.', Comment.Single), + ], + 'comments-triple-slash': [ + (r'\n', Comment.Special, '#pop'), + # A // breaks out of a comment for the rest of the line + (r'//.*?(?=\n)', Comment.Single, '#pop'), + (r'.', Comment.Special), + ], + 'comments-double-slash': [ + (r'\n', Text, '#pop'), + (r'.', Comment.Single), + ], + # `"compound string"' and regular "string"; note the former are + # nested. + 'strings': [ + (r'`"', String, 'string-compound'), + (r'(?<!`)"', String, 'string-regular'), + ], + 'string-compound': [ + (r'`"', String, '#push'), (r'"\'', String, '#pop'), - (r'\\\\|\\"|\\\n', String.Escape), - (r'\$', Name.Variable.Global, 'var_validglobal'), - (r'`', Name.Variable, 'var_validlocal'), - (r'[^$`"\\]+', String), - (r'[$"\\]', String), - ], - 'var_validglobal': [ - (r'\{\w{0,32}\}', Name.Variable.Global, '#pop'), - (r'\w{1,32}', Name.Variable.Global, '#pop'), + (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape), + include('macros'), + (r'.', String) ], - 'var_validlocal': [ - (r'\w{0,31}\'', Name.Variable, '#pop'), + 'string-regular': [ + (r'(")(?!\')|(?=\n)', String, '#pop'), + (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape), + include('macros'), + (r'.', String) ], - # * only OK at line start, // OK anywhere - 'comments': [ - (r'^\s*\*.*$', Comment), - (r'//.*', Comment.Single), - (r'/\*.*?\*/', Comment.Multiline), - (r'/[*](.|\n)*?[*]/', Comment.Multiline), + # A local is usually + # `\w{0,31}' + # `:extended macro' + # `=expression' + # `[rsen](results)' + # `(++--)scalar(++--)' + # + # However, there are all sorts of weird rules wrt edge + # cases. Instead of writing 27 exceptions, anything inside + # `' is a local. + # + # A global is more restricted, so we do follow rules. Note only + # locals explicitly enclosed ${} can be nested. + 'macros': [ + (r'\$(\{|(?=[\$`]))', Name.Variable.Global, 'macro-global-nested'), + (r'\$', Name.Variable.Global, 'macro-global-name'), + (r'`', Name.Variable, 'macro-local'), + ], + 'macro-local': [ + (r'`', Name.Variable, '#push'), + (r"'", Name.Variable, '#pop'), + (r'\$(\{|(?=[\$`]))', Name.Variable.Global, 'macro-global-nested'), + (r'\$', Name.Variable.Global, 'macro-global-name'), + (r'.', Name.Variable), # fallback + ], + 'macro-global-nested': [ + (r'\$(\{|(?=[\$`]))', Name.Variable.Global, '#push'), + (r'\}', Name.Variable.Global, '#pop'), + (r'\$', Name.Variable.Global, 'macro-global-name'), + (r'`', Name.Variable, 'macro-local'), + (r'\w', Name.Variable.Global), # fallback + (r'(?!\w)', Name.Variable.Global, '#pop'), + ], + 'macro-global-name': [ + (r'\$(\{|(?=[\$`]))', Name.Variable.Global, 'macro-global-nested', '#pop'), + (r'\$', Name.Variable.Global, 'macro-global-name', '#pop'), + (r'`', Name.Variable, 'macro-local', '#pop'), + (r'\w{1,32}', Name.Variable.Global, '#pop'), ], # Built in functions and statements 'keywords': [ - (words(builtins_functions, prefix = r'\b', suffix = r'\('), + (words(builtins_functions, prefix = r'\b', suffix = r'(?=\()'), Name.Function), (words(builtins_base, prefix = r'(^\s*|\s)', suffix = r'\b'), Keyword), @@ -100,9 +163,9 @@ class StataLexer(RegexLexer): ], # Stata formats 'format': [ - (r'%-?\d{1,2}(\.\d{1,2})?[gfe]c?', Name.Variable), - (r'%(21x|16H|16L|8H|8L)', Name.Variable), - (r'%-?(tc|tC|td|tw|tm|tq|th|ty|tg).{0,32}', Name.Variable), - (r'%[-~]?\d{1,4}s', Name.Variable), + (r'%-?\d{1,2}(\.\d{1,2})?[gfe]c?', Name.Format), + (r'%(21x|16H|16L|8H|8L)', Name.Format), + (r'%-?(tc|tC|td|tw|tm|tq|th|ty|tg).{0,32}', Name.Format), + (r'%[-~]?\d{1,4}s', Name.Format), ] } diff --git a/pygments/styles/__init__.py b/pygments/styles/__init__.py index 1f39c692..0f331e4c 100644 --- a/pygments/styles/__init__.py +++ b/pygments/styles/__init__.py @@ -46,6 +46,9 @@ STYLE_MAP = { 'abap': 'abap::AbapStyle', 'solarized-dark': 'solarized::SolarizedDarkStyle', 'solarized-light': 'solarized::SolarizedLightStyle', + 'sas': 'sas::SasStyle', + 'stata-light': 'stata_light::StataLightStyle', + 'stata-dark': 'stata_dark::StataDarkStyle', } diff --git a/pygments/styles/stata_dark.py b/pygments/styles/stata_dark.py new file mode 100644 index 00000000..7610ba0f --- /dev/null +++ b/pygments/styles/stata_dark.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +""" + pygments.styles.stata_dark + ~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Dark style inspired by Stata's do-file editor. Note this is not + meant to be a complete style, just for Stata's file formats. + + + :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from pygments.style import Style +from pygments.token import Keyword, Name, Comment, String, Error, \ + Number, Operator, Whitespace, Generic, Text + + +class StataDarkStyle(Style): + + default_style = '' + + background_color = "#232629" + highlight_color = "#49483e" + + styles = { + Whitespace: '#bbbbbb', + Error: 'bg:#e3d2d2 #a61717', + Text: '#cccccc', + String: '#51cc99', + Number: '#4FB8CC', + Operator: '', + Name.Function: '#6a6aff', + Name.Format: '#e2828e', + Keyword: 'bold #7686bb', + Keyword.Constant: '', + Comment: 'italic #777777', + Name.Variable: 'bold #7AB4DB', + Name.Variable.Global: 'bold #BE646C', + Generic.Prompt: '#ffffff', + } diff --git a/pygments/styles/stata.py b/pygments/styles/stata_light.py index 2b5f5edd..86802ef7 100644 --- a/pygments/styles/stata.py +++ b/pygments/styles/stata_light.py @@ -1,11 +1,10 @@ # -*- coding: utf-8 -*- """ - pygments.styles.stata - ~~~~~~~~~~~~~~~~~~~~~ + pygments.styles.stata_light + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Style inspired by Stata's do-file editor. Note this is not meant - to be a complete style. It's merely meant to mimic Stata's do file - editor syntax highlighting. + Light Style inspired by Stata's do-file editor. Note this is not + meant to be a complete style, just for Stata's file formats. :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. @@ -13,28 +12,28 @@ from pygments.style import Style from pygments.token import Keyword, Name, Comment, String, Error, \ - Number, Operator, Whitespace + Number, Operator, Whitespace, Text -class StataStyle(Style): +class StataLightStyle(Style): """ - Style inspired by Stata's do-file editor. Note this is not meant - to be a complete style. It's merely meant to mimic Stata's do file - editor syntax highlighting. + Light mode style inspired by Stata's do-file editor. This is not + meant to be a complete style, just for use with Stata. """ default_style = '' - styles = { + Text: '#111111', Whitespace: '#bbbbbb', - Comment: 'italic #008800', + Error: 'bg:#e3d2d2 #a61717', String: '#7a2424', Number: '#2c2cff', Operator: '', + Name.Function: '#2c2cff', + Name.Format: '#be646c', Keyword: 'bold #353580', Keyword.Constant: '', - Name.Function: '#2c2cff', + Comment: 'italic #008800', Name.Variable: 'bold #35baba', Name.Variable.Global: 'bold #b5565e', - Error: 'bg:#e3d2d2 #a61717' } |