From bcd5d3e83bb2edc0cf61781ebc042008d48bab45 Mon Sep 17 00:00:00 2001 From: Jeffrey Arnold Date: Fri, 6 Jul 2018 11:52:43 -0700 Subject: Update Stan lexer Update Stan lexer to language version 2.17.0; add builtin-functions, new keywords, fix bug in highlighting numbers. --- pygments/lexers/_stan_builtins.py | 276 +++++++++++++++++++++----------------- pygments/lexers/modeling.py | 32 +++-- 2 files changed, 170 insertions(+), 138 deletions(-) diff --git a/pygments/lexers/_stan_builtins.py b/pygments/lexers/_stan_builtins.py index a189647a..7f1e0ce3 100644 --- a/pygments/lexers/_stan_builtins.py +++ b/pygments/lexers/_stan_builtins.py @@ -4,24 +4,23 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This file contains the names of functions for Stan used by - ``pygments.lexers.math.StanLexer. This is for Stan language version 2.8.0. + ``pygments.lexers.math.StanLexer. This is for Stan language version 2.17.0. - :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS. + :copyright: Copyright 2006-2018 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. 
""" KEYWORDS = ( + 'break', + 'continue', 'else', 'for', 'if', 'in', - 'increment_log_prob', - 'integrate_ode', - 'lp__', 'print', 'reject', 'return', - 'while' + 'while', ) TYPES = ( @@ -35,18 +34,18 @@ TYPES = ( 'positive_ordered', 'real', 'row_vector', - 'row_vectormatrix', 'simplex', 'unit_vector', 'vector', - 'void') + 'void', +) FUNCTIONS = ( - 'Phi', - 'Phi_approx', 'abs', 'acos', 'acosh', + 'algebra_solver', + 'append_array', 'append_col', 'append_row', 'asin', @@ -54,55 +53,59 @@ FUNCTIONS = ( 'atan', 'atan2', 'atanh', - 'bernoulli_ccdf_log', 'bernoulli_cdf', - 'bernoulli_cdf_log', - 'bernoulli_log', - 'bernoulli_logit_log', + 'bernoulli_lccdf', + 'bernoulli_lcdf', + 'bernoulli_logit_lpmf', + 'bernoulli_logit_rng', + 'bernoulli_lpmf', 'bernoulli_rng', 'bessel_first_kind', 'bessel_second_kind', - 'beta_binomial_ccdf_log', 'beta_binomial_cdf', - 'beta_binomial_cdf_log', - 'beta_binomial_log', + 'beta_binomial_lccdf', + 'beta_binomial_lcdf', + 'beta_binomial_lpmf', 'beta_binomial_rng', - 'beta_ccdf_log', 'beta_cdf', - 'beta_cdf_log', - 'beta_log', + 'beta_lccdf', + 'beta_lcdf', + 'beta_lpdf', 'beta_rng', 'binary_log_loss', - 'binomial_ccdf_log', 'binomial_cdf', - 'binomial_cdf_log', 'binomial_coefficient_log', - 'binomial_log', - 'binomial_logit_log', + 'binomial_lccdf', + 'binomial_lcdf', + 'binomial_logit_lpmf', + 'binomial_lpmf', 'binomial_rng', 'block', - 'categorical_log', - 'categorical_logit_log', + 'categorical_logit_lpmf', + 'categorical_logit_rng', + 'categorical_lpmf', 'categorical_rng', - 'cauchy_ccdf_log', 'cauchy_cdf', - 'cauchy_cdf_log', - 'cauchy_log', + 'cauchy_lccdf', + 'cauchy_lcdf', + 'cauchy_lpdf', 'cauchy_rng', 'cbrt', 'ceil', - 'chi_square_ccdf_log', 'chi_square_cdf', - 'chi_square_cdf_log', - 'chi_square_log', + 'chi_square_lccdf', + 'chi_square_lcdf', + 'chi_square_lpdf', 'chi_square_rng', 'cholesky_decompose', + 'choose', 'col', 'cols', 'columns_dot_product', 'columns_dot_self', 'cos', 'cosh', + 'cov_exp_quad', 'crossprod', 
'csr_extract_u', 'csr_extract_v', @@ -117,15 +120,15 @@ FUNCTIONS = ( 'diagonal', 'digamma', 'dims', - 'dirichlet_log', + 'dirichlet_lpdf', 'dirichlet_rng', 'distance', 'dot_product', 'dot_self', - 'double_exponential_ccdf_log', 'double_exponential_cdf', - 'double_exponential_cdf_log', - 'double_exponential_log', + 'double_exponential_lccdf', + 'double_exponential_lcdf', + 'double_exponential_lpdf', 'double_exponential_rng', 'e', 'eigenvalues_sym', @@ -134,16 +137,16 @@ FUNCTIONS = ( 'erfc', 'exp', 'exp2', - 'exp_mod_normal_ccdf_log', 'exp_mod_normal_cdf', - 'exp_mod_normal_cdf_log', - 'exp_mod_normal_log', + 'exp_mod_normal_lccdf', + 'exp_mod_normal_lcdf', + 'exp_mod_normal_lpdf', 'exp_mod_normal_rng', 'expm1', - 'exponential_ccdf_log', 'exponential_cdf', - 'exponential_cdf_log', - 'exponential_log', + 'exponential_lccdf', + 'exponential_lcdf', + 'exponential_lpdf', 'exponential_rng', 'fabs', 'falling_factorial', @@ -153,60 +156,65 @@ FUNCTIONS = ( 'fmax', 'fmin', 'fmod', - 'frechet_ccdf_log', 'frechet_cdf', - 'frechet_cdf_log', - 'frechet_log', + 'frechet_lccdf', + 'frechet_lcdf', + 'frechet_lpdf', 'frechet_rng', - 'gamma_ccdf_log', 'gamma_cdf', - 'gamma_cdf_log', - 'gamma_log', + 'gamma_lccdf', + 'gamma_lcdf', + 'gamma_lpdf', 'gamma_p', 'gamma_q', 'gamma_rng', - 'gaussian_dlm_obs_log', + 'gaussian_dlm_obs_lpdf', 'get_lp', - 'gumbel_ccdf_log', 'gumbel_cdf', - 'gumbel_cdf_log', - 'gumbel_log', + 'gumbel_lccdf', + 'gumbel_lcdf', + 'gumbel_lpdf', 'gumbel_rng', 'head', - 'hypergeometric_log', + 'hypergeometric_lpmf', 'hypergeometric_rng', 'hypot', - 'if_else', + 'inc_beta', 'int_step', + 'integrate_ode', + 'integrate_ode_bdf', + 'integrate_ode_rk45', 'inv', - 'inv_chi_square_ccdf_log', 'inv_chi_square_cdf', - 'inv_chi_square_cdf_log', - 'inv_chi_square_log', + 'inv_chi_square_lccdf', + 'inv_chi_square_lcdf', + 'inv_chi_square_lpdf', 'inv_chi_square_rng', 'inv_cloglog', - 'inv_gamma_ccdf_log', 'inv_gamma_cdf', - 'inv_gamma_cdf_log', - 'inv_gamma_log', + 
'inv_gamma_lccdf', + 'inv_gamma_lcdf', + 'inv_gamma_lpdf', 'inv_gamma_rng', 'inv_logit', - 'inv_phi', + 'inv_Phi', 'inv_sqrt', 'inv_square', - 'inv_wishart_log', + 'inv_wishart_lpdf', 'inv_wishart_rng', 'inverse', 'inverse_spd', 'is_inf', 'is_nan', 'lbeta', + 'lchoose', 'lgamma', - 'lkj_corr_cholesky_log', + 'lkj_corr_cholesky_lpdf', 'lkj_corr_cholesky_rng', - 'lkj_corr_log', + 'lkj_corr_lpdf', 'lkj_corr_rng', 'lmgamma', + 'lmultiply', 'log', 'log10', 'log1m', @@ -223,81 +231,87 @@ FUNCTIONS = ( 'log_rising_factorial', 'log_softmax', 'log_sum_exp', - 'logistic_ccdf_log', 'logistic_cdf', - 'logistic_cdf_log', - 'logistic_log', + 'logistic_lccdf', + 'logistic_lcdf', + 'logistic_lpdf', 'logistic_rng', 'logit', - 'lognormal_ccdf_log', 'lognormal_cdf', - 'lognormal_cdf_log', - 'lognormal_log', + 'lognormal_lccdf', + 'lognormal_lcdf', + 'lognormal_lpdf', 'lognormal_rng', 'machine_precision', + 'matrix_exp', 'max', + 'mdivide_left_spd', 'mdivide_left_tri_low', + 'mdivide_right_spd', 'mdivide_right_tri_low', 'mean', 'min', 'modified_bessel_first_kind', 'modified_bessel_second_kind', - 'multi_gp_cholesky_log', - 'multi_gp_log', - 'multi_normal_cholesky_log', + 'multi_gp_cholesky_lpdf', + 'multi_gp_lpdf', + 'multi_normal_cholesky_lpdf', 'multi_normal_cholesky_rng', - 'multi_normal_log', - 'multi_normal_prec_log', + 'multi_normal_lpdf', + 'multi_normal_prec_lpdf', 'multi_normal_rng', - 'multi_student_t_log', + 'multi_student_t_lpdf', 'multi_student_t_rng', - 'multinomial_log', + 'multinomial_lpmf', 'multinomial_rng', 'multiply_log', 'multiply_lower_tri_self_transpose', - 'neg_binomial_2_ccdf_log', 'neg_binomial_2_cdf', - 'neg_binomial_2_cdf_log', - 'neg_binomial_2_log', - 'neg_binomial_2_log_log', + 'neg_binomial_2_lccdf', + 'neg_binomial_2_lcdf', + 'neg_binomial_2_log_lpmf', 'neg_binomial_2_log_rng', + 'neg_binomial_2_lpmf', 'neg_binomial_2_rng', - 'neg_binomial_ccdf_log', 'neg_binomial_cdf', - 'neg_binomial_cdf_log', - 'neg_binomial_log', + 'neg_binomial_lccdf', + 
'neg_binomial_lcdf', + 'neg_binomial_lpmf', 'neg_binomial_rng', 'negative_infinity', - 'normal_ccdf_log', 'normal_cdf', - 'normal_cdf_log', - 'normal_log', + 'normal_lccdf', + 'normal_lcdf', + 'normal_lpdf', 'normal_rng', 'not_a_number', 'num_elements', - 'ordered_logistic_log', + 'ordered_logistic_lpmf', 'ordered_logistic_rng', 'owens_t', - 'pareto_ccdf_log', 'pareto_cdf', - 'pareto_cdf_log', - 'pareto_log', + 'pareto_lccdf', + 'pareto_lcdf', + 'pareto_lpdf', 'pareto_rng', - 'pareto_type_2_ccdf_log', 'pareto_type_2_cdf', - 'pareto_type_2_cdf_log', - 'pareto_type_2_log', + 'pareto_type_2_lccdf', + 'pareto_type_2_lcdf', + 'pareto_type_2_lpdf', 'pareto_type_2_rng', + 'Phi', + 'Phi_approx', 'pi', - 'poisson_ccdf_log', 'poisson_cdf', - 'poisson_cdf_log', - 'poisson_log', - 'poisson_log_log', + 'poisson_lccdf', + 'poisson_lcdf', + 'poisson_log_lpmf', 'poisson_log_rng', + 'poisson_lpmf', 'poisson_rng', 'positive_infinity', 'pow', + 'print', 'prod', 'qr_Q', 'qr_R', @@ -305,11 +319,12 @@ FUNCTIONS = ( 'quad_form_diag', 'quad_form_sym', 'rank', - 'rayleigh_ccdf_log', 'rayleigh_cdf', - 'rayleigh_cdf_log', - 'rayleigh_log', + 'rayleigh_lccdf', + 'rayleigh_lcdf', + 'rayleigh_lpdf', 'rayleigh_rng', + 'reject', 'rep_array', 'rep_matrix', 'rep_row_vector', @@ -320,10 +335,10 @@ FUNCTIONS = ( 'rows', 'rows_dot_product', 'rows_dot_self', - 'scaled_inv_chi_square_ccdf_log', 'scaled_inv_chi_square_cdf', - 'scaled_inv_chi_square_cdf_log', - 'scaled_inv_chi_square_log', + 'scaled_inv_chi_square_lccdf', + 'scaled_inv_chi_square_lcdf', + 'scaled_inv_chi_square_lpdf', 'scaled_inv_chi_square_rng', 'sd', 'segment', @@ -331,10 +346,10 @@ FUNCTIONS = ( 'singular_values', 'sinh', 'size', - 'skew_normal_ccdf_log', 'skew_normal_cdf', - 'skew_normal_cdf_log', - 'skew_normal_log', + 'skew_normal_lccdf', + 'skew_normal_lcdf', + 'skew_normal_lpdf', 'skew_normal_rng', 'softmax', 'sort_asc', @@ -346,10 +361,10 @@ FUNCTIONS = ( 'square', 'squared_distance', 'step', - 'student_t_ccdf_log', 
'student_t_cdf', - 'student_t_cdf_log', - 'student_t_log', + 'student_t_lccdf', + 'student_t_lcdf', + 'student_t_lpdf', 'student_t_rng', 'sub_col', 'sub_row', @@ -357,6 +372,7 @@ FUNCTIONS = ( 'tail', 'tan', 'tanh', + 'target', 'tcrossprod', 'tgamma', 'to_array_1d', @@ -369,22 +385,22 @@ FUNCTIONS = ( 'trace_quad_form', 'trigamma', 'trunc', - 'uniform_ccdf_log', 'uniform_cdf', - 'uniform_cdf_log', - 'uniform_log', + 'uniform_lccdf', + 'uniform_lcdf', + 'uniform_lpdf', 'uniform_rng', 'variance', - 'von_mises_log', + 'von_mises_lpdf', 'von_mises_rng', - 'weibull_ccdf_log', 'weibull_cdf', - 'weibull_cdf_log', - 'weibull_log', + 'weibull_lccdf', + 'weibull_lcdf', + 'weibull_lpdf', 'weibull_rng', - 'wiener_log', - 'wishart_log', - 'wishart_rng' + 'wiener_lpdf', + 'wishart_lpdf', + 'wishart_rng', ) DISTRIBUTIONS = ( @@ -438,7 +454,7 @@ DISTRIBUTIONS = ( 'von_mises', 'weibull', 'wiener', - 'wishart' + 'wishart', ) RESERVED = ( @@ -469,19 +485,23 @@ RESERVED = ( 'do', 'double', 'dynamic_cast', + 'else', 'enum', 'explicit', 'export', 'extern', 'false', - 'false', 'float', + 'for', 'friend', 'fvar', 'goto', + 'if', + 'in', 'inline', 'int', 'long', + 'lp__', 'mutable', 'namespace', 'new', @@ -498,9 +518,16 @@ RESERVED = ( 'register', 'reinterpret_cast', 'repeat', + 'return', 'short', 'signed', 'sizeof', + 'STAN_MAJOR', + 'STAN_MATH_MAJOR', + 'STAN_MATH_MINOR', + 'STAN_MATH_PATCH', + 'STAN_MINOR', + 'STAN_PATCH', 'static', 'static_assert', 'static_cast', @@ -512,7 +539,6 @@ RESERVED = ( 'thread_local', 'throw', 'true', - 'true', 'try', 'typedef', 'typeid', @@ -526,7 +552,7 @@ RESERVED = ( 'void', 'volatile', 'wchar_t', + 'while', 'xor', - 'xor_eq' + 'xor_eq', ) - diff --git a/pygments/lexers/modeling.py b/pygments/lexers/modeling.py index b354f1cf..49d98d1b 100644 --- a/pygments/lexers/modeling.py +++ b/pygments/lexers/modeling.py @@ -284,8 +284,8 @@ class StanLexer(RegexLexer): """Pygments Lexer for Stan models. 
The Stan modeling language is specified in the *Stan Modeling Language - User's Guide and Reference Manual, v2.8.0*, - `pdf <https://github.com/stan-dev/stan/releases/download/v2.8.0/stan-reference-2.8.0.pdf>`__. + User's Guide and Reference Manual, v2.17.0*, + `pdf <https://github.com/stan-dev/stan/releases/download/v2.17.0/stan-reference-2.17.0.pdf>`__. .. versionadded:: 1.6 """ @@ -316,19 +316,24 @@ class StanLexer(RegexLexer): 'parameters', r'transformed\s+parameters', 'model', r'generated\s+quantities')), bygroups(Keyword.Namespace, Text, Punctuation)), + # target keyword + (r'target\s*\+=', Keyword), # Reserved Words (r'(%s)\b' % r'|'.join(_stan_builtins.KEYWORDS), Keyword), # Truncation (r'T(?=\s*\[)', Keyword), # Data types (r'(%s)\b' % r'|'.join(_stan_builtins.TYPES), Keyword.Type), + # < should be punctuation, but elsewhere I can't tell if it is in + # a range constraint + (r'(<)\s*(upper|lower)\s*(=)', bygroups(Operator, Keyword, Punctuation)), + (r'(,)\s*(upper)\s*(=)', bygroups(Punctuation, Keyword, Punctuation)), # Punctuation - (r"[;:,\[\]()]", Punctuation), + (r"[;,\[\]()]", Punctuation), # Builtin - (r'(%s)(?=\s*\()' - % r'|'.join(_stan_builtins.FUNCTIONS - + _stan_builtins.DISTRIBUTIONS), - Name.Builtin), + (r'(%s)(?=\s*\()' % '|'.join(_stan_builtins.FUNCTIONS), Name.Builtin), + (r'(~)\s*(%s)(?=\s*\()' % '|'.join(_stan_builtins.DISTRIBUTIONS), + bygroups(Operator, Name.Builtin)), # Special names ending in __, like lp__ (r'[A-Za-z]\w*__\b', Name.Builtin.Pseudo), (r'(%s)\b' % r'|'.join(_stan_builtins.RESERVED), Keyword.Reserved), @@ -337,17 +342,18 @@ class StanLexer(RegexLexer): # Regular variable names (r'[A-Za-z]\w*\b', Name), # Real Literals - (r'-?[0-9]+(\.[0-9]+)?[eE]-?[0-9]+', Number.Float), - (r'-?[0-9]*\.[0-9]*', Number.Float), + (r'[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?', Number.Float), + (r'\.[0-9]+([eE][+-]?[0-9]+)?', Number.Float), # Integer Literals - (r'-?[0-9]+', Number.Integer), + (r'[0-9]+', Number.Integer), # Assignment operators - # SLexer makes these tokens Operators.
- (r'<-|~', Operator), + (r'<-|(?:\+|-|\.?/|\.?\*|=)?=|~', Operator), # Infix, prefix and postfix operators (and = ) - (r"\+|-|\.?\*|\.?/|\\|'|\^|==?|!=?|<=?|>=?|\|\||&&", Operator), + (r"\+|-|\.?\*|\.?/|\\|'|\^|!=?|<=?|>=?|\|\||&&|%|\?|:", Operator), # Block delimiters (r'[{}]', Punctuation), + # Distribution | + (r'\|', Punctuation) ] } -- cgit v1.2.1