summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- AUTHORS 2
-rw-r--r-- pygments/lexers/_mysql_builtins.py 1282
-rw-r--r-- pygments/lexers/sql.py 209
-rw-r--r-- tests/examplefiles/mysql.txt 132
-rw-r--r-- tests/test_mysql.py 249
5 files changed, 1817 insertions, 57 deletions
diff --git a/AUTHORS b/AUTHORS
index 9b0ad009..5058c612 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -138,7 +138,7 @@ Other contributors, listed alphabetically, are:
* Stephen McKamey -- Duel/JBST lexer
* Brian McKenna -- F# lexer
* Charles McLaughlin -- Puppet lexer
-* Kurt McKee -- Tera Term macro lexer, PostgreSQL updates
+* Kurt McKee -- Tera Term macro lexer, PostgreSQL updates, MySQL overhaul
* Lukas Meuser -- BBCode formatter, Lua lexer
* Cat Miller -- Pig lexer
* Paul Miller -- LiveScript lexer
diff --git a/pygments/lexers/_mysql_builtins.py b/pygments/lexers/_mysql_builtins.py
new file mode 100644
index 00000000..121054c3
--- /dev/null
+++ b/pygments/lexers/_mysql_builtins.py
@@ -0,0 +1,1282 @@
+# -*- coding: utf-8 -*-
+"""
+ pygments.lexers._mysql_builtins
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Self-updating data files for the MySQL lexer.
+
+ :copyright: Copyright 2020 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+
+# Literal constant words. The MySQL lexer in sql.py matches these as whole
+# words and tokenizes them as Name.Constant.
+MYSQL_CONSTANTS = (
+ 'false',
+ 'null',
+ 'true',
+ 'unknown',
+)
+
+
+# At this time, no easily-parsed, definitive list of data types
+# has been found in the MySQL source code or documentation. (The
+# `sql/sql_yacc.yy` file is definitive but is difficult to parse.)
+# Therefore these types are currently maintained manually.
+#
+# Some words in this list -- like "long", "national", "precision",
+# and "varying" -- appear to only occur in combination with other
+# data type keywords. Therefore they are included as separate words
+# even though they do not naturally occur in syntax separately.
+#
+# This list is also used to strip data types out of the list of
+# MySQL keywords, which is automatically updated later in the file.
+#
+MYSQL_DATATYPES = (
+ # Numeric data types
+ 'bigint',
+ 'bit',
+ 'bool',
+ 'boolean',
+ 'dec',
+ 'decimal',
+ 'double',
+ 'fixed',
+ 'float',
+ 'float4',
+ 'float8',
+ 'int',
+ 'int1',
+ 'int2',
+ 'int3',
+ 'int4',
+ 'int8',
+ 'integer',
+ 'mediumint',
+ 'middleint',
+ 'numeric',
+ 'precision',
+ 'real',
+ 'serial',
+ 'smallint',
+ 'tinyint',
+
+ # Date and time data types
+ 'date',
+ 'datetime',
+ 'time',
+ 'timestamp',
+ 'year',
+
+ # String data types
+ 'binary',
+ 'blob',
+ 'char',
+ 'enum',
+ 'long',
+ 'longblob',
+ 'longtext',
+ 'mediumblob',
+ 'mediumtext',
+ 'national',
+ 'nchar',
+ 'nvarchar',
+ 'set',
+ 'text',
+ 'tinyblob',
+ 'tinytext',
+ 'varbinary',
+ 'varchar',
+ 'varcharacter',
+ 'varying',
+
+ # Spatial data types
+ 'geometry',
+ 'geometrycollection',
+ 'linestring',
+ 'multilinestring',
+ 'multipoint',
+ 'multipolygon',
+ 'point',
+ 'polygon',
+
+ # JSON data types
+ 'json',
+)
+
+# Everything below this line is auto-generated from the MySQL source code.
+# Run this file in Python and it will update itself.
+# -----------------------------------------------------------------------------
+
+MYSQL_FUNCTIONS = (
+ 'abs',
+ 'acos',
+ 'adddate',
+ 'addtime',
+ 'aes_decrypt',
+ 'aes_encrypt',
+ 'any_value',
+ 'asin',
+ 'atan',
+ 'atan2',
+ 'benchmark',
+ 'bin',
+ 'bin_to_uuid',
+ 'bit_and',
+ 'bit_count',
+ 'bit_length',
+ 'bit_or',
+ 'bit_xor',
+ 'can_access_column',
+ 'can_access_database',
+ 'can_access_event',
+ 'can_access_resource_group',
+ 'can_access_routine',
+ 'can_access_table',
+ 'can_access_trigger',
+ 'can_access_view',
+ 'cast',
+ 'ceil',
+ 'ceiling',
+ 'char_length',
+ 'character_length',
+ 'coercibility',
+ 'compress',
+ 'concat',
+ 'concat_ws',
+ 'connection_id',
+ 'conv',
+ 'convert_cpu_id_mask',
+ 'convert_interval_to_user_interval',
+ 'convert_tz',
+ 'cos',
+ 'cot',
+ 'count',
+ 'crc32',
+ 'curdate',
+ 'current_role',
+ 'curtime',
+ 'date_add',
+ 'date_format',
+ 'date_sub',
+ 'datediff',
+ 'dayname',
+ 'dayofmonth',
+ 'dayofweek',
+ 'dayofyear',
+ 'degrees',
+ 'elt',
+ 'exp',
+ 'export_set',
+ 'extract',
+ 'extractvalue',
+ 'field',
+ 'find_in_set',
+ 'floor',
+ 'format_bytes',
+ 'format_pico_time',
+ 'found_rows',
+ 'from_base64',
+ 'from_days',
+ 'from_unixtime',
+ 'get_dd_column_privileges',
+ 'get_dd_create_options',
+ 'get_dd_index_private_data',
+ 'get_dd_index_sub_part_length',
+ 'get_dd_property_key_value',
+ 'get_dd_tablespace_private_data',
+ 'get_lock',
+ 'greatest',
+ 'group_concat',
+ 'gtid_subset',
+ 'gtid_subtract',
+ 'hex',
+ 'icu_version',
+ 'ifnull',
+ 'inet6_aton',
+ 'inet6_ntoa',
+ 'inet_aton',
+ 'inet_ntoa',
+ 'instr',
+ 'internal_auto_increment',
+ 'internal_avg_row_length',
+ 'internal_check_time',
+ 'internal_checksum',
+ 'internal_data_free',
+ 'internal_data_length',
+ 'internal_dd_char_length',
+ 'internal_get_comment_or_error',
+ 'internal_get_dd_column_extra',
+ 'internal_get_enabled_role_json',
+ 'internal_get_hostname',
+ 'internal_get_mandatory_roles_json',
+ 'internal_get_partition_nodegroup',
+ 'internal_get_username',
+ 'internal_get_view_warning_or_error',
+ 'internal_index_column_cardinality',
+ 'internal_index_length',
+ 'internal_is_enabled_role',
+ 'internal_is_mandatory_role',
+ 'internal_keys_disabled',
+ 'internal_max_data_length',
+ 'internal_table_rows',
+ 'internal_tablespace_autoextend_size',
+ 'internal_tablespace_data_free',
+ 'internal_tablespace_extent_size',
+ 'internal_tablespace_extra',
+ 'internal_tablespace_free_extents',
+ 'internal_tablespace_id',
+ 'internal_tablespace_initial_size',
+ 'internal_tablespace_logfile_group_name',
+ 'internal_tablespace_logfile_group_number',
+ 'internal_tablespace_maximum_size',
+ 'internal_tablespace_row_format',
+ 'internal_tablespace_status',
+ 'internal_tablespace_total_extents',
+ 'internal_tablespace_type',
+ 'internal_tablespace_version',
+ 'internal_update_time',
+ 'is_free_lock',
+ 'is_ipv4',
+ 'is_ipv4_compat',
+ 'is_ipv4_mapped',
+ 'is_ipv6',
+ 'is_used_lock',
+ 'is_uuid',
+ 'is_visible_dd_object',
+ 'isnull',
+ 'json_array',
+ 'json_array_append',
+ 'json_array_insert',
+ 'json_arrayagg',
+ 'json_contains',
+ 'json_contains_path',
+ 'json_depth',
+ 'json_extract',
+ 'json_insert',
+ 'json_keys',
+ 'json_length',
+ 'json_merge',
+ 'json_merge_patch',
+ 'json_merge_preserve',
+ 'json_object',
+ 'json_objectagg',
+ 'json_overlaps',
+ 'json_pretty',
+ 'json_quote',
+ 'json_remove',
+ 'json_replace',
+ 'json_schema_valid',
+ 'json_schema_validation_report',
+ 'json_search',
+ 'json_set',
+ 'json_storage_free',
+ 'json_storage_size',
+ 'json_type',
+ 'json_unquote',
+ 'json_valid',
+ 'last_day',
+ 'last_insert_id',
+ 'lcase',
+ 'least',
+ 'length',
+ 'like_range_max',
+ 'like_range_min',
+ 'ln',
+ 'load_file',
+ 'locate',
+ 'log',
+ 'log10',
+ 'log2',
+ 'lower',
+ 'lpad',
+ 'ltrim',
+ 'make_set',
+ 'makedate',
+ 'maketime',
+ 'master_pos_wait',
+ 'max',
+ 'mbrcontains',
+ 'mbrcoveredby',
+ 'mbrcovers',
+ 'mbrdisjoint',
+ 'mbrequals',
+ 'mbrintersects',
+ 'mbroverlaps',
+ 'mbrtouches',
+ 'mbrwithin',
+ 'md5',
+ 'mid',
+ 'min',
+ 'monthname',
+ 'name_const',
+ 'now',
+ 'nullif',
+ 'oct',
+ 'octet_length',
+ 'ord',
+ 'period_add',
+ 'period_diff',
+ 'pi',
+ 'position',
+ 'pow',
+ 'power',
+ 'ps_current_thread_id',
+ 'ps_thread_id',
+ 'quote',
+ 'radians',
+ 'rand',
+ 'random_bytes',
+ 'regexp_instr',
+ 'regexp_like',
+ 'regexp_replace',
+ 'regexp_substr',
+ 'release_all_locks',
+ 'release_lock',
+ 'remove_dd_property_key',
+ 'reverse',
+ 'roles_graphml',
+ 'round',
+ 'rpad',
+ 'rtrim',
+ 'sec_to_time',
+ 'session_user',
+ 'sha',
+ 'sha1',
+ 'sha2',
+ 'sign',
+ 'sin',
+ 'sleep',
+ 'soundex',
+ 'space',
+ 'sqrt',
+ 'st_area',
+ 'st_asbinary',
+ 'st_asgeojson',
+ 'st_astext',
+ 'st_aswkb',
+ 'st_aswkt',
+ 'st_buffer',
+ 'st_buffer_strategy',
+ 'st_centroid',
+ 'st_contains',
+ 'st_convexhull',
+ 'st_crosses',
+ 'st_difference',
+ 'st_dimension',
+ 'st_disjoint',
+ 'st_distance',
+ 'st_distance_sphere',
+ 'st_endpoint',
+ 'st_envelope',
+ 'st_equals',
+ 'st_exteriorring',
+ 'st_geohash',
+ 'st_geomcollfromtext',
+ 'st_geomcollfromtxt',
+ 'st_geomcollfromwkb',
+ 'st_geometrycollectionfromtext',
+ 'st_geometrycollectionfromwkb',
+ 'st_geometryfromtext',
+ 'st_geometryfromwkb',
+ 'st_geometryn',
+ 'st_geometrytype',
+ 'st_geomfromgeojson',
+ 'st_geomfromtext',
+ 'st_geomfromwkb',
+ 'st_interiorringn',
+ 'st_intersection',
+ 'st_intersects',
+ 'st_isclosed',
+ 'st_isempty',
+ 'st_issimple',
+ 'st_isvalid',
+ 'st_latfromgeohash',
+ 'st_latitude',
+ 'st_length',
+ 'st_linefromtext',
+ 'st_linefromwkb',
+ 'st_linestringfromtext',
+ 'st_linestringfromwkb',
+ 'st_longfromgeohash',
+ 'st_longitude',
+ 'st_makeenvelope',
+ 'st_mlinefromtext',
+ 'st_mlinefromwkb',
+ 'st_mpointfromtext',
+ 'st_mpointfromwkb',
+ 'st_mpolyfromtext',
+ 'st_mpolyfromwkb',
+ 'st_multilinestringfromtext',
+ 'st_multilinestringfromwkb',
+ 'st_multipointfromtext',
+ 'st_multipointfromwkb',
+ 'st_multipolygonfromtext',
+ 'st_multipolygonfromwkb',
+ 'st_numgeometries',
+ 'st_numinteriorring',
+ 'st_numinteriorrings',
+ 'st_numpoints',
+ 'st_overlaps',
+ 'st_pointfromgeohash',
+ 'st_pointfromtext',
+ 'st_pointfromwkb',
+ 'st_pointn',
+ 'st_polyfromtext',
+ 'st_polyfromwkb',
+ 'st_polygonfromtext',
+ 'st_polygonfromwkb',
+ 'st_simplify',
+ 'st_srid',
+ 'st_startpoint',
+ 'st_swapxy',
+ 'st_symdifference',
+ 'st_touches',
+ 'st_transform',
+ 'st_union',
+ 'st_validate',
+ 'st_within',
+ 'st_x',
+ 'st_y',
+ 'statement_digest',
+ 'statement_digest_text',
+ 'std',
+ 'stddev',
+ 'stddev_pop',
+ 'stddev_samp',
+ 'str_to_date',
+ 'strcmp',
+ 'subdate',
+ 'substr',
+ 'substring',
+ 'substring_index',
+ 'subtime',
+ 'sum',
+ 'sysdate',
+ 'system_user',
+ 'tan',
+ 'time_format',
+ 'time_to_sec',
+ 'timediff',
+ 'to_base64',
+ 'to_days',
+ 'to_seconds',
+ 'trim',
+ 'ucase',
+ 'uncompress',
+ 'uncompressed_length',
+ 'unhex',
+ 'unix_timestamp',
+ 'updatexml',
+ 'upper',
+ 'uuid',
+ 'uuid_short',
+ 'uuid_to_bin',
+ 'validate_password_strength',
+ 'var_pop',
+ 'var_samp',
+ 'variance',
+ 'version',
+ 'wait_for_executed_gtid_set',
+ 'wait_until_sql_thread_after_gtids',
+ 'weekday',
+ 'weekofyear',
+ 'yearweek',
+)
+
+
+MYSQL_OPTIMIZER_HINTS = (
+ 'bka',
+ 'bnl',
+ 'dupsweedout',
+ 'firstmatch',
+ 'group_index',
+ 'hash_join',
+ 'index',
+ 'index_merge',
+ 'intoexists',
+ 'join_fixed_order',
+ 'join_index',
+ 'join_order',
+ 'join_prefix',
+ 'join_suffix',
+ 'loosescan',
+ 'materialization',
+ 'max_execution_time',
+ 'merge',
+ 'mrr',
+ 'no_bka',
+ 'no_bnl',
+ 'no_group_index',
+ 'no_hash_join',
+ 'no_icp',
+ 'no_index',
+ 'no_index_merge',
+ 'no_join_index',
+ 'no_merge',
+ 'no_mrr',
+ 'no_order_index',
+ 'no_range_optimization',
+ 'no_semijoin',
+ 'no_skip_scan',
+ 'order_index',
+ 'qb_name',
+ 'resource_group',
+ 'semijoin',
+ 'set_var',
+ 'skip_scan',
+ 'subquery',
+)
+
+
+MYSQL_KEYWORDS = (
+ 'accessible',
+ 'account',
+ 'action',
+ 'active',
+ 'add',
+ 'admin',
+ 'after',
+ 'against',
+ 'aggregate',
+ 'algorithm',
+ 'all',
+ 'alter',
+ 'always',
+ 'analyze',
+ 'and',
+ 'any',
+ 'array',
+ 'as',
+ 'asc',
+ 'ascii',
+ 'asensitive',
+ 'at',
+ 'attribute',
+ 'auto_increment',
+ 'autoextend_size',
+ 'avg',
+ 'avg_row_length',
+ 'backup',
+ 'before',
+ 'begin',
+ 'between',
+ 'binlog',
+ 'block',
+ 'both',
+ 'btree',
+ 'buckets',
+ 'by',
+ 'byte',
+ 'cache',
+ 'call',
+ 'cascade',
+ 'cascaded',
+ 'case',
+ 'catalog_name',
+ 'chain',
+ 'change',
+ 'changed',
+ 'channel',
+ 'character',
+ 'charset',
+ 'check',
+ 'checksum',
+ 'cipher',
+ 'class_origin',
+ 'client',
+ 'clone',
+ 'close',
+ 'coalesce',
+ 'code',
+ 'collate',
+ 'collation',
+ 'column',
+ 'column_format',
+ 'column_name',
+ 'columns',
+ 'comment',
+ 'commit',
+ 'committed',
+ 'compact',
+ 'completion',
+ 'component',
+ 'compressed',
+ 'compression',
+ 'concurrent',
+ 'condition',
+ 'connection',
+ 'consistent',
+ 'constraint',
+ 'constraint_catalog',
+ 'constraint_name',
+ 'constraint_schema',
+ 'contains',
+ 'context',
+ 'continue',
+ 'convert',
+ 'cpu',
+ 'create',
+ 'cross',
+ 'cube',
+ 'cume_dist',
+ 'current',
+ 'current_date',
+ 'current_time',
+ 'current_timestamp',
+ 'current_user',
+ 'cursor',
+ 'cursor_name',
+ 'data',
+ 'database',
+ 'databases',
+ 'datafile',
+ 'day',
+ 'day_hour',
+ 'day_microsecond',
+ 'day_minute',
+ 'day_second',
+ 'deallocate',
+ 'declare',
+ 'default',
+ 'default_auth',
+ 'definer',
+ 'definition',
+ 'delay_key_write',
+ 'delayed',
+ 'delete',
+ 'dense_rank',
+ 'desc',
+ 'describe',
+ 'description',
+ 'deterministic',
+ 'diagnostics',
+ 'directory',
+ 'disable',
+ 'discard',
+ 'disk',
+ 'distinct',
+ 'distinctrow',
+ 'div',
+ 'do',
+ 'drop',
+ 'dual',
+ 'dumpfile',
+ 'duplicate',
+ 'dynamic',
+ 'each',
+ 'else',
+ 'elseif',
+ 'empty',
+ 'enable',
+ 'enclosed',
+ 'encryption',
+ 'end',
+ 'ends',
+ 'enforced',
+ 'engine',
+ 'engine_attribute',
+ 'engines',
+ 'error',
+ 'errors',
+ 'escape',
+ 'escaped',
+ 'event',
+ 'events',
+ 'every',
+ 'except',
+ 'exchange',
+ 'exclude',
+ 'execute',
+ 'exists',
+ 'exit',
+ 'expansion',
+ 'expire',
+ 'explain',
+ 'export',
+ 'extended',
+ 'extent_size',
+ 'failed_login_attempts',
+ 'false',
+ 'fast',
+ 'faults',
+ 'fetch',
+ 'fields',
+ 'file',
+ 'file_block_size',
+ 'filter',
+ 'first',
+ 'first_value',
+ 'flush',
+ 'following',
+ 'follows',
+ 'for',
+ 'force',
+ 'foreign',
+ 'format',
+ 'found',
+ 'from',
+ 'full',
+ 'fulltext',
+ 'function',
+ 'general',
+ 'generated',
+ 'geomcollection',
+ 'get',
+ 'get_format',
+ 'get_master_public_key',
+ 'global',
+ 'grant',
+ 'grants',
+ 'group',
+ 'group_replication',
+ 'grouping',
+ 'groups',
+ 'handler',
+ 'hash',
+ 'having',
+ 'help',
+ 'high_priority',
+ 'histogram',
+ 'history',
+ 'host',
+ 'hosts',
+ 'hour',
+ 'hour_microsecond',
+ 'hour_minute',
+ 'hour_second',
+ 'identified',
+ 'if',
+ 'ignore',
+ 'ignore_server_ids',
+ 'import',
+ 'in',
+ 'inactive',
+ 'index',
+ 'indexes',
+ 'infile',
+ 'initial_size',
+ 'inner',
+ 'inout',
+ 'insensitive',
+ 'insert',
+ 'insert_method',
+ 'install',
+ 'instance',
+ 'interval',
+ 'into',
+ 'invisible',
+ 'invoker',
+ 'io',
+ 'io_after_gtids',
+ 'io_before_gtids',
+ 'io_thread',
+ 'ipc',
+ 'is',
+ 'isolation',
+ 'issuer',
+ 'iterate',
+ 'join',
+ 'json_table',
+ 'json_value',
+ 'key',
+ 'key_block_size',
+ 'keys',
+ 'kill',
+ 'lag',
+ 'language',
+ 'last',
+ 'last_value',
+ 'lateral',
+ 'lead',
+ 'leading',
+ 'leave',
+ 'leaves',
+ 'left',
+ 'less',
+ 'level',
+ 'like',
+ 'limit',
+ 'linear',
+ 'lines',
+ 'list',
+ 'load',
+ 'local',
+ 'localtime',
+ 'localtimestamp',
+ 'lock',
+ 'locked',
+ 'locks',
+ 'logfile',
+ 'logs',
+ 'loop',
+ 'low_priority',
+ 'master',
+ 'master_auto_position',
+ 'master_bind',
+ 'master_compression_algorithms',
+ 'master_connect_retry',
+ 'master_delay',
+ 'master_heartbeat_period',
+ 'master_host',
+ 'master_log_file',
+ 'master_log_pos',
+ 'master_password',
+ 'master_port',
+ 'master_public_key_path',
+ 'master_retry_count',
+ 'master_server_id',
+ 'master_ssl',
+ 'master_ssl_ca',
+ 'master_ssl_capath',
+ 'master_ssl_cert',
+ 'master_ssl_cipher',
+ 'master_ssl_crl',
+ 'master_ssl_crlpath',
+ 'master_ssl_key',
+ 'master_ssl_verify_server_cert',
+ 'master_tls_ciphersuites',
+ 'master_tls_version',
+ 'master_user',
+ 'master_zstd_compression_level',
+ 'match',
+ 'max_connections_per_hour',
+ 'max_queries_per_hour',
+ 'max_rows',
+ 'max_size',
+ 'max_updates_per_hour',
+ 'max_user_connections',
+ 'maxvalue',
+ 'medium',
+ 'member',
+ 'memory',
+ 'merge',
+ 'message_text',
+ 'microsecond',
+ 'migrate',
+ 'min_rows',
+ 'minute',
+ 'minute_microsecond',
+ 'minute_second',
+ 'mod',
+ 'mode',
+ 'modifies',
+ 'modify',
+ 'month',
+ 'mutex',
+ 'mysql_errno',
+ 'name',
+ 'names',
+ 'natural',
+ 'ndb',
+ 'ndbcluster',
+ 'nested',
+ 'network_namespace',
+ 'never',
+ 'new',
+ 'next',
+ 'no',
+ 'no_wait',
+ 'no_write_to_binlog',
+ 'nodegroup',
+ 'none',
+ 'not',
+ 'nowait',
+ 'nth_value',
+ 'ntile',
+ 'null',
+ 'nulls',
+ 'number',
+ 'of',
+ 'off',
+ 'offset',
+ 'oj',
+ 'old',
+ 'on',
+ 'one',
+ 'only',
+ 'open',
+ 'optimize',
+ 'optimizer_costs',
+ 'option',
+ 'optional',
+ 'optionally',
+ 'options',
+ 'or',
+ 'order',
+ 'ordinality',
+ 'organization',
+ 'others',
+ 'out',
+ 'outer',
+ 'outfile',
+ 'over',
+ 'owner',
+ 'pack_keys',
+ 'page',
+ 'parser',
+ 'partial',
+ 'partition',
+ 'partitioning',
+ 'partitions',
+ 'password',
+ 'password_lock_time',
+ 'path',
+ 'percent_rank',
+ 'persist',
+ 'persist_only',
+ 'phase',
+ 'plugin',
+ 'plugin_dir',
+ 'plugins',
+ 'port',
+ 'precedes',
+ 'preceding',
+ 'prepare',
+ 'preserve',
+ 'prev',
+ 'primary',
+ 'privilege_checks_user',
+ 'privileges',
+ 'procedure',
+ 'process',
+ 'processlist',
+ 'profile',
+ 'profiles',
+ 'proxy',
+ 'purge',
+ 'quarter',
+ 'query',
+ 'quick',
+ 'random',
+ 'range',
+ 'rank',
+ 'read',
+ 'read_only',
+ 'read_write',
+ 'reads',
+ 'rebuild',
+ 'recover',
+ 'recursive',
+ 'redo_buffer_size',
+ 'redundant',
+ 'reference',
+ 'references',
+ 'regexp',
+ 'relay',
+ 'relay_log_file',
+ 'relay_log_pos',
+ 'relay_thread',
+ 'relaylog',
+ 'release',
+ 'reload',
+ 'remove',
+ 'rename',
+ 'reorganize',
+ 'repair',
+ 'repeat',
+ 'repeatable',
+ 'replace',
+ 'replicate_do_db',
+ 'replicate_do_table',
+ 'replicate_ignore_db',
+ 'replicate_ignore_table',
+ 'replicate_rewrite_db',
+ 'replicate_wild_do_table',
+ 'replicate_wild_ignore_table',
+ 'replication',
+ 'require',
+ 'require_row_format',
+ 'require_table_primary_key_check',
+ 'reset',
+ 'resignal',
+ 'resource',
+ 'respect',
+ 'restart',
+ 'restore',
+ 'restrict',
+ 'resume',
+ 'retain',
+ 'return',
+ 'returned_sqlstate',
+ 'returning',
+ 'returns',
+ 'reuse',
+ 'reverse',
+ 'revoke',
+ 'right',
+ 'rlike',
+ 'role',
+ 'rollback',
+ 'rollup',
+ 'rotate',
+ 'routine',
+ 'row',
+ 'row_count',
+ 'row_format',
+ 'row_number',
+ 'rows',
+ 'rtree',
+ 'savepoint',
+ 'schedule',
+ 'schema',
+ 'schema_name',
+ 'schemas',
+ 'second',
+ 'second_microsecond',
+ 'secondary',
+ 'secondary_engine',
+ 'secondary_engine_attribute',
+ 'secondary_load',
+ 'secondary_unload',
+ 'security',
+ 'select',
+ 'sensitive',
+ 'separator',
+ 'serializable',
+ 'server',
+ 'session',
+ 'share',
+ 'show',
+ 'shutdown',
+ 'signal',
+ 'signed',
+ 'simple',
+ 'skip',
+ 'slave',
+ 'slow',
+ 'snapshot',
+ 'socket',
+ 'some',
+ 'soname',
+ 'sounds',
+ 'source',
+ 'spatial',
+ 'specific',
+ 'sql',
+ 'sql_after_gtids',
+ 'sql_after_mts_gaps',
+ 'sql_before_gtids',
+ 'sql_big_result',
+ 'sql_buffer_result',
+ 'sql_calc_found_rows',
+ 'sql_no_cache',
+ 'sql_small_result',
+ 'sql_thread',
+ 'sql_tsi_day',
+ 'sql_tsi_hour',
+ 'sql_tsi_minute',
+ 'sql_tsi_month',
+ 'sql_tsi_quarter',
+ 'sql_tsi_second',
+ 'sql_tsi_week',
+ 'sql_tsi_year',
+ 'sqlexception',
+ 'sqlstate',
+ 'sqlwarning',
+ 'srid',
+ 'ssl',
+ 'stacked',
+ 'start',
+ 'starting',
+ 'starts',
+ 'stats_auto_recalc',
+ 'stats_persistent',
+ 'stats_sample_pages',
+ 'status',
+ 'stop',
+ 'storage',
+ 'stored',
+ 'straight_join',
+ 'stream',
+ 'string',
+ 'subclass_origin',
+ 'subject',
+ 'subpartition',
+ 'subpartitions',
+ 'super',
+ 'suspend',
+ 'swaps',
+ 'switches',
+ 'system',
+ 'table',
+ 'table_checksum',
+ 'table_name',
+ 'tables',
+ 'tablespace',
+ 'temporary',
+ 'temptable',
+ 'terminated',
+ 'than',
+ 'then',
+ 'thread_priority',
+ 'ties',
+ 'timestampadd',
+ 'timestampdiff',
+ 'tls',
+ 'to',
+ 'trailing',
+ 'transaction',
+ 'trigger',
+ 'triggers',
+ 'true',
+ 'truncate',
+ 'type',
+ 'types',
+ 'unbounded',
+ 'uncommitted',
+ 'undefined',
+ 'undo',
+ 'undo_buffer_size',
+ 'undofile',
+ 'unicode',
+ 'uninstall',
+ 'union',
+ 'unique',
+ 'unknown',
+ 'unlock',
+ 'unsigned',
+ 'until',
+ 'update',
+ 'upgrade',
+ 'usage',
+ 'use',
+ 'use_frm',
+ 'user',
+ 'user_resources',
+ 'using',
+ 'utc_date',
+ 'utc_time',
+ 'utc_timestamp',
+ 'validation',
+ 'value',
+ 'values',
+ 'variables',
+ 'vcpu',
+ 'view',
+ 'virtual',
+ 'visible',
+ 'wait',
+ 'warnings',
+ 'week',
+ 'weight_string',
+ 'when',
+ 'where',
+ 'while',
+ 'window',
+ 'with',
+ 'without',
+ 'work',
+ 'wrapper',
+ 'write',
+ 'x509',
+ 'xa',
+ 'xid',
+ 'xml',
+ 'xor',
+ 'year_month',
+ 'zerofill',
+)
+
+
+if __name__ == '__main__': # pragma: no cover
+ import re
+ from urllib.request import urlopen
+
+ from pygments.util import format_lines
+
+ # MySQL source code
+ SOURCE_URL = 'https://github.com/mysql/mysql-server/raw/8.0'
+ LEX_URL = SOURCE_URL + '/sql/lex.h'
+ ITEM_CREATE_URL = SOURCE_URL + '/sql/item_create.cc'
+
+
+ def update_myself():
+     """Regenerate the auto-generated tuples in this file.
+
+     Downloads ``lex.h`` and ``item_create.cc`` from the MySQL source tree
+     at SOURCE_URL, extracts keywords, function names and optimizer hints
+     from them, and rewrites the MYSQL_FUNCTIONS, MYSQL_KEYWORDS and
+     MYSQL_OPTIMIZER_HINTS definitions in this file with the sorted results.
+
+     Raises ValueError (from the parse/update helpers) when a source file
+     yields no matches or an existing definition cannot be located.
+     """
+     # Pull content from lex.h. The response is used as a context manager
+     # so the HTTP connection is closed as soon as the body has been read.
+     with urlopen(LEX_URL) as response:
+         lex_file = response.read().decode('utf8', errors='ignore')
+     keywords = parse_lex_keywords(lex_file)
+     functions = parse_lex_functions(lex_file)
+     optimizer_hints = parse_lex_optimizer_hints(lex_file)
+
+     # Parse content in item_create.cc.
+     with urlopen(ITEM_CREATE_URL) as response:
+         item_create_file = response.read().decode('utf8', errors='ignore')
+     functions.update(parse_item_create_functions(item_create_file))
+
+     # Remove data types from the set of keywords; they are maintained
+     # manually in MYSQL_DATATYPES and tokenized separately.
+     keywords -= set(MYSQL_DATATYPES)
+
+     update_content('MYSQL_FUNCTIONS', tuple(sorted(functions)))
+     update_content('MYSQL_KEYWORDS', tuple(sorted(keywords)))
+     update_content('MYSQL_OPTIMIZER_HINTS', tuple(sorted(optimizer_hints)))
+
+
+ def parse_lex_keywords(f):
+     """Collect the keyword names declared in lex.h.
+
+     ``f`` is the text of lex.h. Every ``{SYM("...",`` or ``{SYM_HK("...",``
+     entry contributes its quoted name, lower-cased.
+
+     Returns a set of names. Raises ValueError when no entry is found.
+     """
+
+     pattern = r'{SYM(?:_HK)?\("(?P<keyword>[a-z0-9_]+)",'
+     found = {
+         match.group('keyword').lower()
+         for match in re.finditer(pattern, f, flags=re.I)
+     }
+
+     if found:
+         return found
+
+     raise ValueError('No keywords found')
+
+
+ def parse_lex_optimizer_hints(f):
+     """Collect the optimizer hint names declared in lex.h.
+
+     ``f`` is the text of lex.h. Every ``{SYM_H("...",`` entry contributes
+     its quoted name, lower-cased.
+
+     Returns a set of names. Raises ValueError when no entry is found.
+     """
+
+     pattern = r'{SYM_H\("(?P<keyword>[a-z0-9_]+)",'
+     hints = {
+         match.group('keyword').lower()
+         for match in re.finditer(pattern, f, flags=re.I)
+     }
+
+     if hints:
+         return hints
+
+     raise ValueError('No optimizer hints found')
+
+
+ def parse_lex_functions(f):
+     """Collect the function names declared in lex.h.
+
+     ``f`` is the text of lex.h. Every ``{SYM_F("...",`` or ``{SYM_FN("...",``
+     entry contributes its quoted name, lower-cased.
+
+     Returns a set of names. Raises ValueError when no entry is found.
+     """
+
+     pattern = r'{SYM_FN?\("(?P<function>[a-z0-9_]+)",'
+     names = {
+         match.group('function').lower()
+         for match in re.finditer(pattern, f, flags=re.I)
+     }
+
+     if names:
+         return names
+
+     raise ValueError('No lex functions found')
+
+
+ def parse_item_create_functions(f):
+     """Collect the function names declared in item_create.cc.
+
+     ``f`` is the text of item_create.cc. Every ``{"NAME", SQL_F...(`` entry
+     contributes its quoted name, lower-cased.
+
+     Returns a set of names. Raises ValueError when no entry is found.
+     """
+
+     pattern = r'{"(?P<function>[^"]+?)",\s*SQL_F[^(]+?\('
+     names = {
+         match.group('function').lower()
+         for match in re.finditer(pattern, f, flags=re.I)
+     }
+
+     if names:
+         return names
+
+     raise ValueError('No item_create functions found')
+
+
+ def update_content(field_name, content):
+     """Overwrite this file with content parsed from MySQL's source code.
+
+     Locates the existing ``field_name = ( ... )`` definition in this file
+     and replaces it with a block built by ``format_lines(field_name,
+     content)``; the rest of the file is written back unchanged.
+
+     Raises ValueError if no existing definition of ``field_name`` is found.
+     """
+
+     # This file declares itself as UTF-8, so read and write it as UTF-8
+     # explicitly instead of relying on the platform's default encoding.
+     with open(__file__, encoding='utf-8') as f:
+         data = f.read()
+
+     # Line to start/end inserting: from "NAME = (" at the start of a line
+     # through the first line that consists only of the closing parenthesis.
+     re_match = re.compile(r'^%s\s*=\s*\($.*?^\s*\)$' % field_name, re.M | re.S)
+     m = re_match.search(data)
+     if not m:
+         raise ValueError('Could not find an existing definition for %s' % field_name)
+
+     new_block = format_lines(field_name, content)
+     data = data[:m.start()] + new_block + data[m.end():]
+
+     # Always write LF line endings, regardless of platform.
+     with open(__file__, 'w', encoding='utf-8', newline='\n') as f:
+         f.write(data)
+
+ update_myself()
diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py
index d58abe2a..7cce4d40 100644
--- a/pygments/lexers/sql.py
+++ b/pygments/lexers/sql.py
@@ -42,11 +42,18 @@ import re
from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words
from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \
- Keyword, Name, String, Number, Generic
+ Keyword, Name, String, Number, Generic, Literal
from pygments.lexers import get_lexer_by_name, ClassNotFound
from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
PSEUDO_TYPES, PLPGSQL_KEYWORDS
+from pygments.lexers._mysql_builtins import \
+ MYSQL_CONSTANTS, \
+ MYSQL_DATATYPES, \
+ MYSQL_FUNCTIONS, \
+ MYSQL_KEYWORDS, \
+ MYSQL_OPTIMIZER_HINTS
+
from pygments.lexers import _tsql_builtins
@@ -579,8 +586,12 @@ class TransactSqlLexer(RegexLexer):
class MySqlLexer(RegexLexer):
- """
- Special lexer for MySQL.
+ """The Oracle MySQL lexer.
+
+ This lexer does not attempt to maintain strict compatibility with
+ MariaDB syntax or keywords. Although MySQL and MariaDB's common code
+ history suggests there may be significant overlap between the two,
+ compatibility between the two is not a target for this lexer.
"""
name = 'MySQL'
@@ -591,63 +602,149 @@ class MySqlLexer(RegexLexer):
tokens = {
'root': [
(r'\s+', Text),
- (r'(#|--\s+).*\n?', Comment.Single),
- (r'/\*', Comment.Multiline, 'multiline-comments'),
+
+ # Comments
+ (r'(?:#|--\s+).*', Comment.Single),
+ (r'/\*\+', Comment.Special, 'optimizer-hints'),
+ (r'/\*', Comment.Multiline, 'multiline-comment'),
+
+ # Hexadecimal literals
+ (r"x'([0-9a-f]{2})+'", Number.Hex), # MySQL requires paired hex characters in this form.
+ (r'0x[0-9a-f]+', Number.Hex),
+
+ # Binary literals
+ (r"b'[01]+'", Number.Bin),
+ (r'0b[01]+', Number.Bin),
+
+ # Numeric literals
+ (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float), # Mandatory integer, optional fraction and exponent
+ (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float), # Mandatory fraction, optional integer and exponent
+ (r'[0-9]+e[+-]?[0-9]+', Number.Float), # Exponents with integer significands are still floats
(r'[0-9]+', Number.Integer),
- (r'[0-9]*\.[0-9]+(e[+-][0-9]+)', Number.Float),
- (r"'(\\\\|\\'|''|[^'])*'", String.Single),
- (r'"(\\\\|\\"|""|[^"])*"', String.Double),
- (r"`(\\\\|\\`|``|[^`])*`", String.Symbol),
- (r'[+*/<>=~!@#%^&|`?-]', Operator),
- (r'\b(tinyint|smallint|mediumint|int|integer|bigint|date|'
- r'datetime|time|bit|bool|tinytext|mediumtext|longtext|text|'
- r'tinyblob|mediumblob|longblob|blob|float|double|double\s+'
- r'precision|real|numeric|dec|decimal|timestamp|year|char|'
- r'varchar|varbinary|varcharacter|enum|set)(\b\s*)(\()?',
- bygroups(Keyword.Type, Text, Punctuation)),
- (r'\b(add|all|alter|analyze|and|as|asc|asensitive|before|between|'
- r'bigint|binary|blob|both|by|call|cascade|case|change|char|'
- r'character|check|collate|column|condition|constraint|continue|'
- r'convert|create|cross|current_date|current_time|'
- r'current_timestamp|current_user|cursor|database|databases|'
- r'day_hour|day_microsecond|day_minute|day_second|dec|decimal|'
- r'declare|default|delayed|delete|desc|describe|deterministic|'
- r'distinct|distinctrow|div|double|drop|dual|each|else|elseif|'
- r'enclosed|escaped|exists|exit|explain|fetch|flush|float|float4|'
- r'float8|for|force|foreign|from|fulltext|grant|group|having|'
- r'high_priority|hour_microsecond|hour_minute|hour_second|if|'
- r'ignore|in|index|infile|inner|inout|insensitive|insert|int|'
- r'int1|int2|int3|int4|int8|integer|interval|into|is|iterate|'
- r'join|key|keys|kill|leading|leave|left|like|limit|lines|load|'
- r'localtime|localtimestamp|lock|long|loop|low_priority|match|'
- r'minute_microsecond|minute_second|mod|modifies|natural|'
- r'no_write_to_binlog|not|numeric|on|optimize|option|optionally|'
- r'or|order|out|outer|outfile|precision|primary|procedure|purge|'
- r'raid0|read|reads|real|references|regexp|release|rename|repeat|'
- r'replace|require|restrict|return|revoke|right|rlike|schema|'
- r'schemas|second_microsecond|select|sensitive|separator|set|'
- r'show|smallint|soname|spatial|specific|sql|sql_big_result|'
- r'sql_calc_found_rows|sql_small_result|sqlexception|sqlstate|'
- r'sqlwarning|ssl|starting|straight_join|table|terminated|then|'
- r'to|trailing|trigger|undo|union|unique|unlock|unsigned|update|'
- r'usage|use|using|utc_date|utc_time|utc_timestamp|values|'
- r'varying|when|where|while|with|write|x509|xor|year_month|'
- r'zerofill)\b', Keyword),
- # TODO: this list is not complete
- (r'\b(auto_increment|engine|charset|tables)\b', Keyword.Pseudo),
- (r'(true|false|null)', Name.Constant),
- (r'([a-z_]\w*)(\s*)(\()',
+
+ # Date literals
+ (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}", Literal.Date),
+
+ # Time literals
+ (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}", Literal.Date),
+
+ # Timestamp literals
+ (
+ r"\{\s*ts\s*(?P<quote>['\"])\s*"
+ r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}" # Date part
+ r"\s+" # Whitespace between date and time
+ r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?" # Time part
+ r"\s*(?P=quote)\s*\}",
+ Literal.Date
+ ),
+
+ # String literals
+ (r"'", String.Single, 'single-quoted-string'),
+ (r'"', String.Double, 'double-quoted-string'),
+
+ # Variables
+ (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
+ (r'@[a-z0-9_$.]+', Name.Variable),
+ (r"@'", Name.Variable, 'single-quoted-variable'),
+ (r'@"', Name.Variable, 'double-quoted-variable'),
+ (r"@`", Name.Variable, 'backtick-quoted-variable'),
+ (r'\?', Name.Variable), # For demonstrating prepared statements
+
+ # Operators
+ (r'[!%&*+/:<=>^|~-]+', Operator),
+
+ # Exceptions; these words tokenize differently in different contexts.
+ (r'\b(set)(?!\s*\()', Keyword),
+ (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Text, Keyword)),
+ # In all other known cases, "SET" is tokenized by MYSQL_DATATYPES.
+
+ (words(MYSQL_CONSTANTS, prefix=r'\b', suffix=r'\b'), Name.Constant),
+ (words(MYSQL_DATATYPES, prefix=r'\b', suffix=r'\b'), Keyword.Type),
+ (words(MYSQL_KEYWORDS, prefix=r'\b', suffix=r'\b'), Keyword),
+ (words(MYSQL_FUNCTIONS, prefix=r'\b', suffix=r'\b(\s*)(\()'),
bygroups(Name.Function, Text, Punctuation)),
- (r'[a-z_]\w*', Name),
- (r'@[a-z0-9]*[._]*[a-z0-9]*', Name.Variable),
- (r'[;:()\[\],.]', Punctuation)
+
+ # Schema object names
+ #
+ # Note: Although the first regex supports unquoted all-numeric
+ # identifiers, this will not be a problem in practice because
+ # numeric literals have already been handled above.
+ #
+ ('[0-9a-z$_\u0080-\uffff]+', Name),
+ (r'`', Name, 'schema-object-name'),
+
+ # Punctuation
+ (r'[(),.;]', Punctuation),
],
- 'multiline-comments': [
- (r'/\*', Comment.Multiline, 'multiline-comments'),
+
+ # Multiline comment substates
+ # ---------------------------
+
+ 'optimizer-hints': [
+ (r'[^*a-z]+', Comment.Special),
+ (r'\*/', Comment.Special, '#pop'),
+ (words(MYSQL_OPTIMIZER_HINTS, suffix=r'\b'), Comment.Preproc),
+ ('[a-z]+', Comment.Special),
+ (r'\*', Comment.Special),
+ ],
+
+ 'multiline-comment': [
+ (r'[^*]+', Comment.Multiline),
(r'\*/', Comment.Multiline, '#pop'),
- (r'[^/*]+', Comment.Multiline),
- (r'[/*]', Comment.Multiline)
- ]
+ (r'\*', Comment.Multiline),
+ ],
+
+ # String substates
+ # ----------------
+
+ 'single-quoted-string': [
+ (r"[^'\\]+", String.Single),
+ (r"''", String.Escape),
+ (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
+ (r"'", String.Single, '#pop'),
+ ],
+
+ 'double-quoted-string': [
+ (r'[^"\\]+', String.Double),
+ (r'""', String.Escape),
+ (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
+ (r'"', String.Double, '#pop'),
+ ],
+
+ # Variable substates
+ # ------------------
+
+ 'single-quoted-variable': [
+ (r"[^']+", Name.Variable),
+ (r"''", Name.Variable),
+ (r"'", Name.Variable, '#pop'),
+ ],
+
+ 'double-quoted-variable': [
+ (r'[^"]+', Name.Variable),
+ (r'""', Name.Variable),
+ (r'"', Name.Variable, '#pop'),
+ ],
+
+ 'backtick-quoted-variable': [
+ (r'[^`]+', Name.Variable),
+ (r'``', Name.Variable),
+ (r'`', Name.Variable, '#pop'),
+ ],
+
+ # Schema object name substates
+ # ----------------------------
+ #
+ # Backtick-quoted schema object names support escape characters.
+ # It may be desirable to tokenize escape sequences differently,
+ # but currently Pygments does not have an obvious token type for
+ # this unique situation (for example, "Name.Escape").
+ #
+ 'schema-object-name': [
+ (r'[^`\\]+', Name),
+ (r'(?:\\\\|\\`|``)', Name), # This could be an escaped name token type.
+ (r'`', Name, '#pop'),
+ ],
}
def analyse_text(text):
diff --git a/tests/examplefiles/mysql.txt b/tests/examplefiles/mysql.txt
new file mode 100644
index 00000000..4927abd8
--- /dev/null
+++ b/tests/examplefiles/mysql.txt
@@ -0,0 +1,132 @@
+-- Samples of MySQL parsing
+
+
+-- Comments
+# standalone comment line
+-- standalone comment line
+SELECT 1; -- trailing comment
+SELECT 1; # trailing comment
+SELECT 1; /* trailing comment */
+SELECT /* interruption */ /**/ 1;
+ /*
+ Multiline / * / comment
+ */
+ /* /* MySQL does not support nested comments */
+SELECT 'If this line is a comment then nested commenting is enabled (and therefore broken).';
+
+
+-- Optimizer hints
+SELECT /*+ SEMIJOIN(FIRSTMATCH, LOOSESCAN) */ 1;
+SELECT /*+ SET_VAR(foreign_key_checks=OFF) */ 1;
+
+
+-- Literals
+SELECT
+ -- Integers
+ 123,
+
+ -- Floats
+ .123, 1.23, 123.,
+
+ -- Exponents
+ 1e10, 1e-10, 1.e20, .1e-20,
+
+ -- Hexadecimal
+ X'0af019', x'0AF019', 0xaf019,
+
+ -- Binary
+ B'010', b'010', 0b010,
+
+ -- Temporal literals
+ {d'2020-01-01'}, { d ' 2020^01@01 ' },
+ {t'8 9:10:11'}, { t ' 09:10:11.12 ' }, { t ' 091011 ' },
+ {ts"2020-01-01 09:10:11"}, { ts ' 2020@01/01 09:10:11 ' },
+
+ -- Strings
+ '', 'abc', '1''2\03\%4\_5\\6\'7\"8',
+ "", "abc", "1""2\03\%4\_5\\6\'7\"8",
+;
+
+
+-- Variables
+SET @a = 1, @1 = 2, @._.$ = 3;
+SET @'?' = 1, @'abc''def"`ghi' = 2;
+SET @"#" = 1, @"abc""def'`ghi" = 2;
+SET @`^` = 1, @`abc``def'"ghi` = 2;
+SELECT
+ @@timestamp,
+ @@global.auto_increment_offset,
+ @@session.auto_increment_offset,
+ @@auto_increment_offset
+;
+
+
+-- Prepared statements
+SELECT POW(?, 3) AS cubed;
+
+
+-- Constants
+SELECT TRUE, FALSE, NULL, UNKNOWN;
+
+
+-- Data types
+CREATE TABLE table1 (
+ id INT AUTO_INCREMENT PRIMARY KEY,
+ name VARCHAR(20) NOT NULL,
+ birthyear YEAR
+);
+
+
+-- Keywords
+INSERT INTO table1 (person, birthyear) VALUES ('abc', 2020);
+
+WITH RECURSIVE example (n) AS (
+ SELECT 1
+ UNION ALL
+ SELECT n + 1 FROM example
+ WHERE n < 10
+)
+SELECT n FROM example;
+
+SELECT 17 MEMBER OF ('[23, "abc", 17, "ab", 10]');
+
+
+-- Functions
+SELECT CONCAT('function');
+SELECT MAX(quantity) FROM example;
+
+
+-- Schema object names
+CREATE TABLE basic (
+ example INT,
+ 股票编号 INT,
+ `select` INT,
+ `concat(` INT
+);
+
+SELECT e1.`apple` AS a, `example2`.b
+FROM example1 AS e1
+JOIN example2 e2
+ON `example1`.`id` = e2.id;
+
+
+-- Operators
+SELECT 1 + 2 - 3 << 2;
+SELECT 1::DECIMAL(5, 2);
+SET @a = 1;
+SET a := 1;
+SELECT c->>'$.name' FROM example;
+
+
+
+-- Exceptions
+CREATE TABLE t1
+(
+ c1 VARCHAR(5) CHARACTER SET latin1,
+ c2 SET('r', 'g', 'b')
+);
+
+
+-- Introducers
+SELECT _latin1'abc';
+SELECT _binary'abc';
diff --git a/tests/test_mysql.py b/tests/test_mysql.py
new file mode 100644
index 00000000..9b5e2b8c
--- /dev/null
+++ b/tests/test_mysql.py
@@ -0,0 +1,249 @@
+# -*- coding: utf-8 -*-
+"""
+ Pygments MySQL lexer tests
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import pytest
+
+from pygments.lexers.sql import MySqlLexer
+
+from pygments.token import \
+ Comment, \
+ Keyword, \
+ Literal, \
+ Name, \
+ Number, \
+ Operator, \
+ Punctuation, \
+ String, \
+ Text
+
+
+@pytest.fixture(scope='module')
+def lexer():
+    """Yield a ``MySqlLexer`` instance created once per test module."""
+    mysql_lexer = MySqlLexer()
+    yield mysql_lexer
+
+
+@pytest.mark.parametrize('text', ('123',))
+def test_integer_literals(lexer, text):
+    """The first token of an integer literal is ``Number.Integer``."""
+    tokens = list(lexer.get_tokens(text))
+    assert tokens[0] == (Number.Integer, text)
+
+
+@pytest.mark.parametrize('text', (
+    # Decimal point in leading, embedded, and trailing position.
+    '.123', '1.23', '123.',
+    # Exponent notation, with and without a sign or a fraction.
+    '1e10', '1.0e10', '1.e-10', '.1e+10',
+))
+def test_float_literals(lexer, text):
+    """The first token of a float literal is ``Number.Float``."""
+    tokens = list(lexer.get_tokens(text))
+    assert tokens[0] == (Number.Float, text)
+
+
+@pytest.mark.parametrize('text', ("X'0af019'", "x'0AF019'", "0xaf019"))
+def test_hexadecimal_literals(lexer, text):
+    """Quoted ``X'..'``/``x'..'`` and ``0x`` forms lex as ``Number.Hex``."""
+    tokens = list(lexer.get_tokens(text))
+    assert tokens[0] == (Number.Hex, text)
+
+
+@pytest.mark.parametrize('text', ("B'010'", "b'010'", "0b010"))
+def test_binary_literals(lexer, text):
+    """Quoted ``B'..'``/``b'..'`` and ``0b`` forms lex as ``Number.Bin``."""
+    tokens = list(lexer.get_tokens(text))
+    assert tokens[0] == (Number.Bin, text)
+
+
+@pytest.mark.parametrize('text', (
+    # {d ...} forms
+    "{d'2020-01-01'}", "{ d ' 2020^01@01 ' }",
+    # {t ...} forms
+    "{t'8 9:10:11'}", "{ t ' 09:10:11.12 ' }", "{ t ' 091011 ' }",
+    # {ts ...} forms
+    '{ts"2020-01-01 09:10:11"}', "{ ts ' 2020@01/01 09:10:11 ' }",
+))
+def test_temporal_literals(lexer, text):
+    """A whole brace-delimited temporal literal is one ``Literal.Date``."""
+    tokens = list(lexer.get_tokens(text))
+    assert tokens[0] == (Literal.Date, text)
+
+
+@pytest.mark.parametrize(
+    'text, expected_types',
+    (
+        (r"'a'", (String.Single,) * 3),
+        (r"""'""'""", (String.Single,) * 3),
+        (r"''''", (String.Single, String.Escape, String.Single)),
+        (r"'\''", (String.Single, String.Escape, String.Single)),
+        (r'"a"', (String.Double,) * 3),
+        (r'''"''"''', (String.Double,) * 3),
+        (r'""""', (String.Double, String.Escape, String.Double)),
+        (r'"\""', (String.Double, String.Escape, String.Double)),
+    ),
+)
+def test_string_literals(lexer, text, expected_types):
+    """Check the token types at the start of quoted string literals.
+
+    The leading token types are compared with ``expected_types`` as one
+    tuple instead of pairwise through ``zip()``.  ``zip()`` stops at the
+    shorter input, so a lexer that produced too few tokens could pass
+    with some expectations never checked.  Comparing whole tuples also
+    lets pytest show exactly which position differs.
+    """
+    tokens = list(lexer.get_tokens(text))[:len(expected_types)]
+    assert tuple(token_type for token_type, _ in tokens) == expected_types
+
+
+@pytest.mark.parametrize('text', (
+    # @-prefixed names: bare, single-, double-, and backtick-quoted.
+    "@a", "@1", "@._.$",
+    "@'?'", """@'abc''def"`ghi'""",
+    '@"#"', '''@"abc""def'`ghi"''',
+    '@`^`', """@`abc``def'"ghi`""",
+    # @@-prefixed names, with and without a scope qualifier.
+    "@@timestamp",
+    "@@session.auto_increment_offset",
+    "@@global.auto_increment_offset",
+    "@@persist.auto_increment_offset",
+    "@@persist_only.auto_increment_offset",
+    # Bare placeholder.
+    '?',
+))
+def test_variables(lexer, text):
+    """Every token but the last is ``Name.Variable``; the text round-trips."""
+    lexed = list(lexer.get_tokens(text))
+    for token_type, _ in lexed[:-1]:
+        assert token_type == Name.Variable
+    rebuilt = ''.join(value for _, value in lexed)
+    assert rebuilt.strip() == text.strip()
+
+
+@pytest.mark.parametrize('text', ('true', 'false', 'null', 'unknown'))
+def test_constants(lexer, text):
+    """The first token of each constant word is ``Name.Constant``."""
+    tokens = list(lexer.get_tokens(text))
+    assert tokens[0] == (Name.Constant, text)
+
+
+@pytest.mark.parametrize('text', ('-- abc', '--\tabc', '#abc'))
+def test_comments_single_line(lexer, text):
+    """Single-line comments lex as ``Comment.Single``, alone or after code."""
+    # On its own, the comment is the first token.
+    alone = list(lexer.get_tokens(text))
+    assert alone[0] == (Comment.Single, text)
+
+    # Placed between two keywords, the comment is the second token and the
+    # keyword on the following line is the next-to-last token.
+    mixed = list(lexer.get_tokens(''.join(('select', text, '\nselect'))))
+    assert mixed[0] == (Keyword, 'select')
+    assert mixed[1] == (Comment.Single, text)
+    assert mixed[-2] == (Keyword, 'select')
+
+
+@pytest.mark.parametrize('text', (
+    '/**/a', '/*a*b/c*/a', '/*\nabc\n*/a',
+    '/* /* */a',
+))
+def test_comments_multi_line(lexer, text):
+    """All tokens except the final two are ``Comment.Multiline``.
+
+    Each input is a comment followed by ``a``; the lexed values must also
+    join back into the original text.
+    """
+    lexed = list(lexer.get_tokens(text))
+    assert all(kind == Comment.Multiline for kind, _ in lexed[:-2])
+    assert ''.join(value for _, value in lexed).strip() == text.strip()
+
+    # Nested comments are unsupported: the token right after the comment
+    # must not itself be part of a multiline comment.
+    after_comment = lexed[-2]
+    assert after_comment[0] != Comment.Multiline
+
+
+@pytest.mark.parametrize('text', ('BKA', 'SEMIJOIN'))
+def test_optimizer_hints(lexer, text):
+    """A hint name is ``Comment.Preproc`` only as a whole word in ``/*+ */``."""
+    hint_token = (Comment.Preproc, text)
+
+    # Inside a "/*+" comment the bare hint name is highlighted.
+    assert hint_token in lexer.get_tokens('/*+ ' + text + '(), */')
+
+    rejected = (
+        '/* ' + text + ' */',       # ordinary comment, no "+" marker
+        '/*+ a' + text + '() */',   # hint name with an extra leading letter
+        '/*+ ' + text + 'a */',     # hint name with an extra trailing letter
+    )
+    for comment in rejected:
+        assert hint_token not in lexer.get_tokens(comment)
+
+
+@pytest.mark.parametrize(
+    'text, expected_types',
+    (
+        # SET exceptions
+        ('SET', (Keyword,)),
+        ('SET abc = 1;', (Keyword,)),
+        ('SET @abc = 1;', (Keyword,)),
+        ('CHARACTER SET latin1', (Keyword, Text, Keyword)),
+        ('SET("r", "g", "b")', (Keyword.Type, Punctuation)),
+        ('SET ("r", "g", "b")', (Keyword.Type, Text, Punctuation)),
+    ),
+)
+def test_exceptions(lexer, text, expected_types):
+    """Check words whose token type depends on what surrounds them.
+
+    ``SET`` is expected to be a ``Keyword`` in statements and after
+    ``CHARACTER``, but a ``Keyword.Type`` when a parenthesis follows.
+
+    The leading token types are compared with ``expected_types`` as one
+    tuple instead of pairwise through ``zip()``.  ``zip()`` stops at the
+    shorter input, so a lexer that produced too few tokens could pass
+    with some expectations never checked.
+    """
+    tokens = list(lexer.get_tokens(text))[:len(expected_types)]
+    assert tuple(token_type for token_type, _ in tokens) == expected_types
+
+
+@pytest.mark.parametrize('text', (
+    'SHOW', 'CREATE', 'ALTER', 'DROP',
+    'SELECT', 'INSERT', 'UPDATE', 'DELETE',
+    'WHERE', 'GROUP', 'ORDER', 'BY', 'AS',
+    'DISTINCT', 'JOIN', 'WITH', 'RECURSIVE',
+    'PARTITION', 'NTILE', 'MASTER_PASSWORD', 'XA',
+    'REQUIRE_TABLE_PRIMARY_KEY_CHECK', 'STREAM',
+))
+def test_keywords(lexer, text):
+    """The first token of each listed word is a plain ``Keyword``."""
+    tokens = list(lexer.get_tokens(text))
+    assert tokens[0] == (Keyword, text)
+
+
+@pytest.mark.parametrize('text', (
+    # Standard
+    'INT(', 'VARCHAR(', 'ENUM(', 'DATETIME', 'GEOMETRY', 'POINT', 'JSON',
+    # Aliases and compatibility
+    'FIXED', 'MEDIUMINT', 'INT3', 'REAL', 'SERIAL',
+    'LONG', 'NATIONAL', 'PRECISION', 'VARYING',
+))
+def test_data_types(lexer, text):
+    """The first token of each data type name is ``Keyword.Type``.
+
+    Some inputs end with ``(``; the parenthesis is stripped from the
+    expected token value, so only the type name itself must match.
+    """
+    expected = (Keyword.Type, text.strip('('))
+    tokens = list(lexer.get_tokens(text))
+    assert tokens[0] == expected
+
+
+@pytest.mark.parametrize('text', (
+    # Common
+    'CAST', 'CONCAT_WS', 'DAYNAME', 'IFNULL', 'NOW', 'SUBSTR',
+    # Less common
+    'CAN_ACCESS_COLUMN', 'JSON_CONTAINS_PATH', 'ST_GEOMFROMGEOJSON',
+))
+def test_functions(lexer, text):
+    """A function name before ``(`` or `` (`` lexes as ``Name.Function``."""
+    expected = (Name.Function, text)
+    for call_start in (text + '(', text + ' ('):
+        tokens = list(lexer.get_tokens(call_start))
+        assert tokens[0] == expected
+
+
+@pytest.mark.parametrize('text', (
+    # Unquoted names, including non-ASCII ones.
+    'abc_$123', '上市年限', 'ひらがな',
+    # Backtick-quoted names, including words that are otherwise special.
+    '`a`', '`上市年限`', '`ひらがな`', '`select`', '`concat(`',
+    # Doubled and backslash-escaped characters inside backticks.
+    '````', r'`\``', r'`\\`',
+    # Comment openers inside backticks.
+    '`-- `', '`/*`', '`#`',
+))
+def test_schema_object_names(lexer, text):
+    """Every token but the last is ``Name`` and they rebuild the input."""
+    name_tokens = list(lexer.get_tokens(text))[:-1]
+    assert all(kind == Name for kind, _ in name_tokens)
+    assert ''.join(value for _, value in name_tokens) == text
+
+
+@pytest.mark.parametrize('text', (
+    '+', '*', '/', '%', '&&', ':=', '!', '<', '->>', '^', '|', '~',
+))
+def test_operators(lexer, text):
+    """Each operator, including multi-character ones, is one ``Operator``."""
+    tokens = list(lexer.get_tokens(text))
+    assert tokens[0] == (Operator, text)
+
+
+@pytest.mark.parametrize(
+    'text, expected_types',
+    (
+        ('abc.efg', (Name, Punctuation, Name)),
+        ('abc,efg', (Name, Punctuation, Name)),
+        ('MAX(abc)', (Name.Function, Punctuation, Name, Punctuation)),
+        ('efg;', (Name, Punctuation)),
+    ),
+)
+def test_punctuation(lexer, text, expected_types):
+    """Check that ``.``, ``,``, parentheses and ``;`` lex as ``Punctuation``.
+
+    The leading token types are compared with ``expected_types`` as one
+    tuple instead of pairwise through ``zip()``.  ``zip()`` stops at the
+    shorter input, so a lexer that produced too few tokens could pass
+    with some expectations never checked.
+    """
+    tokens = list(lexer.get_tokens(text))[:len(expected_types)]
+    assert tuple(token_type for token_type, _ in tokens) == expected_types