5 files changed, 1817 insertions, 57 deletions
diff --git a/AUTHORS b/AUTHORS
index 9b0ad009..5058c612 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -138,7 +138,7 @@ Other contributors, listed alphabetically, are:
 * Stephen McKamey -- Duel/JBST lexer
 * Brian McKenna -- F# lexer
 * Charles McLaughlin -- Puppet lexer
-* Kurt McKee -- Tera Term macro lexer, PostgreSQL updates
+* Kurt McKee -- Tera Term macro lexer, PostgreSQL updates, MySQL overhaul
 * Lukas Meuser -- BBCode formatter, Lua lexer
 * Cat Miller -- Pig lexer
 * Paul Miller -- LiveScript lexer
diff --git a/pygments/lexers/_mysql_builtins.py b/pygments/lexers/_mysql_builtins.py
new file mode 100644
index 00000000..121054c3
--- /dev/null
+++ b/pygments/lexers/_mysql_builtins.py
@@ -0,0 +1,1282 @@
+# -*- coding: utf-8 -*-
+"""
+    pygments.lexers._mysql_builtins
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    Self-updating data files for the MySQL lexer.
+
+    :copyright: Copyright 2020 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+
+MYSQL_CONSTANTS = (
+    'false',
+    'null',
+    'true',
+    'unknown',
+)
+
+
+# At this time, no easily-parsed, definitive list of data types
+# has been found in the MySQL source code or documentation. (The
+# `sql/sql_yacc.yy` file is definitive but is difficult to parse.)
+# Therefore these types are currently maintained manually.
+#
+# Some words in this list -- like "long", "national", "precision",
+# and "varying" -- appear to only occur in combination with other
+# data type keywords. Therefore they are included as separate words
+# even though they do not naturally occur in syntax separately.
+#
+# This list is also used to strip data types out of the list of
+# MySQL keywords, which is automatically updated later in the file.
+#
+MYSQL_DATATYPES = (
+    # Numeric data types
+    'bigint',
+    'bit',
+    'bool',
+    'boolean',
+    'dec',
+    'decimal',
+    'double',
+    'fixed',
+    'float',
+    'float4',
+    'float8',
+    'int',
+    'int1',
+    'int2',
+    'int3',
+    'int4',
+    'int8',
+    'integer',
+    'mediumint',
+    'middleint',
+    'numeric',
+    'precision',
+    'real',
+    'serial',
+    'smallint',
+    'tinyint',
+
+    # Date and time data types
+    'date',
+    'datetime',
+    'time',
+    'timestamp',
+    'year',
+
+    # String data types
+    'binary',
+    'blob',
+    'char',
+    'enum',
+    'long',
+    'longblob',
+    'longtext',
+    'mediumblob',
+    'mediumtext',
+    'national',
+    'nchar',
+    'nvarchar',
+    'set',
+    'text',
+    'tinyblob',
+    'tinytext',
+    'varbinary',
+    'varchar',
+    'varcharacter',
+    'varying',
+
+    # Spatial data types
+    'geometry',
+    'geometrycollection',
+    'linestring',
+    'multilinestring',
+    'multipoint',
+    'multipolygon',
+    'point',
+    'polygon',
+
+    # JSON data types
+    'json',
+)
+
+# Everything below this line is auto-generated from the MySQL source code.
+# Run this file in Python and it will update itself.
+# -----------------------------------------------------------------------------
+
+MYSQL_FUNCTIONS = (
+    'abs',
+    'acos',
+    'adddate',
+    'addtime',
+    'aes_decrypt',
+    'aes_encrypt',
+    'any_value',
+    'asin',
+    'atan',
+    'atan2',
+    'benchmark',
+    'bin',
+    'bin_to_uuid',
+    'bit_and',
+    'bit_count',
+    'bit_length',
+    'bit_or',
+    'bit_xor',
+    'can_access_column',
+    'can_access_database',
+    'can_access_event',
+    'can_access_resource_group',
+    'can_access_routine',
+    'can_access_table',
+    'can_access_trigger',
+    'can_access_view',
+    'cast',
+    'ceil',
+    'ceiling',
+    'char_length',
+    'character_length',
+    'coercibility',
+    'compress',
+    'concat',
+    'concat_ws',
+    'connection_id',
+    'conv',
+    'convert_cpu_id_mask',
+    'convert_interval_to_user_interval',
+    'convert_tz',
+    'cos',
+    'cot',
+    'count',
+    'crc32',
+    'curdate',
+    'current_role',
+    'curtime',
+    'date_add',
+    'date_format',
+    'date_sub',
+    'datediff',
+    'dayname',
+    'dayofmonth',
+    'dayofweek',
+    'dayofyear',
+    'degrees',
+    'elt',
+    'exp',
+    'export_set',
+    'extract',
+    'extractvalue',
+    'field',
+    'find_in_set',
+    'floor',
+    'format_bytes',
+    'format_pico_time',
+    'found_rows',
+    'from_base64',
+    'from_days',
+    'from_unixtime',
+    'get_dd_column_privileges',
+    'get_dd_create_options',
+    'get_dd_index_private_data',
+    'get_dd_index_sub_part_length',
+    'get_dd_property_key_value',
+    'get_dd_tablespace_private_data',
+    'get_lock',
+    'greatest',
+    'group_concat',
+    'gtid_subset',
+    'gtid_subtract',
+    'hex',
+    'icu_version',
+    'ifnull',
+    'inet6_aton',
+    'inet6_ntoa',
+    'inet_aton',
+    'inet_ntoa',
+    'instr',
+    'internal_auto_increment',
+    'internal_avg_row_length',
+    'internal_check_time',
+    'internal_checksum',
+    'internal_data_free',
+    'internal_data_length',
+    'internal_dd_char_length',
+    'internal_get_comment_or_error',
+    'internal_get_dd_column_extra',
+    'internal_get_enabled_role_json',
+    'internal_get_hostname',
+    'internal_get_mandatory_roles_json',
+    'internal_get_partition_nodegroup',
+    'internal_get_username',
+    'internal_get_view_warning_or_error',
+    'internal_index_column_cardinality',
+    'internal_index_length',
+    'internal_is_enabled_role',
+    'internal_is_mandatory_role',
+    'internal_keys_disabled',
+    'internal_max_data_length',
+    'internal_table_rows',
+    'internal_tablespace_autoextend_size',
+    'internal_tablespace_data_free',
+    'internal_tablespace_extent_size',
+    'internal_tablespace_extra',
+    'internal_tablespace_free_extents',
+    'internal_tablespace_id',
+    'internal_tablespace_initial_size',
+    'internal_tablespace_logfile_group_name',
+    'internal_tablespace_logfile_group_number',
+    'internal_tablespace_maximum_size',
+    'internal_tablespace_row_format',
+    'internal_tablespace_status',
+    'internal_tablespace_total_extents',
+    'internal_tablespace_type',
+    'internal_tablespace_version',
+    'internal_update_time',
+    'is_free_lock',
+    'is_ipv4',
+    'is_ipv4_compat',
+    'is_ipv4_mapped',
+    'is_ipv6',
+    'is_used_lock',
+    'is_uuid',
+    'is_visible_dd_object',
+    'isnull',
+    'json_array',
+    'json_array_append',
+    'json_array_insert',
+    'json_arrayagg',
+    'json_contains',
+    'json_contains_path',
+    'json_depth',
+    'json_extract',
+    'json_insert',
+    'json_keys',
+    'json_length',
+    'json_merge',
+    'json_merge_patch',
+    'json_merge_preserve',
+    'json_object',
+    'json_objectagg',
+    'json_overlaps',
+    'json_pretty',
+    'json_quote',
+    'json_remove',
+    'json_replace',
+    'json_schema_valid',
+    'json_schema_validation_report',
+    'json_search',
+    'json_set',
+    'json_storage_free',
+    'json_storage_size',
+    'json_type',
+    'json_unquote',
+    'json_valid',
+    'last_day',
+    'last_insert_id',
+    'lcase',
+    'least',
+    'length',
+    'like_range_max',
+    'like_range_min',
+    'ln',
+    'load_file',
+    'locate',
+    'log',
+    'log10',
+    'log2',
+    'lower',
+    'lpad',
+    'ltrim',
+    'make_set',
+    'makedate',
+    'maketime',
+    'master_pos_wait',
+    'max',
+    'mbrcontains',
+    'mbrcoveredby',
+    'mbrcovers',
+    'mbrdisjoint',
+    'mbrequals',
+    'mbrintersects',
+    'mbroverlaps',
+    'mbrtouches',
+    'mbrwithin',
+    'md5',
+    'mid',
+    'min',
+    'monthname',
+    'name_const',
+    'now',
+    'nullif',
+    'oct',
+    'octet_length',
+    'ord',
+    'period_add',
+    'period_diff',
+    'pi',
+    'position',
+    'pow',
+    'power',
+    'ps_current_thread_id',
+    'ps_thread_id',
+    'quote',
+    'radians',
+    'rand',
+    'random_bytes',
+    'regexp_instr',
+    'regexp_like',
+    'regexp_replace',
+    'regexp_substr',
+    'release_all_locks',
+    'release_lock',
+    'remove_dd_property_key',
+    'reverse',
+    'roles_graphml',
+    'round',
+    'rpad',
+    'rtrim',
+    'sec_to_time',
+    'session_user',
+    'sha',
+    'sha1',
+    'sha2',
+    'sign',
+    'sin',
+    'sleep',
+    'soundex',
+    'space',
+    'sqrt',
+    'st_area',
+    'st_asbinary',
+    'st_asgeojson',
+    'st_astext',
+    'st_aswkb',
+    'st_aswkt',
+    'st_buffer',
+    'st_buffer_strategy',
+    'st_centroid',
+    'st_contains',
+    'st_convexhull',
+    'st_crosses',
+    'st_difference',
+    'st_dimension',
+    'st_disjoint',
+    'st_distance',
+    'st_distance_sphere',
+    'st_endpoint',
+    'st_envelope',
+    'st_equals',
+    'st_exteriorring',
+    'st_geohash',
+    'st_geomcollfromtext',
+    'st_geomcollfromtxt',
+    'st_geomcollfromwkb',
+    'st_geometrycollectionfromtext',
+    'st_geometrycollectionfromwkb',
+    'st_geometryfromtext',
+    'st_geometryfromwkb',
+    'st_geometryn',
+    'st_geometrytype',
+    'st_geomfromgeojson',
+    'st_geomfromtext',
+    'st_geomfromwkb',
+    'st_interiorringn',
+    'st_intersection',
+    'st_intersects',
+    'st_isclosed',
+    'st_isempty',
+    'st_issimple',
+    'st_isvalid',
+    'st_latfromgeohash',
+    'st_latitude',
+    'st_length',
+    'st_linefromtext',
+    'st_linefromwkb',
+    'st_linestringfromtext',
+    'st_linestringfromwkb',
+    'st_longfromgeohash',
+    'st_longitude',
+    'st_makeenvelope',
+    'st_mlinefromtext',
+    'st_mlinefromwkb',
+    'st_mpointfromtext',
+    'st_mpointfromwkb',
+    'st_mpolyfromtext',
+    'st_mpolyfromwkb',
+    'st_multilinestringfromtext',
+    'st_multilinestringfromwkb',
+    'st_multipointfromtext',
+    'st_multipointfromwkb',
+    'st_multipolygonfromtext',
+    'st_multipolygonfromwkb',
+    'st_numgeometries',
+    'st_numinteriorring',
+    'st_numinteriorrings',
+    'st_numpoints',
+    'st_overlaps',
+    'st_pointfromgeohash',
+    'st_pointfromtext',
+    'st_pointfromwkb',
+    'st_pointn',
+    'st_polyfromtext',
+    'st_polyfromwkb',
+    'st_polygonfromtext',
+    'st_polygonfromwkb',
+    'st_simplify',
+    'st_srid',
+    'st_startpoint',
+    'st_swapxy',
+    'st_symdifference',
+    'st_touches',
+    'st_transform',
+    'st_union',
+    'st_validate',
+    'st_within',
+    'st_x',
+    'st_y',
+    'statement_digest',
+    'statement_digest_text',
+    'std',
+    'stddev',
+    'stddev_pop',
+    'stddev_samp',
+    'str_to_date',
+    'strcmp',
+    'subdate',
+    'substr',
+    'substring',
+    'substring_index',
+    'subtime',
+    'sum',
+    'sysdate',
+    'system_user',
+    'tan',
+    'time_format',
+    'time_to_sec',
+    'timediff',
+    'to_base64',
+    'to_days',
+    'to_seconds',
+    'trim',
+    'ucase',
+    'uncompress',
+    'uncompressed_length',
+    'unhex',
+    'unix_timestamp',
+    'updatexml',
+    'upper',
+    'uuid',
+    'uuid_short',
+    'uuid_to_bin',
+    'validate_password_strength',
+    'var_pop',
+    'var_samp',
+    'variance',
+    'version',
+    'wait_for_executed_gtid_set',
+    'wait_until_sql_thread_after_gtids',
+    'weekday',
+    'weekofyear',
+    'yearweek',
+)
+
+
+MYSQL_OPTIMIZER_HINTS = (
+    'bka',
+    'bnl',
+    'dupsweedout',
+    'firstmatch',
+    'group_index',
+    'hash_join',
+    'index',
+    'index_merge',
+    'intoexists',
+    'join_fixed_order',
+    'join_index',
+    'join_order',
+    'join_prefix',
+    'join_suffix',
+    'loosescan',
+    'materialization',
+    'max_execution_time',
+    'merge',
+    'mrr',
+    'no_bka',
+    'no_bnl',
+    'no_group_index',
+    'no_hash_join',
+    'no_icp',
+    'no_index',
+    'no_index_merge',
+    'no_join_index',
+    'no_merge',
+    'no_mrr',
+    'no_order_index',
+    'no_range_optimization',
+    'no_semijoin',
+    'no_skip_scan',
+    'order_index',
+    'qb_name',
+    'resource_group',
+    'semijoin',
+    'set_var',
+    'skip_scan',
+    'subquery',
+)
+
+
+MYSQL_KEYWORDS = (
+    'accessible',
+    'account',
+    'action',
+    'active',
+    'add',
+    'admin',
+    'after',
+    'against',
+    'aggregate',
+    'algorithm',
+    'all',
+    'alter',
+    'always',
+    'analyze',
+    'and',
+    'any',
+    'array',
+    'as',
+    'asc',
+    'ascii',
+    'asensitive',
+    'at',
+    'attribute',
+    'auto_increment',
+    'autoextend_size',
+    'avg',
+    'avg_row_length',
+    'backup',
+    'before',
+    'begin',
+    'between',
+    'binlog',
+    'block',
+    'both',
+    'btree',
+    'buckets',
+    'by',
+    'byte',
+    'cache',
+    'call',
+    'cascade',
+    'cascaded',
+    'case',
+    'catalog_name',
+    'chain',
+    'change',
+    'changed',
+    'channel',
+    'character',
+    'charset',
+    'check',
+    'checksum',
+    'cipher',
+    'class_origin',
+    'client',
+    'clone',
+    'close',
+    'coalesce',
+    'code',
+    'collate',
+    'collation',
+    'column',
+    'column_format',
+    'column_name',
+    'columns',
+    'comment',
+    'commit',
+    'committed',
+    'compact',
+    'completion',
+    'component',
+    'compressed',
+    'compression',
+    'concurrent',
+    'condition',
+    'connection',
+    'consistent',
+    'constraint',
+    'constraint_catalog',
+    'constraint_name',
+    'constraint_schema',
+    'contains',
+    'context',
+    'continue',
+    'convert',
+    'cpu',
+    'create',
+    'cross',
+    'cube',
+    'cume_dist',
+    'current',
+    'current_date',
+    'current_time',
+    'current_timestamp',
+    'current_user',
+    'cursor',
+    'cursor_name',
+    'data',
+    'database',
+    'databases',
+    'datafile',
+    'day',
+    'day_hour',
+    'day_microsecond',
+    'day_minute',
+    'day_second',
+    'deallocate',
+    'declare',
+    'default',
+    'default_auth',
+    'definer',
+    'definition',
+    'delay_key_write',
+    'delayed',
+    'delete',
+    'dense_rank',
+    'desc',
+    'describe',
+    'description',
+    'deterministic',
+    'diagnostics',
+    'directory',
+    'disable',
+    'discard',
+    'disk',
+    'distinct',
+    'distinctrow',
+    'div',
+    'do',
+    'drop',
+    'dual',
+    'dumpfile',
+    'duplicate',
+    'dynamic',
+    'each',
+    'else',
+    'elseif',
+    'empty',
+    'enable',
+    'enclosed',
+    'encryption',
+    'end',
+    'ends',
+    'enforced',
+    'engine',
+    'engine_attribute',
+    'engines',
+    'error',
+    'errors',
+    'escape',
+    'escaped',
+    'event',
+    'events',
+    'every',
+    'except',
+    'exchange',
+    'exclude',
+    'execute',
+    'exists',
+    'exit',
+    'expansion',
+    'expire',
+    'explain',
+    'export',
+    'extended',
+    'extent_size',
+    'failed_login_attempts',
+    'false',
+    'fast',
+    'faults',
+    'fetch',
+    'fields',
+    'file',
+    'file_block_size',
+    'filter',
+    'first',
+    'first_value',
+    'flush',
+    'following',
+    'follows',
+    'for',
+    'force',
+    'foreign',
+    'format',
+    'found',
+    'from',
+    'full',
+    'fulltext',
+    'function',
+    'general',
+    'generated',
+    'geomcollection',
+    'get',
+    'get_format',
+    'get_master_public_key',
+    'global',
+    'grant',
+    'grants',
+    'group',
+    'group_replication',
+    'grouping',
+    'groups',
+    'handler',
+    'hash',
+    'having',
+    'help',
+    'high_priority',
+    'histogram',
+    'history',
+    'host',
+    'hosts',
+    'hour',
+    'hour_microsecond',
+    'hour_minute',
+    'hour_second',
+    'identified',
+    'if',
+    'ignore',
+    'ignore_server_ids',
+    'import',
+    'in',
+    'inactive',
+    'index',
+    'indexes',
+    'infile',
+    'initial_size',
+    'inner',
+    'inout',
+    'insensitive',
+    'insert',
+    'insert_method',
+    'install',
+    'instance',
+    'interval',
+    'into',
+    'invisible',
+    'invoker',
+    'io',
+    'io_after_gtids',
+    'io_before_gtids',
+    'io_thread',
+    'ipc',
+    'is',
+    'isolation',
+    'issuer',
+    'iterate',
+    'join',
+    'json_table',
+    'json_value',
+    'key',
+    'key_block_size',
+    'keys',
+    'kill',
+    'lag',
+    'language',
+    'last',
+    'last_value',
+    'lateral',
+    'lead',
+    'leading',
+    'leave',
+    'leaves',
+    'left',
+    'less',
+    'level',
+    'like',
+    'limit',
+    'linear',
+    'lines',
+    'list',
+    'load',
+    'local',
+    'localtime',
+    'localtimestamp',
+    'lock',
+    'locked',
+    'locks',
+    'logfile',
+    'logs',
+    'loop',
+    'low_priority',
+    'master',
+    'master_auto_position',
+    'master_bind',
+    'master_compression_algorithms',
+    'master_connect_retry',
+    'master_delay',
+    'master_heartbeat_period',
+    'master_host',
+    'master_log_file',
+    'master_log_pos',
+    'master_password',
+    'master_port',
+    'master_public_key_path',
+    'master_retry_count',
+    'master_server_id',
+    'master_ssl',
+    'master_ssl_ca',
+    'master_ssl_capath',
+    'master_ssl_cert',
+    'master_ssl_cipher',
+    'master_ssl_crl',
+    'master_ssl_crlpath',
+    'master_ssl_key',
+    'master_ssl_verify_server_cert',
+    'master_tls_ciphersuites',
+    'master_tls_version',
+    'master_user',
+    'master_zstd_compression_level',
+    'match',
+    'max_connections_per_hour',
+    'max_queries_per_hour',
+    'max_rows',
+    'max_size',
+    'max_updates_per_hour',
+    'max_user_connections',
+    'maxvalue',
+    'medium',
+    'member',
+    'memory',
+    'merge',
+    'message_text',
+    'microsecond',
+    'migrate',
+    'min_rows',
+    'minute',
+    'minute_microsecond',
+    'minute_second',
+    'mod',
+    'mode',
+    'modifies',
+    'modify',
+    'month',
+    'mutex',
+    'mysql_errno',
+    'name',
+    'names',
+    'natural',
+    'ndb',
+    'ndbcluster',
+    'nested',
+    'network_namespace',
+    'never',
+    'new',
+    'next',
+    'no',
+    'no_wait',
+    'no_write_to_binlog',
+    'nodegroup',
+    'none',
+    'not',
+    'nowait',
+    'nth_value',
+    'ntile',
+    'null',
+    'nulls',
+    'number',
+    'of',
+    'off',
+    'offset',
+    'oj',
+    'old',
+    'on',
+    'one',
+    'only',
+    'open',
+    'optimize',
+    'optimizer_costs',
+    'option',
+    'optional',
+    'optionally',
+    'options',
+    'or',
+    'order',
+    'ordinality',
+    'organization',
+    'others',
+    'out',
+    'outer',
+    'outfile',
+    'over',
+    'owner',
+    'pack_keys',
+    'page',
+    'parser',
+    'partial',
+    'partition',
+    'partitioning',
+    'partitions',
+    'password',
+    'password_lock_time',
+    'path',
+    'percent_rank',
+    'persist',
+    'persist_only',
+    'phase',
+    'plugin',
+    'plugin_dir',
+    'plugins',
+    'port',
+    'precedes',
+    'preceding',
+    'prepare',
+    'preserve',
+    'prev',
+    'primary',
+    'privilege_checks_user',
+    'privileges',
+    'procedure',
+    'process',
+    'processlist',
+    'profile',
+    'profiles',
+    'proxy',
+    'purge',
+    'quarter',
+    'query',
+    'quick',
+    'random',
+    'range',
+    'rank',
+    'read',
+    'read_only',
+    'read_write',
+    'reads',
+    'rebuild',
+    'recover',
+    'recursive',
+    'redo_buffer_size',
+    'redundant',
+    'reference',
+    'references',
+    'regexp',
+    'relay',
+    'relay_log_file',
+    'relay_log_pos',
+    'relay_thread',
+    'relaylog',
+    'release',
+    'reload',
+    'remove',
+    'rename',
+    'reorganize',
+    'repair',
+    'repeat',
+    'repeatable',
+    'replace',
+    'replicate_do_db',
+    'replicate_do_table',
+    'replicate_ignore_db',
+    'replicate_ignore_table',
+    'replicate_rewrite_db',
+    'replicate_wild_do_table',
+    'replicate_wild_ignore_table',
+    'replication',
+    'require',
+    'require_row_format',
+    'require_table_primary_key_check',
+    'reset',
+    'resignal',
+    'resource',
+    'respect',
+    'restart',
+    'restore',
+    'restrict',
+    'resume',
+    'retain',
+    'return',
+    'returned_sqlstate',
+    'returning',
+    'returns',
+    'reuse',
+    'reverse',
+    'revoke',
+    'right',
+    'rlike',
+    'role',
+    'rollback',
+    'rollup',
+    'rotate',
+    'routine',
+    'row',
+    'row_count',
+    'row_format',
+    'row_number',
+    'rows',
+    'rtree',
+    'savepoint',
+    'schedule',
+    'schema',
+    'schema_name',
+    'schemas',
+    'second',
+    'second_microsecond',
+    'secondary',
+    'secondary_engine',
+    'secondary_engine_attribute',
+    'secondary_load',
+    'secondary_unload',
+    'security',
+    'select',
+    'sensitive',
+    'separator',
+    'serializable',
+    'server',
+    'session',
+    'share',
+    'show',
+    'shutdown',
+    'signal',
+    'signed',
+    'simple',
+    'skip',
+    'slave',
+    'slow',
+    'snapshot',
+    'socket',
+    'some',
+    'soname',
+    'sounds',
+    'source',
+    'spatial',
+    'specific',
+    'sql',
+    'sql_after_gtids',
+    'sql_after_mts_gaps',
+    'sql_before_gtids',
+    'sql_big_result',
+    'sql_buffer_result',
+    'sql_calc_found_rows',
+    'sql_no_cache',
+    'sql_small_result',
+    'sql_thread',
+    'sql_tsi_day',
+    'sql_tsi_hour',
+    'sql_tsi_minute',
+    'sql_tsi_month',
+    'sql_tsi_quarter',
+    'sql_tsi_second',
+    'sql_tsi_week',
+    'sql_tsi_year',
+    'sqlexception',
+    'sqlstate',
+    'sqlwarning',
+    'srid',
+    'ssl',
+    'stacked',
+    'start',
+    'starting',
+    'starts',
+    'stats_auto_recalc',
+    'stats_persistent',
+    'stats_sample_pages',
+    'status',
+    'stop',
+    'storage',
+    'stored',
+    'straight_join',
+    'stream',
+    'string',
+    'subclass_origin',
+    'subject',
+    'subpartition',
+    'subpartitions',
+    'super',
+    'suspend',
+    'swaps',
+    'switches',
+    'system',
+    'table',
+    'table_checksum',
+    'table_name',
+    'tables',
+    'tablespace',
+    'temporary',
+    'temptable',
+    'terminated',
+    'than',
+    'then',
+    'thread_priority',
+    'ties',
+    'timestampadd',
+    'timestampdiff',
+    'tls',
+    'to',
+    'trailing',
+    'transaction',
+    'trigger',
+    'triggers',
+    'true',
+    'truncate',
+    'type',
+    'types',
+    'unbounded',
+    'uncommitted',
+    'undefined',
+    'undo',
+    'undo_buffer_size',
+    'undofile',
+    'unicode',
+    'uninstall',
+    'union',
+    'unique',
+    'unknown',
+    'unlock',
+    'unsigned',
+    'until',
+    'update',
+    'upgrade',
+    'usage',
+    'use',
+    'use_frm',
+    'user',
+    'user_resources',
+    'using',
+    'utc_date',
+    'utc_time',
+    'utc_timestamp',
+    'validation',
+    'value',
+    'values',
+    'variables',
+    'vcpu',
+    'view',
+    'virtual',
+    'visible',
+    'wait',
+    'warnings',
+    'week',
+    'weight_string',
+    'when',
+    'where',
+    'while',
+    'window',
+    'with',
+    'without',
+    'work',
+    'wrapper',
+    'write',
+    'x509',
+    'xa',
+    'xid',
+    'xml',
+    'xor',
+    'year_month',
+    'zerofill',
+)
+
+
+if __name__ == '__main__':  # pragma: no cover
+    import re
+    from urllib.request import urlopen
+
+    from pygments.util import format_lines
+
+    # MySQL source files are downloaded raw from the `8.0` ref of the GitHub repo.
+    SOURCE_URL = 'https://github.com/mysql/mysql-server/raw/8.0'
+    LEX_URL = SOURCE_URL + '/sql/lex.h'
+    ITEM_CREATE_URL = SOURCE_URL + '/sql/item_create.cc'
+
+
+    def update_myself():
+        """Download lex.h and item_create.cc, then rewrite the generated tuples."""
+        with urlopen(LEX_URL) as response:  # the with-block closes the HTTP response
+            lex_file = response.read().decode('utf8', errors='ignore')
+        with urlopen(ITEM_CREATE_URL) as response:
+            item_create_file = response.read().decode('utf8', errors='ignore')
+
+        keywords = parse_lex_keywords(lex_file)
+        functions = parse_lex_functions(lex_file)
+        functions.update(parse_item_create_functions(item_create_file))
+        optimizer_hints = parse_lex_optimizer_hints(lex_file)
+        # Data types are maintained by hand, so keep them out of the keyword set.
+        keywords -= set(MYSQL_DATATYPES)
+
+        update_content('MYSQL_FUNCTIONS', tuple(sorted(functions)))
+        update_content('MYSQL_KEYWORDS', tuple(sorted(keywords)))
+        update_content('MYSQL_OPTIMIZER_HINTS', tuple(sorted(optimizer_hints)))
+
+
+    def parse_lex_keywords(f):
+        """Return the lowercased names of every SYM()/SYM_HK() entry in lex.h.
+
+        ``f`` is the text of lex.h; ValueError is raised if nothing matches.
+        """
+        pattern = r'{SYM(?:_HK)?\("(?P<keyword>[a-z0-9_]+)",'
+        found = {hit.group('keyword').lower() for hit in re.finditer(pattern, f, flags=re.I)}
+        if found:
+            return found
+
+        raise ValueError('No keywords found')
+
+
+    def parse_lex_optimizer_hints(f):
+        """Return the lowercased names of every SYM_H() entry in lex.h.
+
+        ``f`` is the text of lex.h; ValueError is raised if nothing matches.
+        """
+        pattern = r'{SYM_H\("(?P<keyword>[a-z0-9_]+)",'
+        found = {hit.group('keyword').lower() for hit in re.finditer(pattern, f, flags=re.I)}
+        if found:
+            return found
+
+        raise ValueError('No optimizer hints found')
+
+
+    def parse_lex_functions(f):
+        """Return the lowercased names of every SYM_FN() entry in lex.h.
+
+        ``f`` is the text of lex.h; ValueError is raised if nothing matches.
+        """
+        pattern = r'{SYM_FN?\("(?P<function>[a-z0-9_]+)",'  # the trailing N is optional
+        found = {hit.group('function').lower() for hit in re.finditer(pattern, f, flags=re.I)}
+        if found:
+            return found
+
+        raise ValueError('No lex functions found')
+
+
+    def parse_item_create_functions(f):
+        """Return the lowercased quoted names paired with SQL_F*() in item_create.cc.
+
+        ``f`` is the text of item_create.cc; ValueError is raised if nothing matches.
+        """
+        pattern = r'{"(?P<function>[^"]+?)",\s*SQL_F[^(]+?\('
+        found = {hit.group('function').lower() for hit in re.finditer(pattern, f, flags=re.I)}
+        if found:
+            return found
+
+        raise ValueError('No item_create functions found')
+
+
+    def update_content(field_name, content):
+        """Replace the ``field_name = (...)`` tuple in this file with ``content``.
+
+        Raises ValueError if no existing definition of ``field_name`` is found.
+        """
+        # The file declares itself UTF-8, so do not rely on the locale's encoding.
+        with open(__file__, encoding='utf-8') as f:
+            data = f.read()
+
+        # Span from the "NAME = (" line through the first line holding only ")".
+        m = re.search(r'^%s\s*=\s*\($.*?^\s*\)$' % field_name, data, re.M | re.S)
+        if not m:
+            raise ValueError('Could not find an existing definition for %s' % field_name)
+
+        data = data[:m.start()] + format_lines(field_name, content) + data[m.end():]
+        with open(__file__, 'w', encoding='utf-8', newline='\n') as f:
+            f.write(data)
+
+    update_myself()
diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py
index d58abe2a..7cce4d40 100644
--- a/pygments/lexers/sql.py
+++ b/pygments/lexers/sql.py
@@ -42,11 +42,18 @@ import re
 
 from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words
 from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \
-    Keyword, Name, String, Number, Generic
+    Keyword, Name, String, Number, Generic, Literal
 from pygments.lexers import get_lexer_by_name, ClassNotFound
 
 from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
     PSEUDO_TYPES, PLPGSQL_KEYWORDS
+from pygments.lexers._mysql_builtins import \
+    MYSQL_CONSTANTS, \
+    MYSQL_DATATYPES, \
+    MYSQL_FUNCTIONS, \
+    MYSQL_KEYWORDS, \
+    MYSQL_OPTIMIZER_HINTS
+
 from pygments.lexers import _tsql_builtins
 
 
@@ -579,8 +586,12 @@ class TransactSqlLexer(RegexLexer):
 
 
 class MySqlLexer(RegexLexer):
-    """
-    Special lexer for MySQL.
+    """The Oracle MySQL lexer.
+
+    This lexer does not attempt to maintain strict compatibility with
+    MariaDB syntax or keywords. Although MySQL and MariaDB's common code
+    history suggests there may be significant overlap between the two,
+    compatibility between the two is not a target for this lexer.
     """
 
     name = 'MySQL'
@@ -591,63 +602,149 @@ class MySqlLexer(RegexLexer):
     tokens = {
         'root': [
             (r'\s+', Text),
-            (r'(#|--\s+).*\n?', Comment.Single),
-            (r'/\*', Comment.Multiline, 'multiline-comments'),
+
+            # Comments
+            (r'(?:#|--\s+).*', Comment.Single),
+            (r'/\*\+', Comment.Special, 'optimizer-hints'),
+            (r'/\*', Comment.Multiline, 'multiline-comment'),
+
+            # Hexadecimal literals
+            (r"x'([0-9a-f]{2})+'", Number.Hex),  # MySQL requires paired hex characters in this form.
+            (r'0x[0-9a-f]+', Number.Hex),
+
+            # Binary literals
+            (r"b'[01]+'", Number.Bin),
+            (r'0b[01]+', Number.Bin),
+
+            # Numeric literals
+            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),  # Mandatory integer, optional fraction and exponent
+            (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),  # Mandatory fraction, optional integer and exponent
+            (r'[0-9]+e[+-]?[0-9]+', Number.Float),  # Exponents with integer significands are still floats
             (r'[0-9]+', Number.Integer),
-            (r'[0-9]*\.[0-9]+(e[+-][0-9]+)', Number.Float),
-            (r"'(\\\\|\\'|''|[^'])*'", String.Single),
-            (r'"(\\\\|\\"|""|[^"])*"', String.Double),
-            (r"`(\\\\|\\`|``|[^`])*`", String.Symbol),
-            (r'[+*/<>=~!@#%^&|`?-]', Operator),
-            (r'\b(tinyint|smallint|mediumint|int|integer|bigint|date|'
-             r'datetime|time|bit|bool|tinytext|mediumtext|longtext|text|'
-             r'tinyblob|mediumblob|longblob|blob|float|double|double\s+'
-             r'precision|real|numeric|dec|decimal|timestamp|year|char|'
-             r'varchar|varbinary|varcharacter|enum|set)(\b\s*)(\()?',
-             bygroups(Keyword.Type, Text, Punctuation)),
-            (r'\b(add|all|alter|analyze|and|as|asc|asensitive|before|between|'
-             r'bigint|binary|blob|both|by|call|cascade|case|change|char|'
-             r'character|check|collate|column|condition|constraint|continue|'
-             r'convert|create|cross|current_date|current_time|'
-             r'current_timestamp|current_user|cursor|database|databases|'
-             r'day_hour|day_microsecond|day_minute|day_second|dec|decimal|'
-             r'declare|default|delayed|delete|desc|describe|deterministic|'
-             r'distinct|distinctrow|div|double|drop|dual|each|else|elseif|'
-             r'enclosed|escaped|exists|exit|explain|fetch|flush|float|float4|'
-             r'float8|for|force|foreign|from|fulltext|grant|group|having|'
-             r'high_priority|hour_microsecond|hour_minute|hour_second|if|'
-             r'ignore|in|index|infile|inner|inout|insensitive|insert|int|'
-             r'int1|int2|int3|int4|int8|integer|interval|into|is|iterate|'
-             r'join|key|keys|kill|leading|leave|left|like|limit|lines|load|'
-             r'localtime|localtimestamp|lock|long|loop|low_priority|match|'
-             r'minute_microsecond|minute_second|mod|modifies|natural|'
-             r'no_write_to_binlog|not|numeric|on|optimize|option|optionally|'
-             r'or|order|out|outer|outfile|precision|primary|procedure|purge|'
-             r'raid0|read|reads|real|references|regexp|release|rename|repeat|'
-             r'replace|require|restrict|return|revoke|right|rlike|schema|'
-             r'schemas|second_microsecond|select|sensitive|separator|set|'
-             r'show|smallint|soname|spatial|specific|sql|sql_big_result|'
-             r'sql_calc_found_rows|sql_small_result|sqlexception|sqlstate|'
-             r'sqlwarning|ssl|starting|straight_join|table|terminated|then|'
-             r'to|trailing|trigger|undo|union|unique|unlock|unsigned|update|'
-             r'usage|use|using|utc_date|utc_time|utc_timestamp|values|'
-             r'varying|when|where|while|with|write|x509|xor|year_month|'
-             r'zerofill)\b', Keyword),
-            # TODO: this list is not complete
-            (r'\b(auto_increment|engine|charset|tables)\b', Keyword.Pseudo),
-            (r'(true|false|null)', Name.Constant),
-            (r'([a-z_]\w*)(\s*)(\()',
+
+            # Date literals
+            (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}", Literal.Date),
+
+            # Time literals
+            (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}", Literal.Date),
+
+            # Timestamp literals
+            (
+                r"\{\s*ts\s*(?P<quote>['\"])\s*"
+                r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}"  # Date part
+                r"\s+"  # Whitespace between date and time
+                r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?"  # Time part
+                r"\s*(?P=quote)\s*\}",
+                Literal.Date
+            ),
+
+            # String literals
+            (r"'", String.Single, 'single-quoted-string'),
+            (r'"', String.Double, 'double-quoted-string'),
+
+            # Variables
+            (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
+            (r'@[a-z0-9_$.]+', Name.Variable),
+            (r"@'", Name.Variable, 'single-quoted-variable'),
+            (r'@"', Name.Variable, 'double-quoted-variable'),
+            (r"@`", Name.Variable, 'backtick-quoted-variable'),
+            (r'\?', Name.Variable),  # For demonstrating prepared statements
+
+            # Operators
+            (r'[!%&*+/:<=>^|~-]+', Operator),
+
+            # Exceptions; these words tokenize differently in different contexts.
+            (r'\b(set)(?!\s*\()', Keyword),
+            (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Text, Keyword)),
+            # In all other known cases, "SET" is tokenized by MYSQL_DATATYPES.
+
+            (words(MYSQL_CONSTANTS, prefix=r'\b', suffix=r'\b'), Name.Constant),
+            (words(MYSQL_DATATYPES, prefix=r'\b', suffix=r'\b'), Keyword.Type),
+            (words(MYSQL_KEYWORDS, prefix=r'\b', suffix=r'\b'), Keyword),
+            (words(MYSQL_FUNCTIONS, prefix=r'\b', suffix=r'\b(\s*)(\()'),
              bygroups(Name.Function, Text, Punctuation)),
-            (r'[a-z_]\w*', Name),
-            (r'@[a-z0-9]*[._]*[a-z0-9]*', Name.Variable),
-            (r'[;:()\[\],.]', Punctuation)
+
+            # Schema object names
+            #
+            # Note: Although the first regex supports unquoted all-numeric
+            # identifiers, this will not be a problem in practice because
+            # numeric literals have already been handled above.
+            #
+            ('[0-9a-z$_\u0080-\uffff]+', Name),
+            (r'`', Name, 'schema-object-name'),
+
+            # Punctuation
+            (r'[(),.;]', Punctuation),
         ],
-        'multiline-comments': [
-            (r'/\*', Comment.Multiline, 'multiline-comments'),
+
+        # Multiline comment substates
+        # ---------------------------
+
+        'optimizer-hints': [
+            (r'[^*a-z]+', Comment.Special),
+            (r'\*/', Comment.Special, '#pop'),
+            (words(MYSQL_OPTIMIZER_HINTS, suffix=r'\b'), Comment.Preproc),
+            ('[a-z]+', Comment.Special),
+            (r'\*', Comment.Special),
+        ],
+
+        'multiline-comment': [
+            (r'[^*]+', Comment.Multiline),
             (r'\*/', Comment.Multiline, '#pop'),
-            (r'[^/*]+', Comment.Multiline),
-            (r'[/*]', Comment.Multiline)
-        ]
+            (r'\*', Comment.Multiline),
+        ],
+
+        # String substates
+        # ----------------
+
+        'single-quoted-string': [
+            (r"[^'\\]+", String.Single),
+            (r"''", String.Escape),
+            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
+            (r"'", String.Single, '#pop'),
+        ],
+
+        'double-quoted-string': [
+            (r'[^"\\]+', String.Double),
+            (r'""', String.Escape),
+            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
+            (r'"', String.Double, '#pop'),
+        ],
+
+        # Variable substates
+        # ------------------
+
+        'single-quoted-variable': [
+            (r"[^']+", Name.Variable),
+            (r"''", Name.Variable),
+            (r"'", Name.Variable, '#pop'),
+        ],
+
+        'double-quoted-variable': [
+            (r'[^"]+', Name.Variable),
+            (r'""', Name.Variable),
+            (r'"', Name.Variable, '#pop'),
+        ],
+
+        'backtick-quoted-variable': [
+            (r'[^`]+', Name.Variable),
+            (r'``', Name.Variable),
+            (r'`', Name.Variable, '#pop'),
+        ],
+
+        # Schema object name substates
+        # ----------------------------
+        #
+        # Backtick-quoted schema object names support escape characters.
+        # It may be desirable to tokenize escape sequences differently,
+        # but currently Pygments does not have an obvious token type for
+        # this unique situation (for example, "Name.Escape").
+        #
+        'schema-object-name': [
+            (r'[^`\\]+', Name),
+            (r'(?:\\\\|\\`|``)', Name),  # This could be an escaped name token type.
+            (r'`', Name, '#pop'),
+        ],
     }
 
     def analyse_text(text):
diff --git a/tests/examplefiles/mysql.txt b/tests/examplefiles/mysql.txt
new file mode 100644
index 00000000..4927abd8
--- /dev/null
+++ b/tests/examplefiles/mysql.txt
@@ -0,0 +1,132 @@
+-- Samples of MySQL parsing
+
+
+-- Comments
+# standalone comment line
+-- standalone comment line
+SELECT 1; -- trailing comment
+SELECT 1; # trailing comment
+SELECT 1; /* trailing comment */
+SELECT /* interruption */  /**/ 1;
+ /*
+    Multiline / * / comment
+ */
+ /* /* MySQL does not support nested comments */
+SELECT 'If this line is a comment then nested commenting is enabled (and therefore broken).';
+
+
+-- Optimizer hints
+SELECT /*+ SEMIJOIN(FIRSTMATCH, LOOSESCAN) */ 1;
+SELECT /*+ SET_VAR(foreign_key_checks=OFF) */ 1;
+
+
+-- Literals
+SELECT
+    -- Integers
+    123,
+
+    -- Floats
+    .123, 1.23, 123.,
+
+    -- Exponents
+    1e10, 1e-10, 1.e20, .1e-20,
+
+    -- Hexadecimal
+    X'0af019', x'0AF019', 0xaf019,
+
+    -- Binary
+    B'010', b'010', 0b010,
+
+    -- Temporal literals
+    {d'2020-01-01'}, { d ' 2020^01@01 ' },
+    {t'8 9:10:11'}, { t ' 09:10:11.12 ' }, { t ' 091011 ' },
+    {ts"2020-01-01 09:10:11"}, { ts ' 2020@01/01  09:10:11 ' },
+
+    -- Strings
+    '', 'abc', '1''2\03\%4\_5\\6\'7\"8',
+    "", "abc", "1""2\03\%4\_5\\6\'7\"8",
+;
+
+
+-- Variables
+SET @a = 1, @1 = 2, @._.$ = 3;
+SET @'?' = 1, @'abc''def"`ghi' = 2;
+SET @"#" = 1, @"abc""def'`ghi" = 2;
+SET @`^` = 1, @`abc``def'"ghi` = 2;
+SELECT
+    @@timestamp,
+    @@global.auto_increment_offset,
+    @@session.auto_increment_offset,
+    @@auto_increment_offset
+;
+
+
+-- Prepared statements
+SELECT POW(?, 3) AS cubed;
+
+
+-- Constants
+SELECT TRUE, FALSE, NULL, UNKNOWN;
+
+
+-- Data types
+CREATE TABLE table1 (
+    id INT AUTO_INCREMENT PRIMARY KEY,
+    name VARCHAR(20) NOT NULL,
+    birthyear YEAR
+);
+
+
+-- Keywords
+INSERT INTO table1 (person, birthyear) VALUES ('abc', 2020);
+
+WITH RECURSIVE example (n) AS (
+    SELECT 1
+    UNION ALL
+    SELECT n + 1 FROM example
+    WHERE n < 10
+)
+SELECT n FROM example;
+
+SELECT 17 MEMBER OF ('[23, "abc", 17, "ab", 10]');
+
+
+-- Functions
+SELECT CONCAT('function');
+SELECT MAX(quantity) FROM example;
+
+
+-- Schema object names
+CREATE TABLE basic (
+    example INT,
+    股票编号 INT,
+    `select` INT,
+    `concat(` INT
+);
+
+SELECT e1.`apple` AS a, `example2`.b
+FROM example1 AS e1
+JOIN example2 e2
+ON `example1`.`id` = e2.id;
+
+
+-- Operators
+SELECT 1 + 2 - 3 << 2;
+SELECT 1::DECIMAL(5, 2);
+SET @a = 1;
+SET a := 1;
+SELECT c->>'$.name' FROM example;
+
+
+
+-- Exceptions
+CREATE TABLE t1
+(
+    c1 VARCHAR(5) CHARACTER SET latin1,
+    c2 SET('r', 'g', 'b')
+);
+
+
+-- Introducers
+SELECT _latin1'abc';
+SELECT _binary'abc';
diff --git a/tests/test_mysql.py b/tests/test_mysql.py
new file mode 100644
index 00000000..9b5e2b8c
--- /dev/null
+++ b/tests/test_mysql.py
@@ -0,0 +1,249 @@
+# -*- coding: utf-8 -*-
+"""
+    Pygments MySQL lexer tests
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import pytest
+
+from pygments.lexers.sql import MySqlLexer
+
+from pygments.token import \
+    Comment, \
+    Keyword, \
+    Literal, \
+    Name, \
+    Number, \
+    Operator, \
+    Punctuation, \
+    String, \
+    Text
+
+
+@pytest.fixture(scope='module')
+def lexer():
+    return MySqlLexer()  # one instance shared per test module; nothing to tear down
+
+
+@pytest.mark.parametrize('text', ['123'])
+def test_integer_literals(lexer, text):  # first token must span the whole input
+    assert next(iter(lexer.get_tokens(text))) == (Number.Integer, text)
+
+
+@pytest.mark.parametrize('text', [
+    '.123', '1.23', '123.',
+    '1e10', '1.0e10', '1.e-10', '.1e+10',
+])
+def test_float_literals(lexer, text):
+    """Decimal and exponent forms must each lex as a single Number.Float token."""
+    tokens = list(lexer.get_tokens(text))
+    first = tokens[0]
+    assert first == (Number.Float, text)
+
+
+@pytest.mark.parametrize('text', ["X'0af019'", "x'0AF019'", '0xaf019'])
+def test_hexadecimal_literals(lexer, text):  # quoted and 0x forms, either case
+    assert next(iter(lexer.get_tokens(text))) == (Number.Hex, text)
+
+
+@pytest.mark.parametrize('text', ["B'010'", "b'010'", '0b010'])
+def test_binary_literals(lexer, text):  # quoted and 0b forms, either case
+    assert next(iter(lexer.get_tokens(text))) == (Number.Bin, text)
+
+
+@pytest.mark.parametrize('text', [
+    "{d'2020-01-01'}", "{ d ' 2020^01@01 ' }",
+    "{t'8 9:10:11'}", "{ t ' 09:10:11.12 ' }", "{ t ' 091011 ' }",
+    '{ts"2020-01-01 09:10:11"}', "{ ts ' 2020@01/01  09:10:11 ' }",
+])
+def test_temporal_literals(lexer, text):
+    """Brace-delimited d/t/ts literals must each lex as one Literal.Date token."""
+    tokens = list(lexer.get_tokens(text))
+    leading = tokens[0]
+    assert leading == (Literal.Date, text)
+
+
+@pytest.mark.parametrize('text, expected_types', (
+    (r"'a'", (String.Single,) * 3),
+    (r"""'""'""", (String.Single,) * 3),
+    (r"''''", (String.Single, String.Escape, String.Single)),
+    (r"'\''", (String.Single, String.Escape, String.Single)),
+    (r'"a"', (String.Double,) * 3),
+    (r'''"''"''', (String.Double,) * 3),
+    (r'""""', (String.Double, String.Escape, String.Double)),
+    (r'"\""', (String.Double, String.Escape, String.Double)),
+))
+def test_string_literals(lexer, text, expected_types):
+    """The first tokens must carry exactly the types in `expected_types`."""
+    tokens = list(lexer.get_tokens(text))[:len(expected_types)]
+    # Compare whole tuples instead of all(zip(...)): zip() stops at the shorter
+    # input, so a lexer that emitted too few tokens could go unnoticed.
+    assert tuple(t[0] for t in tokens) == expected_types
+
+
+@pytest.mark.parametrize('text', [
+    "@a", "@1", "@._.$",
+    "@'?'", """@'abc''def"`ghi'""",
+    '@"#"', '''@"abc""def'`ghi"''',
+    '@`^`', """@`abc``def'"ghi`""",
+    "@@timestamp",
+    "@@session.auto_increment_offset",
+    "@@global.auto_increment_offset",
+    "@@persist.auto_increment_offset",
+    "@@persist_only.auto_increment_offset",
+    '?',
+])
+def test_variables(lexer, text):
+    """Every variable form must lex purely as Name.Variable tokens."""
+    stream = list(lexer.get_tokens(text))
+    # The final token is left out of the type check.
+    assert all(ttype == Name.Variable for ttype, _ in stream[:-1])
+    # Joined token values must reproduce the input, ignoring outer whitespace.
+    assert ''.join(value for _, value in stream).strip() == text.strip()
+
+
+@pytest.mark.parametrize('text', ['true', 'false', 'null', 'unknown'])
+def test_constants(lexer, text):  # same words as the MYSQL_CONSTANTS tuple
+    assert next(iter(lexer.get_tokens(text))) == (Name.Constant, text)
+
+
+@pytest.mark.parametrize('text', ['-- abc', '--\tabc', '#abc'])
+def test_comments_single_line(lexer, text):
+    """A single-line comment lexes as one token, alone or after a keyword."""
+    alone = list(lexer.get_tokens(text))
+    assert alone[0] == (Comment.Single, text)
+
+    mixed = list(lexer.get_tokens('select' + text + '\nselect'))
+    first, second, penultimate = mixed[0], mixed[1], mixed[-2]
+    assert first == (Keyword, 'select')
+    assert second == (Comment.Single, text)
+    assert penultimate == (Keyword, 'select')
+
+
+@pytest.mark.parametrize('text', [
+    '/**/a', '/*a*b/c*/a', '/*\nabc\n*/a',
+    '/* /* */a',
+])
+def test_comments_multi_line(lexer, text):
+    """All tokens except the last two must be Comment.Multiline."""
+    stream = list(lexer.get_tokens(text))
+    comment_types = [ttype for ttype, _ in stream[:-2]]
+    assert comment_types == [Comment.Multiline] * len(comment_types)
+    assert ''.join(value for _, value in stream).strip() == text.strip()
+
+    # The second-to-last token is outside the comment: nesting is not supported.
+    type_after_comment = stream[-2][0]
+    assert type_after_comment != Comment.Multiline
+
+
+@pytest.mark.parametrize('text', ['BKA', 'SEMIJOIN'])
+def test_optimizer_hints(lexer, text):
+    """A hint name is Comment.Preproc only as a whole word inside `/*+ ... */`."""
+    hint = (Comment.Preproc, text)
+    assert hint in lexer.get_tokens('/*+ ' + text + '(), */')
+    for rejected in (
+        '/* ' + text + ' */',        # plain comment, no `+` marker
+        '/*+ a' + text + '() */',    # name is only the tail of a longer word
+        '/*+ ' + text + 'a */',      # name is only the head of a longer word
+    ):
+        assert hint not in lexer.get_tokens(rejected)
+
+
+@pytest.mark.parametrize('text, expected_types', (
+    # SET exceptions
+    ('SET', (Keyword,)),
+    ('SET abc = 1;', (Keyword,)),
+    ('SET @abc = 1;', (Keyword,)),
+    ('CHARACTER SET latin1', (Keyword, Text, Keyword)),
+    ('SET("r", "g", "b")', (Keyword.Type, Punctuation)),
+    ('SET ("r", "g", "b")', (Keyword.Type, Text, Punctuation)),
+))
+def test_exceptions(lexer, text, expected_types):
+    """Context-dependent words must start with exactly `expected_types`."""
+    tokens = list(lexer.get_tokens(text))[:len(expected_types)]
+    # Compare whole tuples instead of all(zip(...)): zip() stops at the shorter
+    # input, so a lexer that emitted too few tokens could go unnoticed.
+    assert tuple(t[0] for t in tokens) == expected_types
+
+
+@pytest.mark.parametrize('text', [
+    'SHOW', 'CREATE', 'ALTER', 'DROP',
+    'SELECT', 'INSERT', 'UPDATE', 'DELETE',
+    'WHERE', 'GROUP', 'ORDER', 'BY', 'AS',
+    'DISTINCT', 'JOIN', 'WITH', 'RECURSIVE',
+    'PARTITION', 'NTILE', 'MASTER_PASSWORD', 'XA',
+    'REQUIRE_TABLE_PRIMARY_KEY_CHECK', 'STREAM',
+])
+def test_keywords(lexer, text):
+    """Each word, given on its own, must lex as one plain Keyword token."""
+    tokens = list(lexer.get_tokens(text))
+    first = tokens[0]
+    assert first == (Keyword, text)
+
+
+@pytest.mark.parametrize('text', [
+    # Standard
+    'INT(', 'VARCHAR(', 'ENUM(', 'DATETIME', 'GEOMETRY', 'POINT', 'JSON',
+    # Aliases and compatibility
+    'FIXED', 'MEDIUMINT', 'INT3', 'REAL', 'SERIAL',
+    'LONG', 'NATIONAL', 'PRECISION', 'VARYING',
+])
+def test_data_types(lexer, text):
+    """A type name must lex as Keyword.Type whether or not a `(` follows it."""
+    type_name = text.strip('(')  # the parenthesis is not part of the type token
+    first = list(lexer.get_tokens(text))[0]
+    assert first == (Keyword.Type, type_name)
+
+
+@pytest.mark.parametrize('text', [
+    # Common
+    'CAST', 'CONCAT_WS', 'DAYNAME', 'IFNULL', 'NOW', 'SUBSTR',
+    # Less common
+    'CAN_ACCESS_COLUMN', 'JSON_CONTAINS_PATH', 'ST_GEOMFROMGEOJSON',
+])
+def test_functions(lexer, text):
+    """A function name before `(` is Name.Function, with or without a space."""
+    expected = (Name.Function, text)
+    for opening in ('(', ' ('):
+        first = list(lexer.get_tokens(text + opening))[0]
+        assert first == expected
+
+
+@pytest.mark.parametrize('text', [
+    'abc_$123', '上市年限', 'ひらがな',
+    '`a`', '`上市年限`', '`ひらがな`', '`select`', '`concat(`',
+    '````', r'`\``', r'`\\`',
+    '`-- `', '`/*`', '`#`',
+])
+def test_schema_object_names(lexer, text):
+    """Bare and backtick-quoted names lex as Name and join back to the input."""
+    stream = list(lexer.get_tokens(text))
+    # The final token is excluded from both checks.
+    named = stream[:-1]
+    assert [ttype for ttype, _ in named] == [Name] * len(named)
+    assert ''.join(value for _, value in named) == text
+
+
+@pytest.mark.parametrize('text', [
+    '+', '*', '/', '%', '&&', ':=', '!', '<', '->>', '^', '|', '~',
+])
+def test_operators(lexer, text):
+    """Single- and multi-character operators must each lex as one Operator."""
+    assert next(iter(lexer.get_tokens(text))) == (Operator, text)
+
+
+@pytest.mark.parametrize('text, expected_types', (
+    ('abc.efg', (Name, Punctuation, Name)),
+    ('abc,efg', (Name, Punctuation, Name)),
+    ('MAX(abc)', (Name.Function, Punctuation, Name, Punctuation)),
+    ('efg;', (Name, Punctuation)),
+))
+def test_punctuation(lexer, text, expected_types):
+    """The first tokens must carry exactly the types in `expected_types`."""
+    tokens = list(lexer.get_tokens(text))[:len(expected_types)]
+    # Compare whole tuples instead of all(zip(...)): zip() stops at the shorter
+    # input, so a lexer that emitted too few tokens could go unnoticed.
+    assert tuple(t[0] for t in tokens) == expected_types