diff options
-rw-r--r-- | AUTHORS | 2 | ||||
-rw-r--r-- | pygments/lexers/_mysql_builtins.py | 1282 | ||||
-rw-r--r-- | pygments/lexers/sql.py | 209 | ||||
-rw-r--r-- | tests/examplefiles/mysql.txt | 132 | ||||
-rw-r--r-- | tests/test_mysql.py | 249 |
5 files changed, 1817 insertions, 57 deletions
@@ -138,7 +138,7 @@ Other contributors, listed alphabetically, are: * Stephen McKamey -- Duel/JBST lexer * Brian McKenna -- F# lexer * Charles McLaughlin -- Puppet lexer -* Kurt McKee -- Tera Term macro lexer, PostgreSQL updates +* Kurt McKee -- Tera Term macro lexer, PostgreSQL updates, MySQL overhaul * Lukas Meuser -- BBCode formatter, Lua lexer * Cat Miller -- Pig lexer * Paul Miller -- LiveScript lexer diff --git a/pygments/lexers/_mysql_builtins.py b/pygments/lexers/_mysql_builtins.py new file mode 100644 index 00000000..121054c3 --- /dev/null +++ b/pygments/lexers/_mysql_builtins.py @@ -0,0 +1,1282 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers._mysql_builtins + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Self-updating data files for the MySQL lexer. + + :copyright: Copyright 2020 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + + +MYSQL_CONSTANTS = ( + 'false', + 'null', + 'true', + 'unknown', +) + + +# At this time, no easily-parsed, definitive list of data types +# has been found in the MySQL source code or documentation. (The +# `sql/sql_yacc.yy` file is definitive but is difficult to parse.) +# Therefore these types are currently maintained manually. +# +# Some words in this list -- like "long", "national", "precision", +# and "varying" -- appear to only occur in combination with other +# data type keywords. Therefore they are included as separate words +# even though they do not naturally occur in syntax separately. +# +# This list is also used to strip data types out of the list of +# MySQL keywords, which is automatically updated later in the file. 
+# +MYSQL_DATATYPES = ( + # Numeric data types + 'bigint', + 'bit', + 'bool', + 'boolean', + 'dec', + 'decimal', + 'double', + 'fixed', + 'float', + 'float4', + 'float8', + 'int', + 'int1', + 'int2', + 'int3', + 'int4', + 'int8', + 'integer', + 'mediumint', + 'middleint', + 'numeric', + 'precision', + 'real', + 'serial', + 'smallint', + 'tinyint', + + # Date and time data types + 'date', + 'datetime', + 'time', + 'timestamp', + 'year', + + # String data types + 'binary', + 'blob', + 'char', + 'enum', + 'long', + 'longblob', + 'longtext', + 'mediumblob', + 'mediumtext', + 'national', + 'nchar', + 'nvarchar', + 'set', + 'text', + 'tinyblob', + 'tinytext', + 'varbinary', + 'varchar', + 'varcharacter', + 'varying', + + # Spatial data types + 'geometry', + 'geometrycollection', + 'linestring', + 'multilinestring', + 'multipoint', + 'multipolygon', + 'point', + 'polygon', + + # JSON data types + 'json', +) + +# Everything below this line is auto-generated from the MySQL source code. +# Run this file in Python and it will update itself. 
+# ----------------------------------------------------------------------------- + +MYSQL_FUNCTIONS = ( + 'abs', + 'acos', + 'adddate', + 'addtime', + 'aes_decrypt', + 'aes_encrypt', + 'any_value', + 'asin', + 'atan', + 'atan2', + 'benchmark', + 'bin', + 'bin_to_uuid', + 'bit_and', + 'bit_count', + 'bit_length', + 'bit_or', + 'bit_xor', + 'can_access_column', + 'can_access_database', + 'can_access_event', + 'can_access_resource_group', + 'can_access_routine', + 'can_access_table', + 'can_access_trigger', + 'can_access_view', + 'cast', + 'ceil', + 'ceiling', + 'char_length', + 'character_length', + 'coercibility', + 'compress', + 'concat', + 'concat_ws', + 'connection_id', + 'conv', + 'convert_cpu_id_mask', + 'convert_interval_to_user_interval', + 'convert_tz', + 'cos', + 'cot', + 'count', + 'crc32', + 'curdate', + 'current_role', + 'curtime', + 'date_add', + 'date_format', + 'date_sub', + 'datediff', + 'dayname', + 'dayofmonth', + 'dayofweek', + 'dayofyear', + 'degrees', + 'elt', + 'exp', + 'export_set', + 'extract', + 'extractvalue', + 'field', + 'find_in_set', + 'floor', + 'format_bytes', + 'format_pico_time', + 'found_rows', + 'from_base64', + 'from_days', + 'from_unixtime', + 'get_dd_column_privileges', + 'get_dd_create_options', + 'get_dd_index_private_data', + 'get_dd_index_sub_part_length', + 'get_dd_property_key_value', + 'get_dd_tablespace_private_data', + 'get_lock', + 'greatest', + 'group_concat', + 'gtid_subset', + 'gtid_subtract', + 'hex', + 'icu_version', + 'ifnull', + 'inet6_aton', + 'inet6_ntoa', + 'inet_aton', + 'inet_ntoa', + 'instr', + 'internal_auto_increment', + 'internal_avg_row_length', + 'internal_check_time', + 'internal_checksum', + 'internal_data_free', + 'internal_data_length', + 'internal_dd_char_length', + 'internal_get_comment_or_error', + 'internal_get_dd_column_extra', + 'internal_get_enabled_role_json', + 'internal_get_hostname', + 'internal_get_mandatory_roles_json', + 'internal_get_partition_nodegroup', + 'internal_get_username', 
+ 'internal_get_view_warning_or_error', + 'internal_index_column_cardinality', + 'internal_index_length', + 'internal_is_enabled_role', + 'internal_is_mandatory_role', + 'internal_keys_disabled', + 'internal_max_data_length', + 'internal_table_rows', + 'internal_tablespace_autoextend_size', + 'internal_tablespace_data_free', + 'internal_tablespace_extent_size', + 'internal_tablespace_extra', + 'internal_tablespace_free_extents', + 'internal_tablespace_id', + 'internal_tablespace_initial_size', + 'internal_tablespace_logfile_group_name', + 'internal_tablespace_logfile_group_number', + 'internal_tablespace_maximum_size', + 'internal_tablespace_row_format', + 'internal_tablespace_status', + 'internal_tablespace_total_extents', + 'internal_tablespace_type', + 'internal_tablespace_version', + 'internal_update_time', + 'is_free_lock', + 'is_ipv4', + 'is_ipv4_compat', + 'is_ipv4_mapped', + 'is_ipv6', + 'is_used_lock', + 'is_uuid', + 'is_visible_dd_object', + 'isnull', + 'json_array', + 'json_array_append', + 'json_array_insert', + 'json_arrayagg', + 'json_contains', + 'json_contains_path', + 'json_depth', + 'json_extract', + 'json_insert', + 'json_keys', + 'json_length', + 'json_merge', + 'json_merge_patch', + 'json_merge_preserve', + 'json_object', + 'json_objectagg', + 'json_overlaps', + 'json_pretty', + 'json_quote', + 'json_remove', + 'json_replace', + 'json_schema_valid', + 'json_schema_validation_report', + 'json_search', + 'json_set', + 'json_storage_free', + 'json_storage_size', + 'json_type', + 'json_unquote', + 'json_valid', + 'last_day', + 'last_insert_id', + 'lcase', + 'least', + 'length', + 'like_range_max', + 'like_range_min', + 'ln', + 'load_file', + 'locate', + 'log', + 'log10', + 'log2', + 'lower', + 'lpad', + 'ltrim', + 'make_set', + 'makedate', + 'maketime', + 'master_pos_wait', + 'max', + 'mbrcontains', + 'mbrcoveredby', + 'mbrcovers', + 'mbrdisjoint', + 'mbrequals', + 'mbrintersects', + 'mbroverlaps', + 'mbrtouches', + 'mbrwithin', + 'md5', + 'mid', + 
'min', + 'monthname', + 'name_const', + 'now', + 'nullif', + 'oct', + 'octet_length', + 'ord', + 'period_add', + 'period_diff', + 'pi', + 'position', + 'pow', + 'power', + 'ps_current_thread_id', + 'ps_thread_id', + 'quote', + 'radians', + 'rand', + 'random_bytes', + 'regexp_instr', + 'regexp_like', + 'regexp_replace', + 'regexp_substr', + 'release_all_locks', + 'release_lock', + 'remove_dd_property_key', + 'reverse', + 'roles_graphml', + 'round', + 'rpad', + 'rtrim', + 'sec_to_time', + 'session_user', + 'sha', + 'sha1', + 'sha2', + 'sign', + 'sin', + 'sleep', + 'soundex', + 'space', + 'sqrt', + 'st_area', + 'st_asbinary', + 'st_asgeojson', + 'st_astext', + 'st_aswkb', + 'st_aswkt', + 'st_buffer', + 'st_buffer_strategy', + 'st_centroid', + 'st_contains', + 'st_convexhull', + 'st_crosses', + 'st_difference', + 'st_dimension', + 'st_disjoint', + 'st_distance', + 'st_distance_sphere', + 'st_endpoint', + 'st_envelope', + 'st_equals', + 'st_exteriorring', + 'st_geohash', + 'st_geomcollfromtext', + 'st_geomcollfromtxt', + 'st_geomcollfromwkb', + 'st_geometrycollectionfromtext', + 'st_geometrycollectionfromwkb', + 'st_geometryfromtext', + 'st_geometryfromwkb', + 'st_geometryn', + 'st_geometrytype', + 'st_geomfromgeojson', + 'st_geomfromtext', + 'st_geomfromwkb', + 'st_interiorringn', + 'st_intersection', + 'st_intersects', + 'st_isclosed', + 'st_isempty', + 'st_issimple', + 'st_isvalid', + 'st_latfromgeohash', + 'st_latitude', + 'st_length', + 'st_linefromtext', + 'st_linefromwkb', + 'st_linestringfromtext', + 'st_linestringfromwkb', + 'st_longfromgeohash', + 'st_longitude', + 'st_makeenvelope', + 'st_mlinefromtext', + 'st_mlinefromwkb', + 'st_mpointfromtext', + 'st_mpointfromwkb', + 'st_mpolyfromtext', + 'st_mpolyfromwkb', + 'st_multilinestringfromtext', + 'st_multilinestringfromwkb', + 'st_multipointfromtext', + 'st_multipointfromwkb', + 'st_multipolygonfromtext', + 'st_multipolygonfromwkb', + 'st_numgeometries', + 'st_numinteriorring', + 'st_numinteriorrings', + 
'st_numpoints', + 'st_overlaps', + 'st_pointfromgeohash', + 'st_pointfromtext', + 'st_pointfromwkb', + 'st_pointn', + 'st_polyfromtext', + 'st_polyfromwkb', + 'st_polygonfromtext', + 'st_polygonfromwkb', + 'st_simplify', + 'st_srid', + 'st_startpoint', + 'st_swapxy', + 'st_symdifference', + 'st_touches', + 'st_transform', + 'st_union', + 'st_validate', + 'st_within', + 'st_x', + 'st_y', + 'statement_digest', + 'statement_digest_text', + 'std', + 'stddev', + 'stddev_pop', + 'stddev_samp', + 'str_to_date', + 'strcmp', + 'subdate', + 'substr', + 'substring', + 'substring_index', + 'subtime', + 'sum', + 'sysdate', + 'system_user', + 'tan', + 'time_format', + 'time_to_sec', + 'timediff', + 'to_base64', + 'to_days', + 'to_seconds', + 'trim', + 'ucase', + 'uncompress', + 'uncompressed_length', + 'unhex', + 'unix_timestamp', + 'updatexml', + 'upper', + 'uuid', + 'uuid_short', + 'uuid_to_bin', + 'validate_password_strength', + 'var_pop', + 'var_samp', + 'variance', + 'version', + 'wait_for_executed_gtid_set', + 'wait_until_sql_thread_after_gtids', + 'weekday', + 'weekofyear', + 'yearweek', +) + + +MYSQL_OPTIMIZER_HINTS = ( + 'bka', + 'bnl', + 'dupsweedout', + 'firstmatch', + 'group_index', + 'hash_join', + 'index', + 'index_merge', + 'intoexists', + 'join_fixed_order', + 'join_index', + 'join_order', + 'join_prefix', + 'join_suffix', + 'loosescan', + 'materialization', + 'max_execution_time', + 'merge', + 'mrr', + 'no_bka', + 'no_bnl', + 'no_group_index', + 'no_hash_join', + 'no_icp', + 'no_index', + 'no_index_merge', + 'no_join_index', + 'no_merge', + 'no_mrr', + 'no_order_index', + 'no_range_optimization', + 'no_semijoin', + 'no_skip_scan', + 'order_index', + 'qb_name', + 'resource_group', + 'semijoin', + 'set_var', + 'skip_scan', + 'subquery', +) + + +MYSQL_KEYWORDS = ( + 'accessible', + 'account', + 'action', + 'active', + 'add', + 'admin', + 'after', + 'against', + 'aggregate', + 'algorithm', + 'all', + 'alter', + 'always', + 'analyze', + 'and', + 'any', + 'array', + 
'as', + 'asc', + 'ascii', + 'asensitive', + 'at', + 'attribute', + 'auto_increment', + 'autoextend_size', + 'avg', + 'avg_row_length', + 'backup', + 'before', + 'begin', + 'between', + 'binlog', + 'block', + 'both', + 'btree', + 'buckets', + 'by', + 'byte', + 'cache', + 'call', + 'cascade', + 'cascaded', + 'case', + 'catalog_name', + 'chain', + 'change', + 'changed', + 'channel', + 'character', + 'charset', + 'check', + 'checksum', + 'cipher', + 'class_origin', + 'client', + 'clone', + 'close', + 'coalesce', + 'code', + 'collate', + 'collation', + 'column', + 'column_format', + 'column_name', + 'columns', + 'comment', + 'commit', + 'committed', + 'compact', + 'completion', + 'component', + 'compressed', + 'compression', + 'concurrent', + 'condition', + 'connection', + 'consistent', + 'constraint', + 'constraint_catalog', + 'constraint_name', + 'constraint_schema', + 'contains', + 'context', + 'continue', + 'convert', + 'cpu', + 'create', + 'cross', + 'cube', + 'cume_dist', + 'current', + 'current_date', + 'current_time', + 'current_timestamp', + 'current_user', + 'cursor', + 'cursor_name', + 'data', + 'database', + 'databases', + 'datafile', + 'day', + 'day_hour', + 'day_microsecond', + 'day_minute', + 'day_second', + 'deallocate', + 'declare', + 'default', + 'default_auth', + 'definer', + 'definition', + 'delay_key_write', + 'delayed', + 'delete', + 'dense_rank', + 'desc', + 'describe', + 'description', + 'deterministic', + 'diagnostics', + 'directory', + 'disable', + 'discard', + 'disk', + 'distinct', + 'distinctrow', + 'div', + 'do', + 'drop', + 'dual', + 'dumpfile', + 'duplicate', + 'dynamic', + 'each', + 'else', + 'elseif', + 'empty', + 'enable', + 'enclosed', + 'encryption', + 'end', + 'ends', + 'enforced', + 'engine', + 'engine_attribute', + 'engines', + 'error', + 'errors', + 'escape', + 'escaped', + 'event', + 'events', + 'every', + 'except', + 'exchange', + 'exclude', + 'execute', + 'exists', + 'exit', + 'expansion', + 'expire', + 'explain', + 'export', + 
'extended', + 'extent_size', + 'failed_login_attempts', + 'false', + 'fast', + 'faults', + 'fetch', + 'fields', + 'file', + 'file_block_size', + 'filter', + 'first', + 'first_value', + 'flush', + 'following', + 'follows', + 'for', + 'force', + 'foreign', + 'format', + 'found', + 'from', + 'full', + 'fulltext', + 'function', + 'general', + 'generated', + 'geomcollection', + 'get', + 'get_format', + 'get_master_public_key', + 'global', + 'grant', + 'grants', + 'group', + 'group_replication', + 'grouping', + 'groups', + 'handler', + 'hash', + 'having', + 'help', + 'high_priority', + 'histogram', + 'history', + 'host', + 'hosts', + 'hour', + 'hour_microsecond', + 'hour_minute', + 'hour_second', + 'identified', + 'if', + 'ignore', + 'ignore_server_ids', + 'import', + 'in', + 'inactive', + 'index', + 'indexes', + 'infile', + 'initial_size', + 'inner', + 'inout', + 'insensitive', + 'insert', + 'insert_method', + 'install', + 'instance', + 'interval', + 'into', + 'invisible', + 'invoker', + 'io', + 'io_after_gtids', + 'io_before_gtids', + 'io_thread', + 'ipc', + 'is', + 'isolation', + 'issuer', + 'iterate', + 'join', + 'json_table', + 'json_value', + 'key', + 'key_block_size', + 'keys', + 'kill', + 'lag', + 'language', + 'last', + 'last_value', + 'lateral', + 'lead', + 'leading', + 'leave', + 'leaves', + 'left', + 'less', + 'level', + 'like', + 'limit', + 'linear', + 'lines', + 'list', + 'load', + 'local', + 'localtime', + 'localtimestamp', + 'lock', + 'locked', + 'locks', + 'logfile', + 'logs', + 'loop', + 'low_priority', + 'master', + 'master_auto_position', + 'master_bind', + 'master_compression_algorithms', + 'master_connect_retry', + 'master_delay', + 'master_heartbeat_period', + 'master_host', + 'master_log_file', + 'master_log_pos', + 'master_password', + 'master_port', + 'master_public_key_path', + 'master_retry_count', + 'master_server_id', + 'master_ssl', + 'master_ssl_ca', + 'master_ssl_capath', + 'master_ssl_cert', + 'master_ssl_cipher', + 'master_ssl_crl', + 
'master_ssl_crlpath', + 'master_ssl_key', + 'master_ssl_verify_server_cert', + 'master_tls_ciphersuites', + 'master_tls_version', + 'master_user', + 'master_zstd_compression_level', + 'match', + 'max_connections_per_hour', + 'max_queries_per_hour', + 'max_rows', + 'max_size', + 'max_updates_per_hour', + 'max_user_connections', + 'maxvalue', + 'medium', + 'member', + 'memory', + 'merge', + 'message_text', + 'microsecond', + 'migrate', + 'min_rows', + 'minute', + 'minute_microsecond', + 'minute_second', + 'mod', + 'mode', + 'modifies', + 'modify', + 'month', + 'mutex', + 'mysql_errno', + 'name', + 'names', + 'natural', + 'ndb', + 'ndbcluster', + 'nested', + 'network_namespace', + 'never', + 'new', + 'next', + 'no', + 'no_wait', + 'no_write_to_binlog', + 'nodegroup', + 'none', + 'not', + 'nowait', + 'nth_value', + 'ntile', + 'null', + 'nulls', + 'number', + 'of', + 'off', + 'offset', + 'oj', + 'old', + 'on', + 'one', + 'only', + 'open', + 'optimize', + 'optimizer_costs', + 'option', + 'optional', + 'optionally', + 'options', + 'or', + 'order', + 'ordinality', + 'organization', + 'others', + 'out', + 'outer', + 'outfile', + 'over', + 'owner', + 'pack_keys', + 'page', + 'parser', + 'partial', + 'partition', + 'partitioning', + 'partitions', + 'password', + 'password_lock_time', + 'path', + 'percent_rank', + 'persist', + 'persist_only', + 'phase', + 'plugin', + 'plugin_dir', + 'plugins', + 'port', + 'precedes', + 'preceding', + 'prepare', + 'preserve', + 'prev', + 'primary', + 'privilege_checks_user', + 'privileges', + 'procedure', + 'process', + 'processlist', + 'profile', + 'profiles', + 'proxy', + 'purge', + 'quarter', + 'query', + 'quick', + 'random', + 'range', + 'rank', + 'read', + 'read_only', + 'read_write', + 'reads', + 'rebuild', + 'recover', + 'recursive', + 'redo_buffer_size', + 'redundant', + 'reference', + 'references', + 'regexp', + 'relay', + 'relay_log_file', + 'relay_log_pos', + 'relay_thread', + 'relaylog', + 'release', + 'reload', + 'remove', + 
'rename', + 'reorganize', + 'repair', + 'repeat', + 'repeatable', + 'replace', + 'replicate_do_db', + 'replicate_do_table', + 'replicate_ignore_db', + 'replicate_ignore_table', + 'replicate_rewrite_db', + 'replicate_wild_do_table', + 'replicate_wild_ignore_table', + 'replication', + 'require', + 'require_row_format', + 'require_table_primary_key_check', + 'reset', + 'resignal', + 'resource', + 'respect', + 'restart', + 'restore', + 'restrict', + 'resume', + 'retain', + 'return', + 'returned_sqlstate', + 'returning', + 'returns', + 'reuse', + 'reverse', + 'revoke', + 'right', + 'rlike', + 'role', + 'rollback', + 'rollup', + 'rotate', + 'routine', + 'row', + 'row_count', + 'row_format', + 'row_number', + 'rows', + 'rtree', + 'savepoint', + 'schedule', + 'schema', + 'schema_name', + 'schemas', + 'second', + 'second_microsecond', + 'secondary', + 'secondary_engine', + 'secondary_engine_attribute', + 'secondary_load', + 'secondary_unload', + 'security', + 'select', + 'sensitive', + 'separator', + 'serializable', + 'server', + 'session', + 'share', + 'show', + 'shutdown', + 'signal', + 'signed', + 'simple', + 'skip', + 'slave', + 'slow', + 'snapshot', + 'socket', + 'some', + 'soname', + 'sounds', + 'source', + 'spatial', + 'specific', + 'sql', + 'sql_after_gtids', + 'sql_after_mts_gaps', + 'sql_before_gtids', + 'sql_big_result', + 'sql_buffer_result', + 'sql_calc_found_rows', + 'sql_no_cache', + 'sql_small_result', + 'sql_thread', + 'sql_tsi_day', + 'sql_tsi_hour', + 'sql_tsi_minute', + 'sql_tsi_month', + 'sql_tsi_quarter', + 'sql_tsi_second', + 'sql_tsi_week', + 'sql_tsi_year', + 'sqlexception', + 'sqlstate', + 'sqlwarning', + 'srid', + 'ssl', + 'stacked', + 'start', + 'starting', + 'starts', + 'stats_auto_recalc', + 'stats_persistent', + 'stats_sample_pages', + 'status', + 'stop', + 'storage', + 'stored', + 'straight_join', + 'stream', + 'string', + 'subclass_origin', + 'subject', + 'subpartition', + 'subpartitions', + 'super', + 'suspend', + 'swaps', + 'switches', + 
'system', + 'table', + 'table_checksum', + 'table_name', + 'tables', + 'tablespace', + 'temporary', + 'temptable', + 'terminated', + 'than', + 'then', + 'thread_priority', + 'ties', + 'timestampadd', + 'timestampdiff', + 'tls', + 'to', + 'trailing', + 'transaction', + 'trigger', + 'triggers', + 'true', + 'truncate', + 'type', + 'types', + 'unbounded', + 'uncommitted', + 'undefined', + 'undo', + 'undo_buffer_size', + 'undofile', + 'unicode', + 'uninstall', + 'union', + 'unique', + 'unknown', + 'unlock', + 'unsigned', + 'until', + 'update', + 'upgrade', + 'usage', + 'use', + 'use_frm', + 'user', + 'user_resources', + 'using', + 'utc_date', + 'utc_time', + 'utc_timestamp', + 'validation', + 'value', + 'values', + 'variables', + 'vcpu', + 'view', + 'virtual', + 'visible', + 'wait', + 'warnings', + 'week', + 'weight_string', + 'when', + 'where', + 'while', + 'window', + 'with', + 'without', + 'work', + 'wrapper', + 'write', + 'x509', + 'xa', + 'xid', + 'xml', + 'xor', + 'year_month', + 'zerofill', +) + + +if __name__ == '__main__': # pragma: no cover + import re + from urllib.request import urlopen + + from pygments.util import format_lines + + # MySQL source code + SOURCE_URL = 'https://github.com/mysql/mysql-server/raw/8.0' + LEX_URL = SOURCE_URL + '/sql/lex.h' + ITEM_CREATE_URL = SOURCE_URL + '/sql/item_create.cc' + + + def update_myself(): + # Pull content from lex.h. + lex_file = urlopen(LEX_URL).read().decode('utf8', errors='ignore') + keywords = parse_lex_keywords(lex_file) + functions = parse_lex_functions(lex_file) + optimizer_hints = parse_lex_optimizer_hints(lex_file) + + # Parse content in item_create.cc. + item_create_file = urlopen(ITEM_CREATE_URL).read().decode('utf8', errors='ignore') + functions.update(parse_item_create_functions(item_create_file)) + + # Remove data types from the set of keywords. 
+ keywords -= set(MYSQL_DATATYPES) + + update_content('MYSQL_FUNCTIONS', tuple(sorted(functions))) + update_content('MYSQL_KEYWORDS', tuple(sorted(keywords))) + update_content('MYSQL_OPTIMIZER_HINTS', tuple(sorted(optimizer_hints))) + + + def parse_lex_keywords(f): + """Parse keywords in lex.h.""" + + results = set() + for m in re.finditer(r'{SYM(?:_HK)?\("(?P<keyword>[a-z0-9_]+)",', f, flags=re.I): + results.add(m.group('keyword').lower()) + + if not results: + raise ValueError('No keywords found') + + return results + + + def parse_lex_optimizer_hints(f): + """Parse optimizer hints in lex.h.""" + + results = set() + for m in re.finditer(r'{SYM_H\("(?P<keyword>[a-z0-9_]+)",', f, flags=re.I): + results.add(m.group('keyword').lower()) + + if not results: + raise ValueError('No optimizer hints found') + + return results + + + def parse_lex_functions(f): + """Parse MySQL function names from lex.h.""" + + results = set() + for m in re.finditer(r'{SYM_FN?\("(?P<function>[a-z0-9_]+)",', f, flags=re.I): + results.add(m.group('function').lower()) + + if not results: + raise ValueError('No lex functions found') + + return results + + + def parse_item_create_functions(f): + """Parse MySQL function names from item_create.cc.""" + + results = set() + for m in re.finditer(r'{"(?P<function>[^"]+?)",\s*SQL_F[^(]+?\(', f, flags=re.I): + results.add(m.group('function').lower()) + + if not results: + raise ValueError('No item_create functions found') + + return results + + + def update_content(field_name, content): + """Replace the ``field_name = (...)`` tuple in this file with ``content``; raise ValueError if it is not found.""" + + with open(__file__, encoding='utf-8') as f: # Explicit UTF-8 matches this file's coding cookie + data = f.read() + + # Locate the existing "field_name = ( ... )" block that will be replaced + re_match = re.compile(r'^%s\s*=\s*\($.*?^\s*\)$' % field_name, re.M | re.S) + m = re_match.search(data) + if not m: + raise ValueError('Could not find an existing definition for %s' % field_name) + + new_block = format_lines(field_name, content) + data = data[:m.start()] + new_block + data[m.end():] + + with 
open(__file__, 'w', encoding='utf-8', newline='\n') as f: + f.write(data) + + update_myself() diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py index d58abe2a..7cce4d40 100644 --- a/pygments/lexers/sql.py +++ b/pygments/lexers/sql.py @@ -42,11 +42,18 @@ import re from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \ - Keyword, Name, String, Number, Generic + Keyword, Name, String, Number, Generic, Literal from pygments.lexers import get_lexer_by_name, ClassNotFound from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \ PSEUDO_TYPES, PLPGSQL_KEYWORDS +from pygments.lexers._mysql_builtins import \ + MYSQL_CONSTANTS, \ + MYSQL_DATATYPES, \ + MYSQL_FUNCTIONS, \ + MYSQL_KEYWORDS, \ + MYSQL_OPTIMIZER_HINTS + from pygments.lexers import _tsql_builtins @@ -579,8 +586,12 @@ class TransactSqlLexer(RegexLexer): class MySqlLexer(RegexLexer): - """ - Special lexer for MySQL. + """The Oracle MySQL lexer. + + This lexer does not attempt to maintain strict compatibility with + MariaDB syntax or keywords. Although MySQL and MariaDB's common code + history suggests there may be significant overlap between the two, + compatibility between the two is not a target for this lexer. """ name = 'MySQL' @@ -591,63 +602,149 @@ class MySqlLexer(RegexLexer): tokens = { 'root': [ (r'\s+', Text), - (r'(#|--\s+).*\n?', Comment.Single), - (r'/\*', Comment.Multiline, 'multiline-comments'), + + # Comments ("--" must be followed by whitespace; the lookahead keeps a bare "--" line from swallowing the next line) + (r'(?:#|--(?=\s)).*', Comment.Single), + (r'/\*\+', Comment.Special, 'optimizer-hints'), + (r'/\*', Comment.Multiline, 'multiline-comment'), + + # Hexadecimal literals + (r"x'([0-9a-f]{2})+'", Number.Hex), # MySQL requires paired hex characters in this form. 
+ (r'0x[0-9a-f]+', Number.Hex), + + # Binary literals + (r"b'[01]+'", Number.Bin), + (r'0b[01]+', Number.Bin), + + # Numeric literals + (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float), # Mandatory integer, optional fraction and exponent + (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float), # Mandatory fraction, optional integer and exponent + (r'[0-9]+e[+-]?[0-9]+', Number.Float), # Exponents with integer significands are still floats (r'[0-9]+', Number.Integer), - (r'[0-9]*\.[0-9]+(e[+-][0-9]+)', Number.Float), - (r"'(\\\\|\\'|''|[^'])*'", String.Single), - (r'"(\\\\|\\"|""|[^"])*"', String.Double), - (r"`(\\\\|\\`|``|[^`])*`", String.Symbol), - (r'[+*/<>=~!@#%^&|`?-]', Operator), - (r'\b(tinyint|smallint|mediumint|int|integer|bigint|date|' - r'datetime|time|bit|bool|tinytext|mediumtext|longtext|text|' - r'tinyblob|mediumblob|longblob|blob|float|double|double\s+' - r'precision|real|numeric|dec|decimal|timestamp|year|char|' - r'varchar|varbinary|varcharacter|enum|set)(\b\s*)(\()?', - bygroups(Keyword.Type, Text, Punctuation)), - (r'\b(add|all|alter|analyze|and|as|asc|asensitive|before|between|' - r'bigint|binary|blob|both|by|call|cascade|case|change|char|' - r'character|check|collate|column|condition|constraint|continue|' - r'convert|create|cross|current_date|current_time|' - r'current_timestamp|current_user|cursor|database|databases|' - r'day_hour|day_microsecond|day_minute|day_second|dec|decimal|' - r'declare|default|delayed|delete|desc|describe|deterministic|' - r'distinct|distinctrow|div|double|drop|dual|each|else|elseif|' - r'enclosed|escaped|exists|exit|explain|fetch|flush|float|float4|' - r'float8|for|force|foreign|from|fulltext|grant|group|having|' - r'high_priority|hour_microsecond|hour_minute|hour_second|if|' - r'ignore|in|index|infile|inner|inout|insensitive|insert|int|' - r'int1|int2|int3|int4|int8|integer|interval|into|is|iterate|' - r'join|key|keys|kill|leading|leave|left|like|limit|lines|load|' - 
r'localtime|localtimestamp|lock|long|loop|low_priority|match|' - r'minute_microsecond|minute_second|mod|modifies|natural|' - r'no_write_to_binlog|not|numeric|on|optimize|option|optionally|' - r'or|order|out|outer|outfile|precision|primary|procedure|purge|' - r'raid0|read|reads|real|references|regexp|release|rename|repeat|' - r'replace|require|restrict|return|revoke|right|rlike|schema|' - r'schemas|second_microsecond|select|sensitive|separator|set|' - r'show|smallint|soname|spatial|specific|sql|sql_big_result|' - r'sql_calc_found_rows|sql_small_result|sqlexception|sqlstate|' - r'sqlwarning|ssl|starting|straight_join|table|terminated|then|' - r'to|trailing|trigger|undo|union|unique|unlock|unsigned|update|' - r'usage|use|using|utc_date|utc_time|utc_timestamp|values|' - r'varying|when|where|while|with|write|x509|xor|year_month|' - r'zerofill)\b', Keyword), - # TODO: this list is not complete - (r'\b(auto_increment|engine|charset|tables)\b', Keyword.Pseudo), - (r'(true|false|null)', Name.Constant), - (r'([a-z_]\w*)(\s*)(\()', + + # Date literals + (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}", Literal.Date), + + # Time literals + (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}", Literal.Date), + + # Timestamp literals + ( + r"\{\s*ts\s*(?P<quote>['\"])\s*" + r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}" # Date part + r"\s+" # Whitespace between date and time + r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?" 
# Time part + r"\s*(?P=quote)\s*\}", + Literal.Date + ), + + # String literals + (r"'", String.Single, 'single-quoted-string'), + (r'"', String.Double, 'double-quoted-string'), + + # Variables (system variable names may contain digits, e.g. default_collation_for_utf8mb4) + (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z0-9_]+', Name.Variable), + (r'@[a-z0-9_$.]+', Name.Variable), + (r"@'", Name.Variable, 'single-quoted-variable'), + (r'@"', Name.Variable, 'double-quoted-variable'), + (r"@`", Name.Variable, 'backtick-quoted-variable'), + (r'\?', Name.Variable), # For demonstrating prepared statements + + # Operators + (r'[!%&*+/:<=>^|~-]+', Operator), + + # Exceptions; these words tokenize differently in different contexts. + (r'\b(set)\b(?!\s*\()', Keyword), # Trailing \b keeps identifiers like "settings" from being split + (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Text, Keyword)), + # In all other known cases, "SET" is tokenized by MYSQL_DATATYPES. + + (words(MYSQL_CONSTANTS, prefix=r'\b', suffix=r'\b'), Name.Constant), + (words(MYSQL_DATATYPES, prefix=r'\b', suffix=r'\b'), Keyword.Type), + (words(MYSQL_KEYWORDS, prefix=r'\b', suffix=r'\b'), Keyword), + (words(MYSQL_FUNCTIONS, prefix=r'\b', suffix=r'\b(\s*)(\()'), + bygroups(Name.Function, Text, Punctuation)), - (r'[a-z_]\w*', Name), - (r'@[a-z0-9]*[._]*[a-z0-9]*', Name.Variable), - (r'[;:()\[\],.]', Punctuation) + + # Schema object names + # + # Note: Although the first regex supports unquoted all-numeric + # identifiers, this will not be a problem in practice because + # numeric literals have already been handled above. 
+ # + ('[0-9a-z$_\u0080-\uffff]+', Name), + (r'`', Name, 'schema-object-name'), + + # Punctuation + (r'[(),.;]', Punctuation), ], - 'multiline-comments': [ - (r'/\*', Comment.Multiline, 'multiline-comments'), + + # Multiline comment substates + # --------------------------- + + 'optimizer-hints': [ + (r'[^*a-z]+', Comment.Special), + (r'\*/', Comment.Special, '#pop'), + (words(MYSQL_OPTIMIZER_HINTS, suffix=r'\b'), Comment.Preproc), + ('[a-z]+', Comment.Special), + (r'\*', Comment.Special), + ], + + 'multiline-comment': [ + (r'[^*]+', Comment.Multiline), (r'\*/', Comment.Multiline, '#pop'), - (r'[^/*]+', Comment.Multiline), - (r'[/*]', Comment.Multiline) - ] + (r'\*', Comment.Multiline), + ], + + # String substates + # ---------------- + + 'single-quoted-string': [ + (r"[^'\\]+", String.Single), + (r"''", String.Escape), + (r"""\\[0'"bnrtZ\\%_]""", String.Escape), + (r"'", String.Single, '#pop'), + ], + + 'double-quoted-string': [ + (r'[^"\\]+', String.Double), + (r'""', String.Escape), + (r"""\\[0'"bnrtZ\\%_]""", String.Escape), + (r'"', String.Double, '#pop'), + ], + + # Variable substates + # ------------------ + + 'single-quoted-variable': [ + (r"[^']+", Name.Variable), + (r"''", Name.Variable), + (r"'", Name.Variable, '#pop'), + ], + + 'double-quoted-variable': [ + (r'[^"]+', Name.Variable), + (r'""', Name.Variable), + (r'"', Name.Variable, '#pop'), + ], + + 'backtick-quoted-variable': [ + (r'[^`]+', Name.Variable), + (r'``', Name.Variable), + (r'`', Name.Variable, '#pop'), + ], + + # Schema object name substates + # ---------------------------- + # + # Backtick-quoted schema object names support escape characters. + # It may be desirable to tokenize escape sequences differently, + # but currently Pygments does not have an obvious token type for + # this unique situation (for example, "Name.Escape"). + # + 'schema-object-name': [ + (r'[^`\\]+', Name), + (r'(?:\\\\|\\`|``)', Name), # This could be an escaped name token type. 
+ (r'`', Name, '#pop'), + ], } def analyse_text(text): diff --git a/tests/examplefiles/mysql.txt b/tests/examplefiles/mysql.txt new file mode 100644 index 00000000..4927abd8 --- /dev/null +++ b/tests/examplefiles/mysql.txt @@ -0,0 +1,132 @@ +-- Samples of MySQL parsing
+
+
+-- Comments
+# standalone comment line
+-- standalone comment line
+SELECT 1; -- trailing comment
+SELECT 1; # trailing comment
+SELECT 1; /* trailing comment */
+SELECT /* interruption */ /**/ 1;
+ /*
+ Multiline / * / comment
+ */
+ /* /* MySQL does not support nested comments */
+SELECT 'If this line is a comment then nested commenting is enabled (and therefore broken).';
+
+
+-- Optimizer hints: /*+ ... */ comments; the tests expect a known hint name such as SEMIJOIN to lex as Comment.Preproc
+SELECT /*+ SEMIJOIN(FIRSTMATCH, LOOSESCAN) */ 1;
+SELECT /*+ SET_VAR(foreign_key_checks=OFF) */ 1;
+
+
+-- Literals: one SELECT whose select list covers every literal form
+SELECT
+ -- Integers
+ 123,
+
+ -- Floats (digits may be omitted on either side of the point)
+ .123, 1.23, 123.,
+
+ -- Exponents
+ 1e10, 1e-10, 1.e20, .1e-20,
+
+ -- Hexadecimal (quoted X'...' / x'...' form and 0x prefix form)
+ X'0af019', x'0AF019', 0xaf019,
+
+ -- Binary (quoted B'...' / b'...' form and 0b prefix form)
+ B'010', b'010', 0b010,
+
+ -- Temporal literals ({d ...}, {t ...}, {ts ...}; the tests expect each to lex as one Literal.Date token)
+ {d'2020-01-01'}, { d ' 2020^01@01 ' },
+ {t'8 9:10:11'}, { t ' 09:10:11.12 ' }, { t ' 091011 ' },
+ {ts"2020-01-01 09:10:11"}, { ts ' 2020@01/01 09:10:11 ' },
+
+ -- Strings (doubled quotes and backslash sequences are escapes); last select-list item, so no trailing comma
+ '', 'abc', '1''2\03\%4\_5\\6\'7\"8',
+ "", "abc", "1""2\03\%4\_5\\6\'7\"8"
+;
+
+
+-- Variables: @-prefixed names (bare, or quoted with ', ", or `) and @@-prefixed names with an optional scope
+SET @a = 1, @1 = 2, @._.$ = 3;
+SET @'?' = 1, @'abc''def"`ghi' = 2;
+SET @"#" = 1, @"abc""def'`ghi" = 2;
+SET @`^` = 1, @`abc``def'"ghi` = 2;
+SELECT
+ @@timestamp,
+ @@global.auto_increment_offset,
+ @@session.auto_increment_offset,
+ @@auto_increment_offset
+;
+
+
+-- Prepared statements ("?" is the parameter placeholder; the tests expect it to lex as Name.Variable)
+SELECT POW(?, 3) AS cubed;
+
+
+-- Constants (the tests check that the lowercase spellings lex as Name.Constant)
+SELECT TRUE, FALSE, NULL, UNKNOWN;
+
+
+-- Data types (the tests expect type names such as INT and VARCHAR to lex as Keyword.Type)
+CREATE TABLE table1 (
+ id INT AUTO_INCREMENT PRIMARY KEY,
+ name VARCHAR(20) NOT NULL,
+ birthyear YEAR
+);
+
+
+-- Keywords (the INSERT targets the name and birthyear columns of table1 created under "Data types")
+INSERT INTO table1 (name, birthyear) VALUES ('abc', 2020);
+
+WITH RECURSIVE example (n) AS (
+ SELECT 1
+ UNION ALL
+ SELECT n + 1 FROM example
+ WHERE n < 10
+)
+SELECT n FROM example;
+
+SELECT 17 MEMBER OF ('[23, "abc", 17, "ab", 10]');
+
+
+-- Functions (the tests expect a known function name followed by "(" to lex as Name.Function)
+SELECT CONCAT('function');
+SELECT MAX(quantity) FROM example;
+
+
+-- Schema object names: bare, non-ASCII, and backtick-quoted (quoting lets a keyword or punctuation appear in a name)
+CREATE TABLE basic (
+ example INT,
+ 股票编号 INT,
+ `select` INT,
+ `concat(` INT
+);
+
+SELECT e1.`apple` AS a, `example2`.b
+FROM example1 AS e1
+JOIN example2 e2
+ON `example1`.`id` = e2.id;
+
+
+-- Operators (arithmetic, shift, assignment, and JSON path operators)
+SELECT 1 + 2 - 3 << 2;
+SELECT 1::DECIMAL(5, 2); -- NOTE(review): "::" is not a MySQL cast operator; presumably kept only to exercise operator lexing (confirm)
+SET @a = 1;
+SET a := 1;
+SELECT c->>'$.name' FROM example;
+
+
+
+-- Exceptions: SET is a keyword in "CHARACTER SET" but a data type in "SET(...)"; the tests expect Keyword and Keyword.Type respectively
+CREATE TABLE t1
+(
+ c1 VARCHAR(5) CHARACTER SET latin1,
+ c2 SET('r', 'g', 'b')
+);
+
+
+-- Introducers (an underscore-prefixed name such as _latin1 placed directly before a string literal)
+SELECT _latin1'abc';
+SELECT _binary'abc';
# -*- coding: utf-8 -*-
"""
    Pygments MySQL lexer tests
    ~~~~~~~~~~~~~~~~~~~~~~~~~~

    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import pytest

from pygments.lexers.sql import MySqlLexer

from pygments.token import (
    Comment,
    Keyword,
    Literal,
    Name,
    Number,
    Operator,
    Punctuation,
    String,
    Text,
)


@pytest.fixture(scope='module')
def lexer():
    """Provide one MySqlLexer instance shared by every test in the module."""
    yield MySqlLexer()


def _leading_token_types(lexer, text, count):
    """Return the types of the first *count* tokens lexed from *text*.

    The result is a tuple so it can be compared for equality against an
    expected tuple of token types.  Unlike ``all(... zip(...))``, which
    stops at the shorter sequence, an equality check also fails when the
    lexer produces fewer tokens than expected.
    """
    return tuple(
        token_type for token_type, _ in list(lexer.get_tokens(text))[:count]
    )


@pytest.mark.parametrize('text', ('123',))
def test_integer_literals(lexer, text):
    """An integer literal lexes as a single Number.Integer token."""
    assert list(lexer.get_tokens(text))[0] == (Number.Integer, text)


@pytest.mark.parametrize(
    'text',
    (
        '.123', '1.23', '123.',
        '1e10', '1.0e10', '1.e-10', '.1e+10',
    ),
)
def test_float_literals(lexer, text):
    """Decimal and exponent forms lex as a single Number.Float token."""
    assert list(lexer.get_tokens(text))[0] == (Number.Float, text)


@pytest.mark.parametrize('text', ("X'0af019'", "x'0AF019'", "0xaf019"))
def test_hexadecimal_literals(lexer, text):
    """Quoted and 0x-prefixed hex literals lex as a single Number.Hex token."""
    assert list(lexer.get_tokens(text))[0] == (Number.Hex, text)


@pytest.mark.parametrize('text', ("B'010'", "b'010'", "0b010"))
def test_binary_literals(lexer, text):
    """Quoted and 0b-prefixed binary literals lex as a single Number.Bin token."""
    assert list(lexer.get_tokens(text))[0] == (Number.Bin, text)


@pytest.mark.parametrize(
    'text',
    (
        "{d'2020-01-01'}", "{ d ' 2020^01@01 ' }",
        "{t'8 9:10:11'}", "{ t ' 09:10:11.12 ' }", "{ t ' 091011 ' }",
        '{ts"2020-01-01 09:10:11"}', "{ ts ' 2020@01/01 09:10:11 ' }",
    ),
)
def test_temporal_literals(lexer, text):
    """A braced temporal literal lexes as a single Literal.Date token."""
    assert list(lexer.get_tokens(text))[0] == (Literal.Date, text)


@pytest.mark.parametrize(
    'text, expected_types',
    (
        (r"'a'", (String.Single,) * 3),
        (r"""'""'""", (String.Single,) * 3),
        (r"''''", (String.Single, String.Escape, String.Single)),
        (r"'\''", (String.Single, String.Escape, String.Single)),
        (r'"a"', (String.Double,) * 3),
        (r'''"''"''', (String.Double,) * 3),
        (r'""""', (String.Double, String.Escape, String.Double)),
        (r'"\""', (String.Double, String.Escape, String.Double)),
    ),
)
def test_string_literals(lexer, text, expected_types):
    """Quoted strings lex as open quote, body or escape, close quote."""
    found = _leading_token_types(lexer, text, len(expected_types))
    assert found == expected_types


@pytest.mark.parametrize(
    'text',
    (
        "@a", "@1", "@._.$",
        "@'?'", """@'abc''def"`ghi'""",
        '@"#"', '''@"abc""def'`ghi"''',
        '@`^`', """@`abc``def'"ghi`""",
        "@@timestamp",
        "@@session.auto_increment_offset",
        "@@global.auto_increment_offset",
        "@@persist.auto_increment_offset",
        "@@persist_only.auto_increment_offset",
        '?',
    ),
)
def test_variables(lexer, text):
    """Every token except the last one is a Name.Variable."""
    tokens = list(lexer.get_tokens(text))
    # The final token is excluded from the type check.
    assert all(t[0] == Name.Variable for t in tokens[:-1])
    assert ''.join([t[1] for t in tokens]).strip() == text.strip()


@pytest.mark.parametrize('text', ('true', 'false', 'null', 'unknown'))
def test_constants(lexer, text):
    """The built-in constants lex as Name.Constant."""
    assert list(lexer.get_tokens(text))[0] == (Name.Constant, text)


@pytest.mark.parametrize('text', ('-- abc', '--\tabc', '#abc'))
def test_comments_single_line(lexer, text):
    """Single-line comments lex as one Comment.Single token."""
    # Test the standalone comment.
    tokens = list(lexer.get_tokens(text))
    assert tokens[0] == (Comment.Single, text)

    # Test the comment with mixed tokens.
    tokens = list(lexer.get_tokens('select' + text + '\nselect'))
    assert tokens[0] == (Keyword, 'select')
    assert tokens[1] == (Comment.Single, text)
    assert tokens[-2] == (Keyword, 'select')


@pytest.mark.parametrize(
    'text',
    (
        '/**/a', '/*a*b/c*/a', '/*\nabc\n*/a',
        '/* /* */a',
    ),
)
def test_comments_multi_line(lexer, text):
    """Multiline comments end at the first ``*/``; they do not nest."""
    tokens = list(lexer.get_tokens(text))
    assert all(token[0] == Comment.Multiline for token in tokens[:-2])
    assert ''.join(token[1] for token in tokens).strip() == text.strip()

    # Validate nested comments are not supported.
    assert tokens[-2][0] != Comment.Multiline


@pytest.mark.parametrize(
    'text', ('BKA', 'SEMIJOIN'))
def test_optimizer_hints(lexer, text):
    """Hint names are Comment.Preproc only as whole words inside ``/*+ */``."""
    good = '/*+ ' + text + '(), */'
    ignore = '/* ' + text + ' */'
    bad1 = '/*+ a' + text + '() */'
    bad2 = '/*+ ' + text + 'a */'
    assert (Comment.Preproc, text) in lexer.get_tokens(good)
    assert (Comment.Preproc, text) not in lexer.get_tokens(ignore)
    assert (Comment.Preproc, text) not in lexer.get_tokens(bad1)
    assert (Comment.Preproc, text) not in lexer.get_tokens(bad2)


@pytest.mark.parametrize(
    'text, expected_types',
    (
        # SET exceptions
        ('SET', (Keyword,)),
        ('SET abc = 1;', (Keyword,)),
        ('SET @abc = 1;', (Keyword,)),
        ('CHARACTER SET latin1', (Keyword, Text, Keyword)),
        ('SET("r", "g", "b")', (Keyword.Type, Punctuation)),
        ('SET ("r", "g", "b")', (Keyword.Type, Text, Punctuation)),
    ),
)
def test_exceptions(lexer, text, expected_types):
    """SET is a keyword unless it is followed by an opening parenthesis."""
    found = _leading_token_types(lexer, text, len(expected_types))
    assert found == expected_types


@pytest.mark.parametrize(
    'text',
    (
        'SHOW', 'CREATE', 'ALTER', 'DROP',
        'SELECT', 'INSERT', 'UPDATE', 'DELETE',
        'WHERE', 'GROUP', 'ORDER', 'BY', 'AS',
        'DISTINCT', 'JOIN', 'WITH', 'RECURSIVE',
        'PARTITION', 'NTILE', 'MASTER_PASSWORD', 'XA',
        'REQUIRE_TABLE_PRIMARY_KEY_CHECK', 'STREAM',
    ),
)
def test_keywords(lexer, text):
    """Each listed word lexes as a Keyword token."""
    assert list(lexer.get_tokens(text))[0] == (Keyword, text)


@pytest.mark.parametrize(
    'text',
    (
        # Standard
        'INT(', 'VARCHAR(', 'ENUM(', 'DATETIME', 'GEOMETRY', 'POINT', 'JSON',
        # Aliases and compatibility
        'FIXED', 'MEDIUMINT', 'INT3', 'REAL', 'SERIAL',
        'LONG', 'NATIONAL', 'PRECISION', 'VARYING',
    ),
)
def test_data_types(lexer, text):
    """Data type names lex as Keyword.Type, with or without a trailing ``(``."""
    assert list(lexer.get_tokens(text))[0] == (Keyword.Type, text.strip('('))


@pytest.mark.parametrize(
    'text',
    (
        # Common
        'CAST', 'CONCAT_WS', 'DAYNAME', 'IFNULL', 'NOW', 'SUBSTR',
        # Less common
        'CAN_ACCESS_COLUMN', 'JSON_CONTAINS_PATH', 'ST_GEOMFROMGEOJSON',
    ),
)
def test_functions(lexer, text):
    """Function names lex as Name.Function before ``(`` or `` (``."""
    assert list(lexer.get_tokens(text + '('))[0] == (Name.Function, text)
    assert list(lexer.get_tokens(text + ' ('))[0] == (Name.Function, text)


@pytest.mark.parametrize(
    'text',
    (
        'abc_$123', '上市年限', 'ひらがな',
        '`a`', '`上市年限`', '`ひらがな`', '`select`', '`concat(`',
        '````', r'`\``', r'`\\`',
        '`-- `', '`/*`', '`#`',
    ),
)
def test_schema_object_names(lexer, text):
    """Bare and backtick-quoted object names lex entirely as Name tokens."""
    # The final token is dropped before the checks.
    tokens = list(lexer.get_tokens(text))[:-1]
    assert all(token[0] == Name for token in tokens)
    assert ''.join(token[1] for token in tokens) == text


@pytest.mark.parametrize(
    'text',
    ('+', '*', '/', '%', '&&', ':=', '!', '<', '->>', '^', '|', '~'),
)
def test_operators(lexer, text):
    """Each operator lexes as a single Operator token."""
    assert list(lexer.get_tokens(text))[0] == (Operator, text)


@pytest.mark.parametrize(
    'text, expected_types',
    (
        ('abc.efg', (Name, Punctuation, Name)),
        ('abc,efg', (Name, Punctuation, Name)),
        ('MAX(abc)', (Name.Function, Punctuation, Name, Punctuation)),
        ('efg;', (Name, Punctuation)),
    ),
)
def test_punctuation(lexer, text, expected_types):
    """Dots, commas, parentheses and semicolons lex as Punctuation."""
    found = _leading_token_types(lexer, text, len(expected_types))
    assert found == expected_types