diff options
Diffstat (limited to 'sql/handler.h')
-rw-r--r-- | sql/handler.h | 1585 |
1 files changed, 1327 insertions, 258 deletions
diff --git a/sql/handler.h b/sql/handler.h index 32a7686f6c9..ae63e87426a 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -31,15 +31,20 @@ #include "thr_lock.h" /* thr_lock_type, THR_LOCK_DATA */ #include "sql_cache.h" #include "structs.h" /* SHOW_COMP_OPTION */ +#include "sql_array.h" /* Dynamic_array<> */ +#include "mdl.h" #include <my_compare.h> #include <ft_global.h> #include <keycache.h> +#include <mysql/psi/mysql_table.h> #if MAX_KEY > 128 #error MAX_KEY is too large. Values up to 128 are supported. #endif +class Alter_info; + // the following is for checking tables #define HA_ADMIN_ALREADY_DONE 1 @@ -57,11 +62,27 @@ #define HA_ADMIN_NEEDS_ALTER -11 #define HA_ADMIN_NEEDS_CHECK -12 +/** + Return values for check_if_supported_inplace_alter(). + + @see check_if_supported_inplace_alter() for description of + the individual values. +*/ +enum enum_alter_inplace_result { + HA_ALTER_ERROR, + HA_ALTER_INPLACE_NOT_SUPPORTED, + HA_ALTER_INPLACE_EXCLUSIVE_LOCK, + HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE, + HA_ALTER_INPLACE_SHARED_LOCK, + HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE, + HA_ALTER_INPLACE_NO_LOCK +}; + /* Bits in table_flags() to show what database can do */ -#define HA_NO_TRANSACTIONS (1 << 0) /* Doesn't support transactions */ -#define HA_PARTIAL_COLUMN_READ (1 << 1) /* read may not return all columns */ -#define HA_TABLE_SCAN_ON_INDEX (1 << 2) /* No separate data/index file */ +#define HA_NO_TRANSACTIONS (1ULL << 0) /* Doesn't support transactions */ +#define HA_PARTIAL_COLUMN_READ (1ULL << 1) /* read may not return all columns */ +#define HA_TABLE_SCAN_ON_INDEX (1ULL << 2) /* No separate data/index file */ /* The following should be set if the following is not true when scanning a table with rnd_next() @@ -70,37 +91,37 @@ If this flag is not set, filesort will do a position() call for each matched row to be able to find the row later. */ -#define HA_REC_NOT_IN_SEQ (1 << 3) -#define HA_CAN_GEOMETRY (1 << 4) +#define HA_REC_NOT_IN_SEQ (1ULL << 3) +#define HA_CAN_GEOMETRY (1ULL << 4) /* Reading keys in random order is as fast as reading keys in sort order (Used in records.cc to decide if we should use a record cache and by filesort to decide if we should sort key + data or key + pointer-to-row */ -#define HA_FAST_KEY_READ (1 << 5) +#define HA_FAST_KEY_READ (1ULL << 5) /* Set the following flag if we on delete should force all key to be read and on update read all keys that changes */ -#define HA_REQUIRES_KEY_COLUMNS_FOR_DELETE (1 << 6) -#define HA_NULL_IN_KEY (1 << 7) /* One can have keys with NULL */ -#define HA_DUPLICATE_POS (1 << 8) /* ha_position() gives dup row */ -#define HA_NO_BLOBS (1 << 9) /* Doesn't support blobs */ -#define HA_CAN_INDEX_BLOBS (1 << 10) -#define HA_AUTO_PART_KEY (1 << 11) /* auto-increment in multi-part key */ -#define HA_REQUIRE_PRIMARY_KEY (1 << 12) /* .. and can't create a hidden one */ -#define HA_STATS_RECORDS_IS_EXACT (1 << 13) /* stats.records is exact */ +#define HA_REQUIRES_KEY_COLUMNS_FOR_DELETE (1ULL << 6) +#define HA_NULL_IN_KEY (1ULL << 7) /* One can have keys with NULL */ +#define HA_DUPLICATE_POS (1ULL << 8) /* ha_position() gives dup row */ +#define HA_NO_BLOBS (1ULL << 9) /* Doesn't support blobs */ +#define HA_CAN_INDEX_BLOBS (1ULL << 10) +#define HA_AUTO_PART_KEY (1ULL << 11) /* auto-increment in multi-part key */ +#define HA_REQUIRE_PRIMARY_KEY (1ULL << 12) /* .. and can't create a hidden one */ +#define HA_STATS_RECORDS_IS_EXACT (1ULL << 13) /* stats.records is exact */ /* INSERT_DELAYED only works with handlers that uses MySQL internal table level locks */ -#define HA_CAN_INSERT_DELAYED (1 << 14) +#define HA_CAN_INSERT_DELAYED (1ULL << 14) /* If we get the primary key columns for free when we do an index read - It also implies that we have to retrive the primary key when using - position() and rnd_pos(). + (usually, it also implies that HA_PRIMARY_KEY_REQUIRED_FOR_POSITION + flag is set). */ -#define HA_PRIMARY_KEY_IN_READ_INDEX (1 << 15) +#define HA_PRIMARY_KEY_IN_READ_INDEX (1ULL << 15) /* If HA_PRIMARY_KEY_REQUIRED_FOR_POSITION is set, it means that to position() uses a primary key given by the record argument. @@ -108,36 +129,36 @@ If not set, the position is returned as the current rows position regardless of what argument is given. */ -#define HA_PRIMARY_KEY_REQUIRED_FOR_POSITION (1 << 16) -#define HA_CAN_RTREEKEYS (1 << 17) -#define HA_NOT_DELETE_WITH_CACHE (1 << 18) +#define HA_PRIMARY_KEY_REQUIRED_FOR_POSITION (1ULL << 16) +#define HA_CAN_RTREEKEYS (1ULL << 17) +#define HA_NOT_DELETE_WITH_CACHE (1ULL << 18) /* The following is we need to a primary key to delete (and update) a row. If there is no primary key, all columns needs to be read on update and delete */ -#define HA_PRIMARY_KEY_REQUIRED_FOR_DELETE (1 << 19) -#define HA_NO_PREFIX_CHAR_KEYS (1 << 20) -#define HA_CAN_FULLTEXT (1 << 21) -#define HA_CAN_SQL_HANDLER (1 << 22) -#define HA_NO_AUTO_INCREMENT (1 << 23) +#define HA_PRIMARY_KEY_REQUIRED_FOR_DELETE (1ULL << 19) +#define HA_NO_PREFIX_CHAR_KEYS (1ULL << 20) +#define HA_CAN_FULLTEXT (1ULL << 21) +#define HA_CAN_SQL_HANDLER (1ULL << 22) +#define HA_NO_AUTO_INCREMENT (1ULL << 23) /* Has automatic checksums and uses the old checksum format */ -#define HA_HAS_OLD_CHECKSUM (1 << 24) +#define HA_HAS_OLD_CHECKSUM (1ULL << 24) /* Table data are stored in separate files (for lower_case_table_names) */ -#define HA_FILE_BASED (1 << 26) -#define HA_NO_VARCHAR (1 << 27) -#define HA_CAN_BIT_FIELD (1 << 28) /* supports bit fields */ -#define HA_NEED_READ_RANGE_BUFFER (1 << 29) /* for read_multi_range */ -#define HA_ANY_INDEX_MAY_BE_UNIQUE (1 << 30) -#define HA_NO_COPY_ON_ALTER (LL(1) << 31) -#define HA_HAS_RECORDS (LL(1) << 32) /* records() gives exact count*/ +#define HA_FILE_BASED (1ULL << 26) +#define HA_NO_VARCHAR (1ULL << 27) +#define HA_CAN_BIT_FIELD (1ULL << 28) /* supports bit fields */ +#define HA_NEED_READ_RANGE_BUFFER (1ULL << 29) /* for read_multi_range */ +#define HA_ANY_INDEX_MAY_BE_UNIQUE (1ULL << 30) +#define HA_NO_COPY_ON_ALTER (1ULL << 31) +#define HA_HAS_RECORDS (1ULL << 32) /* records() gives exact count*/ /* Has it's own method of binlog logging */ -#define HA_HAS_OWN_BINLOGGING (LL(1) << 33) +#define HA_HAS_OWN_BINLOGGING (1ULL << 33) /* Engine is capable of row-format and statement-format logging, respectively */ -#define HA_BINLOG_ROW_CAPABLE (LL(1) << 34) -#define HA_BINLOG_STMT_CAPABLE (LL(1) << 35) +#define HA_BINLOG_ROW_CAPABLE (1ULL << 34) +#define HA_BINLOG_STMT_CAPABLE (1ULL << 35) /* When a multiple key conflict happens in a REPLACE command mysql expects the conflicts to be reported in the ascending order of @@ -160,20 +181,20 @@ This flag helps the underlying SE to inform the server that the keys are not ordered. */ -#define HA_DUPLICATE_KEY_NOT_IN_ORDER (LL(1) << 36) +#define HA_DUPLICATE_KEY_NOT_IN_ORDER (1ULL << 36) /* Engine supports REPAIR TABLE. Used by CHECK TABLE FOR UPGRADE if an incompatible table is detected. If this flag is set, CHECK TABLE FOR UPGRADE will report ER_TABLE_NEEDS_UPGRADE, otherwise ER_TABLE_NEED_REBUILD. */ -#define HA_CAN_REPAIR (LL(1) << 37) +#define HA_CAN_REPAIR (1ULL << 37) /* Has automatic checksums and uses the new checksum format */ -#define HA_HAS_NEW_CHECKSUM (LL(1) << 38) -#define HA_CAN_VIRTUAL_COLUMNS (LL(1) << 39) -#define HA_MRR_CANT_SORT (LL(1) << 40) -#define HA_RECORD_MUST_BE_CLEAN_ON_WRITE (LL(1) << 41) +#define HA_HAS_NEW_CHECKSUM (1ULL << 38) +#define HA_CAN_VIRTUAL_COLUMNS (1ULL << 39) +#define HA_MRR_CANT_SORT (1ULL << 40) +#define HA_RECORD_MUST_BE_CLEAN_ON_WRITE (1ULL << 41) /* Table condition pushdown must be performed regardless of @@ -186,7 +207,55 @@ then the "query=..." condition must be always pushed down into storage engine. */ -#define HA_MUST_USE_TABLE_CONDITION_PUSHDOWN (LL(1) << 42) +#define HA_MUST_USE_TABLE_CONDITION_PUSHDOWN (1ULL << 42) + +/** + The handler supports read before write removal optimization + + Read before write removal may be used for storage engines which support + write without previous read of the row to be updated. Handler returning + this flag must implement start_read_removal() and end_read_removal(). + The handler may return "fake" rows constructed from the key of the row + asked for. This is used to optimize UPDATE and DELETE by reducing the + numer of roundtrips between handler and storage engine. + + Example: + UPDATE a=1 WHERE pk IN (<keys>) + + mysql_update() + { + if (<conditions for starting read removal>) + start_read_removal() + -> handler returns true if read removal supported for this table/query + + while(read_record("pk=<key>")) + -> handler returns fake row with column "pk" set to <key> + + ha_update_row() + -> handler sends write "a=1" for row with "pk=<key>" + + end_read_removal() + -> handler returns the number of rows actually written + } + + @note This optimization in combination with batching may be used to + remove even more roundtrips. +*/ +#define HA_READ_BEFORE_WRITE_REMOVAL (1LL << 43) + +/* + Engine supports extended fulltext API + */ +#define HA_CAN_FULLTEXT_EXT (1LL << 44) + +/* + Storage engine supports table export using the + FLUSH TABLE <table_list> FOR EXPORT statement + (meaning, after this statement one can copy table files out of the + datadir and later "import" (somehow) in another MariaDB instance) + */ +#define HA_CAN_EXPORT (1LL << 45) + /* Set of all binlog flags. Currently only contain the capabilities @@ -315,10 +384,17 @@ #define HA_KEY_NULL_LENGTH 1 #define HA_KEY_BLOB_LENGTH 2 +/* Maximum length of any index lookup key, in bytes */ + +#define MAX_KEY_LENGTH (MAX_DATA_LENGTH_FOR_KEY \ + +(MAX_REF_PARTS \ + *(HA_KEY_NULL_LENGTH + HA_KEY_BLOB_LENGTH))) + #define HA_LEX_CREATE_TMP_TABLE 1 #define HA_LEX_CREATE_IF_NOT_EXISTS 2 #define HA_LEX_CREATE_TABLE_LIKE 4 -#define HA_CREATE_TMP_ALTER 8 +#define HA_CREATE_TMP_ALTER 8 +#define HA_LEX_CREATE_REPLACE 16 #define HA_MAX_REC_LENGTH 65535 /* Table caching type */ @@ -327,8 +403,24 @@ #define HA_CACHE_TBL_ASKTRANSACT 2 #define HA_CACHE_TBL_TRANSACT 4 -/* Options of START TRANSACTION statement (and later of SET TRANSACTION stmt) */ -#define MYSQL_START_TRANS_OPT_WITH_CONS_SNAPSHOT 1 +/** + Options for the START TRANSACTION statement. + + Note that READ ONLY and READ WRITE are logically mutually exclusive. + This is enforced by the parser and depended upon by trans_begin(). + + We need two flags instead of one in order to differentiate between + situation when no READ WRITE/ONLY clause were given and thus transaction + is implicitly READ WRITE and the case when READ WRITE clause was used + explicitly. +*/ + +// WITH CONSISTENT SNAPSHOT option +static const uint MYSQL_START_TRANS_OPT_WITH_CONS_SNAPSHOT = 1; +// READ ONLY option +static const uint MYSQL_START_TRANS_OPT_READ_ONLY = 2; +// READ WRITE option +static const uint MYSQL_START_TRANS_OPT_READ_WRITE = 4; /* Flags for method is_fatal_error */ #define HA_CHECK_DUP_KEY 1 @@ -339,25 +431,22 @@ enum legacy_db_type { - DB_TYPE_UNKNOWN=0,DB_TYPE_DIAB_ISAM=1, - DB_TYPE_HASH,DB_TYPE_MISAM,DB_TYPE_PISAM, - DB_TYPE_RMS_ISAM, DB_TYPE_HEAP, DB_TYPE_ISAM, - DB_TYPE_MRG_ISAM, DB_TYPE_MYISAM, DB_TYPE_MRG_MYISAM, - DB_TYPE_BERKELEY_DB, DB_TYPE_INNODB, - DB_TYPE_GEMINI, DB_TYPE_NDBCLUSTER, - DB_TYPE_EXAMPLE_DB, DB_TYPE_ARCHIVE_DB, DB_TYPE_CSV_DB, - DB_TYPE_FEDERATED_DB, - DB_TYPE_BLACKHOLE_DB, - DB_TYPE_PARTITION_DB, - DB_TYPE_BINLOG, - DB_TYPE_SOLID, - DB_TYPE_PBXT, - DB_TYPE_TABLE_FUNCTION, - DB_TYPE_MEMCACHE, - DB_TYPE_FALCON, - DB_TYPE_MARIA, - /** Performance schema engine. */ - DB_TYPE_PERFORMANCE_SCHEMA, + /* note these numerical values are fixed and can *not* be changed */ + DB_TYPE_UNKNOWN=0, + DB_TYPE_HEAP=6, + DB_TYPE_MYISAM=9, + DB_TYPE_MRG_MYISAM=10, + DB_TYPE_INNODB=12, + DB_TYPE_NDBCLUSTER=14, + DB_TYPE_EXAMPLE_DB=15, + DB_TYPE_ARCHIVE_DB=16, + DB_TYPE_CSV_DB=17, + DB_TYPE_FEDERATED_DB=18, + DB_TYPE_BLACKHOLE_DB=19, + DB_TYPE_PARTITION_DB=20, + DB_TYPE_BINLOG=21, + DB_TYPE_PBXT=23, + DB_TYPE_PERFORMANCE_SCHEMA=28, DB_TYPE_ARIA=42, DB_TYPE_TOKUDB=43, DB_TYPE_FIRST_DYNAMIC=44, @@ -376,6 +465,13 @@ enum row_type { ROW_TYPE_NOT_USED=-1, ROW_TYPE_DEFAULT, ROW_TYPE_FIXED, /* not part of the enum, so that it shouldn't be in switch(row_type) */ #define ROW_TYPE_MAX ((uint)ROW_TYPE_PAGE + 1) +/* Specifies data storage format for individual columns */ +enum column_format_type { + COLUMN_FORMAT_TYPE_DEFAULT= 0, /* Not specified (use engine default) */ + COLUMN_FORMAT_TYPE_FIXED= 1, /* FIXED format */ + COLUMN_FORMAT_TYPE_DYNAMIC= 2 /* DYNAMIC format */ +}; + enum enum_binlog_func { BFN_RESET_LOGS= 1, BFN_RESET_SLAVE= 2, @@ -420,6 +516,45 @@ enum enum_binlog_command { /* The following two are used by Maria engine: */ #define HA_CREATE_USED_TRANSACTIONAL (1L << 20) #define HA_CREATE_USED_PAGE_CHECKSUM (1L << 21) +/** This is set whenever STATS_PERSISTENT=0|1|default has been +specified in CREATE/ALTER TABLE. See also HA_OPTION_STATS_PERSISTENT in +include/my_base.h. It is possible to distinguish whether +STATS_PERSISTENT=default has been specified or no STATS_PERSISTENT= is +given at all. */ +#define HA_CREATE_USED_STATS_PERSISTENT (1L << 22) +/** + This is set whenever STATS_AUTO_RECALC=0|1|default has been + specified in CREATE/ALTER TABLE. See enum_stats_auto_recalc. + It is possible to distinguish whether STATS_AUTO_RECALC=default + has been specified or no STATS_AUTO_RECALC= is given at all. +*/ +#define HA_CREATE_USED_STATS_AUTO_RECALC (1L << 23) +/** + This is set whenever STATS_SAMPLE_PAGES=N|default has been + specified in CREATE/ALTER TABLE. It is possible to distinguish whether + STATS_SAMPLE_PAGES=default has been specified or no STATS_SAMPLE_PAGES= is + given at all. +*/ +#define HA_CREATE_USED_STATS_SAMPLE_PAGES (1L << 24) + + +/* + This is master database for most of system tables. However there + can be other databases which can hold system tables. Respective + storage engines define their own system database names. +*/ +extern const char *mysqld_system_database; + +/* + Structure to hold list of system_database.system_table. + This is used at both mysqld and storage engine layer. +*/ +struct st_system_tablename +{ + const char *db; + const char *tablename; +}; + typedef ulonglong my_xid; // this line is the same as in log_event.h #define MYSQL_XID_PREFIX "MySQLXid" @@ -598,14 +733,18 @@ struct TABLE; */ enum enum_schema_tables { - SCH_CHARSETS= 0, + SCH_ALL_PLUGINS, + SCH_APPLICABLE_ROLES, + SCH_CHARSETS, SCH_CLIENT_STATS, SCH_COLLATIONS, SCH_COLLATION_CHARACTER_SET_APPLICABILITY, SCH_COLUMNS, SCH_COLUMN_PRIVILEGES, + SCH_ENABLED_ROLES, SCH_ENGINES, SCH_EVENTS, + SCH_EXPLAIN, SCH_FILES, SCH_GLOBAL_STATUS, SCH_GLOBAL_VARIABLES, @@ -640,6 +779,7 @@ enum enum_schema_tables }; struct TABLE_SHARE; +struct HA_CREATE_INFO; struct st_foreign_key_info; typedef struct st_foreign_key_info FOREIGN_KEY_INFO; typedef bool (stat_print_fn)(THD *thd, const char *type, uint type_len, @@ -719,22 +859,26 @@ struct ha_index_option_struct; enum ha_option_type { HA_OPTION_TYPE_ULL, /* unsigned long long */ HA_OPTION_TYPE_STRING, /* char * */ HA_OPTION_TYPE_ENUM, /* uint */ - HA_OPTION_TYPE_BOOL}; /* bool */ + HA_OPTION_TYPE_BOOL, /* bool */ + HA_OPTION_TYPE_SYSVAR};/* type of the sysval */ #define HA_xOPTION_NUMBER(name, struc, field, def, min, max, blk_siz) \ { HA_OPTION_TYPE_ULL, name, sizeof(name)-1, \ - offsetof(struc, field), def, min, max, blk_siz, 0 } + offsetof(struc, field), def, min, max, blk_siz, 0, 0 } #define HA_xOPTION_STRING(name, struc, field) \ { HA_OPTION_TYPE_STRING, name, sizeof(name)-1, \ - offsetof(struc, field), 0, 0, 0, 0, 0 } + offsetof(struc, field), 0, 0, 0, 0, 0, 0} #define HA_xOPTION_ENUM(name, struc, field, values, def) \ { HA_OPTION_TYPE_ENUM, name, sizeof(name)-1, \ offsetof(struc, field), def, 0, \ - sizeof(values)-1, 0, values } + sizeof(values)-1, 0, values, 0 } #define HA_xOPTION_BOOL(name, struc, field, def) \ { HA_OPTION_TYPE_BOOL, name, sizeof(name)-1, \ - offsetof(struc, field), def, 0, 1, 0, 0 } -#define HA_xOPTION_END { HA_OPTION_TYPE_ULL, 0, 0, 0, 0, 0, 0, 0, 0 } + offsetof(struc, field), def, 0, 1, 0, 0, 0 } +#define HA_xOPTION_SYSVAR(name, struc, field, sysvar) \ + { HA_OPTION_TYPE_SYSVAR, name, sizeof(name)-1, \ + offsetof(struc, field), 0, 0, 0, 0, 0, MYSQL_SYSVAR(sysvar) } +#define HA_xOPTION_END { HA_OPTION_TYPE_ULL, 0, 0, 0, 0, 0, 0, 0, 0, 0 } #define HA_TOPTION_NUMBER(name, field, def, min, max, blk_siz) \ HA_xOPTION_NUMBER(name, ha_table_option_struct, field, def, min, max, blk_siz) @@ -744,6 +888,8 @@ enum ha_option_type { HA_OPTION_TYPE_ULL, /* unsigned long long */ HA_xOPTION_ENUM(name, ha_table_option_struct, field, values, def) #define HA_TOPTION_BOOL(name, field, def) \ HA_xOPTION_BOOL(name, ha_table_option_struct, field, def) +#define HA_TOPTION_SYSVAR(name, field, sysvar) \ + HA_xOPTION_SYSVAR(name, ha_table_option_struct, field, sysvar) #define HA_TOPTION_END HA_xOPTION_END #define HA_FOPTION_NUMBER(name, field, def, min, max, blk_siz) \ @@ -754,6 +900,8 @@ enum ha_option_type { HA_OPTION_TYPE_ULL, /* unsigned long long */ HA_xOPTION_ENUM(name, ha_field_option_struct, field, values, def) #define HA_FOPTION_BOOL(name, field, def) \ HA_xOPTION_BOOL(name, ha_field_option_struct, field, def) +#define HA_FOPTION_SYSVAR(name, field, sysvar) \ + HA_xOPTION_SYSVAR(name, ha_field_option_struct, field, sysvar) #define HA_FOPTION_END HA_xOPTION_END #define HA_IOPTION_NUMBER(name, field, def, min, max, blk_siz) \ @@ -764,6 +912,8 @@ enum ha_option_type { HA_OPTION_TYPE_ULL, /* unsigned long long */ HA_xOPTION_ENUM(name, ha_index_option_struct, field, values, def) #define HA_IOPTION_BOOL(name, field, def) \ HA_xOPTION_BOOL(name, ha_index_option_struct, field, def) +#define HA_IOPTION_SYSVAR(name, field, sysvar) \ + HA_xOPTION_SYSVAR(name, ha_index_option_struct, field, sysvar) #define HA_IOPTION_END HA_xOPTION_END typedef struct st_ha_create_table_option { @@ -774,6 +924,7 @@ typedef struct st_ha_create_table_option { ulonglong def_value; ulonglong min_value, max_value, block_size; const char *values; + struct st_mysql_sys_var *var; } ha_create_table_option; enum handler_iterator_type @@ -884,6 +1035,13 @@ struct handlerton to the savepoint_set call */ int (*savepoint_rollback)(handlerton *hton, THD *thd, void *sv); + /** + Check if storage engine allows to release metadata locks which were + acquired after the savepoint if rollback to savepoint is done. + @return true - If it is safe to release MDL locks. + false - If it is not. + */ + bool (*savepoint_rollback_can_release_mdl)(handlerton *hton, THD *thd); int (*savepoint_release)(handlerton *hton, THD *thd, void *sv); /* 'all' is true if it's a real commit, that makes persistent changes @@ -986,6 +1144,46 @@ struct handlerton int (*recover)(handlerton *hton, XID *xid_list, uint len); int (*commit_by_xid)(handlerton *hton, XID *xid); int (*rollback_by_xid)(handlerton *hton, XID *xid); + /* + The commit_checkpoint_request() handlerton method is used to checkpoint + the XA recovery process for storage engines that support two-phase + commit. + + The method is optional - an engine that does not implemented is expected + to work the traditional way, where every commit() durably flushes the + transaction to disk in the engine before completion, so XA recovery will + no longer be needed for that transaction. + + An engine that does implement commit_checkpoint_request() is also + expected to implement commit_ordered(), so that ordering of commits is + consistent between 2pc participants. Such engine is no longer required to + durably flush to disk transactions in commit(), provided that the + transaction has been successfully prepare()d and commit_ordered(); thus + potentionally saving one fsync() call. (Engine must still durably flush + to disk in commit() when no prepare()/commit_ordered() steps took place, + at least if durable commits are wanted; this happens eg. if binlog is + disabled). + + The TC will periodically (eg. once per binlog rotation) call + commit_checkpoint_request(). When this happens, the engine must arrange + for all transaction that have completed commit_ordered() to be durably + flushed to disk (this does not include transactions that might be in the + middle of executing commit_ordered()). When such flush has completed, the + engine must call commit_checkpoint_notify_ha(), passing back the opaque + "cookie". + + The flush and call of commit_checkpoint_notify_ha() need not happen + immediately - it can be scheduled and performed asynchroneously (ie. as + part of next prepare(), or sync every second, or whatever), but should + not be postponed indefinitely. It is however also permissible to do it + immediately, before returning from commit_checkpoint_request(). + + When commit_checkpoint_notify_ha() is called, the TC will know that the + transactions are durably committed, and thus no longer require XA + recovery. It uses that to reduce the work needed for any subsequent XA + recovery process. + */ + void (*commit_checkpoint_request)(handlerton *hton, void *cookie); /* "Disable or enable checkpointing internal to the storage engine. This is used for FLUSH TABLES WITH READ LOCK AND DISABLE CHECKPOINT to ensure that @@ -1042,18 +1240,6 @@ struct handlerton enum handler_create_iterator_result (*create_iterator)(handlerton *hton, enum handler_iterator_type type, struct handler_iterator *fill_this_in); - int (*discover)(handlerton *hton, THD* thd, const char *db, - const char *name, - uchar **frmblob, - size_t *frmlen); - int (*find_files)(handlerton *hton, THD *thd, - const char *db, - const char *path, - const char *wild, bool dir, List<LEX_STRING> *files); - int (*table_exists_in_engine)(handlerton *hton, THD* thd, const char *db, - const char *name); - - uint32 license; /* Flag for Engine License */ /* Optional clauses in the CREATE/ALTER TABLE */ @@ -1061,14 +1247,129 @@ struct handlerton ha_create_table_option *field_options; // these are specified per field ha_create_table_option *index_options; // these are specified per index + /** + The list of extensions of files created for a single table in the + database directory (datadir/db_name/). + + Used by open_table_error(), by the default rename_table and delete_table + handler methods, and by the default discovery implementation. + + For engines that have more than one file name extentions (separate + metadata, index, and/or data files), the order of elements is relevant. + First element of engine file name extentions array should be metadata + file extention. This is implied by the open_table_error() + and the default discovery implementation. + + Second element - data file extention. This is implied + assumed by REPAIR TABLE ... USE_FRM implementation. + */ + const char **tablefile_extensions; // by default - empty list + + /********************************************************************* + Table discovery API. + It allows the server to "discover" tables that exist in the storage + engine, without user issuing an explicit CREATE TABLE statement. + **********************************************************************/ + + /* + This method is required for any engine that supports automatic table + discovery, there is no default implementation. + + Given a TABLE_SHARE discover_table() fills it in with a correct table + structure using one of the TABLE_SHARE::init_from_* methods. + + Returns HA_ERR_NO_SUCH_TABLE if the table did not exist in the engine, + zero if the table was discovered successfully, or any other + HA_ERR_* error code as appropriate if the table existed, but the + discovery failed. + */ + int (*discover_table)(handlerton *hton, THD* thd, TABLE_SHARE *share); + + /* + The discover_table_names method tells the server + about all tables in the specified database that the engine + knows about. Tables (or file names of tables) are added to + the provided discovered_list collector object using + add_table() or add_file() methods. + */ + class discovered_list + { + public: + virtual bool add_table(const char *tname, size_t tlen) = 0; + virtual bool add_file(const char *fname) = 0; + protected: virtual ~discovered_list() {} + }; + + /* + By default (if not implemented by the engine, but the discover_table() is + implemented) it will perform a file-based discovery: + + - if tablefile_extensions[0] is not null, this will discovers all tables + with the tablefile_extensions[0] extension. + + Returns 0 on success and 1 on error. + */ + int (*discover_table_names)(handlerton *hton, LEX_STRING *db, MY_DIR *dir, + discovered_list *result); + + /* + This is a method that allows to server to check if a table exists without + an overhead of the complete discovery. + + By default (if not implemented by the engine, but the discovery_table() is + implemented) it will try to perform a file-based discovery: + + - if tablefile_extensions[0] is not null this will look for a file name + with the tablefile_extensions[0] extension. + + - if tablefile_extensions[0] is null, this will resort to discover_table(). + + Note that resorting to discover_table() is slow and the engine + should probably implement its own discover_table_existence() method, + if its tablefile_extensions[0] is null. + + Returns 1 if the table exists and 0 if it does not. + */ + int (*discover_table_existence)(handlerton *hton, const char *db, + const char *table_name); + + /* + This is the assisted table discovery method. Unlike the fully + automatic discovery as above, here a user is expected to issue an + explicit CREATE TABLE with the appropriate table attributes to + "assist" the discovery of a table. But this "discovering" CREATE TABLE + statement will not specify the table structure - the engine discovers + it using this method. For example, FederatedX uses it in + + CREATE TABLE t1 ENGINE=FEDERATED CONNECTION="mysql://foo/bar/t1"; + + Given a TABLE_SHARE discover_table_structure() fills it in with a correct + table structure using one of the TABLE_SHARE::init_from_* methods. + + Assisted discovery works independently from the automatic discover. + An engine is allowed to support only assisted discovery and not + support automatic one. Or vice versa. + */ + int (*discover_table_structure)(handlerton *hton, THD* thd, + TABLE_SHARE *share, HA_CREATE_INFO *info); }; -inline LEX_STRING *hton_name(const handlerton *hton) +static inline LEX_STRING *hton_name(const handlerton *hton) { return &(hton2plugin[hton->slot]->name); } +static inline handlerton *plugin_hton(plugin_ref plugin) +{ + return plugin_data(plugin, handlerton *); +} + +static inline sys_var *find_hton_sysvar(handlerton *hton, st_mysql_sys_var *var) +{ + return find_plugin_sysvar(hton2plugin[hton->slot], var); +} + /* Possible flags of a handlerton (there can be 32 of them) */ #define HTON_NO_FLAGS 0 @@ -1076,11 +1377,27 @@ inline LEX_STRING *hton_name(const handlerton *hton) #define HTON_ALTER_NOT_SUPPORTED (1 << 1) //Engine does not support alter #define HTON_CAN_RECREATE (1 << 2) //Delete all is used for truncate #define HTON_HIDDEN (1 << 3) //Engine does not appear in lists +#define HTON_FLUSH_AFTER_RENAME (1 << 4) #define HTON_NOT_USER_SELECTABLE (1 << 5) #define HTON_TEMPORARY_NOT_SUPPORTED (1 << 6) //Having temporary tables not supported #define HTON_SUPPORT_LOG_TABLES (1 << 7) //Engine supports log tables -#define HTON_NO_PARTITION (1 << 8) //You can not partition these tables -#define HTON_EXTENDED_KEYS (1 << 9) //supports extended keys +#define HTON_NO_PARTITION (1 << 8) //Not partition of these tables + +/* + This flag should be set when deciding that the engine does not allow + row based binary logging (RBL) optimizations. + + Currently, setting this flag, means that table's read/write_set will + be left untouched when logging changes to tables in this engine. In + practice this means that the server will not mess around with + table->write_set and/or table->read_set when using RBL and deciding + whether to log full or minimal rows. + + It's valuable for instance for virtual tables, eg: Performance + Schema which have no meaning for replication. +*/ +#define HTON_NO_BINLOG_ROW_OPT (1 << 9) +#define HTON_SUPPORTS_EXTENDED_KEYS (1 <<10) //supports extended keys // MySQL compatibility. Unused. #define HTON_SUPPORTS_FOREIGN_KEYS (1 << 0) //Foreign key constraint supported. @@ -1126,6 +1443,22 @@ struct THD_TRANS void reset() { no_2pc= FALSE; modified_non_trans_table= FALSE; } bool is_empty() const { return ha_list == NULL; } THD_TRANS() {} /* Remove gcc warning */ + + unsigned int m_unsafe_rollback_flags; + /* + Define the type of statemens which cannot be rolled back safely. + Each type occupies one bit in m_unsafe_rollback_flags. + */ + static unsigned int const MODIFIED_NON_TRANS_TABLE= 0x01; + static unsigned int const CREATED_TEMP_TABLE= 0x02; + static unsigned int const DROPPED_TEMP_TABLE= 0x04; + + void mark_created_temp_table() + { + DBUG_PRINT("debug", ("mark_created_temp_table")); + m_unsafe_rollback_flags|= CREATED_TEMP_TABLE; + } + }; @@ -1249,13 +1582,17 @@ struct st_table_log_memory_entry; class partition_info; struct st_partition_iter; -#define NOT_A_PARTITION_ID ((uint32)-1) enum ha_choice { HA_CHOICE_UNDEF, HA_CHOICE_NO, HA_CHOICE_YES, HA_CHOICE_MAX }; -typedef struct st_ha_create_information +enum enum_stats_auto_recalc { HA_STATS_AUTO_RECALC_DEFAULT= 0, + HA_STATS_AUTO_RECALC_ON, + HA_STATS_AUTO_RECALC_OFF }; + +struct HA_CREATE_INFO { CHARSET_INFO *table_charset, *default_table_charset; + LEX_CUSTRING tabledef_version; LEX_STRING connect_string; const char *password, *tablespace; LEX_STRING comment; @@ -1263,10 +1600,20 @@ typedef struct st_ha_create_information const char *alias; ulonglong max_rows,min_rows; ulonglong auto_increment_value; - ulong table_options; + ulong table_options; ///< HA_OPTION_ values ulong avg_row_length; ulong used_fields; ulong key_block_size; + /* + number of pages to sample during + stats estimation, if used, otherwise 0. + */ + uint stats_sample_pages; + uint null_bits; /* NULL bits at start of record */ + uint options; /* OR of HA_CREATE_ options */ + uint org_options; /* original options from query */ + uint merge_insert_method; + uint extra_size; /* length of extra data segment */ SQL_I_List<TABLE_LIST> merge_list; handlerton *db_type; /** @@ -1276,24 +1623,381 @@ typedef struct st_ha_create_information For ALTER TABLE defaults to ROW_TYPE_NOT_USED (means "keep the current"). Can be changed either explicitly by the parser. - If nothing speficied inherits the value of the original table (if present). + If nothing specified inherits the value of the original table (if present). */ enum row_type row_type; - uint null_bits; /* NULL bits at start of record */ - uint options; /* OR of HA_CREATE_ options */ - uint merge_insert_method; - uint extra_size; /* length of extra data segment */ enum ha_choice transactional; - bool frm_only; ///< 1 if no ha_create_table() - bool varchar; ///< 1 if table has a VARCHAR enum ha_storage_media storage_media; ///< DEFAULT, DISK or MEMORY enum ha_choice page_checksum; ///< If we have page_checksums engine_option_value *option_list; ///< list of table create options + enum_stats_auto_recalc stats_auto_recalc; + bool varchar; ///< 1 if table has a VARCHAR + /* the following three are only for ALTER TABLE, check_if_incompatible_data() */ ha_table_option_struct *option_struct; ///< structure with parsed table options ha_field_option_struct **fields_option_struct; ///< array of field option structures ha_index_option_struct **indexes_option_struct; ///< array of index option structures -} HA_CREATE_INFO; + + /* The following is used to remember the old state for CREATE OR REPLACE */ + TABLE *table; + TABLE_LIST *pos_in_locked_tables; + MDL_ticket *mdl_ticket; + bool table_was_deleted; + + bool tmp_table() { return options & HA_LEX_CREATE_TMP_TABLE; } + bool check_conflicting_charset_declarations(CHARSET_INFO *cs); + bool add_table_option_default_charset(CHARSET_INFO *cs) + { + // cs can be NULL, e.g.: CREATE TABLE t1 (..) CHARACTER SET DEFAULT; + if (check_conflicting_charset_declarations(cs)) + return true; + default_table_charset= cs; + used_fields|= HA_CREATE_USED_DEFAULT_CHARSET; + return false; + } + bool add_alter_list_item_convert_to_charset(CHARSET_INFO *cs) + { + /* + cs cannot be NULL, as sql_yacc.yy translates + CONVERT TO CHARACTER SET DEFAULT + to + CONVERT TO CHARACTER SET <character-set-of-the-current-database> + TODO: Should't we postpone resolution of DEFAULT until the + character set of the table owner database is loaded from its db.opt? + */ + DBUG_ASSERT(cs); + if (check_conflicting_charset_declarations(cs)) + return true; + table_charset= default_table_charset= cs; + used_fields|= (HA_CREATE_USED_CHARSET | HA_CREATE_USED_DEFAULT_CHARSET); + return false; + } + ulong table_options_with_row_type() + { + if (row_type == ROW_TYPE_DYNAMIC || row_type == ROW_TYPE_PAGE) + return table_options | HA_OPTION_PACK_RECORD; + else + return table_options; + } +}; + + +/** + In-place alter handler context. + + This is a superclass intended to be subclassed by individual handlers + in order to store handler unique context between in-place alter API calls. + + The handler is responsible for creating the object. This can be done + as early as during check_if_supported_inplace_alter(). + + The SQL layer is responsible for destroying the object. + The class extends Sql_alloc so the memory will be mem root allocated. + + @see Alter_inplace_info +*/ + +class inplace_alter_handler_ctx : public Sql_alloc +{ +public: + inplace_alter_handler_ctx() {} + + virtual ~inplace_alter_handler_ctx() {} +}; + + +/** + Class describing changes to be done by ALTER TABLE. + Instance of this class is passed to storage engine in order + to determine if this ALTER TABLE can be done using in-place + algorithm. It is also used for executing the ALTER TABLE + using in-place algorithm. +*/ + +class Alter_inplace_info +{ +public: + /** + Bits to show in detail what operations the storage engine is + to execute. + + All these operations are supported as in-place operations by the + SQL layer. This means that operations that by their nature must + be performed by copying the table to a temporary table, will not + have their own flags here. + + We generally try to specify handler flags only if there are real + changes. But in cases when it is cumbersome to determine if some + attribute has really changed we might choose to set flag + pessimistically, for example, relying on parser output only. + */ + typedef ulong HA_ALTER_FLAGS; + + // Add non-unique, non-primary index + static const HA_ALTER_FLAGS ADD_INDEX = 1L << 0; + + // Drop non-unique, non-primary index + static const HA_ALTER_FLAGS DROP_INDEX = 1L << 1; + + // Add unique, non-primary index + static const HA_ALTER_FLAGS ADD_UNIQUE_INDEX = 1L << 2; + + // Drop unique, non-primary index + static const HA_ALTER_FLAGS DROP_UNIQUE_INDEX = 1L << 3; + + // Add primary index + static const HA_ALTER_FLAGS ADD_PK_INDEX = 1L << 4; + + // Drop primary index + static const HA_ALTER_FLAGS DROP_PK_INDEX = 1L << 5; + + // Add column + static const HA_ALTER_FLAGS ADD_COLUMN = 1L << 6; + + // Drop column + static const HA_ALTER_FLAGS DROP_COLUMN = 1L << 7; + + // Rename column + static const HA_ALTER_FLAGS ALTER_COLUMN_NAME = 1L << 8; + + // Change column datatype + static const HA_ALTER_FLAGS ALTER_COLUMN_TYPE = 1L << 9; + + /** + Change column datatype in such way that new type has compatible + packed representation with old type, so it is theoretically + possible to perform change by only updating data dictionary + without changing table rows. + */ + static const HA_ALTER_FLAGS ALTER_COLUMN_EQUAL_PACK_LENGTH = 1L << 10; + + // Reorder column + static const HA_ALTER_FLAGS ALTER_COLUMN_ORDER = 1L << 11; + + // Change column from NOT NULL to NULL + static const HA_ALTER_FLAGS ALTER_COLUMN_NULLABLE = 1L << 12; + + // Change column from NULL to NOT NULL + static const HA_ALTER_FLAGS ALTER_COLUMN_NOT_NULLABLE = 1L << 13; + + // Set or remove default column value + static const HA_ALTER_FLAGS ALTER_COLUMN_DEFAULT = 1L << 14; + + // Add foreign key + static const HA_ALTER_FLAGS ADD_FOREIGN_KEY = 1L << 15; + + // Drop foreign key + static const HA_ALTER_FLAGS DROP_FOREIGN_KEY = 1L << 16; + + // table_options changed, see HA_CREATE_INFO::used_fields for details. + static const HA_ALTER_FLAGS CHANGE_CREATE_OPTION = 1L << 17; + + // Table is renamed + static const HA_ALTER_FLAGS ALTER_RENAME = 1L << 18; + + // column's engine options changed, something in field->option_struct + static const HA_ALTER_FLAGS ALTER_COLUMN_OPTION = 1L << 19; + + // MySQL alias for the same thing: + static const HA_ALTER_FLAGS ALTER_COLUMN_STORAGE_TYPE = 1L << 19; + + // Change the column format of column + static const HA_ALTER_FLAGS ALTER_COLUMN_COLUMN_FORMAT = 1L << 20; + + // Add partition + static const HA_ALTER_FLAGS ADD_PARTITION = 1L << 21; + + // Drop partition + static const HA_ALTER_FLAGS DROP_PARTITION = 1L << 22; + + // Changing partition options + static const HA_ALTER_FLAGS ALTER_PARTITION = 1L << 23; + + // Coalesce partition + static const HA_ALTER_FLAGS COALESCE_PARTITION = 1L << 24; + + // Reorganize partition ... into + static const HA_ALTER_FLAGS REORGANIZE_PARTITION = 1L << 25; + + // Reorganize partition + static const HA_ALTER_FLAGS ALTER_TABLE_REORG = 1L << 26; + + // Remove partitioning + static const HA_ALTER_FLAGS ALTER_REMOVE_PARTITIONING = 1L << 27; + + // Partition operation with ALL keyword + static const HA_ALTER_FLAGS ALTER_ALL_PARTITION = 1L << 28; + + /** + Recreate the table for ALTER TABLE FORCE, ALTER TABLE ENGINE + and OPTIMIZE TABLE operations. + */ + static const HA_ALTER_FLAGS RECREATE_TABLE = 1L << 29; + + // Virtual columns changed + static const HA_ALTER_FLAGS ALTER_COLUMN_VCOL = 1L << 30; + + /** + ALTER TABLE for a partitioned table. The engine needs to commit + online alter of all partitions atomically (using group_commit_ctx) + */ + static const HA_ALTER_FLAGS ALTER_PARTITIONED = 1L << 31; + + /** + Create options (like MAX_ROWS) for the new version of table. + + @note The referenced instance of HA_CREATE_INFO object was already + used to create new .FRM file for table being altered. So it + has been processed by mysql_prepare_create_table() already. + For example, this means that it has HA_OPTION_PACK_RECORD + flag in HA_CREATE_INFO::table_options member correctly set. + */ + HA_CREATE_INFO *create_info; + + /** + Alter options, fields and keys for the new version of table. + + @note The referenced instance of Alter_info object was already + used to create new .FRM file for table being altered. So it + has been processed by mysql_prepare_create_table() already. + In particular, this means that in Create_field objects for + fields which were present in some form in the old version + of table, Create_field::field member points to corresponding + Field instance for old version of table. + */ + Alter_info *alter_info; + + /** + Array of KEYs for new version of table - including KEYs to be added. + + @note Currently this array is produced as result of + mysql_prepare_create_table() call. + This means that it follows different convention for + KEY_PART_INFO::fieldnr values than objects in TABLE::key_info + array. + + @todo This is mainly due to the fact that we need to keep compatibility + with removed handler::add_index() call. We plan to switch to + TABLE::key_info numbering later. + + KEYs are sorted - see sort_keys(). + */ + KEY *key_info_buffer; + + /** Size of key_info_buffer array. */ + uint key_count; + + /** Size of index_drop_buffer array. */ + uint index_drop_count; + + /** + Array of pointers to KEYs to be dropped belonging to the TABLE instance + for the old version of the table. + */ + KEY **index_drop_buffer; + + /** Size of index_add_buffer array. */ + uint index_add_count; + + /** + Array of indexes into key_info_buffer for KEYs to be added, + sorted in increasing order. + */ + uint *index_add_buffer; + + /** + Context information to allow handlers to keep context between in-place + alter API calls. + + @see inplace_alter_handler_ctx for information about object lifecycle. + */ + inplace_alter_handler_ctx *handler_ctx; + + /** + If the table uses several handlers, like ha_partition uses one handler + per partition, this contains a Null terminated array of ctx pointers + that should all be committed together. + Or NULL if only handler_ctx should be committed. + Set to NULL if the low level handler::commit_inplace_alter_table uses it, + to signal to the main handler that everything was committed as atomically. + + @see inplace_alter_handler_ctx for information about object lifecycle. + */ + inplace_alter_handler_ctx **group_commit_ctx; + + /** + Flags describing in detail which operations the storage engine is to execute. + */ + HA_ALTER_FLAGS handler_flags; + + /** + Partition_info taking into account the partition changes to be performed. + Contains all partitions which are present in the old version of the table + with partitions to be dropped or changed marked as such + all partitions + to be added in the new version of table marked as such. + */ + partition_info *modified_part_info; + + /** true for ALTER IGNORE TABLE ... */ + const bool ignore; + + /** true for online operation (LOCK=NONE) */ + bool online; + + /** + Can be set by handler to describe why a given operation cannot be done + in-place (HA_ALTER_INPLACE_NOT_SUPPORTED) or why it cannot be done + online (HA_ALTER_INPLACE_NO_LOCK or + HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE) + If set, it will be used with ER_ALTER_OPERATION_NOT_SUPPORTED_REASON if + results from handler::check_if_supported_inplace_alter() doesn't match + requirements set by user. If not set, the more generic + ER_ALTER_OPERATION_NOT_SUPPORTED will be used. + + Please set to a properly localized string, for example using + my_get_err_msg(), so that the error message as a whole is localized. + */ + const char *unsupported_reason; + + Alter_inplace_info(HA_CREATE_INFO *create_info_arg, + Alter_info *alter_info_arg, + KEY *key_info_arg, uint key_count_arg, + partition_info *modified_part_info_arg, + bool ignore_arg) + : create_info(create_info_arg), + alter_info(alter_info_arg), + key_info_buffer(key_info_arg), + key_count(key_count_arg), + index_drop_count(0), + index_drop_buffer(NULL), + index_add_count(0), + index_add_buffer(NULL), + handler_ctx(NULL), + group_commit_ctx(NULL), + handler_flags(0), + modified_part_info(modified_part_info_arg), + ignore(ignore_arg), + online(false), + unsupported_reason(NULL) + {} + + ~Alter_inplace_info() + { + delete handler_ctx; + } + + /** + Used after check_if_supported_inplace_alter() to report + error if the result does not match the LOCK/ALGORITHM + requirements set by the user. + + @param not_supported Part of statement that was not supported. + @param try_instead Suggestion as to what the user should + replace not_supported with. + */ + void report_unsupported_error(const char *not_supported, + const char *try_instead); +}; typedef struct st_key_create_information @@ -1302,6 +2006,12 @@ typedef struct st_key_create_information ulong block_size; LEX_STRING parser_name; LEX_STRING comment; + /** + A flag to determine if we will check for duplicate indexes. + This typically means that the key information was specified + directly by the user (set by the parser). + */ + bool check_for_duplicate_indexes; } KEY_CREATE_INFO; @@ -1460,21 +2170,24 @@ typedef struct st_range_seq_if typedef bool (*SKIP_INDEX_TUPLE_FUNC) (range_seq_t seq, range_id_t range_info); -class COST_VECT +class Cost_estimate { public: double io_count; /* number of I/O */ double avg_io_cost; /* cost of an average I/O oper. */ double cpu_cost; /* cost of operations in CPU */ - double mem_cost; /* cost of used memory */ double import_cost; /* cost of remote operations */ + double mem_cost; /* cost of used memory */ enum { IO_COEFF=1 }; enum { CPU_COEFF=1 }; enum { MEM_COEFF=1 }; enum { IMPORT_COEFF=1 }; - COST_VECT() {} // keep gcc happy + Cost_estimate() + { + reset(); + } double total_cost() { @@ -1482,7 +2195,17 @@ public: MEM_COEFF*mem_cost + IMPORT_COEFF*import_cost; } - void zero() + /** + Whether or not all costs in the object are zero + + @return true if all costs are zero, false otherwise + */ + bool is_zero() const + { + return !(io_count || cpu_cost || import_cost || mem_cost); + } + + void reset() { avg_io_cost= 1.0; io_count= cpu_cost= mem_cost= import_cost= 0.0; @@ -1496,13 +2219,14 @@ public: /* Don't multiply mem_cost */ } - void add(const COST_VECT* cost) + void add(const Cost_estimate* cost) { double io_count_sum= io_count + cost->io_count; add_io(cost->io_count, cost->avg_io_cost); io_count= io_count_sum; cpu_cost += cost->cpu_cost; } + void add_io(double add_io_cnt, double add_avg_cost) { /* In edge cases add_io_cnt may be zero */ @@ -1515,20 +2239,28 @@ public: } } + /// Add to CPU cost + void add_cpu(double add_cpu_cost) { cpu_cost+= add_cpu_cost; } + + /// Add to import cost + void add_import(double add_import_cost) { import_cost+= add_import_cost; } + + /// Add to memory cost + void add_mem(double add_mem_cost) { mem_cost+= add_mem_cost; } + /* To be used when we go from old single value-based cost calculations to - the new COST_VECT-based. + the new Cost_estimate-based. */ void convert_from_cost(double cost) { - zero(); - avg_io_cost= 1.0; + reset(); io_count= cost; } }; void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, - COST_VECT *cost); + Cost_estimate *cost); /* Indicates that all scanned ranges will be singlepoint (aka equality) ranges. @@ -1681,34 +2413,61 @@ uint calculate_key_len(TABLE *, uint, const uchar *, key_part_map); #define make_prev_keypart_map(N) (((key_part_map)1 << (N)) - 1) -/** - Index creation context. - Created by handler::add_index() and destroyed by handler::final_add_index(). - And finally freed at the end of the statement. - (Sql_alloc does not free in delete). -*/ - -class handler_add_index : public Sql_alloc +/** Base class to be used by handlers different shares */ +class Handler_share { public: - /* Table where the indexes are added */ - TABLE* const table; - /* Indexes being created */ - KEY* const key_info; - /* Size of key_info[] */ - const uint num_of_keys; - handler_add_index(TABLE *table_arg, KEY *key_info_arg, uint num_of_keys_arg) - : table (table_arg), key_info (key_info_arg), num_of_keys (num_of_keys_arg) - {} - virtual ~handler_add_index() {} + Handler_share() {} + virtual ~Handler_share() {} }; -class Query_cache; -struct Query_cache_block_table; + /** The handler class is the interface for dynamically loadable storage engines. Do not add ifdefs and take care when adding or changing virtual functions to avoid vtable confusion + + Functions in this class accept and return table columns data. Two data + representation formats are used: + 1. TableRecordFormat - Used to pass [partial] table records to/from + storage engine + + 2. KeyTupleFormat - used to pass index search tuples (aka "keys") to + storage engine. See opt_range.cc for description of this format. + + TableRecordFormat + ================= + [Warning: this description is work in progress and may be incomplete] + The table record is stored in a fixed-size buffer: + + record: null_bytes, column1_data, column2_data, ... + + The offsets of the parts of the buffer are also fixed: every column has + an offset to its column{i}_data, and if it is nullable it also has its own + bit in null_bytes. + + The record buffer only includes data about columns that are marked in the + relevant column set (table->read_set and/or table->write_set, depending on + the situation). + <not-sure>It could be that it is required that null bits of non-present + columns are set to 1</not-sure> + + VARIOUS EXCEPTIONS AND SPECIAL CASES + + If the table has no nullable columns, then null_bytes is still + present, its length is one byte <not-sure> which must be set to 0xFF + at all times. </not-sure> + + If the table has columns of type BIT, then certain bits from those columns + may be stored in null_bytes as well. Grep around for Field_bit for + details. + + For blob columns (see Field_blob), the record buffer stores length of the + data, following by memory pointer to the blob data. The pointer is owned + by the storage engine and is valid until the next operation. + + If a blob column has NULL value, then its length and blob data pointer + must be set to 0. */ class handler :public Sql_alloc @@ -1761,7 +2520,6 @@ public: uint ref_length; FT_INFO *ft_handler; enum {NONE=0, INDEX, RND} inited; - bool locked; bool implicit_emptied; /* Can be !=0 only if HEAP */ const COND *pushed_cond; /** @@ -1818,6 +2576,24 @@ public: */ PSI_table *m_psi; + virtual void unbind_psi(); + virtual void rebind_psi(); + +private: + /** + The lock type set by when calling::ha_external_lock(). This is + propagated down to the storage engine. The reason for also storing + it here, is that when doing MRR we need to create/clone a second handler + object. This cloned handler object needs to know about the lock_type used. + */ + int m_lock_type; + /** + Pointer where to store/retrieve the Handler_share pointer. + For non partitioned handlers this is &TABLE_SHARE::ha_share. + */ + Handler_share **ha_share; + +public: handler(handlerton *ht_arg, TABLE_SHARE *share_arg) :table_share(share_arg), table(0), estimation_rows_to_insert(0), ht(ht_arg), @@ -1825,18 +2601,21 @@ public: in_range_check_pushed_down(FALSE), ref_length(sizeof(my_off_t)), ft_handler(0), inited(NONE), - locked(FALSE), implicit_emptied(0), + implicit_emptied(0), pushed_cond(0), next_insert_id(0), insert_id_for_cur_row(0), pushed_idx_cond(NULL), pushed_idx_cond_keyno(MAX_KEY), auto_inc_intervals_count(0), - m_psi(NULL) + m_psi(NULL), m_lock_type(F_UNLCK), ha_share(NULL) { + DBUG_PRINT("info", + ("handler created F_UNLCK %d F_RDLCK %d F_WRLCK %d", + F_UNLCK, F_RDLCK, F_WRLCK)); reset_statistics(); } virtual ~handler(void) { - DBUG_ASSERT(locked == FALSE); + DBUG_ASSERT(m_lock_type == F_UNLCK); DBUG_ASSERT(inited == NONE); } virtual handler *clone(const char *name, MEM_ROOT *mem_root); @@ -1846,7 +2625,7 @@ public: cached_table_flags= table_flags(); } /* ha_ methods: pubilc wrappers for private virtual API */ - + int ha_open(TABLE *table, const char *name, int mode, uint test_if_locked); int ha_index_init(uint idx, bool sorted) { @@ -1929,11 +2708,11 @@ public: /** to be actually called to get 'check()' functionality*/ int ha_check(THD *thd, HA_CHECK_OPT *check_opt); int ha_repair(THD* thd, HA_CHECK_OPT* check_opt); - void ha_start_bulk_insert(ha_rows rows) + void ha_start_bulk_insert(ha_rows rows, uint flags= 0) { DBUG_ENTER("handler::ha_start_bulk_insert"); estimation_rows_to_insert= rows; - start_bulk_insert(rows); + start_bulk_insert(rows, flags); DBUG_VOID_RETURN; } int ha_end_bulk_insert() @@ -1954,15 +2733,14 @@ public: int ha_disable_indexes(uint mode); int ha_enable_indexes(uint mode); int ha_discard_or_import_tablespace(my_bool discard); - void ha_prepare_for_alter(); int ha_rename_table(const char *from, const char *to); int ha_delete_table(const char *name); void ha_drop_table(const char *name); int ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info); - int ha_create_handler_files(const char *name, const char *old_name, - int action_flag, HA_CREATE_INFO *info); + int ha_create_partitioning_metadata(const char *name, const char *old_name, + int action_flag); int ha_change_partitions(HA_CREATE_INFO *create_info, const char *path, @@ -1975,10 +2753,32 @@ public: void adjust_next_insert_id_after_explicit_value(ulonglong nr); int update_auto_increment(); - void print_keydup_error(uint key_nr, const char *msg, myf errflag); virtual void print_error(int error, myf errflag); virtual bool get_error_message(int error, String *buf); uint get_dup_key(int error); + /** + Retrieves the names of the table and the key for which there was a + duplicate entry in the case of HA_ERR_FOREIGN_DUPLICATE_KEY. + + If any of the table or key name is not available this method will return + false and will not change any of child_table_name or child_key_name. + + @param child_table_name[out] Table name + @param child_table_name_len[in] Table name buffer size + @param child_key_name[out] Key name + @param child_key_name_len[in] Key name buffer size + + @retval true table and key names were available + and were written into the corresponding + out parameters. + @retval false table and key names were not available, + the out parameters were not touched. + */ + virtual bool get_foreign_dup_key(char *child_table_name, + uint child_table_name_len, + char *child_key_name, + uint child_key_name_len) + { DBUG_ASSERT(false); return(false); } void reset_statistics() { rows_read= rows_changed= rows_tmp_read= 0; @@ -1992,6 +2792,18 @@ public: } virtual double scan_time() { return ulonglong2double(stats.data_file_length) / IO_SIZE + 2; } + + /** + The cost of reading a set of ranges from the table using an index + to access it. + + @param index The index number. + @param ranges The number of ranges to be read. + @param rows Total number of rows to be read. + + This method can be used to calculate the total cost of scanning a table + using an index by calling it using read_time(index, 1, table_size). + */ virtual double read_time(uint index, uint ranges, ha_rows rows) { return rows2double(ranges+rows); } @@ -2161,18 +2973,17 @@ protected: } public: - /* Similar functions like the above, but does statistics counting */ - inline int ha_index_read_map(uchar * buf, const uchar * key, - key_part_map keypart_map, - enum ha_rkey_function find_flag); - inline int ha_index_read_idx_map(uchar * buf, uint index, const uchar * key, - key_part_map keypart_map, - enum ha_rkey_function find_flag); - inline int ha_index_next(uchar * buf); - inline int ha_index_prev(uchar * buf); - inline int ha_index_first(uchar * buf); - inline int ha_index_last(uchar * buf); - inline int ha_index_next_same(uchar *buf, const uchar *key, uint keylen); + int ha_index_read_map(uchar * buf, const uchar * key, + key_part_map keypart_map, + enum ha_rkey_function find_flag); + int ha_index_read_idx_map(uchar * buf, uint index, const uchar * key, + key_part_map keypart_map, + enum ha_rkey_function find_flag); + int ha_index_next(uchar * buf); + int ha_index_prev(uchar * buf); + int ha_index_first(uchar * buf); + int ha_index_last(uchar * buf); + int ha_index_next_same(uchar *buf, const uchar *key, uint keylen); /* TODO: should we make for those functions non-virtual ha_func_name wrappers, too? @@ -2180,10 +2991,11 @@ public: virtual ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, void *seq_init_param, uint n_ranges, uint *bufsz, - uint *mrr_mode, COST_VECT *cost); + uint *mrr_mode, + Cost_estimate *cost); virtual ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, uint key_parts, uint *bufsz, - uint *mrr_mode, COST_VECT *cost); + uint *mrr_mode, Cost_estimate *cost); virtual int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, uint n_ranges, uint mrr_mode, HANDLER_BUFFER *buf); @@ -2218,6 +3030,7 @@ public: const key_range *end_key, bool eq_range, bool sorted); virtual int read_range_next(); + void set_end_range(const key_range *end_key); int compare_key(key_range *range); int compare_key2(key_range *range); virtual int ft_init() { return HA_ERR_WRONG_COMMAND; } @@ -2235,16 +3048,25 @@ private: */ virtual int rnd_pos_by_record(uchar *record) { + int error; + DBUG_ASSERT(table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION); + + error = ha_rnd_init(false); + if (error != 0) + return error; + position(record); - return rnd_pos(record, ref); + error = ha_rnd_pos(record, ref); + ha_rnd_end(); + return error; } virtual int read_first_row(uchar *buf, uint primary_key); public: /* Same as above, but with statistics */ inline int ha_ft_read(uchar *buf); - inline int ha_rnd_next(uchar *buf); - inline int ha_rnd_pos(uchar *buf, uchar *pos); + int ha_rnd_next(uchar *buf); + int ha_rnd_pos(uchar *buf, uchar *pos); inline int ha_rnd_pos_by_record(uchar *buf); inline int ha_read_first_row(uchar *buf, uint primary_key); @@ -2289,6 +3111,7 @@ public: If this method returns nonzero, it will also signal the storage engine that the next read will be a locking re-read of the row. */ + bool ha_was_semi_consistent_read(); virtual bool was_semi_consistent_read() { return 0; } /** Tell the engine whether it should avoid unnecessary lock waits. @@ -2350,10 +3173,15 @@ public: { return FALSE; } virtual char* get_foreign_key_create_info() { return(NULL);} /* gets foreign key create string from InnoDB */ - virtual char* get_tablespace_name(THD *thd, char *name, uint name_len) - { return(NULL);} /* gets tablespace name from handler */ - /** used in ALTER TABLE; 1 if changing storage engine is allowed */ - virtual bool can_switch_engines() { return 1; } + /** + Used in ALTER TABLE to check if changing storage engine is allowed. + + @note Called without holding thr_lock.c lock. + + @retval true Changing storage engine is allowed. + @retval false Changing storage engine not allowed. + */ + virtual bool can_switch_engines() { return true; } virtual int can_continue_handler_scan() { return 0; } /** Get the list of foreign keys in this table. @@ -2388,19 +3216,8 @@ public: { return; } /* prepare InnoDB for HANDLER */ virtual void free_foreign_key_create_info(char* str) {} /** The following can be called without an open handler */ - virtual const char *table_type() const =0; - /** - If frm_error() is called then we will use this to find out what file - extentions exist for the storage engine. This is also used by the default - rename_table and delete_table method in handler.cc. - - For engines that have two file name extentions (separate meta/index file - and data file), the order of elements is relevant. First element of engine - file name extentions array should be meta/index file extention. Second - element - data file extention. This order is assumed by - prepare_for_repair() when REPAIR TABLE ... USE_FRM is issued. - */ - virtual const char **bas_ext() const =0; + const char *table_type() const { return hton_name(ht)->str; } + const char **bas_ext() const { return ht->tablefile_extensions; } virtual int get_default_no_partitions(HA_CREATE_INFO *create_info) { return 1;} @@ -2415,57 +3232,21 @@ public: virtual ulong index_flags(uint idx, uint part, bool all_parts) const =0; -/** - First phase of in-place add index. - Handlers are supposed to create new indexes here but not make them - visible. - - @param table_arg Table to add index to - @param key_info Information about new indexes - @param num_of_key Number of new indexes - @param add[out] Context of handler specific information needed - for final_add_index(). - - @note This function can be called with less than exclusive metadata - lock depending on which flags are listed in alter_table_flags. -*/ - virtual int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys, - handler_add_index **add) - { return (HA_ERR_WRONG_COMMAND); } - -/** - Second and last phase of in-place add index. - Commit or rollback pending new indexes. - - @param add Context of handler specific information from add_index(). - @param commit If true, commit. If false, rollback index changes. - - @note This function is called with exclusive metadata lock. -*/ - virtual int final_add_index(handler_add_index *add, bool commit) - { return (HA_ERR_WRONG_COMMAND); } - - virtual int prepare_drop_index(TABLE *table_arg, uint *key_num, - uint num_of_keys) - { return (HA_ERR_WRONG_COMMAND); } - virtual int final_drop_index(TABLE *table_arg) - { return (HA_ERR_WRONG_COMMAND); } - uint max_record_length() const - { return min(HA_MAX_REC_LENGTH, max_supported_record_length()); } + { return MY_MIN(HA_MAX_REC_LENGTH, max_supported_record_length()); } uint max_keys() const - { return min(MAX_KEY, max_supported_keys()); } + { return MY_MIN(MAX_KEY, max_supported_keys()); } uint max_key_parts() const - { return min(MAX_REF_PARTS, max_supported_key_parts()); } + { return MY_MIN(MAX_REF_PARTS, max_supported_key_parts()); } uint max_key_length() const - { return min(MAX_KEY_LENGTH, max_supported_key_length()); } + { return MY_MIN(MAX_DATA_LENGTH_FOR_KEY, max_supported_key_length()); } uint max_key_part_length() const - { return min(MAX_KEY_LENGTH, max_supported_key_part_length()); } + { return MY_MIN(MAX_DATA_LENGTH_FOR_KEY, max_supported_key_part_length()); } virtual uint max_supported_record_length() const { return HA_MAX_REC_LENGTH; } virtual uint max_supported_keys() const { return 0; } virtual uint max_supported_key_parts() const { return MAX_REF_PARTS; } - virtual uint max_supported_key_length() const { return MAX_KEY_LENGTH; } + virtual uint max_supported_key_length() const { return MAX_DATA_LENGTH_FOR_KEY; } virtual uint max_supported_key_part_length() const { return 255; } virtual uint min_record_length(uint options) const { return 1; } @@ -2684,10 +3465,274 @@ public: pushed_idx_cond_keyno= MAX_KEY; in_range_check_pushed_down= false; } + /** + Part of old, deprecated in-place ALTER API. + */ virtual bool check_if_incompatible_data(HA_CREATE_INFO *create_info, uint table_changes) { return COMPATIBLE_DATA_NO; } + /* On-line/in-place ALTER TABLE interface. */ + + /* + Here is an outline of on-line/in-place ALTER TABLE execution through + this interface. + + Phase 1 : Initialization + ======================== + During this phase we determine which algorithm should be used + for execution of ALTER TABLE and what level concurrency it will + require. + + *) This phase starts by opening the table and preparing description + of the new version of the table. + *) Then we check if it is impossible even in theory to carry out + this ALTER TABLE using the in-place algorithm. For example, because + we need to change storage engine or the user has explicitly requested + usage of the "copy" algorithm. + *) If in-place ALTER TABLE is theoretically possible, we continue + by compiling differences between old and new versions of the table + in the form of HA_ALTER_FLAGS bitmap. We also build a few + auxiliary structures describing requested changes and store + all these data in the Alter_inplace_info object. + *) Then the handler::check_if_supported_inplace_alter() method is called + in order to find if the storage engine can carry out changes requested + by this ALTER TABLE using the in-place algorithm. To determine this, + the engine can rely on data in HA_ALTER_FLAGS/Alter_inplace_info + passed to it as well as on its own checks. If the in-place algorithm + can be used for this ALTER TABLE, the level of required concurrency for + its execution is also returned. + If any errors occur during the handler call, ALTER TABLE is aborted + and no further handler functions are called. + *) Locking requirements of the in-place algorithm are compared to any + concurrency requirements specified by user. If there is a conflict + between them, we either switch to the copy algorithm or emit an error. + + Phase 2 : Execution + =================== + + In this phase the operations are executed. + + *) As the first step, we acquire a lock corresponding to the concurrency + level which was returned by handler::check_if_supported_inplace_alter() + and requested by the user. This lock is held for most of the + duration of in-place ALTER (if HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE + or HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE were returned we acquire an + exclusive lock for duration of the next step only). + *) After that we call handler::ha_prepare_inplace_alter_table() to give the + storage engine a chance to update its internal structures with a higher + lock level than the one that will be used for the main step of algorithm. + After that we downgrade the lock if it is necessary. + *) After that, the main step of this phase and algorithm is executed. + We call the handler::ha_inplace_alter_table() method, which carries out the + changes requested by ALTER TABLE but does not makes them visible to other + connections yet. + *) We ensure that no other connection uses the table by upgrading our + lock on it to exclusive. + *) a) If the previous step succeeds, handler::ha_commit_inplace_alter_table() is + called to allow the storage engine to do any final updates to its structures, + to make all earlier changes durable and visible to other connections. + b) If we have failed to upgrade lock or any errors have occurred during the + handler functions calls (including commit), we call + handler::ha_commit_inplace_alter_table() + to rollback all changes which were done during previous steps. + + Phase 3 : Final + =============== + + In this phase we: + + *) Update SQL-layer data-dictionary by installing .FRM file for the new version + of the table. + *) Inform the storage engine about this change by calling the + handler::ha_notify_table_changed() method. + *) Destroy the Alter_inplace_info and handler_ctx objects. + + */ + + /** + Check if a storage engine supports a particular alter table in-place + + @param altered_table TABLE object for new version of table. + @param ha_alter_info Structure describing changes to be done + by ALTER TABLE and holding data used + during in-place alter. + + @retval HA_ALTER_ERROR Unexpected error. + @retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported, must use copy. + @retval HA_ALTER_INPLACE_EXCLUSIVE_LOCK Supported, but requires X lock. + @retval HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE + Supported, but requires SNW lock + during main phase. Prepare phase + requires X lock. + @retval HA_ALTER_INPLACE_SHARED_LOCK Supported, but requires SNW lock. + @retval HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE + Supported, concurrent reads/writes + allowed. However, prepare phase + requires X lock. + @retval HA_ALTER_INPLACE_NO_LOCK Supported, concurrent + reads/writes allowed. + + @note The default implementation uses the old in-place ALTER API + to determine if the storage engine supports in-place ALTER or not. + + @note Called without holding thr_lock.c lock. + */ + virtual enum_alter_inplace_result + check_if_supported_inplace_alter(TABLE *altered_table, + Alter_inplace_info *ha_alter_info); + + + /** + Public functions wrapping the actual handler call. + @see prepare_inplace_alter_table() + */ + bool ha_prepare_inplace_alter_table(TABLE *altered_table, + Alter_inplace_info *ha_alter_info); + + + /** + Public function wrapping the actual handler call. + @see inplace_alter_table() + */ + bool ha_inplace_alter_table(TABLE *altered_table, + Alter_inplace_info *ha_alter_info) + { + return inplace_alter_table(altered_table, ha_alter_info); + } + + + /** + Public function wrapping the actual handler call. + Allows us to enforce asserts regardless of handler implementation. + @see commit_inplace_alter_table() + */ + bool ha_commit_inplace_alter_table(TABLE *altered_table, + Alter_inplace_info *ha_alter_info, + bool commit); + + + /** + Public function wrapping the actual handler call. + @see notify_table_changed() + */ + void ha_notify_table_changed() + { + notify_table_changed(); + } + + +protected: + /** + Allows the storage engine to update internal structures with concurrent + writes blocked. If check_if_supported_inplace_alter() returns + HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE or + HA_ALTER_INPLACE_SHARED_AFTER_PREPARE, this function is called with + exclusive lock otherwise the same level of locking as for + inplace_alter_table() will be used. + + @note Storage engines are responsible for reporting any errors by + calling my_error()/print_error() + + @note If this function reports error, commit_inplace_alter_table() + will be called with commit= false. + + @note For partitioning, failing to prepare one partition, means that + commit_inplace_alter_table() will be called to roll back changes for + all partitions. This means that commit_inplace_alter_table() might be + called without prepare_inplace_alter_table() having been called first + for a given partition. + + @param altered_table TABLE object for new version of table. + @param ha_alter_info Structure describing changes to be done + by ALTER TABLE and holding data used + during in-place alter. + + @retval true Error + @retval false Success + */ + virtual bool prepare_inplace_alter_table(TABLE *altered_table, + Alter_inplace_info *ha_alter_info) + { return false; } + + + /** + Alter the table structure in-place with operations specified using HA_ALTER_FLAGS + and Alter_inplace_info. The level of concurrency allowed during this + operation depends on the return value from check_if_supported_inplace_alter(). + + @note Storage engines are responsible for reporting any errors by + calling my_error()/print_error() + + @note If this function reports error, commit_inplace_alter_table() + will be called with commit= false. + + @param altered_table TABLE object for new version of table. + @param ha_alter_info Structure describing changes to be done + by ALTER TABLE and holding data used + during in-place alter. + + @retval true Error + @retval false Success + */ + virtual bool inplace_alter_table(TABLE *altered_table, + Alter_inplace_info *ha_alter_info) + { return false; } + + + /** + Commit or rollback the changes made during prepare_inplace_alter_table() + and inplace_alter_table() inside the storage engine. + Note that in case of rollback the allowed level of concurrency during + this operation will be the same as for inplace_alter_table() and thus + might be higher than during prepare_inplace_alter_table(). (For example, + concurrent writes were blocked during prepare, but might not be during + rollback). + + @note Storage engines are responsible for reporting any errors by + calling my_error()/print_error() + + @note If this function with commit= true reports error, it will be called + again with commit= false. + + @note In case of partitioning, this function might be called for rollback + without prepare_inplace_alter_table() having been called first. + Also partitioned tables sets ha_alter_info->group_commit_ctx to a NULL + terminated array of the partitions handlers and if all of them are + committed as one, then group_commit_ctx should be set to NULL to indicate + to the partitioning handler that all partitions handlers are committed. + @see prepare_inplace_alter_table(). + + @param altered_table TABLE object for new version of table. + @param ha_alter_info Structure describing changes to be done + by ALTER TABLE and holding data used + during in-place alter. + @param commit True => Commit, False => Rollback. + + @retval true Error + @retval false Success + */ + virtual bool commit_inplace_alter_table(TABLE *altered_table, + Alter_inplace_info *ha_alter_info, + bool commit) +{ + /* Nothing to commit/rollback, mark all handlers committed! */ + ha_alter_info->group_commit_ctx= NULL; + return false; +} + + + /** + Notify the storage engine that the table structure (.FRM) has been updated. + + @note No errors are allowed during notify_table_changed(). + */ + virtual void notify_table_changed() { } + +public: + /* End of On-line/in-place ALTER TABLE interface. */ + + /** use_hidden_primary_key() is called in case of an update/delete when (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined @@ -2727,36 +3772,9 @@ protected: /** Acquire the instrumented table information from a table share. - @param share a table share @return an instrumented table share, or NULL. */ - PSI_table_share *ha_table_share_psi(const TABLE_SHARE *share) const; - - inline void psi_open() - { - DBUG_ASSERT(m_psi == NULL); - DBUG_ASSERT(table_share != NULL); -#ifdef HAVE_PSI_INTERFACE - if (PSI_server) - { - PSI_table_share *share_psi= ha_table_share_psi(table_share); - if (share_psi) - m_psi= PSI_server->open_table(share_psi, this); - } -#endif - } - - inline void psi_close() - { -#ifdef HAVE_PSI_INTERFACE - if (PSI_server && m_psi) - { - PSI_server->close_table(m_psi); - m_psi= NULL; /* instrumentation handle, invalid after close_table() */ - } -#endif - DBUG_ASSERT(m_psi == NULL); - } + PSI_table_share *ha_table_share_psi() const; /** Default rename_table() and delete_table() rename/delete files with a @@ -2803,6 +3821,14 @@ private: return HA_ERR_WRONG_COMMAND; } + /** + Update a single row. + + Note: If HA_ERR_FOUND_DUPP_KEY is returned, the handler must read + all columns of the row so MySQL can create an error message. If + the columns required for the error message are not read, the error + message will contain garbage. + */ virtual int update_row(const uchar *old_data __attribute__((unused)), uchar *new_data __attribute__((unused))) { @@ -2864,11 +3890,14 @@ private: DBUG_ASSERT(!(ha_table_flags() & HA_CAN_REPAIR)); return HA_ADMIN_NOT_IMPLEMENTED; } - virtual void start_bulk_insert(ha_rows rows) {} + virtual void start_bulk_insert(ha_rows rows, uint flags) {} virtual int end_bulk_insert() { return 0; } +protected: virtual int index_read(uchar * buf, const uchar * key, uint key_len, enum ha_rkey_function find_flag) { return HA_ERR_WRONG_COMMAND; } + friend class ha_partition; +public: /** This method is similar to update_row, however the handler doesn't need to execute the updates at this point in time. The handler can be certain @@ -2938,8 +3967,8 @@ private: virtual void drop_table(const char *name); virtual int create(const char *name, TABLE *form, HA_CREATE_INFO *info)=0; - virtual int create_handler_files(const char *name, const char *old_name, - int action_flag, HA_CREATE_INFO *info) + virtual int create_partitioning_metadata(const char *name, const char *old_name, + int action_flag) { return FALSE; } virtual int change_partitions(HA_CREATE_INFO *create_info, @@ -2953,7 +3982,16 @@ private: { return HA_ERR_WRONG_COMMAND; } virtual int rename_partitions(const char *path) { return HA_ERR_WRONG_COMMAND; } - friend class ha_partition; + virtual bool set_ha_share_ref(Handler_share **arg_ha_share) + { + DBUG_ASSERT(!ha_share); + DBUG_ASSERT(arg_ha_share); + if (ha_share || !arg_ha_share) + return true; + ha_share= arg_ha_share; + return false; + } + int get_lock_type() const { return m_lock_type; } public: /* XXX to be removed, see ha_partition::partition_ht() */ virtual handlerton *partition_ht() const @@ -2964,6 +4002,11 @@ public: virtual void set_lock_type(enum thr_lock_type lock); friend enum icp_result handler_index_cond_check(void* h_arg); +protected: + Handler_share *get_ha_share_ptr(); + void set_ha_share_ptr(Handler_share *arg_ha_share); + void lock_shared_ha_data(); + void unlock_shared_ha_data(); }; #include "multi_range_read.h" @@ -3002,7 +4045,7 @@ static inline const char *ha_resolve_storage_engine_name(const handlerton *db_ty static inline bool ha_check_storage_engine_flag(const handlerton *db_type, uint32 flag) { - return db_type == NULL ? FALSE : test(db_type->flags & flag); + return db_type == NULL ? FALSE : MY_TEST(db_type->flags & flag); } static inline bool ha_storage_engine_is_enabled(const handlerton *db_type) @@ -3011,6 +4054,8 @@ static inline bool ha_storage_engine_is_enabled(const handlerton *db_type) (db_type->state == SHOW_OPTION_YES) : FALSE; } +#define view_pseudo_hton ((handlerton *)1) + /* basic stuff */ int ha_init_errors(void); int ha_init(void); @@ -3025,10 +4070,10 @@ void ha_kill_query(THD* thd, enum thd_kill_levels level); bool ha_flush_logs(handlerton *db_type); void ha_drop_database(char* path); void ha_checkpoint_state(bool disable); +void ha_commit_checkpoint_request(void *cookie, void (*pre_hook)(void *)); int ha_create_table(THD *thd, const char *path, const char *db, const char *table_name, - HA_CREATE_INFO *create_info, - bool update_create_info); + HA_CREATE_INFO *create_info, LEX_CUSTRING *frm); int ha_delete_table(THD *thd, handlerton *db_type, const char *path, const char *db, const char *alias, bool generate_warning); @@ -3036,14 +4081,34 @@ int ha_delete_table(THD *thd, handlerton *db_type, const char *path, bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat); /* discovery */ -int ha_create_table_from_engine(THD* thd, const char *db, const char *name); -bool ha_check_if_table_exists(THD* thd, const char *db, const char *name, - bool *exists); -int ha_discover(THD* thd, const char* dbname, const char* name, - uchar** frmblob, size_t* frmlen); -int ha_find_files(THD *thd,const char *db,const char *path, - const char *wild, bool dir, List<LEX_STRING>* files); -int ha_table_exists_in_engine(THD* thd, const char* db, const char* name); +#ifdef MYSQL_SERVER +class Discovered_table_list: public handlerton::discovered_list +{ + THD *thd; + const char *wild, *wend; + bool with_temps; // whether to include temp tables in the result +public: + Dynamic_array<LEX_STRING*> *tables; + + Discovered_table_list(THD *thd_arg, Dynamic_array<LEX_STRING*> *tables_arg, + const LEX_STRING *wild_arg); + Discovered_table_list(THD *thd_arg, Dynamic_array<LEX_STRING*> *tables_arg) + : thd(thd_arg), wild(NULL), with_temps(true), tables(tables_arg) {} + ~Discovered_table_list() {} + + bool add_table(const char *tname, size_t tlen); + bool add_file(const char *fname); + + void sort(); + void remove_duplicates(); // assumes that the list is sorted +}; + +int ha_discover_table(THD *thd, TABLE_SHARE *share); +int ha_discover_table_names(THD *thd, LEX_STRING *db, MY_DIR *dirp, + Discovered_table_list *result, bool reusable); +bool ha_table_exists(THD *thd, const char *db, const char *table_name, + handlerton **hton= 0); +#endif /* key cache */ extern "C" int ha_init_key_cache(const char *name, KEY_CACHE *key_cache, void *); @@ -3069,6 +4134,7 @@ int ha_enable_transaction(THD *thd, bool on); /* savepoints */ int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv); +bool ha_rollback_to_savepoint_can_release_mdl(THD *thd); int ha_savepoint(THD *thd, SAVEPOINT *sv); int ha_release_savepoint(THD *thd, SAVEPOINT *sv); @@ -3105,10 +4171,13 @@ int ha_binlog_end(THD *thd); const char *get_canonical_filename(handler *file, const char *path, char *tmp_path); bool mysql_xa_recover(THD *thd); +void commit_checkpoint_notify_ha(handlerton *hton, void *cookie); inline const char *table_case_name(HA_CREATE_INFO *info, const char *name) { return ((lower_case_table_names == 2 && info->alias) ? info->alias : name); } +void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag); +void print_keydup_error(TABLE *table, KEY *key, myf errflag); #endif /* HANDLER_INCLUDED */ |