diff options
273 files changed, 23385 insertions, 1094 deletions
diff --git a/include/maria.h b/include/maria.h index 44079f3f288..5618e8ae1a4 100644 --- a/include/maria.h +++ b/include/maria.h @@ -268,6 +268,8 @@ extern my_bool maria_delay_key_write; extern my_off_t maria_max_temp_length; extern ulong maria_bulk_insert_tree_size, maria_data_pointer_size; extern MY_TMPDIR *maria_tmpdir; +extern my_bool maria_encrypt_tables; + /* This is used to check if a symlink points into the mysql data home, which is normally forbidden as it can be used to get access to diff --git a/include/my_aes.h b/include/my_aes.h index 58a78919023..1616d79d70a 100644 --- a/include/my_aes.h +++ b/include/my_aes.h @@ -1,6 +1,3 @@ -#ifndef MY_AES_INCLUDED -#define MY_AES_INCLUDED - /* Copyright (c) 2002, 2006 MySQL AB, 2009 Sun Microsystems, Inc. Use is subject to license terms. @@ -21,47 +18,192 @@ /* Header file for my_aes.c */ /* Wrapper to give simple interface for MySQL to AES standard encryption */ +#ifndef MY_AES_INCLUDED +#define MY_AES_INCLUDED + +/* We expect same result code from encryption functions as in my_aes.h */ +typedef int Crypt_result; + +#define AES_OK 0 +#define AES_BAD_DATA -1 +#define AES_BAD_IV -2 +#define AES_INVALID -3 +#define AES_OPENSSL_ERROR -4 +#define AES_BAD_KEYSIZE -5 +#define AES_KEY_CREATION_FAILED -10 + +#define CRYPT_KEY_OK 0 +#define CRYPT_BUFFER_TO_SMALL -11 /* no trailing ';' so the code can be used inside expressions */ +#define CRYPT_KEY_UNKNOWN -48 + +/* The max block sizes of all supported algorithms */ +#define MY_AES_BLOCK_SIZE 16 + +/* The max key length of all supported algorithms */ +#define MY_AES_MAX_KEY_LENGTH 32 + + #include "rijndael.h" C_MODE_START #define AES_KEY_LENGTH 128 /* Must be 128 192 or 256 */ -/* - my_aes_encrypt - Crypt buffer with AES encryption algorithm. - source - Pointer to data for encryption - source_length - size of encryption data - dest - buffer to place encrypted data (must be large enough) - key - Key to be used for encryption - kel_length - Length of the key. 
Will handle keys of any length +/** + Crypt buffer with AES dynamic (defined at startup) encryption algorithm. + + SYNOPSIS + my_aes_encrypt_dynamic() + @param source [in] Pointer to data for encryption + @param source_length [in] Size of encryption data + @param dest [out] Buffer to place encrypted data (must be large enough) + @param dest_length [out] Pointer to size of encrypted data + @param key [in] Key to be used for encryption + @param key_length [in] Length of the key. 16, 24 or 32 + @param iv [in] Iv to be used for encryption + @param iv_length [in] Length of the iv. should be 16. + @param noPadding [in] if set, algorithm specific padding behaviour is used + + Method used defined by calling my_aes_init_dynamic_encrypt() at startup. + + @return + != 0 error + 0 no error +*/ + +typedef int (*my_aes_encrypt_dynamic_type)(const uchar* source, uint32 source_length, + uchar* dest, uint32* dest_length, + const uchar* key, uint8 key_length, + const uchar* iv, uint8 iv_length, + uint noPadding); + +extern my_aes_encrypt_dynamic_type my_aes_encrypt_dynamic; + +/** + Decrypt buffer with AES dynamic (defined at startup) encryption algorithm. + + SYNOPSIS + my_aes_decrypt_dynamic() + @param source [in] Pointer to data to decrypt + @param source_length [in] Size of data + @param dest [out] Buffer to place decrypted data (must be large enough) + @param dest_length [out] Pointer to size of decrypted data + @param key [in] Key to be used for decryption + @param key_length [in] Length of the key. 16, 24 or 32 + @param iv [in] Iv to be used for decryption + @param iv_length [in] Length of the iv. should be 16. + @param noPadding [in] if set, algorithm specific padding behaviour is used + + @return + != 0 error + 0 no error + + Method used defined by calling my_aes_init_dynamic_encrypt() at startup. 
+*/ + +typedef int (*my_aes_decrypt_dynamic_type)(const uchar *source, + uint32 source_length, + uchar *dest, uint32 *dest_length, + const uchar *key, uint8 key_length, + const uchar *iv, uint8 iv_length, + uint noPadding); +extern my_aes_decrypt_dynamic_type my_aes_decrypt_dynamic; + +/** + Initialize dynamic crypt functions +*/ + +enum enum_my_aes_encryption_algorithm +{ + MY_AES_ALGORITHM_NONE, MY_AES_ALGORITHM_ECB, MY_AES_ALGORITHM_CBC, + MY_AES_ALGORITHM_CTR +}; - returns - size of encrypted data, or negative in case of error. +my_aes_decrypt_dynamic_type get_aes_decrypt_func(enum enum_my_aes_encryption_algorithm method); +my_aes_encrypt_dynamic_type get_aes_encrypt_func(enum enum_my_aes_encryption_algorithm method); + + +my_bool my_aes_init_dynamic_encrypt(enum enum_my_aes_encryption_algorithm method); + +extern MYSQL_PLUGIN_IMPORT enum enum_my_aes_encryption_algorithm current_aes_dynamic_method; + + + +/** + Calculate key and iv from a given salt and secret as it is handled in openssl + encrypted files via console + + SYNOPSIS + my_bytes_to_key() + + @param salt [in] the given salt as extracted from the encrypted file + @param secret [in] the given secret as String, provided by the user + @param key [out] 32 Bytes of key are written to this pointer + @param iv [out] 16 Bytes of iv are written to this pointer + */ + +void my_bytes_to_key(const uchar *salt, + const char *secret, uchar *key, + uchar *iv); + +/** + Decode Hexencoded String to uint8[]. + + SYNOPSIS + my_aes_hex2uint() + @param in [in] Pointer to hexadecimal encoded IV String + @param out [out] Pointer to output uint8 array. Memory needs to be + allocated by caller + @param dest_length [in] Size of destination array. + */ + +void my_aes_hex2uint(const char *in, uchar *out, int dest_length); + +/** + Crypt buffer with AES encryption algorithm. 
+ + SYNOPSIS + my_aes_encrypt() + + @param source Pointer to data for encryption + @param source_length Size of encryption data + @param dest Buffer to place encrypted data (must be large enough) + @param key Key to be used for encryption + @param key_length Length of the key. Will handle keys of any length + + @return Size of encrypted data, or negative in case of error. */ -int my_aes_encrypt(const char *source, int source_length, char *dest, +int my_aes_encrypt(const uchar *source, int source_length, uchar *dest, const char *key, int key_length); -/* - my_aes_decrypt - DeCrypt buffer with AES encryption algorithm. - source - Pointer to data for decryption - source_length - size of encrypted data - dest - buffer to place decrypted data (must be large enough) - key - Key to be used for decryption - kel_length - Length of the key. Will handle keys of any length +/** + DeCrypt buffer with AES encryption algorithm. + + SYNOPSIS + my_aes_decrypt() - returns - size of original data, or negative in case of error. + @param source Pointer to data for decryption + @param source_length size of encrypted data + @param dest buffer to place decrypted data (must be large enough) + @param key Key to be used for decryption + @param key_length Length of the key. Will handle keys of any length + + @return size of original data, or negative in case of error. 
*/ -int my_aes_decrypt(const char *source, int source_length, char *dest, +int my_aes_decrypt(const uchar *source, int source_length, uchar *dest, const char *key, int key_length); -/* - my_aes_get_size - get size of buffer which will be large enough for encrypted - data - source_length - length of data to be encrypted +/** + get size of buffer which will be large enough for encrypted data + + SYNOPSIS + my_aes_get_size() + @param source_length Length of data to be encrypted - returns - size of buffer required to store encrypted data + @return Size of buffer required to store encrypted data */ int my_aes_get_size(int source_length); diff --git a/include/my_base.h b/include/my_base.h index a443b4d161c..3f34adcef6d 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -354,6 +354,8 @@ enum ha_base_keytype { #define HA_CREATE_DELAY_KEY_WRITE 64 #define HA_CREATE_RELIES_ON_SQL_LAYER 128 #define HA_CREATE_INTERNAL_TABLE 256 +#define HA_CREATE_ENCRYPTED 512 +#define HA_INSERT_ORDER 1024 /* Flags used by start_bulk_insert */ diff --git a/include/my_crypt.h b/include/my_crypt.h new file mode 100644 index 00000000000..c6b5e734b5e --- /dev/null +++ b/include/my_crypt.h @@ -0,0 +1,42 @@ +// TODO: Add Windows support + +#ifndef MYSYS_MY_CRYPT_H_ +#define MYSYS_MY_CRYPT_H_ + +#include <my_aes.h> + +#if !defined(HAVE_YASSL) && defined(HAVE_OPENSSL) + +#define HAVE_EncryptAes128Ctr + +C_MODE_START +Crypt_result my_aes_encrypt_ctr(const uchar* source, uint32 source_length, + uchar* dest, uint32* dest_length, + const unsigned char* key, uint8 key_length, + const unsigned char* iv, uint8 iv_length, + uint noPadding); + +Crypt_result my_aes_decrypt_ctr(const uchar* source, uint32 source_length, + uchar* dest, uint32* dest_length, + const unsigned char* key, uint8 key_length, + const unsigned char* iv, uint8 iv_length, + uint noPadding); +C_MODE_END + +Crypt_result EncryptAes128Ctr(const uchar* key, + const uchar* iv, int iv_size, + const uchar* plaintext, int plaintext_size, + 
uchar* ciphertext, int* ciphertext_used); + +Crypt_result DecryptAes128Ctr(const uchar* key, + const uchar* iv, int iv_size, + const uchar* ciphertext, int ciphertext_size, + uchar* plaintext, int* plaintext_used); + +#endif /* !defined(HAVE_YASSL) && defined(HAVE_OPENSSL) */ + +C_MODE_START +Crypt_result my_random_bytes(uchar* buf, int num); +C_MODE_END + +#endif /* MYSYS_MY_CRYPT_H_ */ diff --git a/include/my_crypt_key_management.h b/include/my_crypt_key_management.h new file mode 100644 index 00000000000..3da0ab2b90e --- /dev/null +++ b/include/my_crypt_key_management.h @@ -0,0 +1,80 @@ + +#ifndef MYSYS_MY_CRYPT_KEY_MANAGMENT_H_ +#define MYSYS_MY_CRYPT_KEY_MANAGMENT_H_ + +#include "my_global.h" +#include "my_pthread.h" +#include "mysql/psi/psi.h" + +#ifndef DBUG_OFF +extern my_bool debug_use_static_encryption_keys; + +#ifdef HAVE_PSI_INTERFACE +extern PSI_rwlock_key key_LOCK_dbug_encryption_key_version; +#endif + +extern mysql_rwlock_t LOCK_dbug_encryption_key_version; +extern uint opt_debug_encryption_key_version; +#endif /* DBUG_OFF */ + +C_MODE_START +/** + * function returning latest key version + */ +typedef int (* GetLatestCryptoKeyVersionFunc_t)(); + +/** + * function returning if the key exists + */ +typedef unsigned int (* HasKeyVersionFunc_t)(unsigned int version); + +/** + * function returning the key size + */ +typedef int (* GetKeySizeFunc_t)(unsigned int version); + +/** + * function returning a key for a key version + */ +typedef int (* GetCryptoKeyFunc_t)(unsigned int version, + unsigned char* key, + unsigned keybufsize); + +/** + * function returning an iv for a key version + */ +typedef int (* GetCryptoIVFunc_t)(unsigned int version, + unsigned char* iv, + unsigned ivbufsize); + + +struct CryptoKeyFuncs_t +{ + GetLatestCryptoKeyVersionFunc_t getLatestCryptoKeyVersionFunc; + HasKeyVersionFunc_t hasCryptoKeyFunc; + GetKeySizeFunc_t getCryptoKeySize; + GetCryptoKeyFunc_t getCryptoKeyFunc; + GetCryptoIVFunc_t getCryptoIVFunc; +}; + +/** + * Install 
functions to use for key management + */ +void +InstallCryptoKeyFunctions(const struct CryptoKeyFuncs_t* cryptoKeyFuncs); + +/** + * Functions to interact with key management + */ + +int GetLatestCryptoKeyVersion(); +unsigned int HasCryptoKey(unsigned int version); +int GetCryptoKeySize(unsigned int version); +int GetCryptoKey(unsigned int version, unsigned char* key_buffer, + unsigned int size); +int GetCryptoIV(unsigned int version, unsigned char* key_buffer, + unsigned int size); + +C_MODE_END + +#endif // MYSYS_MY_CRYPT_KEY_MANAGMENT_H_ diff --git a/include/my_dbug.h b/include/my_dbug.h index bcf2015466d..8d8d4fabd6c 100644 --- a/include/my_dbug.h +++ b/include/my_dbug.h @@ -176,6 +176,9 @@ extern void _db_suicide_(); #define DBUG_SYNC_POINT(lock_name,lock_timeout) \ debug_sync_point(lock_name,lock_timeout) void debug_sync_point(const char* lock_name, uint lock_timeout); + +/* Extern function for debugging */ +extern void dump_buffer(FILE *stream, unsigned n, const unsigned char* buf); #else #define DBUG_SYNC_POINT(lock_name,lock_timeout) #endif /* EXTRA_DEBUG */ diff --git a/include/my_md5.h b/include/my_md5.h index 141ea309cae..34d3e931811 100644 --- a/include/my_md5.h +++ b/include/my_md5.h @@ -28,7 +28,7 @@ extern "C" { #endif -#define compute_md5_hash(A,B,C) my_md5(A,B,C) +#define compute_md5_hash(A,B,C) my_md5((unsigned char *)A,B,C) /* Convert an array of bytes to a hexadecimal representation. 
diff --git a/include/mysql/plugin.h b/include/mysql/plugin.h index 640dc0725cc..df74be0209d 100644 --- a/include/mysql/plugin.h +++ b/include/mysql/plugin.h @@ -88,7 +88,8 @@ typedef struct st_mysql_xid MYSQL_XID; #define MYSQL_AUDIT_PLUGIN 5 #define MYSQL_REPLICATION_PLUGIN 6 #define MYSQL_AUTHENTICATION_PLUGIN 7 -#define MYSQL_MAX_PLUGIN_TYPE_NUM 9 /* The number of plugin types */ +#define MYSQL_KEY_MANAGEMENT_PLUGIN 9 +#define MYSQL_MAX_PLUGIN_TYPE_NUM 10 /* The number of plugin types */ /* MariaDB plugin types */ #define MariaDB_PASSWORD_VALIDATION_PLUGIN 8 diff --git a/libmysqld/CMakeLists.txt b/libmysqld/CMakeLists.txt index 7dbc8cd70f5..3826fbbc9bf 100644 --- a/libmysqld/CMakeLists.txt +++ b/libmysqld/CMakeLists.txt @@ -26,7 +26,6 @@ ${PCRE_INCLUDES} ${ZLIB_INCLUDE_DIR} ${SSL_INCLUDE_DIRS} ${SSL_INTERNAL_INCLUDE_DIRS} -${CMAKE_SOURCE_DIR}/sql/backup ) SET(GEN_SOURCES diff --git a/mysql-test/include/have_innodb_encryption.inc b/mysql-test/include/have_innodb_encryption.inc new file mode 100644 index 00000000000..f81d1527d50 --- /dev/null +++ b/mysql-test/include/have_innodb_encryption.inc @@ -0,0 +1,7 @@ +# +# Ensure we have innodb encryption compiled in + +if (`select count(*)=0 from information_schema.global_variables where variable_name="innodb_data_encryption_providertype"`) +{ + --skip Test requires InnoDB encryption. 
+} diff --git a/mysql-test/include/maria_verify_recovery.inc b/mysql-test/include/maria_verify_recovery.inc index 9b56061b2b4..b0f95d2a94b 100644 --- a/mysql-test/include/maria_verify_recovery.inc +++ b/mysql-test/include/maria_verify_recovery.inc @@ -78,6 +78,8 @@ let $mms_purpose=comparison; let $mms_compare_physically=$mms_compare_physically_save; while ($mms_table_to_use) { + # the size of the index file is different for with/without encryption + --replace_result 372 <SIZE> 394 <SIZE> eval check table $mms_tname$mms_table_to_use extended; --echo * testing that checksum after recovery is as expected let $new_checksum=`CHECKSUM TABLE $mms_tname$mms_table_to_use`; diff --git a/mysql-test/include/not_encrypted.inc b/mysql-test/include/not_encrypted.inc new file mode 100644 index 00000000000..2702fb3d8f0 --- /dev/null +++ b/mysql-test/include/not_encrypted.inc @@ -0,0 +1,4 @@ +if (`select @@innodb_encrypt_tables = 1`) +{ + skip only without encryption; +} diff --git a/mysql-test/include/restart_mysqld.inc b/mysql-test/include/restart_mysqld.inc index 3d53fada870..b8625f2f18a 100644 --- a/mysql-test/include/restart_mysqld.inc +++ b/mysql-test/include/restart_mysqld.inc @@ -19,7 +19,14 @@ if ($rpl_inited) shutdown_server 60; # Write file to make mysql-test-run.pl start up the server again ---exec echo "restart" > $_expect_file_name +if ($restart_parameters) +{ + --exec echo "restart: $restart_parameters" > $_expect_file_name +} +if (!$restart_parameters) +{ + --exec echo "restart" > $_expect_file_name +} # Turn on reconnect --enable_reconnect diff --git a/mysql-test/r/information_schema_all_engines.result b/mysql-test/r/information_schema_all_engines.result index 2abecfa346f..7b3e3a5f328 100644 --- a/mysql-test/r/information_schema_all_engines.result +++ b/mysql-test/r/information_schema_all_engines.result @@ -31,6 +31,8 @@ INNODB_SYS_FOREIGN_COLS INNODB_SYS_INDEXES INNODB_SYS_TABLES INNODB_SYS_TABLESTATS +INNODB_TABLESPACES_ENCRYPTION +INNODB_TABLESPACES_SCRUBBING 
INNODB_TRX KEY_CACHES KEY_COLUMN_USAGE @@ -102,6 +104,8 @@ INNODB_SYS_FOREIGN_COLS ID INNODB_SYS_INDEXES INDEX_ID INNODB_SYS_TABLES TABLE_ID INNODB_SYS_TABLESTATS TABLE_ID +INNODB_TABLESPACES_ENCRYPTION SPACE +INNODB_TABLESPACES_SCRUBBING SPACE INNODB_TRX trx_id KEY_CACHES KEY_CACHE_NAME KEY_COLUMN_USAGE CONSTRAINT_SCHEMA @@ -173,6 +177,8 @@ INNODB_SYS_FOREIGN_COLS ID INNODB_SYS_INDEXES INDEX_ID INNODB_SYS_TABLES TABLE_ID INNODB_SYS_TABLESTATS TABLE_ID +INNODB_TABLESPACES_ENCRYPTION SPACE +INNODB_TABLESPACES_SCRUBBING SPACE INNODB_TRX trx_id KEY_CACHES KEY_CACHE_NAME KEY_COLUMN_USAGE CONSTRAINT_SCHEMA @@ -249,6 +255,8 @@ INNODB_SYS_FOREIGN_COLS information_schema.INNODB_SYS_FOREIGN_COLS 1 INNODB_SYS_INDEXES information_schema.INNODB_SYS_INDEXES 1 INNODB_SYS_TABLES information_schema.INNODB_SYS_TABLES 1 INNODB_SYS_TABLESTATS information_schema.INNODB_SYS_TABLESTATS 1 +INNODB_TABLESPACES_ENCRYPTION information_schema.INNODB_TABLESPACES_ENCRYPTION 1 +INNODB_TABLESPACES_SCRUBBING information_schema.INNODB_TABLESPACES_SCRUBBING 1 INNODB_TRX information_schema.INNODB_TRX 1 KEY_CACHES information_schema.KEY_CACHES 1 KEY_COLUMN_USAGE information_schema.KEY_COLUMN_USAGE 1 @@ -310,6 +318,8 @@ Database: information_schema | INNODB_SYS_INDEXES | | INNODB_SYS_TABLES | | INNODB_SYS_TABLESTATS | +| INNODB_TABLESPACES_ENCRYPTION | +| INNODB_TABLESPACES_SCRUBBING | | INNODB_TRX | | KEY_CACHES | | KEY_COLUMN_USAGE | @@ -371,6 +381,8 @@ Database: INFORMATION_SCHEMA | INNODB_SYS_INDEXES | | INNODB_SYS_TABLES | | INNODB_SYS_TABLESTATS | +| INNODB_TABLESPACES_ENCRYPTION | +| INNODB_TABLESPACES_SCRUBBING | | INNODB_TRX | | KEY_CACHES | | KEY_COLUMN_USAGE | @@ -405,5 +417,5 @@ Wildcard: inf_rmation_schema | information_schema | SELECT table_schema, count(*) FROM information_schema.TABLES WHERE table_schema IN ('mysql', 'INFORMATION_SCHEMA', 'test', 'mysqltest') GROUP BY TABLE_SCHEMA; table_schema count(*) -information_schema 56 +information_schema 59 mysql 30 diff --git 
a/mysql-test/r/mysqld--help.result b/mysql-test/r/mysqld--help.result index ce1198a994a..1a6897d4de2 100644 --- a/mysql-test/r/mysqld--help.result +++ b/mysql-test/r/mysqld--help.result @@ -162,6 +162,13 @@ The following options may be given as the first argument: --div-precision-increment=# Precision of the result of '/' operator will be increased on that value + --encrypt-tmp-disk-tables + Encrypt tmp disk tables (created as part of query + execution) + --encryption-algorithm=name + Which encryption algorithm to use for table encryption. + aes_cbc is the recommended one.. One of: none, aes_ecb, + aes_cbc, aes_ctr --event-scheduler[=name] Enable the event scheduler. Possible values are ON, OFF, and DISABLED (keep the event scheduler completely @@ -1141,6 +1148,8 @@ delayed-insert-limit 100 delayed-insert-timeout 300 delayed-queue-size 1000 div-precision-increment 4 +encrypt-tmp-disk-tables FALSE +encryption-algorithm none event-scheduler OFF expensive-subquery-limit 100 expire-logs-days 0 diff --git a/mysql-test/suite/funcs_1/r/is_engines_innodb.result b/mysql-test/suite/funcs_1/r/is_engines_innodb.result index 5713b417cd1..b8c6399fe94 100644 --- a/mysql-test/suite/funcs_1/r/is_engines_innodb.result +++ b/mysql-test/suite/funcs_1/r/is_engines_innodb.result @@ -2,7 +2,7 @@ SELECT * FROM information_schema.engines WHERE ENGINE = 'InnoDB'; ENGINE InnoDB SUPPORT YES -COMMENT Supports transactions, row-level locking, and foreign keys +COMMENT Supports transactions, row-level locking, foreign keys and encryption for tables TRANSACTIONS YES XA YES SAVEPOINTS YES diff --git a/mysql-test/suite/innodb/include/keys.txt b/mysql-test/suite/innodb/include/keys.txt new file mode 100644 index 00000000000..419b76f698f --- /dev/null +++ b/mysql-test/suite/innodb/include/keys.txt @@ -0,0 +1,6 @@ +1;F5502320F8429037B8DAEF761B189D12;770A8A65DA156D24EE2A093277530142 +2;35B2FF0795FB84BBD666DB8430CA214E;4D92199549E0F2EF009B4160F3582E5528A11A45017F3EF8 
+3;7E892875A52C59A3B588306B13C31FBD;B374A26A71490437AA024E4FADD5B497FDFF1A8EA6FF12F6FB65AF2720B59CCF +4;021B0663D4DD7B54E2EBC852677E40BD;18420B5CBA31CCDFFE9716E91EB61374D05914F3ADE23E03 +5;9BF92CEA026CE732DA80821122A8CE97;966050D7777350B6FD5CCB3E5F648DA45C63BEFB6DEDDFA13443F156B7D35C84 +6;BC44D4AFD2D9FCD82A679E4DC6700D06;B5EA210C8C09EF20DB95EC584714A89F diff --git a/mysql-test/suite/innodb/r/innodb-page_encryption.result b/mysql-test/suite/innodb/r/innodb-page_encryption.result new file mode 100644 index 00000000000..79902d0abe8 --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb-page_encryption.result @@ -0,0 +1,186 @@ +SET GLOBAL innodb_file_format = `Barracuda`; +SET GLOBAL innodb_file_per_table = ON; +create table innodb_normal(c1 bigint not null, b char(200)) engine=innodb; +create table innodb_compact(c1 bigint not null, b char(200)) engine=innodb row_format=compact page_encryption=1 page_encryption_key=1; +create table innodb_compressed(c1 bigint not null, b char(200)) engine=innodb row_format=compressed page_encryption=1 page_encryption_key=2; +create table innodb_dynamic(c1 bigint not null, b char(200)) engine=innodb row_format=dynamic page_encryption=1 page_encryption_key=3; +create table innodb_redundant(c1 bigint not null, b char(200)) engine=innodb row_format=redundant page_encryption=1 page_encryption_key=4; +show create table innodb_compact; +Table Create Table +innodb_compact CREATE TABLE `innodb_compact` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT `page_encryption`=1 `page_encryption_key`=1 +show create table innodb_compressed; +Table Create Table +innodb_compressed CREATE TABLE `innodb_compressed` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED `page_encryption`=1 `page_encryption_key`=2 +show create table innodb_dynamic; +Table Create Table +innodb_dynamic CREATE TABLE `innodb_dynamic` ( + `c1` 
bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC `page_encryption`=1 `page_encryption_key`=3 +show create table innodb_redundant; +Table Create Table +innodb_redundant CREATE TABLE `innodb_redundant` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=REDUNDANT `page_encryption`=1 `page_encryption_key`=4 +create procedure innodb_insert_proc (repeat_count int) +begin +declare current_num int; +set current_num = 0; +while current_num < repeat_count do +insert into innodb_normal values(current_num, substring(MD5(RAND()), -64)); +set current_num = current_num + 1; +end while; +end// +commit; +set autocommit=0; +call innodb_insert_proc(5000); +commit; +set autocommit=1; +insert into innodb_compact select * from innodb_normal; +insert into innodb_compressed select * from innodb_normal; +insert into innodb_dynamic select * from innodb_normal; +insert into innodb_redundant select * from innodb_normal; +update innodb_compact set c1 = c1 + 1; +update innodb_compressed set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +update innodb_redundant set c1 = c1 + 1; +select count(*) from innodb_compact where c1 < 1500000; +count(*) +5000 +select count(*) from innodb_compressed where c1 < 1500000; +count(*) +5000 +select count(*) from innodb_dynamic where c1 < 1500000; +count(*) +5000 +select count(*) from innodb_redundant where c1 < 1500000; +count(*) +5000 +SELECT variable_value >= 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encrypted'; +variable_value >= 0 +1 +SELECT variable_value >= 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_decrypted'; +variable_value >= 0 +1 +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encryption_error'; +variable_value = 0 +1 +SET GLOBAL innodb_file_format = `Barracuda`; 
+SET GLOBAL innodb_file_per_table = ON; +update innodb_compact set c1 = c1 + 1; +update innodb_compressed set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +update innodb_redundant set c1 = c1 + 1; +select count(*) from innodb_compact where c1 < 1500000; +count(*) +5000 +select count(*) from innodb_compressed where c1 < 1500000; +count(*) +5000 +select count(*) from innodb_dynamic where c1 < 1500000; +count(*) +5000 +select count(*) from innodb_redundant where c1 < 1500000; +count(*) +5000 +SELECT variable_value >= 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encrypted'; +variable_value >= 0 +1 +SELECT variable_value >= 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_decrypted'; +variable_value >= 0 +1 +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encryption_error'; +variable_value = 0 +1 +alter table innodb_compact engine=innodb page_encryption=DEFAULT page_encryption_key=DEFAULT; +show create table innodb_compact; +Table Create Table +innodb_compact CREATE TABLE `innodb_compact` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT +alter table innodb_compressed engine=innodb page_encryption=DEFAULT page_encryption_key=DEFAULT; +show create table innodb_compressed; +Table Create Table +innodb_compressed CREATE TABLE `innodb_compressed` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED +alter table innodb_dynamic engine=innodb page_encryption=DEFAULT page_encryption_key=DEFAULT; +show create table innodb_dynamic; +Table Create Table +innodb_dynamic CREATE TABLE `innodb_dynamic` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +alter table innodb_redundant engine=innodb page_encryption=DEFAULT 
page_encryption_key=DEFAULT; +show create table innodb_redundant; +Table Create Table +innodb_redundant CREATE TABLE `innodb_redundant` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=REDUNDANT +SET GLOBAL innodb_file_format = `Barracuda`; +SET GLOBAL innodb_file_per_table = ON; +show create table innodb_compact; +Table Create Table +innodb_compact CREATE TABLE `innodb_compact` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT +show create table innodb_compressed; +Table Create Table +innodb_compressed CREATE TABLE `innodb_compressed` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPRESSED +show create table innodb_dynamic; +Table Create Table +innodb_dynamic CREATE TABLE `innodb_dynamic` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +show create table innodb_redundant; +Table Create Table +innodb_redundant CREATE TABLE `innodb_redundant` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=REDUNDANT +update innodb_compact set c1 = c1 + 1; +update innodb_compressed set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +update innodb_redundant set c1 = c1 + 1; +select count(*) from innodb_compact where c1 < 1500000; +count(*) +5000 +select count(*) from innodb_compressed where c1 < 1500000; +count(*) +5000 +select count(*) from innodb_dynamic where c1 < 1500000; +count(*) +5000 +select count(*) from innodb_redundant where c1 < 1500000; +count(*) +5000 +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encrypted'; +variable_value = 0 +1 +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_decrypted'; +variable_value = 
0 +1 +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encryption_error'; +variable_value = 0 +1 +drop procedure innodb_insert_proc; +drop table innodb_normal; +drop table innodb_compact; +drop table innodb_compressed; +drop table innodb_dynamic; +drop table innodb_redundant; diff --git a/mysql-test/suite/innodb/r/innodb-page_encryption_compression.result b/mysql-test/suite/innodb/r/innodb-page_encryption_compression.result new file mode 100644 index 00000000000..a9494795814 --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb-page_encryption_compression.result @@ -0,0 +1,171 @@ +SET GLOBAL innodb_file_format = `Barracuda`; +SET GLOBAL innodb_file_per_table = ON; +set global innodb_compression_algorithm = 1; +create table innodb_normal(c1 bigint not null, b char(200)) engine=innodb page_compressed=1; +create table innodb_compact(c1 bigint not null, b char(200)) engine=innodb row_format=compact page_encryption=1 page_encryption_key=1 page_compressed=1; +create table innodb_dynamic(c1 bigint not null, b char(200)) engine=innodb row_format=dynamic page_encryption=1 page_encryption_key=2 page_compressed=1; +show warnings; +Level Code Message +show create table innodb_normal; +Table Create Table +innodb_normal CREATE TABLE `innodb_normal` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 `page_compressed`=1 +show create table innodb_compact; +Table Create Table +innodb_compact CREATE TABLE `innodb_compact` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT `page_encryption`=1 `page_encryption_key`=1 `page_compressed`=1 +show create table innodb_dynamic; +Table Create Table +innodb_dynamic CREATE TABLE `innodb_dynamic` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC `page_encryption`=1 `page_encryption_key`=2 
`page_compressed`=1 +create procedure innodb_insert_proc (repeat_count int) +begin +declare current_num int; +set current_num = 0; +while current_num < repeat_count do +insert into innodb_normal values(current_num, substring(MD5(RAND()), -64)); +set current_num = current_num + 1; +end while; +end// +commit; +set autocommit=0; +call innodb_insert_proc(5000); +commit; +set autocommit=1; +insert into innodb_compact select * from innodb_normal; +insert into innodb_dynamic select * from innodb_normal; +update innodb_normal set c1 = c1 + 1; +update innodb_compact set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +select count(*) from innodb_normal; +count(*) +5000 +select count(*) from innodb_compact where c1 < 1500000; +count(*) +5000 +select count(*) from innodb_dynamic where c1 < 1500000; +count(*) +5000 +SELECT variable_value >= 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encrypted'; +variable_value >= 0 +1 +SELECT variable_value >= 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_decrypted'; +variable_value >= 0 +1 +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encryption_error'; +variable_value = 0 +1 +SELECT variable_value >= 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_compressed'; +variable_value >= 0 +1 +SELECT variable_value >= 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_decompressed'; +variable_value >= 0 +1 +SET GLOBAL innodb_file_format = `Barracuda`; +SET GLOBAL innodb_file_per_table = ON; +set global innodb_compression_algorithm = 1; +update innodb_normal set c1 = c1 + 1; +update innodb_compact set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +select count(*) from innodb_normal; +count(*) +5000 +select count(*) from innodb_compact where c1 < 1500000; +count(*) +5000 +select count(*) from 
innodb_dynamic where c1 < 1500000; +count(*) +5000 +SELECT variable_value >= 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encrypted'; +variable_value >= 0 +1 +SELECT variable_value >= 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_decrypted'; +variable_value >= 0 +1 +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encryption_error'; +variable_value = 0 +1 +SELECT variable_value > 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_compressed'; +variable_value > 0 +0 +SELECT variable_value > 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_decompressed'; +variable_value > 0 +1 +alter table innodb_normal engine=innodb page_compressed=DEFAULT; +show create table innodb_normal; +Table Create Table +innodb_normal CREATE TABLE `innodb_normal` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +alter table innodb_compact engine=innodb page_encryption=DEFAULT page_encryption_key=DEFAULT page_compressed=DEFAULT; +show create table innodb_compact; +Table Create Table +innodb_compact CREATE TABLE `innodb_compact` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT +alter table innodb_dynamic engine=innodb page_encryption=DEFAULT page_encryption_key=DEFAULT page_compressed=DEFAULT; +show create table innodb_dynamic; +Table Create Table +innodb_dynamic CREATE TABLE `innodb_dynamic` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +SET GLOBAL innodb_file_format = `Barracuda`; +SET GLOBAL innodb_file_per_table = ON; +show create table innodb_normal; +Table Create Table +innodb_normal CREATE TABLE `innodb_normal` ( + `c1` bigint(20) NOT NULL, + `b` char(200) 
DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +show create table innodb_compact; +Table Create Table +innodb_compact CREATE TABLE `innodb_compact` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=COMPACT +show create table innodb_dynamic; +Table Create Table +innodb_dynamic CREATE TABLE `innodb_dynamic` ( + `c1` bigint(20) NOT NULL, + `b` char(200) DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 ROW_FORMAT=DYNAMIC +update innodb_normal set c1 = c1 + 1; +update innodb_compact set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +select count(*) from innodb_normal; +count(*) +5000 +select count(*) from innodb_compact where c1 < 1500000; +count(*) +5000 +select count(*) from innodb_dynamic where c1 < 1500000; +count(*) +5000 +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encrypted'; +variable_value = 0 +1 +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_decrypted'; +variable_value = 0 +1 +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encryption_error'; +variable_value = 0 +1 +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_compressed'; +variable_value = 0 +1 +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_decompressed'; +variable_value = 0 +1 +drop procedure innodb_insert_proc; +drop table innodb_normal; +drop table innodb_compact; +drop table innodb_dynamic; diff --git a/mysql-test/suite/innodb/r/innodb_encryption.result b/mysql-test/suite/innodb/r/innodb_encryption.result new file mode 100644 index 00000000000..040cbd294f9 --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_encryption.result @@ -0,0 +1,57 @@ +SET @start_global_value = 
@@global.innodb_encryption_threads; +SHOW VARIABLES LIKE 'innodb_encrypt%'; +Variable_name Value +innodb_encrypt_log OFF +innodb_encrypt_tables ON +innodb_encryption_rotate_key_age 15 +innodb_encryption_rotation_iops 100 +innodb_encryption_threads 4 +DESCRIBE INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION; +Field Type Null Key Default Extra +SPACE int(11) unsigned NO 0 +NAME varchar(655) YES NULL +ENCRYPTION_SCHEME int(11) unsigned NO 0 +KEYSERVER_REQUESTS int(11) unsigned NO 0 +MIN_KEY_VERSION int(11) unsigned NO 0 +CURRENT_KEY_VERSION int(11) unsigned NO 0 +KEY_ROTATION_PAGE_NUMBER bigint(21) unsigned YES NULL +KEY_ROTATION_MAX_PAGE_NUMBER bigint(21) unsigned YES NULL +# Wait max 5 min for key encryption threads to encrypt one space +# Success! +# Wait max 10 min for key encryption threads to encrypt all space +# Success! +# Now turn off encryption and wait for threads to decrypt everything +SET GLOBAL innodb_encrypt_tables = off; +# Wait max 10 min for key encryption threads to decrypt all space +# Success! +# Shutdown innodb_encryption_threads +SET GLOBAL innodb_encryption_threads=0; +# Turn on encryption +# since threads are off tables should remain unencrypted +SET GLOBAL innodb_encrypt_tables = on; +# Wait 15s to check that nothing gets encrypted +# Success! +# Startup innodb_encryption_threads +SET GLOBAL innodb_encryption_threads=@start_global_value; +# Wait 1 min to check that it start encrypting again +# Success! +# +# Check that restart with encryption turned off works +# even if spaces are encrypted +# +# First wait max 10 min for key encryption threads to encrypt all spaces +# Success! 
+# Restart mysqld --innodb_encrypt_tables=0 --innodb_encryption_threads=0 +SHOW VARIABLES LIKE 'innodb_encrypt%'; +Variable_name Value +innodb_encrypt_log OFF +innodb_encrypt_tables OFF +innodb_encryption_rotate_key_age 15 +innodb_encryption_rotation_iops 100 +innodb_encryption_threads 0 +SELECT COUNT(*) > 0 as should_be_1 +FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION +WHERE MIN_KEY_VERSION <> 0; +should_be_1 +1 +# Restart mysqld again...with default options diff --git a/mysql-test/suite/innodb/r/innodb_monitor.result b/mysql-test/suite/innodb/r/innodb_monitor.result index 84668cf49d0..02e72ae6a80 100644 --- a/mysql-test/suite/innodb/r/innodb_monitor.result +++ b/mysql-test/suite/innodb/r/innodb_monitor.result @@ -180,6 +180,9 @@ compress_page_compressed_trim_op disabled compress_page_compressed_trim_op_saved disabled compress_pages_page_decompressed disabled compress_pages_page_compression_error disabled +compress_pages_page_encrypted disabled +compress_pages_page_decrypted disabled +compress_pages_page_encryption_error disabled index_page_splits disabled index_page_merge_attempts disabled index_page_merge_successful disabled diff --git a/mysql-test/suite/innodb/r/innodb_scrub.result b/mysql-test/suite/innodb/r/innodb_scrub.result new file mode 100644 index 00000000000..95f0aed3226 --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_scrub.result @@ -0,0 +1,224 @@ +create table snapshot_status engine = myisam +select * from information_schema.global_status +where variable_name like 'innodb_scrub%'; +# +# Test delete of records +# +create table t1 ( +a int auto_increment primary key, +b varchar(256), +c text) engine = innodb row_format=compact; +# Populate table with rows +delete from t1; +# restart mysqld so that all pages are flushed +# read all rows from table +select * from t1; +# compact: delete from: grep -c bicycle t1.ibd +0 +# compact: delete from: grep -c bicycle ibdata1 +0 +# compact: delete from: grep -c repairman t1.ibd +0 +# compact: delete 
from: grep -c repairman ibdata1 +0 +drop table t1; +# +# Test delete+rollback+delete +# +create table t1 ( +a int auto_increment primary key, +b varchar(256), +c text) engine = innodb row_format=compact; +# Populate table with rows +begin; +delete from t1; +rollback; +delete from t1; +# restart mysqld so that all pages are flushed +# read all rows from table +select * from t1; +# compact: delete rollback: grep -c bicycle t1.ibd +0 +# compact: delete rollback: grep -c bicycle ibdata1 +0 +# compact: delete rollback: grep -c repairman t1.ibd +0 +# compact: delete rollback: grep -c repairman ibdata1 +0 +drop table t1; +# +# Test insert+rollback +# +create table t1 ( +a int auto_increment primary key, +b varchar(256), +c text) engine = innodb row_format=compact; +# Populate table with rows +begin; +rollback; +# restart mysqld so that all pages are flushed +# read all rows from table +select * from t1; +# compact: insert rollback: grep -c bicycle t1.ibd +0 +# compact: insert rollback: grep -c bicycle ibdata1 +0 +# compact: insert rollback: grep -c repairman t1.ibd +0 +# compact: insert rollback: grep -c repairman ibdata1 +0 +drop table t1; +# +# Test delete of records +# +create table t1 ( +a int auto_increment primary key, +b varchar(256), +c text) engine = innodb row_format=redundant; +# Populate table with rows +delete from t1; +# restart mysqld so that all pages are flushed +# read all rows from table +select * from t1; +# redundant: delete from: grep -c bicycle t1.ibd +0 +# redundant: delete from: grep -c bicycle ibdata1 +0 +# redundant: delete from: grep -c repairman t1.ibd +0 +# redundant: delete from: grep -c repairman ibdata1 +0 +drop table t1; +# +# Test delete+rollback+delete +# +create table t1 ( +a int auto_increment primary key, +b varchar(256), +c text) engine = innodb row_format=redundant; +# Populate table with rows +begin; +delete from t1; +rollback; +delete from t1; +# restart mysqld so that all pages are flushed +# read all rows from table +select * 
from t1; +# redundant: delete rollback: grep -c bicycle t1.ibd +0 +# redundant: delete rollback: grep -c bicycle ibdata1 +0 +# redundant: delete rollback: grep -c repairman t1.ibd +0 +# redundant: delete rollback: grep -c repairman ibdata1 +0 +drop table t1; +# +# Test insert+rollback +# +create table t1 ( +a int auto_increment primary key, +b varchar(256), +c text) engine = innodb row_format=redundant; +# Populate table with rows +begin; +rollback; +# restart mysqld so that all pages are flushed +# read all rows from table +select * from t1; +# redundant: insert rollback: grep -c bicycle t1.ibd +0 +# redundant: insert rollback: grep -c bicycle ibdata1 +0 +# redundant: insert rollback: grep -c repairman t1.ibd +0 +# redundant: insert rollback: grep -c repairman ibdata1 +0 +drop table t1; +# +# Test delete of records +# +create table t1 ( +a int auto_increment primary key, +b varchar(256), +c text) engine = innodb row_format=dynamic; +# Populate table with rows +delete from t1; +# restart mysqld so that all pages are flushed +# read all rows from table +select * from t1; +# dynamic: delete from: grep -c bicycle t1.ibd +0 +# dynamic: delete from: grep -c bicycle ibdata1 +0 +# dynamic: delete from: grep -c repairman t1.ibd +0 +# dynamic: delete from: grep -c repairman ibdata1 +0 +drop table t1; +# +# Test delete+rollback+delete +# +create table t1 ( +a int auto_increment primary key, +b varchar(256), +c text) engine = innodb row_format=dynamic; +# Populate table with rows +begin; +delete from t1; +rollback; +delete from t1; +# restart mysqld so that all pages are flushed +# read all rows from table +select * from t1; +# dynamic: delete rollback: grep -c bicycle t1.ibd +0 +# dynamic: delete rollback: grep -c bicycle ibdata1 +0 +# dynamic: delete rollback: grep -c repairman t1.ibd +0 +# dynamic: delete rollback: grep -c repairman ibdata1 +0 +drop table t1; +# +# Test insert+rollback +# +create table t1 ( +a int auto_increment primary key, +b varchar(256), +c text) 
engine = innodb row_format=dynamic; +# Populate table with rows +begin; +rollback; +# restart mysqld so that all pages are flushed +# read all rows from table +select * from t1; +# dynamic: insert rollback: grep -c bicycle t1.ibd +0 +# dynamic: insert rollback: grep -c bicycle ibdata1 +0 +# dynamic: insert rollback: grep -c repairman t1.ibd +0 +# dynamic: insert rollback: grep -c repairman ibdata1 +0 +drop table t1; +show variables like 'innodb_%scrub_data%'; +Variable_name Value +innodb_background_scrub_data_check_interval 3600 +innodb_background_scrub_data_compressed OFF +innodb_background_scrub_data_interval 604800 +innodb_background_scrub_data_uncompressed OFF +innodb_immediate_scrub_data_uncompressed ON +# verify that this test have not caused any background scrubbing +select ss.variable_name, gs.variable_value - ss.variable_value as variable_value +from snapshot_status ss, +information_schema.global_status gs +where ss.variable_name = gs.variable_name; +variable_name variable_value +INNODB_SCRUB_BACKGROUND_PAGE_REORGANIZATIONS 0 +INNODB_SCRUB_BACKGROUND_PAGE_SPLITS 0 +INNODB_SCRUB_BACKGROUND_PAGE_SPLIT_FAILURES_MISSING_INDEX 0 +INNODB_SCRUB_BACKGROUND_PAGE_SPLIT_FAILURES_OUT_OF_FILESPACE 0 +INNODB_SCRUB_BACKGROUND_PAGE_SPLIT_FAILURES_UNDERFLOW 0 +INNODB_SCRUB_BACKGROUND_PAGE_SPLIT_FAILURES_UNKNOWN 0 +drop table snapshot_status; diff --git a/mysql-test/suite/innodb/r/innodb_scrub_background.result b/mysql-test/suite/innodb/r/innodb_scrub_background.result new file mode 100644 index 00000000000..cd7416efedd --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_scrub_background.result @@ -0,0 +1,88 @@ +# +# immediate scrubbing is off +# background scrubbing is on +# +show variables like 'innodb_%scrub_data%'; +Variable_name Value +innodb_background_scrub_data_check_interval 3600 +innodb_background_scrub_data_compressed ON +innodb_background_scrub_data_interval 604800 +innodb_background_scrub_data_uncompressed ON +innodb_immediate_scrub_data_uncompressed OFF +# 
make sure spaces are checked quickly +SET GLOBAL innodb_background_scrub_data_check_interval=1; +create table snapshot_status engine = myisam +select * from information_schema.global_status +where variable_name like 'innodb_scrub%'; +truncate table snapshot_status; +insert into snapshot_status +select * from information_schema.global_status +where variable_name like 'innodb_scrub%'; +# +# Test delete of records +# +create table t1 ( +a int auto_increment primary key, +b varchar(256), +c text, index(b)) engine = innodb row_format=dynamic; +# Populate table with rows +delete from t1; +# +# Test delete+rollback+delete +# +create table t2 ( +a int auto_increment primary key, +b varchar(256), +c text, index(b)) engine = innodb row_format=dynamic; +# Populate table with rows +begin; +delete from t2; +rollback; +delete from t2; +# +# Test insert+rollback +# +create table t3 ( +a int auto_increment primary key, +b varchar(256), +c text, index(b)) engine = innodb row_format=dynamic; +# Populate table with rows +begin; +rollback; +# start scrubbing threads +SET GLOBAL innodb_encryption_threads=5; +# Wait max 10 min for scrubbing +# Success! 
+# stop scrubbing threads +SET GLOBAL innodb_encryption_threads=0; +# verify that this test have caused background scrubbing +select sum(gs.variable_value - ss.variable_value) > 0 as should_be_1 +from snapshot_status ss, +information_schema.global_status gs +where ss.variable_name = gs.variable_name; +should_be_1 +1 +# restart mysqld so that all pages are flushed +# read all rows from table +select * from t1; +# dynamic: delete: grep -c bicycle t1.ibd +0 +# dynamic: delete: grep -c repairman t1.ibd +0 +# dynamic: delete rollback: grep -c bicycle t2.ibd +0 +# dynamic: delete rollback: grep -c repairman t2.ibd +0 +# dynamic: insert rollback: grep -c bicycle t3.ibd +0 +# dynamic: insert rollback: grep -c repairman t3.ibd +0 +drop table t1, t2, t3; +show variables like 'innodb_%scrub_data%'; +Variable_name Value +innodb_background_scrub_data_check_interval 3600 +innodb_background_scrub_data_compressed ON +innodb_background_scrub_data_interval 604800 +innodb_background_scrub_data_uncompressed ON +innodb_immediate_scrub_data_uncompressed OFF +drop table snapshot_status; diff --git a/mysql-test/suite/innodb/r/innodb_scrub_compressed.result b/mysql-test/suite/innodb/r/innodb_scrub_compressed.result new file mode 100644 index 00000000000..0b5e9f11a05 --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_scrub_compressed.result @@ -0,0 +1,71 @@ +# make sure spaces are checked quickly +SET GLOBAL innodb_background_scrub_data_check_interval=1; +# +# Test delete of records +# +create table t1 ( +a int auto_increment primary key, +b varchar(256), +c text) engine = innodb row_format=compressed; +# Populate table with rows +delete from t1; +# +# Test delete+rollback+delete +# +create table t2 ( +a int auto_increment primary key, +b varchar(256), +c text) engine = innodb row_format=compressed; +# Populate table with rows +begin; +delete from t2; +rollback; +delete from t2; +# +# Test insert+rollback +# +create table t3 ( +a int auto_increment primary key, +b varchar(256), +c text) 
engine = innodb row_format=compressed; +# Populate table with rows +begin; +rollback; +# start scrubbing threads +SET GLOBAL innodb_encryption_threads=5; +# Wait max 10 min for scrubbing of this table +# Success! +# stop scrubbing threads +SET GLOBAL innodb_encryption_threads=0; +# Now there should be background scrubs +# restart mysqld so that all pages are flushed (encryption off) +# so that grep will find stuff +# read all rows from table +select * from t1; +select * from t2; +select * from t3; +# grep -c bicycle t1.ibd +0 +# grep -c bicycle ibdata1 +0 +# grep -c repairman t1.ibd +0 +# grep -c repairman ibdata1 +0 +# grep -c boondoggle t2.ibd +0 +# grep -c boondoggle ibdata1 +0 +# grep -c waste t2.ibd +0 +# grep -c waste ibdata1 +0 +# grep -c keso t3.ibd +0 +# grep -c keso ibdata1 +0 +# grep -c kent t3.ibd +0 +# grep -c kent ibdata1 +0 +drop table t1, t2, t3; diff --git a/mysql-test/suite/innodb/t/innodb-page_encryption-32k-master.opt b/mysql-test/suite/innodb/t/innodb-page_encryption-32k-master.opt new file mode 100644 index 00000000000..723c0b360fd --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb-page_encryption-32k-master.opt @@ -0,0 +1,4 @@ +--default-storage-engine=InnoDB +--encryption-algorithm=aes_cbc +--file-key-management-plugin-filename=$MYSQL_TEST_DIR/suite/innodb/include/keys.txt +--innodb-buffer-pool-size=24M diff --git a/mysql-test/suite/innodb/t/innodb-page_encryption-32k.test b/mysql-test/suite/innodb/t/innodb-page_encryption-32k.test new file mode 100644 index 00000000000..e96e352da12 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb-page_encryption-32k.test @@ -0,0 +1,94 @@ +--source include/no_valgrind_without_big.inc +# Tests for setting innodb-page-size=32k; +--source include/have_xtradb.inc +--source include/have_innodb_32k.inc + +call mtr.add_suppression("InnoDB: Warning: innodb_page_size has been changed from default value *"); + +--disable_query_log +# These values can change during the test +let $innodb_file_format_orig = `SELECT
@@innodb_file_format`; +let $innodb_file_per_table_orig = `SELECT @@innodb_file_per_table`; +--enable_query_log + +SET GLOBAL innodb_file_format = `Barracuda`; +SET GLOBAL innodb_file_per_table = ON; + +create table innodb_normal(c1 bigint not null, b char(200)) engine=innodb; +create table innodb_compact(c1 bigint not null, b char(200)) engine=innodb row_format=compact page_encryption=1 page_encryption_key=1; +create table innodb_dynamic(c1 bigint not null, b char(200)) engine=innodb row_format=dynamic page_encryption=1 page_encryption_key=3; +create table innodb_redundant(c1 bigint not null, b char(200)) engine=innodb row_format=redundant page_encryption=1 page_encryption_key=4; + +show create table innodb_compact; +show create table innodb_dynamic; +show create table innodb_redundant; + +delimiter //; +create procedure innodb_insert_proc (repeat_count int) +begin + declare current_num int; + set current_num = 0; + while current_num < repeat_count do + insert into innodb_normal values(current_num, substring(MD5(RAND()), -150)); + set current_num = current_num + 1; + end while; +end// +delimiter ;// +commit; + +set autocommit=0; +call innodb_insert_proc(5000); +commit; +set autocommit=1; + +insert into innodb_compact select * from innodb_normal; +insert into innodb_dynamic select * from innodb_normal; +insert into innodb_redundant select * from innodb_normal; + +update innodb_compact set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +update innodb_redundant set c1 = c1 + 1; +select count(*) from innodb_compact where c1 < 1500000; +select count(*) from innodb_dynamic where c1 < 1500000; +select count(*) from innodb_redundant where c1 < 1500000; + +--source include/restart_mysqld.inc + +update innodb_compact set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +update innodb_redundant set c1 = c1 + 1; +select count(*) from innodb_compact where c1 < 1500000; +select count(*) from innodb_dynamic where c1 < 1500000; +select count(*) from innodb_redundant 
where c1 < 1500000; + +alter table innodb_compact engine=innodb page_encryption=0; +show create table innodb_compact; +alter table innodb_dynamic engine=innodb page_encryption=0; +show create table innodb_dynamic; +alter table innodb_redundant engine=innodb page_encryption=0; +show create table innodb_redundant; + +--source include/restart_mysqld.inc + +show create table innodb_compact; +show create table innodb_dynamic; +show create table innodb_redundant; + +update innodb_compact set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +update innodb_redundant set c1 = c1 + 1; +select count(*) from innodb_compact where c1 < 1500000; +select count(*) from innodb_dynamic where c1 < 1500000; +select count(*) from innodb_redundant where c1 < 1500000; + +drop procedure innodb_insert_proc; +drop table innodb_normal; +drop table innodb_compact; +drop table innodb_dynamic; +drop table innodb_redundant; + +# reset system +--disable_query_log +EVAL SET GLOBAL innodb_file_per_table = $innodb_file_per_table_orig; +EVAL SET GLOBAL innodb_file_format = $innodb_file_format_orig; +--enable_query_log diff --git a/mysql-test/suite/innodb/t/innodb-page_encryption.opt b/mysql-test/suite/innodb/t/innodb-page_encryption.opt new file mode 100644 index 00000000000..f3aa00059a8 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb-page_encryption.opt @@ -0,0 +1,3 @@ +--enable-file-key-management-plugin +--encryption-algorithm=aes_cbc +--file-key-management-plugin-filename=$MYSQL_TEST_DIR/suite/innodb/include/keys.txt diff --git a/mysql-test/suite/innodb/t/innodb-page_encryption.test b/mysql-test/suite/innodb/t/innodb-page_encryption.test new file mode 100644 index 00000000000..a3ba64c5d48 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb-page_encryption.test @@ -0,0 +1,121 @@ +-- source include/have_innodb.inc + +--disable_query_log +let $innodb_file_format_orig = `SELECT @@innodb_file_format`; +let $innodb_file_per_table_orig = `SELECT @@innodb_file_per_table`; +--enable_query_log +
+SET GLOBAL innodb_file_format = `Barracuda`; +SET GLOBAL innodb_file_per_table = ON; + +create table innodb_normal(c1 bigint not null, b char(200)) engine=innodb; +create table innodb_compact(c1 bigint not null, b char(200)) engine=innodb row_format=compact page_encryption=1 page_encryption_key=1; +create table innodb_compressed(c1 bigint not null, b char(200)) engine=innodb row_format=compressed page_encryption=1 page_encryption_key=2; +create table innodb_dynamic(c1 bigint not null, b char(200)) engine=innodb row_format=dynamic page_encryption=1 page_encryption_key=3; +create table innodb_redundant(c1 bigint not null, b char(200)) engine=innodb row_format=redundant page_encryption=1 page_encryption_key=4; + +show create table innodb_compact; +show create table innodb_compressed; +show create table innodb_dynamic; +show create table innodb_redundant; + +delimiter //; +create procedure innodb_insert_proc (repeat_count int) +begin + declare current_num int; + set current_num = 0; + while current_num < repeat_count do + insert into innodb_normal values(current_num, substring(MD5(RAND()), -64)); + set current_num = current_num + 1; + end while; +end// +delimiter ;// +commit; + +set autocommit=0; +call innodb_insert_proc(5000); +commit; +set autocommit=1; + +insert into innodb_compact select * from innodb_normal; +insert into innodb_compressed select * from innodb_normal; +insert into innodb_dynamic select * from innodb_normal; +insert into innodb_redundant select * from innodb_normal; + +update innodb_compact set c1 = c1 + 1; +update innodb_compressed set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +update innodb_redundant set c1 = c1 + 1; +select count(*) from innodb_compact where c1 < 1500000; +select count(*) from innodb_compressed where c1 < 1500000; +select count(*) from innodb_dynamic where c1 < 1500000; +select count(*) from innodb_redundant where c1 < 1500000; + +# Note that these variables are updated only when real I/O is done, thus they are
not reliable +SELECT variable_value >= 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encrypted'; +SELECT variable_value >= 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_decrypted'; +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encryption_error'; + +--source include/restart_mysqld.inc + +SET GLOBAL innodb_file_format = `Barracuda`; +SET GLOBAL innodb_file_per_table = ON; + +update innodb_compact set c1 = c1 + 1; +update innodb_compressed set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +update innodb_redundant set c1 = c1 + 1; +select count(*) from innodb_compact where c1 < 1500000; +select count(*) from innodb_compressed where c1 < 1500000; +select count(*) from innodb_dynamic where c1 < 1500000; +select count(*) from innodb_redundant where c1 < 1500000; + +SELECT variable_value >= 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encrypted'; +SELECT variable_value >= 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_decrypted'; +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encryption_error'; + +alter table innodb_compact engine=innodb page_encryption=DEFAULT page_encryption_key=DEFAULT; +show create table innodb_compact; +alter table innodb_compressed engine=innodb page_encryption=DEFAULT page_encryption_key=DEFAULT; +show create table innodb_compressed; +alter table innodb_dynamic engine=innodb page_encryption=DEFAULT page_encryption_key=DEFAULT; +show create table innodb_dynamic; +alter table innodb_redundant engine=innodb page_encryption=DEFAULT page_encryption_key=DEFAULT; +show create table innodb_redundant; + +--source include/restart_mysqld.inc + +SET GLOBAL innodb_file_format = `Barracuda`; +SET GLOBAL innodb_file_per_table = ON; + 
+show create table innodb_compact; +show create table innodb_compressed; +show create table innodb_dynamic; +show create table innodb_redundant; + +update innodb_compact set c1 = c1 + 1; +update innodb_compressed set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +update innodb_redundant set c1 = c1 + 1; +select count(*) from innodb_compact where c1 < 1500000; +select count(*) from innodb_compressed where c1 < 1500000; +select count(*) from innodb_dynamic where c1 < 1500000; +select count(*) from innodb_redundant where c1 < 1500000; + +# After alter+restart these should be 0 +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encrypted'; +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_decrypted'; +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encryption_error'; + +drop procedure innodb_insert_proc; +drop table innodb_normal; +drop table innodb_compact; +drop table innodb_compressed; +drop table innodb_dynamic; +drop table innodb_redundant; + +# reset system +--disable_query_log +EVAL SET GLOBAL innodb_file_per_table = $innodb_file_per_table_orig; +EVAL SET GLOBAL innodb_file_format = $innodb_file_format_orig; +--enable_query_log diff --git a/mysql-test/suite/innodb/t/innodb-page_encryption_compression.opt b/mysql-test/suite/innodb/t/innodb-page_encryption_compression.opt new file mode 100644 index 00000000000..f3aa00059a8 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb-page_encryption_compression.opt @@ -0,0 +1,3 @@ +--enable-file-key-management-plugin +--encryption-algorithm=aes_cbc +--file-key-management-plugin-filename=$MYSQL_TEST_DIR/suite/innodb/include/keys.txt diff --git a/mysql-test/suite/innodb/t/innodb-page_encryption_compression.test b/mysql-test/suite/innodb/t/innodb-page_encryption_compression.test new file mode 100644 index
00000000000..c07a03a1e37 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb-page_encryption_compression.test @@ -0,0 +1,116 @@ +-- source include/have_innodb.inc + +--disable_query_log +let $innodb_compression_algorithm_orig=`SELECT @@innodb_compression_algorithm`; +let $innodb_file_format_orig = `SELECT @@innodb_file_format`; +let $innodb_file_per_table_orig = `SELECT @@innodb_file_per_table`; +--enable_query_log + +SET GLOBAL innodb_file_format = `Barracuda`; +SET GLOBAL innodb_file_per_table = ON; +# zlib +set global innodb_compression_algorithm = 1; + +create table innodb_normal(c1 bigint not null, b char(200)) engine=innodb page_compressed=1; +create table innodb_compact(c1 bigint not null, b char(200)) engine=innodb row_format=compact page_encryption=1 page_encryption_key=1 page_compressed=1; +create table innodb_dynamic(c1 bigint not null, b char(200)) engine=innodb row_format=dynamic page_encryption=1 page_encryption_key=2 page_compressed=1; +show warnings; + +show create table innodb_normal; +show create table innodb_compact; +show create table innodb_dynamic; + +delimiter //; +create procedure innodb_insert_proc (repeat_count int) +begin + declare current_num int; + set current_num = 0; + while current_num < repeat_count do + insert into innodb_normal values(current_num, substring(MD5(RAND()), -64)); + set current_num = current_num + 1; + end while; +end// +delimiter ;// +commit; + +set autocommit=0; +call innodb_insert_proc(5000); +commit; +set autocommit=1; + +insert into innodb_compact select * from innodb_normal; +insert into innodb_dynamic select * from innodb_normal; + +update innodb_normal set c1 = c1 + 1; +update innodb_compact set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +select count(*) from innodb_normal; +select count(*) from innodb_compact where c1 < 1500000; +select count(*) from innodb_dynamic where c1 < 1500000; + +SELECT variable_value >= 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 
'innodb_num_pages_page_encrypted'; +SELECT variable_value >= 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_decrypted'; +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encryption_error'; +SELECT variable_value >= 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_compressed'; +SELECT variable_value >= 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_decompressed'; + +--source include/restart_mysqld.inc + +SET GLOBAL innodb_file_format = `Barracuda`; +SET GLOBAL innodb_file_per_table = ON; +# zlib +set global innodb_compression_algorithm = 1; + +update innodb_normal set c1 = c1 + 1; +update innodb_compact set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +select count(*) from innodb_normal; +select count(*) from innodb_compact where c1 < 1500000; +select count(*) from innodb_dynamic where c1 < 1500000; + +SELECT variable_value >= 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encrypted'; +SELECT variable_value >= 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_decrypted'; +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encryption_error'; +SELECT variable_value > 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_compressed'; +SELECT variable_value > 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_decompressed'; + +alter table innodb_normal engine=innodb page_compressed=DEFAULT; +show create table innodb_normal; +alter table innodb_compact engine=innodb page_encryption=DEFAULT page_encryption_key=DEFAULT page_compressed=DEFAULT; +show create table innodb_compact; +alter table innodb_dynamic engine=innodb page_encryption=DEFAULT 
page_encryption_key=DEFAULT page_compressed=DEFAULT; +show create table innodb_dynamic; + +--source include/restart_mysqld.inc + +SET GLOBAL innodb_file_format = `Barracuda`; +SET GLOBAL innodb_file_per_table = ON; + +show create table innodb_normal; +show create table innodb_compact; +show create table innodb_dynamic; + +update innodb_normal set c1 = c1 + 1; +update innodb_compact set c1 = c1 + 1; +update innodb_dynamic set c1 = c1 + 1; +select count(*) from innodb_normal; +select count(*) from innodb_compact where c1 < 1500000; +select count(*) from innodb_dynamic where c1 < 1500000; + +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encrypted'; +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_decrypted'; +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_encryption_error'; +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_compressed'; +SELECT variable_value = 0 FROM information_schema.global_status WHERE LOWER(variable_name) = 'innodb_num_pages_page_decompressed'; + +drop procedure innodb_insert_proc; +drop table innodb_normal; +drop table innodb_compact; +drop table innodb_dynamic; + +# reset system +--disable_query_log +EVAL SET GLOBAL innodb_compression_algorithm = $innodb_compression_algorithm_orig; +EVAL SET GLOBAL innodb_file_per_table = $innodb_file_per_table_orig; +EVAL SET GLOBAL innodb_file_format = $innodb_file_format_orig; +--enable_query_log diff --git a/mysql-test/suite/innodb/t/innodb_bug14147491.test b/mysql-test/suite/innodb/t/innodb_bug14147491.test index 6f0bfca8e1d..ee9bb4814bc 100644 --- a/mysql-test/suite/innodb/t/innodb_bug14147491.test +++ b/mysql-test/suite/innodb/t/innodb_bug14147491.test @@ -2,6 +2,8 @@ # Test opening a corrupted table. 
# +-- source include/not_encrypted.inc + # Don't test under valgrind, memory leaks will occur source include/not_valgrind.inc; # Avoid CrashReporter popup on Mac diff --git a/mysql-test/suite/innodb/t/innodb_bug60049.test b/mysql-test/suite/innodb/t/innodb_bug60049.test index 6760d1a1f02..b1d56d16a5e 100644 --- a/mysql-test/suite/innodb/t/innodb_bug60049.test +++ b/mysql-test/suite/innodb/t/innodb_bug60049.test @@ -5,6 +5,7 @@ -- source include/not_embedded.inc -- source include/have_innodb.inc -- source include/have_innodb_16k.inc +-- source include/not_encrypted.inc call mtr.add_suppression('InnoDB: Error: Table "mysql"."innodb_(table|index)_stats" not found'); call mtr.add_suppression('InnoDB: Error: Fetch of persistent statistics requested'); diff --git a/mysql-test/suite/innodb/t/innodb_corrupt_bit.opt b/mysql-test/suite/innodb/t/innodb_corrupt_bit.opt new file mode 100644 index 00000000000..683a276da2a --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_corrupt_bit.opt @@ -0,0 +1 @@ +--innodb-encryption-threads=0 diff --git a/mysql-test/suite/innodb/t/innodb_encryption.opt b/mysql-test/suite/innodb/t/innodb_encryption.opt new file mode 100644 index 00000000000..e2fc72eb5a8 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_encryption.opt @@ -0,0 +1,8 @@ +--enable-example-key-management-plugin +--encrypt-tmp-disk-tables=ON +--aria-encrypt-tables=ON +--innodb-encryption-threads=4 +--innodb-encryption-rotate-key-age=15 +--innodb-encrypt-tables=ON +--innodb-tablespaces-encryption +--encryption-algorithm=aes_ctr diff --git a/mysql-test/suite/innodb/t/innodb_encryption.test b/mysql-test/suite/innodb/t/innodb_encryption.test new file mode 100644 index 00000000000..ffd57e47c01 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_encryption.test @@ -0,0 +1,175 @@ +# +# +# +-- source include/have_innodb.inc + +# embedded does not support restart +-- source include/not_embedded.inc + +SET @start_global_value = @@global.innodb_encryption_threads; + +SHOW VARIABLES 
LIKE 'innodb_encrypt%'; + +DESCRIBE INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION; + +--echo # Wait max 5 min for key encryption threads to encrypt one space +let $cnt=300; +while ($cnt) +{ + let $success=`SELECT COUNT(*) > 0 FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION > 0`; + if ($success) + { + let $cnt=0; + } + if (!$success) + { + real_sleep 1; + dec $cnt; + } +} +if (!$success) +{ + SELECT * FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION; + SHOW STATUS LIKE 'innodb_encryption%'; + -- die Timeout waiting for encryption threads +} +--echo # Success! + +--echo # Wait max 10 min for key encryption threads to encrypt all space +let $cnt=600; +while ($cnt) +{ + let $success=`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0`; + if ($success) + { + let $cnt=0; + } + if (!$success) + { + real_sleep 1; + dec $cnt; + } +} +if (!$success) +{ + SELECT * FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION; + SHOW STATUS LIKE 'innodb_encryption%'; + -- die Timeout waiting for encryption threads +} +--echo # Success! + +--echo # Now turn off encryption and wait for threads to decrypt everything +SET GLOBAL innodb_encrypt_tables = off; + +--echo # Wait max 10 min for key encryption threads to decrypt all space +let $cnt=600; +while ($cnt) +{ + let $success=`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0`; + if ($success) + { + let $cnt=0; + } + if (!$success) + { + real_sleep 1; + dec $cnt; + } +} +if (!$success) +{ + SELECT * FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION; + SHOW STATUS LIKE 'innodb_encryption%'; + -- die Timeout waiting for encryption threads +} +--echo # Success! 
+ +--echo # Shutdown innodb_encryption_threads +SET GLOBAL innodb_encryption_threads=0; + +--echo # Turn on encryption +--echo # since threads are off tables should remain unencrypted +SET GLOBAL innodb_encrypt_tables = on; + +--echo # Wait 15s to check that nothing gets encrypted +let $cnt=15; +while ($cnt) +{ + let $success=`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0`; + if ($success) + { + real_sleep 1; + dec $cnt; + } + if (!$success) + { + SELECT * FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0; + -- die Failure, tablespace getting encrypted even if innodb_encryption_threads=0 + } +} +--echo # Success! + +--echo # Startup innodb_encryption_threads +SET GLOBAL innodb_encryption_threads=@start_global_value; + +--echo # Wait 1 min to check that it start encrypting again +let $cnt=60; +while ($cnt) +{ + let $success=`SELECT COUNT(*) > 0 FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0 OR KEY_ROTATION_PAGE_NUMBER IS NOT NULL`; + if ($success) + { + let $cnt=0; + } + if (!$success) + { + real_sleep 1; + dec $cnt; + } +} +if (!$success) +{ + SELECT * FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION; + SHOW STATUS LIKE 'innodb_encryption%'; + -- die Timeout waiting for encryption threads +} +--echo # Success! + +--echo # +--echo # Check that restart with encryption turned off works +--echo # even if spaces are encrypted +--echo # +--echo # First wait max 10 min for key encryption threads to encrypt all spaces +let $cnt=600; +while ($cnt) +{ + let $success=`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0`; + if ($success) + { + let $cnt=0; + } + if (!$success) + { + real_sleep 1; + dec $cnt; + } +} +if (!$success) +{ + SELECT * FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION; + SHOW STATUS LIKE 'innodb_encryption%'; + -- die Timeout waiting for encryption threads +} +--echo # Success! 
+--echo # Restart mysqld --innodb_encrypt_tables=0 --innodb_encryption_threads=0 +-- let $restart_parameters=--innodb_encrypt_tables=0 --innodb_encryption_threads=0 +-- source include/restart_mysqld.inc + +SHOW VARIABLES LIKE 'innodb_encrypt%'; +SELECT COUNT(*) > 0 as should_be_1 +FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION +WHERE MIN_KEY_VERSION <> 0; + +--echo # Restart mysqld again...with default options +-- let $restart_parameters= +-- source include/restart_mysqld.inc diff --git a/mysql-test/suite/innodb/t/innodb_information_schema.test b/mysql-test/suite/innodb/t/innodb_information_schema.test index 95b436d676c..da7ee016f29 100644 --- a/mysql-test/suite/innodb/t/innodb_information_schema.test +++ b/mysql-test/suite/innodb/t/innodb_information_schema.test @@ -5,6 +5,13 @@ -- source include/have_innodb.inc +# lock data that is part of result set for this testcase +# is retreived using buf_page_try_get. i.e only show if page +# happen to be in buffer pool, with key rotation threads +# chances are substantial that pages have been evicted and lock_data +# get NULL +-- source include/not_encrypted.inc + -- disable_query_log -- disable_result_log diff --git a/mysql-test/suite/innodb/t/innodb_information_schema_buffer.test b/mysql-test/suite/innodb/t/innodb_information_schema_buffer.test index 6858b898649..5bfac22ec39 100644 --- a/mysql-test/suite/innodb/t/innodb_information_schema_buffer.test +++ b/mysql-test/suite/innodb/t/innodb_information_schema_buffer.test @@ -1,6 +1,11 @@ # Exercise the code path for INFORMATION_SCHEMA.INNODB_BUFFER_POOL_STATS # and INFORMATION_SCHEMA.INNODB_BUFFER_PAGE +# This test assumes that buffer pool is idle +# with key rotation threads buffer pages gets evicted this +# testcase gets flaky +-- source include/not_encrypted.inc + -- source include/have_innodb.inc -- disable_result_log diff --git a/mysql-test/suite/innodb/t/innodb_scrub.opt b/mysql-test/suite/innodb/t/innodb_scrub.opt new file mode 100644 index 
00000000000..fd165b269b5 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_scrub.opt @@ -0,0 +1,14 @@ +--enable-example-key-management-plugin +--innodb-background-scrub-data-compressed=OFF +--innodb-background-scrub-data-uncompressed=OFF +--innodb-encrypt-tables=0 +--innodb-encryption-threads=0 +--innodb-file-format=Barracuda +--innodb-file-per-table=1 +--innodb-immediate-scrub-data-uncompressed=ON +--loose-aria-encrypt-tables=ON +--loose-encrypt-tmp-disk-tables=ON +--loose-innodb-encrypt-tables=ON +--loose-innodb-encryption-rotate-key-age=15 +--loose-innodb-encryption-threads=4 +--loose-innodb-scrub-force-testing=ON diff --git a/mysql-test/suite/innodb/t/innodb_scrub.test b/mysql-test/suite/innodb/t/innodb_scrub.test new file mode 100644 index 00000000000..9bb7a359e68 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_scrub.test @@ -0,0 +1,154 @@ +-- source include/have_innodb.inc +-- source include/not_embedded.inc + +let $MYSQLD_DATADIR=`select @@datadir`; +let ib1_IBD = $MYSQLD_DATADIR/ibdata1; +let t1_IBD = $MYSQLD_DATADIR/test/t1.ibd; + +create table snapshot_status engine = myisam +select * from information_schema.global_status +where variable_name like 'innodb_scrub%'; + +let $rowcount=500; +let $formatno = 3; +while ($formatno) +{ +let $format = `select case $formatno + when 1 then 'dynamic' + when 2 then 'redundant' + when 3 then 'compact' + end`; +dec $formatno; + +-- echo # +-- echo # Test delete of records +-- echo # + +eval create table t1 ( + a int auto_increment primary key, + b varchar(256), + c text) engine = innodb row_format=$format; + +let $numinserts = $rowcount; +-- echo # Populate table with rows +--disable_query_log +while ($numinserts) +{ + dec $numinserts; + insert into t1(b,c) values ('bicycle', repeat('repairman', 1000)); +} +--enable_query_log + +delete from t1; + +-- echo # restart mysqld so that all pages are flushed +-- source include/restart_mysqld.inc +-- echo # read all rows from table +-- disable_result_log +select * from 
t1; +-- enable_result_log + +-- echo # $format: delete from: grep -c bicycle t1.ibd +-- exec grep -c bicycle $t1_IBD || true +-- echo # $format: delete from: grep -c bicycle ibdata1 +-- exec grep -c bicycle $ib1_IBD || true +-- echo # $format: delete from: grep -c repairman t1.ibd +-- exec grep -c repairman $t1_IBD || true +-- echo # $format: delete from: grep -c repairman ibdata1 +-- exec grep -c repairman $ib1_IBD || true + +drop table t1; + +-- echo # +-- echo # Test delete+rollback+delete +-- echo # + +eval create table t1 ( + a int auto_increment primary key, + b varchar(256), + c text) engine = innodb row_format=$format; + +let $numinserts = $rowcount; +-- echo # Populate table with rows +--disable_query_log +while ($numinserts) +{ + dec $numinserts; + insert into t1(b,c) values ('bicycle', repeat('repairman', 1000)); +} +--enable_query_log + +begin; +delete from t1; +rollback; +delete from t1; + +-- echo # restart mysqld so that all pages are flushed +-- source include/restart_mysqld.inc +-- echo # read all rows from table +-- disable_result_log +select * from t1; +-- enable_result_log + +-- echo # $format: delete rollback: grep -c bicycle t1.ibd +-- exec grep -c bicycle $t1_IBD || true +-- echo # $format: delete rollback: grep -c bicycle ibdata1 +-- exec grep -c bicycle $ib1_IBD || true +-- echo # $format: delete rollback: grep -c repairman t1.ibd +-- exec grep -c repairman $t1_IBD || true +-- echo # $format: delete rollback: grep -c repairman ibdata1 +-- exec grep -c repairman $ib1_IBD || true + +drop table t1; + +-- echo # +-- echo # Test insert+rollback +-- echo # + +eval create table t1 ( + a int auto_increment primary key, + b varchar(256), + c text) engine = innodb row_format=$format; + +let $numinserts = $rowcount; +-- echo # Populate table with rows +begin; +--disable_query_log +while ($numinserts) +{ + dec $numinserts; + insert into t1(b,c) values ('bicycle', repeat('repairman', 1000)); +} +--enable_query_log + +rollback; + +-- echo # restart 
mysqld so that all pages are flushed +-- source include/restart_mysqld.inc +-- echo # read all rows from table +-- disable_result_log +select * from t1; +-- enable_result_log + +-- echo # $format: insert rollback: grep -c bicycle t1.ibd +-- exec grep -c bicycle $t1_IBD || true +-- echo # $format: insert rollback: grep -c bicycle ibdata1 +-- exec grep -c bicycle $ib1_IBD || true +-- echo # $format: insert rollback: grep -c repairman t1.ibd +-- exec grep -c repairman $t1_IBD || true +-- echo # $format: insert rollback: grep -c repairman ibdata1 +-- exec grep -c repairman $ib1_IBD || true + +drop table t1; +} + +show variables like 'innodb_%scrub_data%'; + +--echo # verify that this test have not caused any background scrubbing +--sorted_result +select ss.variable_name, gs.variable_value - ss.variable_value as variable_value +from snapshot_status ss, + information_schema.global_status gs +where ss.variable_name = gs.variable_name; + +drop table snapshot_status; diff --git a/mysql-test/suite/innodb/t/innodb_scrub_background.opt b/mysql-test/suite/innodb/t/innodb_scrub_background.opt new file mode 100644 index 00000000000..e1fc6d82626 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_scrub_background.opt @@ -0,0 +1,15 @@ +--enable-example-key-management-plugin +--innodb-background-scrub-data-compressed=ON +--innodb-background-scrub-data-uncompressed=ON +--innodb-encrypt-tables=0 +--innodb-encryption-threads=0 +--innodb-file-format=Barracuda +--innodb-file-per-table=1 +--innodb-immediate-scrub-data-uncompressed=OFF +--innodb-tablespaces-scrubbing +--loose-aria-encrypt-tables=ON +--loose-encrypt-tmp-disk-tables=ON +--loose-innodb-encrypt-tables=ON +--loose-innodb-encryption-rotate-key-age=15 +--loose-innodb-encryption-threads=4 +--loose-innodb-scrub-force-testing=ON diff --git a/mysql-test/suite/innodb/t/innodb_scrub_background.test b/mysql-test/suite/innodb/t/innodb_scrub_background.test new file mode 100644 index 00000000000..931177f6fd4 --- /dev/null +++ 
b/mysql-test/suite/innodb/t/innodb_scrub_background.test @@ -0,0 +1,168 @@ +-- source include/have_innodb.inc +-- source include/not_embedded.inc + +let $MYSQLD_DATADIR=`select @@datadir`; +let ib1_IBD = $MYSQLD_DATADIR/ibdata1; +let t1_IBD = $MYSQLD_DATADIR/test/t1.ibd; +let t2_IBD = $MYSQLD_DATADIR/test/t2.ibd; +let t3_IBD = $MYSQLD_DATADIR/test/t3.ibd; + +--echo # +--echo # immediate scrubbing is off +--echo # background scrubbing is on +--echo # +show variables like 'innodb_%scrub_data%'; + +-- echo # make sure spaces are checked quickly +SET GLOBAL innodb_background_scrub_data_check_interval=1; + +create table snapshot_status engine = myisam +select * from information_schema.global_status +where variable_name like 'innodb_scrub%'; + +let $rowcount=500; +let $formatno = 1; +while ($formatno) +{ +let $format = `select case $formatno + when 1 then 'dynamic' + when 2 then 'redundant' + when 3 then 'compact' + when 4 then 'compressed' + end`; +dec $formatno; + +truncate table snapshot_status; +insert into snapshot_status +select * from information_schema.global_status +where variable_name like 'innodb_scrub%'; + +-- echo # +-- echo # Test delete of records +-- echo # + +eval create table t1 ( + a int auto_increment primary key, + b varchar(256), + c text, index(b)) engine = innodb row_format=$format; + +let $numinserts = $rowcount; +-- echo # Populate table with rows +--disable_query_log +while ($numinserts) +{ + dec $numinserts; + insert into t1(b,c) values ('bicycle', repeat('repairman', 1000)); +} +--enable_query_log + +delete from t1; + +-- echo # +-- echo # Test delete+rollback+delete +-- echo # + +eval create table t2 ( + a int auto_increment primary key, + b varchar(256), + c text, index(b)) engine = innodb row_format=$format; + +let $numinserts = $rowcount; +-- echo # Populate table with rows +--disable_query_log +while ($numinserts) +{ + dec $numinserts; + insert into t2(b,c) values ('bicycle', repeat('repairman', 1000)); +} +--enable_query_log + +begin; 
+delete from t2; +rollback; +delete from t2; + +-- echo # +-- echo # Test insert+rollback +-- echo # + +eval create table t3 ( + a int auto_increment primary key, + b varchar(256), + c text, index(b)) engine = innodb row_format=$format; + +let $numinserts = $rowcount; +-- echo # Populate table with rows +begin; +--disable_query_log +while ($numinserts) +{ + dec $numinserts; + insert into t3(b,c) values ('bicycle', repeat('repairman', 1000)); +} +--enable_query_log + +rollback; + +-- echo # start scrubbing threads +SET GLOBAL innodb_encryption_threads=5; +-- echo # Wait max 10 min for scrubbing +let $cnt=600; +while ($cnt) +{ + let $success=`SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING WHERE LAST_SCRUB_COMPLETED IS NULL AND ( NAME in ('test/t1', 'test/t2', 'test/t3') OR SPACE = 0 )`; + if ($success) + { + let $cnt=0; + } + if (!$success) + { + real_sleep 1; + dec $cnt; + } +} +if (!$success) +{ + SELECT * FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING; + SHOW STATUS LIKE 'innodb_%scrub%'; + -- die Timeout waiting for background threads +} +-- echo # Success! 
+-- echo # stop scrubbing threads +SET GLOBAL innodb_encryption_threads=0; + +-- echo # verify that this test have caused background scrubbing +--sorted_result +select sum(gs.variable_value - ss.variable_value) > 0 as should_be_1 +from snapshot_status ss, + information_schema.global_status gs +where ss.variable_name = gs.variable_name; + +-- echo # restart mysqld so that all pages are flushed +-- source include/restart_mysqld.inc +-- echo # read all rows from table +-- disable_result_log +select * from t1; +-- enable_result_log + +-- echo # $format: delete: grep -c bicycle t1.ibd +-- exec grep -c bicycle $t1_IBD || true +-- echo # $format: delete: grep -c repairman t1.ibd +-- exec grep -c repairman $t1_IBD || true + +-- echo # $format: delete rollback: grep -c bicycle t2.ibd +-- exec grep -c bicycle $t2_IBD || true +-- echo # $format: delete rollback: grep -c repairman t2.ibd +-- exec grep -c repairman $t2_IBD || true + +-- echo # $format: insert rollback: grep -c bicycle t3.ibd +-- exec grep -c bicycle $t3_IBD || true +-- echo # $format: insert rollback: grep -c repairman t3.ibd +-- exec grep -c repairman $t3_IBD || true + +drop table t1, t2, t3; +} + +show variables like 'innodb_%scrub_data%'; + +drop table snapshot_status; diff --git a/mysql-test/suite/innodb/t/innodb_scrub_compressed.opt b/mysql-test/suite/innodb/t/innodb_scrub_compressed.opt new file mode 100644 index 00000000000..b956866e648 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_scrub_compressed.opt @@ -0,0 +1,15 @@ +--enable-example-key-management-plugin +--innodb-background-scrub-data-compressed=ON +--innodb-background-scrub-data-uncompressed=ON +--innodb-encrypt-tables=off +--innodb-encryption-threads=0 +--innodb-file-format=Barracuda +--innodb-file-per-table=1 +--innodb-immediate-scrub-data-uncompressed=ON +--innodb-tablespaces-scrubbing +--loose-aria-encrypt-tables=ON +--loose-encrypt-tmp-disk-tables=ON +--loose-innodb-encrypt-tables=ON +--loose-innodb-encryption-rotate-key-age=15 
+--loose-innodb-encryption-threads=4 +--loose-innodb-scrub-force-testing=ON diff --git a/mysql-test/suite/innodb/t/innodb_scrub_compressed.test b/mysql-test/suite/innodb/t/innodb_scrub_compressed.test new file mode 100644 index 00000000000..4e9cf6d9ec9 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_scrub_compressed.test @@ -0,0 +1,159 @@ +-- source include/have_innodb.inc +-- source include/not_embedded.inc + +let $MYSQLD_DATADIR=`select @@datadir`; +let ib1_IBD = $MYSQLD_DATADIR/ibdata1; +let t1_IBD = $MYSQLD_DATADIR/test/t1.ibd; +let t2_IBD = $MYSQLD_DATADIR/test/t2.ibd; +let t3_IBD = $MYSQLD_DATADIR/test/t3.ibd; + +let $rowcount=500; + +-- echo # make sure spaces are checked quickly +SET GLOBAL innodb_background_scrub_data_check_interval=1; + +-- echo # +-- echo # Test delete of records +-- echo # + +eval create table t1 ( + a int auto_increment primary key, + b varchar(256), + c text) engine = innodb row_format=compressed; + +let $numinserts = $rowcount; +-- echo # Populate table with rows +--disable_query_log +while ($numinserts) +{ + dec $numinserts; + insert into t1(b,c) values ('bicycle', repeat('repairman', 1000)); +} +--enable_query_log + +delete from t1; + +-- echo # +-- echo # Test delete+rollback+delete +-- echo # + +eval create table t2 ( + a int auto_increment primary key, + b varchar(256), + c text) engine = innodb row_format=compressed; + +let $numinserts = $rowcount; +-- echo # Populate table with rows +--disable_query_log +while ($numinserts) +{ + dec $numinserts; + insert into t2(b,c) values ('boondoggle', repeat('waste of time', 1000)); +} +--enable_query_log + +begin; +delete from t2; +rollback; +delete from t2; + +-- echo # +-- echo # Test insert+rollback +-- echo # + +eval create table t3 ( + a int auto_increment primary key, + b varchar(256), + c text) engine = innodb row_format=compressed; + +let $numinserts = $rowcount; +-- echo # Populate table with rows +begin; +--disable_query_log +while ($numinserts) +{ + dec $numinserts; + 
insert into t3(b,c) values ('keso', repeat('kent', 1000)); +} +--enable_query_log + +rollback; + +-- echo # start scrubbing threads +SET GLOBAL innodb_encryption_threads=5; +-- echo # Wait max 10 min for scrubbing of this table +let $cnt=600; +while ($cnt) +{ + let $success=`SELECT COUNT(*) = 0 +FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING +WHERE LAST_SCRUB_COMPLETED IS NULL AND ( NAME like 'test/%' OR SPACE = 0 )`; + + if ($success) + { + let $cnt=0; + } + if (!$success) + { + real_sleep 1; + dec $cnt; + } +} +if (!$success) +{ + SELECT * FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING; + SHOW STATUS LIKE 'innodb_%scrub%'; + -- die Timeout waiting for background threads +} +-- echo # Success! +-- echo # stop scrubbing threads +SET GLOBAL innodb_encryption_threads=0; + +--echo # Now there should be background scrubs +let $success=`select sum(variable_value) > 0 +from information_schema.global_status +where variable_name in ('innodb_scrub_background_page_reorganizations', +'innodb_scrub_background_page_splits')`; + +if (!$success) { + show status like 'innodb_scrub%'; +} + +-- echo # restart mysqld so that all pages are flushed (encryption off) +-- echo # so that grep will find stuff +-- source include/restart_mysqld.inc +-- echo # read all rows from table +-- disable_result_log +select * from t1; +select * from t2; +select * from t3; +-- enable_result_log + +-- echo # grep -c bicycle t1.ibd +-- exec grep -c bicycle $t1_IBD || true +-- echo # grep -c bicycle ibdata1 +-- exec grep -c bicycle $ib1_IBD || true +-- echo # grep -c repairman t1.ibd +-- exec grep -c repairman $t1_IBD || true +-- echo # grep -c repairman ibdata1 +-- exec grep -c repairman $ib1_IBD || true + +-- echo # grep -c boondoggle t2.ibd +-- exec grep -c boondoggle $t2_IBD || true +-- echo # grep -c boondoggle ibdata1 +-- exec grep -c boondoggle $ib1_IBD || true +-- echo # grep -c waste t2.ibd +-- exec grep -c waste $t2_IBD || true +-- echo # grep -c waste ibdata1 +-- exec grep -c waste 
$ib1_IBD || true + +-- echo # grep -c keso t3.ibd +-- exec grep -c keso $t3_IBD || true +-- echo # grep -c keso ibdata1 +-- exec grep -c keso $ib1_IBD || true +-- echo # grep -c kent t3.ibd +-- exec grep -c kent $t3_IBD || true +-- echo # grep -c kent ibdata1 +-- exec grep -c kent $ib1_IBD || true + +drop table t1, t2, t3; diff --git a/mysql-test/suite/innodb_zip/t/innodb_cmp_drop_table.test b/mysql-test/suite/innodb_zip/t/innodb_cmp_drop_table.test index 4263e839c85..35e4b2c6bf3 100644 --- a/mysql-test/suite/innodb_zip/t/innodb_cmp_drop_table.test +++ b/mysql-test/suite/innodb_zip/t/innodb_cmp_drop_table.test @@ -1,5 +1,9 @@ --source include/have_innodb.inc +# lazy evition might not be lazy enough when key rotation +# scans through pages +-- source include/not_encrypted.inc + let $per_table=`select @@innodb_file_per_table`; let $format=`select @@innodb_file_format`; diff --git a/mysql-test/suite/maria/maria-recovery3.result b/mysql-test/suite/maria/maria-recovery3.result index d71a3f6c00a..84fd6fbb91e 100644 --- a/mysql-test/suite/maria/maria-recovery3.result +++ b/mysql-test/suite/maria/maria-recovery3.result @@ -78,7 +78,7 @@ ERROR HY000: Lost connection to MySQL server during query * recovery happens check table t1 extended; Table Op Msg_type Msg_text -mysqltest.t1 check warning Size of indexfile is: 372 Expected: 8192 +mysqltest.t1 check warning Size of indexfile is: <SIZE> Expected: 8192 mysqltest.t1 check status OK * testing that checksum after recovery is as expected Checksum-check diff --git a/mysql-test/suite/maria/maria3.result b/mysql-test/suite/maria/maria3.result index 74eed530bd9..feb5fa82cd4 100644 --- a/mysql-test/suite/maria/maria3.result +++ b/mysql-test/suite/maria/maria3.result @@ -305,6 +305,7 @@ Variable_name Value aria_block_size 8192 aria_checkpoint_interval 30 aria_checkpoint_log_activity 1048576 +aria_encrypt_tables OFF aria_force_start_after_recovery_failures 0 aria_group_commit none aria_group_commit_interval 0 diff --git 
a/mysql-test/suite/sys_vars/r/aria_encrypt_tables_basic.result b/mysql-test/suite/sys_vars/r/aria_encrypt_tables_basic.result new file mode 100644 index 00000000000..bf40f534bd6 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/aria_encrypt_tables_basic.result @@ -0,0 +1,41 @@ +SET @start_global_value = @@global.aria_encrypt_tables; +select @@global.aria_encrypt_tables; +@@global.aria_encrypt_tables +0 +select @@session.aria_encrypt_tables; +ERROR HY000: Variable 'aria_encrypt_tables' is a GLOBAL variable +show global variables like 'aria_encrypt_tables'; +Variable_name Value +aria_encrypt_tables OFF +show session variables like 'aria_encrypt_tables'; +Variable_name Value +aria_encrypt_tables OFF +select * from information_schema.global_variables +where variable_name='aria_encrypt_tables'; +VARIABLE_NAME VARIABLE_VALUE +ARIA_ENCRYPT_TABLES OFF +select * from information_schema.session_variables +where variable_name='aria_encrypt_tables'; +VARIABLE_NAME VARIABLE_VALUE +ARIA_ENCRYPT_TABLES OFF +set global aria_encrypt_tables=ON; +select @@global.aria_encrypt_tables; +@@global.aria_encrypt_tables +1 +set global aria_encrypt_tables=OFF; +select @@global.aria_encrypt_tables; +@@global.aria_encrypt_tables +0 +set global aria_encrypt_tables=1; +select @@global.aria_encrypt_tables; +@@global.aria_encrypt_tables +1 +set session aria_encrypt_tables=1; +ERROR HY000: Variable 'aria_encrypt_tables' is a GLOBAL variable and should be set with SET GLOBAL +set global aria_encrypt_tables=1.1; +ERROR 42000: Incorrect argument type to variable 'aria_encrypt_tables' +set global aria_encrypt_tables=1e1; +ERROR 42000: Incorrect argument type to variable 'aria_encrypt_tables' +set global aria_encrypt_tables="foo"; +ERROR 42000: Variable 'aria_encrypt_tables' can't be set to the value of 'foo' +SET @@global.aria_encrypt_tables = @start_global_value; diff --git a/mysql-test/suite/sys_vars/r/debug_use_static_encryption_keys_basic.result 
b/mysql-test/suite/sys_vars/r/debug_use_static_encryption_keys_basic.result new file mode 100644 index 00000000000..a0d4f45cdbf --- /dev/null +++ b/mysql-test/suite/sys_vars/r/debug_use_static_encryption_keys_basic.result @@ -0,0 +1,3 @@ +show global variables like "debug_use_static_encryption_keys"; +Variable_name Value +debug_use_static_encryption_keys OFF diff --git a/mysql-test/suite/sys_vars/r/encrypt_tmp_disk_tables_basic.result b/mysql-test/suite/sys_vars/r/encrypt_tmp_disk_tables_basic.result new file mode 100644 index 00000000000..833ad5287ba --- /dev/null +++ b/mysql-test/suite/sys_vars/r/encrypt_tmp_disk_tables_basic.result @@ -0,0 +1,41 @@ +SET @start_global_value = @@global.encrypt_tmp_disk_tables; +select @@global.encrypt_tmp_disk_tables; +@@global.encrypt_tmp_disk_tables +0 +select @@session.encrypt_tmp_disk_tables; +ERROR HY000: Variable 'encrypt_tmp_disk_tables' is a GLOBAL variable +show global variables like 'encrypt_tmp_disk_tables'; +Variable_name Value +encrypt_tmp_disk_tables OFF +show session variables like 'encrypt_tmp_disk_tables'; +Variable_name Value +encrypt_tmp_disk_tables OFF +select * from information_schema.global_variables +where variable_name='encrypt_tmp_disk_tables'; +VARIABLE_NAME VARIABLE_VALUE +ENCRYPT_TMP_DISK_TABLES OFF +select * from information_schema.session_variables +where variable_name='encrypt_tmp_disk_tables'; +VARIABLE_NAME VARIABLE_VALUE +ENCRYPT_TMP_DISK_TABLES OFF +set global encrypt_tmp_disk_tables=ON; +select @@global.encrypt_tmp_disk_tables; +@@global.encrypt_tmp_disk_tables +1 +set global encrypt_tmp_disk_tables=OFF; +select @@global.encrypt_tmp_disk_tables; +@@global.encrypt_tmp_disk_tables +0 +set global encrypt_tmp_disk_tables=1; +select @@global.encrypt_tmp_disk_tables; +@@global.encrypt_tmp_disk_tables +1 +set session encrypt_tmp_disk_tables=1; +ERROR HY000: Variable 'encrypt_tmp_disk_tables' is a GLOBAL variable and should be set with SET GLOBAL +set global encrypt_tmp_disk_tables=1.1; +ERROR 42000: 
Incorrect argument type to variable 'encrypt_tmp_disk_tables' +set global encrypt_tmp_disk_tables=1e1; +ERROR 42000: Incorrect argument type to variable 'encrypt_tmp_disk_tables' +set global encrypt_tmp_disk_tables="foo"; +ERROR 42000: Variable 'encrypt_tmp_disk_tables' can't be set to the value of 'foo' +SET @@global.encrypt_tmp_disk_tables = @start_global_value; diff --git a/mysql-test/suite/sys_vars/r/encryption_algorithm_basic.result b/mysql-test/suite/sys_vars/r/encryption_algorithm_basic.result new file mode 100644 index 00000000000..a9101b0f950 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/encryption_algorithm_basic.result @@ -0,0 +1,7 @@ +select @@global.encryption_algorithm; +@@global.encryption_algorithm +none +select @@session.encryption_algorithm; +ERROR HY000: Variable 'encryption_algorithm' is a GLOBAL variable +set global encryption_algorithm="none"; +ERROR HY000: Variable 'encryption_algorithm' is a read only variable diff --git a/mysql-test/suite/sys_vars/r/innodb_background_scrub_data_check_interval_basic.result b/mysql-test/suite/sys_vars/r/innodb_background_scrub_data_check_interval_basic.result new file mode 100644 index 00000000000..5a8734a9446 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_background_scrub_data_check_interval_basic.result @@ -0,0 +1,72 @@ +SET @start_global_value = @@global.innodb_background_scrub_data_check_interval; +# +# default value +# +select @@global.innodb_background_scrub_data_check_interval; +@@global.innodb_background_scrub_data_check_interval +3600 +set global innodb_background_scrub_data_check_interval=10; +select @@global.innodb_background_scrub_data_check_interval; +@@global.innodb_background_scrub_data_check_interval +10 +set global innodb_background_scrub_data_check_interval=DEFAULT; +select @@global.innodb_background_scrub_data_check_interval; +@@global.innodb_background_scrub_data_check_interval +3600 +set global innodb_background_scrub_data_check_interval=20; +select 
@@global.innodb_background_scrub_data_check_interval; +@@global.innodb_background_scrub_data_check_interval +20 +set global innodb_background_scrub_data_check_interval=DEFAULT; +select @@global.innodb_background_scrub_data_check_interval; +@@global.innodb_background_scrub_data_check_interval +3600 +# +# exists as global only +# +select @@global.innodb_background_scrub_data_check_interval; +@@global.innodb_background_scrub_data_check_interval +3600 +select @@session.innodb_background_scrub_data_check_interval; +ERROR HY000: Variable 'innodb_background_scrub_data_check_interval' is a GLOBAL variable +show global variables like 'innodb_background_scrub_data_check_interval'; +Variable_name Value +innodb_background_scrub_data_check_interval 3600 +show session variables like 'innodb_background_scrub_data_check_interval'; +Variable_name Value +innodb_background_scrub_data_check_interval 3600 +select * from information_schema.global_variables +where variable_name='innodb_background_scrub_data_check_interval'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_BACKGROUND_SCRUB_DATA_CHECK_INTERVAL 3600 +select * from information_schema.session_variables +where variable_name='innodb_background_scrub_data_check_interval'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_BACKGROUND_SCRUB_DATA_CHECK_INTERVAL 3600 +# +# show that it's writable +# +set global innodb_background_scrub_data_check_interval=10; +select @@global.innodb_background_scrub_data_check_interval; +@@global.innodb_background_scrub_data_check_interval +10 +set global innodb_background_scrub_data_check_interval=20; +select @@global.innodb_background_scrub_data_check_interval; +@@global.innodb_background_scrub_data_check_interval +20 +set global innodb_background_scrub_data_check_interval=1; +select @@global.innodb_background_scrub_data_check_interval; +@@global.innodb_background_scrub_data_check_interval +1 +set session innodb_background_scrub_data_check_interval=1; +ERROR HY000: Variable 'innodb_background_scrub_data_check_interval' is 
a GLOBAL variable and should be set with SET GLOBAL +# +# incorrect types +# +set global innodb_background_scrub_data_check_interval=1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_background_scrub_data_check_interval' +set global innodb_background_scrub_data_check_interval=1e1; +ERROR 42000: Incorrect argument type to variable 'innodb_background_scrub_data_check_interval' +set global innodb_background_scrub_data_check_interval="foo"; +ERROR 42000: Incorrect argument type to variable 'innodb_background_scrub_data_check_interval' +SET @@global.innodb_background_scrub_data_check_interval = @start_global_value; diff --git a/mysql-test/suite/sys_vars/r/innodb_background_scrub_data_compressed_basic.result b/mysql-test/suite/sys_vars/r/innodb_background_scrub_data_compressed_basic.result new file mode 100644 index 00000000000..7a1fd978bd2 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_background_scrub_data_compressed_basic.result @@ -0,0 +1,50 @@ +SET @start_global_value = @@global.innodb_background_scrub_data_compressed; +# +# exists as global only +# +select @@global.innodb_background_scrub_data_compressed; +@@global.innodb_background_scrub_data_compressed +0 +select @@session.innodb_background_scrub_data_compressed; +ERROR HY000: Variable 'innodb_background_scrub_data_compressed' is a GLOBAL variable +show global variables like 'innodb_background_scrub_data_compressed'; +Variable_name Value +innodb_background_scrub_data_compressed OFF +show session variables like 'innodb_background_scrub_data_compressed'; +Variable_name Value +innodb_background_scrub_data_compressed OFF +select * from information_schema.global_variables +where variable_name='innodb_background_scrub_data_compressed'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_BACKGROUND_SCRUB_DATA_COMPRESSED OFF +select * from information_schema.session_variables +where variable_name='innodb_background_scrub_data_compressed'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_BACKGROUND_SCRUB_DATA_COMPRESSED OFF 
+# +# show that it's writable +# +set global innodb_background_scrub_data_compressed=ON; +select @@global.innodb_background_scrub_data_compressed; +@@global.innodb_background_scrub_data_compressed +1 +set global innodb_background_scrub_data_compressed=OFF; +select @@global.innodb_background_scrub_data_compressed; +@@global.innodb_background_scrub_data_compressed +0 +set global innodb_background_scrub_data_compressed=1; +select @@global.innodb_background_scrub_data_compressed; +@@global.innodb_background_scrub_data_compressed +1 +set session innodb_background_scrub_data_compressed=1; +ERROR HY000: Variable 'innodb_background_scrub_data_compressed' is a GLOBAL variable and should be set with SET GLOBAL +# +# incorrect types +# +set global innodb_background_scrub_data_compressed=1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_background_scrub_data_compressed' +set global innodb_background_scrub_data_compressed=1e1; +ERROR 42000: Incorrect argument type to variable 'innodb_background_scrub_data_compressed' +set global innodb_background_scrub_data_compressed="foo"; +ERROR 42000: Variable 'innodb_background_scrub_data_compressed' can't be set to the value of 'foo' +SET @@global.innodb_background_scrub_data_compressed = @start_global_value; diff --git a/mysql-test/suite/sys_vars/r/innodb_background_scrub_data_interval_basic.result b/mysql-test/suite/sys_vars/r/innodb_background_scrub_data_interval_basic.result new file mode 100644 index 00000000000..49bbc8cd34a --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_background_scrub_data_interval_basic.result @@ -0,0 +1,50 @@ +SET @start_global_value = @@global.innodb_background_scrub_data_interval; +# +# exists as global only +# +select @@global.innodb_background_scrub_data_interval; +@@global.innodb_background_scrub_data_interval +604800 +select @@session.innodb_background_scrub_data_interval; +ERROR HY000: Variable 'innodb_background_scrub_data_interval' is a GLOBAL variable +show global variables like 
'innodb_background_scrub_data_interval'; +Variable_name Value +innodb_background_scrub_data_interval 604800 +show session variables like 'innodb_background_scrub_data_interval'; +Variable_name Value +innodb_background_scrub_data_interval 604800 +select * from information_schema.global_variables +where variable_name='innodb_background_scrub_data_interval'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_BACKGROUND_SCRUB_DATA_INTERVAL 604800 +select * from information_schema.session_variables +where variable_name='innodb_background_scrub_data_interval'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_BACKGROUND_SCRUB_DATA_INTERVAL 604800 +# +# show that it's writable +# +set global innodb_background_scrub_data_interval=100; +select @@global.innodb_background_scrub_data_interval; +@@global.innodb_background_scrub_data_interval +100 +set global innodb_background_scrub_data_interval=200; +select @@global.innodb_background_scrub_data_interval; +@@global.innodb_background_scrub_data_interval +200 +set global innodb_background_scrub_data_interval=300; +select @@global.innodb_background_scrub_data_interval; +@@global.innodb_background_scrub_data_interval +300 +set session innodb_background_scrub_data_interval=400; +ERROR HY000: Variable 'innodb_background_scrub_data_interval' is a GLOBAL variable and should be set with SET GLOBAL +# +# incorrect types +# +set global innodb_background_scrub_data_interval=1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_background_scrub_data_interval' +set global innodb_background_scrub_data_interval=1e1; +ERROR 42000: Incorrect argument type to variable 'innodb_background_scrub_data_interval' +set global innodb_background_scrub_data_interval="foo"; +ERROR 42000: Incorrect argument type to variable 'innodb_background_scrub_data_interval' +SET @@global.innodb_background_scrub_data_interval = @start_global_value; diff --git a/mysql-test/suite/sys_vars/r/innodb_background_scrub_data_uncompressed_basic.result 
b/mysql-test/suite/sys_vars/r/innodb_background_scrub_data_uncompressed_basic.result new file mode 100644 index 00000000000..c85bde6493f --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_background_scrub_data_uncompressed_basic.result @@ -0,0 +1,50 @@ +SET @start_global_value = @@global.innodb_background_scrub_data_uncompressed; +# +# exists as global only +# +select @@global.innodb_background_scrub_data_uncompressed; +@@global.innodb_background_scrub_data_uncompressed +0 +select @@session.innodb_background_scrub_data_uncompressed; +ERROR HY000: Variable 'innodb_background_scrub_data_uncompressed' is a GLOBAL variable +show global variables like 'innodb_background_scrub_data_uncompressed'; +Variable_name Value +innodb_background_scrub_data_uncompressed OFF +show session variables like 'innodb_background_scrub_data_uncompressed'; +Variable_name Value +innodb_background_scrub_data_uncompressed OFF +select * from information_schema.global_variables +where variable_name='innodb_background_scrub_data_uncompressed'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_BACKGROUND_SCRUB_DATA_UNCOMPRESSED OFF +select * from information_schema.session_variables +where variable_name='innodb_background_scrub_data_uncompressed'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_BACKGROUND_SCRUB_DATA_UNCOMPRESSED OFF +# +# show that it's writable +# +set global innodb_background_scrub_data_uncompressed=ON; +select @@global.innodb_background_scrub_data_uncompressed; +@@global.innodb_background_scrub_data_uncompressed +1 +set global innodb_background_scrub_data_uncompressed=OFF; +select @@global.innodb_background_scrub_data_uncompressed; +@@global.innodb_background_scrub_data_uncompressed +0 +set global innodb_background_scrub_data_uncompressed=1; +select @@global.innodb_background_scrub_data_uncompressed; +@@global.innodb_background_scrub_data_uncompressed +1 +set session innodb_background_scrub_data_uncompressed=1; +ERROR HY000: Variable 'innodb_background_scrub_data_uncompressed' is a GLOBAL 
variable and should be set with SET GLOBAL +# +# incorrect types +# +set global innodb_background_scrub_data_uncompressed=1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_background_scrub_data_uncompressed' +set global innodb_background_scrub_data_uncompressed=1e1; +ERROR 42000: Incorrect argument type to variable 'innodb_background_scrub_data_uncompressed' +set global innodb_background_scrub_data_uncompressed="foo"; +ERROR 42000: Variable 'innodb_background_scrub_data_uncompressed' can't be set to the value of 'foo' +SET @@global.innodb_background_scrub_data_uncompressed = @start_global_value; diff --git a/mysql-test/suite/sys_vars/r/innodb_data_encryption_filekey_basic.result b/mysql-test/suite/sys_vars/r/innodb_data_encryption_filekey_basic.result new file mode 100644 index 00000000000..f7660620a2f --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_data_encryption_filekey_basic.result @@ -0,0 +1,9 @@ +SELECT @start_data_encryption_filekey; +@start_data_encryption_filekey +NULL +SELECT COUNT(@@GLOBAL.innodb_data_encryption_filekey); +COUNT(@@GLOBAL.innodb_data_encryption_filekey) +0 +1 Expected +SET @@GLOBAL.innodb_data_encryption_filekey='secret'; +ERROR HY000: Variable 'innodb_data_encryption_filekey' is a read only variable diff --git a/mysql-test/suite/sys_vars/r/innodb_data_encryption_providername_basic.result b/mysql-test/suite/sys_vars/r/innodb_data_encryption_providername_basic.result new file mode 100644 index 00000000000..12b4407290b --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_data_encryption_providername_basic.result @@ -0,0 +1,9 @@ +SELECT @start_data_encryption_providername; +@start_data_encryption_providername +NULL +SELECT COUNT(@@GLOBAL.innodb_data_encryption_providername); +COUNT(@@GLOBAL.innodb_data_encryption_providername) +0 +1 Expected +SET @@GLOBAL.innodb_data_encryption_providername='key.txt'; +ERROR HY000: Variable 'innodb_data_encryption_providername' is a read only variable diff --git 
a/mysql-test/suite/sys_vars/r/innodb_data_encryption_providertype_basic.result b/mysql-test/suite/sys_vars/r/innodb_data_encryption_providertype_basic.result new file mode 100644 index 00000000000..8a45857496e --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_data_encryption_providertype_basic.result @@ -0,0 +1,11 @@ +SELECT @start_data_encryption_providertype; +@start_data_encryption_providertype +NULL +SELECT COUNT(@@GLOBAL.innodb_data_encryption_providertype); +COUNT(@@GLOBAL.innodb_data_encryption_providertype) +1 +1 Expected +SET @@GLOBAL.innodb_data_encryption_providertype=1; +ERROR HY000: Variable 'innodb_data_encryption_providertype' is a read only variable +SET @@GLOBAL.innodb_data_encryption_providertype=k; +ERROR HY000: Variable 'innodb_data_encryption_providertype' is a read only variable diff --git a/mysql-test/suite/sys_vars/r/innodb_data_encryption_providerurl_basic.result b/mysql-test/suite/sys_vars/r/innodb_data_encryption_providerurl_basic.result new file mode 100644 index 00000000000..2a0fa74b347 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_data_encryption_providerurl_basic.result @@ -0,0 +1,9 @@ +SELECT @start_data_encryption_providerurl; +@start_data_encryption_providerurl +NULL +SELECT COUNT(@@GLOBAL.innodb_data_encryption_providerurl); +COUNT(@@GLOBAL.innodb_data_encryption_providerurl) +0 +1 Expected +SET @@GLOBAL.innodb_data_encryption_providerurl='http://www.google.com'; +ERROR HY000: Variable 'innodb_data_encryption_providerurl' is a read only variable diff --git a/mysql-test/suite/sys_vars/r/innodb_encrypt_log_basic.result b/mysql-test/suite/sys_vars/r/innodb_encrypt_log_basic.result new file mode 100644 index 00000000000..4beb1a01edf --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_encrypt_log_basic.result @@ -0,0 +1,48 @@ +SELECT @@GLOBAL.innodb_encrypt_log; +@@GLOBAL.innodb_encrypt_log +0 +0 Expected +SET @@GLOBAL.innodb_encrypt_log=1; +ERROR HY000: Variable 'innodb_encrypt_log' is a read only variable +Expected 
error 'Read only variable' +SELECT @@GLOBAL.innodb_encrypt_log; +@@GLOBAL.innodb_encrypt_log +0 +0 Expected +SELECT IF(@@GLOBAL.innodb_encrypt_log, 'ON', 'OFF') = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_encrypt_log'; +IF(@@GLOBAL.innodb_encrypt_log, 'ON', 'OFF') = VARIABLE_VALUE +1 +1 Expected +SELECT @@GLOBAL.innodb_encrypt_log; +@@GLOBAL.innodb_encrypt_log +0 +0 Expected +SELECT VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_encrypt_log'; +VARIABLE_VALUE +OFF +0 Expected +SELECT @@innodb_encrypt_log = @@GLOBAL.innodb_encrypt_log; +@@innodb_encrypt_log = @@GLOBAL.innodb_encrypt_log +1 +1 Expected +SELECT @@innodb_encrypt_log; +@@innodb_encrypt_log +0 +0 Expected +SELECT COUNT(@@local.innodb_encrypt_log); +ERROR HY000: Variable 'innodb_encrypt_log' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT COUNT(@@SESSION.innodb_encrypt_log); +ERROR HY000: Variable 'innodb_encrypt_log' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT @@GLOBAL.innodb_encrypt_log; +@@GLOBAL.innodb_encrypt_log +0 +0 Expected +SELECT innodb_encrypt_log; +ERROR 42S22: Unknown column 'innodb_encrypt_log' in 'field list' +Expected error 'Unknown column in field list' diff --git a/mysql-test/suite/sys_vars/r/innodb_encrypt_tables_basic.result b/mysql-test/suite/sys_vars/r/innodb_encrypt_tables_basic.result new file mode 100644 index 00000000000..87212399f09 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_encrypt_tables_basic.result @@ -0,0 +1,41 @@ +SET @start_global_value = @@global.innodb_encrypt_tables; +select @@global.innodb_encrypt_tables; +@@global.innodb_encrypt_tables +0 +select @@session.innodb_encrypt_tables; +ERROR HY000: Variable 'innodb_encrypt_tables' is a GLOBAL variable +show global variables like 'innodb_encrypt_tables'; +Variable_name Value +innodb_encrypt_tables OFF +show session variables like 'innodb_encrypt_tables'; +Variable_name 
Value +innodb_encrypt_tables OFF +select * from information_schema.global_variables +where variable_name='innodb_encrypt_tables'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_ENCRYPT_TABLES OFF +select * from information_schema.session_variables +where variable_name='innodb_encrypt_tables'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_ENCRYPT_TABLES OFF +set global innodb_encrypt_tables=ON; +select @@global.innodb_encrypt_tables; +@@global.innodb_encrypt_tables +1 +set global innodb_encrypt_tables=OFF; +select @@global.innodb_encrypt_tables; +@@global.innodb_encrypt_tables +0 +set global innodb_encrypt_tables=1; +select @@global.innodb_encrypt_tables; +@@global.innodb_encrypt_tables +1 +set session innodb_encrypt_tables=1; +ERROR HY000: Variable 'innodb_encrypt_tables' is a GLOBAL variable and should be set with SET GLOBAL +set global innodb_encrypt_tables=1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_encrypt_tables' +set global innodb_encrypt_tables=1e1; +ERROR 42000: Incorrect argument type to variable 'innodb_encrypt_tables' +set global innodb_encrypt_tables="foo"; +ERROR 42000: Variable 'innodb_encrypt_tables' can't be set to the value of 'foo' +SET @@global.innodb_encrypt_tables = @start_global_value; diff --git a/mysql-test/suite/sys_vars/r/innodb_encryption_rotate_key_age_basic.result b/mysql-test/suite/sys_vars/r/innodb_encryption_rotate_key_age_basic.result new file mode 100644 index 00000000000..9f4c672f719 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_encryption_rotate_key_age_basic.result @@ -0,0 +1,41 @@ +SET @start_global_value = @@global.innodb_encryption_rotate_key_age; +select @@global.innodb_encryption_rotate_key_age; +@@global.innodb_encryption_rotate_key_age +1 +select @@session.innodb_encryption_rotate_key_age; +ERROR HY000: Variable 'innodb_encryption_rotate_key_age' is a GLOBAL variable +show global variables like 'innodb_encryption_rotate_key_age'; +Variable_name Value +innodb_encryption_rotate_key_age 1 +show session variables 
like 'innodb_encryption_rotate_key_age'; +Variable_name Value +innodb_encryption_rotate_key_age 1 +select * from information_schema.global_variables +where variable_name='innodb_encryption_rotate_key_age'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_ENCRYPTION_ROTATE_KEY_AGE 1 +select * from information_schema.session_variables +where variable_name='innodb_encryption_rotate_key_age'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_ENCRYPTION_ROTATE_KEY_AGE 1 +set global innodb_encryption_rotate_key_age=1; +select @@global.innodb_encryption_rotate_key_age; +@@global.innodb_encryption_rotate_key_age +1 +set global innodb_encryption_rotate_key_age=2; +select @@global.innodb_encryption_rotate_key_age; +@@global.innodb_encryption_rotate_key_age +2 +set global innodb_encryption_rotate_key_age=1; +select @@global.innodb_encryption_rotate_key_age; +@@global.innodb_encryption_rotate_key_age +1 +set session innodb_encryption_rotate_key_age=1; +ERROR HY000: Variable 'innodb_encryption_rotate_key_age' is a GLOBAL variable and should be set with SET GLOBAL +set global innodb_encryption_rotate_key_age=1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_encryption_rotate_key_age' +set global innodb_encryption_rotate_key_age=1e1; +ERROR 42000: Incorrect argument type to variable 'innodb_encryption_rotate_key_age' +set global innodb_encryption_rotate_key_age="foo"; +ERROR 42000: Incorrect argument type to variable 'innodb_encryption_rotate_key_age' +SET @@global.innodb_encryption_rotate_key_age = @start_global_value; diff --git a/mysql-test/suite/sys_vars/r/innodb_encryption_rotation_iops_basic.result b/mysql-test/suite/sys_vars/r/innodb_encryption_rotation_iops_basic.result new file mode 100644 index 00000000000..5a7267d2815 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_encryption_rotation_iops_basic.result @@ -0,0 +1,41 @@ +SET @start_global_value = @@global.innodb_encryption_rotation_iops; +select @@global.innodb_encryption_rotation_iops; 
+@@global.innodb_encryption_rotation_iops +100 +select @@session.innodb_encryption_rotation_iops; +ERROR HY000: Variable 'innodb_encryption_rotation_iops' is a GLOBAL variable +show global variables like 'innodb_encryption_rotation_iops'; +Variable_name Value +innodb_encryption_rotation_iops 100 +show session variables like 'innodb_encryption_rotation_iops'; +Variable_name Value +innodb_encryption_rotation_iops 100 +select * from information_schema.global_variables +where variable_name='innodb_encryption_rotation_iops'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_ENCRYPTION_ROTATION_IOPS 100 +select * from information_schema.session_variables +where variable_name='innodb_encryption_rotation_iops'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_ENCRYPTION_ROTATION_IOPS 100 +set global innodb_encryption_rotation_iops=100; +select @@global.innodb_encryption_rotation_iops; +@@global.innodb_encryption_rotation_iops +100 +set global innodb_encryption_rotation_iops=50; +select @@global.innodb_encryption_rotation_iops; +@@global.innodb_encryption_rotation_iops +50 +set global innodb_encryption_rotation_iops=100; +select @@global.innodb_encryption_rotation_iops; +@@global.innodb_encryption_rotation_iops +100 +set session innodb_encryption_rotation_iops=50; +ERROR HY000: Variable 'innodb_encryption_rotation_iops' is a GLOBAL variable and should be set with SET GLOBAL +set global innodb_encryption_rotation_iops=1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_encryption_rotation_iops' +set global innodb_encryption_rotation_iops=1e1; +ERROR 42000: Incorrect argument type to variable 'innodb_encryption_rotation_iops' +set global innodb_encryption_rotation_iops="foo"; +ERROR 42000: Incorrect argument type to variable 'innodb_encryption_rotation_iops' +SET @@global.innodb_encryption_rotation_iops = @start_global_value; diff --git a/mysql-test/suite/sys_vars/r/innodb_encryption_threads_basic.result b/mysql-test/suite/sys_vars/r/innodb_encryption_threads_basic.result new file mode 
100644 index 00000000000..727a08c9cc6 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_encryption_threads_basic.result @@ -0,0 +1,41 @@ +SET @start_global_value = @@global.innodb_encryption_threads; +select @@global.innodb_encryption_threads; +@@global.innodb_encryption_threads +0 +select @@session.innodb_encryption_threads; +ERROR HY000: Variable 'innodb_encryption_threads' is a GLOBAL variable +show global variables like 'innodb_encryption_threads'; +Variable_name Value +innodb_encryption_threads 0 +show session variables like 'innodb_encryption_threads'; +Variable_name Value +innodb_encryption_threads 0 +select * from information_schema.global_variables +where variable_name='innodb_encryption_threads'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_ENCRYPTION_THREADS 0 +select * from information_schema.session_variables +where variable_name='innodb_encryption_threads'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_ENCRYPTION_THREADS 0 +set global innodb_encryption_threads=0; +select @@global.innodb_encryption_threads; +@@global.innodb_encryption_threads +0 +set global innodb_encryption_threads=5; +select @@global.innodb_encryption_threads; +@@global.innodb_encryption_threads +5 +set global innodb_encryption_threads=1; +select @@global.innodb_encryption_threads; +@@global.innodb_encryption_threads +1 +set session innodb_encryption_threads=1; +ERROR HY000: Variable 'innodb_encryption_threads' is a GLOBAL variable and should be set with SET GLOBAL +set global innodb_encryption_threads=1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_encryption_threads' +set global innodb_encryption_threads=1e1; +ERROR 42000: Incorrect argument type to variable 'innodb_encryption_threads' +set global innodb_encryption_threads="foo"; +ERROR 42000: Incorrect argument type to variable 'innodb_encryption_threads' +SET @@global.innodb_encryption_threads = @start_global_value; diff --git a/mysql-test/suite/sys_vars/r/innodb_immediate_scrub_data_uncompressed_basic.result 
b/mysql-test/suite/sys_vars/r/innodb_immediate_scrub_data_uncompressed_basic.result new file mode 100644 index 00000000000..5b31918c5d2 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_immediate_scrub_data_uncompressed_basic.result @@ -0,0 +1,50 @@ +SET @start_global_value = @@global.innodb_immediate_scrub_data_uncompressed; +# +# exists as global only +# +select @@global.innodb_immediate_scrub_data_uncompressed; +@@global.innodb_immediate_scrub_data_uncompressed +0 +select @@session.innodb_immediate_scrub_data_uncompressed; +ERROR HY000: Variable 'innodb_immediate_scrub_data_uncompressed' is a GLOBAL variable +show global variables like 'innodb_immediate_scrub_data_uncompressed'; +Variable_name Value +innodb_immediate_scrub_data_uncompressed OFF +show session variables like 'innodb_immediate_scrub_data_uncompressed'; +Variable_name Value +innodb_immediate_scrub_data_uncompressed OFF +select * from information_schema.global_variables +where variable_name='innodb_immediate_scrub_data_uncompressed'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_IMMEDIATE_SCRUB_DATA_UNCOMPRESSED OFF +select * from information_schema.session_variables +where variable_name='innodb_immediate_scrub_data_uncompressed'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_IMMEDIATE_SCRUB_DATA_UNCOMPRESSED OFF +# +# show that it's writable +# +set global innodb_immediate_scrub_data_uncompressed=ON; +select @@global.innodb_immediate_scrub_data_uncompressed; +@@global.innodb_immediate_scrub_data_uncompressed +1 +set global innodb_immediate_scrub_data_uncompressed=OFF; +select @@global.innodb_immediate_scrub_data_uncompressed; +@@global.innodb_immediate_scrub_data_uncompressed +0 +set global innodb_immediate_scrub_data_uncompressed=1; +select @@global.innodb_immediate_scrub_data_uncompressed; +@@global.innodb_immediate_scrub_data_uncompressed +1 +set session innodb_immediate_scrub_data_uncompressed=1; +ERROR HY000: Variable 'innodb_immediate_scrub_data_uncompressed' is a GLOBAL variable and should be set with 
SET GLOBAL +# +# incorrect types +# +set global innodb_immediate_scrub_data_uncompressed=1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_immediate_scrub_data_uncompressed' +set global innodb_immediate_scrub_data_uncompressed=1e1; +ERROR 42000: Incorrect argument type to variable 'innodb_immediate_scrub_data_uncompressed' +set global innodb_immediate_scrub_data_uncompressed="foo"; +ERROR 42000: Variable 'innodb_immediate_scrub_data_uncompressed' can't be set to the value of 'foo' +SET @@global.innodb_immediate_scrub_data_uncompressed = @start_global_value; diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result index 8a88b7c4d49..85fe22dd816 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result @@ -180,6 +180,9 @@ compress_page_compressed_trim_op disabled compress_page_compressed_trim_op_saved disabled compress_pages_page_decompressed disabled compress_pages_page_compression_error disabled +compress_pages_page_encrypted disabled +compress_pages_page_decrypted disabled +compress_pages_page_encryption_error disabled index_page_splits disabled index_page_merge_attempts disabled index_page_merge_successful disabled diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result index 8a88b7c4d49..85fe22dd816 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result @@ -180,6 +180,9 @@ compress_page_compressed_trim_op disabled compress_page_compressed_trim_op_saved disabled compress_pages_page_decompressed disabled compress_pages_page_compression_error disabled +compress_pages_page_encrypted disabled +compress_pages_page_decrypted disabled +compress_pages_page_encryption_error disabled index_page_splits disabled 
index_page_merge_attempts disabled index_page_merge_successful disabled diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result index 8a88b7c4d49..85fe22dd816 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result @@ -180,6 +180,9 @@ compress_page_compressed_trim_op disabled compress_page_compressed_trim_op_saved disabled compress_pages_page_decompressed disabled compress_pages_page_compression_error disabled +compress_pages_page_encrypted disabled +compress_pages_page_decrypted disabled +compress_pages_page_encryption_error disabled index_page_splits disabled index_page_merge_attempts disabled index_page_merge_successful disabled diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result index 8a88b7c4d49..85fe22dd816 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result @@ -180,6 +180,9 @@ compress_page_compressed_trim_op disabled compress_page_compressed_trim_op_saved disabled compress_pages_page_decompressed disabled compress_pages_page_compression_error disabled +compress_pages_page_encrypted disabled +compress_pages_page_decrypted disabled +compress_pages_page_encryption_error disabled index_page_splits disabled index_page_merge_attempts disabled index_page_merge_successful disabled diff --git a/mysql-test/suite/sys_vars/r/innodb_scrub_force_testing_basic.result b/mysql-test/suite/sys_vars/r/innodb_scrub_force_testing_basic.result new file mode 100644 index 00000000000..24287efaffc --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_scrub_force_testing_basic.result @@ -0,0 +1,50 @@ +SET @start_global_value = @@global.innodb_scrub_force_testing; +# +# exists as global only +# +select 
@@global.innodb_scrub_force_testing; +@@global.innodb_scrub_force_testing +0 +select @@session.innodb_scrub_force_testing; +ERROR HY000: Variable 'innodb_scrub_force_testing' is a GLOBAL variable +show global variables like 'innodb_scrub_force_testing'; +Variable_name Value +innodb_scrub_force_testing OFF +show session variables like 'innodb_scrub_force_testing'; +Variable_name Value +innodb_scrub_force_testing OFF +select * from information_schema.global_variables +where variable_name='innodb_scrub_force_testing'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_SCRUB_FORCE_TESTING OFF +select * from information_schema.session_variables +where variable_name='innodb_scrub_force_testing'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_SCRUB_FORCE_TESTING OFF +# +# show that it's writable +# +set global innodb_scrub_force_testing=ON; +select @@global.innodb_scrub_force_testing; +@@global.innodb_scrub_force_testing +1 +set global innodb_scrub_force_testing=OFF; +select @@global.innodb_scrub_force_testing; +@@global.innodb_scrub_force_testing +0 +set global innodb_scrub_force_testing=1; +select @@global.innodb_scrub_force_testing; +@@global.innodb_scrub_force_testing +1 +set session innodb_scrub_force_testing=1; +ERROR HY000: Variable 'innodb_scrub_force_testing' is a GLOBAL variable and should be set with SET GLOBAL +# +# incorrect types +# +set global innodb_scrub_force_testing=1.1; +ERROR 42000: Incorrect argument type to variable 'innodb_scrub_force_testing' +set global innodb_scrub_force_testing=1e1; +ERROR 42000: Incorrect argument type to variable 'innodb_scrub_force_testing' +set global innodb_scrub_force_testing="foo"; +ERROR 42000: Variable 'innodb_scrub_force_testing' can't be set to the value of 'foo' +SET @@global.innodb_scrub_force_testing = @start_global_value; diff --git a/mysql-test/suite/sys_vars/r/innodb_scrub_log_basic.result b/mysql-test/suite/sys_vars/r/innodb_scrub_log_basic.result new file mode 100644 index 00000000000..fc3a31fc5a2 --- /dev/null +++ 
b/mysql-test/suite/sys_vars/r/innodb_scrub_log_basic.result @@ -0,0 +1,48 @@ +SELECT @@GLOBAL.innodb_scrub_log; +@@GLOBAL.innodb_scrub_log +0 +0 Expected +SET @@GLOBAL.innodb_scrub_log=1; +ERROR HY000: Variable 'innodb_scrub_log' is a read only variable +Expected error 'Read only variable' +SELECT @@GLOBAL.innodb_scrub_log; +@@GLOBAL.innodb_scrub_log +0 +0 Expected +SELECT IF(@@GLOBAL.innodb_scrub_log, 'ON', 'OFF') = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_scrub_log'; +IF(@@GLOBAL.innodb_scrub_log, 'ON', 'OFF') = VARIABLE_VALUE +1 +1 Expected +SELECT @@GLOBAL.innodb_scrub_log; +@@GLOBAL.innodb_scrub_log +0 +0 Expected +SELECT VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_scrub_log'; +VARIABLE_VALUE +OFF +0 Expected +SELECT @@innodb_scrub_log = @@GLOBAL.innodb_scrub_log; +@@innodb_scrub_log = @@GLOBAL.innodb_scrub_log +1 +1 Expected +SELECT @@innodb_scrub_log; +@@innodb_scrub_log +0 +0 Expected +SELECT @@local.innodb_scrub_log; +ERROR HY000: Variable 'innodb_scrub_log' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT @@SESSION.innodb_scrub_log; +ERROR HY000: Variable 'innodb_scrub_log' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT @@GLOBAL.innodb_scrub_log; +@@GLOBAL.innodb_scrub_log +0 +0 Expected +SELECT innodb_scrub_log; +ERROR 42S22: Unknown column 'innodb_scrub_log' in 'field list' +Expected error 'Unknow column in field list' diff --git a/mysql-test/suite/sys_vars/r/innodb_scrub_log_interval_basic.result b/mysql-test/suite/sys_vars/r/innodb_scrub_log_interval_basic.result new file mode 100644 index 00000000000..0d7bc7e61c6 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_scrub_log_interval_basic.result @@ -0,0 +1,53 @@ +SELECT @@GLOBAL.innodb_scrub_log_interval; +@@GLOBAL.innodb_scrub_log_interval +2000 +200 Expected +SET @@GLOBAL.innodb_scrub_log_interval=100; +1 Expected +SELECT 
@@GLOBAL.innodb_scrub_log_interval; +@@GLOBAL.innodb_scrub_log_interval +100 +100 Expected +SET @@GLOBAL.innodb_scrub_log_interval=DEFAULT; +1 Expected +SELECT @@GLOBAL.innodb_scrub_log_interval; +@@GLOBAL.innodb_scrub_log_interval +2000 +200 Expected +SELECT @@GLOBAL.innodb_scrub_log_interval = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_scrub_log_interval'; +@@GLOBAL.innodb_scrub_log_interval = VARIABLE_VALUE +1 +1 Expected +SELECT @@GLOBAL.innodb_scrub_log_interval; +@@GLOBAL.innodb_scrub_log_interval +2000 +200 Expected +SELECT VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_scrub_log_interval'; +VARIABLE_VALUE +2000 +200 Expected +SELECT @@innodb_scrub_log_interval = @@GLOBAL.innodb_scrub_log_interval; +@@innodb_scrub_log_interval = @@GLOBAL.innodb_scrub_log_interval +1 +1 Expected +SELECT @@innodb_scrub_log_interval; +@@innodb_scrub_log_interval +2000 +200 Expected +SELECT @@local.innodb_scrub_log_interval; +ERROR HY000: Variable 'innodb_scrub_log_interval' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT @@SESSION.innodb_scrub_log_interval; +ERROR HY000: Variable 'innodb_scrub_log_interval' is a GLOBAL variable +Expected error 'Variable is a GLOBAL variable' +SELECT @@GLOBAL.innodb_scrub_log_interval; +@@GLOBAL.innodb_scrub_log_interval +2000 +200 Expected +SELECT innodb_scrub_log_interval; +ERROR 42S22: Unknown column 'innodb_scrub_log_interval' in 'field list' +Expected error 'Unknow column in field list' diff --git a/mysql-test/suite/sys_vars/r/sysvars_aria.result b/mysql-test/suite/sys_vars/r/sysvars_aria.result index c717fdb8203..fdaa20f414c 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_aria.result +++ b/mysql-test/suite/sys_vars/r/sysvars_aria.result @@ -43,6 +43,20 @@ NUMERIC_BLOCK_SIZE 1 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME ARIA_ENCRYPT_TABLES +SESSION_VALUE NULL +GLOBAL_VALUE OFF 
+GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE OFF +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Encrypt tables (only for tables with ROW_FORMAT=PAGE (default) and not FIXED/DYNAMIC) +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME ARIA_FORCE_START_AFTER_RECOVERY_FAILURES SESSION_VALUE NULL GLOBAL_VALUE 0 diff --git a/mysql-test/suite/sys_vars/r/sysvars_debug.result b/mysql-test/suite/sys_vars/r/sysvars_debug.result index a46e135af0a..b7f169dc22e 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_debug.result +++ b/mysql-test/suite/sys_vars/r/sysvars_debug.result @@ -57,6 +57,20 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT OPTIONAL +VARIABLE_NAME DEBUG_ENCRYPTION_KEY_VERSION +SESSION_VALUE NULL +GLOBAL_VALUE 0 +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE 0 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE INT UNSIGNED +VARIABLE_COMMENT Encryption key version. Only to be used in internal testing. +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 4294967295 +NUMERIC_BLOCK_SIZE 1 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME DEBUG_MUTEX_DEADLOCK_DETECTOR SESSION_VALUE NULL GLOBAL_VALUE ON @@ -99,3 +113,17 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT NULL +VARIABLE_NAME DEBUG_USE_STATIC_ENCRYPTION_KEYS +SESSION_VALUE NULL +GLOBAL_VALUE OFF +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE OFF +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Enable use of nonrandom encryption keys. 
Only to be used in internal testing +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST OFF,ON +READ_ONLY YES +COMMAND_LINE_ARGUMENT OPTIONAL diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result index 3ee4a09b954..966187a61ab 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result @@ -173,6 +173,62 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY YES COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_BACKGROUND_SCRUB_DATA_CHECK_INTERVAL +SESSION_VALUE NULL +GLOBAL_VALUE 3600 +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE 3600 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE INT UNSIGNED +VARIABLE_COMMENT check if spaces needs scrubbing every innodb_background_scrub_data_check_interval seconds +NUMERIC_MIN_VALUE 1 +NUMERIC_MAX_VALUE 4294967295 +NUMERIC_BLOCK_SIZE 0 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_BACKGROUND_SCRUB_DATA_COMPRESSED +SESSION_VALUE NULL +GLOBAL_VALUE OFF +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE OFF +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Enable scrubbing of compressed data by background threads (same as encryption_threads) +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_BACKGROUND_SCRUB_DATA_INTERVAL +SESSION_VALUE NULL +GLOBAL_VALUE 604800 +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE 604800 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE INT UNSIGNED +VARIABLE_COMMENT scrub spaces that were last scrubbed longer than innodb_background_scrub_data_interval seconds ago +NUMERIC_MIN_VALUE 1 +NUMERIC_MAX_VALUE 4294967295 +NUMERIC_BLOCK_SIZE 0 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_BACKGROUND_SCRUB_DATA_UNCOMPRESSED +SESSION_VALUE NULL +GLOBAL_VALUE OFF 
+GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE OFF +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Enable scrubbing of uncompressed data by background threads (same as encryption_threads) +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_BUFFER_POOL_DUMP_AT_SHUTDOWN SESSION_VALUE NULL GLOBAL_VALUE OFF @@ -649,6 +705,76 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY YES COMMAND_LINE_ARGUMENT OPTIONAL +VARIABLE_NAME INNODB_ENCRYPTION_ROTATE_KEY_AGE +SESSION_VALUE NULL +GLOBAL_VALUE 1 +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE 1 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE INT UNSIGNED +VARIABLE_COMMENT Rotate any page having a key older than this +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 4294967295 +NUMERIC_BLOCK_SIZE 0 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_ENCRYPTION_ROTATION_IOPS +SESSION_VALUE NULL +GLOBAL_VALUE 100 +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE 100 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE INT UNSIGNED +VARIABLE_COMMENT Use this many iops for background key rotation +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 4294967295 +NUMERIC_BLOCK_SIZE 0 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_ENCRYPTION_THREADS +SESSION_VALUE NULL +GLOBAL_VALUE 0 +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE 0 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE INT UNSIGNED +VARIABLE_COMMENT No of threads performing background key rotation and scrubbing +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 4294967295 +NUMERIC_BLOCK_SIZE 0 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_ENCRYPT_LOG +SESSION_VALUE NULL +GLOBAL_VALUE OFF +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE OFF +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Enable redo log encryption/decryption. 
+NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST NULL +READ_ONLY YES +COMMAND_LINE_ARGUMENT OPTIONAL +VARIABLE_NAME INNODB_ENCRYPT_TABLES +SESSION_VALUE NULL +GLOBAL_VALUE OFF +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE OFF +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Encrypt all tables in the storage engine +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_FAST_SHUTDOWN SESSION_VALUE NULL GLOBAL_VALUE 1 @@ -1055,6 +1181,20 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_IMMEDIATE_SCRUB_DATA_UNCOMPRESSED +SESSION_VALUE NULL +GLOBAL_VALUE OFF +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE OFF +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Enable scrubbing of data +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_IO_CAPACITY SESSION_VALUE NULL GLOBAL_VALUE 200 @@ -1671,6 +1811,48 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT OPTIONAL +VARIABLE_NAME INNODB_SCRUB_FORCE_TESTING +SESSION_VALUE NULL +GLOBAL_VALUE OFF +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE OFF +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Perform extra scrubbing to increase test exposure +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_SCRUB_LOG +SESSION_VALUE NULL +GLOBAL_VALUE OFF +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE OFF +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Enable redo log scrubbing +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST NULL +READ_ONLY YES +COMMAND_LINE_ARGUMENT OPTIONAL 
+VARIABLE_NAME INNODB_SCRUB_LOG_INTERVAL +SESSION_VALUE NULL +GLOBAL_VALUE 2000 +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE 2000 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT Innodb redo log scrubbing interval in ms +NUMERIC_MIN_VALUE 10 +NUMERIC_MAX_VALUE 18446744073709551615 +NUMERIC_BLOCK_SIZE 0 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME INNODB_SIMULATE_COMP_FAILURES SESSION_VALUE NULL GLOBAL_VALUE 0 diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result index 2bf6814633f..48e5fd621e9 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result @@ -679,6 +679,34 @@ NUMERIC_BLOCK_SIZE 1 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME ENCRYPTION_ALGORITHM +SESSION_VALUE NULL +GLOBAL_VALUE none +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE none +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE ENUM +VARIABLE_COMMENT Which encryption algorithm to use for table encryption. aes_cbc is the recommended one. 
+NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST none,aes_ecb,aes_cbc,aes_ctr +READ_ONLY YES +COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME ENCRYPT_TMP_DISK_TABLES +SESSION_VALUE NULL +GLOBAL_VALUE OFF +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE OFF +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Encrypt tmp disk tables (created as part of query execution) +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST OFF,ON +READ_ONLY NO +COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME ERROR_COUNT SESSION_VALUE 0 GLOBAL_VALUE NULL diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result index 0f3b71b1421..00f11a87792 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result @@ -693,6 +693,34 @@ NUMERIC_BLOCK_SIZE 1 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME ENCRYPTION_ALGORITHM +SESSION_VALUE NULL +GLOBAL_VALUE none +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE none +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE ENUM +VARIABLE_COMMENT Which encryption algorithm to use for table encryption. aes_cbc is the recommended one. 
+NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST none,aes_ecb,aes_cbc,aes_ctr +READ_ONLY YES +COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME ENCRYPT_TMP_DISK_TABLES +SESSION_VALUE NULL +GLOBAL_VALUE OFF +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE OFF +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Encrypt tmp disk tables (created as part of query execution) +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST OFF,ON +READ_ONLY NO +COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME ERROR_COUNT SESSION_VALUE 0 GLOBAL_VALUE NULL diff --git a/mysql-test/suite/sys_vars/t/aria_encrypt_tables_basic.test b/mysql-test/suite/sys_vars/t/aria_encrypt_tables_basic.test new file mode 100644 index 00000000000..2db0708e925 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/aria_encrypt_tables_basic.test @@ -0,0 +1,41 @@ +# bool global +--source include/have_maria.inc + +SET @start_global_value = @@global.aria_encrypt_tables; + +# +# exists as global only +# +select @@global.aria_encrypt_tables; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.aria_encrypt_tables; +show global variables like 'aria_encrypt_tables'; +show session variables like 'aria_encrypt_tables'; +select * from information_schema.global_variables +where variable_name='aria_encrypt_tables'; +select * from information_schema.session_variables +where variable_name='aria_encrypt_tables'; + +# +# show that it's writable +# +set global aria_encrypt_tables=ON; +select @@global.aria_encrypt_tables; +set global aria_encrypt_tables=OFF; +select @@global.aria_encrypt_tables; +set global aria_encrypt_tables=1; +select @@global.aria_encrypt_tables; +--error ER_GLOBAL_VARIABLE +set session aria_encrypt_tables=1; + +# +# incorrect types +# +--error ER_WRONG_TYPE_FOR_VAR +set global aria_encrypt_tables=1.1; +--error ER_WRONG_TYPE_FOR_VAR +set global aria_encrypt_tables=1e1; +--error ER_WRONG_VALUE_FOR_VAR +set global 
aria_encrypt_tables="foo"; + +SET @@global.aria_encrypt_tables = @start_global_value; diff --git a/mysql-test/suite/sys_vars/t/debug_encryption_key_version_basic.test b/mysql-test/suite/sys_vars/t/debug_encryption_key_version_basic.test new file mode 100644 index 00000000000..007724b0966 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/debug_encryption_key_version_basic.test @@ -0,0 +1,3 @@ +--source include/have_debug.inc +# This is just to satisfy all_vars +select 1; diff --git a/mysql-test/suite/sys_vars/t/debug_use_static_encryption_keys_basic.test b/mysql-test/suite/sys_vars/t/debug_use_static_encryption_keys_basic.test new file mode 100644 index 00000000000..2e0d51e89b7 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/debug_use_static_encryption_keys_basic.test @@ -0,0 +1,3 @@ +# This is just to satisfy all_vars +--source include/have_debug.inc +show global variables like "debug_use_static_encryption_keys"; diff --git a/mysql-test/suite/sys_vars/t/encrypt_tmp_disk_tables_basic.test b/mysql-test/suite/sys_vars/t/encrypt_tmp_disk_tables_basic.test new file mode 100644 index 00000000000..ee01e6d8ba4 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/encrypt_tmp_disk_tables_basic.test @@ -0,0 +1,41 @@ +# bool global +--source include/have_maria.inc + +SET @start_global_value = @@global.encrypt_tmp_disk_tables; + +# +# exists as global only +# +select @@global.encrypt_tmp_disk_tables; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.encrypt_tmp_disk_tables; +show global variables like 'encrypt_tmp_disk_tables'; +show session variables like 'encrypt_tmp_disk_tables'; +select * from information_schema.global_variables +where variable_name='encrypt_tmp_disk_tables'; +select * from information_schema.session_variables +where variable_name='encrypt_tmp_disk_tables'; + +# +# show that it's writable +# +set global encrypt_tmp_disk_tables=ON; +select @@global.encrypt_tmp_disk_tables; +set global encrypt_tmp_disk_tables=OFF; +select @@global.encrypt_tmp_disk_tables; 
+set global encrypt_tmp_disk_tables=1; +select @@global.encrypt_tmp_disk_tables; +--error ER_GLOBAL_VARIABLE +set session encrypt_tmp_disk_tables=1; + +# +# incorrect types +# +--error ER_WRONG_TYPE_FOR_VAR +set global encrypt_tmp_disk_tables=1.1; +--error ER_WRONG_TYPE_FOR_VAR +set global encrypt_tmp_disk_tables=1e1; +--error ER_WRONG_VALUE_FOR_VAR +set global encrypt_tmp_disk_tables="foo"; + +SET @@global.encrypt_tmp_disk_tables = @start_global_value; diff --git a/mysql-test/suite/sys_vars/t/encryption_algorithm_basic.test b/mysql-test/suite/sys_vars/t/encryption_algorithm_basic.test new file mode 100644 index 00000000000..065453eba34 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/encryption_algorithm_basic.test @@ -0,0 +1,13 @@ +# bool global + +# exists as global only +# +select @@global.encryption_algorithm; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.encryption_algorithm; + +# +# show that it's not writable +# +--error 1238 +set global encryption_algorithm="none"; diff --git a/mysql-test/suite/sys_vars/t/innodb_background_scrub_data_check_interval_basic.test b/mysql-test/suite/sys_vars/t/innodb_background_scrub_data_check_interval_basic.test new file mode 100644 index 00000000000..77c8671a1bc --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_background_scrub_data_check_interval_basic.test @@ -0,0 +1,54 @@ +# bool global +--source include/have_innodb.inc + +SET @start_global_value = @@global.innodb_background_scrub_data_check_interval; + +--echo # +--echo # default value +--echo # +select @@global.innodb_background_scrub_data_check_interval; +set global innodb_background_scrub_data_check_interval=10; +select @@global.innodb_background_scrub_data_check_interval; +set global innodb_background_scrub_data_check_interval=DEFAULT; +select @@global.innodb_background_scrub_data_check_interval; +set global innodb_background_scrub_data_check_interval=20; +select @@global.innodb_background_scrub_data_check_interval; +set global 
innodb_background_scrub_data_check_interval=DEFAULT; +select @@global.innodb_background_scrub_data_check_interval; + +--echo # +--echo # exists as global only +--echo # +select @@global.innodb_background_scrub_data_check_interval; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.innodb_background_scrub_data_check_interval; +show global variables like 'innodb_background_scrub_data_check_interval'; +show session variables like 'innodb_background_scrub_data_check_interval'; +select * from information_schema.global_variables +where variable_name='innodb_background_scrub_data_check_interval'; +select * from information_schema.session_variables +where variable_name='innodb_background_scrub_data_check_interval'; + +--echo # +--echo # show that it's writable +--echo # +set global innodb_background_scrub_data_check_interval=10; +select @@global.innodb_background_scrub_data_check_interval; +set global innodb_background_scrub_data_check_interval=20; +select @@global.innodb_background_scrub_data_check_interval; +set global innodb_background_scrub_data_check_interval=1; +select @@global.innodb_background_scrub_data_check_interval; +--error ER_GLOBAL_VARIABLE +set session innodb_background_scrub_data_check_interval=1; + +--echo # +--echo # incorrect types +--echo # +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_background_scrub_data_check_interval=1.1; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_background_scrub_data_check_interval=1e1; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_background_scrub_data_check_interval="foo"; + +SET @@global.innodb_background_scrub_data_check_interval = @start_global_value; diff --git a/mysql-test/suite/sys_vars/t/innodb_background_scrub_data_compressed_basic.test b/mysql-test/suite/sys_vars/t/innodb_background_scrub_data_compressed_basic.test new file mode 100644 index 00000000000..fdf77bbb780 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_background_scrub_data_compressed_basic.test @@ -0,0 +1,41 @@ +# bool global 
+--source include/have_innodb.inc + +SET @start_global_value = @@global.innodb_background_scrub_data_compressed; + +--echo # +--echo # exists as global only +--echo # +select @@global.innodb_background_scrub_data_compressed; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.innodb_background_scrub_data_compressed; +show global variables like 'innodb_background_scrub_data_compressed'; +show session variables like 'innodb_background_scrub_data_compressed'; +select * from information_schema.global_variables +where variable_name='innodb_background_scrub_data_compressed'; +select * from information_schema.session_variables +where variable_name='innodb_background_scrub_data_compressed'; + +--echo # +--echo # show that it's writable +--echo # +set global innodb_background_scrub_data_compressed=ON; +select @@global.innodb_background_scrub_data_compressed; +set global innodb_background_scrub_data_compressed=OFF; +select @@global.innodb_background_scrub_data_compressed; +set global innodb_background_scrub_data_compressed=1; +select @@global.innodb_background_scrub_data_compressed; +--error ER_GLOBAL_VARIABLE +set session innodb_background_scrub_data_compressed=1; + +--echo # +--echo # incorrect types +--echo # +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_background_scrub_data_compressed=1.1; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_background_scrub_data_compressed=1e1; +--error ER_WRONG_VALUE_FOR_VAR +set global innodb_background_scrub_data_compressed="foo"; + +SET @@global.innodb_background_scrub_data_compressed = @start_global_value; diff --git a/mysql-test/suite/sys_vars/t/innodb_background_scrub_data_interval_basic.test b/mysql-test/suite/sys_vars/t/innodb_background_scrub_data_interval_basic.test new file mode 100644 index 00000000000..9f1187a97ff --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_background_scrub_data_interval_basic.test @@ -0,0 +1,41 @@ +# bool global +--source include/have_innodb.inc + +SET @start_global_value = 
@@global.innodb_background_scrub_data_interval; + +--echo # +--echo # exists as global only +--echo # +select @@global.innodb_background_scrub_data_interval; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.innodb_background_scrub_data_interval; +show global variables like 'innodb_background_scrub_data_interval'; +show session variables like 'innodb_background_scrub_data_interval'; +select * from information_schema.global_variables +where variable_name='innodb_background_scrub_data_interval'; +select * from information_schema.session_variables +where variable_name='innodb_background_scrub_data_interval'; + +--echo # +--echo # show that it's writable +--echo # +set global innodb_background_scrub_data_interval=100; +select @@global.innodb_background_scrub_data_interval; +set global innodb_background_scrub_data_interval=200; +select @@global.innodb_background_scrub_data_interval; +set global innodb_background_scrub_data_interval=300; +select @@global.innodb_background_scrub_data_interval; +--error ER_GLOBAL_VARIABLE +set session innodb_background_scrub_data_interval=400; + +--echo # +--echo # incorrect types +--echo # +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_background_scrub_data_interval=1.1; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_background_scrub_data_interval=1e1; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_background_scrub_data_interval="foo"; + +SET @@global.innodb_background_scrub_data_interval = @start_global_value; diff --git a/mysql-test/suite/sys_vars/t/innodb_background_scrub_data_uncompressed_basic.test b/mysql-test/suite/sys_vars/t/innodb_background_scrub_data_uncompressed_basic.test new file mode 100644 index 00000000000..76d6535ad81 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_background_scrub_data_uncompressed_basic.test @@ -0,0 +1,41 @@ +# bool global +--source include/have_innodb.inc + +SET @start_global_value = @@global.innodb_background_scrub_data_uncompressed; + +--echo # +--echo # exists as global only 
+--echo # +select @@global.innodb_background_scrub_data_uncompressed; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.innodb_background_scrub_data_uncompressed; +show global variables like 'innodb_background_scrub_data_uncompressed'; +show session variables like 'innodb_background_scrub_data_uncompressed'; +select * from information_schema.global_variables +where variable_name='innodb_background_scrub_data_uncompressed'; +select * from information_schema.session_variables +where variable_name='innodb_background_scrub_data_uncompressed'; + +--echo # +--echo # show that it's writable +--echo # +set global innodb_background_scrub_data_uncompressed=ON; +select @@global.innodb_background_scrub_data_uncompressed; +set global innodb_background_scrub_data_uncompressed=OFF; +select @@global.innodb_background_scrub_data_uncompressed; +set global innodb_background_scrub_data_uncompressed=1; +select @@global.innodb_background_scrub_data_uncompressed; +--error ER_GLOBAL_VARIABLE +set session innodb_background_scrub_data_uncompressed=1; + +--echo # +--echo # incorrect types +--echo # +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_background_scrub_data_uncompressed=1.1; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_background_scrub_data_uncompressed=1e1; +--error ER_WRONG_VALUE_FOR_VAR +set global innodb_background_scrub_data_uncompressed="foo"; + +SET @@global.innodb_background_scrub_data_uncompressed = @start_global_value; diff --git a/mysql-test/suite/sys_vars/t/innodb_checksum_algorithm_basic.test b/mysql-test/suite/sys_vars/t/innodb_checksum_algorithm_basic.test index bb0f3417f87..e7098b7e3b3 100644 --- a/mysql-test/suite/sys_vars/t/innodb_checksum_algorithm_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_checksum_algorithm_basic.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/not_encrypted.inc # Check the default value SET @orig = @@global.innodb_checksum_algorithm; diff --git 
a/mysql-test/suite/sys_vars/t/innodb_data_encryption_filekey_basic.test b/mysql-test/suite/sys_vars/t/innodb_data_encryption_filekey_basic.test new file mode 100644 index 00000000000..a35be702c25 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_data_encryption_filekey_basic.test @@ -0,0 +1,14 @@ +--source include/have_innodb.inc +--source include/have_innodb_encryption.inc + +SELECT @start_data_encryption_filekey; + +SELECT COUNT(@@GLOBAL.innodb_data_encryption_filekey); +--echo 1 Expected + +# This variable is read only variable +--error 1238 +SET @@GLOBAL.innodb_data_encryption_filekey='secret'; + + + diff --git a/mysql-test/suite/sys_vars/t/innodb_data_encryption_providername_basic.test b/mysql-test/suite/sys_vars/t/innodb_data_encryption_providername_basic.test new file mode 100644 index 00000000000..80e8282af8f --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_data_encryption_providername_basic.test @@ -0,0 +1,11 @@ +--source include/have_innodb.inc +--source include/have_innodb_encryption.inc + +SELECT @start_data_encryption_providername; + +SELECT COUNT(@@GLOBAL.innodb_data_encryption_providername); +--echo 1 Expected + +# This variable is read only variable +--error 1238 +SET @@GLOBAL.innodb_data_encryption_providername='key.txt'; diff --git a/mysql-test/suite/sys_vars/t/innodb_data_encryption_providertype_basic.test b/mysql-test/suite/sys_vars/t/innodb_data_encryption_providertype_basic.test new file mode 100644 index 00000000000..7e841a27d27 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_data_encryption_providertype_basic.test @@ -0,0 +1,16 @@ +--source include/have_innodb.inc +--source include/have_innodb_encryption.inc + +SELECT @start_data_encryption_providertype; + +SELECT COUNT(@@GLOBAL.innodb_data_encryption_providertype); +--echo 1 Expected + +# This variable is read only variable +--error 1238 +SET @@GLOBAL.innodb_data_encryption_providertype=1; + +# This variable is read only variable +--error 1238 +SET 
@@GLOBAL.innodb_data_encryption_providertype=k; + diff --git a/mysql-test/suite/sys_vars/t/innodb_data_encryption_providerurl_basic.test b/mysql-test/suite/sys_vars/t/innodb_data_encryption_providerurl_basic.test new file mode 100644 index 00000000000..d742fe2aa06 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_data_encryption_providerurl_basic.test @@ -0,0 +1,14 @@ +--source include/have_innodb.inc +--source include/have_innodb_encryption.inc + +SELECT @start_data_encryption_providerurl; + +SELECT COUNT(@@GLOBAL.innodb_data_encryption_providerurl); +--echo 1 Expected + +# This variable is read only variable +--error 1238 +SET @@GLOBAL.innodb_data_encryption_providerurl='http://www.google.com'; + + + diff --git a/mysql-test/suite/sys_vars/t/innodb_encrypt_log_basic.test b/mysql-test/suite/sys_vars/t/innodb_encrypt_log_basic.test new file mode 100644 index 00000000000..775e8a3b944 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_encrypt_log_basic.test @@ -0,0 +1,50 @@ +--source include/have_innodb.inc + +# Display default value +SELECT @@GLOBAL.innodb_encrypt_log; +--echo 0 Expected + +# Check if value can be set +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +SET @@GLOBAL.innodb_encrypt_log=1; +--echo Expected error 'Read only variable' + +SELECT @@GLOBAL.innodb_encrypt_log; +--echo 0 Expected + +# Check if the value in GLOBAL TABLE matches value in variable +SELECT IF(@@GLOBAL.innodb_encrypt_log, 'ON', 'OFF') = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_encrypt_log'; +--echo 1 Expected + +SELECT @@GLOBAL.innodb_encrypt_log; +--echo 0 Expected + +SELECT VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_encrypt_log'; +--echo 0 Expected + +# Check if accessing variable with and without GLOBAL point to same variable +SELECT @@innodb_encrypt_log = @@GLOBAL.innodb_encrypt_log; +--echo 1 Expected + +# Check if innodb_encrypt_log can be accessed with and without @@ sign +SELECT 
@@innodb_encrypt_log; +--echo 0 Expected + +--Error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT COUNT(@@local.innodb_encrypt_log); +--echo Expected error 'Variable is a GLOBAL variable' + +--Error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT COUNT(@@SESSION.innodb_encrypt_log); +--echo Expected error 'Variable is a GLOBAL variable' + +SELECT @@GLOBAL.innodb_encrypt_log; +--echo 0 Expected + +--Error ER_BAD_FIELD_ERROR +SELECT innodb_encrypt_log; +--echo Expected error 'Unknown column in field list' diff --git a/mysql-test/suite/sys_vars/t/innodb_encrypt_tables_basic.test b/mysql-test/suite/sys_vars/t/innodb_encrypt_tables_basic.test new file mode 100644 index 00000000000..6d6c6b94c0f --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_encrypt_tables_basic.test @@ -0,0 +1,41 @@ +# bool global +--source include/have_innodb.inc + +SET @start_global_value = @@global.innodb_encrypt_tables; + +# +# exists as global only +# +select @@global.innodb_encrypt_tables; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.innodb_encrypt_tables; +show global variables like 'innodb_encrypt_tables'; +show session variables like 'innodb_encrypt_tables'; +select * from information_schema.global_variables +where variable_name='innodb_encrypt_tables'; +select * from information_schema.session_variables +where variable_name='innodb_encrypt_tables'; + +# +# show that it's writable +# +set global innodb_encrypt_tables=ON; +select @@global.innodb_encrypt_tables; +set global innodb_encrypt_tables=OFF; +select @@global.innodb_encrypt_tables; +set global innodb_encrypt_tables=1; +select @@global.innodb_encrypt_tables; +--error ER_GLOBAL_VARIABLE +set session innodb_encrypt_tables=1; + +# +# incorrect types +# +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_encrypt_tables=1.1; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_encrypt_tables=1e1; +--error ER_WRONG_VALUE_FOR_VAR +set global innodb_encrypt_tables="foo"; + +SET @@global.innodb_encrypt_tables = @start_global_value; diff --git 
a/mysql-test/suite/sys_vars/t/innodb_encryption_rotate_key_age_basic.test b/mysql-test/suite/sys_vars/t/innodb_encryption_rotate_key_age_basic.test new file mode 100644 index 00000000000..51112886130 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_encryption_rotate_key_age_basic.test @@ -0,0 +1,41 @@ +# numeric global +--source include/have_innodb.inc + +SET @start_global_value = @@global.innodb_encryption_rotate_key_age; + +# +# exists as global only +# +select @@global.innodb_encryption_rotate_key_age; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.innodb_encryption_rotate_key_age; +show global variables like 'innodb_encryption_rotate_key_age'; +show session variables like 'innodb_encryption_rotate_key_age'; +select * from information_schema.global_variables +where variable_name='innodb_encryption_rotate_key_age'; +select * from information_schema.session_variables +where variable_name='innodb_encryption_rotate_key_age'; + +# +# show that it's writable +# +set global innodb_encryption_rotate_key_age=1; +select @@global.innodb_encryption_rotate_key_age; +set global innodb_encryption_rotate_key_age=2; +select @@global.innodb_encryption_rotate_key_age; +set global innodb_encryption_rotate_key_age=1; +select @@global.innodb_encryption_rotate_key_age; +--error ER_GLOBAL_VARIABLE +set session innodb_encryption_rotate_key_age=1; + +# +# incorrect types +# +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_encryption_rotate_key_age=1.1; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_encryption_rotate_key_age=1e1; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_encryption_rotate_key_age="foo"; + +SET @@global.innodb_encryption_rotate_key_age = @start_global_value; diff --git a/mysql-test/suite/sys_vars/t/innodb_encryption_rotation_iops_basic.test b/mysql-test/suite/sys_vars/t/innodb_encryption_rotation_iops_basic.test new file mode 100644 index 00000000000..cdbada6c5c8 --- /dev/null +++ 
b/mysql-test/suite/sys_vars/t/innodb_encryption_rotation_iops_basic.test @@ -0,0 +1,41 @@ +# numeric global +--source include/have_innodb.inc + +SET @start_global_value = @@global.innodb_encryption_rotation_iops; + +# +# exists as global only +# +select @@global.innodb_encryption_rotation_iops; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.innodb_encryption_rotation_iops; +show global variables like 'innodb_encryption_rotation_iops'; +show session variables like 'innodb_encryption_rotation_iops'; +select * from information_schema.global_variables +where variable_name='innodb_encryption_rotation_iops'; +select * from information_schema.session_variables +where variable_name='innodb_encryption_rotation_iops'; + +# +# show that it's writable +# +set global innodb_encryption_rotation_iops=100; +select @@global.innodb_encryption_rotation_iops; +set global innodb_encryption_rotation_iops=50; +select @@global.innodb_encryption_rotation_iops; +set global innodb_encryption_rotation_iops=100; +select @@global.innodb_encryption_rotation_iops; +--error ER_GLOBAL_VARIABLE +set session innodb_encryption_rotation_iops=50; + +# +# incorrect types +# +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_encryption_rotation_iops=1.1; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_encryption_rotation_iops=1e1; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_encryption_rotation_iops="foo"; + +SET @@global.innodb_encryption_rotation_iops = @start_global_value; diff --git a/mysql-test/suite/sys_vars/t/innodb_encryption_threads_basic.test b/mysql-test/suite/sys_vars/t/innodb_encryption_threads_basic.test new file mode 100644 index 00000000000..c3e163faef4 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_encryption_threads_basic.test @@ -0,0 +1,41 @@ +# numeric global +--source include/have_innodb.inc + +SET @start_global_value = @@global.innodb_encryption_threads; + +# +# exists as global only +# +select @@global.innodb_encryption_threads; +--error 
ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.innodb_encryption_threads; +show global variables like 'innodb_encryption_threads'; +show session variables like 'innodb_encryption_threads'; +select * from information_schema.global_variables +where variable_name='innodb_encryption_threads'; +select * from information_schema.session_variables +where variable_name='innodb_encryption_threads'; + +# +# show that it's writable +# +set global innodb_encryption_threads=0; +select @@global.innodb_encryption_threads; +set global innodb_encryption_threads=5; +select @@global.innodb_encryption_threads; +set global innodb_encryption_threads=1; +select @@global.innodb_encryption_threads; +--error ER_GLOBAL_VARIABLE +set session innodb_encryption_threads=1; + +# +# incorrect types +# +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_encryption_threads=1.1; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_encryption_threads=1e1; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_encryption_threads="foo"; + +SET @@global.innodb_encryption_threads = @start_global_value; diff --git a/mysql-test/suite/sys_vars/t/innodb_immediate_scrub_data_uncompressed_basic.test b/mysql-test/suite/sys_vars/t/innodb_immediate_scrub_data_uncompressed_basic.test new file mode 100644 index 00000000000..f3a3fba153a --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_immediate_scrub_data_uncompressed_basic.test @@ -0,0 +1,41 @@ +# bool global +--source include/have_innodb.inc + +SET @start_global_value = @@global.innodb_immediate_scrub_data_uncompressed; + +--echo # +--echo # exists as global only +--echo # +select @@global.innodb_immediate_scrub_data_uncompressed; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.innodb_immediate_scrub_data_uncompressed; +show global variables like 'innodb_immediate_scrub_data_uncompressed'; +show session variables like 'innodb_immediate_scrub_data_uncompressed'; +select * from information_schema.global_variables +where 
variable_name='innodb_immediate_scrub_data_uncompressed'; +select * from information_schema.session_variables +where variable_name='innodb_immediate_scrub_data_uncompressed'; + +--echo # +--echo # show that it's writable +--echo # +set global innodb_immediate_scrub_data_uncompressed=ON; +select @@global.innodb_immediate_scrub_data_uncompressed; +set global innodb_immediate_scrub_data_uncompressed=OFF; +select @@global.innodb_immediate_scrub_data_uncompressed; +set global innodb_immediate_scrub_data_uncompressed=1; +select @@global.innodb_immediate_scrub_data_uncompressed; +--error ER_GLOBAL_VARIABLE +set session innodb_immediate_scrub_data_uncompressed=1; + +--echo # +--echo # incorrect types +--echo # +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_immediate_scrub_data_uncompressed=1.1; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_immediate_scrub_data_uncompressed=1e1; +--error ER_WRONG_VALUE_FOR_VAR +set global innodb_immediate_scrub_data_uncompressed="foo"; + +SET @@global.innodb_immediate_scrub_data_uncompressed = @start_global_value; diff --git a/mysql-test/suite/sys_vars/t/innodb_scrub_force_testing_basic.test b/mysql-test/suite/sys_vars/t/innodb_scrub_force_testing_basic.test new file mode 100644 index 00000000000..e2df0de4e28 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_scrub_force_testing_basic.test @@ -0,0 +1,42 @@ +# bool global +--source include/have_innodb.inc +--source include/have_debug.inc + +SET @start_global_value = @@global.innodb_scrub_force_testing; + +--echo # +--echo # exists as global only +--echo # +select @@global.innodb_scrub_force_testing; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.innodb_scrub_force_testing; +show global variables like 'innodb_scrub_force_testing'; +show session variables like 'innodb_scrub_force_testing'; +select * from information_schema.global_variables +where variable_name='innodb_scrub_force_testing'; +select * from information_schema.session_variables +where 
variable_name='innodb_scrub_force_testing'; + +--echo # +--echo # show that it's writable +--echo # +set global innodb_scrub_force_testing=ON; +select @@global.innodb_scrub_force_testing; +set global innodb_scrub_force_testing=OFF; +select @@global.innodb_scrub_force_testing; +set global innodb_scrub_force_testing=1; +select @@global.innodb_scrub_force_testing; +--error ER_GLOBAL_VARIABLE +set session innodb_scrub_force_testing=1; + +--echo # +--echo # incorrect types +--echo # +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_scrub_force_testing=1.1; +--error ER_WRONG_TYPE_FOR_VAR +set global innodb_scrub_force_testing=1e1; +--error ER_WRONG_VALUE_FOR_VAR +set global innodb_scrub_force_testing="foo"; + +SET @@global.innodb_scrub_force_testing = @start_global_value; diff --git a/mysql-test/suite/sys_vars/t/innodb_scrub_log_basic.test b/mysql-test/suite/sys_vars/t/innodb_scrub_log_basic.test new file mode 100644 index 00000000000..302f2a963c4 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_scrub_log_basic.test @@ -0,0 +1,50 @@ +--source include/have_innodb.inc + +# Display default value +SELECT @@GLOBAL.innodb_scrub_log; +--echo 0 Expected + +# Check if value can be set +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +SET @@GLOBAL.innodb_scrub_log=1; +--echo Expected error 'Read only variable' + +SELECT @@GLOBAL.innodb_scrub_log; +--echo 0 Expected + +# Check if the value in GLOBAL TABLE matches value in variable +SELECT IF(@@GLOBAL.innodb_scrub_log, 'ON', 'OFF') = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_scrub_log'; +--echo 1 Expected + +SELECT @@GLOBAL.innodb_scrub_log; +--echo 0 Expected + +SELECT VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_scrub_log'; +--echo 0 Expected + +# Check if accessing variable with and without GLOBAL point to same variable +SELECT @@innodb_scrub_log = @@GLOBAL.innodb_scrub_log; +--echo 1 Expected + +# Check if innodb_scrub_log can be accessed with and 
without @@ sign +SELECT @@innodb_scrub_log; +--echo 0 Expected + +--Error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT @@local.innodb_scrub_log; +--echo Expected error 'Variable is a GLOBAL variable' + +--Error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT @@SESSION.innodb_scrub_log; +--echo Expected error 'Variable is a GLOBAL variable' + +SELECT @@GLOBAL.innodb_scrub_log; +--echo 0 Expected + +--Error ER_BAD_FIELD_ERROR +SELECT innodb_scrub_log; +--echo Expected error 'Unknow column in field list' diff --git a/mysql-test/suite/sys_vars/t/innodb_scrub_log_interval_basic.test b/mysql-test/suite/sys_vars/t/innodb_scrub_log_interval_basic.test new file mode 100644 index 00000000000..e8d4f1bc737 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_scrub_log_interval_basic.test @@ -0,0 +1,55 @@ +--source include/have_innodb.inc + +# Display default value +SELECT @@GLOBAL.innodb_scrub_log_interval; +--echo 200 Expected + +# Check if value can be set +SET @@GLOBAL.innodb_scrub_log_interval=100; +--echo 1 Expected + +SELECT @@GLOBAL.innodb_scrub_log_interval; +--echo 100 Expected + +SET @@GLOBAL.innodb_scrub_log_interval=DEFAULT; +--echo 1 Expected + +SELECT @@GLOBAL.innodb_scrub_log_interval; +--echo 200 Expected + +# Check if the value in GLOBAL TABLE matches value in variable +SELECT @@GLOBAL.innodb_scrub_log_interval = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_scrub_log_interval'; +--echo 1 Expected + +SELECT @@GLOBAL.innodb_scrub_log_interval; +--echo 200 Expected + +SELECT VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_scrub_log_interval'; +--echo 200 Expected + +# Check if accessing variable with and without GLOBAL point to same variable +SELECT @@innodb_scrub_log_interval = @@GLOBAL.innodb_scrub_log_interval; +--echo 1 Expected + +# Check if innodb_scrub_log_interval can be accessed with and without @@ sign +SELECT @@innodb_scrub_log_interval; +--echo 200 Expected + +--Error 
ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT @@local.innodb_scrub_log_interval; +--echo Expected error 'Variable is a GLOBAL variable' + +--Error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT @@SESSION.innodb_scrub_log_interval; +--echo Expected error 'Variable is a GLOBAL variable' + +SELECT @@GLOBAL.innodb_scrub_log_interval; +--echo 200 Expected + +--Error ER_BAD_FIELD_ERROR +SELECT innodb_scrub_log_interval; +--echo Expected error 'Unknow column in field list' diff --git a/mysql-test/t/information_schema_all_engines-master.opt b/mysql-test/t/information_schema_all_engines-master.opt index e37aeaac933..dec3b51813f 100644 --- a/mysql-test/t/information_schema_all_engines-master.opt +++ b/mysql-test/t/information_schema_all_engines-master.opt @@ -13,3 +13,5 @@ --loose-innodb-sys-foreign-cols --loose-innodb-sys-tables --loose-innodb-sys-tablestats +--loose-innodb-tablespaces-encryption +--loose-innodb-tablespaces-scrubbing diff --git a/mysql-test/t/mysqld--help.test b/mysql-test/t/mysqld--help.test index 83f58171333..3ffcd31092d 100644 --- a/mysql-test/t/mysqld--help.test +++ b/mysql-test/t/mysqld--help.test @@ -30,7 +30,8 @@ perl; thread-concurrency super-large-pages mutex-deadlock-detector connect null-audit aria oqgraph sphinx thread-handling test-sql-discovery rpl-semi-sync query-cache-info - query-response-time metadata-lock-info locales wsrep/; + query-response-time metadata-lock-info locales wsrep + file-key-management/; # And substitute the content some environment variables with their # names: diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt index d432c22b966..0fd3eb12657 100644 --- a/mysys/CMakeLists.txt +++ b/mysys/CMakeLists.txt @@ -70,7 +70,7 @@ IF(HAVE_MLOCK) ENDIF() ADD_CONVENIENCE_LIBRARY(mysys ${MYSYS_SOURCES}) -TARGET_LINK_LIBRARIES(mysys dbug strings ${ZLIB_LIBRARY} +TARGET_LINK_LIBRARIES(mysys dbug strings mysys_ssl ${ZLIB_LIBRARY} ${LIBNSL} ${LIBM} ${LIBRT} ${LIBSOCKET} ${LIBEXECINFO}) DTRACE_INSTRUMENT(mysys) diff --git a/mysys/my_default.c 
b/mysys/my_default.c index 87258a3b27e..e40e24fd5bc 100644 --- a/mysys/my_default.c +++ b/mysys/my_default.c @@ -102,8 +102,7 @@ static const char *f_extensions[]= { ".cnf", 0 }; #define NEWLINE "\n" #endif -static int handle_default_option(void *in_ctx, const char *group_name, - const char *option); +static int handle_default_option(void *, const char *, const char *); /* This structure defines the context that we pass to callback @@ -917,7 +916,7 @@ static int search_default_file_with_ext(Process_option_func opt_handler, end= remove_end_comment(ptr); if ((value= strchr(ptr, '='))) - end= value; /* Option without argument */ + end= value; for ( ; my_isspace(&my_charset_latin1,end[-1]) ; end--) ; if (!value) { diff --git a/mysys/my_thr_init.c b/mysys/my_thr_init.c index d9dbacc8524..d49a2eff970 100644 --- a/mysys/my_thr_init.c +++ b/mysys/my_thr_init.c @@ -22,6 +22,7 @@ #include "mysys_priv.h" #include <m_string.h> #include <signal.h> +#include <my_crypt_key_management.h> pthread_key(struct st_my_thread_var*, THR_KEY_mysys); mysql_mutex_t THR_LOCK_malloc, THR_LOCK_open, @@ -65,6 +66,10 @@ static void my_thread_init_common_mutex(void) #if !defined(HAVE_LOCALTIME_R) || !defined(HAVE_GMTIME_R) mysql_mutex_init(key_LOCK_localtime_r, &LOCK_localtime_r, MY_MUTEX_INIT_SLOW); #endif +#ifndef DBUG_OFF + mysql_rwlock_init(key_LOCK_dbug_encryption_key_version, + &LOCK_dbug_encryption_key_version); +#endif } void my_thread_destroy_common_mutex(void) @@ -79,6 +84,9 @@ void my_thread_destroy_common_mutex(void) #if !defined(HAVE_LOCALTIME_R) || !defined(HAVE_GMTIME_R) mysql_mutex_destroy(&LOCK_localtime_r); #endif +#ifndef DBUG_OFF + mysql_rwlock_destroy(&LOCK_dbug_encryption_key_version); +#endif } diff --git a/mysys_ssl/CMakeLists.txt b/mysys_ssl/CMakeLists.txt index 45867095e4a..05b04b4d8cb 100644 --- a/mysys_ssl/CMakeLists.txt +++ b/mysys_ssl/CMakeLists.txt @@ -39,6 +39,9 @@ SET(MYSYS_SSL_SOURCES my_sha2.cc my_md5.cc my_rnd.cc + my_crypt.cc + my_crypt_key_management.cc + 
my_crypt_key_management_impl.cc ) ADD_CONVENIENCE_LIBRARY(mysys_ssl ${MYSYS_SSL_SOURCES}) diff --git a/mysys_ssl/my_aes.cc b/mysys_ssl/my_aes.cc index 9327bc32a3b..20bd03551c2 100644 --- a/mysys_ssl/my_aes.cc +++ b/mysys_ssl/my_aes.cc @@ -17,13 +17,17 @@ #include <my_global.h> #include <m_string.h> #include <my_aes.h> +#include <my_crypt.h> #if defined(HAVE_YASSL) #include "aes.hpp" #include "openssl/ssl.h" +#include "crypto_wrapper.hpp" #elif defined(HAVE_OPENSSL) #include <openssl/aes.h> #include <openssl/evp.h> +#include <openssl/buffer.h> +#include <openssl/conf.h> // Wrap C struct, to ensure resources are released. struct MyCipherCtx @@ -37,11 +41,6 @@ struct MyCipherCtx enum encrypt_dir { MY_AES_ENCRYPT, MY_AES_DECRYPT }; -#define MY_AES_BLOCK_SIZE 16 /* Block size in bytes */ - -/* If bad data discovered during decoding */ -#define AES_BAD_DATA -1 - /** This is internal function just keeps joint code of Key generation @@ -101,28 +100,796 @@ static int my_aes_create_key(const char *key, int key_length, uint8 *rkey) return 0; } +/** + Decode a hex-encoded string to uint8[]. + + SYNOPSIS + my_aes_hex2uint() + @param in [in] Pointer to hexadecimal encoded input string (two hex digits per output byte) + @param out [out] Pointer to output uint8 array. Memory allocated by caller + @param dest_length [in] Size of destination array in bytes. 
+ */ + +void my_aes_hex2uint(const char* in, unsigned char *out, int dest_length) +{ + const char *pos= in; + int count; + for (count = 0; count < dest_length; count++) + { + uchar res; + sscanf(pos, "%2hhx", &res); + out[count] = res; + pos += 2 * sizeof(char); + } +} + + +/** + Calculate key and iv from a given salt and secret as it is handled + in openssl encrypted files via console + + SYNOPSIS + my_bytes_to_key() + @param salt [in] the given salt as extracted from the encrypted file + @param secret [in] the given secret as String, provided by the user + @param key [out] 32 Bytes of key are written to this pointer + @param iv [out] 16 Bytes of iv are written to this pointer +*/ + +void my_bytes_to_key(const unsigned char *salt, const char *secret, unsigned char *key, + unsigned char *iv) +{ +#ifdef HAVE_YASSL + /* the yassl function has no support for SHA1. Reason unknown. */ + int keyLen = 32; + int ivLen = 16; + int EVP_SALT_SZ = 8; + const int SHA_LEN = 20; + yaSSL::SHA myMD; + uint digestSz = myMD.get_digestSize(); + unsigned char digest[SHA_LEN]; // max size + int sz = strlen(secret); + int count = 1; + int keyLeft = keyLen; + int ivLeft = ivLen; + int keyOutput = 0; + + while (keyOutput < (keyLen + ivLen)) + { + int digestLeft = digestSz; + if (keyOutput) // first time D_0 is empty + myMD.update(digest, digestSz); + myMD.update((yaSSL::byte* )secret, sz); + if (salt) + myMD.update(salt, EVP_SALT_SZ); + myMD.get_digest(digest); + for (int j = 1; j < count; j++) + { + myMD.update(digest, digestSz); + myMD.get_digest(digest); + } + + if (keyLeft) + { + int store = MY_MIN(keyLeft, static_cast<int>(digestSz)); + memcpy(&key[keyLen - keyLeft], digest, store); + + keyOutput += store; + keyLeft -= store; + digestLeft -= store; + } + + if (ivLeft && digestLeft) + { + int store = MY_MIN(ivLeft, digestLeft); + memcpy(&iv[ivLen - ivLeft], &digest[digestSz - digestLeft], store); + + keyOutput += store; + ivLeft -= store; + } + } +#elif defined(HAVE_OPENSSL) + const 
EVP_CIPHER *type = EVP_aes_256_cbc(); + const EVP_MD *digest = EVP_sha1(); + EVP_BytesToKey(type, digest, salt, (uchar*) secret, strlen(secret), 1, key, iv); +#endif +} /** - Crypt buffer with AES encryption algorithm. + Crypt buffer with AES CBC encryption algorithm. SYNOPSIS - my_aes_encrypt() + my_aes_encrypt_cbc() @param source [in] Pointer to data for encryption @param source_length [in] Size of encryption data @param dest [out] Buffer to place encrypted data (must be large enough) + @param dest_length [out] Pointer to size of encrypted data @param key [in] Key to be used for encryption - @param key_length [in] Length of the key. Will handle keys of any length + @param key_length [in] Length of the key. 16, 24 or 32 + @param iv [in] Iv to be used for encryption + @param iv_length [in] Length of the iv. should be 16. + @param noPadding [in] if set to true, no padding is used. if the input length is not a + multiple of the AES block size, trailing bytes are only copied to destination buffer. + This allows currently the same interface for CBC, ECB and CTR encryption. + @return + != 0 error + 0 no error +*/ + +static int my_aes_encrypt_cbc(const uchar* source, uint32 source_length, + uchar* dest, uint32* dest_length, + const unsigned char* key, uint8 key_length, + const unsigned char* iv, uint8 iv_length, + uint noPadding) +{ + uint8 remaining_bytes = (noPadding == 0) ? 
0 : source_length % MY_AES_BLOCK_SIZE; + source_length = source_length - remaining_bytes; + +#ifdef HAVE_YASSL + TaoCrypt::AES_CBC_Encryption enc; + /* 128 bit block used for padding */ + uint8 block[MY_AES_BLOCK_SIZE]; + int num_blocks; /* number of complete blocks */ + int i; + switch(key_length) { + case 16: + break; + case 24: + break; + case 32: + break; + default: + return AES_BAD_KEYSIZE; + } + + enc.SetKey((const TaoCrypt::byte *) key, key_length, (const TaoCrypt::byte *) iv); + + num_blocks = source_length / MY_AES_BLOCK_SIZE; + + for (i = num_blocks; i > 0; i--) /* Encode complete blocks */ + { + enc.Process((TaoCrypt::byte *) dest, (const TaoCrypt::byte *) source, + MY_AES_BLOCK_SIZE); + source += MY_AES_BLOCK_SIZE; + dest += MY_AES_BLOCK_SIZE; + } + + if (noPadding) { + if (remaining_bytes!=0) { + memcpy(dest + source_length, source + source_length, remaining_bytes); + } + *dest_length = MY_AES_BLOCK_SIZE * (num_blocks) + remaining_bytes; + return AES_OK; + + } + + /* Encode the rest. We always have incomplete block */ + char pad_len = MY_AES_BLOCK_SIZE - (source_length - + MY_AES_BLOCK_SIZE * num_blocks); + memcpy(block, source, 16 - pad_len); + memset(block + MY_AES_BLOCK_SIZE - pad_len, pad_len, pad_len); + + enc.Process((TaoCrypt::byte *) dest, (const TaoCrypt::byte *) block, + MY_AES_BLOCK_SIZE); + + *dest_length = MY_AES_BLOCK_SIZE * (num_blocks + 1); + return AES_OK; +#elif defined(HAVE_OPENSSL) + MyCipherCtx ctx; + int u_len, f_len; + /* The real key to be used for encryption */ + const EVP_CIPHER* cipher; + switch(key_length) { + case 16: + cipher = EVP_aes_128_cbc(); + break; + case 24: + cipher = EVP_aes_192_cbc(); + break; + case 32: + cipher = EVP_aes_256_cbc(); + break; + default: + return AES_BAD_KEYSIZE; + } + //Initialize Encryption Engine here, default software Engine is default + ENGINE *engine = NULL; + + if (! 
EVP_EncryptInit_ex(&ctx.ctx, cipher, engine, key, iv)) + return AES_BAD_DATA; /* Error */ + if (noPadding) { + EVP_CIPHER_CTX_set_padding(&ctx.ctx, 0); + } + EVP_CIPHER_CTX_key_length(&ctx.ctx); + OPENSSL_assert(EVP_CIPHER_CTX_key_length(&ctx.ctx) == key_length); + OPENSSL_assert(EVP_CIPHER_CTX_iv_length(&ctx.ctx) == iv_length); + OPENSSL_assert(EVP_CIPHER_CTX_block_size(&ctx.ctx) == 16); + if (! EVP_EncryptUpdate(&ctx.ctx, (unsigned char *) dest, &u_len, + (unsigned const char *) source, source_length)) + return AES_BAD_DATA; /* Error */ + if (! EVP_EncryptFinal_ex(&ctx.ctx, (unsigned char *) dest + u_len, &f_len)) + return AES_BAD_DATA; /* Error */ + + if (remaining_bytes!=0) { + memcpy(dest + source_length, source + source_length, remaining_bytes); + } + *dest_length = (unsigned long int) (u_len + f_len + remaining_bytes); + + return AES_OK; +#else + /* currently Open SSL is required */ + return AES_BAD_DATA; +#endif +} + + +/** + Crypt buffer with AES ECB encryption algorithm. + SYNOPSIS + my_aes_encrypt_ecb() + @param source [in] Pointer to data for encryption + @param source_length [in] Size of encryption data + @param dest [out] Buffer to place encrypted data (must be large enough) + @param dest_length [out] Pointer to size of encrypted data + @param key [in] Key to be used for encryption + @param key_length [in] Length of the key. 16, 24 or 32 + @param iv [in] Iv to be used for encryption + @param iv_length [in] Length of the iv. should be 16. + @param noPadding [in] if set to true, no padding is used. if the input length is not a + multiple of the AES block size, trailing bytes are only copied to destination buffer. + This allows currently the same interface for CBC, ECB and CTR encryption. 
@return - >= 0 Size of encrypted data - < 0 Error + != 0 error + 0 no error +*/ + +static int my_aes_encrypt_ecb(const uchar* source, uint32 source_length, + uchar* dest, uint32* dest_length, + const unsigned char* key, uint8 key_length, + const unsigned char* iv, uint8 iv_length, + uint noPadding) +{ + uint8 remaining_bytes = (noPadding == 0) ? 0 : source_length % MY_AES_BLOCK_SIZE; + source_length = source_length - remaining_bytes; + +#ifdef HAVE_YASSL + TaoCrypt::AES_ECB_Encryption enc; + /* 128 bit block used for padding */ + uint8 block[MY_AES_BLOCK_SIZE]; + int num_blocks; /* number of complete blocks */ + int i; + switch(key_length) { + case 16: + break; + case 24: + break; + case 32: + break; + default: + return AES_BAD_KEYSIZE; + } + + enc.SetKey((const TaoCrypt::byte *) key, key_length, (const TaoCrypt::byte *) iv); + + num_blocks = source_length / MY_AES_BLOCK_SIZE; + + for (i = num_blocks; i > 0; i--) /* Encode complete blocks */ + { + enc.Process((TaoCrypt::byte *) dest, (const TaoCrypt::byte *) source, + MY_AES_BLOCK_SIZE); + source += MY_AES_BLOCK_SIZE; + dest += MY_AES_BLOCK_SIZE; + } + + if (noPadding) { + if (remaining_bytes!=0) { + memcpy(dest + source_length, source + source_length, remaining_bytes); + } + *dest_length = MY_AES_BLOCK_SIZE * (num_blocks) + remaining_bytes; + return AES_OK; + + } + + /* Encode the rest. 
We always have incomplete block */ + char pad_len = MY_AES_BLOCK_SIZE - (source_length - + MY_AES_BLOCK_SIZE * num_blocks); + memcpy(block, source, 16 - pad_len); + memset(block + MY_AES_BLOCK_SIZE - pad_len, pad_len, pad_len); + + enc.Process((TaoCrypt::byte *) dest, (const TaoCrypt::byte *) block, + MY_AES_BLOCK_SIZE); + + *dest_length = MY_AES_BLOCK_SIZE * (num_blocks + 1); + return AES_OK; +#elif defined(HAVE_OPENSSL) + MyCipherCtx ctx; + int u_len, f_len; + /* The real key to be used for encryption */ + const EVP_CIPHER* cipher; + switch(key_length) { + case 16: + cipher = EVP_aes_128_ecb(); + break; + case 24: + cipher = EVP_aes_192_ecb(); + break; + case 32: + cipher = EVP_aes_256_ecb(); + break; + default: + return AES_BAD_KEYSIZE; + } + //Initialize Encryption Engine here, default software Engine is default + ENGINE *engine = NULL; + + if (! EVP_EncryptInit_ex(&ctx.ctx, cipher, engine, key, iv)) + return AES_BAD_DATA; /* Error */ + if (noPadding) { + EVP_CIPHER_CTX_set_padding(&ctx.ctx, 0); + } + EVP_CIPHER_CTX_key_length(&ctx.ctx); + OPENSSL_assert(EVP_CIPHER_CTX_key_length(&ctx.ctx) == key_length); + OPENSSL_assert(EVP_CIPHER_CTX_iv_length(&ctx.ctx) == iv_length); + OPENSSL_assert(EVP_CIPHER_CTX_block_size(&ctx.ctx) == 16); + if (! EVP_EncryptUpdate(&ctx.ctx, (unsigned char *) dest, &u_len, + (unsigned const char *) source, source_length)) + return AES_BAD_DATA; /* Error */ + if (! 
EVP_EncryptFinal_ex(&ctx.ctx, (unsigned char *) dest + u_len, &f_len)) + return AES_BAD_DATA; /* Error */ + + if (remaining_bytes!=0) { + memcpy(dest + source_length, source + source_length, remaining_bytes); + } + *dest_length = (unsigned long int) (u_len + f_len + remaining_bytes); + + return AES_OK; +#else + /* currently Open SSL is required */ + return AES_BAD_DATA; +#endif +} + + + +/** + AES decryption - CBC mode + + SYNOPSIS + my_aes_decrypt_cbc() + @param source [in] Pointer to data to decrypt + @param source_length [in] Size of data + @param dest [out] Buffer to place decrypted data (must be large enough) + @param dest_length [out] Pointer to size of decrypted data + @param key [in] Key to be used for decryption + @param key_length [in] Length of the key. 16, 24 or 32 + @param iv [in] Iv to be used for encryption + @param iv_length [in] Length of the iv. should be 16. + @param noPadding [in] if set to true, no padding is used. if the input length is not a + multiple of the AES block size, trailing bytes are only copied to destination buffer. + This allows currently the same interface for CBC, ECB and CTR encryption. + + @return + != 0 error + 0 no error +*/ + +static int my_aes_decrypt_cbc(const uchar* source, uint32 source_length, + uchar* dest, uint32 *dest_length, + const unsigned char* key, uint8 key_length, + const unsigned char* iv, uint8 iv_length, + uint noPadding) +{ + uint8 remaining_bytes = (noPadding == 0) ? 
0 : source_length % MY_AES_BLOCK_SIZE; + source_length = source_length - remaining_bytes; + + +#ifdef HAVE_YASSL + TaoCrypt::AES_CBC_Decryption dec; + /* 128 bit block used for padding */ + uint8 block[MY_AES_BLOCK_SIZE]; + uint num_blocks; /* Number of complete blocks */ + int i; + switch(key_length) { + case 16: + break; + case 24: + break; + case 32: + break; + default: + return AES_BAD_KEYSIZE; + } + + dec.SetKey((const TaoCrypt::byte *) key, key_length, iv); + + num_blocks = source_length / MY_AES_BLOCK_SIZE; + + if ((source_length != num_blocks * MY_AES_BLOCK_SIZE) || num_blocks == 0 ) + /* Input size has to be even and at least one block */ + return AES_BAD_DATA; + + /* Decode all but last blocks */ + for (i = num_blocks - 1; i > 0; i--) + { + dec.Process((TaoCrypt::byte *) dest, (const TaoCrypt::byte *) source, + MY_AES_BLOCK_SIZE); + source += MY_AES_BLOCK_SIZE; + dest += MY_AES_BLOCK_SIZE; + } + + dec.Process((TaoCrypt::byte *) block, (const TaoCrypt::byte *) source, + MY_AES_BLOCK_SIZE); + + if (noPadding) { + memcpy(dest, block, MY_AES_BLOCK_SIZE); + if (remaining_bytes!=0) { + memcpy(dest + source_length, source + source_length, remaining_bytes); + } + *dest_length = MY_AES_BLOCK_SIZE * num_blocks + remaining_bytes; + return AES_OK; + } + + /* Use last char in the block as size */ + uint pad_len = (uint) (uchar) block[MY_AES_BLOCK_SIZE - 1]; + + if (pad_len > MY_AES_BLOCK_SIZE) + return AES_BAD_DATA; + /* We could also check whole padding but we do not really need this */ + + memcpy(dest, block, MY_AES_BLOCK_SIZE - pad_len); + *dest_length = MY_AES_BLOCK_SIZE * num_blocks - pad_len; + return AES_OK; +#elif defined(HAVE_OPENSSL) + MyCipherCtx ctx; + int u_len, f_len; + + const EVP_CIPHER* cipher; + switch(key_length) { + case 16: + cipher = EVP_aes_128_cbc(); + break; + case 24: + cipher = EVP_aes_192_cbc(); + break; + case 32: + cipher = EVP_aes_256_cbc(); + break; + default: + return AES_BAD_KEYSIZE; + } + //Initialize Encryption Engine here, default 
software Engine is default + ENGINE *engine = NULL; + + if (! EVP_DecryptInit_ex(&ctx.ctx, cipher, engine, key, iv)) + return AES_BAD_DATA; /* Error */ + if (noPadding) { + EVP_CIPHER_CTX_set_padding(&ctx.ctx, 0); + } + OPENSSL_assert(EVP_CIPHER_CTX_key_length(&ctx.ctx) == key_length); + OPENSSL_assert(EVP_CIPHER_CTX_iv_length(&ctx.ctx) == iv_length); + OPENSSL_assert(EVP_CIPHER_CTX_block_size(&ctx.ctx) == 16); + if (! EVP_DecryptUpdate(&ctx.ctx, (unsigned char *) dest, &u_len, + (unsigned char *)source, source_length)) + return AES_BAD_DATA; /* Error */ + if (! EVP_DecryptFinal_ex(&ctx.ctx, (unsigned char *) dest + u_len, &f_len)) { + *dest_length = (unsigned long int) u_len; + return AES_BAD_DATA; + } + if (remaining_bytes!=0) { + memcpy(dest + source_length, source + source_length, remaining_bytes); + } + *dest_length = (unsigned long int) (u_len + f_len) + remaining_bytes; +#endif + return AES_OK; +} + +/** + AES decryption - ECB mode + + SYNOPSIS + my_aes_decrypt_ecb() + @param source [in] Pointer to data to decrypt + @param source_length [in] Size of data + @param dest [out] Buffer to place decrypted data (must be large enough) + @param dest_length [out] Pointer to size of decrypted data + @param key [in] Key to be used for decryption + @param key_length [in] Length of the key. 16, 24 or 32 + @param iv [in] Iv to be used for encryption + @param iv_length [in] Length of the iv. should be 16. + @param noPadding [in] if set to true, no padding is used. if the input length is not a + multiple of the AES block size, trailing bytes are only copied to destination buffer. + This allows currently the same interface for CBC, ECB and CTR encryption. 
+ + @return + != 0 error + 0 no error */ -int my_aes_encrypt(const char* source, int source_length, char* dest, +static int my_aes_decrypt_ecb(const uchar* source, uint32 source_length, + uchar* dest, uint32 *dest_length, + const unsigned char* key, uint8 key_length, + const unsigned char* iv, uint8 iv_length, + uint noPadding) +{ + uint8 remaining_bytes = (noPadding == 0) ? 0 : source_length % MY_AES_BLOCK_SIZE; + source_length = source_length - remaining_bytes; + + +#ifdef HAVE_YASSL + TaoCrypt::AES_ECB_Decryption dec; + /* 128 bit block used for padding */ + uint8 block[MY_AES_BLOCK_SIZE]; + uint num_blocks; /* Number of complete blocks */ + int i; + switch(key_length) { + case 16: + break; + case 24: + break; + case 32: + break; + default: + return AES_BAD_KEYSIZE; + } + + dec.SetKey((const TaoCrypt::byte *) key, key_length, iv); + + num_blocks = source_length / MY_AES_BLOCK_SIZE; + + if ((source_length != num_blocks * MY_AES_BLOCK_SIZE) || num_blocks == 0 ) + /* Input size has to be even and at least one block */ + return AES_BAD_DATA; + + /* Decode all but last blocks */ + for (i = num_blocks - 1; i > 0; i--) + { + dec.Process((TaoCrypt::byte *) dest, (const TaoCrypt::byte *) source, + MY_AES_BLOCK_SIZE); + source += MY_AES_BLOCK_SIZE; + dest += MY_AES_BLOCK_SIZE; + } + + dec.Process((TaoCrypt::byte *) block, (const TaoCrypt::byte *) source, + MY_AES_BLOCK_SIZE); + + if (noPadding) { + memcpy(dest, block, MY_AES_BLOCK_SIZE); + if (remaining_bytes!=0) { + memcpy(dest + source_length, source + source_length, remaining_bytes); + } + *dest_length = MY_AES_BLOCK_SIZE * num_blocks + remaining_bytes; + return AES_OK; + } + + /* Use last char in the block as size */ + uint pad_len = (uint) (uchar) block[MY_AES_BLOCK_SIZE - 1]; + + if (pad_len > MY_AES_BLOCK_SIZE) + return AES_BAD_DATA; + /* We could also check whole padding but we do not really need this */ + + memcpy(dest, block, MY_AES_BLOCK_SIZE - pad_len); + *dest_length = MY_AES_BLOCK_SIZE * num_blocks - 
pad_len; + return AES_OK; +#elif defined(HAVE_OPENSSL) + MyCipherCtx ctx; + int u_len, f_len; + + const EVP_CIPHER* cipher; + switch(key_length) { + case 16: + cipher = EVP_aes_128_ecb(); + break; + case 24: + cipher = EVP_aes_192_ecb(); + break; + case 32: + cipher = EVP_aes_256_ecb(); + break; + default: + return AES_BAD_KEYSIZE; + } + //Initialize Encryption Engine here, default software Engine is default + ENGINE *engine = NULL; + + if (! EVP_DecryptInit_ex(&ctx.ctx, cipher, engine, key, iv)) + return AES_BAD_DATA; /* Error */ + if (noPadding) { + EVP_CIPHER_CTX_set_padding(&ctx.ctx, 0); + } + OPENSSL_assert(EVP_CIPHER_CTX_key_length(&ctx.ctx) == key_length); + OPENSSL_assert(EVP_CIPHER_CTX_iv_length(&ctx.ctx) == iv_length); + OPENSSL_assert(EVP_CIPHER_CTX_block_size(&ctx.ctx) == 16); + if (! EVP_DecryptUpdate(&ctx.ctx, (unsigned char *) dest, &u_len, + (unsigned char *)source, source_length)) + return AES_BAD_DATA; /* Error */ + if (! EVP_DecryptFinal_ex(&ctx.ctx, (unsigned char *) dest + u_len, &f_len)) { + *dest_length = (unsigned long int) u_len; + return AES_BAD_DATA; + } + if (remaining_bytes!=0) { + memcpy(dest + source_length, source + source_length, remaining_bytes); + } + *dest_length = (unsigned long int) (u_len + f_len) + remaining_bytes; + +#endif + return AES_OK; +} + + + + + +/** + Encryption interface that doesn't do anything (for testing) + + SYNOPSIS + my_aes_encrypt_none() + @param source [in] Pointer to data for encryption + @param source_length [in] Size of encryption data + @param dest [out] Buffer to place encrypted data (must be large enough) + @param dest_length [out] Pointer to size of encrypted data + @param key [in] Key to be used for encryption + @param key_length [in] Length of the key. 16, 24 or 32 + @param iv [in] Iv to be used for encryption + @param iv_length [in] Length of the iv. should be 16. 
+ @param noPadding [in] unused + @return + != 0 error + 0 no error +*/ + +static int my_aes_encrypt_none(const uchar* source, uint32 source_length, + uchar* dest, uint32* dest_length, + const unsigned char* key, uint8 key_length, + const unsigned char* iv, uint8 iv_length, + uint noPadding) +{ + memcpy(dest, source, source_length); + *dest_length= source_length; + return 0; +} + + +/** + Decryption interface that doesn't do anything (for testing) + + SYNOPSIS + my_aes_decrypt_none() + @param source [in] Pointer to data to decrypt + @param source_length [in] Size of data + @param dest [out] Buffer to place decrypted data (must be large enough) + @param dest_length [out] Pointer to size of decrypted data + @param key [in] Key to be used for decryption + @param key_length [in] Length of the key. 16, 24 or 32 + @param iv [in] Iv to be used for encryption + @param iv_length [in] Length of the iv. should be 16. + @param noPadding [in] unused + + @return + != 0 error + 0 no error +*/ + +int my_aes_decrypt_none(const uchar* source, uint32 source_length, + uchar* dest, uint32 *dest_length, + const unsigned char* key, uint8 key_length, + const unsigned char* iv, uint8 iv_length, + uint noPadding) +{ + memcpy(dest, source, source_length); + *dest_length= source_length; + return 0; +} + +/** + Initialize encryption methods +*/ + +my_aes_decrypt_dynamic_type my_aes_decrypt_dynamic= my_aes_decrypt_none; +my_aes_encrypt_dynamic_type my_aes_encrypt_dynamic= my_aes_encrypt_none; +enum_my_aes_encryption_algorithm current_aes_dynamic_method= MY_AES_ALGORITHM_NONE; + +my_bool my_aes_init_dynamic_encrypt(enum_my_aes_encryption_algorithm method) +{ + switch (method) + { + /* used for encrypting tables */ + case MY_AES_ALGORITHM_ECB: + my_aes_encrypt_dynamic= my_aes_encrypt_ecb; + my_aes_decrypt_dynamic= my_aes_decrypt_ecb; + break; + case MY_AES_ALGORITHM_CBC: + my_aes_encrypt_dynamic= my_aes_encrypt_cbc; + my_aes_decrypt_dynamic= my_aes_decrypt_cbc; + break; +#ifdef 
HAVE_EncryptAes128Ctr + /* encrypt everything, with a set of keys */ + case MY_AES_ALGORITHM_CTR: + my_aes_encrypt_dynamic= my_aes_encrypt_ctr; + my_aes_decrypt_dynamic= my_aes_decrypt_ctr; + break; +#endif + /* Simulate encrypting interface */ + case MY_AES_ALGORITHM_NONE: + my_aes_encrypt_dynamic= my_aes_encrypt_none; + my_aes_decrypt_dynamic= my_aes_decrypt_none; + break; + default: + return 1; + } + current_aes_dynamic_method= method; + return 0; +} + +my_aes_decrypt_dynamic_type +get_aes_decrypt_func(enum_my_aes_encryption_algorithm method) +{ + switch (method) + { + /* used for encrypting tables */ + case MY_AES_ALGORITHM_ECB: + return my_aes_decrypt_ecb; + break; + case MY_AES_ALGORITHM_CBC: + return my_aes_decrypt_cbc; + break; +#ifdef HAVE_EncryptAes128Ctr + /* encrypt everything, with a set of keys */ + case MY_AES_ALGORITHM_CTR: + return my_aes_decrypt_ctr; + break; +#endif + /* Simulate encrypting interface */ + case MY_AES_ALGORITHM_NONE: + return my_aes_decrypt_none; + break; + default: + return NULL; + } + return NULL; +} + +my_aes_encrypt_dynamic_type +get_aes_encrypt_func(enum_my_aes_encryption_algorithm method) +{ + switch (method) + { + /* used for encrypting tables */ + case MY_AES_ALGORITHM_ECB: + return my_aes_encrypt_ecb; + break; + case MY_AES_ALGORITHM_CBC: + return my_aes_encrypt_cbc; + break; +#ifdef HAVE_EncryptAes128Ctr + /* encrypt everything, with a set of keys */ + case MY_AES_ALGORITHM_CTR: + return my_aes_encrypt_ctr; + break; +#endif + /* Simulate encrypting interface */ + case MY_AES_ALGORITHM_NONE: + return my_aes_encrypt_none; + break; + default: + return NULL; + } + return NULL; +} + + +/**************************************************************** + Encryption function visible to MariaDB users +****************************************************************/ + +int my_aes_encrypt(const uchar* source, int source_length, uchar* dest, const char* key, int key_length) { #if defined(HAVE_YASSL) TaoCrypt::AES_ECB_Encryption 
enc; + /* 128 bit block used for padding */ uint8 block[MY_AES_BLOCK_SIZE]; int num_blocks; /* number of complete blocks */ @@ -194,7 +961,7 @@ int my_aes_encrypt(const char* source, int source_length, char* dest, < 0 Error */ -int my_aes_decrypt(const char *source, int source_length, char *dest, +int my_aes_decrypt(const uchar *source, int source_length, uchar *dest, const char *key, int key_length) { #if defined(HAVE_YASSL) @@ -275,4 +1042,3 @@ int my_aes_get_size(int source_length) return MY_AES_BLOCK_SIZE * (source_length / MY_AES_BLOCK_SIZE) + MY_AES_BLOCK_SIZE; } - diff --git a/mysys_ssl/my_crypt.cc b/mysys_ssl/my_crypt.cc new file mode 100644 index 00000000000..0d49999fdfa --- /dev/null +++ b/mysys_ssl/my_crypt.cc @@ -0,0 +1,370 @@ +/* + TODO: add support for YASSL +*/ + +#include <my_global.h> +#include <my_crypt.h> + +/* YASSL doesn't support EVP_CIPHER_CTX */ +#ifdef HAVE_EncryptAes128Ctr + +#include "mysql.h" +#include <openssl/evp.h> +#include <openssl/aes.h> +#include <openssl/rand.h> + +static const int CRYPT_ENCRYPT = 1; +static const int CRYPT_DECRYPT = 0; + +class Encrypter { + public: + virtual ~Encrypter() {} + + virtual Crypt_result Encrypt(const uchar* plaintext, + int plaintext_size, + uchar* ciphertext, + int* ciphertext_used) = 0; + virtual Crypt_result GetTag(uchar* tag, int tag_size) = 0; +}; + +class Decrypter { + public: + virtual ~Decrypter() {} + + virtual Crypt_result SetTag(const uchar* tag, int tag_size) = 0; + virtual Crypt_result Decrypt(const uchar* ciphertext, + int ciphertext_size, + uchar* plaintext, + int* plaintext_used) = 0; + virtual Crypt_result CheckTag() = 0; +}; + +class Crypto { + public: + virtual ~Crypto(); + + Crypt_result Crypt(const uchar* input, int input_size, + uchar* output, int* output_used); + + protected: + Crypto(); + + EVP_CIPHER_CTX ctx; +}; + + +/* Various crypto implementations */ + +class Aes128CtrCrypto : public Crypto { + public: + virtual Crypt_result Init(const uchar* key, const uchar* iv, + int 
iv_size); + + protected: + Aes128CtrCrypto() {} + + virtual int mode() = 0; +}; + +class Aes128CtrEncrypter : public Aes128CtrCrypto, public Encrypter { + public: + Aes128CtrEncrypter() {} + virtual Crypt_result Encrypt(const uchar* plaintext, + int plaintext_size, + uchar* ciphertext, + int* ciphertext_used); + + virtual Crypt_result GetTag(uchar* tag, int tag_size) { + DBUG_ASSERT(false); + return AES_INVALID; + } + + protected: + virtual int mode() { + return CRYPT_ENCRYPT; + } + + private: + Aes128CtrEncrypter(const Aes128CtrEncrypter& o); + Aes128CtrEncrypter& operator=(const Aes128CtrEncrypter& o); +}; + +class Aes128CtrDecrypter : public Aes128CtrCrypto, public Decrypter { + public: + Aes128CtrDecrypter() {} + virtual Crypt_result Decrypt(const uchar* ciphertext, + int ciphertext_size, + uchar* plaintext, + int* plaintext_used); + + virtual Crypt_result SetTag(const uchar* tag, int tag_size) { + DBUG_ASSERT(false); + return AES_INVALID; + } + + virtual Crypt_result CheckTag() { + DBUG_ASSERT(false); + return AES_INVALID; + } + + protected: + virtual int mode() { + return CRYPT_DECRYPT; + } + + private: + Aes128CtrDecrypter(const Aes128CtrDecrypter& o); + Aes128CtrDecrypter& operator=(const Aes128CtrDecrypter& o); +}; + +class Aes128EcbCrypto : public Crypto { + public: + virtual Crypt_result Init(const unsigned char* key); + + protected: + Aes128EcbCrypto() {} + + virtual int mode() = 0; +}; + +class Aes128EcbEncrypter : public Aes128EcbCrypto, public Encrypter { + public: + Aes128EcbEncrypter() {} + virtual Crypt_result Encrypt(const unsigned char* plaintext, + int plaintext_size, + unsigned char* ciphertext, + int* ciphertext_used); + + virtual Crypt_result GetTag(unsigned char* tag, int tag_size) { + DBUG_ASSERT(false); + return AES_INVALID; + } + + protected: + virtual int mode() { + return CRYPT_ENCRYPT; + } + + private: + Aes128EcbEncrypter(const Aes128EcbEncrypter& o); + Aes128EcbEncrypter& operator=(const Aes128EcbEncrypter& o); +}; + +class 
Aes128EcbDecrypter : public Aes128EcbCrypto, public Decrypter { + public: + Aes128EcbDecrypter() {} + virtual Crypt_result Decrypt(const unsigned char* ciphertext, + int ciphertext_size, + unsigned char* plaintext, + int* plaintext_used); + + virtual Crypt_result SetTag(const unsigned char* tag, int tag_size) { + DBUG_ASSERT(false); + return AES_INVALID; + } + + virtual Crypt_result CheckTag() { + DBUG_ASSERT(false); + return AES_INVALID; + } + + protected: + virtual int mode() { + return CRYPT_DECRYPT; + } + + private: + Aes128EcbDecrypter(const Aes128EcbDecrypter& o); + Aes128EcbDecrypter& operator=(const Aes128EcbDecrypter& o); +}; + + +Crypto::~Crypto() { + EVP_CIPHER_CTX_cleanup(&ctx); +} + +Crypto::Crypto() { + EVP_CIPHER_CTX_init(&ctx); +} + +/* + WARNING: It is allowed to have output == NULL, for special cases like AAD + support in AES GCM. output_used however must never be NULL. +*/ + +Crypt_result Crypto::Crypt(const uchar* input, int input_size, + uchar* output, int* output_used) { + DBUG_ASSERT(input != NULL); + DBUG_ASSERT(output_used != NULL); + if (!EVP_CipherUpdate(&ctx, output, output_used, input, input_size)) { + return AES_OPENSSL_ERROR; + } + + return AES_OK; +} + +Crypt_result Aes128CtrCrypto::Init(const uchar* key, + const uchar* iv, + int iv_size) { + if (iv_size != 16) { + DBUG_ASSERT(false); + return AES_BAD_IV; + } + + if (!EVP_CipherInit_ex(&ctx, EVP_aes_128_ctr(), NULL, key, iv, mode())) { + return AES_OPENSSL_ERROR; + } + + return AES_OK; +} + +Crypt_result Aes128CtrEncrypter::Encrypt(const uchar* plaintext, + int plaintext_size, + uchar* ciphertext, + int* ciphertext_used) { + Crypt_result res = Crypt(plaintext, plaintext_size, ciphertext, + ciphertext_used); + DBUG_ASSERT(*ciphertext_used == plaintext_size); + return res; +} + +Crypt_result Aes128CtrDecrypter::Decrypt(const uchar* ciphertext, + int ciphertext_size, + uchar* plaintext, + int* plaintext_used) { + Crypt_result res = Crypt(ciphertext, ciphertext_size, plaintext, + 
plaintext_used); + DBUG_ASSERT(*plaintext_used == ciphertext_size); + return res; +} + + +Crypt_result Aes128EcbCrypto::Init(const unsigned char* key) { + if (!EVP_CipherInit_ex(&ctx, EVP_aes_128_ecb(), NULL, key, NULL, mode())) { + return AES_OPENSSL_ERROR; + } + + return AES_OK; +} + +Crypt_result Aes128EcbEncrypter::Encrypt(const unsigned char* plaintext, + int plaintext_size, + unsigned char* ciphertext, + int* ciphertext_used) { + Crypt_result res = Crypt(plaintext, plaintext_size, + ciphertext, ciphertext_used); + DBUG_ASSERT(*ciphertext_used == plaintext_size); + return res; +} + +Crypt_result Aes128EcbDecrypter::Decrypt(const unsigned char* ciphertext, + int ciphertext_size, + unsigned char* plaintext, + int* plaintext_used) { + Crypt_result res = Crypt(ciphertext, ciphertext_size, + plaintext, plaintext_used); + DBUG_ASSERT(*plaintext_used == ciphertext_size); + return res; +} + +C_MODE_START + + + /* Encrypt and decrypt according to Aes128Ctr */ + +Crypt_result my_aes_encrypt_ctr(const uchar* source, uint32 source_length, + uchar* dest, uint32* dest_length, + const unsigned char* key, uint8 key_length, + const unsigned char* iv, uint8 iv_length, + uint noPadding) +{ + Aes128CtrEncrypter encrypter; + Crypt_result res = encrypter.Init(key, iv, iv_length); + if (res != AES_OK) + return res; + return encrypter.Encrypt(source, source_length, dest, (int*)dest_length); +} + + +Crypt_result my_aes_decrypt_ctr(const uchar* source, uint32 source_length, + uchar* dest, uint32* dest_length, + const unsigned char* key, uint8 key_length, + const unsigned char* iv, uint8 iv_length, + uint noPadding) +{ + Aes128CtrDecrypter decrypter; + + Crypt_result res = decrypter.Init(key, iv, iv_length); + if (res != AES_OK) + return res; + return decrypter.Decrypt(source, source_length, dest, (int*)dest_length); +} + + +Crypt_result my_aes_encrypt_ecb(const uchar* source, uint32 source_length, + uchar* dest, uint32* dest_length, + const unsigned char* key, uint8 key_length, + const 
unsigned char* iv, uint8 iv_length, + uint noPadding) +{ + Aes128EcbEncrypter encrypter; + Crypt_result res = encrypter.Init(key); + if (res != AES_OK) + return res; + return encrypter.Encrypt(source, source_length, dest, (int*)dest_length); +} + +Crypt_result my_aes_decrypt_ecb(const uchar* source, uint32 source_length, + uchar* dest, uint32* dest_length, + const unsigned char* key, uint8 key_length, + const unsigned char* iv, uint8 iv_length, + uint noPadding) +{ + Aes128EcbDecrypter decrypter; + + Crypt_result res = decrypter.Init(key); + + if (res != AES_OK) + return res; + return decrypter.Decrypt(source, source_length, dest, (int*)dest_length); +} + +C_MODE_END + +#endif /* HAVE_EncryptAes128Ctr */ + +#if defined(HAVE_YASSL) + +#include <random.hpp> + +C_MODE_START + +Crypt_result my_random_bytes(uchar* buf, int num) +{ + TaoCrypt::RandomNumberGenerator rand; + rand.GenerateBlock((TaoCrypt::byte*) buf, num); + return AES_OK; +} + +C_MODE_END + +#else /* OpenSSL */ + +C_MODE_START + +Crypt_result my_random_bytes(uchar* buf, int num) +{ + /* + Unfortunately RAND_bytes manual page does not provide any guarantees + in relation to blocking behavior. Here we explicitly use SSLeay random + instead of whatever random engine is currently set in OpenSSL. That way + we are guaranteed to have a non-blocking random. 
+ */ + RAND_METHOD* rand = RAND_SSLeay(); + if (rand == NULL || rand->bytes(buf, num) != 1) + return AES_OPENSSL_ERROR; + return AES_OK; +} + +C_MODE_END +#endif /* HAVE_YASSL */ diff --git a/mysys_ssl/my_crypt_key_management.cc b/mysys_ssl/my_crypt_key_management.cc new file mode 100644 index 00000000000..69efed32567 --- /dev/null +++ b/mysys_ssl/my_crypt_key_management.cc @@ -0,0 +1,110 @@ +#include <my_global.h> +#include <my_crypt_key_management.h> +#include <cstring> + +#ifndef DBUG_OFF +#include <myisampack.h> +my_bool debug_use_static_encryption_keys = 0; + +#ifdef HAVE_PSI_INTERFACE +PSI_rwlock_key key_LOCK_dbug_encryption_key_version; +#endif +mysql_rwlock_t LOCK_dbug_encryption_key_version; +unsigned int opt_debug_encryption_key_version = 0; +#endif + +/** + * Default functions + */ +int GetLatestCryptoKeyVersionImpl(); +unsigned int HasCryptoKeyImpl(unsigned int version); +int GetCryptoKeySizeImpl(unsigned int version); +int GetCryptoKeyImpl(unsigned int version, unsigned char* key_buffer, + unsigned int size); +int GetCryptoIVImpl(unsigned int version, unsigned char* key_buffer, + unsigned int size); + +/** + * Function pointers for + * - GetLatestCryptoKeyVersion + * - GetCryptoKey + */ +static +struct CryptoKeyFuncs_t cryptoKeyFuncs = { + GetLatestCryptoKeyVersionImpl, + HasCryptoKeyImpl, + GetCryptoKeySizeImpl, + GetCryptoKeyImpl, + GetCryptoIVImpl +}; + +extern "C" +int GetLatestCryptoKeyVersion() { +#ifndef DBUG_OFF + if (debug_use_static_encryption_keys) { + mysql_rwlock_rdlock(&LOCK_dbug_encryption_key_version); + unsigned int res = opt_debug_encryption_key_version; + mysql_rwlock_unlock(&LOCK_dbug_encryption_key_version); + return res; + } +#endif + + return (* cryptoKeyFuncs.getLatestCryptoKeyVersionFunc)(); +} + +extern "C" +unsigned int HasCryptoKey(unsigned int version) { + return (* cryptoKeyFuncs.hasCryptoKeyFunc)(version); +} + +extern "C" +int GetCryptoKeySize(unsigned int version) { + return (* cryptoKeyFuncs.getCryptoKeySize)(version); 
+} + +extern "C" +int GetCryptoKey(unsigned int version, unsigned char* key, unsigned int size) { +#ifndef DBUG_OFF + if (debug_use_static_encryption_keys) { + memset(key, 0, size); + // Just don't support tiny keys, no point anyway. + if (size < 4) { + return 1; + } + + mi_int4store(key, version); + return 0; + } +#endif + + return (* cryptoKeyFuncs.getCryptoKeyFunc)(version, key, size); +} + +extern "C" +int GetCryptoIV(unsigned int version, unsigned char* key, unsigned int size) { + return (* cryptoKeyFuncs.getCryptoIVFunc)(version, key, size); +} + +extern "C" +void +InstallCryptoKeyFunctions(const struct CryptoKeyFuncs_t* _cryptoKeyFuncs) +{ + if (_cryptoKeyFuncs == NULL) + { + /* restore defaults when called with NULL argument */ + cryptoKeyFuncs.getLatestCryptoKeyVersionFunc = + GetLatestCryptoKeyVersionImpl; + cryptoKeyFuncs.hasCryptoKeyFunc = + HasCryptoKeyImpl; + cryptoKeyFuncs.getCryptoKeySize = + GetCryptoKeySizeImpl; + cryptoKeyFuncs.getCryptoKeyFunc = + GetCryptoKeyImpl; + cryptoKeyFuncs.getCryptoIVFunc = + GetCryptoIVImpl; + } + else + { + cryptoKeyFuncs = *_cryptoKeyFuncs; + } +} diff --git a/mysys_ssl/my_crypt_key_management_impl.cc b/mysys_ssl/my_crypt_key_management_impl.cc new file mode 100644 index 00000000000..af2077d8d15 --- /dev/null +++ b/mysys_ssl/my_crypt_key_management_impl.cc @@ -0,0 +1,34 @@ +#include <my_global.h> + +// TODO Not yet implemented. 
+int GetLatestCryptoKeyVersionImpl() +{ + abort(); + return 0; /* Keep compiler happy */ +} + +unsigned int HasCryptoKeyImpl(unsigned int version) +{ + abort(); + return 0; /* Keep compiler happy */ +} + +int GetCryptoKeySizeImpl(unsigned int version) +{ + abort(); + return 0; /* Keep compiler happy */ +} + +int GetCryptoKeyImpl(unsigned int version, unsigned char* key, + unsigned int key_size) +{ + abort(); + return 0; /* Keep compiler happy */ +} + +int GetCryptoIVImpl(unsigned int version, unsigned char* key, + unsigned int key_size) +{ + abort(); + return 0; /* Keep compiler happy */ +} diff --git a/plugin/example_key_management_plugin/CMakeLists.txt b/plugin/example_key_management_plugin/CMakeLists.txt new file mode 100644 index 00000000000..237d21a1e8e --- /dev/null +++ b/plugin/example_key_management_plugin/CMakeLists.txt @@ -0,0 +1 @@ +MYSQL_ADD_PLUGIN(EXAMPLE_KEY_MANAGEMENT_PLUGIN example_key_management_plugin.cc MANDATORY) diff --git a/plugin/example_key_management_plugin/example_key_management_plugin.cc b/plugin/example_key_management_plugin/example_key_management_plugin.cc new file mode 100644 index 00000000000..cdc32e87aad --- /dev/null +++ b/plugin/example_key_management_plugin/example_key_management_plugin.cc @@ -0,0 +1,129 @@ +// Copyright (C) 2014 Google Inc. 
+ +#include <mysql_version.h> +#include <my_global.h> +#include <my_pthread.h> +#include <my_aes.h> +#include <my_crypt_key_management.h> +#include <my_md5.h> +#include <my_rnd.h> + +/* rotate key randomly between 45 and 90 seconds */ +#define KEY_ROTATION_MIN 45 +#define KEY_ROTATION_MAX 90 + +static struct my_rnd_struct seed; +static unsigned int key_version = 0; +static unsigned int next_key_version = 0; +static pthread_mutex_t mutex; + +static +int +get_latest_key_version() +{ + uint now = time(0); + pthread_mutex_lock(&mutex); + if (now >= next_key_version) + { + key_version = now; + unsigned int interval = KEY_ROTATION_MAX - KEY_ROTATION_MIN; + next_key_version = now + KEY_ROTATION_MIN + my_rnd(&seed) * interval; + } + pthread_mutex_unlock(&mutex); + + return key_version; +} + +static +int +get_key(unsigned int version, unsigned char* dstbuf, unsigned buflen) +{ + char *dst = (char*)dstbuf; // md5 function takes char* as argument... + unsigned len = 0; + for (; len + MD5_HASH_SIZE <= buflen; len += MD5_HASH_SIZE) + { + compute_md5_hash(dst, (const char*)&version, sizeof(version)); + dst += MD5_HASH_SIZE; + version++; + } + if (len < buflen) + { + memset(dst, 0, buflen - len); + } + return 0; +} + +static unsigned int has_key_func(unsigned int keyID) +{ + return true; +} + +static int get_key_size(unsigned int keyID) +{ + return 16; +} + +static int get_iv(unsigned int keyID, unsigned char* dstbuf, unsigned buflen) +{ + if (buflen < 16) + { + return CRYPT_BUFFER_TO_SMALL; + } + + for (int i=0; i<16; i++) + dstbuf[i] = 0; + + return CRYPT_KEY_OK; +} + + +static int example_key_management_plugin_init(void *p) +{ + /* init */ + my_rnd_init(&seed, time(0), 0); + get_latest_key_version(); + + my_aes_init_dynamic_encrypt(MY_AES_ALGORITHM_CTR); + + pthread_mutex_init(&mutex, NULL); + + struct CryptoKeyFuncs_t func; + func.getLatestCryptoKeyVersionFunc = get_latest_key_version; + func.hasCryptoKeyFunc = has_key_func; + func.getCryptoKeySize = get_key_size; + 
func.getCryptoKeyFunc = get_key; + func.getCryptoIVFunc = get_iv; + InstallCryptoKeyFunctions(&func); + return 0; +} + +static int example_key_management_plugin_deinit(void *p) +{ + pthread_mutex_destroy(&mutex); + return 0; +} + +struct st_mysql_daemon example_key_management_plugin= { + MYSQL_DAEMON_INTERFACE_VERSION +}; + +/* + Plugin library descriptor +*/ +maria_declare_plugin(example_key_management_plugin) +{ + MYSQL_DAEMON_PLUGIN, + &example_key_management_plugin, + "example_key_management_plugin", + "Jonas Oreland", + "Example key management plugin", + PLUGIN_LICENSE_GPL, + example_key_management_plugin_init, + example_key_management_plugin_deinit, + 0x0100 /* 1.0 */, + NULL, /* status variables */ + NULL, /* system variables */ + "1.0", + MariaDB_PLUGIN_MATURITY_UNKNOWN +} +maria_declare_plugin_end; diff --git a/plugin/file_key_management_plugin/CMakeLists.txt b/plugin/file_key_management_plugin/CMakeLists.txt new file mode 100644 index 00000000000..d5412abf714 --- /dev/null +++ b/plugin/file_key_management_plugin/CMakeLists.txt @@ -0,0 +1,4 @@ +SET(FILE_KEY_MANAGEMENT_PLUGIN_SOURCES file_key_management_plugin.cc EncKeys.cc KeySingleton.cc) + +MYSQL_ADD_PLUGIN(FILE_KEY_MANAGEMENT_PLUGIN ${FILE_KEY_MANAGEMENT_PLUGIN_SOURCES} DEFAULT + LINK_LIBRARIES pcre) diff --git a/plugin/file_key_management_plugin/EncKeys.cc b/plugin/file_key_management_plugin/EncKeys.cc new file mode 100644 index 00000000000..266a88d2c3f --- /dev/null +++ b/plugin/file_key_management_plugin/EncKeys.cc @@ -0,0 +1,479 @@ +/* Copyright (C) 2014 eperi GmbH. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/******************************************************************//** + @file EncKeys.cc + A class to keep keys for encryption/decryption. + +How it works... +The location and usage can be configured via the configuration file. +Example + +[mysqld] +... +file_key_management_plugin_filename = /home/mdb/keys.enc +file_key_management_plugin_filekey = secret +file_key_management_plugin_encryption_method = aes_cbc + +... + +Optional configuration value +file_key_management_plugin_encryption_method determines the method +used for encryption. +Supported are aes_cbc, aes_ecb or aes_ctr. aes_cbc is default. +The plug-in sets the default aes encryption/decryption method to the given method. + +The keys are read from a file. +The filename is set up via the file_key_management_plugin_filename +configuration value. +file_key_management_plugin_filename is used to configure the absolute +path to this file. + +Examples: +file_key_management_plugin_filename = \\\\unc\\keys.enc (windows share) +file_key_management_plugin_filename = e:/tmp/keys.enc (windows path) +file_key_management_plugin_filename = /tmp/keys.enc (linux path) + +The key file contains AES keys and initialization vectors as +hex-encoded Strings. +Supported are keys of size 128, 192 or 256 bits. IV consists of 16 bytes. +Example: +1;F5502320F8429037B8DAEF761B189D12;770A8A65DA156D24EE2A093277530142 + +1 is the key identifier which can be used for table creation, a 16 +byte IV follows, and finally a 16 byte AES key. +255 entries are supported. + +The key file should be encrypted and the key to decrypt the file can +be given with the optional file_key_management_plugin_filekey +parameter. 
+ +The file key can also be read from a file if FILE: is prepended to the +key. Then the following part is interpreted as absolute path to the +file containing the file key. This file can optionally be encrypted, +currently with a fixed key. + +Example: + +file_key_management_plugin_filekey = FILE:y:/secret256.enc + +If the key file can not be read at server startup, for example if the +file key is not present, page_encryption feature is not available and +access to page_encryption tables is not possible. + +OpenSSL command line utility can be used to create an encrypted key file. +Examples: +openssl enc -aes-256-cbc -md sha1 -k secret -in keys.txt -out keys.enc +openssl enc -aes-256-cbc -md sha1 -k <initialPwd> -in secret -out secret.enc + + Created 09/15/2014 +***********************************************************************/ + +#ifdef __WIN__ +#define PCRE_STATIC 1 +#endif + +#include <my_global.h> +#include <sql_class.h> /* For sql_print_error */ +#include "EncKeys.h" +#include <my_aes.h> +#include <memory.h> +#include <my_sys.h> +#include <pcre.h> +#include <string.h> +#include <my_sys.h> + +const char* EncKeys::strMAGIC= "Salted__"; +const int EncKeys::magicSize= 8;//strlen(strMAGIC); // 8 byte +const char* EncKeys::newLine= "\n"; + +const char* EncKeys::errorNoKeyId= "KeyID %u not found or with error. 
Check the key and the log file.\n"; +const char* EncKeys::errorInMatches= "Wrong match of the keyID in line %u, see the template.\n"; +const char* EncKeys::errorExceedKeyFileSize= "The size of the key file %s exceeds " + "the maximum allowed of %u bytes.\n"; +const char* EncKeys::errorExceedKeySize= "The key size exceeds the maximum allowed size of %u in line %u.\n"; +const char* EncKeys::errorEqualDoubleKey= "More than one identical key with keyID %u found" + " in lines %u and %u.\nDelete one of them in the key file.\n"; +const char* EncKeys::errorUnequalDoubleKey= "More than one not identical key with keyID %u found" + " in lines %u and %u.\nChoose the right one and delete the other in the key file.\n" + "I'll take the key from line %u\n"; +#define errorNoInitializedKey "The key could not be initialized.\n" +const char* EncKeys::errorNotImplemented= "Initializing keys through key server is not" + " yet implemented.\nYou can not read encrypted tables or columns\n\n"; +const char* EncKeys::errorOpenFile= "Could not open %s for reading. You can not read encrypted tables or columns.\n\n"; +const char* EncKeys::errorReadingFile= "Could not read from %s. You can not read encrypted tables or columns\n\n"; +const char* EncKeys::errorFileSize= "Could not get the file size from %s. 
You can not read encrypted tables or columns\n\n"; +const char* EncKeys::errorFalseFileKey= "Wrong encryption / decryption key for keyfile '%s'.\n"; + +/* read this from a secret source in some later version */ +const char* EncKeys::initialPwd= "lg28s9ac5ffa537fd8798875c98e190df289da7e047c05"; + +EncKeys::EncKeys() +{ + countKeys= keyLineInKeyFile= 0; + for (int ii= 0; ii < MAX_KEYS; ii++) { + keys[ii].id= 0; + keys[ii].iv= keys[ii].key= NULL; + } + oneKey= NULL; +} + + +EncKeys::~EncKeys() +{ + for (int ii= MAX_KEYS - 1; ii >= 0 ; ii--) { + delete[] keys[ii].iv; + keys[ii].iv= NULL; + delete[] keys[ii].key; + keys[ii].key= NULL; + } +} + + +bool EncKeys::initKeys(const char *filename, const char *filekey) +{ + if (filename==NULL) + return false; + + const char *MAGIC= "FILE:"; + const short MAGIC_LEN= 5; + + char *secret= (char*) malloc(MAX_SECRET_SIZE +1 * sizeof(char)); + + if (filekey != NULL) + { + //If secret starts with FILE: interpret the secret as filename. + if(memcmp(MAGIC, filekey, MAGIC_LEN) == 0) + { + int fk_len= strlen(filekey); + char *secretfile= (char*)malloc((1 + fk_len - MAGIC_LEN)* sizeof(char)); + memcpy(secretfile, filekey+MAGIC_LEN, fk_len - MAGIC_LEN); + secretfile[fk_len-MAGIC_LEN]= '\0'; + parseSecret(secretfile, secret); + free(secretfile); + } else + { + sprintf(secret, "%s", filekey); + } + } + + int ret= parseFile((const char *)filename, 254, secret); + free(secret); + return (ret==NO_ERROR_KEY_FILE_PARSE_OK); +} + + +/* + secret is limited to MAX_SECRET_SIZE characters +*/ + +void EncKeys::parseSecret(const char *secretfile, char *secret) +{ + size_t maxSize= (MAX_SECRET_SIZE +16 + magicSize*2) ; + char* buf= (char*)malloc((maxSize) * sizeof(char)); + char* _initPwd= (char*)malloc((strlen(initialPwd)+1) * sizeof(char)); + FILE *fp= fopen(secretfile, "rb"); + fseek(fp, 0L, SEEK_END); + long file_size= ftell(fp); + rewind(fp); + size_t bytes_to_read= ((maxSize >= (size_t) file_size) ? 
(size_t) file_size : + maxSize); + bytes_to_read= fread(buf, 1, bytes_to_read, fp); + if (memcmp(buf, strMAGIC, magicSize)) + { + bytes_to_read= (bytes_to_read>MAX_SECRET_SIZE) ? MAX_SECRET_SIZE : + bytes_to_read; + memcpy(secret, buf, bytes_to_read); + secret[bytes_to_read]= '\0'; + } + else + { + unsigned char salt[magicSize]; + unsigned char *key= new unsigned char[keySize32]; + unsigned char *iv= new unsigned char[ivSize16]; + memcpy(&salt, buf + magicSize, magicSize); + memcpy(_initPwd, initialPwd, strlen(initialPwd)); + _initPwd[strlen(initialPwd)]= '\0'; + my_bytes_to_key((unsigned char *) salt, _initPwd, key, iv); + uint32 d_size= 0; + my_aes_decrypt_dynamic_type func= get_aes_decrypt_func(MY_AES_ALGORITHM_CBC); + int re= (* func)((const uchar*)buf + 2 * magicSize, + bytes_to_read - 2 * magicSize, + (uchar*)secret, &d_size, (const uchar*)key, keySize32, + iv, ivSize16, 0); + if (re) + d_size= 0; + if (d_size>EncKeys::MAX_SECRET_SIZE) + { + d_size= EncKeys::MAX_SECRET_SIZE; + } + secret[d_size]= '\0'; + delete[] key; + delete[] iv; + } + free(buf); + free(_initPwd); + fclose(fp); +} + + +/** + * Returns a struct keyentry with the asked 'id' or NULL. + */ +keyentry *EncKeys::getKeys(int id) +{ + if (KEY_MIN <= id && KEY_MAX >= id && (&keys[id - 1])->iv) + { + return &keys[id - 1]; + } +#ifndef DBUG_OFF + else + { + sql_print_error(errorNoKeyId, id); + } +#endif + return NULL; +} + +/** + Get the keys from the key file <filename> and decrypt it with the + key <secret>. Store the keys with id smaller then <maxKeyId> in an + array of structs keyentry. Returns NO_ERROR_PARSE_OK or an + appropriate error code. 
+ */ + +int EncKeys::parseFile(const char* filename, const uint32 maxKeyId, + const char *secret) +{ + int errorCode= 0; + char *buffer= decryptFile(filename, secret, &errorCode); + uint32 id= 0; + + if (errorCode != NO_ERROR_PARSE_OK) + return errorCode; + errorCode= NO_ERROR_KEY_FILE_PARSE_OK; + + char *line= strtok(buffer, newLine); + while (NULL != line) + { + keyLineInKeyFile++; + switch (parseLine(line, maxKeyId)) { + case NO_ERROR_PARSE_OK: + id= oneKey->id; + keys[oneKey->id - 1]= *oneKey; + delete(oneKey); + countKeys++; + break; + case ERROR_ID_TOO_BIG: + sql_print_error(errorExceedKeySize, KEY_MAX, + keyLineInKeyFile); + sql_print_error(" ---> %s\n", line); + errorCode= ERROR_KEY_FILE_EXCEEDS_MAX_NUMBERS_OF_KEYS; + break; + case ERROR_NOINITIALIZEDKEY: + sql_print_error(errorNoInitializedKey); + sql_print_error(" ----> %s\n", line); + errorCode= ERROR_KEY_FILE_PARSE_NULL; + break; + case ERROR_WRONG_NUMBER_OF_MATCHES: + sql_print_error(errorInMatches, keyLineInKeyFile); + sql_print_error(" -----> %s\n", line); + errorCode= ERROR_KEY_FILE_PARSE_NULL; + break; + case NO_ERROR_KEY_GREATER_THAN_ASKED: + sql_print_error("No asked key in line %u: %s\n", + keyLineInKeyFile, line); + break; + case NO_ERROR_ISCOMMENT: + sql_print_error("Is comment in line %u: %s\n", + keyLineInKeyFile, line); + default: + break; + } + line= strtok(NULL, newLine); + } + + free(line); + line= NULL; + delete[] buffer; + buffer= NULL; + return errorCode; +} + + +int EncKeys::parseLine(const char *line, const uint32 maxKeyId) +{ + int ret= NO_ERROR_PARSE_OK; + if (isComment(line)) + ret= NO_ERROR_ISCOMMENT; + else + { + const char *error_p= NULL; + int offset; + pcre *pattern= pcre_compile( + "([0-9]+);([0-9,a-f,A-F]{32});([0-9,a-f,A-F]{64}|[0-9,a-f,A-F]{48}|[0-9,a-f,A-F]{32})", + 0, &error_p, &offset, NULL); + if (NULL != error_p) + sql_print_error("Error: %s\nOffset: %d\n", error_p, offset); + + int m_len= (int) strlen(line), ovector[MAX_OFFSETS_IN_PCRE_PATTERNS]; + int rc= 
pcre_exec(pattern, NULL, line, m_len, 0, 0, ovector, + MAX_OFFSETS_IN_PCRE_PATTERNS); + pcre_free(pattern); + if (4 == rc) + { + char lin[MAX_KEY_LINE_SIZE + 1]; + strncpy(lin, line, MAX_KEY_LINE_SIZE); + lin[MAX_KEY_LINE_SIZE]= '\0'; + char *substring_start= lin + ovector[2]; + int substr_length= ovector[3] - ovector[2]; + if (3 < substr_length) + ret= ERROR_ID_TOO_BIG; + else + { + char buffer[4]; + sprintf(buffer, "%.*s", substr_length, substring_start); + uint32 id= atoi(buffer); + if (0 == id) ret= ERROR_NOINITIALIZEDKEY; + else if (KEY_MAX < id) ret= ERROR_ID_TOO_BIG; + else if (maxKeyId < id) ret= NO_ERROR_KEY_GREATER_THAN_ASKED; + else + { + oneKey= new keyentry; + oneKey->id= id; + substring_start= lin + ovector[4]; + substr_length= ovector[5] - ovector[4]; + oneKey->iv= new char[substr_length + 1]; + sprintf(oneKey->iv, "%.*s", substr_length, substring_start); + substring_start= lin + ovector[6]; + substr_length= ovector[7] - ovector[6]; + oneKey->key= new char[substr_length + 1]; + sprintf(oneKey->key, "%.*s", substr_length, substring_start); + } + } + } + else + ret= ERROR_WRONG_NUMBER_OF_MATCHES; + } + return ret; +} + +/** + Decrypt the key file 'filename' if it is encrypted with the key + 'secret'. Store the content of the decrypted file in 'buffer'. The + buffer has to be freed in the calling function. 
+ */ + +char* EncKeys::decryptFile(const char* filename, const char *secret, + int *errorCode) +{ + *errorCode= NO_ERROR_PARSE_OK; + FILE *fp= fopen(filename, "rb"); + if (NULL == fp) + { + sql_print_error(errorOpenFile, filename); + *errorCode= ERROR_OPEN_FILE; + return NULL; + } + + if (fseek(fp, 0L, SEEK_END)) + { + *errorCode= ERROR_READING_FILE; + return NULL; + } + long file_size= ftell(fp); // get the file size + if (MAX_KEY_FILE_SIZE < file_size) + { + sql_print_error(errorExceedKeyFileSize, filename, MAX_KEY_FILE_SIZE); + *errorCode= ERROR_KEY_FILE_TOO_BIG; + fclose(fp); + return NULL; + } + else if (-1L == file_size) + { + sql_print_error(errorFileSize, filename); + *errorCode= ERROR_READING_FILE; + return NULL; + } + + rewind(fp); + //Read file into buffer + uchar *buffer= new uchar[file_size + 1]; + file_size= fread(buffer, 1, file_size, fp); + buffer[file_size]= '\0'; + fclose(fp); + //Check for file encryption + if (0 == memcmp(buffer, strMAGIC, magicSize)) + { + //If file is encrypted, decrypt it first. 
+ unsigned char salt[magicSize]; + unsigned char *key= new unsigned char[keySize32]; + unsigned char *iv= new unsigned char[ivSize16]; + uchar *decrypted= new uchar[file_size]; + memcpy(&salt, buffer + magicSize, magicSize); + my_bytes_to_key((unsigned char *) salt, secret, key, iv); + uint32 d_size= 0; + my_aes_decrypt_dynamic_type func= get_aes_decrypt_func(MY_AES_ALGORITHM_CBC); + int res= (* func)((const uchar*)buffer + 2 * magicSize, + file_size - 2 * magicSize, + decrypted, &d_size, (const uchar*) key, keySize32, + iv, ivSize16, 0); + if(0 != res) + { + *errorCode= ERROR_FALSE_FILE_KEY; + delete[] buffer; buffer= NULL; + sql_print_error(errorFalseFileKey, filename); + } + else + { + memcpy(buffer, decrypted, d_size); + buffer[d_size]= '\0'; + } + + delete[] decrypted; decrypted= NULL; + delete[] key; key= NULL; + delete[] iv; iv= NULL; + } + return (char*) buffer; +} + +bool EncKeys::isComment(const char *line) +{ + const char *error_p; + int offset, m_len= (int) strlen(line), + ovector[MAX_OFFSETS_IN_PCRE_PATTERNS]; + pcre *pattern= pcre_compile("\\s*#.*", 0, &error_p, &offset, NULL); + int rc= pcre_exec(pattern, NULL, line, m_len, 0, 0, ovector, + MAX_OFFSETS_IN_PCRE_PATTERNS); + pcre_free(pattern); + return (rc >= 0); +} + + +void EncKeys::printKeyEntry(uint32 id) +{ +#ifndef DBUG_OFF + keyentry *entry= getKeys(id); + if (NULL == entry) + { + sql_print_error("No such keyID: %u\n",id); + } + else + { + sql_print_error("Key: id: %3u\tiv:%lu bytes\tkey:%lu bytes\n", + entry->id, strlen(entry->iv)/2, strlen(entry->key)/2); + } +#endif /* DBUG_OFF */ +} diff --git a/plugin/file_key_management_plugin/EncKeys.h b/plugin/file_key_management_plugin/EncKeys.h new file mode 100644 index 00000000000..c0ab98b8e9d --- /dev/null +++ b/plugin/file_key_management_plugin/EncKeys.h @@ -0,0 +1,87 @@ +/* Copyright (C) 2014 eperi GmbH. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/******************************************************************//** +@file EncKeys.h +A structure and class to keep keys for encryption/decryption. + +Created 09/15/2014 +***********************************************************************/ + +#ifndef ENCKEYS_H_ +#define ENCKEYS_H_ + +#include <my_global.h> +#include <sys/types.h> +#include <stdio.h> + + + + + +struct keyentry { + uint32 id; + char *iv; + char *key; +}; + + +class EncKeys +{ +private: + static const char *strMAGIC, *newLine; + static const int magicSize; + + enum constants { MAX_OFFSETS_IN_PCRE_PATTERNS = 30}; + enum keyAttributes { KEY_MIN = 1, KEY_MAX = 255, MAX_KEYS = 255, + MAX_IVLEN = 256, MAX_KEYLEN = 512, ivSize16 = 16, keySize32 = 32 }; + enum keyInitType { KEYINITTYPE_FILE = 1, KEYINITTYPE_SERVER = 2 }; + enum errorAttributes { MAX_KEY_LINE_SIZE = 3 * MAX_KEYLEN, MAX_KEY_FILE_SIZE = 1048576 }; + enum errorCodesLine { NO_ERROR_PARSE_OK = 0, NO_ERROR_ISCOMMENT = 10, NO_ERROR_KEY_GREATER_THAN_ASKED = 20, + ERROR_NOINITIALIZEDKEY = 30, ERROR_ID_TOO_BIG = 40, ERROR_WRONG_NUMBER_OF_MATCHES = 50, + ERROR_EQUAL_DOUBLE_KEY = 60, ERROR_UNEQUAL_DOUBLE_KEY = 70 }; + + static const char *errorNoKeyId, *errorInMatches, *errorExceedKeyFileSize, + *errorExceedKeySize, *errorEqualDoubleKey, *errorUnequalDoubleKey, + *errorNoInitializedKey, 
*errorFalseFileKey, + *errorNotImplemented, *errorOpenFile, *errorReadingFile, *errorFileSize; + + static const char* initialPwd; + uint32 countKeys, keyLineInKeyFile; + keyentry keys[MAX_KEYS], *oneKey; + + void printKeyEntry( uint32 id); + bool isComment( const char *line); + char * decryptFile( const char* filename, const char *secret, int *errorCode); + int parseFile( const char* filename, const uint32 maxKeyId, const char *secret); + int parseLine( const char *line, const uint32 maxKeyId); + +public: + static const size_t MAX_SECRET_SIZE = 256; + + enum errorCodesFile { NO_ERROR_KEY_FILE_PARSE_OK = 0, ERROR_KEY_FILE_PARSE_NULL = 110, + ERROR_KEY_FILE_TOO_BIG = 120, ERROR_KEY_FILE_EXCEEDS_MAX_NUMBERS_OF_KEYS = 130, + ERROR_OPEN_FILE = 140, ERROR_READING_FILE = 150, ERROR_FALSE_FILE_KEY = 160, + ERROR_KEYINITTYPE_SERVER_NOT_IMPLEMENTED = 170, ERROR_ENCRYPTION_SECRET_NULL = 180 }; + EncKeys(); + virtual ~EncKeys(); + bool initKeys( const char *filename, const char *filekey); + keyentry *getKeys( int id); + /* made public for unit testing */ + static void parseSecret( const char *filename, char *secret ); + +}; + +#endif /* ENCKEYS_H_ */ diff --git a/plugin/file_key_management_plugin/KeySingleton.cc b/plugin/file_key_management_plugin/KeySingleton.cc new file mode 100644 index 00000000000..936a7cf71a9 --- /dev/null +++ b/plugin/file_key_management_plugin/KeySingleton.cc @@ -0,0 +1,64 @@ +/* Copyright (C) 2014 eperi GmbH. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/******************************************************************//** +@file KeySingleton.cc +Implementation of single pattern to keep keys for encrypting/decrypting pages. + +Created 09/13/2014 +***********************************************************************/ + + +#include <my_global.h> +#include <sql_class.h> +#include "KeySingleton.h" + +bool KeySingleton::instanceInited = false; +KeySingleton KeySingleton::theInstance; +EncKeys KeySingleton::encKeys; + +KeySingleton & KeySingleton::getInstance() +{ +#ifndef DBUG_OFF + if( !instanceInited) + { + sql_print_error("Encryption / decryption keys were not initialized. " + "You can not read encrypted tables or columns\n"); + } +#endif /* DBUG_OFF */ + return theInstance; +} + +KeySingleton & KeySingleton::getInstance(const char *filename, + const char *filekey) +{ + if (!instanceInited) + { + if (!(instanceInited = encKeys.initKeys(filename, filekey))) + sql_print_error("Could not initialize any of the encryption / " + "decryption keys. You can not read encrypted tables"); + } + return theInstance; +} + +keyentry *KeySingleton::getKeys(int id) +{ + return encKeys.getKeys(id); +} + +bool KeySingleton::hasKey(int id) +{ + return encKeys.getKeys(id) != NULL; +} diff --git a/plugin/file_key_management_plugin/KeySingleton.h b/plugin/file_key_management_plugin/KeySingleton.h new file mode 100644 index 00000000000..37246c1b802 --- /dev/null +++ b/plugin/file_key_management_plugin/KeySingleton.h @@ -0,0 +1,58 @@ +/* Copyright (C) 2014 eperi GmbH. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/******************************************************************//** +@file KeySingletonPattern.h +Implementation of single pattern to keep keys for encrypting/decrypting pages. + +Created 09/13/2014 +***********************************************************************/ + + +#ifndef KEYSINGLETON_H_ +#define KEYSINGLETON_H_ + +#include "EncKeys.h" + + +class KeySingleton +{ +private: + static bool instanceInited; + static KeySingleton theInstance; + static EncKeys encKeys; + + // No new instance or object possible + KeySingleton() {} + + // No new instance possible through copy constructor + KeySingleton( const KeySingleton&) {} + + // No new instance possible through copy + KeySingleton & operator = (const KeySingleton&); + +public: + virtual ~KeySingleton() {encKeys.~EncKeys();} + static KeySingleton& getInstance(); + // Init the instance for only one time + static KeySingleton& getInstance(const char *filename, const char *filekey); + keyentry *getKeys(int id); + bool hasKey(int id); + static bool isAvailable() { + return instanceInited; + } +}; + +#endif /* KEYSINGLETON_H_ */ diff --git a/plugin/file_key_management_plugin/file_key_management_plugin.cc b/plugin/file_key_management_plugin/file_key_management_plugin.cc new file mode 100644 index 00000000000..11892d74385 --- /dev/null +++ b/plugin/file_key_management_plugin/file_key_management_plugin.cc @@ -0,0 +1,201 @@ +/* Copyright (c) 2002, 2012, eperi GmbH. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + + +#include <my_global.h> +#include <mysql_version.h> +#include <my_aes.h> +#include <my_crypt_key_management.h> +#include "sql_class.h" +#include "KeySingleton.h" +#include "EncKeys.h" + +/* Encryption for tables and columns */ +static char* filename = NULL; +static char* filekey = NULL; + +static MYSQL_SYSVAR_STR(filename, filename, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Path and name of the key file.", + NULL, NULL, NULL); + +static MYSQL_SYSVAR_STR(filekey, filekey, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Key to encrypt / decrypt the keyfile.", + NULL, NULL, NULL); + +static struct st_mysql_sys_var* settings[] = { + MYSQL_SYSVAR(filename), + MYSQL_SYSVAR(filekey), + NULL +}; + + + +/** + This method is using with the id 0 if exists. + This method is used by innobase/xtradb for the key + rotation feature of encrypting log files. 
+*/ + +static int get_highest_key_used_in_key_file() +{ + if (KeySingleton::getInstance().hasKey(0)) + { + return 0; + } + else + return CRYPT_KEY_UNKNOWN; +} + +static unsigned int has_key_from_key_file(unsigned int keyID) +{ + keyentry* entry = KeySingleton::getInstance().getKeys(keyID); + + return entry != NULL; +} + +static int get_key_size_from_key_file(unsigned int keyID) +{ + keyentry* entry = KeySingleton::getInstance().getKeys(keyID); + + if (entry != NULL) + { + char* keyString = entry->key; + size_t key_len = strlen(keyString)/2; + + return key_len; + } + else + { + return CRYPT_KEY_UNKNOWN; + } +} + +static int get_key_from_key_file(unsigned int keyID, unsigned char* dstbuf, + unsigned buflen) +{ + keyentry* entry = KeySingleton::getInstance().getKeys((int)keyID); + + if (entry != NULL) + { + char* keyString = entry->key; + size_t key_len = strlen(keyString)/2; + + if (buflen < key_len) + { + return CRYPT_BUFFER_TO_SMALL; + } + + my_aes_hex2uint(keyString, (unsigned char*)dstbuf, key_len); + + return CRYPT_KEY_OK; + } + else + { + return CRYPT_KEY_UNKNOWN; + } +} + +static int get_iv_from_key_file(unsigned int keyID, unsigned char* dstbuf, + unsigned buflen) +{ + keyentry* entry = KeySingleton::getInstance().getKeys((int)keyID); + + if (entry != NULL) + { + char* ivString = entry->iv; + size_t iv_len = strlen(ivString)/2; + + if (buflen < iv_len) + { + return CRYPT_BUFFER_TO_SMALL; + } + + my_aes_hex2uint(ivString, (unsigned char*)dstbuf, iv_len); + + return CRYPT_KEY_OK; + } + else + { + return CRYPT_KEY_UNKNOWN; + } +} + + +static int file_key_management_plugin_init(void *p) +{ + /* init */ + + if (current_aes_dynamic_method == MY_AES_ALGORITHM_NONE) + { + sql_print_error("No encryption method choosen with --encryption-algorithm. 
" + "file_key_management_plugin disabled"); + return 1; + } + + /* Initializing the key provider */ + struct CryptoKeyFuncs_t func; + func.getLatestCryptoKeyVersionFunc = get_highest_key_used_in_key_file; + func.hasCryptoKeyFunc = has_key_from_key_file; + func.getCryptoKeySize = get_key_size_from_key_file; + func.getCryptoKeyFunc = get_key_from_key_file; + func.getCryptoIVFunc = get_iv_from_key_file; + + InstallCryptoKeyFunctions(&func); + + if (filename == NULL || strcmp("", filename) == 0) + { + sql_print_error("Parameter file_key_management_plugin_filename is required"); + + return 1; + } + + KeySingleton::getInstance(filename, filekey); + + return 0; +} + +static int file_key_management_plugin_deinit(void *p) +{ + KeySingleton::getInstance().~KeySingleton(); + + return 0; +} + +struct st_mysql_daemon file_key_management_plugin= { + MYSQL_DAEMON_INTERFACE_VERSION +}; + +/* + Plugin library descriptor +*/ +maria_declare_plugin(file_key_management_plugin) +{ + MYSQL_KEY_MANAGEMENT_PLUGIN, + &file_key_management_plugin, + "file_key_management_plugin", + "Denis Endro eperi GmbH", + "File key management plugin", + PLUGIN_LICENSE_GPL, + file_key_management_plugin_init, /* Plugin Init */ + file_key_management_plugin_deinit, /* Plugin Deinit */ + 0x0100 /* 1.0 */, + NULL, /* status variables */ + settings, + "1.0", + MariaDB_PLUGIN_MATURITY_UNKNOWN +} +maria_declare_plugin_end; diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 25d5613669d..50e29aa4b7c 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -383,8 +383,8 @@ String *Item_func_aes_encrypt::val_str(String *str) if (!str_value.alloc(aes_length)) // Ensure that memory is free { // finally encrypt directly to allocated buffer. 
- if (my_aes_encrypt(sptr->ptr(),sptr->length(), (char*) str_value.ptr(), - key->ptr(), key->length()) == aes_length) + if (my_aes_encrypt((const uchar*) sptr->ptr(), sptr->length(), (uchar*) str_value.ptr(), + key->ptr(), key->length()) == aes_length) { // We got the expected result length str_value.length((uint) aes_length); @@ -420,8 +420,8 @@ String *Item_func_aes_decrypt::val_str(String *str) { // finally decrypt directly to allocated buffer. int length; - length=my_aes_decrypt(sptr->ptr(), sptr->length(), - (char*) str_value.ptr(), + length=my_aes_decrypt((const uchar*)sptr->ptr(), sptr->length(), + (uchar*) str_value.ptr(), key->ptr(), key->length()); if (length >= 0) // if we got correct data data { diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 885d37d1cd7..da02b952f3c 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -105,6 +105,7 @@ #include "sp_rcontext.h" #include "sp_cache.h" #include "sql_reload.h" // reload_acl_and_cache +#include <my_aes.h> #ifdef HAVE_POLL_H #include <poll.h> @@ -627,6 +628,9 @@ char server_version[SERVER_VERSION_LENGTH]; char *mysqld_unix_port, *opt_mysql_tmpdir; ulong thread_handling; +my_bool encrypt_tmp_disk_tables; +ulong encryption_algorithm; + /** name of reference on left expression in rewritten IN subquery */ const char *in_left_expr_name= "<left expr>"; /** name of additional condition */ @@ -4797,6 +4801,14 @@ static int init_server_components() my_rnd_init(&sql_rand,(ulong) server_start_time,(ulong) server_start_time/2); setup_fpu(); init_thr_lock(); + if (my_aes_init_dynamic_encrypt((enum_my_aes_encryption_algorithm) + encryption_algorithm)) + { + fprintf(stderr, "Can't initialize encryption algorithm to \"%s\".\nCheck that the program is linked with the right library (openssl?)\n", + encryption_algorithm_names[encryption_algorithm]); + unireg_abort(1); + } + #ifndef EMBEDDED_LIBRARY if (init_thr_timer(thread_scheduler->max_threads + extra_max_connections)) { diff --git a/sql/mysqld.h b/sql/mysqld.h index 
6a1ad65bd67..71d586cc4c1 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -249,6 +249,9 @@ extern ulong connection_errors_internal; extern ulong connection_errors_max_connection; extern ulong connection_errors_peer_addr; extern ulong log_warnings; +extern my_bool encrypt_tmp_disk_tables; +extern ulong encryption_algorithm; +extern const char *encryption_algorithm_names[]; /* THR_MALLOC is a key which will be used to set/get MEM_ROOT** for a thread, diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc index 30f840301ba..8fce66ae19f 100644 --- a/sql/sql_plugin.cc +++ b/sql/sql_plugin.cc @@ -1233,9 +1233,24 @@ static void reap_plugins(void) mysql_mutex_unlock(&LOCK_plugin); + /* + First free all normal plugins, last the key management plugin. + This is becasue the storage engines may need the key management plugin + during deinitialization. + */ + list= reap; + while ((plugin= *(--list))) + { + if (plugin->plugin->type != MYSQL_KEY_MANAGEMENT_PLUGIN) + plugin_deinitialize(plugin, true); + } + list= reap; while ((plugin= *(--list))) - plugin_deinitialize(plugin, true); + { + if (plugin->state != PLUGIN_IS_UNINITIALIZED) + plugin_deinitialize(plugin, true); + } mysql_mutex_lock(&LOCK_plugin); @@ -1481,7 +1496,7 @@ static void init_plugin_psi_keys(void) */ int plugin_init(int *argc, char **argv, int flags) { - uint i; + uint i,j; bool is_myisam; struct st_maria_plugin **builtins; struct st_maria_plugin *plugin; @@ -1631,16 +1646,22 @@ int plugin_init(int *argc, char **argv, int flags) reap= (st_plugin_int **) my_alloca((plugin_array.elements+1) * sizeof(void*)); *(reap++)= NULL; - for (i= 0; i < plugin_array.elements; i++) + /* first MYSQL_KEY_MANAGEMENT_PLUGIN, then the rest */ + for (j= 0 ; j <= 1; j++) { - plugin_ptr= *dynamic_element(&plugin_array, i, struct st_plugin_int **); - if (plugin_ptr->plugin_dl && plugin_ptr->state == PLUGIN_IS_UNINITIALIZED) + for (i= 0; i < plugin_array.elements; i++) { - if (plugin_initialize(&tmp_root, plugin_ptr, argc, argv, - (flags & 
PLUGIN_INIT_SKIP_INITIALIZATION))) + plugin_ptr= *dynamic_element(&plugin_array, i, struct st_plugin_int **); + if (((j == 0 && plugin->type == MYSQL_KEY_MANAGEMENT_PLUGIN) || j > 0) && + plugin_ptr->plugin_dl && + plugin_ptr->state == PLUGIN_IS_UNINITIALIZED) { - plugin_ptr->state= PLUGIN_IS_DYING; - *(reap++)= plugin_ptr; + if (plugin_initialize(&tmp_root, plugin_ptr, argc, argv, + (flags & PLUGIN_INIT_SKIP_INITIALIZATION))) + { + plugin_ptr->state= PLUGIN_IS_DYING; + *(reap++)= plugin_ptr; + } } } } diff --git a/sql/sql_plugin.h b/sql/sql_plugin.h index 9cfdba1edcd..2bfdcb29dfc 100644 --- a/sql/sql_plugin.h +++ b/sql/sql_plugin.h @@ -78,8 +78,6 @@ typedef struct st_mysql_show_var SHOW_VAR; #define PLUGIN_IS_DYING 16 #define PLUGIN_IS_DISABLED 32 -/* A handle for the dynamic library containing a plugin or plugins. */ - struct st_ptr_backup { void **ptr; void *value; @@ -88,6 +86,8 @@ struct st_ptr_backup { void restore() { *ptr= value; } }; +/* A handle for the dynamic library containing a plugin or plugins. */ + struct st_plugin_dl { LEX_STRING dl; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 87e2206cfb5..9d85c2646be 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -16886,6 +16886,7 @@ bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, MARIA_UNIQUEDEF uniquedef; TABLE_SHARE *share= table->s; MARIA_CREATE_INFO create_info; + my_bool encrypt= encrypt_tmp_disk_tables; DBUG_ENTER("create_internal_tmp_table"); if (share->keys) @@ -16988,24 +16989,56 @@ bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, delete the row. The cases when this can happen is when there is a group by and no sum functions or if distinct is used. */ - if ((error= maria_create(share->table_name.str, - table->no_rows ? NO_RECORD : - (share->reclength < 64 && - !share->blob_fields ? STATIC_RECORD : - table->used_for_duplicate_elimination || - table->keep_row_order ? 
- DYNAMIC_RECORD : BLOCK_RECORD), - share->keys, &keydef, - (uint) (*recinfo-start_recinfo), - start_recinfo, - share->uniques, &uniquedef, - &create_info, - HA_CREATE_TMP_TABLE | HA_CREATE_INTERNAL_TABLE))) { - table->file->print_error(error,MYF(0)); /* purecov: inspected */ - table->db_stat=0; - goto err; + enum data_file_type file_type= table->no_rows ? NO_RECORD : + (share->reclength < 64 && !share->blob_fields ? STATIC_RECORD : + table->used_for_duplicate_elimination || table->keep_row_order ? + DYNAMIC_RECORD : BLOCK_RECORD); + uint create_flags= HA_CREATE_TMP_TABLE | HA_CREATE_INTERNAL_TABLE; + + if (file_type != NO_RECORD && MY_TEST(encrypt)) + { + /* encryption is only supported for BLOCK_RECORD */ + file_type= BLOCK_RECORD; + create_flags|= HA_CREATE_ENCRYPTED; + if (table->keep_row_order) + { + create_flags|= HA_INSERT_ORDER; + } + + if (table->used_for_duplicate_elimination) + { + /* + sql-layer expect the last column to be stored/restored also + when it's null. + + This is probably a bug (that sql-layer doesn't annotate + the column as not-null) but both heap, aria-static, aria-dynamic and + myisam has this property. aria-block_record does not since it + does not store null-columns at all. + Emulate behaviour by making column not-nullable when creating the + table. 
+ */ + uint cols= (*recinfo-start_recinfo); + start_recinfo[cols-1].null_bit= 0; + } + } + + if ((error= maria_create(share->table_name.str, + file_type, + share->keys, &keydef, + (uint) (*recinfo-start_recinfo), + start_recinfo, + share->uniques, &uniquedef, + &create_info, + create_flags))) + { + table->file->print_error(error,MYF(0)); /* purecov: inspected */ + table->db_stat=0; + goto err; + } } + table->in_use->inc_status_created_tmp_disk_tables(); table->in_use->query_plan_flags|= QPLAN_TMP_DISK; share->db_record_offset= 1; diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 1a4628d0d72..e7092354606 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -62,6 +62,7 @@ #include "sql_repl.h" #include "opt_range.h" #include "rpl_parallel.h" +#include "my_crypt_key_management.h" /* The rule for this file: everything should be 'static'. When a sys_var @@ -1124,6 +1125,26 @@ static Sys_var_mybool Sys_log_bin( "log_bin", "Whether the binary log is enabled", READ_ONLY GLOBAL_VAR(opt_bin_log), NO_CMD_LINE, DEFAULT(FALSE)); + +#ifndef DBUG_OFF +static Sys_var_mybool Sys_debug_use_static_keys( + "debug_use_static_encryption_keys", + "Enable use of nonrandom encryption keys. Only to be used in " + "internal testing", + READ_ONLY GLOBAL_VAR(debug_use_static_encryption_keys), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +static PolyLock_rwlock PLock_sys_debug_encryption_key_version( + &LOCK_dbug_encryption_key_version); + +static Sys_var_uint Sys_debug_encryption_key_version( + "debug_encryption_key_version", + "Encryption key version. 
Only to be used in internal testing.", + GLOBAL_VAR(opt_debug_encryption_key_version), + CMD_LINE(REQUIRED_ARG), VALID_RANGE(0,UINT_MAX), DEFAULT(0), + BLOCK_SIZE(1), &PLock_sys_debug_encryption_key_version); +#endif + static Sys_var_mybool Sys_trust_function_creators( "log_bin_trust_function_creators", "If set to FALSE (the default), then when --log-bin is used, creation " @@ -5133,6 +5154,20 @@ static Sys_var_harows Sys_expensive_subquery_limit( SESSION_VAR(expensive_subquery_limit), CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, HA_POS_ERROR), DEFAULT(100), BLOCK_SIZE(1)); +static Sys_var_mybool Sys_encrypt_tmp_disk_tables( + "encrypt_tmp_disk_tables", + "Encrypt tmp disk tables (created as part of query execution)", + GLOBAL_VAR(encrypt_tmp_disk_tables), + CMD_LINE(OPT_ARG), DEFAULT(FALSE)); + +const char *encryption_algorithm_names[]= +{ "none", "aes_ecb", "aes_cbc", "aes_ctr", 0 }; +static Sys_var_enum Sys_encryption_algorithm( + "encryption_algorithm", + "Which encryption algorithm to use for table encryption. 
aes_cbc is the recommended one.", + READ_ONLY GLOBAL_VAR(encryption_algorithm),CMD_LINE(REQUIRED_ARG), + encryption_algorithm_names, DEFAULT(0)); + static bool check_pseudo_slave_mode(sys_var *self, THD *thd, set_var *var) { longlong previous_val= thd->variables.pseudo_slave_mode; diff --git a/sql/sys_vars.h b/sql/sys_vars.h index 61af931c189..2b83e9747b7 100644 --- a/sql/sys_vars.h +++ b/sql/sys_vars.h @@ -94,6 +94,7 @@ enum charset_enum {IN_SYSTEM_CHARSET, IN_FS_CHARSET}; static const char *bool_values[3]= {"OFF", "ON", 0}; TYPELIB bool_typelib={ array_elements(bool_values)-1, "", bool_values, 0 }; +extern const char *encrypt_algorithm_names[]; /** A small wrapper class to pass getopt arguments as a pair diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index 964294a962d..e3e1e70feb7 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -336,6 +336,7 @@ SET(INNOBASE_SOURCES btr/btr0btr.cc btr/btr0cur.cc btr/btr0pcur.cc + btr/btr0scrub.cc btr/btr0sea.cc btr/btr0defragment.cc buf/buf0buddy.cc @@ -360,7 +361,8 @@ SET(INNOBASE_SOURCES eval/eval0eval.cc eval/eval0proc.cc fil/fil0fil.cc - fil/fil0pagecompress.cc + fil/fil0pagecompress.cc + fil/fil0crypt.cc fsp/fsp0fsp.cc fut/fut0fut.cc fut/fut0lst.cc @@ -385,6 +387,7 @@ SET(INNOBASE_SOURCES lock/lock0wait.cc log/log0log.cc log/log0recv.cc + log/log0crypt.cc mach/mach0data.cc mem/mem0mem.cc mem/mem0pool.cc @@ -469,5 +472,5 @@ ENDIF() MYSQL_ADD_PLUGIN(innobase ${INNOBASE_SOURCES} STORAGE_ENGINE MODULE_ONLY MODULE_OUTPUT_NAME ha_innodb - LINK_LIBRARIES ${ZLIB_LIBRARY} ${LINKER_SCRIPT}) + LINK_LIBRARIES ${ZLIB_LIBRARY} ${LINKER_SCRIPT} pcre pcreposix mysys_ssl) diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index ff27b470974..92539ce1524 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -1137,9 +1137,27 @@ btr_page_alloc_low( reservation for free extents, and thus we know that a page can be 
allocated: */ - return(fseg_alloc_free_page_general( - seg_header, hint_page_no, file_direction, - TRUE, mtr, init_mtr)); + buf_block_t* block = fseg_alloc_free_page_general( + seg_header, hint_page_no, file_direction, + TRUE, mtr, init_mtr); + +#ifdef UNIV_DEBUG_SCRUBBING + if (block != NULL) { + fprintf(stderr, + "alloc %lu:%lu to index: %lu root: %lu\n", + buf_block_get_page_no(block), + buf_block_get_space(block), + index->id, + dict_index_get_page(index)); + } else { + fprintf(stderr, + "failed alloc index: %lu root: %lu\n", + index->id, + dict_index_get_page(index)); + } +#endif /* UNIV_DEBUG_SCRUBBING */ + + return block; } /**************************************************************//** @@ -1287,6 +1305,7 @@ btr_page_free_low( dict_index_t* index, /*!< in: index tree */ buf_block_t* block, /*!< in: block to be freed, x-latched */ ulint level, /*!< in: page level */ + bool blob, /*!< in: blob page */ mtr_t* mtr) /*!< in: mtr */ { fseg_header_t* seg_header; @@ -1299,6 +1318,76 @@ btr_page_free_low( buf_block_modify_clock_inc(block); btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block)); + if (blob) { + ut_a(level == 0); + } + + bool scrub = srv_immediate_scrub_data_uncompressed; + /* scrub page */ + if (scrub && blob) { + /* blob page: scrub entire page */ + // TODO(jonaso): scrub only what is actually needed + page_t* page = buf_block_get_frame(block); + memset(page + PAGE_HEADER, 0, + UNIV_PAGE_SIZE - PAGE_HEADER); +#ifdef UNIV_DEBUG_SCRUBBING + fprintf(stderr, + "btr_page_free_low: scrub blob page %lu/%lu\n", + buf_block_get_space(block), + buf_block_get_page_no(block)); +#endif /* UNIV_DEBUG_SCRUBBING */ + } else if (scrub) { + /* scrub records on page */ + + /* TODO(jonaso): in theory we could clear full page + * but, since page still remains in buffer pool, and + * gets flushed etc. Lots of routines validates consistency + * of it. 
And in order to remain structurally consistent + * we clear each record by it own + * + * NOTE: The TODO below mentions removing page from buffer pool + * and removing redo entries, once that is done, clearing full + * pages should be possible + */ + uint cnt = 0; + uint bytes = 0; + page_t* page = buf_block_get_frame(block); + mem_heap_t* heap = NULL; + ulint* offsets = NULL; + rec_t* rec = page_rec_get_next(page_get_infimum_rec(page)); + while (!page_rec_is_supremum(rec)) { + offsets = rec_get_offsets(rec, index, + offsets, ULINT_UNDEFINED, + &heap); + uint size = rec_offs_data_size(offsets); + memset(rec, 0, size); + rec = page_rec_get_next(rec); + cnt++; + bytes += size; + } +#ifdef UNIV_DEBUG_SCRUBBING + fprintf(stderr, + "btr_page_free_low: scrub %lu/%lu - " + "%u records %u bytes\n", + buf_block_get_space(block), + buf_block_get_page_no(block), + cnt, bytes); +#endif /* UNIV_DEBUG_SCRUBBING */ + if (heap) { + mem_heap_free(heap); + } + } + +#ifdef UNIV_DEBUG_SCRUBBING + if (scrub == false) { + fprintf(stderr, + "btr_page_free_low %lu/%lu blob: %u\n", + buf_block_get_space(block), + buf_block_get_page_no(block), + blob); + } +#endif /* UNIV_DEBUG_SCRUBBING */ + if (dict_index_is_ibuf(index)) { btr_page_free_for_ibuf(index, block, mtr); @@ -1314,6 +1403,14 @@ btr_page_free_low( seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; } + if (scrub) { + /** + * Reset page type so that scrub thread won't try to scrub it + */ + mlog_write_ulint(buf_block_get_frame(block) + FIL_PAGE_TYPE, + FIL_PAGE_TYPE_ALLOCATED, MLOG_2BYTES, mtr); + } + fseg_free_page(seg_header, buf_block_get_space(block), buf_block_get_page_no(block), mtr); @@ -1343,7 +1440,7 @@ btr_page_free( ulint level = btr_page_get_level(page, mtr); ut_ad(fil_page_get_type(block->frame) == FIL_PAGE_INDEX); - btr_page_free_low(index, block, level, mtr); + btr_page_free_low(index, block, level, false, mtr); } /**************************************************************//** @@ -2284,9 +2381,14 @@ 
btr_root_raise_and_insert( ibuf_reset_free_bits(new_block); } - /* Reposition the cursor to the child node */ - page_cur_search(new_block, index, tuple, - PAGE_CUR_LE, page_cursor); + if (tuple != NULL) { + /* Reposition the cursor to the child node */ + page_cur_search(new_block, index, tuple, + PAGE_CUR_LE, page_cursor); + } else { + /* Set cursor to first record on child node */ + page_cur_set_before_first(new_block, page_cursor); + } /* Split the child and insert tuple */ return(btr_page_split_and_insert(flags, cursor, offsets, heap, @@ -2962,6 +3064,9 @@ function must always succeed, we cannot reverse it: therefore enough free disk space (2 pages) must be guaranteed to be available before this function is called. +NOTE: jonaso added support for calling function with tuple == NULL +which cause it to only split a page. + @return inserted record */ UNIV_INTERN rec_t* @@ -3039,7 +3144,7 @@ func_start: half-page */ insert_left = FALSE; - if (n_iterations > 0) { + if (tuple != NULL && n_iterations > 0) { direction = FSP_UP; hint_page_no = page_no + 1; split_rec = btr_page_get_split_rec(cursor, tuple, n_ext); @@ -3100,7 +3205,12 @@ func_start: *offsets = rec_get_offsets(split_rec, cursor->index, *offsets, n_uniq, heap); - insert_left = cmp_dtuple_rec(tuple, split_rec, *offsets) < 0; + if (tuple != NULL) { + insert_left = cmp_dtuple_rec( + tuple, split_rec, *offsets) < 0; + } else { + insert_left = 1; + } if (!insert_left && new_page_zip && n_iterations > 0) { /* If a compressed page has already been split, @@ -3134,8 +3244,10 @@ insert_empty: on the appropriate half-page, we may release the tree x-latch. We can then move the records after releasing the tree latch, thus reducing the tree latch contention. */ - - if (split_rec) { + if (tuple == NULL) { + insert_will_fit = 1; + } + else if (split_rec) { insert_will_fit = !new_page_zip && btr_page_insert_fits(cursor, split_rec, offsets, tuple, n_ext, heap); @@ -3256,6 +3368,11 @@ insert_empty: /* 6. 
The split and the tree modification is now completed. Decide the page where the tuple should be inserted */ + if (tuple == NULL) { + rec = NULL; + goto func_exit; + } + if (insert_left) { insert_block = left_block; } else { @@ -3343,6 +3460,9 @@ func_exit: ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index)); ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index)); + if (tuple == NULL) { + ut_ad(rec == NULL); + } ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets)); return(rec); } diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index e68179a5c19..3992eda6e1c 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -4738,11 +4738,11 @@ alloc_another: change when B-tree nodes are split or merged. */ mlog_write_ulint(page - + FIL_PAGE_FILE_FLUSH_LSN, + + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, space_id, MLOG_4BYTES, &mtr); mlog_write_ulint(page - + FIL_PAGE_FILE_FLUSH_LSN + 4, + + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4, rec_page_no, MLOG_4BYTES, &mtr); @@ -4750,9 +4750,10 @@ alloc_another: memset(page + page_zip_get_size(page_zip) - c_stream.avail_out, 0, c_stream.avail_out); - mlog_log_string(page + FIL_PAGE_FILE_FLUSH_LSN, + mlog_log_string(page + + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, page_zip_get_size(page_zip) - - FIL_PAGE_FILE_FLUSH_LSN, + - FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, &mtr); /* Copy the page to compressed storage, because it will be flushed to disk @@ -4925,7 +4926,7 @@ func_exit: ut_ad(btr_blob_op_is_update(op)); for (i = 0; i < n_freed_pages; i++) { - btr_page_free_low(index, freed_pages[i], 0, alloc_mtr); + btr_page_free_low(index, freed_pages[i], 0, true, alloc_mtr); } DBUG_EXECUTE_IF("btr_store_big_rec_extern", @@ -5163,7 +5164,7 @@ btr_free_externally_stored_field( } next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT); - btr_page_free_low(index, ext_block, 0, &mtr); + btr_page_free_low(index, ext_block, 0, true, &mtr); if (page_zip != 
NULL) { mach_write_to_4(field_ref + BTR_EXTERN_PAGE_NO, @@ -5194,7 +5195,7 @@ btr_free_externally_stored_field( because we did not store it on the page (we save the space overhead from an index page header. */ - btr_page_free_low(index, ext_block, 0, &mtr); + btr_page_free_low(index, ext_block, 0, true, &mtr); mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO, next_page_no, diff --git a/storage/innobase/btr/btr0scrub.cc b/storage/innobase/btr/btr0scrub.cc new file mode 100644 index 00000000000..d53b478e429 --- /dev/null +++ b/storage/innobase/btr/btr0scrub.cc @@ -0,0 +1,898 @@ +// Copyright (c) 2014, Google Inc. + +/**************************************************//** +@file btr/btr0scrub.cc +Scrubbing of btree pages + +*******************************************************/ + +#include "btr0btr.h" +#include "btr0cur.h" +#include "btr0scrub.h" +#include "ibuf0ibuf.h" +#include "fsp0fsp.h" +#include "dict0dict.h" +#include "mtr0mtr.h" + +/* used when trying to acquire dict-lock */ +UNIV_INTERN bool fil_crypt_is_closing(ulint space); + +/** +* scrub data at delete time (e.g purge thread) +*/ +my_bool srv_immediate_scrub_data_uncompressed = false; + +/** +* background scrub uncompressed data +* +* if srv_immediate_scrub_data_uncompressed is enabled +* this is only needed to handle "old" data +*/ +my_bool srv_background_scrub_data_uncompressed = false; + +/** +* backgrounds scrub compressed data +* +* reorganize compressed page for scrubbing +* (only way to scrub compressed data) +*/ +my_bool srv_background_scrub_data_compressed = false; + +/* check spaces once per hour */ +UNIV_INTERN uint srv_background_scrub_data_check_interval = (60 * 60); + +/* default to scrub spaces that hasn't been scrubbed in a week */ +UNIV_INTERN uint srv_background_scrub_data_interval = (7 * 24 * 60 * 60); + +/** +* statistics for scrubbing by background threads +*/ +static btr_scrub_stat_t scrub_stat; +static ib_mutex_t scrub_stat_mutex; +#ifdef UNIV_PFS_MUTEX +UNIV_INTERN mysql_pfs_key_t 
scrub_stat_mutex_key; +#endif + +#ifdef UNIV_DEBUG +/** +* srv_scrub_force_testing +* +* - force scrubbing using background threads even for uncompressed tables +* - force pessimistic scrubbing (page split) even if not needed +* (see test_pessimistic_scrub_pct) +*/ +my_bool srv_scrub_force_testing = true; + +/** +* Force pessimistic scrubbing in 50% of the cases (UNIV_DEBUG only) +*/ +static int test_pessimistic_scrub_pct = 50; + +#endif +static uint scrub_compression_level = page_zip_level; + +/**************************************************************//** +Log a scrubbing failure */ +static +void +log_scrub_failure( +/*===============*/ + btr_scrub_t* scrub_data, /*!< in: data to store statistics on */ + buf_block_t* block, /*!< in: block */ + dberr_t err) /*!< in: error */ +{ + const char* reason = "unknown"; + switch(err) { + case DB_UNDERFLOW: + reason = "too few records on page"; + scrub_data->scrub_stat.page_split_failures_underflow++; + break; + case DB_INDEX_CORRUPT: + reason = "unable to find index!"; + scrub_data->scrub_stat.page_split_failures_missing_index++; + break; + case DB_OUT_OF_FILE_SPACE: + reason = "out of filespace"; + scrub_data->scrub_stat.page_split_failures_out_of_filespace++; + break; + default: + ut_ad(0); + reason = "unknown"; + scrub_data->scrub_stat.page_split_failures_unknown++; + } + fprintf(stderr, + "InnoDB: Warning: Failed to scrub page %lu in space %lu : %s\n", + buf_block_get_page_no(block), + buf_block_get_space(block), + reason); +} + +/**************************************************************** +Lock dict mutexes */ +static +bool +btr_scrub_lock_dict_func(ulint space, bool lock_to_close_table, + const char * file, uint line) +{ + uint start = time(0); + uint last = start; + + while (mutex_enter_nowait_func(&(dict_sys->mutex), file, line)) { + /* if we lock to close a table, we wait forever + * if we don't lock to close a table, we check if space + * is closing, and then instead give up + */ + if 
(lock_to_close_table == false) { + if (fil_crypt_is_closing(space)) { + return false; + } + } + os_thread_sleep(250000); + + uint now = time(0); + if (now >= last + 30) { + fprintf(stderr, + "WARNING: %s:%u waited %u seconds for" + " dict_sys lock, space: %lu" + " lock_to_close_table: %u\n", + file, line, now - start, space, + lock_to_close_table); + + last = now; + } + } + + ut_ad(mutex_own(&dict_sys->mutex)); + return true; +} + +#define btr_scrub_lock_dict(space, lock_to_close_table) \ + btr_scrub_lock_dict_func(space, lock_to_close_table, __FILE__, __LINE__) + +/**************************************************************** +Unlock dict mutexes */ +static +void +btr_scrub_unlock_dict() +{ + dict_mutex_exit_for_mysql(); +} + +/**************************************************************** +Release reference to table +*/ +static +void +btr_scrub_table_close( +/*==================*/ + dict_table_t* table) /*!< in: table */ +{ + bool dict_locked = true; + bool try_drop = false; + table->stats_bg_flag &= ~BG_SCRUB_IN_PROGRESS; + dict_table_close(table, dict_locked, try_drop); +} + +/**************************************************************** +Release reference to table +*/ +static +void +btr_scrub_table_close_for_thread( + btr_scrub_t *scrub_data) +{ + if (scrub_data->current_table == NULL) + return; + + bool lock_for_close = true; + btr_scrub_lock_dict(scrub_data->space, lock_for_close); + + /* perform the actual closing */ + btr_scrub_table_close(scrub_data->current_table); + + btr_scrub_unlock_dict(); + + scrub_data->current_table = NULL; + scrub_data->current_index = NULL; +} + +/**************************************************************//** +Check if scrubbing is turned ON or OFF */ +static +bool +check_scrub_setting( +/*=====================*/ + btr_scrub_t* scrub_data) /*!< in: scrub data */ +{ + if (scrub_data->compressed) + return srv_background_scrub_data_compressed; + else + return srv_background_scrub_data_uncompressed; +} + +#define 
IBUF_INDEX_ID (DICT_IBUF_ID_MIN + IBUF_SPACE_ID) + +/**************************************************************//** +Check if a page needs scrubbing */ +UNIV_INTERN +int +btr_page_needs_scrubbing( +/*=====================*/ + btr_scrub_t* scrub_data, /*!< in: scrub data */ + buf_block_t* block, /*!< in: block to check, latched */ + btr_scrub_page_allocation_status_t allocated) /*!< in: is block known + to be allocated */ +{ + /** + * Check if scrubbing has been turned OFF. + * + * at start of space, we check if scrubbing is ON or OFF + * here we only check if scrubbing is turned OFF. + * + * Motivation is that it's only valueable to have a full table (space) + * scrubbed. + */ + if (!check_scrub_setting(scrub_data)) { + bool before_value = scrub_data->scrubbing; + scrub_data->scrubbing = false; + + if (before_value == true) { + /* we toggle scrubbing from on to off */ + return BTR_SCRUB_TURNED_OFF; + } + } + + if (scrub_data->scrubbing == false) { + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + + page_t* page = buf_block_get_frame(block); + uint type = fil_page_get_type(page); + + if (allocated == BTR_SCRUB_PAGE_ALLOCATED) { + if (type != FIL_PAGE_INDEX) { + /* this function is called from fil-crypt-threads. + * these threads iterate all pages of all tablespaces + * and don't know about fil_page_type. + * But scrubbing is only needed for index-pages. */ + + /** + * NOTE: scrubbing is also needed for UNDO pages, + * but they are scrubbed at purge-time, since they are + * uncompressed + */ + + /* if encountering page type not needing scrubbing + release reference to table object */ + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + + if (page_has_garbage(page) == false) { + /* no garbage (from deleted/shrunken records) */ + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + + } else if (allocated == BTR_SCRUB_PAGE_FREE || + allocated == BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN) { + + if (! 
(type == FIL_PAGE_INDEX || + type == FIL_PAGE_TYPE_BLOB || + type == FIL_PAGE_TYPE_ZBLOB || + type == FIL_PAGE_TYPE_ZBLOB2)) { + + /** + * If this is a dropped page, we also need to scrub + * BLOB pages + */ + + /* if encountering page type not needing scrubbing + release reference to table object */ + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + } + + if (btr_page_get_index_id(page) == IBUF_INDEX_ID) { + /* skip ibuf */ + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + + return BTR_SCRUB_PAGE; +} + +/**************************************************************** +Handle a skipped page +*/ +UNIV_INTERN +void +btr_scrub_skip_page( +/*==================*/ + btr_scrub_t* scrub_data, /*!< in: data with scrub state */ + int needs_scrubbing) /*!< in: return code from + btr_page_needs_scrubbing */ +{ + switch(needs_scrubbing) { + case BTR_SCRUB_SKIP_PAGE: + /* nothing todo */ + return; + case BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE: + btr_scrub_table_close_for_thread(scrub_data); + return; + case BTR_SCRUB_TURNED_OFF: + case BTR_SCRUB_SKIP_PAGE_AND_COMPLETE_SPACE: + btr_scrub_complete_space(scrub_data); + return; + } + + /* unknown value. 
should not happen */ + ut_a(0); +} + +/**************************************************************** +Try to scrub a page using btr_page_reorganize_low +return DB_SUCCESS on success or DB_OVERFLOW on failure */ +static +dberr_t +btr_optimistic_scrub( +/*==================*/ + btr_scrub_t* scrub_data, /*!< in: data with scrub state */ + buf_block_t* block, /*!< in: block to scrub */ + dict_index_t* index, /*!< in: index */ + mtr_t* mtr) /*!< in: mtr */ +{ +#ifdef UNIV_DEBUG + if (srv_scrub_force_testing && + page_get_n_recs(buf_block_get_frame(block)) > 2 && + (rand() % 100) < test_pessimistic_scrub_pct) { + + fprintf(stderr, + "scrub: simulate btr_page_reorganize failed %lu:%lu " + " table: %llu:%s index: %llu:%s get_n_recs(): %lu\n", + buf_block_get_space(block), + buf_block_get_page_no(block), + (ulonglong)scrub_data->current_table->id, + scrub_data->current_table->name, + (ulonglong)scrub_data->current_index->id, + scrub_data->current_index->name, + page_get_n_recs(buf_block_get_frame(block))); + return DB_OVERFLOW; + } +#endif + + page_cur_t cur; + page_cur_set_before_first(block, &cur); + bool recovery = false; + if (!btr_page_reorganize_low(recovery, scrub_compression_level, + &cur, index, mtr)) { + return DB_OVERFLOW; + } + + /* We play safe and reset the free bits */ + if (!dict_index_is_clust(index) && + page_is_leaf(buf_block_get_frame(block))) { + + ibuf_reset_free_bits(block); + } + + scrub_data->scrub_stat.page_reorganizations++; + return DB_SUCCESS; +} + +/**************************************************************** +Try to scrub a page by splitting it +return DB_SUCCESS on success +DB_UNDERFLOW if page has too few records +DB_OUT_OF_FILE_SPACE if we can't find space for split */ +static +dberr_t +btr_pessimistic_scrub( +/*==================*/ + btr_scrub_t* scrub_data, /*!< in: data with scrub state */ + buf_block_t* block, /*!< in: block to scrub */ + dict_index_t* index, /*!< in: index */ + mtr_t* mtr) /*!< in: mtr */ +{ + page_t* page = 
buf_block_get_frame(block); + if (page_get_n_recs(page) < 2) { + /** + * There is no way we can split a page with < 2 records + */ + log_scrub_failure(scrub_data, block, DB_UNDERFLOW); + return DB_UNDERFLOW; + } + + /** + * Splitting page needs new space, allocate it here + * so that splitting won't fail due to this */ + ulint n_extents = 3; + ulint n_reserved = 0; + if (!fsp_reserve_free_extents(&n_reserved, index->space, + n_extents, FSP_NORMAL, mtr)) { + log_scrub_failure(scrub_data, block, + DB_OUT_OF_FILE_SPACE); + return DB_OUT_OF_FILE_SPACE; + } + + /* read block variables */ + ulint space = buf_block_get_space(block); + ulint page_no = buf_block_get_page_no(block); + ulint zip_size = buf_block_get_zip_size(block); + ulint left_page_no = btr_page_get_prev(page, mtr); + ulint right_page_no = btr_page_get_next(page, mtr); + + /** + * When splitting page, we need X-latches on left/right brothers + * see e.g btr_cur_latch_leaves + */ + + if (left_page_no != FIL_NULL) { + /** + * pages needs to be locked left-to-right, release block + * and re-lock. 
We still have x-lock on index + * so this should be safe + */ + mtr_release_buf_page_at_savepoint(mtr, scrub_data->savepoint, + block); + + buf_block_t* get_block = btr_block_get( + space, zip_size, left_page_no, + RW_X_LATCH, index, mtr); + get_block->check_index_page_at_flush = TRUE; + + /** + * Refetch block and re-initialize page + */ + block = btr_block_get( + space, zip_size, page_no, + RW_X_LATCH, index, mtr); + + page = buf_block_get_frame(block); + + /** + * structure should be unchanged + */ + ut_a(left_page_no == btr_page_get_prev(page, mtr)); + ut_a(right_page_no == btr_page_get_next(page, mtr)); + } + + if (right_page_no != FIL_NULL) { + buf_block_t* get_block = btr_block_get( + space, zip_size, right_page_no, + RW_X_LATCH, index, mtr); + get_block->check_index_page_at_flush = TRUE; + } + + /* arguments to btr_page_split_and_insert */ + mem_heap_t* heap = NULL; + dtuple_t* entry = NULL; + ulint* offsets = NULL; + ulint n_ext = 0; + ulint flags = BTR_MODIFY_TREE; + + /** + * position a cursor on first record on page + */ + rec_t* rec = page_rec_get_next(page_get_infimum_rec(page)); + btr_cur_t cursor; + btr_cur_position(index, rec, block, &cursor); + + /** + * call split page with NULL as argument for entry to insert + */ + if (dict_index_get_page(index) == buf_block_get_page_no(block)) { + /* The page is the root page + * NOTE: ibuf_reset_free_bits is called inside + * btr_root_raise_and_insert */ + rec = btr_root_raise_and_insert( + flags, &cursor, &offsets, &heap, entry, n_ext, mtr); + } else { + /* We play safe and reset the free bits + * NOTE: need to call this prior to btr_page_split_and_insert */ + if (!dict_index_is_clust(index) && + page_is_leaf(buf_block_get_frame(block))) { + + ibuf_reset_free_bits(block); + } + + rec = btr_page_split_and_insert( + flags, &cursor, &offsets, &heap, entry, n_ext, mtr); + } + + if (heap) { + mem_heap_free(heap); + } + + if (n_reserved > 0) { + fil_space_release_free_extents(index->space, n_reserved); + } + + 
scrub_data->scrub_stat.page_splits++; + return DB_SUCCESS; +} + +/**************************************************************** +Location index by id for a table +return index or NULL */ +static +dict_index_t* +find_index( +/*========*/ + dict_table_t* table, /*!< in: table */ + index_id_t index_id) /*!< in: index id */ +{ + if (table != NULL) { + dict_index_t* index = dict_table_get_first_index(table); + while (index != NULL) { + if (index->id == index_id) + return index; + index = dict_table_get_next_index(index); + } + } + + return NULL; +} + +/**************************************************************** +Check if table should be scrubbed +*/ +static +bool +btr_scrub_table_needs_scrubbing( +/*============================*/ + dict_table_t* table) /*!< in: table */ +{ + if (table == NULL) + return false; + + if (table->stats_bg_flag & BG_STAT_SHOULD_QUIT) { + return false; + } + + if (table->to_be_dropped) { + return false; + } + + if (table->corrupted) { + return false; + } + + return true; +} + +/**************************************************************** +Check if index should be scrubbed +*/ +static +bool +btr_scrub_index_needs_scrubbing( +/*============================*/ + dict_index_t* index) /*!< in: index */ +{ + if (index == NULL) + return false; + + if (dict_index_is_ibuf(index)) { + return false; + } + + if (dict_index_is_online_ddl(index)) { + return false; + } + + return true; +} + +/**************************************************************** +Get table and index and store it on scrub_data +*/ +static +void +btr_scrub_get_table_and_index( +/*=========================*/ + btr_scrub_t* scrub_data, /*!< in/out: scrub data */ + index_id_t index_id) /*!< in: index id */ +{ + /* first check if it's an index to current table */ + scrub_data->current_index = find_index(scrub_data->current_table, + index_id); + + if (scrub_data->current_index != NULL) { + /* yes it was */ + return; + } + + if (!btr_scrub_lock_dict(scrub_data->space, false)) { 
+ btr_scrub_complete_space(scrub_data); + return; + } + + /* close current table (if any) */ + if (scrub_data->current_table != NULL) { + btr_scrub_table_close(scrub_data->current_table); + scrub_data->current_table = NULL; + } + + /* argument to dict_table_open_on_index_id */ + bool dict_locked = true; + + /* open table based on index_id */ + dict_table_t* table = dict_table_open_on_index_id( + index_id, + dict_locked); + + if (table != NULL) { + /* mark table as being scrubbed */ + table->stats_bg_flag |= BG_SCRUB_IN_PROGRESS; + + if (!btr_scrub_table_needs_scrubbing(table)) { + btr_scrub_table_close(table); + btr_scrub_unlock_dict(); + return; + } + } + + btr_scrub_unlock_dict(); + scrub_data->current_table = table; + scrub_data->current_index = find_index(table, index_id); +} + +/**************************************************************** +Handle free page */ +UNIV_INTERN +int +btr_scrub_free_page( +/*====================*/ + btr_scrub_t* scrub_data, /*!< in/out: scrub data */ + buf_block_t* block, /*!< in: block to scrub */ + mtr_t* mtr) /*!< in: mtr */ +{ + // TODO(jonaso): scrub only what is actually needed + + { + /* note: perform both the memset and setting of FIL_PAGE_TYPE + * wo/ logging. 
so that if we crash before page is flushed + * it will be found by scrubbing thread again + */ + memset(buf_block_get_frame(block) + PAGE_HEADER, 0, + UNIV_PAGE_SIZE - PAGE_HEADER); + + mach_write_to_2(buf_block_get_frame(block) + FIL_PAGE_TYPE, + FIL_PAGE_TYPE_ALLOCATED); + } + + ulint compact = 1; + page_create(block, mtr, compact); + + mtr_commit(mtr); + + /* page doesn't need further processing => SKIP + * and close table/index so that we don't keep references too long */ + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; +} + +/**************************************************************** +Recheck if a page needs scrubbing, and if it does load appropriate +table and index */ +UNIV_INTERN +int +btr_scrub_recheck_page( +/*====================*/ + btr_scrub_t* scrub_data, /*!< inut: scrub data */ + buf_block_t* block, /*!< in: block */ + btr_scrub_page_allocation_status_t allocated, /*!< in: is block + allocated or free */ + mtr_t* mtr) /*!< in: mtr */ +{ + /* recheck if page needs scrubbing (knowing allocation status) */ + int needs_scrubbing = btr_page_needs_scrubbing( + scrub_data, block, allocated); + + if (needs_scrubbing != BTR_SCRUB_PAGE) { + mtr_commit(mtr); + return needs_scrubbing; + } + + if (allocated == BTR_SCRUB_PAGE_FREE) { + /** we don't need to load table/index for free pages + * so scrub directly here */ + /* mtr is committed inside btr_scrub_page_free */ + return btr_scrub_free_page(scrub_data, + block, + mtr); + } + + page_t* page = buf_block_get_frame(block); + index_id_t index_id = btr_page_get_index_id(page); + + if (scrub_data->current_index == NULL || + scrub_data->current_index->id != index_id) { + + /** + * commit mtr (i.e release locks on block) + * and try to get table&index potentially loading it + * from disk + */ + mtr_commit(mtr); + btr_scrub_get_table_and_index(scrub_data, index_id); + } else { + /* we already have correct index + * commit mtr so that we can lock index before fetching page + */ + mtr_commit(mtr); + } + + /* check 
if table is about to be dropped */ + if (!btr_scrub_table_needs_scrubbing(scrub_data->current_table)) { + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + + /* check if index is scrubbable */ + if (!btr_scrub_index_needs_scrubbing(scrub_data->current_index)) { + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + + mtr_start(mtr); + mtr_x_lock(dict_index_get_lock(scrub_data->current_index), mtr); + /** set savepoint for X-latch of block */ + scrub_data->savepoint = mtr_set_savepoint(mtr); + return BTR_SCRUB_PAGE; +} + +/**************************************************************** +Perform actual scrubbing of page */ +UNIV_INTERN +int +btr_scrub_page( +/*============*/ + btr_scrub_t* scrub_data, /*!< in/out: scrub data */ + buf_block_t* block, /*!< in: block */ + btr_scrub_page_allocation_status_t allocated, /*!< in: is block + allocated or free */ + mtr_t* mtr) /*!< in: mtr */ +{ + /* recheck if page needs scrubbing (knowing allocation status) */ + int needs_scrubbing = btr_page_needs_scrubbing( + scrub_data, block, allocated); + if (needs_scrubbing != BTR_SCRUB_PAGE) { + mtr_commit(mtr); + return needs_scrubbing; + } + + if (allocated == BTR_SCRUB_PAGE_FREE) { + /* mtr is committed inside btr_scrub_page_free */ + return btr_scrub_free_page(scrub_data, + block, + mtr); + } + + /* check that table/index still match now that they are loaded */ + + if (scrub_data->current_table->space != scrub_data->space) { + /* this is truncate table */ + mtr_commit(mtr); + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + + if (scrub_data->current_index->space != scrub_data->space) { + /* this is truncate table */ + mtr_commit(mtr); + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + + if (scrub_data->current_index->page == FIL_NULL) { + /* this is truncate table */ + mtr_commit(mtr); + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + + if (btr_page_get_index_id(buf_block_get_frame(block)) != + scrub_data->current_index->id) { + /* page has been reallocated to new index */ + 
mtr_commit(mtr); + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + + /* check if I can scrub (reorganize) page wo/ overflow */ + if (btr_optimistic_scrub(scrub_data, + block, + scrub_data->current_index, + mtr) != DB_SUCCESS) { + + /** + * Can't reorganize page...need to split it + */ + btr_pessimistic_scrub(scrub_data, + block, + scrub_data->current_index, + mtr); + } + mtr_commit(mtr); + + return BTR_SCRUB_SKIP_PAGE; // no further action needed +} + +/**************************************************************//** +Start iterating a space */ +UNIV_INTERN +bool +btr_scrub_start_space( +/*===================*/ + ulint space, /*!< in: space */ + btr_scrub_t* scrub_data) /*!< in/out: scrub data */ +{ + scrub_data->space = space; + scrub_data->current_table = NULL; + scrub_data->current_index = NULL; + + scrub_data->compressed = fil_space_get_zip_size(space) > 0; + scrub_data->scrubbing = check_scrub_setting(scrub_data); + return scrub_data->scrubbing; +} + +/*********************************************************************** +Update global statistics with thread statistics */ +static +void +btr_scrub_update_total_stat(btr_scrub_t *scrub_data) +{ + mutex_enter(&scrub_stat_mutex); + scrub_stat.page_reorganizations += + scrub_data->scrub_stat.page_reorganizations; + scrub_stat.page_splits += + scrub_data->scrub_stat.page_splits; + scrub_stat.page_split_failures_underflow += + scrub_data->scrub_stat.page_split_failures_underflow; + scrub_stat.page_split_failures_out_of_filespace += + scrub_data->scrub_stat.page_split_failures_out_of_filespace; + scrub_stat.page_split_failures_missing_index += + scrub_data->scrub_stat.page_split_failures_missing_index; + scrub_stat.page_split_failures_unknown += + scrub_data->scrub_stat.page_split_failures_unknown; + mutex_exit(&scrub_stat_mutex); + + // clear stat + memset(&scrub_data->scrub_stat, 0, sizeof(scrub_data->scrub_stat)); +} + +/**************************************************************//** +Complete iterating a 
space */ +UNIV_INTERN +bool +btr_scrub_complete_space( +/*=====================*/ + btr_scrub_t* scrub_data) /*!< in/out: scrub data */ +{ + btr_scrub_table_close_for_thread(scrub_data); + btr_scrub_update_total_stat(scrub_data); + return scrub_data->scrubbing; +} + +/********************************************************************* +Return scrub statistics */ +void +btr_scrub_total_stat(btr_scrub_stat_t *stat) +{ + mutex_enter(&scrub_stat_mutex); + *stat = scrub_stat; + mutex_exit(&scrub_stat_mutex); +} + +/********************************************************************* +Init global variables */ +UNIV_INTERN +void +btr_scrub_init() +{ + mutex_create(scrub_stat_mutex_key, + &scrub_stat_mutex, SYNC_NO_ORDER_CHECK); + + memset(&scrub_stat, 0, sizeof(scrub_stat)); +} + +/********************************************************************* +Cleanup globals */ +UNIV_INTERN +void +btr_scrub_cleanup() +{ + mutex_free(&scrub_stat_mutex); +} diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index f4ad7875bea..8b9f5a49e7d 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -54,7 +54,9 @@ Created 11/5/1995 Heikki Tuuri #include "page0zip.h" #include "srv0mon.h" #include "buf0checksum.h" - +#include "fil0pageencryption.h" +#include "fil0pagecompress.h" +#include "ut0byte.h" #include <new> /* @@ -502,12 +504,13 @@ buf_page_is_corrupted( ulint zip_size) /*!< in: size of compressed page; 0 for uncompressed pages */ { + ulint page_encrypted = fil_page_is_compressed_encrypted(read_buf) || fil_page_is_encrypted(read_buf); ulint checksum_field1; ulint checksum_field2; ibool crc32_inited = FALSE; ib_uint32_t crc32 = ULINT32_UNDEFINED; - if (!zip_size + if (!page_encrypted && !zip_size && memcmp(read_buf + FIL_PAGE_LSN + 4, read_buf + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) { @@ -560,6 +563,9 @@ buf_page_is_corrupted( if (zip_size) { return(!page_zip_verify_checksum(read_buf, zip_size)); } + if 
(page_encrypted) { + return (FALSE); + } checksum_field1 = mach_read_from_4( read_buf + FIL_PAGE_SPACE_OR_CHKSUM); @@ -995,6 +1001,11 @@ buf_block_init( block->page.state = BUF_BLOCK_NOT_USED; block->page.buf_fix_count = 0; block->page.io_fix = BUF_IO_NONE; + block->page.crypt_buf = NULL; + block->page.crypt_buf_free = NULL; + block->page.comp_buf = NULL; + block->page.comp_buf_free = NULL; + block->page.key_version = 0; block->modify_clock = 0; @@ -3374,11 +3385,13 @@ page is not in the buffer pool it is not loaded and NULL is returned. Suitable for using when holding the lock_sys_t::mutex. @return pointer to a page or NULL */ UNIV_INTERN -const buf_block_t* +buf_block_t* buf_page_try_get_func( /*==================*/ ulint space_id,/*!< in: tablespace id */ ulint page_no,/*!< in: page number */ + ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */ + bool possibly_freed, const char* file, /*!< in: file name */ ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in: mini-transaction */ @@ -3416,8 +3429,12 @@ buf_page_try_get_func( buf_block_buf_fix_inc(block, file, line); mutex_exit(&block->mutex); - fix_type = MTR_MEMO_PAGE_S_FIX; - success = rw_lock_s_lock_nowait(&block->lock, file, line); + if (rw_latch == RW_S_LATCH) { + fix_type = MTR_MEMO_PAGE_S_FIX; + success = rw_lock_s_lock_nowait(&block->lock, file, line); + } else { + success = false; + } if (!success) { /* Let us try to get an X-latch. 
If the current thread @@ -3442,9 +3459,11 @@ buf_page_try_get_func( ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - mutex_enter(&block->mutex); - ut_a(!block->page.file_page_was_freed); - mutex_exit(&block->mutex); + if (!possibly_freed) { + mutex_enter(&block->mutex); + ut_a(!block->page.file_page_was_freed); + mutex_exit(&block->mutex); + } #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); @@ -3474,6 +3493,12 @@ buf_page_init_low( bpage->newest_modification = 0; bpage->oldest_modification = 0; bpage->write_size = 0; + bpage->crypt_buf = NULL; + bpage->crypt_buf_free = NULL; + bpage->comp_buf = NULL; + bpage->comp_buf_free = NULL; + bpage->key_version = 0; + HASH_INVALIDATE(bpage, hash); #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG bpage->file_page_was_freed = FALSE; @@ -3987,7 +4012,7 @@ buf_page_create( Then InnoDB could in a crash recovery print a big, false, corruption warning if the stamp contains an lsn bigger than the ib_logfile lsn. */ - memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8); + memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(++buf_dbg_counter % 5771 || buf_validate()); @@ -4187,6 +4212,16 @@ buf_page_io_complete( ulint read_space_id; byte* frame; + if (!buf_page_decrypt_after_read(bpage)) { + /* encryption error! 
*/ + if (buf_page_get_zip_size(bpage)) { + frame = bpage->zip.data; + } else { + frame = ((buf_block_t*) bpage)->frame; + } + goto corrupt; + } + if (buf_page_get_zip_size(bpage)) { frame = bpage->zip.data; buf_pool->n_pend_unzip++; @@ -4327,6 +4362,9 @@ corrupt: bpage->offset, buf_page_get_zip_size(bpage), TRUE); } + } else { + /* io_type == BUF_IO_WRITE */ + buf_page_encrypt_after_write(bpage); } buf_pool_mutex_enter(buf_pool); @@ -5561,3 +5599,227 @@ buf_page_init_for_backup_restore( } } #endif /* !UNIV_HOTBACKUP */ + +/********************************************************************//** +Encrypts a buffer page right before it's flushed to disk +*/ +byte* +buf_page_encrypt_before_write( +/*==========================*/ + buf_page_t* bpage, /*!< in/out: buffer page to be flushed */ + const byte* src_frame) /*!< in: src frame */ +{ + if (srv_encrypt_tables == FALSE) { + /* Encryption is disabled */ + return const_cast<byte*>(src_frame); + } + + if (bpage->offset == 0) { + /* Page 0 of a tablespace is not encrypted */ + ut_ad(bpage->key_version == 0); + return const_cast<byte*>(src_frame); + } + + if (fil_space_check_encryption_write(bpage->space) == false) { + /* An unencrypted table */ + bpage->key_version = 0; + return const_cast<byte*>(src_frame); + } + + if (bpage->space == TRX_SYS_SPACE && bpage->offset == TRX_SYS_PAGE_NO) { + /* don't encrypt page as it contains address to dblwr buffer */ + bpage->key_version = 0; + return const_cast<byte*>(src_frame); + } + + ulint zip_size = buf_page_get_zip_size(bpage); + ulint page_size = (zip_size) ? zip_size : UNIV_PAGE_SIZE; + + /** + * TODO(jonaso): figure out more clever malloc strategy + * + * This implementation does a malloc/free per iop for encrypted + * tablespaces. 
Alternative strategies that have been considered are + * + * 1) use buf_block_alloc (i.e alloc from buffer pool) + * this does not work as buf_block_alloc will then be called + * when needing to flush a page, which might be triggered + * due to shortage of memory in buffer pool + * 2) allocate a buffer per fil_node_t + * this would break abstraction layers and has therfore not been + * considered a lot. + */ + + if (bpage->crypt_buf_free == NULL) { + bpage->crypt_buf_free = (byte*)malloc(page_size*2); + // TODO: Is 4k aligment enough ? + bpage->crypt_buf = (byte *)ut_align(bpage->crypt_buf_free, page_size); + } + + byte *dst_frame = bpage->crypt_buf; + + if (!fil_space_is_page_compressed(bpage->space)) { + // encrypt page content + fil_space_encrypt(bpage->space, bpage->offset, + bpage->newest_modification, + src_frame, zip_size, dst_frame, 0); + + unsigned key_version = + mach_read_from_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + ut_ad(key_version == 0 || key_version >= bpage->key_version); + bpage->key_version = key_version; + + // return dst_frame which will be written + return dst_frame; + } else { + // We do compression and encryption later on os0file.cc + dst_frame = (byte *)src_frame; + } + + // return dst_frame which will be written + return dst_frame; +} + +/********************************************************************//** +Release memory after encrypted page has been written to disk +*/ +ibool +buf_page_encrypt_after_write( +/*=========================*/ + buf_page_t* bpage) /*!< in/out: buffer page flushed */ +{ + if (bpage->crypt_buf_free != NULL) { + free(bpage->crypt_buf_free); + bpage->crypt_buf_free = NULL; + bpage->crypt_buf = NULL; + } + + if (bpage->comp_buf_free != NULL) { + free(bpage->comp_buf_free); + bpage->comp_buf_free = NULL; + bpage->comp_buf = NULL; + } + + return (TRUE); +} + +/********************************************************************//** +Allocates memory to read in an encrypted page +*/ +byte* 
+buf_page_decrypt_before_read( +/*=========================*/ + buf_page_t* bpage, /*!< in/out: buffer page to be read */ + ulint zip_size) /*!< in: compressed page size, or 0 */ +{ + ulint size = (zip_size) ? zip_size : UNIV_PAGE_SIZE; + + /* + Here we only need to allocate space for not header pages + in case of file space encryption. Table encryption is handled + later. + */ + if (!srv_encrypt_tables || bpage->offset == 0 || + fil_space_check_encryption_read(bpage->space) == false) + return zip_size ? bpage->zip.data : ((buf_block_t*) bpage)->frame; + + if (bpage->crypt_buf_free == NULL) + { + // allocate buffer to read data into + bpage->crypt_buf_free = (byte*)malloc(size*2); + // TODO: Is 4K aligment enough ? + bpage->crypt_buf = (byte*)ut_align(bpage->crypt_buf_free, size); + } + return bpage->crypt_buf; +} + +/********************************************************************//** +Decrypt page after it has been read from disk +*/ +ibool +buf_page_decrypt_after_read( +/*========================*/ + buf_page_t* bpage) /*!< in/out: buffer page read from disk */ +{ + ut_ad(bpage->key_version == 0); + ulint zip_size = buf_page_get_zip_size(bpage); + ulint size = (zip_size) ? zip_size : UNIV_PAGE_SIZE; + + byte* dst_frame = (zip_size) ? bpage->zip.data : + ((buf_block_t*) bpage)->frame; + + if (bpage->offset == 0) { + /* File header pages are not encrypted */ + ut_a(bpage->crypt_buf == NULL); + return (TRUE); + } + + + const byte* src_frame = bpage->crypt_buf != NULL ? + bpage->crypt_buf : dst_frame; + + unsigned key_version = + mach_read_from_4(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + + bool page_compressed_encrypted = fil_page_is_compressed_encrypted(dst_frame); + + if (key_version == 0) { + /* the page we read is unencrypted */ + if (dst_frame != src_frame) { + /* but we had allocated a crypt_buf */ + // TODO: Can this be avoided ? 
+ memcpy(dst_frame, src_frame, size); + } + } else { + /* the page we read is encrypted */ + if (dst_frame == src_frame) { + /* but we had NOT allocated a crypt buf + * malloc a buffer, copy page to it + * and then decrypt from that into real page*/ + bpage->crypt_buf_free = (byte *)malloc(UNIV_PAGE_SIZE*2); + // TODO: is 4k aligment enough ? + src_frame = bpage->crypt_buf = (byte*)ut_align(bpage->crypt_buf_free, UNIV_PAGE_SIZE); + memcpy(bpage->crypt_buf, dst_frame, size); + } + + /* decrypt from src_frame to dst_frame */ + fil_space_decrypt(bpage->space, + src_frame, size, dst_frame); + + /* decompress from dst_frame to comp_buf and then copy to + buffer pool */ + if (page_compressed_encrypted) { + if (bpage->comp_buf_free == NULL) { + bpage->comp_buf_free = (byte *)malloc(UNIV_PAGE_SIZE*2); + // TODO: is 4k aligment enough ? + bpage->comp_buf = (byte*)ut_align(bpage->comp_buf_free, UNIV_PAGE_SIZE); + } + + fil_decompress_page(bpage->comp_buf, dst_frame, size, NULL); + } + } + bpage->key_version = key_version; + + if (bpage->crypt_buf_free != NULL) { + // free temp page + free(bpage->crypt_buf_free); + bpage->crypt_buf = NULL; + bpage->crypt_buf_free = NULL; + } + return (TRUE); +} + +/********************************************************************//** +Release memory allocated for decryption +*/ +void +buf_page_decrypt_cleanup( +/*=====================*/ + buf_page_t* bpage) /*!< in/out: buffer page */ +{ + if (bpage->crypt_buf != NULL) { + free(bpage->crypt_buf_free); + bpage->crypt_buf = NULL; + bpage->crypt_buf_free = NULL; + } +} diff --git a/storage/innobase/buf/buf0checksum.cc b/storage/innobase/buf/buf0checksum.cc index 4ba65d6f2d0..aa02cda2937 100644 --- a/storage/innobase/buf/buf0checksum.cc +++ b/storage/innobase/buf/buf0checksum.cc @@ -64,7 +64,8 @@ buf_calc_page_crc32( there we store the old formula checksum. 
*/ checksum = ut_crc32(page + FIL_PAGE_OFFSET, - FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET) + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + - FIL_PAGE_OFFSET) ^ ut_crc32(page + FIL_PAGE_DATA, UNIV_PAGE_SIZE - FIL_PAGE_DATA - FIL_PAGE_END_LSN_OLD_CHKSUM); @@ -94,7 +95,8 @@ buf_calc_page_new_checksum( there we store the old formula checksum. */ checksum = ut_fold_binary(page + FIL_PAGE_OFFSET, - FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET) + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + - FIL_PAGE_OFFSET) + ut_fold_binary(page + FIL_PAGE_DATA, UNIV_PAGE_SIZE - FIL_PAGE_DATA - FIL_PAGE_END_LSN_OLD_CHKSUM); @@ -119,7 +121,7 @@ buf_calc_page_old_checksum( { ulint checksum; - checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN); + checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); checksum = checksum & 0xFFFFFFFFUL; diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index c903f5fbffa..17e8143943c 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -50,6 +50,8 @@ UNIV_INTERN buf_dblwr_t* buf_dblwr = NULL; /** Set to TRUE when the doublewrite buffer is being created */ UNIV_INTERN ibool buf_dblwr_being_created = FALSE; +#define TRX_SYS_DOUBLEWRITE_BLOCKS 2 + /****************************************************************//** Determines if a page number is located inside the doublewrite buffer. @return TRUE if the location is inside the two blocks of the @@ -136,7 +138,7 @@ buf_dblwr_init( /* There are two blocks of same size in the doublewrite buffer. */ - buf_size = 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; + buf_size = TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; /* There must be atleast one buffer for single page writes and one buffer for batch writes. 
*/ @@ -216,7 +218,7 @@ start_again: "Doublewrite buffer not found: creating new"); if (buf_pool_get_curr_size() - < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + < ((TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + FSP_EXTENT_SIZE / 2 + 100) * UNIV_PAGE_SIZE)) { @@ -252,7 +254,7 @@ start_again: fseg_header = doublewrite + TRX_SYS_DOUBLEWRITE_FSEG; prev_page_no = 0; - for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + FSP_EXTENT_SIZE / 2; i++) { new_block = fseg_alloc_free_page( fseg_header, prev_page_no + 1, FSP_UP, &mtr); @@ -375,7 +377,7 @@ buf_dblwr_init_or_load_pages( /* We do the file i/o past the buffer pool */ - unaligned_read_buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); + unaligned_read_buf = static_cast<byte*>(ut_malloc(3 * UNIV_PAGE_SIZE)); read_buf = static_cast<byte*>( ut_align(unaligned_read_buf, UNIV_PAGE_SIZE)); @@ -387,6 +389,14 @@ buf_dblwr_init_or_load_pages( doublewrite = read_buf + TRX_SYS_DOUBLEWRITE; + if (mach_read_from_4(read_buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) != 0) { + fil_space_decrypt((ulint)TRX_SYS_SPACE, + read_buf, + UNIV_PAGE_SIZE, /* page size */ + read_buf + UNIV_PAGE_SIZE); + doublewrite = read_buf + UNIV_PAGE_SIZE + TRX_SYS_DOUBLEWRITE; + } + if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC) == TRX_SYS_DOUBLEWRITE_MAGIC_N) { /* The doublewrite buffer has been created */ @@ -429,7 +439,7 @@ buf_dblwr_init_or_load_pages( page = buf; - for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) { + for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * TRX_SYS_DOUBLEWRITE_BLOCKS; i++) { ulint source_page_no; @@ -514,11 +524,11 @@ buf_dblwr_process() fil_io(OS_FILE_READ, true, space_id, zip_size, page_no, 0, zip_size ? 
zip_size : UNIV_PAGE_SIZE, - read_buf, NULL, 0); + read_buf, NULL, 0, 0); - /* Check if the page is corrupt */ - - if (buf_page_is_corrupted(true, read_buf, zip_size)) { + if (fil_space_verify_crypt_checksum(read_buf, zip_size)) { + /* page is encrypted and checksum is OK */ + } else if (buf_page_is_corrupted(true, read_buf, zip_size)) { fprintf(stderr, "InnoDB: Warning: database page" @@ -529,8 +539,11 @@ buf_dblwr_process() " the doublewrite buffer.\n", (ulong) space_id, (ulong) page_no); - if (buf_page_is_corrupted(true, - page, zip_size)) { + if (fil_space_verify_crypt_checksum(page, zip_size)) { + /* the doublewrite buffer page is encrypted and OK */ + } else if (buf_page_is_corrupted(true, + page, + zip_size)) { fprintf(stderr, "InnoDB: Dump of the page:\n"); buf_page_print( @@ -566,7 +579,7 @@ buf_dblwr_process() fil_io(OS_FILE_WRITE, true, space_id, zip_size, page_no, 0, zip_size ? zip_size : UNIV_PAGE_SIZE, - page, NULL, 0); + page, NULL, 0, 0); ib_logf(IB_LOG_LEVEL_INFO, "Recovered the page from" @@ -586,14 +599,34 @@ buf_dblwr_process() zip_size, page_no, 0, zip_size ? 
zip_size : UNIV_PAGE_SIZE, - page, NULL, 0); + page, NULL, 0, 0); } } } } fil_flush_file_spaces(FIL_TABLESPACE); - ut_free(unaligned_read_buf); + + { + fprintf(stderr, + "Clear dblwr buffer after completing " + "processing of it...\n"); + + size_t bytes = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE; + byte *unaligned_buf = static_cast<byte*>( + ut_malloc(bytes + UNIV_PAGE_SIZE - 1)); + + byte *buf = static_cast<byte*>( + ut_align(unaligned_buf, UNIV_PAGE_SIZE)); + memset(buf, 0, bytes); + + fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, + buf_dblwr->block1, 0, bytes, buf, NULL, NULL, 0); + fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, + buf_dblwr->block2, 0, bytes, buf, NULL, NULL, 0); + + ut_free(unaligned_buf); + } } /****************************************************************//** @@ -665,7 +698,7 @@ buf_dblwr_update( break; case BUF_FLUSH_SINGLE_PAGE: { - const ulint size = 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; + const ulint size = TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; ulint i; mutex_enter(&buf_dblwr->mutex); for (i = srv_doublewrite_batch_size; i < size; ++i) { @@ -792,13 +825,15 @@ buf_dblwr_write_block_to_datafile( ? 
OS_FILE_WRITE : OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER; + void * frame = buf_page_get_frame(bpage); + if (bpage->zip.data) { fil_io(flags, sync, buf_page_get_space(bpage), buf_page_get_zip_size(bpage), buf_page_get_page_no(bpage), 0, buf_page_get_zip_size(bpage), - (void*) bpage->zip.data, - (void*) bpage, 0); + frame, + (void*) bpage, 0, bpage->newest_modification); return; } @@ -810,7 +845,7 @@ buf_dblwr_write_block_to_datafile( fil_io(flags, sync, buf_block_get_space(block), 0, buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE, - (void*) block->frame, (void*) block, (ulint *)&bpage->write_size); + frame, (void*) block, (ulint *)&bpage->write_size, bpage->newest_modification ); } /********************************************************************//** @@ -904,7 +939,7 @@ try_again: fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, buf_dblwr->block1, 0, len, - (void*) write_buf, NULL, 0); + (void*) write_buf, NULL, 0, 0); if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { /* No unwritten pages in the second block. */ @@ -920,7 +955,7 @@ try_again: fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, buf_dblwr->block2, 0, len, - (void*) write_buf, NULL, 0); + (void*) write_buf, NULL, 0, 0); flush: /* increment the doublewrite flushed pages counter */ @@ -1001,13 +1036,14 @@ try_again: } zip_size = buf_page_get_zip_size(bpage); + void * frame = buf_page_get_frame(bpage); if (zip_size) { UNIV_MEM_ASSERT_RW(bpage->zip.data, zip_size); /* Copy the compressed page and clear the rest. 
*/ memcpy(buf_dblwr->write_buf + UNIV_PAGE_SIZE * buf_dblwr->first_free, - bpage->zip.data, zip_size); + frame, zip_size); memset(buf_dblwr->write_buf + UNIV_PAGE_SIZE * buf_dblwr->first_free + zip_size, 0, UNIV_PAGE_SIZE - zip_size); @@ -1018,7 +1054,7 @@ try_again: memcpy(buf_dblwr->write_buf + UNIV_PAGE_SIZE * buf_dblwr->first_free, - ((buf_block_t*) bpage)->frame, UNIV_PAGE_SIZE); + frame, UNIV_PAGE_SIZE); } buf_dblwr->buf_block_arr[buf_dblwr->first_free] = bpage; @@ -1069,7 +1105,7 @@ buf_dblwr_write_single_page( /* total number of slots available for single page flushes starts from srv_doublewrite_batch_size to the end of the buffer. */ - size = 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; + size = TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; ut_a(size > srv_doublewrite_batch_size); n_slots = size - srv_doublewrite_batch_size; @@ -1140,23 +1176,25 @@ retry: bytes in the doublewrite page with zeros. */ zip_size = buf_page_get_zip_size(bpage); + void * frame = buf_page_get_frame(bpage); + if (zip_size) { memcpy(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i, - bpage->zip.data, zip_size); + frame, zip_size); memset(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i + zip_size, 0, UNIV_PAGE_SIZE - zip_size); fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, offset, 0, UNIV_PAGE_SIZE, (void*) (buf_dblwr->write_buf - + UNIV_PAGE_SIZE * i), NULL, 0); + + UNIV_PAGE_SIZE * i), NULL, 0, bpage->newest_modification); } else { /* It is a regular page. 
Write it directly to the doublewrite buffer */ fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, offset, 0, UNIV_PAGE_SIZE, - (void*) ((buf_block_t*) bpage)->frame, - NULL, 0); + frame, + NULL, 0, bpage->newest_modification); } /* Now flush the doublewrite buffer data to disk */ diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 20500d10b3f..9c11ae2b43e 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -715,7 +715,7 @@ buf_flush_update_zip_checksum( srv_checksum_algorithm))); mach_write_to_8(page + FIL_PAGE_LSN, lsn); - memset(page + FIL_PAGE_FILE_FLUSH_LSN, 0, 8); + memset(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum); } @@ -894,7 +894,7 @@ buf_flush_write_block_low( mach_write_to_8(frame + FIL_PAGE_LSN, bpage->newest_modification); - memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8); + memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); break; case BUF_BLOCK_FILE_PAGE: frame = bpage->zip.data; @@ -909,12 +909,20 @@ buf_flush_write_block_low( break; } + frame = buf_page_encrypt_before_write(bpage, frame); + if (!srv_use_doublewrite_buf || !buf_dblwr) { fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, - sync, buf_page_get_space(bpage), zip_size, - buf_page_get_page_no(bpage), 0, + sync, + buf_page_get_space(bpage), + zip_size, + buf_page_get_page_no(bpage), + 0, zip_size ? zip_size : UNIV_PAGE_SIZE, - frame, bpage, &bpage->write_size); + frame, + bpage, + &bpage->write_size, + bpage->newest_modification); } else { /* InnoDB uses doublewrite buffer and doublewrite buffer @@ -926,10 +934,16 @@ buf_flush_write_block_low( if (awrites == ATOMIC_WRITES_ON) { fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, - FALSE, buf_page_get_space(bpage), zip_size, - buf_page_get_page_no(bpage), 0, + FALSE, + buf_page_get_space(bpage), + zip_size, + buf_page_get_page_no(bpage), + 0, zip_size ? 
zip_size : UNIV_PAGE_SIZE, - frame, bpage, &bpage->write_size); + frame, + bpage, + &bpage->write_size, + bpage->newest_modification); } else if (flush_type == BUF_FLUSH_SINGLE_PAGE) { buf_dblwr_write_single_page(bpage, sync); } else { diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc index 9e81d010d0f..19d18dcd870 100644 --- a/storage/innobase/buf/buf0rea.cc +++ b/storage/innobase/buf/buf0rea.cc @@ -176,6 +176,8 @@ buf_read_page_low( ut_ad(buf_page_in_file(bpage)); + byte* frame = buf_page_decrypt_before_read(bpage, zip_size); + if (sync) { thd_wait_begin(NULL, THD_WAIT_DISKIO); } @@ -184,15 +186,15 @@ buf_read_page_low( *err = fil_io(OS_FILE_READ | wake_later | ignore_nonexistent_pages, sync, space, zip_size, offset, 0, zip_size, - bpage->zip.data, bpage, &bpage->write_size); + frame, bpage, &bpage->write_size, 0); } else { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); *err = fil_io(OS_FILE_READ | wake_later | ignore_nonexistent_pages, sync, space, 0, offset, 0, UNIV_PAGE_SIZE, - ((buf_block_t*) bpage)->frame, bpage, - &bpage->write_size); + frame, bpage, + &bpage->write_size, 0); } if (sync) { @@ -200,6 +202,7 @@ buf_read_page_low( } if (*err != DB_SUCCESS) { + buf_page_decrypt_cleanup(bpage); if (ignore_nonexistent_pages || *err == DB_TABLESPACE_DELETED) { buf_read_page_handle_error(bpage); return(0); diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 25d4bb7c906..31a493855ad 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -321,10 +321,10 @@ dict_get_db_name_len( Reserves the dictionary system mutex for MySQL. 
*/ UNIV_INTERN void -dict_mutex_enter_for_mysql(void) +dict_mutex_enter_for_mysql_func(const char * file, ulint line) /*============================*/ { - mutex_enter(&(dict_sys->mutex)); + mutex_enter_func(&(dict_sys->mutex), file, line); } /********************************************************************//** diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc index 16e64da6619..149811dab60 100644 --- a/storage/innobase/dict/dict0load.cc +++ b/storage/innobase/dict/dict0load.cc @@ -1153,6 +1153,12 @@ loop: space_id, name); } + /* We need to read page 0 to get (optional) IV + regardless if encryptions is turned on or not, + since if it's off we should decrypt a potentially + already encrypted table */ + bool read_page_0 = true; + /* We set the 2nd param (fix_dict = true) here because we already have an x-lock on dict_operation_lock and dict_sys->mutex. Besides, @@ -1160,7 +1166,7 @@ loop: If the filepath is not known, it will need to be discovered. */ dberr_t err = fil_open_single_table_tablespace( - false, srv_read_only_mode ? false : true, + read_page_0, srv_read_only_mode ? false : true, space_id, dict_tf_to_fsp_flags(flags), name, filepath); @@ -2640,6 +2646,99 @@ check_rec: return(table); } +/***********************************************************************//** +Loads a table id based on the index id. 
+@return true if found */ +static +bool +dict_load_table_id_on_index_id( +/*==================*/ + index_id_t index_id, /*!< in: index id */ + table_id_t* table_id) /*!< out: table id */ +{ + /* check hard coded indexes */ + switch(index_id) { + case DICT_TABLES_ID: + case DICT_COLUMNS_ID: + case DICT_INDEXES_ID: + case DICT_FIELDS_ID: + *table_id = index_id; + return true; + case DICT_TABLE_IDS_ID: + /* The following is a secondary index on SYS_TABLES */ + *table_id = DICT_TABLES_ID; + return true; + } + + bool found = false; + mtr_t mtr; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + /* NOTE that the operation of this function is protected by + the dictionary mutex, and therefore no deadlocks can occur + with other dictionary operations. */ + + mtr_start(&mtr); + + btr_pcur_t pcur; + const rec_t* rec = dict_startscan_system(&pcur, &mtr, SYS_INDEXES); + + while (rec) { + ulint len; + const byte* field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_INDEXES__ID, &len); + ut_ad(len == 8); + + /* Check if the index id is the one searched for */ + if (index_id == mach_read_from_8(field)) { + found = true; + /* Now we get the table id */ + const byte* field = rec_get_nth_field_old( + rec, + DICT_FLD__SYS_INDEXES__TABLE_ID, + &len); + *table_id = mach_read_from_8(field); + break; + } + mtr_commit(&mtr); + mtr_start(&mtr); + rec = dict_getnext_system(&pcur, &mtr); + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + return(found); +} + +UNIV_INTERN +dict_table_t* +dict_table_open_on_index_id( +/*==================*/ + index_id_t index_id, /*!< in: index id */ + bool dict_locked) /*!< in: dict locked */ +{ + if (!dict_locked) { + mutex_enter(&dict_sys->mutex); + } + + ut_ad(mutex_own(&dict_sys->mutex)); + table_id_t table_id; + dict_table_t * table = NULL; + if (dict_load_table_id_on_index_id(index_id, &table_id)) { + bool local_dict_locked = true; + table = dict_table_open_on_id(table_id, + local_dict_locked, + DICT_TABLE_OP_LOAD_TABLESPACE); + } + + if (!dict_locked) { + 
mutex_exit(&dict_sys->mutex); + } + return table; +} + /********************************************************************//** This function is called when the database is booted. Loads system table index definitions except for the clustered index which is added to the diff --git a/storage/innobase/dict/dict0stats_bg.cc b/storage/innobase/dict/dict0stats_bg.cc index 0089f9897ae..076ceb79613 100644 --- a/storage/innobase/dict/dict0stats_bg.cc +++ b/storage/innobase/dict/dict0stats_bg.cc @@ -427,7 +427,7 @@ dict_stats_process_entry_from_recalc_pool() return; } - table->stats_bg_flag = BG_STAT_IN_PROGRESS; + table->stats_bg_flag |= BG_STAT_IN_PROGRESS; mutex_exit(&dict_sys->mutex); @@ -454,7 +454,7 @@ dict_stats_process_entry_from_recalc_pool() mutex_enter(&dict_sys->mutex); - table->stats_bg_flag = BG_STAT_NONE; + table->stats_bg_flag &= ~BG_STAT_IN_PROGRESS; dict_table_close(table, TRUE, FALSE); diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc new file mode 100644 index 00000000000..8ff2c490d35 --- /dev/null +++ b/storage/innobase/fil/fil0crypt.cc @@ -0,0 +1,2432 @@ +#include "fil0fil.h" +#include "srv0srv.h" +#include "srv0start.h" +#include "mach0data.h" +#include "log0recv.h" +#include "mtr0mtr.h" +#include "mtr0log.h" +#include "page0zip.h" +#include "ut0ut.h" +#include "btr0scrub.h" +#include "fsp0fsp.h" +#include "fil0pagecompress.h" +#include "fil0pageencryption.h" + +#include <my_crypt.h> +#include <my_crypt_key_management.h> + +#include <my_aes.h> +#include <math.h> + + +/** Mutex for keys */ +UNIV_INTERN ib_mutex_t fil_crypt_key_mutex; + +#ifdef UNIV_PFS_MUTEX +UNIV_INTERN mysql_pfs_key_t fil_crypt_key_mutex_key; +#endif + +/** Is encryption enabled/disabled */ +UNIV_INTERN my_bool srv_encrypt_tables = FALSE; + +/** No of key rotation threads requested */ +UNIV_INTERN uint srv_n_fil_crypt_threads = 0; + +/** No of key rotation threads started */ +static uint srv_n_fil_crypt_threads_started = 0; + +/** At this age or 
older a space/page will be rotated */ +UNIV_INTERN uint srv_fil_crypt_rotate_key_age = 1; + +/** Event to signal FROM the key rotation threads. */ +UNIV_INTERN os_event_t fil_crypt_event; + +/** Event to signal TO the key rotation threads. */ +UNIV_INTERN os_event_t fil_crypt_threads_event; + +/** Event for waking up threads throttle */ +UNIV_INTERN os_event_t fil_crypt_throttle_sleep_event; + +/** Mutex for key rotation threads */ +UNIV_INTERN ib_mutex_t fil_crypt_threads_mutex; + +#ifdef UNIV_PFS_MUTEX +UNIV_INTERN mysql_pfs_key_t fil_crypt_threads_mutex_key; +#endif + +/** Variable ensuring only 1 thread at time does initial conversion */ +static bool fil_crypt_start_converting = false; + +/** Variables for throttling */ +UNIV_INTERN uint srv_n_fil_crypt_iops = 100; // 10ms per iop +static uint srv_alloc_time = 3; // allocate iops for 3s at a time +static uint n_fil_crypt_iops_allocated = 0; + +/** Variables for scrubbing */ +extern uint srv_background_scrub_data_interval; +extern uint srv_background_scrub_data_check_interval; + +#define DEBUG_KEYROTATION_THROTTLING 0 + +/** Statistics variables */ +static fil_crypt_stat_t crypt_stat; +static ib_mutex_t crypt_stat_mutex; + +#ifdef UNIV_PFS_MUTEX +UNIV_INTERN mysql_pfs_key_t fil_crypt_stat_mutex_key; +#endif + +/** + * key for crypt data mutex +*/ +#ifdef UNIV_PFS_MUTEX +UNIV_INTERN mysql_pfs_key_t fil_crypt_data_mutex_key; +#endif + +/** +* Magic pattern in start of crypt data on page 0 +*/ +#define MAGIC_SZ 6 + +static const unsigned char CRYPT_MAGIC[MAGIC_SZ] = { + 's', 0xE, 0xC, 'R', 'E', 't' }; + +static const unsigned char EMPTY_PATTERN[MAGIC_SZ] = { + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +/** + * CRYPT_SCHEME_UNENCRYPTED + * + * Used as intermediate state when convering a space from unencrypted + * to encrypted + */ +#define CRYPT_SCHEME_UNENCRYPTED 0 + +/** + * CRYPT_SCHEME_1 + * + * L = AES_ECB(KEY, IV) + * CRYPT(PAGE) = AES_CRT(KEY=L, IV=C, PAGE) + */ +#define CRYPT_SCHEME_1 1 +#define 
CRYPT_SCHEME_1_IV_LEN 16 +// cached L given key_version +struct key_struct +{ + uint key_version; + byte key[CRYPT_SCHEME_1_IV_LEN]; +}; + +struct fil_space_rotate_state_t +{ + time_t start_time; // time when rotation started + ulint active_threads; // active threads in space + ulint next_offset; // next "free" offset + ulint max_offset; // max offset needing to be rotated + uint min_key_version_found; // min key version found but not rotated + lsn_t end_lsn; // max lsn created when rotating this space + bool starting; // initial write of IV + bool flushing; // space is being flushed at end of rotate + struct { + bool is_active; // is scrubbing active in this space + time_t last_scrub_completed; // when was last scrub completed + } scrubbing; +}; + +struct fil_space_crypt_struct +{ + ulint type; // CRYPT_SCHEME + uint keyserver_requests; // no of key requests to key server + uint key_count; // No of initalized key-structs + key_struct keys[3]; // cached L = AES_ECB(KEY, IV) + uint min_key_version; // min key version for this space + ulint page0_offset; // byte offset on page 0 for crypt data + + ib_mutex_t mutex; // mutex protecting following variables + bool closing; // is tablespace being closed + fil_space_rotate_state_t rotate_state; + + uint iv_length; // length of IV + byte iv[1]; // IV-data +}; + +/********************************************************************* +Init space crypt */ +UNIV_INTERN +void +fil_space_crypt_init() +{ + mutex_create(fil_crypt_key_mutex_key, + &fil_crypt_key_mutex, SYNC_NO_ORDER_CHECK); + + fil_crypt_throttle_sleep_event = os_event_create(); + + mutex_create(fil_crypt_stat_mutex_key, + &crypt_stat_mutex, SYNC_NO_ORDER_CHECK); + memset(&crypt_stat, 0, sizeof(crypt_stat)); +} + +/********************************************************************* +Cleanup space crypt */ +UNIV_INTERN +void +fil_space_crypt_cleanup() +{ + os_event_free(fil_crypt_throttle_sleep_event); +} + 
+/****************************************************************** +Get key bytes for a space/key-version */ +static +void +fil_crypt_get_key(byte *dst, uint* key_length, + fil_space_crypt_t* crypt_data, uint version, bool page_encrypted) +{ + unsigned char keybuf[MY_AES_MAX_KEY_LENGTH]; + unsigned char iv[CRYPT_SCHEME_1_IV_LEN]; + ulint iv_len = sizeof(iv); + + if (!page_encrypted) { + mutex_enter(&crypt_data->mutex); + + // Check if we already have key + for (uint i = 0; i < crypt_data->key_count; i++) { + if (crypt_data->keys[i].key_version == version) { + memcpy(dst, crypt_data->keys[i].key, + sizeof(crypt_data->keys[i].key)); + mutex_exit(&crypt_data->mutex); + return; + } + } + // Not found! + crypt_data->keyserver_requests++; + + // Rotate keys to make room for a new + for (uint i = 1; i < array_elements(crypt_data->keys); i++) { + crypt_data->keys[i] = crypt_data->keys[i - 1]; + } + } + else + { + // load iv + + int rc = GetCryptoIV(version, (unsigned char*)iv, iv_len); + + if (rc != CRYPT_KEY_OK) { + ib_logf(IB_LOG_LEVEL_FATAL, + "IV %d can not be found. Reason=%d", version, rc); + ut_error; + } + } + + if (HasCryptoKey(version)) { + *key_length = GetCryptoKeySize(version); + + int rc = GetCryptoKey(version, (unsigned char*)keybuf, *key_length); + + if (rc != CRYPT_KEY_OK) { + ib_logf(IB_LOG_LEVEL_FATAL, + "Key %d can not be found. Reason=%d", version, rc); + ut_error; + } + } else { + ib_logf(IB_LOG_LEVEL_FATAL, + "Key %d not found", version); + ut_error; + } + + + // do ctr key initialization + if (current_aes_dynamic_method == MY_AES_ALGORITHM_CTR) + { + // Now compute L by encrypting IV using this key + const unsigned char* src = page_encrypted ? iv : crypt_data->iv; + const int srclen = page_encrypted ? iv_len : crypt_data->iv_length; + unsigned char* buf = page_encrypted ? keybuf : crypt_data->keys[0].key; + uint32 buflen = page_encrypted ? 
*key_length : sizeof(crypt_data->keys[0].key); + + // call ecb explicit + my_aes_encrypt_dynamic_type func = get_aes_encrypt_func(MY_AES_ALGORITHM_ECB); + int rc = (*func)(src, srclen, + buf, &buflen, + (unsigned char*)keybuf, *key_length, + NULL, 0, + 1); + + if (rc != AES_OK) { + ib_logf(IB_LOG_LEVEL_FATAL, + "Unable to encrypt key-block " + " src: %p srclen: %d buf: %p buflen: %d." + " return-code: %d. Can't continue!\n", + src, srclen, buf, buflen, rc); + ut_error; + } + + if (!page_encrypted) { + crypt_data->keys[0].key_version = version; + crypt_data->key_count++; + + if (crypt_data->key_count > array_elements(crypt_data->keys)) { + crypt_data->key_count = array_elements(crypt_data->keys); + } + } + + // set the key size to the aes block size because this encrypted data is the key + *key_length = MY_AES_BLOCK_SIZE; + memcpy(dst, buf, buflen); + } + else + { + // otherwise keybuf contains the right key + memcpy(dst, keybuf, *key_length); + } + + if (!page_encrypted) { + mutex_exit(&crypt_data->mutex); + } +} + +/****************************************************************** +Get key bytes for a space/latest(key-version) */ +static inline +void +fil_crypt_get_latest_key(byte *dst, uint* key_length, + fil_space_crypt_t* crypt_data, uint *version) +{ + if (srv_encrypt_tables) { + // used for key rotation - get the next key id from the key provider + int rc = GetLatestCryptoKeyVersion(); + + // if no new key was created use the last one + if (rc >= 0) + { + *version = rc; + } + + return fil_crypt_get_key(dst, key_length, crypt_data, *version, false); + } + return fil_crypt_get_key(dst, key_length, NULL, *version, true); +} + +/****************************************************************** +Create a fil_space_crypt_t object */ +UNIV_INTERN +fil_space_crypt_t* +fil_space_create_crypt_data() +{ + const uint iv_length = CRYPT_SCHEME_1_IV_LEN; + const uint sz = sizeof(fil_space_crypt_t) + iv_length; + fil_space_crypt_t* crypt_data = + 
static_cast<fil_space_crypt_t*>(malloc(sz)); + memset(crypt_data, 0, sz); + + if (srv_encrypt_tables == FALSE) { + crypt_data->type = CRYPT_SCHEME_UNENCRYPTED; + crypt_data->min_key_version = 0; + } else { + crypt_data->type = CRYPT_SCHEME_1; + crypt_data->min_key_version = GetLatestCryptoKeyVersion(); + } + + mutex_create(fil_crypt_data_mutex_key, + &crypt_data->mutex, SYNC_NO_ORDER_CHECK); + crypt_data->iv_length = iv_length; + my_random_bytes(crypt_data->iv, iv_length); + return crypt_data; +} + +/****************************************************************** +Compare two crypt objects */ +UNIV_INTERN +int +fil_space_crypt_compare(const fil_space_crypt_t* crypt_data1, + const fil_space_crypt_t* crypt_data2) +{ + ut_a(crypt_data1->type == CRYPT_SCHEME_UNENCRYPTED || + crypt_data1->type == CRYPT_SCHEME_1); + ut_a(crypt_data2->type == CRYPT_SCHEME_UNENCRYPTED || + crypt_data2->type == CRYPT_SCHEME_1); + + ut_a(crypt_data1->iv_length == CRYPT_SCHEME_1_IV_LEN); + ut_a(crypt_data2->iv_length == CRYPT_SCHEME_1_IV_LEN); + + /* no support for changing iv (yet?) */ + ut_a(memcmp(crypt_data1->iv, crypt_data2->iv, + crypt_data1->iv_length) == 0); + + return 0; +} + +/****************************************************************** +Read crypt data from a page (0) */ +UNIV_INTERN +fil_space_crypt_t* +fil_space_read_crypt_data(ulint space, const byte* page, ulint offset) +{ + if (memcmp(page + offset, EMPTY_PATTERN, MAGIC_SZ) == 0) { + /* crypt is not stored */ + return NULL; + } + + if (memcmp(page + offset, CRYPT_MAGIC, MAGIC_SZ) != 0) { + fprintf(stderr, + "Warning: found potentially bogus bytes on " + "page 0 offset %lu for space %lu : " + "[ %.2x %.2x %.2x %.2x %.2x %.2x ]. " + "Assuming space is not encrypted!\n", + offset, space, + page[offset + 0], + page[offset + 1], + page[offset + 2], + page[offset + 3], + page[offset + 4], + page[offset + 5]); + return NULL; + } + + ulint type = mach_read_from_1(page + offset + MAGIC_SZ + 0); + + if (! 
(type == CRYPT_SCHEME_UNENCRYPTED || + type == CRYPT_SCHEME_1)) { + fprintf(stderr, + "Found non sensible crypt scheme: %lu for space %lu " + " offset: %lu bytes: " + "[ %.2x %.2x %.2x %.2x %.2x %.2x ]\n", + type, space, offset, + page[offset + 0 + MAGIC_SZ], + page[offset + 1 + MAGIC_SZ], + page[offset + 2 + MAGIC_SZ], + page[offset + 3 + MAGIC_SZ], + page[offset + 4 + MAGIC_SZ], + page[offset + 5 + MAGIC_SZ]); + ut_error; + } + + ulint iv_length = mach_read_from_1(page + offset + MAGIC_SZ + 1); + if (! (iv_length == CRYPT_SCHEME_1_IV_LEN)) { + fprintf(stderr, + "Found non sensible iv length: %lu for space %lu " + " offset: %lu type: %lu bytes: " + "[ %.2x %.2x %.2x %.2x %.2x %.2x ]\n", + iv_length, space, offset, type, + page[offset + 0 + MAGIC_SZ], + page[offset + 1 + MAGIC_SZ], + page[offset + 2 + MAGIC_SZ], + page[offset + 3 + MAGIC_SZ], + page[offset + 4 + MAGIC_SZ], + page[offset + 5 + MAGIC_SZ]); + ut_error; + } + + uint min_key_version = mach_read_from_4 + (page + offset + MAGIC_SZ + 2 + iv_length); + + const uint sz = sizeof(fil_space_crypt_t) + iv_length; + fil_space_crypt_t* crypt_data = static_cast<fil_space_crypt_t*>( + malloc(sz)); + memset(crypt_data, 0, sz); + + crypt_data->type = type; + crypt_data->min_key_version = min_key_version; + crypt_data->page0_offset = offset; + mutex_create(fil_crypt_data_mutex_key, + &crypt_data->mutex, SYNC_NO_ORDER_CHECK); + crypt_data->iv_length = iv_length; + memcpy(crypt_data->iv, page + offset + MAGIC_SZ + 2, iv_length); + + return crypt_data; +} + +/****************************************************************** +Free a crypt data object */ +UNIV_INTERN +void +fil_space_destroy_crypt_data(fil_space_crypt_t **crypt_data) +{ + if (crypt_data != NULL && (*crypt_data) != NULL) { + /* lock (and unlock) mutex to make sure no one has it locked + * currently */ + mutex_enter(& (*crypt_data)->mutex); + mutex_exit(& (*crypt_data)->mutex); + mutex_free(& (*crypt_data)->mutex); + free(*crypt_data); + (*crypt_data) = 
NULL; + } +} + +/****************************************************************** +Write crypt data to a page (0) */ +static +void +fil_space_write_crypt_data_low(fil_space_crypt_t *crypt_data, + ulint type, + byte* page, ulint offset, + ulint maxsize, mtr_t* mtr) +{ + ut_a(offset > 0 && offset < UNIV_PAGE_SIZE); + ulint space_id = mach_read_from_4( + page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + const uint len = crypt_data->iv_length; + const uint min_key_version = crypt_data->min_key_version; + crypt_data->page0_offset = offset; + ut_a(2 + len + 4 + MAGIC_SZ < maxsize); + + /* + redo log this as bytewise updates to page 0 + followed by an MLOG_FILE_WRITE_CRYPT_DATA + (that will during recovery update fil_space_t) + */ + mlog_write_string(page + offset, CRYPT_MAGIC, MAGIC_SZ, mtr); + mlog_write_ulint(page + offset + MAGIC_SZ + 0, type, MLOG_1BYTE, mtr); + mlog_write_ulint(page + offset + MAGIC_SZ + 1, len, MLOG_1BYTE, mtr); + mlog_write_string(page + offset + MAGIC_SZ + 2, crypt_data->iv, len, + mtr); + mlog_write_ulint(page + offset + MAGIC_SZ + 2 + len, min_key_version, + MLOG_4BYTES, mtr); + + byte* log_ptr = mlog_open(mtr, 11 + 12 + len); + if (log_ptr != NULL) { + log_ptr = mlog_write_initial_log_record_fast( + page, + MLOG_FILE_WRITE_CRYPT_DATA, + log_ptr, mtr); + mach_write_to_4(log_ptr, space_id); + log_ptr += 4; + mach_write_to_2(log_ptr, offset); + log_ptr += 2; + mach_write_to_1(log_ptr, type); + log_ptr += 1; + mach_write_to_1(log_ptr, len); + log_ptr += 1; + mach_write_to_4(log_ptr, min_key_version); + log_ptr += 4; + mlog_close(mtr, log_ptr); + + mlog_catenate_string(mtr, crypt_data->iv, len); + } +} + +/****************************************************************** +Write crypt data to a page (0) */ +UNIV_INTERN +void +fil_space_write_crypt_data(ulint space, byte* page, ulint offset, + ulint maxsize, mtr_t* mtr) +{ + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) { + return; + } + + 
fil_space_write_crypt_data_low(crypt_data, crypt_data->type, + page, offset, maxsize, mtr); +} + +/****************************************************************** +Parse a MLOG_FILE_WRITE_CRYPT_DATA log entry */ +UNIV_INTERN +byte* +fil_parse_write_crypt_data(byte* ptr, byte* end_ptr, + buf_block_t* block) +{ + /* check that redo log entry is complete */ + uint entry_size = + 4 + // size of space_id + 2 + // size of offset + 1 + // size of type + 1 + // size of iv-len + 4; // size of min_key_version + if (end_ptr - ptr < entry_size) + return NULL; + + ulint space_id = mach_read_from_4(ptr); + ptr += 4; + uint offset = mach_read_from_2(ptr); + ptr += 2; + uint type = mach_read_from_1(ptr); + ptr += 1; + uint len = mach_read_from_1(ptr); + ptr += 1; + + ut_a(type == CRYPT_SCHEME_UNENCRYPTED || + type == CRYPT_SCHEME_1); // only supported + ut_a(len == CRYPT_SCHEME_1_IV_LEN); // only supported + uint min_key_version = mach_read_from_4(ptr); + ptr += 4; + + if (end_ptr - ptr < len) + return NULL; + + fil_space_crypt_t* crypt_data = fil_space_create_crypt_data(); + crypt_data->page0_offset = offset; + crypt_data->min_key_version = min_key_version; + memcpy(crypt_data->iv, ptr, len); + ptr += len; + + /* update fil_space memory cache with crypt_data */ + fil_space_set_crypt_data(space_id, crypt_data); + + return ptr; +} + +/****************************************************************** +Clear crypt data from a page (0) */ +UNIV_INTERN +void +fil_space_clear_crypt_data(byte* page, ulint offset) +{ + //TODO(jonaso): pass crypt-data and read len from there + ulint len = CRYPT_SCHEME_1_IV_LEN; + ulint size = + sizeof(CRYPT_MAGIC) + + 1 + // type + 1 + // len + len + // iv + 4; // min key version + memset(page + offset, 0, size); +} + +/********************************************************************* +Check if page shall be encrypted before write */ +UNIV_INTERN +bool +fil_space_check_encryption_write( +/*==============================*/ + ulint space) /*!< in: 
tablespace id */ +{ + if (srv_encrypt_tables == FALSE) + return false; + + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) + return false; + + if (crypt_data->type == CRYPT_SCHEME_UNENCRYPTED) + return false; + + return true; +} + +/****************************************************************** +Encrypt a page */ +UNIV_INTERN +void +fil_space_encrypt(ulint space, ulint offset, lsn_t lsn, + const byte* src_frame, ulint zip_size, byte* dst_frame, ulint encryption_key) +{ + fil_space_crypt_t* crypt_data; + ulint page_size = (zip_size) ? zip_size : UNIV_PAGE_SIZE; + + // get key (L) + uint key_version; + byte key[MY_AES_MAX_KEY_LENGTH]; + uint key_length; + + if (srv_encrypt_tables) { + crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) { + //TODO: Is this really needed ? + memcpy(dst_frame, src_frame, page_size); + return; + } + fil_crypt_get_latest_key(key, &key_length, crypt_data, &key_version); + } else { + key_version = encryption_key; + fil_crypt_get_latest_key(key, &key_length, NULL, (uint*)&key_version); + } + + + /* Load the iv or counter (depending to the encryption algorithm used) */ + unsigned char iv[MY_AES_BLOCK_SIZE]; + + if (current_aes_dynamic_method == MY_AES_ALGORITHM_CTR) + { + // create counter block (C) + mach_write_to_4(iv + 0, space); + ulint space_offset = mach_read_from_4( + src_frame + FIL_PAGE_OFFSET); + mach_write_to_4(iv + 4, space_offset); + mach_write_to_8(iv + 8, lsn); + } + else + { + // take the iv from the key provider + + int load_iv_rc = GetCryptoIV(key_version, (uchar *) iv, sizeof(iv)); + + // if the iv can not be loaded the whole page can not be encrypted + if (load_iv_rc != CRYPT_KEY_OK) + { + ib_logf(IB_LOG_LEVEL_FATAL, + "Unable to decrypt data-block. " + " Can not load iv for key %d" + " return-code: %d. 
Can't continue!\n", + key_version, load_iv_rc); + + ut_error; + } + } + + + ibool page_compressed = (mach_read_from_2(src_frame+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED); + ibool page_encrypted = fil_space_is_page_encrypted(space); + + ulint compression_alg = mach_read_from_8(src_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + + ulint orig_page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE); + if (orig_page_type==FIL_PAGE_TYPE_FSP_HDR + || orig_page_type==FIL_PAGE_TYPE_XDES + || orig_page_type== FIL_PAGE_PAGE_ENCRYPTED + || orig_page_type== FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { + memcpy(dst_frame, src_frame, page_size); + return; + } + + // copy page header + memcpy(dst_frame, src_frame, FIL_PAGE_DATA); + + + if (page_encrypted && !page_compressed) { + // key id + mach_write_to_2(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, + key_version); + // original page type + mach_write_to_2(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 2, + orig_page_type); + // new page type + mach_write_to_2(dst_frame+FIL_PAGE_TYPE, FIL_PAGE_PAGE_ENCRYPTED); + } else { + // store key version + mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, + key_version); + } + + // encrypt page data + ulint unencrypted_bytes = FIL_PAGE_DATA + FIL_PAGE_DATA_END; + ulint srclen = page_size - unencrypted_bytes; + const byte* src = src_frame + FIL_PAGE_DATA; + byte* dst = dst_frame + FIL_PAGE_DATA; + uint32 dstlen; + + if (page_compressed) { + srclen = page_size - FIL_PAGE_DATA;; + } + + + int rc = (* my_aes_encrypt_dynamic)(src, srclen, + dst, &dstlen, + (unsigned char*)key, key_length, + (unsigned char*)iv, sizeof(iv), + 1); + + if (! ((rc == AES_OK) && ((ulint) dstlen == srclen))) { + ib_logf(IB_LOG_LEVEL_FATAL, + "Unable to encrypt data-block " + " src: %p srclen: %ld buf: %p buflen: %d." + " return-code: %d. 
Can't continue!\n", + src, (long)srclen, + dst, dstlen, rc); + ut_error; + } + + if (!page_compressed) { + // copy page trailer + memcpy(dst_frame + page_size - FIL_PAGE_DATA_END, + src_frame + page_size - FIL_PAGE_DATA_END, + FIL_PAGE_DATA_END); + + /* handle post encryption checksum */ + ib_uint32_t checksum = 0; + srv_checksum_algorithm_t algorithm = + static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm); + + if (zip_size == 0) { + switch (algorithm) { + case SRV_CHECKSUM_ALGORITHM_CRC32: + case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: + checksum = buf_calc_page_crc32(dst_frame); + break; + case SRV_CHECKSUM_ALGORITHM_INNODB: + case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: + checksum = (ib_uint32_t) buf_calc_page_new_checksum( + dst_frame); + break; + case SRV_CHECKSUM_ALGORITHM_NONE: + case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: + checksum = BUF_NO_CHECKSUM_MAGIC; + break; + /* no default so the compiler will emit a warning + * if new enum is added and not handled here */ + } + } else { + checksum = page_zip_calc_checksum(dst_frame, zip_size, + algorithm); + } + + // store the post-encryption checksum after the key-version + mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4, + checksum); + } else { + /* Page compressed and encrypted tables have different + FIL_HEADER */ + ulint page_len = log10((double)page_size)/log10((double)2); + /* Set up the correct page type */ + mach_write_to_2(dst_frame+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED); + /* Set up the compression algorithm */ + mach_write_to_2(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+4, orig_page_type); + /* Set up the compressed size */ + mach_write_to_1(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+6, page_len); + /* Set up the compression method */ + mach_write_to_1(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+7, compression_alg); + } + +} + +/********************************************************************* +Check if extra buffer shall be allocated for 
decrypting after read */ +UNIV_INTERN +bool +fil_space_check_encryption_read( +/*==============================*/ + ulint space) /*!< in: tablespace id */ +{ + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) + return false; + + if (crypt_data->type == CRYPT_SCHEME_UNENCRYPTED) + return false; + + return true; +} + +/****************************************************************** +Decrypt a page */ +UNIV_INTERN +bool +fil_space_decrypt(fil_space_crypt_t* crypt_data, + const byte* src_frame, ulint page_size, byte* dst_frame) +{ + ulint page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE); + // key version + uint key_version; + bool page_encrypted = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED + || page_type == FIL_PAGE_PAGE_ENCRYPTED); + + bool page_compressed = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED + || page_type == FIL_PAGE_PAGE_COMPRESSED); + + ulint orig_page_type=0; + if (page_type == FIL_PAGE_PAGE_ENCRYPTED) { + key_version = mach_read_from_2( + src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + orig_page_type = mach_read_from_2( + src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 2); + } else { + key_version = mach_read_from_4( + src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + } + + if (key_version == 0 && !page_encrypted) { + //TODO: is this really needed ? 
+ memcpy(dst_frame, src_frame, page_size); + return false; /* page not decrypted */ + } + + // read space & offset & lsn + ulint space = mach_read_from_4( + src_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + ulint offset = mach_read_from_4( + src_frame + FIL_PAGE_OFFSET); + ib_uint64_t lsn = mach_read_from_8(src_frame + FIL_PAGE_LSN); + + // copy page header + memcpy(dst_frame, src_frame, FIL_PAGE_DATA); + + if (page_type == FIL_PAGE_PAGE_ENCRYPTED) { + // orig page type + mach_write_to_2(dst_frame+FIL_PAGE_TYPE, orig_page_type); + } + + + // get key + byte key[MY_AES_MAX_KEY_LENGTH]; + uint key_length; + fil_crypt_get_key(key, &key_length, crypt_data, key_version, page_encrypted); + + // get the iv + unsigned char iv[MY_AES_BLOCK_SIZE]; + + if (current_aes_dynamic_method == MY_AES_ALGORITHM_CTR) + { + // create counter block + + mach_write_to_4(iv + 0, space); + mach_write_to_4(iv + 4, offset); + mach_write_to_8(iv + 8, lsn); + } + else + { + // take the iv from the key provider + + int load_iv_rc = GetCryptoIV(key_version, (uchar *) iv, sizeof(iv)); + + // if the iv can not be loaded the whole page can not be decrypted + if (load_iv_rc != CRYPT_KEY_OK) + { + ib_logf(IB_LOG_LEVEL_FATAL, + "Unable to decrypt data-block. " + " Can not load iv for key %d" + " return-code: %d. 
Can't continue!\n", + key_version, load_iv_rc); + + return AES_KEY_CREATION_FAILED; + } + } + + const byte* src = src_frame + FIL_PAGE_DATA; + byte* dst = dst_frame + FIL_PAGE_DATA; + uint32 dstlen; + ulint srclen = page_size - (FIL_PAGE_DATA + FIL_PAGE_DATA_END); + + ulint compressed_len; + ulint compression_method; + + if (page_compressed) { + orig_page_type = mach_read_from_2(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+4); + compressed_len = mach_read_from_1(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+6); + compression_method = mach_read_from_1(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+7); + } + + if (page_encrypted && !page_compressed) { + orig_page_type = mach_read_from_2(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+2); + } + + if (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { + srclen = pow((double)2, (double)((int)compressed_len)) - FIL_PAGE_DATA; + } + + int rc = (* my_aes_decrypt_dynamic)(src, srclen, + dst, &dstlen, + (unsigned char*)key, key_length, + (unsigned char*)iv, sizeof(iv), + 1); + + if (! ((rc == AES_OK) && ((ulint) dstlen == srclen))) { + ib_logf(IB_LOG_LEVEL_FATAL, + "Unable to decrypt data-block " + " src: %p srclen: %ld buf: %p buflen: %d." + " return-code: %d. 
Can't continue!\n", + src, (long)srclen, + dst, dstlen, rc); + ut_error; + } + + if (page_type != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { + // copy page trailer + memcpy(dst_frame + page_size - FIL_PAGE_DATA_END, + src_frame + page_size - FIL_PAGE_DATA_END, + FIL_PAGE_DATA_END); + + // clear key-version & crypt-checksum from dst + memset(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); + } else { + /* For page compressed tables we set up the FIL_HEADER again */ + /* setting original page type */ + mach_write_to_2(dst_frame + FIL_PAGE_TYPE, orig_page_type); + /* page_compression uses BUF_NO_CHECKSUM_MAGIC as checksum */ + mach_write_to_4(dst_frame + FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC); + /* Set up the flush lsn to be compression algorithm */ + mach_write_to_8(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, compression_method); + } + + return true; /* page was decrypted */ +} + +/****************************************************************** +Decrypt a page */ +UNIV_INTERN +void +fil_space_decrypt(ulint space, + const byte* src_frame, ulint page_size, byte* dst_frame) +{ + fil_space_decrypt(fil_space_get_crypt_data(space), + src_frame, page_size, dst_frame); +} + +/********************************************************************* +Verify checksum for a page (iff it's encrypted) +NOTE: currently this function can only be run in single threaded mode +as it modifies srv_checksum_algorithm (temporarily) +@return true if page is encrypted AND OK, false otherwise */ +bool +fil_space_verify_crypt_checksum(const byte* src_frame, ulint zip_size) +{ + // key version + uint key_version = mach_read_from_4( + src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + + if (key_version == 0) { + return false; // unencrypted page + } + + /* "trick" the normal checksum routines by storing the post-encryption + * checksum into the normal checksum field allowing for reuse of + * the normal routines */ + + // post encryption checksum + ib_uint32_t 
stored_post_encryption = mach_read_from_4( + src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4); + + // save pre encryption checksum for restore in end of this function + ib_uint32_t stored_pre_encryption = mach_read_from_4( + src_frame + FIL_PAGE_SPACE_OR_CHKSUM); + + ib_uint32_t checksum_field2 = mach_read_from_4( + src_frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM); + + /** prepare frame for usage of normal checksum routines */ + mach_write_to_4(const_cast<byte*>(src_frame) + FIL_PAGE_SPACE_OR_CHKSUM, + stored_post_encryption); + + /* NOTE: this function is (currently) only run when restoring + * dblwr-buffer, server is single threaded so it's safe to modify + * srv_checksum_algorithm */ + srv_checksum_algorithm_t save_checksum_algorithm = + (srv_checksum_algorithm_t)srv_checksum_algorithm; + if (zip_size == 0 && + (save_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB || + save_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_INNODB)) { + /* handle ALGORITHM_INNODB specially, + * "downgrade" to ALGORITHM_INNODB and store BUF_NO_CHECKSUM_MAGIC + * checksum_field2 is sort of pointless anyway... 
+ */ + srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_INNODB; + mach_write_to_4(const_cast<byte*>(src_frame) + + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, + BUF_NO_CHECKSUM_MAGIC); + } + + /* verify checksums */ + ibool corrupted = buf_page_is_corrupted(false, src_frame, zip_size); + + /** restore frame & algorithm */ + srv_checksum_algorithm = save_checksum_algorithm; + + mach_write_to_4(const_cast<byte*>(src_frame) + + FIL_PAGE_SPACE_OR_CHKSUM, + stored_pre_encryption); + + mach_write_to_4(const_cast<byte*>(src_frame) + + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, + checksum_field2); + + if (!corrupted) { + return true; // page was encrypted and checksum matched + } else { + return false; // page was encrypted but checksum didn't match + } +} + +/***********************************************************************/ + +/** A copy of global key state */ +struct key_state_t { + key_state_t() : key_version(0), + rotate_key_age(srv_fil_crypt_rotate_key_age) {} + bool operator==(const key_state_t& other) const { + return key_version == other.key_version && + rotate_key_age == other.rotate_key_age; + } + uint key_version; + uint rotate_key_age; +}; + +/*********************************************************************** +Copy global key state */ +static void +fil_crypt_get_key_state( + key_state_t *new_state) +{ + if (srv_encrypt_tables == TRUE) { + new_state->key_version = GetLatestCryptoKeyVersion(); + new_state->rotate_key_age = srv_fil_crypt_rotate_key_age; + ut_a(new_state->key_version > 0); + } else { + new_state->key_version = 0; + new_state->rotate_key_age = 0; + } +} + +/*********************************************************************** +Check if a key needs rotation given a key_state */ +static bool +fil_crypt_needs_rotation(uint key_version, const key_state_t *key_state) +{ + // TODO(jonaso): Add support for rotating encrypted => unencrypted + + if (key_version == 0 && key_state->key_version != 0) { + /* this is rotation unencrypted => 
encrypted + * ignore rotate_key_age */ + return true; + } + + if (key_state->key_version == 0 && key_version != 0) { + /* this is rotation encrypted => unencrypted */ + return true; + } + + /* this is rotation encrypted => encrypted, + * only reencrypt if key is sufficiently old */ + if (key_version + key_state->rotate_key_age < key_state->key_version) + return true; + + return false; +} + +/*********************************************************************** +Check if a space is closing (i.e just before drop) */ +UNIV_INTERN bool +fil_crypt_is_closing(ulint space) +{ + bool closing; + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + mutex_enter(&crypt_data->mutex); + closing = crypt_data->closing; + mutex_exit(&crypt_data->mutex); + return closing; +} + +/*********************************************************************** +Start encrypting a space +@return true if a pending op (fil_inc_pending_ops/fil_decr_pending_ops) is held +*/ +static bool +fil_crypt_start_encrypting_space(ulint space, bool *recheck) { + + /* we have a pending op when entering function */ + bool pending_op = true; + + mutex_enter(&fil_crypt_threads_mutex); + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + if (crypt_data != NULL || fil_crypt_start_converting) { + /* someone beat us to it */ + if (fil_crypt_start_converting) + *recheck = true; + + mutex_exit(&fil_crypt_threads_mutex); + return pending_op; + } + + /* NOTE: we need to write and flush page 0 before publishing + * the crypt data. 
This so that after restart there is no + * risk of finding encrypted pages without having + * crypt data in page 0 */ + + /* 1 - create crypt data */ + crypt_data = fil_space_create_crypt_data(); + if (crypt_data == NULL) { + mutex_exit(&fil_crypt_threads_mutex); + return pending_op; + } + + crypt_data->type = CRYPT_SCHEME_UNENCRYPTED; + crypt_data->min_key_version = 0; // all pages are unencrypted + crypt_data->rotate_state.start_time = time(0); + crypt_data->rotate_state.starting = true; + crypt_data->rotate_state.active_threads = 1; + + mutex_enter(&crypt_data->mutex); + fil_space_set_crypt_data(space, crypt_data); + mutex_exit(&crypt_data->mutex); + + fil_crypt_start_converting = true; + mutex_exit(&fil_crypt_threads_mutex); + + do + { + if (fil_crypt_is_closing(space) || + fil_tablespace_is_being_deleted(space)) + break; + + mtr_t mtr; + mtr_start(&mtr); + + /* 2 - get page 0 */ + ulint offset = 0; + ulint zip_size = fil_space_get_zip_size(space); + buf_block_t* block = buf_page_get_gen(space, zip_size, offset, + RW_X_LATCH, + NULL, + BUF_GET, + __FILE__, __LINE__, + &mtr); + + if (fil_crypt_is_closing(space) || + fil_tablespace_is_being_deleted(space)) { + mtr_commit(&mtr); + break; + } + + /* 3 - compute location to store crypt data */ + byte* frame = buf_block_get_frame(block); + ulint maxsize; + crypt_data->page0_offset = + fsp_header_get_crypt_offset(zip_size, &maxsize); + + /* 4 - write crypt data to page 0 */ + fil_space_write_crypt_data_low(crypt_data, + CRYPT_SCHEME_1, + frame, + crypt_data->page0_offset, + maxsize, &mtr); + + mtr_commit(&mtr); + + if (fil_crypt_is_closing(space) || + fil_tablespace_is_being_deleted(space)) { + break; + } + + /* record lsn of update */ + lsn_t end_lsn = mtr.end_lsn; + + /* 4 - sync tablespace before publishing crypt data */ + + /* release "lock" while syncing */ + fil_decr_pending_ops(space); + pending_op = false; + + bool success = false; + ulint n_pages = 0; + ulint sum_pages = 0; + do { + success = 
buf_flush_list(ULINT_MAX, end_lsn, &n_pages); + buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); + sum_pages += n_pages; + } while (!success && + !fil_crypt_is_closing(space) && + !fil_tablespace_is_being_deleted(space)); + + /* try to reacquire pending op */ + if (fil_inc_pending_ops(space, true)) + break; + + /* pending op reacquired! */ + pending_op = true; + + if (fil_crypt_is_closing(space) || + fil_tablespace_is_being_deleted(space)) { + break; + } + + /* 5 - publish crypt data */ + mutex_enter(&fil_crypt_threads_mutex); + mutex_enter(&crypt_data->mutex); + crypt_data->type = CRYPT_SCHEME_1; + ut_a(crypt_data->rotate_state.active_threads == 1); + crypt_data->rotate_state.active_threads = 0; + crypt_data->rotate_state.starting = false; + + fil_crypt_start_converting = false; + mutex_exit(&crypt_data->mutex); + mutex_exit(&fil_crypt_threads_mutex); + + return pending_op; + } while (0); + + mutex_enter(&crypt_data->mutex); + ut_a(crypt_data->rotate_state.active_threads == 1); + crypt_data->rotate_state.active_threads = 0; + mutex_exit(&crypt_data->mutex); + + mutex_enter(&fil_crypt_threads_mutex); + fil_crypt_start_converting = false; + mutex_exit(&fil_crypt_threads_mutex); + + return pending_op; +} + +/*********************************************************************** +Check if space needs rotation given a key_state */ +static bool +fil_crypt_space_needs_rotation(uint space, const key_state_t *key_state, + bool *recheck) +{ + if (fil_space_get_type(space) != FIL_TABLESPACE) + return false; + + if (fil_inc_pending_ops(space, true)) { + /* tablespace being dropped */ + return false; + } + + /* keep track of if we have pending op */ + bool pending_op = true; + + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) { + /** + * space has no crypt data + * start encrypting it... 
+ */ + pending_op = fil_crypt_start_encrypting_space(space, recheck); + crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) { + if (pending_op) { + fil_decr_pending_ops(space); + } + return false; + } + } + + mutex_enter(&crypt_data->mutex); + do { + /* prevent threads from starting to rotate space */ + if (crypt_data->rotate_state.starting) { + /* recheck this space later */ + *recheck = true; + break; + } + + /* prevent threads from starting to rotate space */ + if (crypt_data->closing) + break; + + if (crypt_data->rotate_state.flushing) + break; + + bool need_key_rotation = fil_crypt_needs_rotation( + crypt_data->min_key_version, key_state); + + time_t diff = time(0) - crypt_data->rotate_state.scrubbing. + last_scrub_completed; + bool need_scrubbing = + diff >= srv_background_scrub_data_interval; + + if (need_key_rotation == false && need_scrubbing == false) + break; + + mutex_exit(&crypt_data->mutex); + /* NOTE! fil_decr_pending_ops is performed outside */ + return true; + } while (0); + + mutex_exit(&crypt_data->mutex); + if (pending_op) { + fil_decr_pending_ops(space); + } + return false; +} + +/** State of a rotation thread */ +struct rotate_thread_t { + explicit rotate_thread_t(uint no) { + memset(this, 0, sizeof(* this)); + thread_no = no; + first = true; + estimated_max_iops = 20; + } + + uint thread_no; + bool first; /*!< is position before first space */ + ulint space; /*!< current space */ + ulint offset; /*!< current offset */ + ulint batch; /*!< #pages to rotate */ + uint min_key_version_found;/*!< min key version found but not rotated */ + lsn_t end_lsn; /*!< max lsn when rotating this space */ + + uint estimated_max_iops; /*!< estimation of max iops */ + uint allocated_iops; /*!< allocated iops */ + uint cnt_waited; /*!< #times waited during this slot */ + uint sum_waited_us; /*!< wait time during this slot */ + + fil_crypt_stat_t crypt_stat; // statistics + + btr_scrub_t scrub_data; /* thread local data used by 
btr_scrub-functions + * when iterating pages of tablespace */ + + /* check if this thread should shutdown */ + bool should_shutdown() const { + return ! (srv_shutdown_state == SRV_SHUTDOWN_NONE && + thread_no < srv_n_fil_crypt_threads); + } +}; + +/*********************************************************************** +Update global statistics with thread statistics */ +static void +fil_crypt_update_total_stat(rotate_thread_t *state) +{ + mutex_enter(&crypt_stat_mutex); + crypt_stat.pages_read_from_cache += + state->crypt_stat.pages_read_from_cache; + crypt_stat.pages_read_from_disk += + state->crypt_stat.pages_read_from_disk; + crypt_stat.pages_modified += state->crypt_stat.pages_modified; + crypt_stat.pages_flushed += state->crypt_stat.pages_flushed; + // remote old estimate + crypt_stat.estimated_iops -= state->crypt_stat.estimated_iops; + // add new estimate + crypt_stat.estimated_iops += state->estimated_max_iops; + mutex_exit(&crypt_stat_mutex); + + // make new estimate "current" estimate + memset(&state->crypt_stat, 0, sizeof(state->crypt_stat)); + // record our old (current) estimate + state->crypt_stat.estimated_iops = state->estimated_max_iops; +} + +/*********************************************************************** +Allocate iops to thread from global setting, +used before starting to rotate a space */ +static bool +fil_crypt_alloc_iops(rotate_thread_t *state) +{ + ut_ad(state->allocated_iops == 0); + + uint max_iops = state->estimated_max_iops; + mutex_enter(&fil_crypt_threads_mutex); + if (n_fil_crypt_iops_allocated >= srv_n_fil_crypt_iops) { + /* this can happen when user decreases srv_fil_crypt_iops */ + mutex_exit(&fil_crypt_threads_mutex); + return false; + } + + uint alloc = srv_n_fil_crypt_iops - n_fil_crypt_iops_allocated; + if (alloc > max_iops) + alloc = max_iops; + + n_fil_crypt_iops_allocated += alloc; + mutex_exit(&fil_crypt_threads_mutex); + + state->allocated_iops = alloc; + + return alloc > 0; +} + 
+/*********************************************************************** +Reallocate iops to thread, +used when inside a space */ +static void +fil_crypt_realloc_iops(rotate_thread_t *state) +{ + ut_a(state->allocated_iops > 0); + + if (10 * state->cnt_waited > state->batch) { + /* if we waited more than 10% re-estimate max_iops */ + uint avg_wait_time_us = + state->sum_waited_us / state->cnt_waited; + +#if DEBUG_KEYROTATION_THROTTLING + fprintf(stderr, + "thr_no: %u - update estimated_max_iops from %u to %u\n", + state->thread_no, + state->estimated_max_iops, + 1000000 / avg_wait_time_us); +#endif + if (avg_wait_time_us == 0) + avg_wait_time_us = 1; // prevent division by zero + + state->estimated_max_iops = 1000000 / avg_wait_time_us; + state->cnt_waited = 0; + state->sum_waited_us = 0; + } else { +#if DEBUG_KEYROTATION_THROTTLING + fprintf(stderr, + "thr_no: %u only waited %lu%% skip re-estimate\n", + state->thread_no, + (100 * state->cnt_waited) / state->batch); +#endif + } + + if (state->estimated_max_iops <= state->allocated_iops) { + /* return extra iops */ + uint extra = state->allocated_iops - state->estimated_max_iops; + + if (extra > 0) { + mutex_enter(&fil_crypt_threads_mutex); + if (n_fil_crypt_iops_allocated < extra) { + /* unknown bug! + * crash in debug + * keep n_fil_crypt_iops_allocated unchanged + * in release */ + ut_ad(0); + extra = 0; + } + n_fil_crypt_iops_allocated -= extra; + state->allocated_iops -= extra; + + if (state->allocated_iops == 0) { + /* no matter how slow io system seems to be + * never decrease allocated_iops to 0... 
*/ + state->allocated_iops ++; + n_fil_crypt_iops_allocated ++; + } + mutex_exit(&fil_crypt_threads_mutex); + os_event_set(fil_crypt_threads_event); + } + } else { + /* see if there are more to get */ + mutex_enter(&fil_crypt_threads_mutex); + if (n_fil_crypt_iops_allocated < srv_n_fil_crypt_iops) { + /* there are extra iops free */ + uint extra = srv_n_fil_crypt_iops - + n_fil_crypt_iops_allocated; + if (state->allocated_iops + extra > + state->estimated_max_iops) { + /* but don't alloc more than our max */ + extra = state->estimated_max_iops - + state->allocated_iops; + } + n_fil_crypt_iops_allocated += extra; + state->allocated_iops += extra; +#if DEBUG_KEYROTATION_THROTTLING + fprintf(stderr, + "thr_no: %u increased iops from %u to %u\n", + state->thread_no, + state->allocated_iops - extra, + state->allocated_iops); +#endif + } + mutex_exit(&fil_crypt_threads_mutex); + } + + fil_crypt_update_total_stat(state); +} + +/*********************************************************************** +Return allocated iops to global */ +static void +fil_crypt_return_iops(rotate_thread_t *state) +{ + if (state->allocated_iops > 0) { + uint iops = state->allocated_iops; + mutex_enter(&fil_crypt_threads_mutex); + if (n_fil_crypt_iops_allocated < iops) { + /* unknown bug! 
+ * crash in debug + * keep n_fil_crypt_iops_allocated unchanged + * in release */ + ut_ad(0); + iops = 0; + } + n_fil_crypt_iops_allocated -= iops; + mutex_exit(&fil_crypt_threads_mutex); + state->allocated_iops = 0; + os_event_set(fil_crypt_threads_event); + } + + fil_crypt_update_total_stat(state); +} + +/*********************************************************************** +Search for a space needing rotation */ +bool +fil_crypt_find_space_to_rotate( + const key_state_t *key_state, + rotate_thread_t *state, + bool *recheck) +{ + /* we need iops to start rotating */ + while (!state->should_shutdown() && !fil_crypt_alloc_iops(state)) { + os_event_reset(fil_crypt_threads_event); + os_event_wait_time(fil_crypt_threads_event, 1000000); + } + + if (state->should_shutdown()) + return false; + + if (state->first) { + state->first = false; + state->space = fil_get_first_space(); + } else { + state->space = fil_get_next_space(state->space); + } + + while (!state->should_shutdown() && state->space != ULINT_UNDEFINED) { + + ulint space = state->space; + if (fil_crypt_space_needs_rotation(space, key_state, recheck)) { + /* init state->min_key_version_found before + * starting on a space */ + state->min_key_version_found = key_state->key_version; + return true; + } + + state->space = fil_get_next_space(space); + } + + /* if we didn't find any space return iops */ + fil_crypt_return_iops(state); + + return false; + +} + +/*********************************************************************** +Start rotating a space */ +static +void +fil_crypt_start_rotate_space( + const key_state_t *key_state, + rotate_thread_t *state) +{ + ulint space = state->space; + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + mutex_enter(&crypt_data->mutex); + if (crypt_data->rotate_state.active_threads == 0) { + /* only first thread needs to init */ + crypt_data->rotate_state.next_offset = 1; // skip page 0 + /* no need to rotate beyond current max + * if space extends, it will 
be encrypted with newer version */ + crypt_data->rotate_state.max_offset = fil_space_get_size(space); + + crypt_data->rotate_state.end_lsn = 0; + crypt_data->rotate_state.min_key_version_found = + key_state->key_version; + + crypt_data->rotate_state.start_time = time(0); + } + + /* count active threads in space */ + crypt_data->rotate_state.active_threads++; + + /* Initialize thread local state */ + state->end_lsn = crypt_data->rotate_state.end_lsn; + state->min_key_version_found = + crypt_data->rotate_state.min_key_version_found; + + /* inform scrubbing */ + crypt_data->rotate_state.scrubbing.is_active = + btr_scrub_start_space(space, &state->scrub_data); + + mutex_exit(&crypt_data->mutex); +} + +/*********************************************************************** +Search for batch of pages needing rotation */ +static +bool +fil_crypt_find_page_to_rotate( + const key_state_t *key_state, + rotate_thread_t *state) +{ + ulint batch = srv_alloc_time * state->allocated_iops; + ulint space = state->space; + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + mutex_enter(&crypt_data->mutex); + if (crypt_data->closing == false && + crypt_data->rotate_state.next_offset < + crypt_data->rotate_state.max_offset) { + + state->offset = crypt_data->rotate_state.next_offset; + ulint remaining = crypt_data->rotate_state.max_offset - + crypt_data->rotate_state.next_offset; + + if (batch <= remaining) + state->batch = batch; + else + state->batch = remaining; + + crypt_data->rotate_state.next_offset += batch; + mutex_exit(&crypt_data->mutex); + return true; + } + + mutex_exit(&crypt_data->mutex); + return false; +} + +/*********************************************************************** +Check if a page is uninitialized (doesn't need to be rotated) */ +static bool +fil_crypt_is_page_uninitialized(const byte* frame, uint zip_size) +{ + if (zip_size) { + ulint stored_checksum = mach_read_from_4( + frame + FIL_PAGE_SPACE_OR_CHKSUM); + /* empty pages aren't 
encrypted */ + if (stored_checksum == 0) { + return true; + } + } else { + ulint size = UNIV_PAGE_SIZE; + ulint checksum_field1 = mach_read_from_4( + frame + FIL_PAGE_SPACE_OR_CHKSUM); + ulint checksum_field2 = mach_read_from_4( + frame + size - FIL_PAGE_END_LSN_OLD_CHKSUM); + /* empty pages are not encrypted */ + if (checksum_field1 == 0 && checksum_field2 == 0 + && mach_read_from_4(frame + FIL_PAGE_LSN) == 0) { + return true; + } + } + return false; +} + +#define fil_crypt_get_page_throttle(state,space,zip_size,offset,mtr,sleeptime_ms) \ + fil_crypt_get_page_throttle_func(state, space, zip_size, offset, mtr, \ + sleeptime_ms, __FILE__, __LINE__) + +/*********************************************************************** +Get a page and compute sleep time */ +static +buf_block_t* +fil_crypt_get_page_throttle_func(rotate_thread_t *state, + ulint space, uint zip_size, ulint offset, + mtr_t *mtr, + ulint *sleeptime_ms, + const char *file, + ulint line) +{ + buf_block_t* block = buf_page_try_get_func(space, offset, RW_X_LATCH, + true, + file, line, mtr); + if (block != NULL) { + /* page was in buffer pool */ + state->crypt_stat.pages_read_from_cache++; + return block; + } + + state->crypt_stat.pages_read_from_disk++; + + ullint start = ut_time_us(NULL); + block = buf_page_get_gen(space, zip_size, offset, + RW_X_LATCH, + NULL, BUF_GET_POSSIBLY_FREED, + file, line, mtr); + ullint end = ut_time_us(NULL); + + if (end < start) { + end = start; // safety... 
+ } + + state->cnt_waited++; + state->sum_waited_us += (end - start); + + /* average page load */ + ulint add_sleeptime_ms = 0; + ulint avg_wait_time_us = state->sum_waited_us / state->cnt_waited; + ulint alloc_wait_us = 1000000 / state->allocated_iops; + if (avg_wait_time_us < alloc_wait_us) { + /* we reading faster than we allocated */ + add_sleeptime_ms = (alloc_wait_us - avg_wait_time_us) / 1000; + } else { + /* if page load time is longer than we want, skip sleeping */ + } + + *sleeptime_ms += add_sleeptime_ms; + return block; +} + + +/*********************************************************************** +Get block and allocation status + +note: innodb locks fil_space_latch and then block when allocating page +but locks block and then fil_space_latch when freeing page. +*/ +static +buf_block_t* +btr_scrub_get_block_and_allocation_status( + rotate_thread_t *state, + ulint space, + ulint zip_size, + ulint offset, + mtr_t *mtr, + btr_scrub_page_allocation_status_t *allocation_status, + ulint *sleeptime_ms) +{ + mtr_t local_mtr; + buf_block_t *block = NULL; + mtr_start(&local_mtr); + *allocation_status = fsp_page_is_free(space, offset, &local_mtr) ? 
/***********************************************************************
Rotate one page.

Fetches page state->offset of space state->space, and if its key version
needs rotation dirties the page with a dummy redo-logged write so that it
is rewritten with key_state->key_version. Independently of that, the page
is offered to the scrubbing code. On the way out the thread's end_lsn and
min_key_version_found are updated and the accumulated throttle sleep is
served.

@param key_state  key state pages are compared against
@param state      rotation thread state (position, statistics, scrub data) */
static
void
fil_crypt_rotate_page(
	const key_state_t *key_state,
	rotate_thread_t *state)
{
	ulint space = state->space;
	ulint offset = state->offset;
	const uint zip_size = fil_space_get_zip_size(space);
	ulint sleeptime_ms = 0;

	/* check if tablespace is closing before reading page */
	if (fil_crypt_is_closing(space))
		return;

	if (space == TRX_SYS_SPACE && offset == TRX_SYS_PAGE_NO) {
		/* don't encrypt this as it contains address to dblwr buffer */
		return;
	}

	mtr_t mtr;
	mtr_start(&mtr);
	buf_block_t* block = fil_crypt_get_page_throttle(state,
							 space, zip_size,
							 offset, &mtr,
							 &sleeptime_ms);

	/* NOTE(review): block is dereferenced below without a NULL check;
	confirm that the fetch above cannot return NULL */
	bool modified = false;
	int needs_scrubbing = BTR_SCRUB_SKIP_PAGE;
	/* snapshot taken before the page is touched; used below to track
	the maximum lsn seen by this thread */
	lsn_t block_lsn = block->page.newest_modification;
	uint kv = block->page.key_version;

	/* check if tablespace is closing after reading page */
	if (!fil_crypt_is_closing(space)) {
		byte* frame = buf_block_get_frame(block);
		fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);

		if (kv == 0 &&
		    fil_crypt_is_page_uninitialized(frame, zip_size)) {
			/* uninitialized page with no key version:
			nothing to rotate */
			;
		} else if (fil_crypt_needs_rotation(kv, key_state)) {

			/* page can be "fresh" i.e never written in case
			* kv == 0 or it should have a key version at least
			* as big as the space minimum key version*/
			ut_a(kv == 0 || kv >= crypt_data->min_key_version);

			modified = true;

			/* force rotation by dummy updating page */
			mlog_write_ulint(frame +
					 FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
					 space, MLOG_4BYTES, &mtr);

			/* update block */
			block->page.key_version = key_state->key_version;

			/* statistics */
			state->crypt_stat.pages_modified++;
		} else {
			/* page is left as is: remember the smallest key
			version this thread has seen but not rotated */
			ut_a(kv >= crypt_data->min_key_version ||
			     (kv == 0 && key_state->key_version == 0));

			if (kv < state->min_key_version_found) {
				state->min_key_version_found = kv;
			}
		}

		needs_scrubbing = btr_page_needs_scrubbing(
			&state->scrub_data, block,
			BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN);
	}

	mtr_commit(&mtr);
	/* lsn at which the (possible) dummy update above was committed */
	lsn_t end_lsn = mtr.end_lsn;

	if (needs_scrubbing == BTR_SCRUB_PAGE) {
		mtr_start(&mtr);
		/*
		* refetch page and allocation status
		*/
		btr_scrub_page_allocation_status_t allocated;
		block = btr_scrub_get_block_and_allocation_status(
			state, space, zip_size, offset, &mtr,
			&allocated,
			&sleeptime_ms);

		/* get required table/index and index-locks */
		needs_scrubbing = btr_scrub_recheck_page(
			&state->scrub_data, block, allocated, &mtr);

		if (needs_scrubbing == BTR_SCRUB_PAGE) {
			/* we need to refetch it once more now that we have
			* index locked */
			block = btr_scrub_get_block_and_allocation_status(
				state, space, zip_size, offset, &mtr,
				&allocated,
				&sleeptime_ms);

			needs_scrubbing = btr_scrub_page(&state->scrub_data,
							 block, allocated,
							 &mtr);
		}

		/* NOTE: mtr is committed inside btr_scrub_recheck_page()
		* and/or btr_scrub_page. This is to make sure that
		* locks & pages are latched in correct order,
		* the mtr is in some circumstances restarted.
		* (mtr_commit() + mtr_start())
		*/
	}

	if (needs_scrubbing != BTR_SCRUB_PAGE) {
		/* if page didn't need scrubbing it might be that cleanups
		are needed. do those outside of any mtr to prevent deadlocks.

		the information what kinds of cleanups that are needed are
		encoded inside the needs_scrubbing, but this is opaque to
		this function (except the value BTR_SCRUB_PAGE) */
		btr_scrub_skip_page(&state->scrub_data, needs_scrubbing);
	}

	if (needs_scrubbing == BTR_SCRUB_TURNED_OFF) {
		/* if we just detected that scrubbing was turned off
		* update global state to reflect this */
		fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
		mutex_enter(&crypt_data->mutex);
		crypt_data->rotate_state.scrubbing.is_active = false;
		mutex_exit(&crypt_data->mutex);
	}

	if (modified) {
		/* if we modified page, we take lsn from mtr */
		ut_a(end_lsn > state->end_lsn);
		ut_a(end_lsn > block_lsn);
		state->end_lsn = end_lsn;
	} else {
		/* if we did not modify page, check for max lsn */
		if (block_lsn > state->end_lsn) {
			state->end_lsn = block_lsn;
		}
	}

	if (sleeptime_ms) {
		/* serve the throttle sleep accumulated while fetching pages;
		the wait takes microseconds, hence the factor 1000 */
		os_event_reset(fil_crypt_throttle_sleep_event);
		os_event_wait_time(fil_crypt_throttle_sleep_event,
				   1000 * sleeptime_ms);
	}
}
+ * + * However since these are only (short-lived) copies of + * real pages, they will be updated anyway when the + * real page is updated + */ + if (space == TRX_SYS_SPACE && + buf_dblwr_page_inside(state->offset)) { + continue; + } + + fil_crypt_rotate_page(key_state, state); + } +} + +/*********************************************************************** +Flush rotated pages and then update page 0 */ +static +void +fil_crypt_flush_space(rotate_thread_t *state, ulint space) +{ + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + + /* flush tablespace pages so that there are no pages left with old key */ + lsn_t end_lsn = crypt_data->rotate_state.end_lsn; + if (end_lsn > 0 && !fil_crypt_is_closing(space)) { + bool success = false; + ulint n_pages = 0; + ulint sum_pages = 0; + ullint start = ut_time_us(NULL); + do { + success = buf_flush_list(ULINT_MAX, end_lsn, &n_pages); + buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); + sum_pages += n_pages; + } while (!success && !fil_crypt_is_closing(space)); + ullint end = ut_time_us(NULL); + if (sum_pages && end > start) { + state->cnt_waited += sum_pages; + state->sum_waited_us += (end - start); + + /* statistics */ + state->crypt_stat.pages_flushed += sum_pages; + } + } + + if (crypt_data->min_key_version == 0) { + crypt_data->type = CRYPT_SCHEME_UNENCRYPTED; + } + + /* update page 0 */ + if (!fil_crypt_is_closing(space)) { + mtr_t mtr; + mtr_start(&mtr); + ulint offset = 0; // page 0 + const uint zip_size = fil_space_get_zip_size(space); + buf_block_t* block = buf_page_get_gen(space, zip_size, offset, + RW_X_LATCH, NULL, BUF_GET, + __FILE__, __LINE__, &mtr); + byte* frame = buf_block_get_frame(block); + fil_space_write_crypt_data(space, frame, + crypt_data->page0_offset, + ULINT_MAX, &mtr); + mtr_commit(&mtr); + } +} + +/*********************************************************************** +Complete rotating a space */ +static +void +fil_crypt_complete_rotate_space( + const key_state_t *key_state, + 
rotate_thread_t *state) +{ + ulint space = state->space; + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + mutex_enter(&crypt_data->mutex); + + /** + * Update crypt data state with state from thread + */ + if (state->min_key_version_found < + crypt_data->rotate_state.min_key_version_found) { + crypt_data->rotate_state.min_key_version_found = + state->min_key_version_found; + } + + if (state->end_lsn > crypt_data->rotate_state.end_lsn) { + crypt_data->rotate_state.end_lsn = state->end_lsn; + } + + ut_a(crypt_data->rotate_state.active_threads > 0); + crypt_data->rotate_state.active_threads--; + bool last = crypt_data->rotate_state.active_threads == 0; + + /** + * check if space is fully done + * this as when threads shutdown, it could be that we "complete" + * iterating before we have scanned the full space. + */ + bool done = crypt_data->rotate_state.next_offset >= + crypt_data->rotate_state.max_offset; + + /** + * we should flush space if we're last thread AND + * the iteration is done + */ + bool should_flush = last && done; + + if (should_flush) { + /* we're the last active thread */ + crypt_data->rotate_state.flushing = true; + crypt_data->min_key_version = + crypt_data->rotate_state.min_key_version_found; + } + + /* inform scrubbing */ + crypt_data->rotate_state.scrubbing.is_active = false; + mutex_exit(&crypt_data->mutex); + + /* all threads must call btr_scrub_complete_space wo/ mutex held */ + if (btr_scrub_complete_space(&state->scrub_data) == true) { + if (should_flush) { + /* only last thread updates last_scrub_completed */ + mutex_enter(&crypt_data->mutex); + crypt_data->rotate_state.scrubbing. 
/*********************************************************************//**
A thread which monitors global key state and rotates tablespaces accordingly.

Outer loop: wait until the key state changes, a recheck was requested, or
srv_background_scrub_data_check_interval seconds have passed; then walk all
tablespaces, and for each one that needs rotation walk its pages in batches
shared with the other rotation threads.
@return a dummy parameter */
extern "C" UNIV_INTERN
os_thread_ret_t
DECLARE_THREAD(fil_crypt_thread)(
/*===============================*/
	void*	arg __attribute__((unused))) /*!< in: a dummy parameter required
					     * by os_thread_create */
{
	UT_NOT_USED(arg);

	/* register this thread; its number is the count of threads
	started before it */
	mutex_enter(&fil_crypt_threads_mutex);
	uint thread_no = srv_n_fil_crypt_threads_started;
	srv_n_fil_crypt_threads_started++;
	mutex_exit(&fil_crypt_threads_mutex);
	os_event_set(fil_crypt_event); /* signal that we started */

	/* state of this thread */
	rotate_thread_t thr(thread_no);

	/* if we find a space that is starting, skip over it and recheck it later */
	bool recheck = false;

	key_state_t key_state;
	fil_crypt_get_key_state(&key_state);

	/* make sure that thread always checks all tablespaces when starting.
	 *
	 * by decreasing key_version, loop that waits for change in key-state
	 * should exit immediately causing thread to check all spaces when starting */
	key_state.key_version--;

	while (!thr.should_shutdown()) {

		key_state_t new_state;
		fil_crypt_get_key_state(&new_state);

		time_t wait_start = time(0);
		while (!thr.should_shutdown() && key_state == new_state) {

			/* wait for key state changes
			* i.e. either a new key version or a
			* new rotate_key_age */
			os_event_reset(fil_crypt_threads_event);
			os_event_wait_time(fil_crypt_threads_event, 1000000);
			fil_crypt_get_key_state(&new_state);

			if (recheck) {
				/* check recheck here, after sleep, so
				* that we don't busy loop while one thread
				* is starting a space */
				break;
			}

			/* also leave the wait loop periodically so that
			scrubbing is considered even without key changes */
			time_t waited = time(0) - wait_start;
			if (waited >= srv_background_scrub_data_check_interval)
				break;
		}

		recheck = false;
		thr.first = true;      // restart from first tablespace
		key_state = new_state; // save for next loop

		/* iterate all spaces searching for those needing rotation */
		while (!thr.should_shutdown() &&
		       fil_crypt_find_space_to_rotate(&new_state, &thr, &recheck)) {

			/* we found a space to rotate */
			fil_crypt_start_rotate_space(&new_state, &thr);

			/* decrement pending ops that was incremented in
			* fil_crypt_space_needs_rotation
			* (called from fil_crypt_find_space_to_rotate),
			* this makes sure that tablespace won't be dropped
			* just after we decided to start processing it. */
			fil_decr_pending_ops(thr.space);

			/* iterate all pages (cooperatively with other threads) */
			while (!thr.should_shutdown() &&
			       fil_crypt_find_page_to_rotate(&new_state, &thr)) {

				/* rotate a (set) of pages */
				fil_crypt_rotate_pages(&new_state, &thr);

				/* realloc iops */
				fil_crypt_realloc_iops(&thr);
			}

			/* complete rotation */
			fil_crypt_complete_rotate_space(&new_state, &thr);

			/* refresh key state */
			fil_crypt_get_key_state(&new_state);

			/* return iops */
			fil_crypt_return_iops(&thr);
		}
	}

	/* return iops if shutting down */
	fil_crypt_return_iops(&thr);

	/* unregister this thread */
	mutex_enter(&fil_crypt_threads_mutex);
	srv_n_fil_crypt_threads_started--;
	mutex_exit(&fil_crypt_threads_mutex);
	os_event_set(fil_crypt_event); /* signal that we stopped */

	/* We count the number of threads in os_thread_exit(). A created
	thread should always use that to exit and not use return() to exit. */

	os_thread_exit(NULL);

	OS_THREAD_DUMMY_RETURN;
}
rotation iops */ +UNIV_INTERN +void +fil_crypt_set_rotation_iops(uint val) +{ + srv_n_fil_crypt_iops = val; + os_event_set(fil_crypt_threads_event); +} + +/********************************************************************* +Init threads for key rotation */ +UNIV_INTERN +void +fil_crypt_threads_init() +{ + fil_crypt_event = os_event_create(); + fil_crypt_threads_event = os_event_create(); + mutex_create(fil_crypt_threads_mutex_key, + &fil_crypt_threads_mutex, SYNC_NO_ORDER_CHECK); + + uint cnt = srv_n_fil_crypt_threads; + srv_n_fil_crypt_threads = 0; + fil_crypt_set_thread_cnt(cnt); +} + +/********************************************************************* +End threads for key rotation */ +UNIV_INTERN +void +fil_crypt_threads_end() +{ + /* stop threads */ + fil_crypt_set_thread_cnt(0); +} + +/********************************************************************* +Clean up key rotation threads resources */ +UNIV_INTERN +void +fil_crypt_threads_cleanup() { + os_event_free(fil_crypt_event); + os_event_free(fil_crypt_threads_event); +} + +/********************************************************************* +Mark a space as closing */ +UNIV_INTERN +void +fil_space_crypt_mark_space_closing( + ulint space) +{ + mutex_enter(&fil_crypt_threads_mutex); + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) { + mutex_exit(&fil_crypt_threads_mutex); + return; + } + + mutex_enter(&crypt_data->mutex); + mutex_exit(&fil_crypt_threads_mutex); + crypt_data->closing = true; + mutex_exit(&crypt_data->mutex); +} + +/********************************************************************* +Wait for crypt threads to stop accessing space */ +UNIV_INTERN +void +fil_space_crypt_close_tablespace( + ulint space) +{ + mutex_enter(&fil_crypt_threads_mutex); + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) { + mutex_exit(&fil_crypt_threads_mutex); + return; + } + + uint start = time(0); + uint last = start; + 
mutex_enter(&crypt_data->mutex); + mutex_exit(&fil_crypt_threads_mutex); + crypt_data->closing = true; + uint cnt = crypt_data->rotate_state.active_threads; + bool flushing = crypt_data->rotate_state.flushing; + while (cnt > 0 || flushing) { + mutex_exit(&crypt_data->mutex); + /* release dict mutex so that scrub threads can release their + * table references */ + dict_mutex_exit_for_mysql(); + /* wakeup throttle (all) sleepers */ + os_event_set(fil_crypt_throttle_sleep_event); + os_thread_sleep(20000); + dict_mutex_enter_for_mysql(); + mutex_enter(&crypt_data->mutex); + cnt = crypt_data->rotate_state.active_threads; + flushing = crypt_data->rotate_state.flushing; + + uint now = time(0); + if (now >= last + 30) { + fprintf(stderr, + "WARNING: " + "waited %u seconds to drop space: %lu\n", + now - start, space); + last = now; + } + } + mutex_exit(&crypt_data->mutex); +} + +/********************************************************************* +Get crypt status for a space (used by information_schema) +return 0 if crypt data present */ +int +fil_space_crypt_get_status( +/*==================*/ + ulint id, /*!< in: space id */ + struct fil_space_crypt_status_t* status) /*!< out: status */ +{ + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(id); + + if (crypt_data != NULL) { + status->space = id; + status->scheme = crypt_data->type; + mutex_enter(&crypt_data->mutex); + status->keyserver_requests = crypt_data->keyserver_requests; + status->min_key_version = crypt_data->min_key_version; + if (crypt_data->rotate_state.active_threads > 0 || + crypt_data->rotate_state.flushing) { + status->rotating = true; + status->flushing = + crypt_data->rotate_state.flushing; + status->rotate_next_page_number = + crypt_data->rotate_state.next_offset; + status->rotate_max_page_number = + crypt_data->rotate_state.max_offset; + } else { + status->rotating = false; + } + mutex_exit(&crypt_data->mutex); + } else { + memset(status, 0, sizeof(*status)); + } + + if (srv_encrypt_tables == 
TRUE) { + status->current_key_version = GetLatestCryptoKeyVersion(); + } else { + status->current_key_version = 0; + } + return crypt_data == NULL ? 1 : 0; +} + +/********************************************************************* +Return crypt statistics */ +void +fil_crypt_total_stat(fil_crypt_stat_t *stat) +{ + mutex_enter(&crypt_stat_mutex); + *stat = crypt_stat; + mutex_exit(&crypt_stat_mutex); +} + +/********************************************************************* +Get scrub status for a space (used by information_schema) +return 0 if data found */ +int +fil_space_get_scrub_status( +/*==================*/ + ulint id, /*!< in: space id */ + struct fil_space_scrub_status_t* status) /*!< out: status */ +{ + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(id); + memset(status, 0, sizeof(*status)); + if (crypt_data != NULL) { + status->space = id; + status->compressed = fil_space_get_zip_size(id) > 0; + mutex_enter(&crypt_data->mutex); + status->last_scrub_completed = + crypt_data->rotate_state.scrubbing.last_scrub_completed; + if (crypt_data->rotate_state.active_threads > 0 && + crypt_data->rotate_state.scrubbing.is_active) { + status->scrubbing = true; + status->current_scrub_started = + crypt_data->rotate_state.start_time; + status->current_scrub_active_threads = + crypt_data->rotate_state.active_threads; + status->current_scrub_page_number = + crypt_data->rotate_state.next_offset; + status->current_scrub_max_page_number = + crypt_data->rotate_state.max_offset; + } else { + status->scrubbing = false; + } + mutex_exit(&crypt_data->mutex); + } else { + memset(status, 0, sizeof(*status)); + } + + return crypt_data == NULL ? 
1 : 0; +} diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 7e62fb46b6f..577effa295b 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -27,6 +27,8 @@ Created 10/25/1995 Heikki Tuuri #include "fil0fil.h" #include "fil0pagecompress.h" #include "fsp0pagecompress.h" +#include "fil0pageencryption.h" +#include "fsp0pageencryption.h" #include <debug_sync.h> #include <my_dbug.h> @@ -282,7 +284,7 @@ fil_read( actual page size does not decrease. */ { return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset, - byte_offset, len, buf, message, write_size)); + byte_offset, len, buf, message, write_size, 0)); } /********************************************************************//** @@ -309,16 +311,17 @@ fil_write( this must be appropriately aligned */ void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ - ulint* write_size) /*!< in/out: Actual write size initialized + ulint* write_size, /*!< in/out: Actual write size initialized after fist successfull trim operation for this page and if initialized we do not trim again if actual page size does not decrease. */ + lsn_t lsn) /* lsn of the newest modification */ { ut_ad(!srv_read_only_mode); return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset, - byte_offset, len, buf, message, write_size)); + byte_offset, len, buf, message, write_size, lsn)); } /*******************************************************************//** @@ -645,8 +648,23 @@ fil_node_open_file( success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE, space->flags); + if (fil_page_encryption_status(page)) { + /* if page is (still) encrypted, write an error and return. + * Otherwise the server would crash if decrypting is not possible. + * This may be the case, if the key file could not be + * opened on server startup. 
+ */ + ib_logf(IB_LOG_LEVEL_ERROR, + "InnoDB: can not decrypt page, because " + "keys could not be read.\n" + ); + return false; + + } + space_id = fsp_header_get_space_id(page); flags = fsp_header_get_flags(page); + page_size = fsp_flags_get_page_size(flags); atomic_writes = fsp_flags_get_atomic_writes(flags); @@ -1125,7 +1143,8 @@ fil_space_create( const char* name, /*!< in: space name */ ulint id, /*!< in: space id */ ulint flags, /*!< in: tablespace flags */ - ulint purpose)/*!< in: FIL_TABLESPACE, or FIL_LOG if log */ + ulint purpose,/*!< in: FIL_TABLESPACE, or FIL_LOG if log */ + fil_space_crypt_t* crypt_data) /*!< in: crypt data */ { fil_space_t* space; @@ -1133,6 +1152,21 @@ fil_space_create( ut_a(fil_system); + if (fsp_flags_is_page_encrypted(flags)) { + if (!HasCryptoKey(fsp_flags_get_page_encryption_key(flags))) { + /* by returning here it should be avoided that + * the server crashes, if someone tries to access an + * encrypted table and the encryption key is not available. + * The the table is treaded as non-existent. + */ + ib_logf(IB_LOG_LEVEL_WARN, + "Tablespace '%s' can not be opened, because " + " encryption key can not be found (space id: %lu, key %lu)\n" + , name, (ulong) id, fsp_flags_get_page_encryption_key(flags)); + return (FALSE); + } + } + /* Look for a matching tablespace and if found free it. 
*/ do { mutex_enter(&fil_system->mutex); @@ -1219,6 +1253,8 @@ fil_space_create( UT_LIST_ADD_LAST(space_list, fil_system->space_list, space); + space->crypt_data = crypt_data; + mutex_exit(&fil_system->mutex); return(TRUE); @@ -1353,6 +1389,8 @@ fil_space_free( rw_lock_free(&(space->latch)); + fil_space_destroy_crypt_data(&(space->crypt_data)); + mem_free(space->name); mem_free(space); @@ -1586,6 +1624,8 @@ fil_init( UT_LIST_INIT(fil_system->LRU); fil_system->max_n_open = max_n_open; + + fil_space_crypt_init(); } /*******************************************************************//** @@ -1787,10 +1827,11 @@ fil_write_lsn_and_arch_no_to_file( err = fil_read(TRUE, space, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL, 0); if (err == DB_SUCCESS) { - mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn); + mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, + lsn); err = fil_write(TRUE, space, 0, sum_of_sizes, 0, - UNIV_PAGE_SIZE, buf, NULL, 0); + UNIV_PAGE_SIZE, buf, NULL, 0, lsn); } mem_free(buf1); @@ -1869,6 +1910,7 @@ fil_check_first_page( { ulint space_id; ulint flags; + ulint page_is_encrypted; if (srv_force_recovery >= SRV_FORCE_IGNORE_CORRUPT) { return(NULL); @@ -1876,12 +1918,23 @@ fil_check_first_page( space_id = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page); flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page); - - if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) { - fprintf(stderr, "InnoDB: Error: Current page size %lu != page size on page %lu\n", - UNIV_PAGE_SIZE, fsp_flags_get_page_size(flags)); - - return("innodb-page-size mismatch"); + /* Note: the 1st page is usually not encrypted. If the Key Provider + or the encryption key is not available, the + check for reading the first page should intentionally fail + with "can not decrypt" message. 
*/ + page_is_encrypted = fil_page_encryption_status(page); + if ((page_is_encrypted == PAGE_ENCRYPTION_KEY_MISSING) && page_is_encrypted) { + page_is_encrypted = 1; + } else { + page_is_encrypted = 0; + if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) { + fprintf(stderr, + "InnoDB: Error: Current page size %lu != " + " page size on page %lu\n", + UNIV_PAGE_SIZE, fsp_flags_get_page_size(flags)); + + return("innodb-page-size mismatch"); + } } if (!space_id && !flags) { @@ -1897,9 +1950,17 @@ fil_check_first_page( } } - if (buf_page_is_corrupted( + if (!page_is_encrypted && buf_page_is_corrupted( false, page, fsp_flags_get_zip_size(flags))) { return("checksum mismatch"); + } else { + if (page_is_encrypted) { + /* this error message is interpreted by the calling method, which is + * executed if the server starts in recovery mode. + */ + return(MSG_CANNOT_DECRYPT); + + } } if (page_get_space_id(page) == space_id @@ -1935,8 +1996,9 @@ fil_read_first_page( lsn values in data files */ lsn_t* max_flushed_lsn, /*!< out: max of flushed lsn values in data files */ - ulint orig_space_id) /*!< in: original file space + ulint orig_space_id, /*!< in: original file space id */ + fil_space_crypt_t** crypt_data) /*< out: crypt data */ { byte* buf; byte* page; @@ -1974,7 +2036,16 @@ fil_read_first_page( check_msg = fil_check_first_page(page); } - flushed_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN); + flushed_lsn = mach_read_from_8(page + + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + + if (crypt_data) { + ulint space = fsp_header_get_space_id(page); + ulint offset = + fsp_header_get_crypt_offset( + fsp_flags_get_zip_size(*flags), NULL); + *crypt_data = fil_space_read_crypt_data(space, page, offset); + } ut_free(buf); @@ -2459,6 +2530,9 @@ fil_check_pending_operations( *space = 0; + /* Wait for crypt threads to stop accessing space */ + fil_space_crypt_close_tablespace(id); + mutex_enter(&fil_system->mutex); fil_space_t* sp = fil_space_get_by_id(id); if (sp) { @@ -3438,7 
+3512,8 @@ fil_create_new_single_table_tablespace( } } - success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE); + success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE, + fil_space_create_crypt_data()); if (!success || !fil_node_create(path, size, space_id, FALSE)) { err = DB_ERROR; goto error_exit_1; @@ -3566,6 +3641,7 @@ fil_open_single_table_tablespace( ulint tablespaces_found = 0; ulint valid_tablespaces_found = 0; ulint atomic_writes = 0; + fil_space_crypt_t* crypt_data = NULL; #ifdef UNIV_SYNC_DEBUG ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); @@ -3667,7 +3743,7 @@ fil_open_single_table_tablespace( #ifdef UNIV_LOG_ARCHIVE &space_arch_log_no, &space_arch_log_no, #endif /* UNIV_LOG_ARCHIVE */ - &def.lsn, &def.lsn, id); + &def.lsn, &def.lsn, id, &def.crypt_data); def.valid = !def.check_msg; /* Validate this single-table-tablespace with SYS_TABLES, @@ -3692,7 +3768,7 @@ fil_open_single_table_tablespace( #ifdef UNIV_LOG_ARCHIVE &remote.arch_log_no, &remote.arch_log_no, #endif /* UNIV_LOG_ARCHIVE */ - &remote.lsn, &remote.lsn, id); + &remote.lsn, &remote.lsn, id, &remote.crypt_data); remote.valid = !remote.check_msg; /* Validate this single-table-tablespace with SYS_TABLES, @@ -3718,7 +3794,7 @@ fil_open_single_table_tablespace( #ifdef UNIV_LOG_ARCHIVE &dict.arch_log_no, &dict.arch_log_no, #endif /* UNIV_LOG_ARCHIVE */ - &dict.lsn, &dict.lsn, id); + &dict.lsn, &dict.lsn, id, &dict.crypt_data); dict.valid = !dict.check_msg; /* Validate this single-table-tablespace with SYS_TABLES, @@ -3871,9 +3947,17 @@ fil_open_single_table_tablespace( } skip_validate: + if (remote.success) + crypt_data = remote.crypt_data; + else if (dict.success) + crypt_data = dict.crypt_data; + else if (def.success) + crypt_data = def.crypt_data; + if (err != DB_SUCCESS) { ; // Don't load the tablespace into the cache - } else if (!fil_space_create(tablename, id, flags, FIL_TABLESPACE)) { + } else if (!fil_space_create(tablename, id, flags, 
FIL_TABLESPACE, + crypt_data)) { err = DB_ERROR; } else { /* We do not measure the size of the file, that is why @@ -3893,15 +3977,25 @@ cleanup_and_exit: if (remote.filepath) { mem_free(remote.filepath); } + if (remote.crypt_data && remote.crypt_data != crypt_data) { + fil_space_destroy_crypt_data(&remote.crypt_data); + } if (dict.success) { os_file_close(dict.file); } if (dict.filepath) { mem_free(dict.filepath); } + if (dict.crypt_data && dict.crypt_data != crypt_data) { + fil_space_destroy_crypt_data(&dict.crypt_data); + } if (def.success) { os_file_close(def.file); } + if (def.crypt_data && def.crypt_data != crypt_data) { + fil_space_destroy_crypt_data(&def.crypt_data); + } + mem_free(def.filepath); return(err); @@ -4118,16 +4212,25 @@ fil_validate_single_table_tablespace( check_first_page: fsp->success = TRUE; + fsp->encryption_error = 0; if (const char* check_msg = fil_read_first_page( fsp->file, FALSE, &fsp->flags, &fsp->id, #ifdef UNIV_LOG_ARCHIVE &fsp->arch_log_no, &fsp->arch_log_no, #endif /* UNIV_LOG_ARCHIVE */ - &fsp->lsn, &fsp->lsn, ULINT_UNDEFINED)) { + &fsp->lsn, &fsp->lsn, ULINT_UNDEFINED, &fsp->crypt_data)) { ib_logf(IB_LOG_LEVEL_ERROR, "%s in tablespace %s (table %s)", check_msg, fsp->filepath, tablename); fsp->success = FALSE; + if (strncmp(check_msg, MSG_CANNOT_DECRYPT, strlen(check_msg))==0) { + /* by returning here, it should be avoided, that the server crashes, + * if started in recovery mode and can not decrypt tables, if + * the key file can not be read. + */ + fsp->encryption_error = 1; + return; + } } if (!fsp->success) { @@ -4281,6 +4384,14 @@ fil_load_single_table_tablespace( } if (!def.success && !remote.success) { + + if (def.encryption_error || remote.encryption_error) { + fprintf(stderr, + "InnoDB: Error: could not open single-table" + " tablespace file %s. 
Encryption error!\n", def.filepath); + return; + } + /* The following call prints an error message */ os_file_get_last_error(true); fprintf(stderr, @@ -4464,7 +4575,8 @@ will_not_choose: mutex_exit(&fil_system->mutex); #endif /* UNIV_HOTBACKUP */ ibool file_space_create_success = fil_space_create( - tablename, fsp->id, fsp->flags, FIL_TABLESPACE); + tablename, fsp->id, fsp->flags, FIL_TABLESPACE, + fsp->crypt_data); if (!file_space_create_success) { if (srv_force_recovery > 0) { @@ -5099,7 +5211,7 @@ retry: success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC, node->name, node->handle, buf, offset, page_size * n_pages, - node, NULL, 0, FALSE, 0); + node, NULL, 0, FALSE, 0, 0, 0, 0); #endif /* UNIV_HOTBACKUP */ if (success) { os_has_said_disk_full = FALSE; @@ -5475,11 +5587,12 @@ fil_io( appropriately aligned */ void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ - ulint* write_size) /*!< in/out: Actual write size initialized + ulint* write_size, /*!< in/out: Actual write size initialized after fist successfull trim operation for this page and if initialized we do not trim again if actual page size does not decrease. 
*/ + lsn_t lsn) /* lsn of the newest modification */ { ulint mode; fil_space_t* space; @@ -5491,6 +5604,8 @@ fil_io( ibool ignore_nonexistent_pages; ibool page_compressed = FALSE; ulint page_compression_level = 0; + ibool page_encrypted; + ulint page_encryption_key; is_log = type & OS_FILE_LOG; type = type & ~OS_FILE_LOG; @@ -5676,6 +5791,8 @@ fil_io( page_compressed = fsp_flags_is_page_compressed(space->flags); page_compression_level = fsp_flags_get_page_compression_level(space->flags); + page_encrypted = fsp_flags_is_page_encrypted(space->flags); + page_encryption_key = fsp_flags_get_page_encryption_key(space->flags); #ifdef UNIV_HOTBACKUP /* In mysqlbackup do normal i/o, not aio */ @@ -5688,9 +5805,23 @@ fil_io( } #else /* Queue the aio request */ - ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, - offset, len, node, message, write_size, - page_compressed, page_compression_level); + ret = os_aio( + type, + mode | wake_later, + node->name, + node->handle, + buf, + offset, + len, + node, + message, + write_size, + page_compressed, + page_compression_level, + page_encrypted, + page_encryption_key, + lsn); + #endif /* UNIV_HOTBACKUP */ @@ -6118,6 +6249,8 @@ void fil_close(void) /*===========*/ { + fil_space_crypt_cleanup(); + #ifndef UNIV_HOTBACKUP /* The mutex should already have been freed. 
*/ ut_ad(fil_system->mutex.magic_n == 0); @@ -6167,6 +6300,8 @@ struct fil_iterator_t { ulint n_io_buffers; /*!< Number of pages to use for IO */ byte* io_buffer; /*!< Buffer to use for IO */ + fil_space_crypt_t *crypt_data; /*!< Crypt data (if encrypted) */ + byte* crypt_io_buffer; /*!< IO buffer when encrypted */ }; /********************************************************************//** @@ -6229,7 +6364,12 @@ fil_iterate( ut_ad(n_bytes > 0); ut_ad(!(n_bytes % iter.page_size)); - if (!os_file_read(iter.file, io_buffer, offset, + byte* readptr = io_buffer; + if (iter.crypt_data != NULL) { + readptr = iter.crypt_io_buffer; + } + + if (!os_file_read(iter.file, readptr, offset, (ulint) n_bytes, fil_space_is_page_compressed(space_id))) { @@ -6244,6 +6384,18 @@ fil_iterate( for (ulint i = 0; i < n_pages_read; ++i) { + if (iter.crypt_data != NULL) { + bool decrypted = fil_space_decrypt( + iter.crypt_data, + readptr + i * iter.page_size, // src + iter.page_size, + io_buffer + i * iter.page_size); // dst + if (decrypted) { + /* write back unencrypted page */ + updated = true; + } + } + buf_block_set_file_page(block, space_id, page_no++); dberr_t err; @@ -6386,6 +6538,13 @@ fil_tablespace_iterate( iter.n_io_buffers = n_io_buffers; iter.page_size = callback.get_page_size(); + ulint crypt_data_offset = fsp_header_get_crypt_offset( + callback.get_zip_size(), 0); + + /* read (optional) crypt data */ + iter.crypt_data = fil_space_read_crypt_data( + 0, page, crypt_data_offset); + /* Compressed pages can't be optimised for block IO for now. We do the IMPORT page by page. 
*/ @@ -6394,6 +6553,14 @@ fil_tablespace_iterate( ut_a(iter.page_size == callback.get_zip_size()); } + /** If tablespace is encrypted, it needs extra buffers */ + if (iter.crypt_data != NULL) { + /* decrease io buffers so that memory + * consumption doesn't double + * note: the +1 is to avoid n_io_buffers getting down to 0 */ + iter.n_io_buffers = (iter.n_io_buffers + 1) / 2; + } + /** Add an extra page for compressed page scratch area. */ void* io_buffer = mem_alloc( @@ -6402,9 +6569,45 @@ fil_tablespace_iterate( iter.io_buffer = static_cast<byte*>( ut_align(io_buffer, UNIV_PAGE_SIZE)); + void* crypt_io_buffer = NULL; + if (iter.crypt_data != NULL) { + crypt_io_buffer = mem_alloc( + iter.n_io_buffers * UNIV_PAGE_SIZE); + iter.crypt_io_buffer = static_cast<byte*>( + crypt_io_buffer); /* NOTE(review): unlike io_buffer above, this buffer is not passed through ut_align() - confirm that is intended */ + } + err = fil_iterate(iter, &block, callback); mem_free(io_buffer); + + if (iter.crypt_data != NULL) { + /* clear crypt data from page 0 and write it back; NOTE(review): the result of the os_file_read() below is not checked, and this runs whatever value err got from fil_iterate() */ + os_file_read(file, page, 0, UNIV_PAGE_SIZE, 0); + fil_space_clear_crypt_data(page, crypt_data_offset); + lsn_t lsn = mach_read_from_8(page + FIL_PAGE_LSN); + if (callback.get_zip_size() == 0) { + buf_flush_init_for_writing( + page, 0, lsn); + } else { + buf_flush_update_zip_checksum( + page, callback.get_zip_size(), lsn); + } + + if (!os_file_write( + iter.filepath, iter.file, page, + 0, iter.page_size)) { + + ib_logf(IB_LOG_LEVEL_ERROR, + "os_file_write() failed"); + + return(DB_IO_ERROR); /* NOTE(review): this early return skips the mem_free(crypt_io_buffer) and fil_space_destroy_crypt_data(&iter.crypt_data) calls just below, so both are leaked on this path */ + } + + mem_free(crypt_io_buffer); + iter.crypt_io_buffer = NULL; + fil_space_destroy_crypt_data(&iter.crypt_data); + } } if (err == DB_SUCCESS) { @@ -6569,6 +6772,16 @@ fil_space_name( } /*******************************************************************//** +Return space flags (space->flags; space is dereferenced unconditionally) */ +ulint +fil_space_flags( +/*===========*/ + fil_space_t* space) /*!< in: space */ +{ + return (space->flags); +} + +/*******************************************************************//** Return page type name */ const char* fil_get_page_type_name( @@ -6621,3 
+6834,145 @@ fil_node_get_block_size(
 {
 	return (node->file_block_size);
 }
+
+/******************************************************************
+Get id of first tablespace with stop_new_ops unset, or ULINT_UNDEFINED if none */
+UNIV_INTERN
+ulint
+fil_get_first_space()
+{
+	ulint out_id = ULINT_UNDEFINED;
+	fil_space_t* space;
+
+	mutex_enter(&fil_system->mutex);
+
+	space = UT_LIST_GET_FIRST(fil_system->space_list);
+	if (space != NULL) {
+		do
+		{
+			if (!space->stop_new_ops) {
+				out_id = space->id;
+				break;
+			}
+			space = UT_LIST_GET_NEXT(space_list, space);
+		} while (space != NULL);
+	}
+
+	mutex_exit(&fil_system->mutex);
+
+	return out_id;
+}
+
+/******************************************************************
+Get id of the next tablespace with stop_new_ops unset: the one following
+id in the space list or, if id no longer exists, the first one in the
+list with a larger id.
+@return space id, or ULINT_UNDEFINED if none */
+UNIV_INTERN
+ulint
+fil_get_next_space(ulint id)
+{
+	bool found;
+	fil_space_t* space;
+	ulint out_id = ULINT_UNDEFINED;
+
+	mutex_enter(&fil_system->mutex);
+
+	space = fil_space_get_by_id(id);
+	if (space == NULL) {
+		/* we didn't find it...search for space with space->id > id,
+		starting at (and including) the head of the list */
+		found = false;
+		space = UT_LIST_GET_FIRST(fil_system->space_list);
+	} else {
+		/* we found it, take next available space */
+		found = true;
+		space = UT_LIST_GET_NEXT(space_list, space);
+	}
+
+	/* The cursor is advanced only after an element has been examined,
+	so the head is not skipped and a NULL cursor is never dereferenced */
+	for (; space != NULL; space = UT_LIST_GET_NEXT(space_list, space)) {
+
+		if (!found && space->id <= id)
+			continue;
+
+		if (!space->stop_new_ops) {
+			/* NOTE: no reference is taken on the space here */
+			out_id = space->id;
+			break;
+		}
+	}
+
+	mutex_exit(&fil_system->mutex);
+
+	return out_id;
+}
+
+/******************************************************************
+Get crypt data for a tablespace
+@return space->crypt_data, or NULL if the space is not found */
+UNIV_INTERN
+fil_space_crypt_t*
+fil_space_get_crypt_data(
+/*==================*/
+	ulint id) /*!< in: space id */
+{
+	fil_space_t* space;
+	fil_space_crypt_t* crypt_data = NULL;
+
+	ut_ad(fil_system);
+
+	mutex_enter(&fil_system->mutex);
+
+	space = fil_space_get_by_id(id);
+	if (space != NULL) {
+		crypt_data = space->crypt_data;
+	}
+
+	mutex_exit(&fil_system->mutex);
+
+	return(crypt_data);
+}
+
+/****************************************************************** +Set crypt data for a tablespace. Takes over crypt_data: whichever object ends up unused (the one previously attached to the space, or crypt_data itself when the space is not found) is passed to fil_space_destroy_crypt_data() */ +UNIV_INTERN +void +fil_space_set_crypt_data( +/*==================*/ + ulint id, /*!< in: space id */ + fil_space_crypt_t* crypt_data) /*!< in: crypt data */ +{ + fil_space_t* space; + fil_space_crypt_t* old_crypt_data = NULL; + + ut_ad(fil_system); + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + if (space != NULL) { + + if (space->crypt_data != NULL) { + ut_a(!fil_space_crypt_compare(crypt_data, + space->crypt_data)); + old_crypt_data = space->crypt_data; + } + + space->crypt_data = crypt_data; + } else { + /* there is a small risk that tablespace has been deleted; crypt_data is then destroyed below instead of being attached */ + old_crypt_data = crypt_data; + } + + mutex_exit(&fil_system->mutex); + + if (old_crypt_data != NULL) { + /* first assign space->crypt_data + * then destroy old_crypt_data when no new references to + * it can be created. + */ + fil_space_destroy_crypt_data(&old_crypt_data); + } +} diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc index 77b9da8b060..29b9580f8e2 100644 --- a/storage/innobase/fil/fil0pagecompress.cc +++ b/storage/innobase/fil/fil0pagecompress.cc @@ -269,15 +269,26 @@ fil_compress_page( int level = 0; ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE; ulint write_size=0; - ulint comp_method = innodb_compression_algorithm; /* Cache to avoid - change during - function execution */ + /* Cache to avoid change during function execution */ + ulint comp_method = innodb_compression_algorithm; + ulint orig_page_type; ut_ad(buf); ut_ad(out_buf); ut_ad(len); ut_ad(out_len); + /* read original page type */ + orig_page_type = mach_read_from_2(buf + FIL_PAGE_TYPE); + + /* Let's not compress file space header or + extent descriptor */ + if ((orig_page_type == FIL_PAGE_TYPE_FSP_HDR) + || (orig_page_type == FIL_PAGE_TYPE_XDES) ) { + *out_len = len; + return (buf); + } + level = compression_level; 
ut_ad(fil_space_is_page_compressed(space_id)); @@ -422,7 +433,7 @@ fil_compress_page( /* Set up the correct page type */ mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED); /* Set up the flush lsn to be compression algorithm */ - mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, comp_method); + mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, comp_method); /* Set up the actual payload lenght */ mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size); @@ -431,7 +442,7 @@ fil_compress_page( ut_ad(fil_page_is_compressed(out_buf)); ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC); ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size); - ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == (ulint)comp_method); + ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) == (ulint)comp_method); /* Verify that page can be decompressed */ { @@ -555,7 +566,7 @@ fil_decompress_page( } /* Get compression algorithm */ - compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN); + compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); /* Get the actual size of compressed page */ actual_size = mach_read_from_2(buf+FIL_PAGE_DATA); @@ -726,5 +737,3 @@ fil_decompress_page( ut_free(in_buf); } } - - diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index d1bb22ed7a9..ee1f2fd9510 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -766,7 +766,12 @@ fsp_header_init( } else { fsp_fill_free_list(TRUE, space, header, mtr); } + + ulint maxsize = 0; + ulint offset = fsp_header_get_crypt_offset(zip_size, &maxsize); + fil_space_write_crypt_data(space, page, offset, maxsize, mtr); } + #endif /* !UNIV_HOTBACKUP */ /**********************************************************************//** @@ -4121,3 +4126,61 @@ fsp_print( fprintf(stderr, "NUMBER of file segments: %lu\n", (ulong) n_segs); } #endif /* !UNIV_HOTBACKUP */ + 
+/**********************************************************************//** +Compute offset after xdes where crypt data can be stored +@return FSP_HEADER_OFFSET + the offset just past the last xdes entry selected below */ +ulint +fsp_header_get_crypt_offset( +/*========================*/ + ulint zip_size, /*!< in: zip_size; 0 makes the max_size computation use UNIV_PAGE_SIZE */ + ulint* max_size) /*!< out: free space available for crypt data; may be NULL, in which case nothing is stored */ +{ + ulint pageno = 0; + /* compute first page_no that will have xdes stored on page != 0; pageno receives the value returned by xdes_calc_descriptor_page() for the first i where it is non-zero */ + for (ulint i = 0; + (pageno = xdes_calc_descriptor_page(zip_size, i)) == 0; ) + i++; + + /* use pageno prior to this...i.e. the last page described on page 0 */ + ut_ad(pageno > 0); + pageno--; + + ulint iv_offset = XDES_ARR_OFFSET + + XDES_SIZE * (1 + xdes_calc_descriptor_index(zip_size, pageno)); + + if (max_size != NULL) { + /* return how much free space there is available on page: page size minus the returned offset minus FIL_PAGE_DATA_END */ + *max_size = (zip_size ? zip_size : UNIV_PAGE_SIZE) - + (FSP_HEADER_OFFSET + iv_offset + FIL_PAGE_DATA_END); + } + + return FSP_HEADER_OFFSET + iv_offset; +} + +/**********************************************************************//** +Checks if a single page is free. 
+@return true if free */ +UNIV_INTERN +bool +fsp_page_is_free_func( +/*==============*/ + ulint space, /*!< in: space id */ + ulint page_no, /*!< in: page offset */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + const char *file, + ulint line) +{ + ulint flags; + + ut_ad(mtr); + + mtr_x_lock_func(fil_space_get_latch(space, &flags), file, line, mtr); + ulint zip_size = fsp_flags_get_zip_size(flags); + + xdes_t* descr = xdes_get_descriptor(space, zip_size, page_no, mtr); + ut_a(descr); + + return xdes_mtr_get_bit( + descr, XDES_FREE_BIT, page_no % FSP_EXTENT_SIZE, mtr); +} diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index f719594fa98..df5867ce43b 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -235,6 +235,20 @@ static char* internal_innobase_data_file_path = NULL; static char* innodb_version_str = (char*) INNODB_VERSION_STR; +extern my_bool srv_encrypt_tables; +extern uint srv_n_fil_crypt_threads; +extern uint srv_fil_crypt_rotate_key_age; +extern uint srv_n_fil_crypt_iops; + +extern my_bool srv_immediate_scrub_data_uncompressed; +extern my_bool srv_background_scrub_data_uncompressed; +extern my_bool srv_background_scrub_data_compressed; +extern uint srv_background_scrub_data_interval; +extern uint srv_background_scrub_data_check_interval; +#ifdef UNIV_DEBUG +extern my_bool srv_scrub_force_testing; +#endif + /** Possible values for system variable "innodb_stats_method". 
The values are defined the same as its corresponding MyISAM system variable "myisam_stats_method"(see "myisam_stats_method_names"), for better usability */ @@ -551,6 +565,12 @@ ha_create_table_option innodb_table_option_list[]= HA_TOPTION_NUMBER("PAGE_COMPRESSION_LEVEL", page_compression_level, ULINT_UNDEFINED, 0, 9, 1), /* With this option user can enable atomic writes feature for this table */ HA_TOPTION_ENUM("ATOMIC_WRITES", atomic_writes, "DEFAULT,ON,OFF", 0), + /* With this option the user can enable page encryption for the table */ + HA_TOPTION_BOOL("PAGE_ENCRYPTION", page_encryption, 0), + + /* With this option the user defines the key identifier using for the encryption */ + HA_TOPTION_NUMBER("PAGE_ENCRYPTION_KEY", page_encryption_key, ULINT_UNDEFINED, 1, 255, 1), + HA_TOPTION_END }; @@ -792,6 +812,14 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_page_compressed_trim_op_saved, SHOW_LONGLONG}, {"num_pages_page_decompressed", (char*) &export_vars.innodb_pages_page_decompressed, SHOW_LONGLONG}, + {"num_pages_page_compression_error", + (char*) &export_vars.innodb_pages_page_compression_error, SHOW_LONGLONG}, + {"num_pages_page_encrypted", + (char*) &export_vars.innodb_pages_page_encrypted, SHOW_LONGLONG}, + {"num_pages_page_decrypted", + (char*) &export_vars.innodb_pages_page_decrypted, SHOW_LONGLONG}, + {"num_pages_page_encryption_error", + (char*) &export_vars.innodb_pages_page_encryption_error, SHOW_LONGLONG}, {"have_lz4", (char*) &innodb_have_lz4, SHOW_BOOL}, {"have_lzo", @@ -824,6 +852,42 @@ static SHOW_VAR innodb_status_variables[]= { {"secondary_index_triggered_cluster_reads_avoided", (char*) &export_vars.innodb_sec_rec_cluster_reads_avoided, SHOW_LONG}, + /* Encryption */ + {"encryption_rotation_pages_read_from_cache", + (char*) &export_vars.innodb_encryption_rotation_pages_read_from_cache, + SHOW_LONG}, + {"encryption_rotation_pages_read_from_disk", + (char*) &export_vars.innodb_encryption_rotation_pages_read_from_disk, + 
SHOW_LONG}, + {"encryption_rotation_pages_modified", + (char*) &export_vars.innodb_encryption_rotation_pages_modified, + SHOW_LONG}, + {"encryption_rotation_pages_flushed", + (char*) &export_vars.innodb_encryption_rotation_pages_flushed, + SHOW_LONG}, + {"encryption_rotation_estimated_iops", + (char*) &export_vars.innodb_encryption_rotation_estimated_iops, + SHOW_LONG}, + + /* scrubing */ + {"scrub_background_page_reorganizations", + (char*) &export_vars.innodb_scrub_page_reorganizations, + SHOW_LONG}, + {"scrub_background_page_splits", + (char*) &export_vars.innodb_scrub_page_splits, + SHOW_LONG}, + {"scrub_background_page_split_failures_underflow", + (char*) &export_vars.innodb_scrub_page_split_failures_underflow, + SHOW_LONG}, + {"scrub_background_page_split_failures_out_of_filespace", + (char*) &export_vars.innodb_scrub_page_split_failures_out_of_filespace, + SHOW_LONG}, + {"scrub_background_page_split_failures_missing_index", + (char*) &export_vars.innodb_scrub_page_split_failures_missing_index, + SHOW_LONG}, + {"scrub_background_page_split_failures_unknown", + (char*) &export_vars.innodb_scrub_page_split_failures_unknown, + SHOW_LONG}, {NullS, NullS, SHOW_LONG} }; @@ -10957,6 +11021,8 @@ innobase_table_flags( modified by another thread while the table is being created. */ const ulint default_compression_level = page_zip_level; + const ulint default_encryption_key = 1; + *flags = 0; *flags2 = 0; @@ -11158,7 +11224,10 @@ index_bad: options->page_compressed, (ulint)options->page_compression_level == ULINT_UNDEFINED ? default_compression_level : options->page_compression_level, - options->atomic_writes); + options->atomic_writes, + options->page_encryption, + (ulint)options->page_encryption_key == ULINT_UNDEFINED ? 
+ default_encryption_key : options->page_encryption_key); if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { *flags2 |= DICT_TF2_TEMPORARY; @@ -11196,6 +11265,24 @@ ha_innobase::check_table_options( ha_table_option_struct *options= table->s->option_struct; atomic_writes_t awrites = (atomic_writes_t)options->atomic_writes; + if (options->page_encryption) { + if (srv_encrypt_tables) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_ENCRYPTION not available if innodb_encrypt_tables=ON"); + return "INNODB_ENCRYPT_TABLES"; + } + if (!use_tablespace) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_ENCRYPTION requires" + " innodb_file_per_table."); + return "PAGE_ENCRYPTION"; + } + } + /* Check page compression requirements */ if (options->page_compressed) { @@ -11268,6 +11355,33 @@ ha_innobase::check_table_options( } } + if ((ulint)options->page_encryption_key != ULINT_UNDEFINED) { + if (options->page_encryption == false) { + /* ignore this to allow alter table without changing page_encryption_key ...*/ + } + + if (options->page_encryption_key < 1 || options->page_encryption_key > 255) { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: invalid PAGE_ENCRYPTION_KEY = %lu." 
+ " Valid values are [1..255]", + options->page_encryption_key); + return "PAGE_ENCRYPTION_KEY"; + } + + if (!HasCryptoKey(options->page_encryption_key)) { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_ENCRYPTION_KEY encryption key %lu not available", + options->page_encryption_key + ); + return "PAGE_ENCRYPTION_KEY"; + + } + } + /* Check atomic writes requirements */ if (awrites == ATOMIC_WRITES_ON || (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) { @@ -17636,6 +17750,57 @@ innodb_status_output_update( os_event_set(srv_monitor_event); } +/****************************************************************** +Update the system variable innodb_encryption_threads */ +static +void +innodb_encryption_threads_update( +/*=========================*/ + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to + system variable */ + void* var_ptr,/*!< out: where the + formal string goes */ + const void* save) /*!< in: immediate result + from check function */ +{ + fil_crypt_set_thread_cnt(*static_cast<const uint*>(save)); +} + +/****************************************************************** +Update the system variable innodb_encryption_rotate_key_age */ +static +void +innodb_encryption_rotate_key_age_update( +/*=========================*/ + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to + system variable */ + void* var_ptr,/*!< out: where the + formal string goes */ + const void* save) /*!< in: immediate result + from check function */ +{ + fil_crypt_set_rotate_key_age(*static_cast<const uint*>(save)); +} + +/****************************************************************** +Update the system variable innodb_encryption_rotation_iops */ +static +void +innodb_encryption_rotation_iops_update( +/*=========================*/ + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to + system variable */ + 
void* var_ptr,/*!< out: where the + formal string goes */ + const void* save) /*!< in: immediate result + from check function */ +{ + fil_crypt_set_rotation_iops(*static_cast<const uint*>(save)); +} + static SHOW_VAR innodb_status_variables_export[]= { {"Innodb", (char*) &show_innodb_vars, SHOW_FUNC}, {NullS, NullS, SHOW_LONG} @@ -18928,6 +19093,108 @@ static MYSQL_SYSVAR_ULONG(fatal_semaphore_wait_threshold, srv_fatal_semaphore_wa UINT_MAX32, /* Maximum setting */ 0); +static MYSQL_SYSVAR_BOOL(encrypt_tables, srv_encrypt_tables, 0, + "Encrypt all tables in the storage engine", + 0, 0, 0); + +static MYSQL_SYSVAR_UINT(encryption_threads, srv_n_fil_crypt_threads, + PLUGIN_VAR_RQCMDARG, + "No of threads performing background key rotation and " + "scrubbing", + NULL, + innodb_encryption_threads_update, + srv_n_fil_crypt_threads, 0, UINT_MAX32, 0); + +static MYSQL_SYSVAR_UINT(encryption_rotate_key_age, + srv_fil_crypt_rotate_key_age, + PLUGIN_VAR_RQCMDARG, + "Rotate any page having a key older than this", + NULL, + innodb_encryption_rotate_key_age_update, + srv_fil_crypt_rotate_key_age, 0, UINT_MAX32, 0); + +static MYSQL_SYSVAR_UINT(encryption_rotation_iops, srv_n_fil_crypt_iops, + PLUGIN_VAR_RQCMDARG, + "Use this many iops for background key rotation", + NULL, + innodb_encryption_rotation_iops_update, + srv_n_fil_crypt_iops, 0, UINT_MAX32, 0); + +static MYSQL_SYSVAR_BOOL(scrub_log, srv_scrub_log, + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, + "Enable redo log scrubbing", + 0, 0, 0); + +/* + If innodb_scrub_log is on, logs will be scrubbed in less than + (((innodb_log_file_size * innodb_log_files_in_group) / 512 ) / + ((1000 * 86400) / innodb_scrub_log_interval)) + days. + In above formula, the first line calculates the number of log blocks to scrub, + and the second line calculates the number of log blocks scrubbed in one day. 
+*/ +static MYSQL_SYSVAR_ULONGLONG(scrub_log_interval, innodb_scrub_log_interval, + PLUGIN_VAR_OPCMDARG, + "Innodb redo log scrubbing interval in ms", + NULL, NULL, + 2000, /* default */ + 10, /* min */ + ULONGLONG_MAX, 0);/* max */ + +static MYSQL_SYSVAR_BOOL(encrypt_log, srv_encrypt_log, + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, + "Enable redo log encryption/decryption.", + NULL, NULL, FALSE); + +static MYSQL_SYSVAR_BOOL(immediate_scrub_data_uncompressed, + srv_immediate_scrub_data_uncompressed, + 0, + "Enable scrubbing of data", + NULL, NULL, FALSE); + +static MYSQL_SYSVAR_BOOL(background_scrub_data_uncompressed, + srv_background_scrub_data_uncompressed, + 0, + "Enable scrubbing of uncompressed data by " + "background threads (same as encryption_threads)", + NULL, NULL, FALSE); + +static MYSQL_SYSVAR_BOOL(background_scrub_data_compressed, + srv_background_scrub_data_compressed, + 0, + "Enable scrubbing of compressed data by " + "background threads (same as encryption_threads)", + NULL, NULL, FALSE); + +static MYSQL_SYSVAR_UINT(background_scrub_data_check_interval, + srv_background_scrub_data_check_interval, + 0, + "check if spaces needs scrubbing every " + "innodb_background_scrub_data_check_interval " + "seconds", + NULL, NULL, + srv_background_scrub_data_check_interval, + 1, + UINT_MAX32, 0); + +static MYSQL_SYSVAR_UINT(background_scrub_data_interval, + srv_background_scrub_data_interval, + 0, + "scrub spaces that were last scrubbed longer than " + " innodb_background_scrub_data_interval seconds ago", + NULL, NULL, + srv_background_scrub_data_interval, + 1, + UINT_MAX32, 0); + +#ifdef UNIV_DEBUG +static MYSQL_SYSVAR_BOOL(scrub_force_testing, + srv_scrub_force_testing, + 0, + "Perform extra scrubbing to increase test exposure", + NULL, NULL, FALSE); +#endif /* UNIV_DEBUG */ + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(additional_mem_pool_size), MYSQL_SYSVAR(api_trx_level), @@ -19100,12 +19367,30 @@ static struct 
st_mysql_sys_var* innobase_system_variables[]= { #endif /* UNIV_DEBUG */ MYSQL_SYSVAR(simulate_comp_failures), MYSQL_SYSVAR(force_primary_key), + MYSQL_SYSVAR(fatal_semaphore_wait_threshold), + /* Table page compression feature */ MYSQL_SYSVAR(use_trim), MYSQL_SYSVAR(compression_algorithm), MYSQL_SYSVAR(mtflush_threads), MYSQL_SYSVAR(use_mtflush), - - MYSQL_SYSVAR(fatal_semaphore_wait_threshold), + /* Encryption feature */ + MYSQL_SYSVAR(encrypt_tables), + MYSQL_SYSVAR(encryption_threads), + MYSQL_SYSVAR(encryption_rotate_key_age), + MYSQL_SYSVAR(encryption_rotation_iops), + MYSQL_SYSVAR(scrub_log), + MYSQL_SYSVAR(scrub_log_interval), + MYSQL_SYSVAR(encrypt_log), + + /* Scrubing feature */ + MYSQL_SYSVAR(immediate_scrub_data_uncompressed), + MYSQL_SYSVAR(background_scrub_data_uncompressed), + MYSQL_SYSVAR(background_scrub_data_compressed), + MYSQL_SYSVAR(background_scrub_data_interval), + MYSQL_SYSVAR(background_scrub_data_check_interval), +#ifdef UNIV_DEBUG + MYSQL_SYSVAR(scrub_force_testing), +#endif NULL }; @@ -19115,7 +19400,7 @@ maria_declare_plugin(innobase) &innobase_storage_engine, innobase_hton_name, plugin_author, - "Supports transactions, row-level locking, and foreign keys", + "Supports transactions, row-level locking, foreign keys and encryption for tables", PLUGIN_LICENSE_GPL, innobase_init, /* Plugin Init */ NULL, /* Plugin Deinit */ @@ -19152,8 +19437,9 @@ i_s_innodb_sys_fields, i_s_innodb_sys_foreign, i_s_innodb_sys_foreign_cols, i_s_innodb_sys_tablespaces, -i_s_innodb_sys_datafiles - +i_s_innodb_sys_datafiles, +i_s_innodb_tablespaces_encryption, +i_s_innodb_tablespaces_scrubbing maria_declare_plugin_end; /** @brief Initialize the default value of innodb_commit_concurrency. 
diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h index 6da31c8ecc6..7807c7ca7e6 100644 --- a/storage/innobase/handler/ha_innodb.h +++ b/storage/innobase/handler/ha_innodb.h @@ -57,7 +57,7 @@ typedef struct st_innobase_share { /** Prebuilt structures in an InnoDB table handle used within MySQL */ struct row_prebuilt_t; -/** Engine specific table options are definined using this struct */ +/** Engine specific table options are defined using this struct */ struct ha_table_option_struct { bool page_compressed; /*!< Table is using page compression @@ -70,6 +70,8 @@ struct ha_table_option_struct srv_use_atomic_writes=1. Atomic writes are not used if value OFF.*/ + bool page_encryption; /*!< Flag for an encrypted table */ + int page_encryption_key; /*!< ID of the encryption key */ }; diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index f426c86c7c3..7cc16197f57 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -277,6 +277,13 @@ ha_innobase::check_if_supported_inplace_alter( ER_ALTER_OPERATION_NOT_SUPPORTED_REASON); DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); } + + if (new_options->page_encryption != old_options->page_encryption || + new_options->page_encryption_key != old_options->page_encryption_key) { + ha_alter_info->unsupported_reason = innobase_get_err_msg( + ER_ALTER_OPERATION_NOT_SUPPORTED_REASON); + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } } if (ha_alter_info->handler_flags diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc index f6b3dbd2d5d..f0202e232b7 100644 --- a/storage/innobase/handler/i_s.cc +++ b/storage/innobase/handler/i_s.cc @@ -4465,10 +4465,14 @@ i_s_innodb_stats_fill( info->pages_written_rate)); if (info->n_page_get_delta) { - OK(fields[IDX_BUF_STATS_HIT_RATE]->store( - static_cast<double>( - 1000 - (1000 * info->page_read_delta - / info->n_page_get_delta)))); + if 
(info->page_read_delta <= info->n_page_get_delta) { + OK(fields[IDX_BUF_STATS_HIT_RATE]->store( + static_cast<double>( + 1000 - (1000 * info->page_read_delta + / info->n_page_get_delta)))); + } else { + OK(fields[IDX_BUF_STATS_HIT_RATE]->store(0)); + } OK(fields[IDX_BUF_STATS_MADE_YOUNG_PCT]->store( static_cast<double>( @@ -8070,3 +8074,583 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_datafiles = STRUCT_FLD(version_info, INNODB_VERSION_STR), STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), }; + +/** TABLESPACES_ENCRYPTION ********************************************/ +/* Fields of the table INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION */ +static ST_FIELD_INFO innodb_tablespaces_encryption_fields_info[] = +{ +#define TABLESPACES_ENCRYPTION_SPACE 0 + {STRUCT_FLD(field_name, "SPACE"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_ENCRYPTION_NAME 1 + {STRUCT_FLD(field_name, "NAME"), + STRUCT_FLD(field_length, MAX_FULL_NAME_LEN + 1), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_ENCRYPTION_ENCRYPTION_SCHEME 2 + {STRUCT_FLD(field_name, "ENCRYPTION_SCHEME"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_ENCRYPTION_KEYSERVER_REQUESTS 3 + {STRUCT_FLD(field_name, "KEYSERVER_REQUESTS"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + 
STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_ENCRYPTION_MIN_KEY_VERSION 4 + {STRUCT_FLD(field_name, "MIN_KEY_VERSION"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_ENCRYPTION_CURRENT_KEY_VERSION 5 + {STRUCT_FLD(field_name, "CURRENT_KEY_VERSION"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER 6 + {STRUCT_FLD(field_name, "KEY_ROTATION_PAGE_NUMBER"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER 7 + {STRUCT_FLD(field_name, "KEY_ROTATION_MAX_PAGE_NUMBER"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + END_OF_ST_FIELD_INFO +}; + +/**********************************************************************//** +Function to fill INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION +with information collected by scanning SYS_TABLESPACES table and then use +fil_space() +@return 0 on success */ +static +int +i_s_dict_fill_tablespaces_encryption( +/*==========================*/ + THD* thd, /*!< in: thread */ + ulint space, /*!< in: space ID */ + const char* name, /*!< in: 
tablespace name */ + TABLE* table_to_fill) /*!< in/out: fill this table */ +{ + Field** fields; + struct fil_space_crypt_status_t status; + + DBUG_ENTER("i_s_dict_fill_tablespaces_encryption"); + + fields = table_to_fill->field; + + fil_space_crypt_get_status(space, &status); + OK(fields[TABLESPACES_ENCRYPTION_SPACE]->store(space)); + + OK(field_store_string(fields[TABLESPACES_ENCRYPTION_NAME], + name)); + + OK(fields[TABLESPACES_ENCRYPTION_ENCRYPTION_SCHEME]->store( + status.scheme)); + OK(fields[TABLESPACES_ENCRYPTION_KEYSERVER_REQUESTS]->store( + status.keyserver_requests)); + OK(fields[TABLESPACES_ENCRYPTION_MIN_KEY_VERSION]->store( + status.min_key_version)); + OK(fields[TABLESPACES_ENCRYPTION_CURRENT_KEY_VERSION]->store( + status.current_key_version)); + if (status.rotating) { + fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER]->set_notnull(); + OK(fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER]->store( + status.rotate_next_page_number)); + fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER]->set_notnull(); + OK(fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER]->store( + status.rotate_max_page_number)); + } else { + fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER] + ->set_null(); + fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER] + ->set_null(); + } + OK(schema_table_store_record(thd, table_to_fill)); + + DBUG_RETURN(0); +} +/*******************************************************************//** +Function to populate INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION table. +Loop through each record in TABLESPACES_ENCRYPTION, and extract the column +information and fill the INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION table. 
+@return 0 on success */ +static +int +i_s_tablespaces_encryption_fill_table( +/*===========================*/ + THD* thd, /*!< in: thread */ + TABLE_LIST* tables, /*!< in/out: tables to fill */ + Item* ) /*!< in: condition (not used) */ +{ + btr_pcur_t pcur; + const rec_t* rec; + mem_heap_t* heap; + mtr_t mtr; + bool found_space_0 = false; + + DBUG_ENTER("i_s_tablespaces_encryption_fill_table"); + + /* deny access to user without PROCESS_ACL privilege */ + if (check_global_access(thd, SUPER_ACL)) { + DBUG_RETURN(0); + } + + heap = mem_heap_create(1000); + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + + rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES); + + while (rec) { + const char* err_msg; + ulint space; + const char* name; + ulint flags; + + /* Extract necessary information from a SYS_TABLESPACES row */ + err_msg = dict_process_sys_tablespaces( + heap, rec, &space, &name, &flags); + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + + if (space == 0) { + found_space_0 = true; + } + + if (!err_msg) { + i_s_dict_fill_tablespaces_encryption( + thd, space, name, tables->table); + } else { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_CANT_FIND_SYSTEM_REC, "%s", + err_msg); + } + + mem_heap_empty(heap); + + /* Get the next record */ + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + rec = dict_getnext_system(&pcur, &mtr); + } + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + mem_heap_free(heap); + + if (found_space_0 == false) { + /* space 0 does for what ever unknown reason not show up + * in iteration above, add it manually */ + ulint space = 0; + const char* name = NULL; + i_s_dict_fill_tablespaces_encryption( + thd, space, name, tables->table); + } + + DBUG_RETURN(0); +} +/*******************************************************************//** +Bind the dynamic table INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION +@return 0 on success */ +static +int +innodb_tablespaces_encryption_init( +/*========================*/ 
+ void* p) /*!< in/out: table schema object */ +{ + ST_SCHEMA_TABLE* schema; + + DBUG_ENTER("innodb_tablespaces_encryption_init"); + + schema = (ST_SCHEMA_TABLE*) p; + + schema->fields_info = innodb_tablespaces_encryption_fields_info; + schema->fill_table = i_s_tablespaces_encryption_fill_table; + + DBUG_RETURN(0); +} + +UNIV_INTERN struct st_maria_plugin i_s_innodb_tablespaces_encryption = +{ + /* the plugin type (a MYSQL_XXX_PLUGIN value) */ + /* int */ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + + /* pointer to type-specific plugin descriptor */ + /* void* */ + STRUCT_FLD(info, &i_s_info), + + /* plugin name */ + /* const char* */ + STRUCT_FLD(name, "INNODB_TABLESPACES_ENCRYPTION"), + + /* plugin author (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(author, "Google Inc"), + + /* general descriptive text (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(descr, "InnoDB TABLESPACES_ENCRYPTION"), + + /* the plugin license (PLUGIN_LICENSE_XXX) */ + /* int */ + STRUCT_FLD(license, PLUGIN_LICENSE_BSD), + + /* the function to invoke when plugin is loaded */ + /* int (*)(void*); */ + STRUCT_FLD(init, innodb_tablespaces_encryption_init), + + /* the function to invoke when plugin is unloaded */ + /* int (*)(void*); */ + STRUCT_FLD(deinit, i_s_common_deinit), + + /* plugin version (for SHOW PLUGINS) */ + /* unsigned int */ + STRUCT_FLD(version, INNODB_VERSION_SHORT), + + /* struct st_mysql_show_var* */ + STRUCT_FLD(status_vars, NULL), + + /* struct st_mysql_sys_var** */ + STRUCT_FLD(system_vars, NULL), + + /* Maria extension */ + STRUCT_FLD(version_info, INNODB_VERSION_STR), + STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE) +}; + +/** TABLESPACES_SCRUBBING ********************************************/ +/* Fields of the table INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING */ +static ST_FIELD_INFO innodb_tablespaces_scrubbing_fields_info[] = +{ +#define TABLESPACES_SCRUBBING_SPACE 0 + {STRUCT_FLD(field_name, "SPACE"), + STRUCT_FLD(field_length, 
MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_SCRUBBING_NAME 1 + {STRUCT_FLD(field_name, "NAME"), + STRUCT_FLD(field_length, MAX_FULL_NAME_LEN + 1), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_SCRUBBING_COMPRESSED 2 + {STRUCT_FLD(field_name, "COMPRESSED"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED 3 + {STRUCT_FLD(field_name, "LAST_SCRUB_COMPLETED"), + STRUCT_FLD(field_length, 0), + STRUCT_FLD(field_type, MYSQL_TYPE_DATETIME), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_SCRUBBING_CURRENT_SCRUB_STARTED 4 + {STRUCT_FLD(field_name, "CURRENT_SCRUB_STARTED"), + STRUCT_FLD(field_length, 0), + STRUCT_FLD(field_type, MYSQL_TYPE_DATETIME), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_SCRUBBING_CURRENT_SCRUB_ACTIVE_THREADS 5 + {STRUCT_FLD(field_name, "CURRENT_SCRUB_ACTIVE_THREADS"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_SCRUBBING_CURRENT_SCRUB_PAGE_NUMBER 6 + {STRUCT_FLD(field_name, 
"CURRENT_SCRUB_PAGE_NUMBER"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_SCRUBBING_CURRENT_SCRUB_MAX_PAGE_NUMBER 7 + {STRUCT_FLD(field_name, "CURRENT_SCRUB_MAX_PAGE_NUMBER"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + END_OF_ST_FIELD_INFO +}; + +/**********************************************************************//** +Function to fill INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING +with information collected by scanning SYS_TABLESPACES table and then use +fil_space() +@return 0 on success */ +static +int +i_s_dict_fill_tablespaces_scrubbing( +/*==========================*/ + THD* thd, /*!< in: thread */ + ulint space, /*!< in: space ID */ + const char* name, /*!< in: tablespace name */ + TABLE* table_to_fill) /*!< in/out: fill this table */ +{ + Field** fields; + struct fil_space_scrub_status_t status; + + DBUG_ENTER("i_s_dict_fill_tablespaces_scrubbing"); + + fields = table_to_fill->field; + + fil_space_get_scrub_status(space, &status); + OK(fields[TABLESPACES_SCRUBBING_SPACE]->store(space)); + + OK(field_store_string(fields[TABLESPACES_SCRUBBING_NAME], + name)); + + OK(fields[TABLESPACES_SCRUBBING_COMPRESSED]->store( + status.compressed ? 
1 : 0)); + + if (status.last_scrub_completed == 0) { + fields[TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED]->set_null(); + } else { + fields[TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED] + ->set_notnull(); + OK(field_store_time_t( + fields[TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED], + status.last_scrub_completed)); + } + + int field_numbers[] = { + TABLESPACES_SCRUBBING_CURRENT_SCRUB_STARTED, + TABLESPACES_SCRUBBING_CURRENT_SCRUB_ACTIVE_THREADS, + TABLESPACES_SCRUBBING_CURRENT_SCRUB_PAGE_NUMBER, + TABLESPACES_SCRUBBING_CURRENT_SCRUB_MAX_PAGE_NUMBER }; + if (status.scrubbing) { + for (uint i = 0; i < array_elements(field_numbers); i++) { + fields[field_numbers[i]]->set_notnull(); + } + + OK(field_store_time_t( + fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_STARTED], + status.current_scrub_started)); + OK(fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_ACTIVE_THREADS] + ->store(status.current_scrub_active_threads)); + OK(fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_PAGE_NUMBER] + ->store(status.current_scrub_page_number)); + OK(fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_MAX_PAGE_NUMBER] + ->store(status.current_scrub_max_page_number)); + } else { + for (uint i = 0; i < array_elements(field_numbers); i++) { + fields[field_numbers[i]]->set_null(); + } + } + OK(schema_table_store_record(thd, table_to_fill)); + + DBUG_RETURN(0); +} +/*******************************************************************//** +Function to populate INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING table. +Loop through each record in TABLESPACES_SCRUBBING, and extract the column +information and fill the INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING table. 
+@return 0 on success */ +static +int +i_s_tablespaces_scrubbing_fill_table( +/*===========================*/ + THD* thd, /*!< in: thread */ + TABLE_LIST* tables, /*!< in/out: tables to fill */ + Item* ) /*!< in: condition (not used) */ +{ + btr_pcur_t pcur; + const rec_t* rec; + mem_heap_t* heap; + mtr_t mtr; + bool found_space_0 = false; + + DBUG_ENTER("i_s_tablespaces_scrubbing_fill_table"); + + /* deny access to user without SUPER_ACL privilege */ + if (check_global_access(thd, SUPER_ACL)) { + DBUG_RETURN(0); + } + + heap = mem_heap_create(1000); + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + + rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES); + + while (rec) { + const char* err_msg; + ulint space; + const char* name; + ulint flags; + + /* Extract necessary information from a SYS_TABLESPACES row */ + err_msg = dict_process_sys_tablespaces( + heap, rec, &space, &name, &flags); + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + + if (space == 0) { + found_space_0 = true; + } + + if (!err_msg) { + i_s_dict_fill_tablespaces_scrubbing( + thd, space, name, tables->table); + } else { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_CANT_FIND_SYSTEM_REC, "%s", + err_msg); + } + + mem_heap_empty(heap); + + /* Get the next record */ + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + rec = dict_getnext_system(&pcur, &mtr); + } + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + mem_heap_free(heap); + + if (found_space_0 == false) { + /* space 0 does for what ever unknown reason not show up + * in iteration above, add it manually */ + ulint space = 0; + const char* name = NULL; + i_s_dict_fill_tablespaces_scrubbing( + thd, space, name, tables->table); + } + + DBUG_RETURN(0); +} +/*******************************************************************//** +Bind the dynamic table INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING +@return 0 on success */ +static +int +innodb_tablespaces_scrubbing_init( +/*========================*/ + void* 
p) /*!< in/out: table schema object */ +{ + ST_SCHEMA_TABLE* schema; + + DBUG_ENTER("innodb_tablespaces_scrubbing_init"); + + schema = (ST_SCHEMA_TABLE*) p; + + schema->fields_info = innodb_tablespaces_scrubbing_fields_info; + schema->fill_table = i_s_tablespaces_scrubbing_fill_table; + + DBUG_RETURN(0); +} + +UNIV_INTERN struct st_maria_plugin i_s_innodb_tablespaces_scrubbing = +{ + /* the plugin type (a MYSQL_XXX_PLUGIN value) */ + /* int */ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + + /* pointer to type-specific plugin descriptor */ + /* void* */ + STRUCT_FLD(info, &i_s_info), + + /* plugin name */ + /* const char* */ + STRUCT_FLD(name, "INNODB_TABLESPACES_SCRUBBING"), + + /* plugin author (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(author, "Google Inc"), + + /* general descriptive text (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(descr, "InnoDB TABLESPACES_SCRUBBING"), + + /* the plugin license (PLUGIN_LICENSE_XXX) */ + /* int */ + STRUCT_FLD(license, PLUGIN_LICENSE_BSD), + + /* the function to invoke when plugin is loaded */ + /* int (*)(void*); */ + STRUCT_FLD(init, innodb_tablespaces_scrubbing_init), + + /* the function to invoke when plugin is unloaded */ + /* int (*)(void*); */ + STRUCT_FLD(deinit, i_s_common_deinit), + + /* plugin version (for SHOW PLUGINS) */ + /* unsigned int */ + STRUCT_FLD(version, INNODB_VERSION_SHORT), + + /* struct st_mysql_show_var* */ + STRUCT_FLD(status_vars, NULL), + + /* struct st_mysql_sys_var** */ + STRUCT_FLD(system_vars, NULL), + + /* Maria extension */ + STRUCT_FLD(version_info, INNODB_VERSION_STR), + STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE) +}; diff --git a/storage/innobase/handler/i_s.h b/storage/innobase/handler/i_s.h index a2b324cb314..4b248b0673a 100644 --- a/storage/innobase/handler/i_s.h +++ b/storage/innobase/handler/i_s.h @@ -56,5 +56,7 @@ extern struct st_maria_plugin i_s_innodb_sys_foreign; extern struct st_maria_plugin i_s_innodb_sys_foreign_cols; extern struct 
st_maria_plugin i_s_innodb_sys_tablespaces; extern struct st_maria_plugin i_s_innodb_sys_datafiles; +extern struct st_maria_plugin i_s_innodb_tablespaces_encryption; +extern struct st_maria_plugin i_s_innodb_tablespaces_scrubbing; #endif /* i_s_h */ diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index b6f8a685ae9..68ba7bd4f3f 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -731,6 +731,7 @@ btr_page_free_low( dict_index_t* index, /*!< in: index tree */ buf_block_t* block, /*!< in: block to be freed, x-latched */ ulint level, /*!< in: page level */ + bool blob, /*!< in: blob page */ mtr_t* mtr) /*!< in: mtr */ __attribute__((nonnull)); /*************************************************************//** @@ -867,4 +868,8 @@ btr_lift_page_up( #include "btr0btr.ic" #endif +/**************************************************************** +Global variable controlling if scrubbing should be performed */ +extern my_bool srv_immediate_scrub_data_uncompressed; + #endif diff --git a/storage/innobase/include/btr0scrub.h b/storage/innobase/include/btr0scrub.h new file mode 100644 index 00000000000..608266c206d --- /dev/null +++ b/storage/innobase/include/btr0scrub.h @@ -0,0 +1,166 @@ +// Copyright 2014 Google + +#ifndef btr0scrub_h +#define btr0scrub_h + +#include "univ.i" + +#include "dict0dict.h" +#include "data0data.h" +#include "page0cur.h" +#include "mtr0mtr.h" +#include "btr0types.h" + +/** + * enum describing page allocation status + */ +enum btr_scrub_page_allocation_status_t { + BTR_SCRUB_PAGE_FREE, + BTR_SCRUB_PAGE_ALLOCATED, + BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN +}; + +/** +* constants returned by btr_page_needs_scrubbing & btr_scrub_recheck_page +*/ +#define BTR_SCRUB_PAGE 1 /* page should be scrubbed */ +#define BTR_SCRUB_SKIP_PAGE 2 /* no scrub & no action */ +#define BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE 3 /* no scrub & close table */ +#define BTR_SCRUB_SKIP_PAGE_AND_COMPLETE_SPACE 4 /* 
no scrub & complete space */ +#define BTR_SCRUB_TURNED_OFF 5 /* we detected that scrubbing + was disabled by global + variable */ + +/**************************************************************//** +struct for keeping scrub statistics. */ +struct btr_scrub_stat_t { + /* page reorganizations */ + ulint page_reorganizations; + /* page splits */ + ulint page_splits; + /* scrub failures */ + ulint page_split_failures_underflow; + ulint page_split_failures_out_of_filespace; + ulint page_split_failures_missing_index; + ulint page_split_failures_unknown; +}; + +/**************************************************************//** +struct for thread local scrub state. */ +struct btr_scrub_t { + + /* current space */ + ulint space; + + /* is scrubbing enabled for this space */ + bool scrubbing; + + /* is current space compressed */ + bool compressed; + + dict_table_t* current_table; + dict_index_t* current_index; + /* savepoint for X_LATCH of block */ + ulint savepoint; + + /* statistic counters */ + btr_scrub_stat_t scrub_stat; +}; + +/********************************************************************* +Init scrub global variables */ +UNIV_INTERN +void +btr_scrub_init(); + +/********************************************************************* +Cleanup scrub globals */ +UNIV_INTERN +void +btr_scrub_cleanup(); + +/*********************************************************************** +Return crypt statistics */ +UNIV_INTERN +void +btr_scrub_total_stat( +/*==================*/ + btr_scrub_stat_t *stat); /*!< out: stats to update */ + +/**************************************************************//** +Check if a page needs scrubbing +* @return BTR_SCRUB_PAGE if page should be scrubbed +* else btr_scrub_skip_page should be called +* with this return value (and without any latches held) +*/ +UNIV_INTERN +int +btr_page_needs_scrubbing( +/*=====================*/ + btr_scrub_t* scrub_data, /*!< in: scrub data */ + buf_block_t* block, /*!< in: block to check, latched */ + 
btr_scrub_page_allocation_status_t allocated); /*!< in: is block + allocated, free or + unknown */ + +/**************************************************************** +Recheck if a page needs scrubbing, and if it does load appropriate +table and index +* @return BTR_SCRUB_PAGE if page should be scrubbed +* else btr_scrub_skip_page should be called +* with this return value (and without any latches held) +*/ +UNIV_INTERN +int +btr_scrub_recheck_page( +/*====================*/ + btr_scrub_t* scrub_data, /*!< inut: scrub data */ + buf_block_t* block, /*!< in: block */ + btr_scrub_page_allocation_status_t allocated, /*!< in: is block + allocated or free */ + mtr_t* mtr); /*!< in: mtr */ + +/**************************************************************** +Perform actual scrubbing of page */ +UNIV_INTERN +int +btr_scrub_page( +/*============*/ + btr_scrub_t* scrub_data, /*!< in/out: scrub data */ + buf_block_t* block, /*!< in: block */ + btr_scrub_page_allocation_status_t allocated, /*!< in: is block + allocated or free */ + mtr_t* mtr); /*!< in: mtr */ + +/**************************************************************** +Perform cleanup needed for a page not needing scrubbing */ +UNIV_INTERN +void +btr_scrub_skip_page( +/*============*/ + btr_scrub_t* scrub_data, /*!< in/out: scrub data */ + int needs_scrubbing); /*!< in: return value from + btr_page_needs_scrubbing or + btr_scrub_recheck_page which encodes what kind + of cleanup is needed */ + +/**************************************************************** +Start iterating a space +* @return true if scrubbing is turned on */ +UNIV_INTERN +bool +btr_scrub_start_space( +/*===================*/ + ulint space, /*!< in: space */ + btr_scrub_t* scrub_data); /*!< in/out: scrub data */ + +/**************************************************************** +Complete iterating a space +* @return true if space was scrubbed */ +UNIV_INTERN +bool +btr_scrub_complete_space( +/*=====================*/ + btr_scrub_t* scrub_data); 
/*!< in/out: scrub data */ + +#endif diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 5dd5102f65b..2863ab01ff9 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -375,11 +375,13 @@ Given a tablespace id and page number tries to get that page. If the page is not in the buffer pool it is not loaded and NULL is returned. Suitable for using when holding the lock_sys_t::mutex. */ UNIV_INTERN -const buf_block_t* +buf_block_t* buf_page_try_get_func( /*==================*/ ulint space_id,/*!< in: tablespace id */ ulint page_no,/*!< in: page number */ + ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */ + bool possibly_freed, /*!< in: don't mind if page is freed */ const char* file, /*!< in: file name */ ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mini-transaction */ @@ -391,7 +393,8 @@ not loaded. Suitable for using when holding the lock_sys_t::mutex. @param mtr in: mini-transaction @return the page if in buffer pool, NULL if not */ #define buf_page_try_get(space_id, page_no, mtr) \ - buf_page_try_get_func(space_id, page_no, __FILE__, __LINE__, mtr); + buf_page_try_get_func(space_id, page_no, RW_S_LATCH, false, \ + __FILE__, __LINE__, mtr); /********************************************************************//** Get read access to a compressed page (usually of type @@ -1434,6 +1437,53 @@ buf_flush_update_zip_checksum( #endif /* !UNIV_HOTBACKUP */ +/********************************************************************//** +The hook that is called just before a page is written to disk. +The function encrypts the content of the page and returns a pointer +to a frame that will be written instead of the real frame. 
*/ +byte* +buf_page_encrypt_before_write( +/*==========================*/ + buf_page_t* page, /*!< in/out: buffer page to be flushed */ + const byte* frame); + +/********************************************************************** +The hook that is called after page is written to disk. +The function releases any resources needed for encryption that was allocated +in buf_page_encrypt_before_write */ +ibool +buf_page_encrypt_after_write( +/*=========================*/ + buf_page_t* page); /*!< in/out: buffer page that was flushed */ + +/********************************************************************//** +The hook that is called just before a page is read from disk. +The function allocates memory that is used to temporarily store disk content +before getting decrypted */ +byte* +buf_page_decrypt_before_read( +/*=========================*/ + buf_page_t* page, /*!< in/out: buffer page read from disk */ + ulint zip_size); /*!< in: compressed page size, or 0 */ + +/********************************************************************//** +The hook that is called just after a page is read from disk. +The function decrypt disk content into buf_page_t and releases the +temporary buffer that was allocated in buf_page_decrypt_before_read */ +ibool +buf_page_decrypt_after_read( +/*========================*/ + buf_page_t* page); /*!< in/out: buffer page read from disk */ + +/********************************************************************//** +Release memory allocated for page decryption. 
+Only used in scenarios where read fails, e.g due to tablespace being dropped */ +void +buf_page_decrypt_cleanup( +/*=====================*/ + buf_page_t* page); /*!< in/out: buffer page read from disk */ + + /** The common buffer control block structure for compressed and uncompressed frames */ @@ -1499,11 +1549,25 @@ struct buf_page_t{ zip.data == NULL means an active buf_pool->watch */ - ulint write_size; /* Write size is set when this + ulint write_size; /* Write size is set when this page is first time written and then if written again we check is TRIM operation needed. */ -#ifndef UNIV_HOTBACKUP + + unsigned key_version; /*!< key version for this block */ + byte* crypt_buf; /*!< for encryption the data needs to be + copied to a separate buffer before it's + encrypted&written. this as a page can be + read while it's being flushed */ + byte* crypt_buf_free; /*!< for encryption, allocated buffer + that is then aligned */ + byte* comp_buf; /*!< for compression we need + temporal buffer because page + can be read while it's being flushed */ + byte* comp_buf_free; /*!< for compression, allocated + buffer that is then aligned */ + + #ifndef UNIV_HOTBACKUP buf_page_t* hash; /*!< node used in chaining to buf_pool->page_hash or buf_pool->zip_hash */ diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic index 6e419674f98..51d77272ef9 100644 --- a/storage/innobase/include/buf0buf.ic +++ b/storage/innobase/include/buf0buf.ic @@ -1453,4 +1453,33 @@ buf_get_nth_chunk_block( *chunk_size = chunk->size; return(chunk->blocks); } + +/********************************************************************//** +Get crypt buffer. */ +UNIV_INLINE +byte* +buf_page_get_crypt_buffer( +/*=========================*/ + const buf_page_t* bpage) /*!< in: buffer pool page */ +{ + return bpage->crypt_buf; +} + +/********************************************************************//** +Get buf frame. 
*/ +UNIV_INLINE +void * +buf_page_get_frame( +/*=========================*/ + const buf_page_t* bpage) /*!< in: buffer pool page */ +{ + if (bpage->crypt_buf) { + return buf_page_get_crypt_buffer(bpage); + } else if (bpage->zip.data) { + return bpage->zip.data; + } else { + return ((buf_block_t*) bpage)->frame; + } +} + #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index 99af4b78e1c..326b9e7c986 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -139,6 +139,17 @@ dict_table_open_on_id( ibool dict_locked, /*!< in: TRUE=data dictionary locked */ dict_table_op_t table_op) /*!< in: operation to perform */ __attribute__((warn_unused_result)); + +/**********************************************************************//** +Returns a table object based on table id. +@return table, NULL if does not exist */ +UNIV_INTERN +dict_table_t* +dict_table_open_on_index_id( +/*==================*/ + table_id_t table_id, /*!< in: table id */ + bool dict_locked) /*!< in: TRUE=data dictionary locked */ + __attribute__((warn_unused_result)); /********************************************************************//** Decrements the count of open handles to a table. */ UNIV_INTERN @@ -918,8 +929,10 @@ dict_tf_set( pages */ ulint page_compression_level, /*!< in: table page compression level */ - ulint atomic_writes) /*!< in: table atomic + ulint atomic_writes, /*!< in: table atomic writes option value*/ + bool page_encrypted,/*!< in: table uses page encryption */ + ulint page_encryption_key) /*!< in: page encryption key */ __attribute__((nonnull)); /********************************************************************//** Convert a 32 bit integer table flags to the 32 bit integer that is @@ -1446,8 +1459,12 @@ dict_index_calc_min_rec_len( Reserves the dictionary system mutex for MySQL. 
*/ UNIV_INTERN void -dict_mutex_enter_for_mysql(void); +dict_mutex_enter_for_mysql_func(const char * file, ulint line); /*============================*/ + +#define dict_mutex_enter_for_mysql() \ + dict_mutex_enter_for_mysql_func(__FILE__, __LINE__) + /********************************************************************//** Releases the dictionary system mutex for MySQL. */ UNIV_INTERN diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index 43bd42ae025..7c51faf844e 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -543,6 +543,9 @@ dict_tf_is_valid( ulint data_dir = DICT_TF_HAS_DATA_DIR(flags); ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(flags); + ulint page_encryption = DICT_TF_GET_PAGE_ENCRYPTION(flags); + ulint page_encryption_key = DICT_TF_GET_PAGE_ENCRYPTION_KEY(flags); + /* Make sure there are no bits that we do not know about. */ if (unused != 0) { fprintf(stderr, @@ -552,10 +555,12 @@ dict_tf_is_valid( "InnoDB: compact %ld atomic_blobs %ld\n" "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" "InnoDB: page_compression %ld page_compression_level %ld\n" - "InnoDB: atomic_writes %ld\n", + "InnoDB: atomic_writes %ld\n" + "InnoDB: page_encryption %ld page_encryption_key %ld\n", unused, compact, atomic_blobs, unused, data_dir, zip_ssize, - page_compression, page_compression_level, atomic_writes + page_compression, page_compression_level, atomic_writes, + page_encryption, page_encryption_key ); return(false); @@ -852,7 +857,9 @@ dict_tf_set( pages */ ulint page_compression_level, /*!< in: table page compression level */ - ulint atomic_writes) /*!< in: table atomic writes setup */ + ulint atomic_writes, /*!< in: table atomic writes setup */ + bool page_encrypted, /*!< in: table uses page encryption */ + ulint page_encryption_key /*!< in: page encryption key */) { atomic_writes_t awrites = (atomic_writes_t)atomic_writes; @@ -893,6 +900,11 @@ dict_tf_set( if (use_data_dir) { 
*flags |= (1 << DICT_TF_POS_DATA_DIR); } + + if (page_encrypted) { + *flags |= (1 << DICT_TF_POS_PAGE_ENCRYPTION) + | (page_encryption_key << DICT_TF_POS_PAGE_ENCRYPTION_KEY); + } } /********************************************************************//** @@ -915,6 +927,10 @@ dict_tf_to_fsp_flags( ulint fsp_flags; ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags); ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags); + + ulint page_encryption = DICT_TF_GET_PAGE_ENCRYPTION(table_flags); + ulint page_encryption_key = DICT_TF_GET_PAGE_ENCRYPTION_KEY(table_flags); + ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags); DBUG_EXECUTE_IF("dict_tf_to_fsp_flags_failure", @@ -942,6 +958,14 @@ dict_tf_to_fsp_flags( if page compression is used for this table. */ fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(fsp_flags, page_compression_level); + /* In addition, tablespace flags also contain if the page + encryption is used for this table. */ + fsp_flags |= FSP_FLAGS_SET_PAGE_ENCRYPTION(fsp_flags, page_encryption); + + /* In addition, tablespace flags also contain page encryption key if the page + encryption is used for this table. 
*/ + fsp_flags |= FSP_FLAGS_SET_PAGE_ENCRYPTION_KEY(fsp_flags, page_encryption_key); + /* In addition, tablespace flags also contain flag if atomic writes is used for this table */ fsp_flags |= FSP_FLAGS_SET_ATOMIC_WRITES(fsp_flags, atomic_writes); @@ -983,6 +1007,9 @@ dict_sys_tables_type_to_tf( | DICT_TF_MASK_PAGE_COMPRESSION | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL | DICT_TF_MASK_ATOMIC_WRITES + | DICT_TF_MASK_PAGE_ENCRYPTION + | DICT_TF_MASK_PAGE_ENCRYPTION_KEY + ); return(flags); @@ -1018,7 +1045,9 @@ dict_tf_to_sys_tables_type( | DICT_TF_MASK_DATA_DIR | DICT_TF_MASK_PAGE_COMPRESSION | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL - | DICT_TF_MASK_ATOMIC_WRITES); + | DICT_TF_MASK_ATOMIC_WRITES + | DICT_TF_MASK_PAGE_ENCRYPTION + | DICT_TF_MASK_PAGE_ENCRYPTION_KEY); return(type); } diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 1d59bc09f6d..3e0ca662bc5 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -132,6 +132,12 @@ Width of the page compression flag #define DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL 4 /** +Width of the page encryption flag +*/ +#define DICT_TF_WIDTH_PAGE_ENCRYPTION 1 +#define DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY 8 + +/** Width of atomic writes flag DEFAULT=0, ON = 1, OFF = 2 */ @@ -144,7 +150,9 @@ DEFAULT=0, ON = 1, OFF = 2 + DICT_TF_WIDTH_DATA_DIR \ + DICT_TF_WIDTH_PAGE_COMPRESSION \ + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL \ - + DICT_TF_WIDTH_ATOMIC_WRITES) + + DICT_TF_WIDTH_ATOMIC_WRITES \ + + DICT_TF_WIDTH_PAGE_ENCRYPTION \ + + DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY) /** A mask of all the known/used bits in table flags */ #define DICT_TF_BIT_MASK (~(~0 << DICT_TF_BITS)) @@ -167,12 +175,16 @@ DEFAULT=0, ON = 1, OFF = 2 #define DICT_TF_POS_PAGE_COMPRESSION_LEVEL (DICT_TF_POS_PAGE_COMPRESSION \ + DICT_TF_WIDTH_PAGE_COMPRESSION) /** Zero relative shift position of the ATOMIC_WRITES field */ -#define DICT_TF_POS_ATOMIC_WRITES (DICT_TF_POS_PAGE_COMPRESSION_LEVEL \ +#define 
DICT_TF_POS_ATOMIC_WRITES (DICT_TF_POS_PAGE_COMPRESSION_LEVEL \ + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL) - -/** Zero relative shift position of the start of the UNUSED bits */ -#define DICT_TF_POS_UNUSED (DICT_TF_POS_ATOMIC_WRITES \ - + DICT_TF_WIDTH_ATOMIC_WRITES) +/** Zero relative shift position of the PAGE_ENCRYPTION field */ +#define DICT_TF_POS_PAGE_ENCRYPTION (DICT_TF_POS_ATOMIC_WRITES \ + + DICT_TF_WIDTH_ATOMIC_WRITES) +/** Zero relative shift position of the PAGE_ENCRYPTION_KEY field */ +#define DICT_TF_POS_PAGE_ENCRYPTION_KEY (DICT_TF_POS_PAGE_ENCRYPTION \ + + DICT_TF_WIDTH_PAGE_ENCRYPTION) +#define DICT_TF_POS_UNUSED (DICT_TF_POS_PAGE_ENCRYPTION_KEY \ + + DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY) /** Bit mask of the COMPACT field */ #define DICT_TF_MASK_COMPACT \ @@ -202,6 +214,14 @@ DEFAULT=0, ON = 1, OFF = 2 #define DICT_TF_MASK_ATOMIC_WRITES \ ((~(~0 << DICT_TF_WIDTH_ATOMIC_WRITES)) \ << DICT_TF_POS_ATOMIC_WRITES) +/** Bit mask of the PAGE_ENCRYPTION field */ +#define DICT_TF_MASK_PAGE_ENCRYPTION \ + ((~(~0 << DICT_TF_WIDTH_PAGE_ENCRYPTION)) \ + << DICT_TF_POS_PAGE_ENCRYPTION) +/** Bit mask of the PAGE_ENCRYPTION_KEY field */ +#define DICT_TF_MASK_PAGE_ENCRYPTION_KEY \ + ((~(~0 << DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY)) \ + << DICT_TF_POS_PAGE_ENCRYPTION_KEY) /** Return the value of the COMPACT field */ #define DICT_TF_GET_COMPACT(flags) \ @@ -231,6 +251,14 @@ DEFAULT=0, ON = 1, OFF = 2 #define DICT_TF_GET_ATOMIC_WRITES(flags) \ ((flags & DICT_TF_MASK_ATOMIC_WRITES) \ >> DICT_TF_POS_ATOMIC_WRITES) +/** Return the contents of the PAGE_ENCRYPTION field */ +#define DICT_TF_GET_PAGE_ENCRYPTION(flags) \ + ((flags & DICT_TF_MASK_PAGE_ENCRYPTION) \ + >> DICT_TF_POS_PAGE_ENCRYPTION) +/** Return the contents of the PAGE_ENCRYPTION KEY field */ +#define DICT_TF_GET_PAGE_ENCRYPTION_KEY(flags) \ + ((flags & DICT_TF_MASK_PAGE_ENCRYPTION_KEY) \ + >> DICT_TF_POS_PAGE_ENCRYPTION_KEY) /** Return the contents of the UNUSED bits */ #define DICT_TF_GET_UNUSED(flags) \ @@ -1190,20 
+1218,29 @@ struct dict_table_t{ calculation; this counter is not protected by any latch, because this is only used for heuristics */ -#define BG_STAT_NONE 0 -#define BG_STAT_IN_PROGRESS (1 << 0) + +#define BG_STAT_IN_PROGRESS ((byte)(1 << 0)) /*!< BG_STAT_IN_PROGRESS is set in stats_bg_flag when the background stats code is working on this table. The DROP TABLE code waits for this to be cleared before proceeding. */ -#define BG_STAT_SHOULD_QUIT (1 << 1) +#define BG_STAT_SHOULD_QUIT ((byte)(1 << 1)) /*!< BG_STAT_SHOULD_QUIT is set in stats_bg_flag when DROP TABLE starts waiting on BG_STAT_IN_PROGRESS to be cleared, the background stats thread will detect this and will eventually quit sooner */ - byte stats_bg_flag; +#define BG_SCRUB_IN_PROGRESS ((byte)(1 << 2)) + /*!< BG_SCRUB_IN_PROGRESS is set in + stats_bg_flag when the background + scrub code is working on this table. The DROP + TABLE code waits for this to be cleared + before proceeding. */ + +#define BG_IN_PROGRESS (BG_STAT_IN_PROGRESS | BG_SCRUB_IN_PROGRESS) + + byte stats_bg_flag; /*!< see BG_STAT_* above. Writes are covered by dict_sys->mutex. Dirty reads are possible. 
*/ diff --git a/storage/innobase/include/dict0pagecompress.ic b/storage/innobase/include/dict0pagecompress.ic index 811976434a8..a71b2b34b07 100644 --- a/storage/innobase/include/dict0pagecompress.ic +++ b/storage/innobase/include/dict0pagecompress.ic @@ -42,6 +42,8 @@ dict_tf_verify_flags( ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags); ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags); ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags); + ulint page_encryption = DICT_TF_GET_PAGE_ENCRYPTION(table_flags); + ulint page_encryption_key = DICT_TF_GET_PAGE_ENCRYPTION_KEY(table_flags); ulint post_antelope = FSP_FLAGS_GET_POST_ANTELOPE(fsp_flags); ulint zip_ssize = FSP_FLAGS_GET_ZIP_SSIZE(fsp_flags); ulint fsp_atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(fsp_flags); @@ -50,6 +52,9 @@ dict_tf_verify_flags( ulint fsp_page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(fsp_flags); ulint fsp_page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(fsp_flags); ulint fsp_atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(fsp_flags); + ulint fsp_page_encryption = FSP_FLAGS_GET_PAGE_ENCRYPTION(fsp_flags); + ulint fsp_page_encryption_key = FSP_FLAGS_GET_PAGE_ENCRYPTION_KEY(fsp_flags); + DBUG_EXECUTE_IF("dict_tf_verify_flags_failure", return(ULINT_UNDEFINED);); @@ -107,6 +112,27 @@ dict_tf_verify_flags( return (FALSE); } + if (page_encryption != fsp_page_encryption) { + fprintf(stderr, + "InnoDB: Error: table flags has page_encryption %ld" + " in the data dictionary\n" + "InnoDB: but the flags in file has page_encryption %ld\n", + page_encryption, fsp_page_encryption); + + return (FALSE); + } + + if (page_encryption_key != fsp_page_encryption_key) { + fprintf(stderr, + "InnoDB: Error: table flags has page_encryption_key %ld" + " in the data dictionary\n" + "InnoDB: but the flags in file has page_encryption_key %ld\n", + page_encryption_key, fsp_page_encryption_key); + + return (FALSE); + } + + return(TRUE); } diff --git 
a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index c91e1777a13..1ed8cbf3293 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -26,7 +26,7 @@ Created 10/25/1995 Heikki Tuuri #ifndef fil0fil_h #define fil0fil_h - +#define MSG_CANNOT_DECRYPT "can not decrypt" #include "univ.i" #ifndef UNIV_INNOCHECKSUM @@ -127,16 +127,20 @@ extern fil_addr_t fil_addr_null; MySQL/InnoDB 5.1.7 or later, the contents of this field is valid for all uncompressed pages. */ -#define FIL_PAGE_FILE_FLUSH_LSN 26 /*!< this is only defined for the - first page in a system tablespace - data file (ibdata*, not *.ibd): - the file has been flushed to disk - at least up to this lsn */ +#define FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION 26 /*!< for the first page + in a system tablespace data file + (ibdata*, not *.ibd): the file has + been flushed to disk at least up + to this lsn + for other pages: a 32-bit key version + used to encrypt the page + 32-bit checksum + or 64 bits of zero if no encryption + */ /** If page type is FIL_PAGE_COMPRESSED then the 8 bytes starting at FIL_PAGE_FILE_FLUSH_LSN are broken down as follows: */ /** Control information version format (u8) */ -static const ulint FIL_PAGE_VERSION = FIL_PAGE_FILE_FLUSH_LSN; +static const ulint FIL_PAGE_VERSION = FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION; /** Compression algorithm (u8) */ static const ulint FIL_PAGE_ALGORITHM_V1 = FIL_PAGE_VERSION + 1; @@ -169,7 +173,10 @@ static const ulint FIL_PAGE_COMPRESS_SIZE_V1 = FIL_PAGE_ORIGINAL_SIZE_V1 + 2; /* @} */ /** File page types (values of FIL_PAGE_TYPE) @{ */ +#define FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED 35631 /* page compressed + + encrypted page */ #define FIL_PAGE_PAGE_COMPRESSED 34354 /*!< page compressed page */ +#define FIL_PAGE_PAGE_ENCRYPTED 34355 /*!< Page encrypted page */ #define FIL_PAGE_INDEX 17855 /*!< B-tree node */ #define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */ #define FIL_PAGE_INODE 3 /*!< Index node */ @@ 
-207,6 +214,9 @@ extern ulint fil_n_pending_tablespace_flushes; /** Number of files currently open */ extern ulint fil_n_file_opened; +/* structure containing encryption specification */ +typedef struct fil_space_crypt_struct fil_space_crypt_t; + struct fsp_open_info { ibool success; /*!< Has the tablespace been opened? */ const char* check_msg; /*!< fil_check_first_page() message */ @@ -216,9 +226,11 @@ struct fsp_open_info { lsn_t lsn; /*!< Flushed LSN from header page */ ulint id; /*!< Space ID */ ulint flags; /*!< Tablespace flags */ + ulint encryption_error; /*!< if an encryption error occurs */ #ifdef UNIV_LOG_ARCHIVE ulint arch_log_no; /*!< latest archived log file number */ #endif /* UNIV_LOG_ARCHIVE */ + fil_space_crypt_t* crypt_data; /*!< crypt data */ }; struct fil_space_t; @@ -333,6 +345,7 @@ struct fil_space_t { unflushed_spaces */ UT_LIST_NODE_T(fil_space_t) space_list; /*!< list of all spaces */ + fil_space_crypt_t* crypt_data; ulint magic_n;/*!< FIL_SPACE_MAGIC_N */ }; @@ -468,7 +481,9 @@ fil_space_create( ulint id, /*!< in: space id */ ulint zip_size,/*!< in: compressed page size, or 0 for uncompressed tablespaces */ - ulint purpose);/*!< in: FIL_TABLESPACE, or FIL_LOG if log */ + ulint purpose, /*!< in: FIL_TABLESPACE, or FIL_LOG if log */ + fil_space_crypt_t* crypt_data); /*!< in: crypt data */ + /*******************************************************************//** Assigns a new space id for a new single-table tablespace. This works simply by incrementing the global counter. 
If 4 billion id's is not enough, we may need @@ -609,8 +624,10 @@ fil_read_first_page( lsn values in data files */ lsn_t* max_flushed_lsn, /*!< out: max of flushed lsn values in data files */ - ulint orig_space_id) /*!< in: file space id or + ulint orig_space_id, /*!< in: file space id or ULINT_UNDEFINED */ + fil_space_crypt_t** crypt_data) /*!< out: crypt data */ + __attribute__((warn_unused_result)); /*******************************************************************//** Increments the count of pending operation, if space is not being deleted. @@ -976,11 +993,12 @@ fil_io( appropriately aligned */ void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ - ulint* write_size) /*!< in/out: Actual write size initialized + ulint* write_size, /*!< in/out: Actual write size initialized after fist successfull trim operation for this page and if initialized we do not trim again if actual page size does not decrease. */ + lsn_t lsn) /* lsn of the newest modification */ __attribute__((nonnull(8))); /**********************************************************************//** Waits for an aio operation to complete. 
This function is used to write the @@ -1231,6 +1249,13 @@ fil_user_tablespace_restore_page( ulint page_no); /* in: page_no to obtain from double write buffer */ +/*******************************************************************//** +Return space flags */ +ulint +fil_space_flags( +/*===========*/ + fil_space_t* space); /*!< in: space */ + #endif /* !UNIV_INNOCHECKSUM */ /****************************************************************//** @@ -1257,6 +1282,277 @@ char* fil_space_name( /*===========*/ fil_space_t* space); /*!< in: space */ + +/****************************************************************** +Get id of first tablespace or ULINT_UNDEFINED if none */ +UNIV_INTERN +ulint +fil_get_first_space(); + +/****************************************************************** +Get id of next tablespace or ULINT_UNDEFINED if none */ +UNIV_INTERN +ulint +fil_get_next_space( + ulint id); /*!< in: space id */ + +/********************************************************************* +Init global resources needed for tablespace encryption/decryption */ +void +fil_space_crypt_init(); + +/********************************************************************* +Cleanup global resources needed for tablespace encryption/decryption */ +void +fil_space_crypt_cleanup(); + +/********************************************************************* +Create crypt data, i.e data that is used for a single tablespace */ +fil_space_crypt_t * +fil_space_create_crypt_data(); + +/********************************************************************* +Destroy crypt data */ +UNIV_INTERN +void +fil_space_destroy_crypt_data( +/*=========================*/ + fil_space_crypt_t **crypt_data); /*!< in/out: crypt data */ + +/********************************************************************* +Get crypt data for a space*/ +fil_space_crypt_t * +fil_space_get_crypt_data( +/*======================*/ + ulint space); /*!< in: tablespace id */ + 
+/********************************************************************* +Set crypt data for a space*/ +void +fil_space_set_crypt_data( +/*======================*/ + ulint space, /*!< in: tablespace id */ + fil_space_crypt_t* crypt_data); /*!< in: crypt data */ + +/********************************************************************* +Compare crypt data*/ +int +fil_space_crypt_compare( +/*======================*/ + const fil_space_crypt_t* crypt_data1, /*!< in: crypt data */ + const fil_space_crypt_t* crypt_data2); /*!< in: crypt data */ + +/********************************************************************* +Read crypt data from buffer page */ +fil_space_crypt_t * +fil_space_read_crypt_data( +/*======================*/ + ulint space, /*!< in: tablespace id */ + const byte* page, /*!< in: buffer page */ + ulint offset); /*!< in: offset where crypt data is stored */ + +/********************************************************************* +Write crypt data to buffer page */ +void +fil_space_write_crypt_data( +/*=======================*/ + ulint space, /*!< in: tablespace id */ + byte* page, /*!< in: buffer page */ + ulint offset, /*!< in: offset where to store data */ + ulint maxsize, /*!< in: max space available to store crypt data in */ + mtr_t * mtr); /*!< in: mini-transaction */ + +/********************************************************************* +Clear crypt data from page 0 (used for import tablespace) */ +void +fil_space_clear_crypt_data( +/*======================*/ + byte* page, /*!< in: buffer page */ + ulint offset); /*!< in: offset where crypt data is stored */ + +/********************************************************************* +Parse crypt data log record */ +byte* +fil_parse_write_crypt_data( +/*=======================*/ + byte* ptr, /*!< in: start of log record */ + byte* end_ptr, /*!< in: end of log record */ + buf_block_t*); /*!< in: buffer page to apply record to */ + 
+/********************************************************************* +Check if extra buffer shall be allocated for decrypting after read */ +UNIV_INTERN +bool +fil_space_check_encryption_read( +/*==============================*/ + ulint space); /*!< in: tablespace id */ + +/********************************************************************* +Check if page shall be encrypted before write */ +UNIV_INTERN +bool +fil_space_check_encryption_write( +/*==============================*/ + ulint space); /*!< in: tablespace id */ + +/********************************************************************* +Encrypt buffer page */ +void +fil_space_encrypt( +/*===============*/ + ulint space, /*!< in: tablespace id */ + ulint offset, /*!< in: page no */ + lsn_t lsn, /*!< in: page lsn */ + const byte* src_frame,/*!< in: page frame */ + ulint size, /*!< in: size of data to encrypt */ + byte* dst_frame, /*!< in: where to encrypt to */ + ulint page_encryption_key); /*!< in: page encryption key id if page + encrypted */ + +/********************************************************************* +Decrypt buffer page */ +void +fil_space_decrypt( +/*===============*/ + ulint space, /*!< in: tablespace id */ + const byte* src_frame,/*!< in: page frame */ + ulint page_size, /*!< in: size of data to encrypt */ + byte* dst_frame); /*!< in: where to decrypt to */ + + +/********************************************************************* +Decrypt buffer page +@return true if page was encrypted */ +bool +fil_space_decrypt( +/*===============*/ + fil_space_crypt_t* crypt_data, /*!< in: crypt data */ + const byte* src_frame,/*!< in: page frame */ + ulint page_size, /*!< in: page size */ + byte* dst_frame); /*!< in: where to decrypt to */ + +/********************************************************************* +fil_space_verify_crypt_checksum +NOTE: currently this function can only be run in single threaded mode +as it modifies srv_checksum_algorithm (temporarily) +@return true if page is 
encrypted AND OK, false otherwise */ +bool +fil_space_verify_crypt_checksum( +/*===============*/ + const byte* src_frame,/*!< in: page frame */ + ulint zip_size); /*!< in: size of data to encrypt */ + +/********************************************************************* +Init threads for key rotation */ +void +fil_crypt_threads_init(); + +/********************************************************************* +Set thread count (e.g start or stops threads) used for key rotation */ +void +fil_crypt_set_thread_cnt( +/*=====================*/ + uint new_cnt); /*!< in: requested #threads */ + +/********************************************************************* +End threads for key rotation */ +void +fil_crypt_threads_end(); + +/********************************************************************* +Cleanup resources for threads for key rotation */ +void +fil_crypt_threads_cleanup(); + +/********************************************************************* +Set rotate key age */ +void +fil_crypt_set_rotate_key_age( +/*=====================*/ + uint rotate_age); /*!< in: requested rotate age */ + +/********************************************************************* +Set rotation threads iops */ +void +fil_crypt_set_rotation_iops( +/*=====================*/ + uint iops); /*!< in: requested iops */ + +/********************************************************************* +Mark a space as closing */ +UNIV_INTERN +void +fil_space_crypt_mark_space_closing( +/*===============*/ + ulint space); /*!< in: tablespace id */ + +/********************************************************************* +Wait for crypt threads to stop accessing space */ +UNIV_INTERN +void +fil_space_crypt_close_tablespace( +/*===============*/ + ulint space); /*!< in: tablespace id */ + +/** Struct for retrieving info about encryption */ +struct fil_space_crypt_status_t { + ulint space; /*!< tablespace id */ + ulint scheme; /*!< encryption scheme */ + uint min_key_version; /*!< min key version */ + 
uint current_key_version;/*!< current key version */ + uint keyserver_requests;/*!< no of key requests to key server */ + bool rotating; /*!< is key rotation ongoing */ + bool flushing; /*!< is flush at end of rotation ongoing */ + ulint rotate_next_page_number; /*!< next page if key rotating */ + ulint rotate_max_page_number; /*!< max page if key rotating */ +}; + +/********************************************************************* +Get crypt status for a space +@return 0 if crypt data found */ +int +fil_space_crypt_get_status( +/*==================*/ + ulint id, /*!< in: space id */ + struct fil_space_crypt_status_t * status); /*!< out: status */ + +/** Struct for retrieving statistics about encryption key rotation */ +struct fil_crypt_stat_t { + ulint pages_read_from_cache; + ulint pages_read_from_disk; + ulint pages_modified; + ulint pages_flushed; + ulint estimated_iops; +}; + +/********************************************************************* +Get crypt rotation statistics */ +void +fil_crypt_total_stat( +/*==================*/ + fil_crypt_stat_t* stat); /*!< out: crypt stat */ + +/** Struct for retrieving info about scrubbing */ +struct fil_space_scrub_status_t { + ulint space; /*!< tablespace id */ + bool compressed; /*!< is space compressed */ + time_t last_scrub_completed; /*!< when was last scrub completed */ + bool scrubbing; /*!< is scrubbing ongoing */ + time_t current_scrub_started; /*!< when started current scrubbing */ + ulint current_scrub_active_threads; /*!< current scrub active threads */ + ulint current_scrub_page_number; /*!< current scrub page no */ + ulint current_scrub_max_page_number; /*!< current scrub max page no */ +}; + +/********************************************************************* +Get scrub status for a space +@return 0 if no scrub info found */ +int +fil_space_get_scrub_status( +/*==================*/ + ulint id, /*!< in: space id */ + struct fil_space_scrub_status_t * status); /*!< out: status */ + #endif 
/*******************************************************************//** diff --git a/storage/innobase/include/fil0pageencryption.h b/storage/innobase/include/fil0pageencryption.h new file mode 100644 index 00000000000..9769f8c1912 --- /dev/null +++ b/storage/innobase/include/fil0pageencryption.h @@ -0,0 +1,76 @@ +/***************************************************************************** + +Copyright (C) 2014 eperi GmbH. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +#ifndef fil0pageencryption_h +#define fil0pageencryption_h + +#define PAGE_ENCRYPTION_WRONG_KEY 1 +#define PAGE_ENCRYPTION_WRONG_PAGE_TYPE 2 +#define PAGE_ENCRYPTION_ERROR 3 +#define PAGE_ENCRYPTION_KEY_MISSING 4 +#define PAGE_ENCRYPTION_OK 0 +#define PAGE_ENCRYPTION_WILL_NOT_ENCRYPT 5 + +#include "fsp0fsp.h" +#include "fsp0pageencryption.h" + +/******************************************************************//** +@file include/fil0pageencryption.h +Helper functions for encryption/decryption page data on to table space. + +Created 08/25/2014 +***********************************************************************/ + + +/******************************PAGE_ENCRYPTION_ERROR*************************************//** +Returns the page encryption flag of the space, or false if the space +is not encrypted. 
The tablespace must be cached in the memory cache. +@return true if page encrypted, false if not or space not found */ +ibool +fil_space_is_page_encrypted( +/*=========================*/ + ulint id); /*!< in: space id */ + +/*******************************************************************//** +Find out whether the page is page encrypted +@return true if page is page encrypted, false if not */ +UNIV_INLINE +ibool +fil_page_is_encrypted( +/*==================*/ + const byte *buf); /*!< in: page */ + +/*******************************************************************//** +Find out whether the page is page compressed and then encrypted +@return true if page is page compressed+encrypted, false if not */ +UNIV_INLINE +ibool +fil_page_is_compressed_encrypted( +/*=============================*/ + const byte *buf); /*!< in: page */ + +/*******************************************************************//** +Find out whether the page can be decrypted +@return 0 (PAGE_ENCRYPTION_OK) if decryption should be possible, PAGE_ENCRYPTION_KEY_MISSING or PAGE_ENCRYPTION_ERROR otherwise */ +UNIV_INLINE +ulint +fil_page_encryption_status( +/*===================*/ + const byte *buf); /*!< in: page */ + +#endif // fil0pageencryption_h diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h index 87f1f5a636d..96e638fcdd9 100644 --- a/storage/innobase/include/fsp0fsp.h +++ b/storage/innobase/include/fsp0fsp.h @@ -57,6 +57,11 @@ is found in a remote location, not the default data directory. 
*/ /** Number of flag bits used to indicate the page compression and compression level */ #define FSP_FLAGS_WIDTH_PAGE_COMPRESSION 1 #define FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL 4 + +/** Number of flag bits used to indicate the page encryption and page encryption key */ +#define FSP_FLAGS_WIDTH_PAGE_ENCRYPTION 1 +#define FSP_FLAGS_WIDTH_PAGE_ENCRYPTION_KEY 8 + /** Number of flag bits used to indicate atomic writes for this tablespace */ #define FSP_FLAGS_WIDTH_ATOMIC_WRITES 2 @@ -68,7 +73,9 @@ is found in a remote location, not the default data directory. */ + FSP_FLAGS_WIDTH_DATA_DIR \ + FSP_FLAGS_WIDTH_PAGE_COMPRESSION \ + FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL \ - + FSP_FLAGS_WIDTH_ATOMIC_WRITES) + + FSP_FLAGS_WIDTH_ATOMIC_WRITES \ + + FSP_FLAGS_WIDTH_PAGE_ENCRYPTION \ + + FSP_FLAGS_WIDTH_PAGE_ENCRYPTION_KEY) /** A mask of all the known/used bits in tablespace flags */ #define FSP_FLAGS_MASK (~(~0 << FSP_FLAGS_WIDTH)) @@ -92,9 +99,15 @@ dictionary */ /** Zero relative shift position of the ATOMIC_WRITES field */ #define FSP_FLAGS_POS_ATOMIC_WRITES (FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL \ + FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL) - /** Zero relative shift position of the PAGE_SSIZE field */ -#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_ATOMIC_WRITES \ - + FSP_FLAGS_WIDTH_ATOMIC_WRITES) +/** Zero relative shift position of the PAGE_ENCRYPTION field */ +#define FSP_FLAGS_POS_PAGE_ENCRYPTION (FSP_FLAGS_POS_ATOMIC_WRITES \ + + FSP_FLAGS_WIDTH_ATOMIC_WRITES) +/** Zero relative shift position of the PAGE_ENCRYPTION_KEY field */ +#define FSP_FLAGS_POS_PAGE_ENCRYPTION_KEY (FSP_FLAGS_POS_PAGE_ENCRYPTION \ + + FSP_FLAGS_WIDTH_PAGE_ENCRYPTION) +/** Zero relative shift position of the PAGE_SSIZE field */ +#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_PAGE_ENCRYPTION_KEY \ + + FSP_FLAGS_WIDTH_PAGE_ENCRYPTION_KEY) /** Zero relative shift position of the start of the UNUSED bits */ #define FSP_FLAGS_POS_DATA_DIR (FSP_FLAGS_POS_PAGE_SSIZE \ + FSP_FLAGS_WIDTH_PAGE_SSIZE) @@ 
-130,11 +143,18 @@ dictionary */ #define FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL \ ((~(~0 << FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL)) \ << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL) +/** Bit mask of the PAGE_ENCRYPTION field */ +#define FSP_FLAGS_MASK_PAGE_ENCRYPTION \ + ((~(~0 << FSP_FLAGS_WIDTH_PAGE_ENCRYPTION)) \ + << FSP_FLAGS_POS_PAGE_ENCRYPTION) +/** Bit mask of the PAGE_ENCRYPTION_KEY field */ +#define FSP_FLAGS_MASK_PAGE_ENCRYPTION_KEY \ + ((~(~0 << FSP_FLAGS_WIDTH_PAGE_ENCRYPTION_KEY)) \ + << FSP_FLAGS_POS_PAGE_ENCRYPTION_KEY) /** Bit mask of the ATOMIC_WRITES field */ #define FSP_FLAGS_MASK_ATOMIC_WRITES \ ((~(~0 << FSP_FLAGS_WIDTH_ATOMIC_WRITES)) \ << FSP_FLAGS_POS_ATOMIC_WRITES) - /** Return the value of the POST_ANTELOPE field */ #define FSP_FLAGS_GET_POST_ANTELOPE(flags) \ ((flags & FSP_FLAGS_MASK_POST_ANTELOPE) \ @@ -171,6 +191,14 @@ dictionary */ #define FSP_FLAGS_GET_ATOMIC_WRITES(flags) \ ((flags & FSP_FLAGS_MASK_ATOMIC_WRITES) \ >> FSP_FLAGS_POS_ATOMIC_WRITES) +/** Return the value of the PAGE_ENCRYPTION field */ +#define FSP_FLAGS_GET_PAGE_ENCRYPTION(flags) \ + ((flags & FSP_FLAGS_MASK_PAGE_ENCRYPTION) \ + >> FSP_FLAGS_POS_PAGE_ENCRYPTION) +/** Return the value of the PAGE_ENCRYPTION_KEY field */ +#define FSP_FLAGS_GET_PAGE_ENCRYPTION_KEY(flags) \ + ((flags & FSP_FLAGS_MASK_PAGE_ENCRYPTION_KEY) \ + >> FSP_FLAGS_POS_PAGE_ENCRYPTION_KEY) /** Set a PAGE_SSIZE into the correct bits in a given tablespace flags. */ @@ -186,6 +214,14 @@ tablespace flags. */ tablespace flags. */ #define FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(flags, level) \ (flags | (level << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL)) + +/** Set a PAGE_ENCRYPTION into the correct bits in a given tablespace flags. */ +#define FSP_FLAGS_SET_PAGE_ENCRYPTION(flags, encryption) \ + (flags | (encryption << FSP_FLAGS_POS_PAGE_ENCRYPTION)) +/** Set a PAGE_ENCRYPTION_KEY into the correct bits in a given tablespace flags. 
*/ +#define FSP_FLAGS_SET_PAGE_ENCRYPTION_KEY(flags, encryption_key) \ + (flags | (encryption_key << FSP_FLAGS_POS_PAGE_ENCRYPTION_KEY)) + /** Set a ATOMIC_WRITES into the correct bits in a given tablespace flags. */ #define FSP_FLAGS_SET_ATOMIC_WRITES(flags, atomics) \ @@ -800,6 +836,33 @@ fsp_flags_get_page_size( /*====================*/ ulint flags); /*!< in: tablespace flags */ +/*********************************************************************/ +/* @return offset into fsp header where crypt data is stored */ +UNIV_INTERN +ulint +fsp_header_get_crypt_offset( +/*========================*/ + ulint zip_size, /*!< in: zip_size */ + ulint* max_size); /*!< out: free space after offset */ + +#define fsp_page_is_free(space,page,mtr) \ + fsp_page_is_free_func(space,page,mtr, __FILE__, __LINE__) + +#ifndef UNIV_INNOCHECKSUM +/**********************************************************************//** +Checks if a single page is free. +@return true if free */ +UNIV_INTERN +bool +fsp_page_is_free_func( +/*==============*/ + ulint space, /*!< in: space id */ + ulint page, /*!< in: page offset */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + const char *file, + ulint line); +#endif + #ifndef UNIV_NONINL #include "fsp0fsp.ic" #endif diff --git a/storage/innobase/include/fsp0pagecompress.ic b/storage/innobase/include/fsp0pagecompress.ic index 3e59106b05d..4d4ee1c376a 100644 --- a/storage/innobase/include/fsp0pagecompress.ic +++ b/storage/innobase/include/fsp0pagecompress.ic @@ -193,5 +193,5 @@ fil_page_is_lzo_compressed( byte *buf) /*!< in: page */ { return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED && - mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN) == PAGE_LZO_ALGORITHM); + mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) == PAGE_LZO_ALGORITHM); } diff --git a/storage/innobase/include/fsp0pageencryption.h b/storage/innobase/include/fsp0pageencryption.h new file mode 100644 index 00000000000..52365c8e93c --- /dev/null +++ 
b/storage/innobase/include/fsp0pageencryption.h @@ -0,0 +1,66 @@ +/***************************************************************************** + + Copyright (C) 2014 eperi GmbH. All Rights Reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/******************************************************************/ + +/******************************************************************//** +@file include/fsp0pageencryption.h +Helper functions for extracting/storing page encryption information to file space. + +Created 08/28/2014 +***********************************************************************/ + +#ifndef FSP0PAGEENCRYPTION_H_ +#define FSP0PAGEENCRYPTION_H_ + + +#define FIL_PAGE_ENCRYPTION_AES_128 16 /*!< Encryption algorithm AES-128. */ +#define FIL_PAGE_ENCRYPTION_AES_196 24 /*!< Encryption algorithm AES-192. */ +#define FIL_PAGE_ENCRYPTION_AES_256 32 /*!< Encryption algorithm AES-256. */ + +#define FIL_PAGE_ENCRYPTED_SIZE 2 /*!< Number of bytes used to store + actual payload data size on encrypted pages. */ + +/********************************************************************//** +Determine if the tablespace is page encrypted from dict_table_t::flags. 
+@return TRUE if page encrypted, FALSE if not page encrypted */ +UNIV_INLINE +ibool +fsp_flags_is_page_encrypted( +/*=========================*/ + ulint flags); /*!< in: tablespace flags */ + + +/********************************************************************//** +Extract the page encryption key from tablespace flags. +A tablespace has only one physical page encryption key +whether that page is encrypted or not. +@return page encryption key of the file-per-table tablespace, +or zero if the table is not encrypted. */ +UNIV_INLINE +ulint +fsp_flags_get_page_encryption_key( +/*=================================*/ + ulint flags); /*!< in: tablespace flags */ + + +#ifndef UNIV_NONINL +#include "fsp0pageencryption.ic" +#endif + + +#endif /* FSP0PAGEENCRYPTION_H_ */ diff --git a/storage/innobase/include/fsp0pageencryption.ic b/storage/innobase/include/fsp0pageencryption.ic new file mode 100644 index 00000000000..42c980b0430 --- /dev/null +++ b/storage/innobase/include/fsp0pageencryption.ic @@ -0,0 +1,168 @@ +/***************************************************************************** + + Copyright (C) 2014 eperi GmbH. All Rights Reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/******************************************************************//** +@file include/fsp0pageencryption.ic +Implementation for helper functions for encrypting/decrypting pages +and atomic writes information to file space. + +Created 08/28/2014 +***********************************************************************/ + +#include "fsp0fsp.h" +#include "fil0pageencryption.h" +#include <my_crypt_key_management.h> + + +/********************************************************************//** +Determine if the tablespace is page encrypted from dict_table_t::flags. +@return TRUE if page encrypted, FALSE if not page encrypted */ +UNIV_INLINE +ibool +fsp_flags_is_page_encrypted( +/*=========================*/ + ulint flags) /*!< in: tablespace flags */ +{ + return(FSP_FLAGS_GET_PAGE_ENCRYPTION(flags)); +} + +/********************************************************************//** +Extract the page encryption key from tablespace flags. +A tablespace has only one physical page encryption key +whether that page is encrypted or not. +@return page encryption key of the file-per-table tablespace, +or zero if the table is not encrypted. */ +UNIV_INLINE +ulint +fsp_flags_get_page_encryption_key( +/*=================================*/ + ulint flags) /*!< in: tablespace flags */ +{ + return(FSP_FLAGS_GET_PAGE_ENCRYPTION_KEY(flags)); +} + + +/*******************************************************************//** +Returns the page encryption flag of the space, or false if the space +is not encrypted. The tablespace must be cached in the memory cache. 
+@return true if page encrypted, false if not or space not found */ +UNIV_INLINE +ibool +fil_space_is_page_encrypted( +/*=========================*/ + ulint id) /*!< in: space id */ +{ + ulint flags; + + flags = fil_space_get_flags(id); + + if (flags && flags != ULINT_UNDEFINED) { + + return(fsp_flags_is_page_encrypted(flags)); + } + + return(flags); +} + +/*******************************************************************//** +Returns the page encryption key of the space, or 0 if the space +is not encrypted. The tablespace must be cached in the memory cache. +@return page encryption key, ULINT_UNDEFINED if space not found */ +UNIV_INLINE +ulint +fil_space_get_page_encryption_key( +/*=================================*/ + ulint id) /*!< in: space id */ +{ + ulint flags; + + flags = fil_space_get_flags(id); + + if (flags && flags != ULINT_UNDEFINED) { + + return(fsp_flags_get_page_encryption_key(flags)); + } + + return(flags); +} + +/*******************************************************************//** +Find out whether the page is page encrypted +@return true if page is page encrypted, false if not */ +UNIV_INLINE +ibool +fil_page_is_encrypted( +/*==================*/ + const byte *buf) /*!< in: page */ +{ + return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_ENCRYPTED); +} + +/*******************************************************************//** +Find out whether the page is first compressed and then encrypted +@return true if page is page compressed+encrypted, false if not */ +UNIV_INLINE +ibool +fil_page_is_compressed_encrypted( +/*=============================*/ + const byte *buf) /*!< in: page */ +{ + return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED); +} + +/*******************************************************************//** +Find out whether the page can be decrypted. +This is the case, if the page is already decrypted and is not the first page of the table space. 
+If the page is already decrypted it is not of the FIL_PAGE_PAGE_ENCRYPTED type. +if it is the first page of the table space, it is assumed that a page can be decrypted if the +key found in the flags (part of the 1st page) can be read from the key provider. +The case, if the key changed, is currently not caught. +The function for decrypting the page should already be executed before this. +@return PAGE_ENCRYPTION_KEY_MISSING if key provider is available, but key is not available + PAGE_ENCRYPTION_ERROR if other error occurred + 0 if decryption should be possible +*/ +UNIV_INLINE +ulint +fil_page_encryption_status( +/*=====================*/ + const byte *buf) /*!< in: page */ +{ + ulint page_type = mach_read_from_2(buf+FIL_PAGE_TYPE); + + if (page_type == FIL_PAGE_TYPE_FSP_HDR) { + ulint flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + buf); + if (fsp_flags_is_page_encrypted(flags)) { + if (!HasCryptoKey(fsp_flags_get_page_encryption_key(flags))) { + /* accessing table would surely fail, because no key or no key provider available */ + if (!HasCryptoKey(fsp_flags_get_page_encryption_key(flags))) { + return PAGE_ENCRYPTION_KEY_MISSING; + } + return PAGE_ENCRYPTION_ERROR; + } + } + } + + if(page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { + ulint key = mach_read_from_4(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + if (!HasCryptoKey(key)) { + return PAGE_ENCRYPTION_KEY_MISSING; + } + return PAGE_ENCRYPTION_ERROR; + } + return 0; +} diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h index a2996ecacc8..f00d754ac66 100644 --- a/storage/innobase/include/fts0fts.h +++ b/storage/innobase/include/fts0fts.h @@ -715,6 +715,34 @@ fts_drop_index_tables( dict_index_t* index) /*!< in: Index to drop */ __attribute__((nonnull, warn_unused_result)); +/****************************************************************** +Wait for background threads to stop using FTS index +*/ +UNIV_INTERN +void +fts_wait_bg_to_stop_using_index( 
+/*======================*/ + trx_t* trx, /*!< in: transaction */ + dict_index_t* index, /*!< in: FTS Index */ + bool drop_table); /*!< in: in addition to stop + using index, also prevent + threads from start using it, + used by drop table */ + +/****************************************************************** +Wait for background threads to stop using any FTS index of the table +*/ +UNIV_INTERN +void +fts_wait_bg_to_stop_using_table( +/*======================*/ + trx_t* trx, /*!< in: transaction */ + dict_table_t* table, /*!< in: table to stop threads */ + bool drop_table); /*!< in: in addition to stop + using table, also prevent + threads from start using it, + used by drop table */ + /******************************************************************//** Remove the table from the OPTIMIZER's list. We do wait for acknowledgement from the consumer of the message. */ diff --git a/storage/innobase/include/log0crypt.h b/storage/innobase/include/log0crypt.h new file mode 100644 index 00000000000..188e82397a2 --- /dev/null +++ b/storage/innobase/include/log0crypt.h @@ -0,0 +1,85 @@ +/**************************************************//** +@file include/log0crypt.h +Innodb log encrypt/decrypt + +Created 11/25/2013 Minli Zhu +*******************************************************/ +#ifndef log0crypt_h +#define log0crypt_h + +#include "univ.i" +#include "ut0byte.h" +#include "ut0lst.h" +#include "ut0rnd.h" +#include "my_aes.h" +#include "my_crypt_key_management.h" // for key version and key + +#define PURPOSE_BYTE_LEN MY_AES_BLOCK_SIZE - 1 +#define PURPOSE_BYTE_OFFSET 0 +#define UNENCRYPTED_KEY_VER 0 + +/* If true, enable redo log encryption. */ +extern my_bool srv_encrypt_log; +/* Plain text used by AES_ECB to generate redo log crypt key. */ +extern byte redo_log_crypt_msg[MY_AES_BLOCK_SIZE]; +/* IV to concatenate with counter used by AES_CTR for redo log crypto. 
*/ +extern byte aes_ctr_nonce[MY_AES_BLOCK_SIZE]; + +/*********************************************************************//** +Generate a 128-bit random message used to generate redo log crypto key. +Init AES-CTR iv/nonce with random number. +It is called only when clean startup (i.e., redo logs do not exist). */ +UNIV_INTERN +void +log_init_crypt_msg_and_nonce(void); +/*===============================*/ +/*********************************************************************//** +Init log_sys redo log crypto key. */ +UNIV_INTERN +void +log_init_crypt_key( +/*===============*/ + const byte* crypt_msg, /*< in: crypt msg */ + const uint crypt_ver, /*< in: mysqld key version */ + byte* crypt_key); /*< out: crypt struct with key and iv */ +/*********************************************************************//** +Encrypt log blocks. */ +UNIV_INTERN +Crypt_result +log_blocks_encrypt( +/*===============*/ + const byte* blocks, /*!< in: blocks before encryption */ + const ulint size, /*!< in: size of blocks, must be multiple of a log block */ + byte* dst_blocks); /*!< out: blocks after encryption */ + +/*********************************************************************//** +Decrypt log blocks. */ +UNIV_INTERN +Crypt_result +log_blocks_decrypt( +/*===============*/ + const byte* blocks, /*!< in: blocks before decryption */ + const ulint size, /*!< in: size of blocks, must be multiple of a log block */ + byte* dst_blocks); /*!< out: blocks after decryption */ + +/*********************************************************************//** +Set next checkpoint's key version to latest one, and generate current +key. Key version 0 means no encryption. 
*/ +UNIV_INTERN +void +log_crypt_set_ver_and_key( +/*======================*/ + uint& key_ver, /*!< out: latest key version */ + byte* crypt_key); /*!< out: crypto key */ + +/*********************************************************************//** +Writes the crypto (version, msg and iv) info, which has been used for +log blocks with lsn <= this checkpoint's lsn, to a log header's +checkpoint buf. */ +UNIV_INTERN +void +log_crypt_write_checkpoint_buf( +/*===========================*/ + byte* buf); /*!< in/out: checkpoint buffer */ + +#endif // log0crypt.h diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index ad9710b1870..79667097724 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -40,9 +40,8 @@ Created 12/9/1995 Heikki Tuuri #include "sync0sync.h" #include "sync0rw.h" #endif /* !UNIV_HOTBACKUP */ +#include "log0crypt.h" -/* Type used for all log sequence number storage and arithmetics */ -typedef ib_uint64_t lsn_t; #define LSN_MAX IB_UINT64_MAX #define LSN_PF UINT64PF @@ -677,8 +676,20 @@ extern log_t* log_sys; is valid */ #endif #define LOG_CHECKPOINT_OFFSET_HIGH32 (16 + LOG_CHECKPOINT_ARRAY_END) -#define LOG_CHECKPOINT_SIZE (20 + LOG_CHECKPOINT_ARRAY_END) - +#define LOG_CRYPT_VER (20 + LOG_CHECKPOINT_ARRAY_END) + /*!< 32-bit key version. Corresponding + key has been used for log records with + lsn <= the checkpoint's lsn */ +#define LOG_CRYPT_MSG (24 + LOG_CHECKPOINT_ARRAY_END) + /*!< a 128-bit value used to + derive crypto key for redo log. 
+ It is generated via the concatenation + of 1 purpose byte T (0x02) and a + 15-byte random number.*/ +#define LOG_CRYPT_IV (40 + LOG_CHECKPOINT_ARRAY_END) + /*!< a 128-bit random number used as + AES-CTR iv/nonce for redo log */ +#define LOG_CHECKPOINT_SIZE (56 + LOG_CHECKPOINT_ARRAY_END) /* Offsets of a log file header */ #define LOG_GROUP_ID 0 /* log group number */ @@ -783,6 +794,10 @@ struct log_t{ lsn_t lsn; /*!< log sequence number */ ulint buf_free; /*!< first free offset within the log buffer */ + uint redo_log_crypt_ver; + /*!< 32-bit crypto ver */ + byte redo_log_crypt_key[MY_AES_BLOCK_SIZE]; + /*!< crypto key to encrypt redo log */ #ifndef UNIV_HOTBACKUP ib_mutex_t mutex; /*!< mutex protecting the log */ @@ -1006,6 +1021,22 @@ struct log_t{ /* @} */ #endif /* UNIV_LOG_ARCHIVE */ +extern os_event_t log_scrub_event; +/* log scrubbing interval in ms */ +extern ulonglong innodb_scrub_log_interval; + +/*****************************************************************//** +This is the main thread for log scrub. It waits for an event and +when waked up fills current log block with dummy records and +sleeps again. 
+@return this function does not return, it calls os_thread_exit() */ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(log_scrub_thread)( +/*===============================*/ + void* arg); /*!< in: a dummy parameter + required by os_thread_create */ + #ifndef UNIV_NONINL #include "log0log.ic" #endif diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 8ede49d4ecc..f8785faafdf 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -434,6 +434,11 @@ struct recv_sys_t{ scan find a corrupt log block, or a corrupt log record, or there is a log parsing buffer overflow */ + uint recv_log_crypt_ver; + /*!< mysqld key version to generate redo + log crypt key for recovery */ + byte recv_log_crypt_key[MY_AES_BLOCK_SIZE]; + /*!< crypto key to decrypt redo log for recovery */ #ifdef UNIV_LOG_ARCHIVE log_group_t* archive_group; /*!< in archive recovery: the log group whose diff --git a/storage/innobase/include/mtr0log.ic b/storage/innobase/include/mtr0log.ic index 3ed4876eeab..6457e02d455 100644 --- a/storage/innobase/include/mtr0log.ic +++ b/storage/innobase/include/mtr0log.ic @@ -191,7 +191,7 @@ mlog_write_initial_log_record_fast( ulint offset; ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX)); - ut_ad(type <= MLOG_BIGGEST_TYPE); + ut_ad(type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(type)); ut_ad(ptr && log_ptr); page = (const byte*) ut_align_down(ptr, UNIV_PAGE_SIZE); diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index b91dbd0353c..eae981f2fbb 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -189,6 +189,14 @@ For 1 - 8 bytes, the flag value must give the length also! 
@{ */ page */ #define MLOG_BIGGEST_TYPE ((byte)53) /*!< biggest value (used in assertions) */ + +#define MLOG_FILE_WRITE_CRYPT_DATA ((byte)100) /*!< log record for + writing/updating crypt data of + a tablespace */ + +#define EXTRA_CHECK_MLOG_NUMBER(x) \ + ((x) == MLOG_FILE_WRITE_CRYPT_DATA) + /* @} */ /** @name Flags for MLOG_FILE operations @@ -251,6 +259,18 @@ mtr_release_s_latch_at_savepoint( #else /* !UNIV_HOTBACKUP */ # define mtr_release_s_latch_at_savepoint(mtr,savepoint,lock) ((void) 0) #endif /* !UNIV_HOTBACKUP */ + +/**********************************************************//** +Releases a buf_page stored in an mtr memo after a +savepoint. */ +UNIV_INTERN +void +mtr_release_buf_page_at_savepoint( +/*=============================*/ + mtr_t* mtr, /*!< in: mtr */ + ulint savepoint, /*!< in: savepoint */ + buf_block_t* block); /*!< in: block to release */ + /***************************************************************//** Gets the logging mode of a mini-transaction. @return logging mode: MTR_LOG_NONE, ... */ diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index 8f8aef4f45c..e2d0cf26682 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -313,10 +313,14 @@ The wrapper functions have the prefix of "innodb_". 
*/ # define os_aio(type, mode, name, file, buf, offset, \ n, message1, message2, write_size, \ - page_compression, page_compression_level) \ + page_compression, page_compression_level, \ + page_encryption, page_encryption_key, lsn) \ pfs_os_aio_func(type, mode, name, file, buf, offset, \ n, message1, message2, write_size, \ - page_compression, page_compression_level, __FILE__, __LINE__) + page_compression, page_compression_level, \ + page_encryption, page_encryption_key, \ + lsn, __FILE__, __LINE__) + # define os_file_read(file, buf, offset, n, compressed) \ pfs_os_file_read_func(file, buf, offset, n, compressed, __FILE__, __LINE__) @@ -357,9 +361,13 @@ to original un-instrumented file I/O APIs */ # define os_file_close(file) os_file_close_func(file) -# define os_aio(type, mode, name, file, buf, offset, n, message1, message2, write_size, page_compression, page_compression_level) \ +# define os_aio(type, mode, name, file, buf, offset, n, message1, \ + message2, write_size, page_compression, page_compression_level, \ + page_encryption, page_encryption_key, lsn) \ os_aio_func(type, mode, name, file, buf, offset, n, \ - message1, message2, write_size, page_compression, page_compression_level) + message1, message2, write_size, \ + page_compression, page_compression_level, \ + page_encryption, page_encryption_key, lsn) # define os_file_read(file, buf, offset, n, compressed) \ os_file_read_func(file, buf, offset, n, compressed) @@ -777,6 +785,11 @@ pfs_os_aio_func( on this file space */ ulint page_compression_level, /*!< page compression level to be used */ + ibool page_encryption, /*!< in: is page encryption used + on this file space */ + ulint page_encryption_key, /*!< page encryption + key to be used */ + lsn_t lsn, /* lsn of the newest modification */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ /*******************************************************************//** @@ -1153,9 +1166,13 @@ 
os_aio_func( actual page size does not decrease. */ ibool page_compression, /*!< in: is page compression used on this file space */ - ulint page_compression_level); /*!< page compression + ulint page_compression_level, /*!< page compression level to be used */ - + ibool page_encryption, /*!< in: is page encryption used + on this file space */ + ulint page_encryption_key, /*!< page encryption key + to be used */ + lsn_t lsn); /* lsn of the newest modification */ /************************************************************************//** Wakes up all async i/o threads so that they know to exit themselves in shutdown. */ diff --git a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic index 8e1cea585e6..bbd3826d50b 100644 --- a/storage/innobase/include/os0file.ic +++ b/storage/innobase/include/os0file.ic @@ -224,6 +224,11 @@ pfs_os_aio_func( on this file space */ ulint page_compression_level, /*!< page compression level to be used */ + ibool page_encryption, /*!< in: is page encryption used + on this file space */ + ulint page_encryption_key, /*!< page encryption + key to be used */ + lsn_t lsn, /* lsn of the newest modification */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -240,7 +245,8 @@ pfs_os_aio_func( result = os_aio_func(type, mode, name, file, buf, offset, n, message1, message2, write_size, - page_compression, page_compression_level); + page_compression, page_compression_level, + page_encryption, page_encryption_key, lsn); register_pfs_file_io_end(locker, n); diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h index cb6633bb941..2b47aef8790 100644 --- a/storage/innobase/include/page0page.h +++ b/storage/innobase/include/page0page.h @@ -162,6 +162,8 @@ directory. 
*/ #define PAGE_DIR_SLOT_MAX_N_OWNED 8 #define PAGE_DIR_SLOT_MIN_N_OWNED 4 +extern my_bool srv_immediate_scrub_data_uncompressed; + /************************************************************//** Gets the start of a page. @return start of the page */ diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic index 99e17001c0a..cde3cad33f0 100644 --- a/storage/innobase/include/page0page.ic +++ b/storage/innobase/include/page0page.ic @@ -1169,6 +1169,13 @@ page_mem_free( ut_ad(rec_offs_validate(rec, index, offsets)); free = page_header_get_ptr(page, PAGE_FREE); + bool scrub = srv_immediate_scrub_data_uncompressed; + if (scrub) { + /* scrub record */ + uint size = rec_offs_data_size(offsets); + memset(rec, 0, size); + } + page_rec_set_next(rec, free); page_header_set_ptr(page, page_zip, PAGE_FREE, rec); diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h index 0a47d514e1b..d1d902ef57d 100644 --- a/storage/innobase/include/srv0mon.h +++ b/storage/innobase/include/srv0mon.h @@ -327,6 +327,11 @@ enum monitor_id_t { MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED, MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR, + /* New monitor variables for page encryption */ + MONITOR_OVLD_PAGES_PAGE_ENCRYPTED, + MONITOR_OVLD_PAGES_PAGE_DECRYPTED, + MONITOR_OVLD_PAGES_PAGE_ENCRYPTION_ERROR, + /* Index related counters */ MONITOR_MODULE_INDEX, MONITOR_INDEX_SPLIT, diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 83c478582b2..be0112959c7 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -133,6 +133,12 @@ struct srv_stats_t { ulint_ctr_64_t pages_page_decompressed; /* Number of page compression errors */ ulint_ctr_64_t pages_page_compression_error; + /* Number of pages encrypted with page encryption */ + ulint_ctr_64_t pages_page_encrypted; + /* Number of pages decrypted with page encryption */ + ulint_ctr_64_t pages_page_decrypted; + /* Number of page 
encryption errors */ + ulint_ctr_64_t pages_page_encryption_error; /** Number of data read in total (in bytes) */ ulint_ctr_1_t data_read; @@ -471,6 +477,11 @@ extern ibool srv_buf_dump_thread_active; /* TRUE during the lifetime of the stats thread */ extern ibool srv_dict_stats_thread_active; +/* TRUE if enable log scrubbing */ +extern my_bool srv_scrub_log; +/* TRUE during the lifetime of the log scrub thread */ +extern ibool srv_log_scrub_thread_active; + extern ulong srv_n_spin_wait_rounds; extern ulong srv_n_free_tickets_to_enter; extern ulong srv_thread_sleep_delay; @@ -534,6 +545,9 @@ extern my_bool srv_print_all_deadlocks; extern my_bool srv_cmp_per_index_enabled; +/* is encryption enabled */ +extern my_bool srv_encrypt_tables; + /** Status variables to be passed to MySQL */ extern struct export_var_t export_vars; @@ -1000,9 +1014,29 @@ struct export_var_t{ compression */ ib_int64_t innodb_pages_page_compression_error;/*!< Number of page compression errors */ + ib_int64_t innodb_pages_page_encrypted;/*!< Number of pages + encrypted by page encryption */ + ib_int64_t innodb_pages_page_decrypted;/*!< Number of pages + decrypted by page encryption */ + ib_int64_t innodb_pages_page_encryption_error;/*!< Number of page + encryption errors */ ulint innodb_sec_rec_cluster_reads; /*!< srv_sec_rec_cluster_reads */ - ulint innodb_sec_rec_cluster_reads_avoided; /*!< srv_sec_rec_cluster_reads_avoided */ + ulint innodb_sec_rec_cluster_reads_avoided; + /*!< srv_sec_rec_cluster_reads_avoided */ + + ulint innodb_encryption_rotation_pages_read_from_cache; + ulint innodb_encryption_rotation_pages_read_from_disk; + ulint innodb_encryption_rotation_pages_modified; + ulint innodb_encryption_rotation_pages_flushed; + ulint innodb_encryption_rotation_estimated_iops; + + ulint innodb_scrub_page_reorganizations; + ulint innodb_scrub_page_splits; + ulint innodb_scrub_page_split_failures_underflow; + ulint innodb_scrub_page_split_failures_out_of_filespace; + ulint 
innodb_scrub_page_split_failures_missing_index; + ulint innodb_scrub_page_split_failures_unknown; }; /** Thread slot in the thread table. */ diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index 86f03c7917f..b2f486d059e 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -478,6 +478,9 @@ typedef uint32_t ib_uint32_t; # define IB_ID_FMT UINT64PF +/* Type used for all log sequence number storage and arithmetics */ +typedef ib_uint64_t lsn_t; + #ifdef _WIN64 typedef unsigned __int64 ulint; typedef __int64 lint; diff --git a/storage/innobase/log/log0crypt.cc b/storage/innobase/log/log0crypt.cc new file mode 100644 index 00000000000..0647fd04e84 --- /dev/null +++ b/storage/innobase/log/log0crypt.cc @@ -0,0 +1,267 @@ +/**************************************************//** +@file log0crypt.cc +Innodb log encrypt/decrypt + +Created 11/25/2013 Minli Zhu +*******************************************************/ +#include "m_string.h" +#include "log0crypt.h" +#include <my_crypt.h> + +#include "log0log.h" +#include "srv0start.h" // for srv_start_lsn +#include "log0recv.h" // for recv_sys + +/* If true, enable redo log encryption. */ +UNIV_INTERN my_bool srv_encrypt_log = FALSE; +/* + Sub system type for InnoDB redo log crypto. + Set and used to validate crypto msg. +*/ +static const byte redo_log_purpose_byte = 0x02; +/* Plain text used by AES_ECB to generate redo log crypt key. */ +byte redo_log_crypt_msg[MY_AES_BLOCK_SIZE] = {0}; +/* IV to concatenate with counter used by AES_CTR for redo log + * encryption/decryption. */ +byte aes_ctr_nonce[MY_AES_BLOCK_SIZE] = {0}; + +/*********************************************************************//** +Generate a 128-bit value used to generate crypt key for redo log. +It is generated via the concatenation of 1 purpose byte (0x02) and 15-byte +random number. +Init AES-CTR iv/nonce with random number. 
+It is called when: +- redo logs do not exist when start up, or +- transition from without crypto. +Note: +We should not use flags and conditions such as: + (srv_encrypt_log && + debug_use_static_keys && + GetLatestCryptoKeyVersion() == UNENCRYPTED_KEY_VER) +because they haven't been read and set yet in the situation of resetting +redo logs. +*/ +UNIV_INTERN +void +log_init_crypt_msg_and_nonce(void) +/*==============================*/ +{ + mach_write_to_1(redo_log_crypt_msg, redo_log_purpose_byte); + if (my_random_bytes(redo_log_crypt_msg + 1, PURPOSE_BYTE_LEN) != AES_OK) + { + fprintf(stderr, + "\nInnodb redo log crypto: generate " + "%u-byte random number as crypto msg failed.\n", + PURPOSE_BYTE_LEN); + abort(); + } + + if (my_random_bytes(aes_ctr_nonce, MY_AES_BLOCK_SIZE) != AES_OK) + { + fprintf(stderr, + "\nInnodb redo log crypto: generate " + "%u-byte random number as AES_CTR nonce failed.\n", + MY_AES_BLOCK_SIZE); + abort(); + } +} + +/*********************************************************************//** +Generate crypt key from crypt msg. */ +UNIV_INTERN +void +log_init_crypt_key( +/*===============*/ + const byte* crypt_msg, /*< in: crypt msg */ + const uint crypt_ver, /*< in: key version */ + byte* key) /*< out: crypt key*/ +{ + if (crypt_ver == UNENCRYPTED_KEY_VER) + { + fprintf(stderr, "\nInnodb redo log crypto: unencrypted key ver.\n\n"); + memset(key, 0, MY_AES_BLOCK_SIZE); + return; + } + + if (crypt_msg[PURPOSE_BYTE_OFFSET] != redo_log_purpose_byte) + { + fprintf(stderr, + "\nInnodb redo log crypto: msg type mismatched. 
" + "Expected: %x; Actual: %x\n", + redo_log_purpose_byte, crypt_msg[PURPOSE_BYTE_OFFSET]); + abort(); + } + + byte mysqld_key[MY_AES_BLOCK_SIZE] = {0}; + if (GetCryptoKey(crypt_ver, mysqld_key, MY_AES_BLOCK_SIZE)) + { + fprintf(stderr, + "\nInnodb redo log crypto: getting mysqld crypto key " + "from key version failed.\n"); + abort(); + } + + uint32 dst_len; + my_aes_encrypt_dynamic_type func= get_aes_encrypt_func(MY_AES_ALGORITHM_ECB); + int rc= (*func)(crypt_msg, MY_AES_BLOCK_SIZE, //src, srclen + key, &dst_len, //dst, &dstlen + (unsigned char*)&mysqld_key, sizeof(mysqld_key), + NULL, 0, + 1); + + if (rc != AES_OK || dst_len != MY_AES_BLOCK_SIZE) + { + fprintf(stderr, + "\nInnodb redo log crypto: getting redo log crypto key " + "failed.\n"); + abort(); + } +} + +/*********************************************************************//** +Get a log block's start lsn. +@return a log block's start lsn */ +static inline +lsn_t +log_block_get_start_lsn( +/*====================*/ + lsn_t lsn, /*!< in: checkpoint lsn */ + ulint log_block_no) /*!< in: log block number */ +{ + lsn_t start_lsn = + (lsn & (lsn_t)0xffffffff00000000ULL) | + (((log_block_no - 1) & (lsn_t)0x3fffffff) << 9); + return start_lsn; +} + +/*********************************************************************//** +Call AES CTR to encrypt/decrypt log blocks. 
*/ +static +Crypt_result +log_blocks_crypt( +/*=============*/ + const byte* block, /*!< in: blocks before encrypt/decrypt*/ + const ulint size, /*!< in: size of block, must be multiple of a log block*/ + byte* dst_block, /*!< out: blocks after encrypt/decrypt */ + const bool is_encrypt) /*!< in: encrypt or decrypt*/ +{ + byte *log_block = (byte*)block; + Crypt_result rc = AES_OK; + uint32 src_len, dst_len; + byte aes_ctr_counter[MY_AES_BLOCK_SIZE]; + ulint log_block_no, log_block_start_lsn; + byte *key; + ulint lsn; + if (is_encrypt) + { + ut_a(log_sys && log_sys->redo_log_crypt_ver != UNENCRYPTED_KEY_VER); + key = (byte *)(log_sys->redo_log_crypt_key); + lsn = log_sys->lsn; + + } else { + ut_a(recv_sys && recv_sys->recv_log_crypt_ver != UNENCRYPTED_KEY_VER); + key = (byte *)(recv_sys->recv_log_crypt_key); + lsn = srv_start_lsn; + } + ut_a(size % OS_FILE_LOG_BLOCK_SIZE == 0); + src_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE; + for (ulint i = 0; i < size ; i += OS_FILE_LOG_BLOCK_SIZE) + { + log_block_no = log_block_get_hdr_no(log_block); + log_block_start_lsn = log_block_get_start_lsn(lsn, log_block_no); + + // Assume log block header is not encrypted + memcpy(dst_block, log_block, LOG_BLOCK_HDR_SIZE); + + // aes_ctr_counter = nonce(3-byte) + start lsn to a log block + // (8-byte) + lbn (4-byte) + abn + // (1-byte, only 5 bits are used). "+" means concatenate. 
+ bzero(aes_ctr_counter, MY_AES_BLOCK_SIZE); + memcpy(aes_ctr_counter, &aes_ctr_nonce, 3); + mach_write_to_8(aes_ctr_counter + 3, log_block_start_lsn); + mach_write_to_4(aes_ctr_counter + 11, log_block_no); + bzero(aes_ctr_counter + 15, 1); + + int rc = (* my_aes_encrypt_dynamic)(log_block + LOG_BLOCK_HDR_SIZE, src_len, + dst_block + LOG_BLOCK_HDR_SIZE, &dst_len, + (unsigned char*)key, 16, + aes_ctr_counter, MY_AES_BLOCK_SIZE, + 1); + + ut_a(rc == AES_OK); + ut_a(dst_len == src_len); + log_block += OS_FILE_LOG_BLOCK_SIZE; + dst_block += OS_FILE_LOG_BLOCK_SIZE; + } + + return rc; +} + +/*********************************************************************//** +Encrypt log blocks. */ +UNIV_INTERN +Crypt_result +log_blocks_encrypt( +/*===============*/ + const byte* block, /*!< in: blocks before encryption */ + const ulint size, /*!< in: size of blocks, must be multiple of a log block */ + byte* dst_block) /*!< out: blocks after encryption */ +{ + return log_blocks_crypt(block, size, dst_block, true); +} + +/*********************************************************************//** +Decrypt log blocks. */ +UNIV_INTERN +Crypt_result +log_blocks_decrypt( +/*===============*/ + const byte* block, /*!< in: blocks before decryption */ + const ulint size, /*!< in: size of blocks, must be multiple of a log block */ + byte* dst_block) /*!< out: blocks after decryption */ +{ + return log_blocks_crypt(block, size, dst_block, false); +} + +/*********************************************************************//** +Set next checkpoint's key version to latest one, and generate current +key. Key version 0 means no encryption. 
*/ +UNIV_INTERN +void +log_crypt_set_ver_and_key( +/*======================*/ + uint& key_ver, /*!< out: latest key version */ + byte* crypt_key) /*!< out: crypto key */ +{ + if (!srv_encrypt_log || + (key_ver = GetLatestCryptoKeyVersion()) == UNENCRYPTED_KEY_VER) + { + key_ver = UNENCRYPTED_KEY_VER; + memset(crypt_key, 0, MY_AES_BLOCK_SIZE); + return; + } + log_init_crypt_key(redo_log_crypt_msg, key_ver, crypt_key); +} + +/*********************************************************************//** +Writes the crypto (version, msg and iv) info, which has been used for +log blocks with lsn <= this checkpoint's lsn, to a log header's +checkpoint buf. */ +UNIV_INTERN +void +log_crypt_write_checkpoint_buf( +/*===========================*/ + byte* buf) /*!< in/out: checkpoint buffer */ +{ + ut_a(log_sys); + mach_write_to_4(buf + LOG_CRYPT_VER, log_sys->redo_log_crypt_ver); + if (!srv_encrypt_log || + log_sys->redo_log_crypt_ver == UNENCRYPTED_KEY_VER) { + memset(buf + LOG_CRYPT_MSG, 0, MY_AES_BLOCK_SIZE); + memset(buf + LOG_CRYPT_IV, 0, MY_AES_BLOCK_SIZE); + return; + } + ut_a(redo_log_crypt_msg[PURPOSE_BYTE_OFFSET] == redo_log_purpose_byte); + memcpy(buf + LOG_CRYPT_MSG, redo_log_crypt_msg, MY_AES_BLOCK_SIZE); + memcpy(buf + LOG_CRYPT_IV, aes_ctr_nonce, MY_AES_BLOCK_SIZE); +} diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index d0f17a43cf3..ba05987dfbe 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -81,6 +81,10 @@ reduce the size of the log. 
/* Global log system variable */ UNIV_INTERN log_t* log_sys = NULL; +/* Next log block number to do dummy record filling if no log records written +for a while */ +static ulint next_lbn_to_pad = 0; + #ifdef UNIV_PFS_RWLOCK UNIV_INTERN mysql_pfs_key_t checkpoint_lock_key; # ifdef UNIV_LOG_ARCHIVE @@ -532,10 +536,9 @@ function_exit: return(lsn); } -#ifdef UNIV_LOG_ARCHIVE /******************************************************//** Pads the current log block full with dummy log records. Used in producing -consistent archived log files. */ +consistent archived log files and scrubbing redo log. */ static void log_pad_current_log_block(void) @@ -564,7 +567,6 @@ log_pad_current_log_block(void) ut_a(lsn % OS_FILE_LOG_BLOCK_SIZE == LOG_BLOCK_HDR_SIZE); } -#endif /* UNIV_LOG_ARCHIVE */ /******************************************************//** Calculates the data capacity of a log group, when the log file headers are not @@ -900,6 +902,7 @@ log_init(void) /*----------------------------*/ log_sys->next_checkpoint_no = 0; + log_sys->redo_log_crypt_ver = UNENCRYPTED_KEY_VER; log_sys->last_checkpoint_lsn = log_sys->lsn; log_sys->n_pending_checkpoint_writes = 0; @@ -945,7 +948,7 @@ log_init(void) log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE); log_sys->buf_free = LOG_BLOCK_HDR_SIZE; - log_sys->lsn = LOG_START_LSN + LOG_BLOCK_HDR_SIZE; + log_sys->lsn = LOG_START_LSN + LOG_BLOCK_HDR_SIZE; // TODO(minliz): ensure various LOG_START_LSN? MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE, log_sys->lsn - log_sys->last_checkpoint_lsn); @@ -1273,7 +1276,7 @@ log_group_file_header_flush( (ulint) (dest_offset / UNIV_PAGE_SIZE), (ulint) (dest_offset % UNIV_PAGE_SIZE), OS_FILE_LOG_BLOCK_SIZE, - buf, group, 0); + buf, group, 0, 0); srv_stats.os_log_pending_writes.dec(); } @@ -1293,6 +1296,36 @@ log_block_store_checksum( } /******************************************************//** +Encrypt one or more log block before it is flushed to disk +@return true if encryption succeeds. 
*/ +static +bool +log_group_encrypt_before_write( +/*===========================*/ + const log_group_t* group, /*!< in: log group to be flushed */ + byte* block, /*!< in/out: pointer to a log block */ + const ulint size) /*!< in: size of log blocks */ + +{ + Crypt_result result = AES_OK; + + ut_ad(size % OS_FILE_LOG_BLOCK_SIZE == 0); + byte* dst_frame = (byte*)malloc(size); + + //encrypt log blocks content + result = log_blocks_encrypt(block, size, dst_frame); + + if (result == AES_OK) + { + ut_ad(block[0] == dst_frame[0]); + memcpy(block, dst_frame, size); + } + free(dst_frame); + + return (result == AES_OK); +} + +/******************************************************//** Writes a buffer to a log file group. */ UNIV_INTERN void @@ -1398,10 +1431,19 @@ loop: ut_a(next_offset / UNIV_PAGE_SIZE <= ULINT_MAX); + if (srv_encrypt_log && + log_sys->redo_log_crypt_ver != UNENCRYPTED_KEY_VER && + !log_group_encrypt_before_write(group, buf, write_len)) + { + fprintf(stderr, + "\nInnodb redo log encryption failed.\n"); + abort(); + } + fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0, (ulint) (next_offset / UNIV_PAGE_SIZE), (ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf, - group, 0); + group, 0, 0); srv_stats.os_log_pending_writes.dec(); @@ -1884,6 +1926,8 @@ log_group_checkpoint( mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no); mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn); + log_crypt_write_checkpoint_buf(buf); + lsn_offset = log_group_calc_lsn_offset(log_sys->next_checkpoint_lsn, group); mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_LOW32, @@ -1967,7 +2011,7 @@ log_group_checkpoint( write_offset / UNIV_PAGE_SIZE, write_offset % UNIV_PAGE_SIZE, OS_FILE_LOG_BLOCK_SIZE, - buf, ((byte*) group + 1), 0); + buf, ((byte*) group + 1), 0, 0); ut_ad(((ulint) group & 0x1UL) == 0); } @@ -2008,6 +2052,8 @@ log_reset_first_header_and_checkpoint( mach_write_to_8(buf + LOG_CHECKPOINT_NO, 0); mach_write_to_8(buf + 
LOG_CHECKPOINT_LSN, lsn); + log_crypt_write_checkpoint_buf(buf); + mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_LOW32, LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE); mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_HIGH32, 0); @@ -2047,7 +2093,7 @@ log_group_read_checkpoint_info( fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->space_id, 0, field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE, - OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL, 0); + OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL, 0, 0); } /******************************************************//** @@ -2146,7 +2192,6 @@ log_checkpoint( } log_sys->next_checkpoint_lsn = oldest_lsn; - #ifdef UNIV_DEBUG if (log_debug_writes) { fprintf(stderr, "Making checkpoint no " @@ -2158,6 +2203,10 @@ log_checkpoint( log_groups_write_checkpoint_info(); + /* generate key version and key used to encrypt next log block */ + log_crypt_set_ver_and_key(log_sys->redo_log_crypt_ver, + log_sys->redo_log_crypt_key); + MONITOR_INC(MONITOR_NUM_CHECKPOINT); mutex_exit(&(log_sys->mutex)); @@ -2291,6 +2340,33 @@ loop: } /******************************************************//** +Decrypt a specified log segment after they are read from a log file to a buffer. +@return true if decryption succeeds. */ +static +bool +log_group_decrypt_after_read( +/*==========================*/ + const log_group_t* group, /*!< in: log group to be read from */ + byte* frame, /*!< in/out: log segment */ + const ulint size) /*!< in: log segment size */ +{ + Crypt_result result; + ut_ad(size % OS_FILE_LOG_BLOCK_SIZE == 0); + byte* dst_frame = (byte*)malloc(size); + + // decrypt log blocks content + result = log_blocks_decrypt(frame, size, dst_frame); + + if (result == AES_OK) + { + memcpy(frame, dst_frame, size); + } + free(dst_frame); + + return (result == AES_OK); +} + +/******************************************************//** Reads a specified log segment to a buffer. 
*/ UNIV_INTERN void @@ -2341,7 +2417,14 @@ loop: fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 0, (ulint) (source_offset / UNIV_PAGE_SIZE), (ulint) (source_offset % UNIV_PAGE_SIZE), - len, buf, NULL, 0); + len, buf, NULL, 0, 0); + + if (recv_sys->recv_log_crypt_ver != UNENCRYPTED_KEY_VER && + !log_group_decrypt_after_read(group, buf, len)) + { + fprintf(stderr, "Innodb redo log decryption failed.\n"); + abort(); + } start_lsn += len; buf += len; @@ -2566,6 +2649,14 @@ loop: MONITOR_INC(MONITOR_LOG_IO); + if (srv_encrypt_log && + log_sys->redo_log_crypt_ver != UNENCRYPTED_KEY_VER && + !log_group_encrypt_before_write(group, buf, len)) + { + fprintf(stderr, "Innodb redo log encryption failed.\n"); + abort(); + } + fil_io(OS_FILE_WRITE | OS_FILE_LOG, false, group->archive_space_id, (ulint) (next_offset / UNIV_PAGE_SIZE), (ulint) (next_offset % UNIV_PAGE_SIZE), @@ -3738,4 +3829,62 @@ log_mem_free(void) log_sys = NULL; } } + +/** Event to wake up the log scrub thread */ +UNIV_INTERN os_event_t log_scrub_event = NULL; + +UNIV_INTERN ibool srv_log_scrub_thread_active = FALSE; + +/*****************************************************************//* +If no log record has been written for a while, fill current log +block with dummy records. */ +static +void +log_scrub() +/*=========*/ +{ + ulint cur_lbn = log_block_convert_lsn_to_no(log_sys->lsn); + if (next_lbn_to_pad == cur_lbn) + { + log_pad_current_log_block(); + } + next_lbn_to_pad = log_block_convert_lsn_to_no(log_sys->lsn); +} + +/* log scrubbing interval in ms. */ +UNIV_INTERN ulonglong innodb_scrub_log_interval; + +/*****************************************************************//** +This is the main thread for log scrub. It waits for an event and +when waked up fills current log block with dummy records and +sleeps again. 
+@return this function does not return, it calls os_thread_exit() */ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(log_scrub_thread)( +/*===============================*/ + void* arg __attribute__((unused))) /*!< in: a dummy parameter + required by os_thread_create */ +{ + ut_ad(!srv_read_only_mode); + + srv_log_scrub_thread_active = TRUE; + + while(srv_shutdown_state == SRV_SHUTDOWN_NONE) + { + os_event_wait_time(log_scrub_event, innodb_scrub_log_interval * 1000); + + log_scrub(); + + os_event_reset(log_scrub_event); + } + + srv_log_scrub_thread_active = FALSE; + + /* We count the number of threads in os_thread_exit(). A created + thread should always use that to exit and not use return() to exit. */ + os_thread_exit(NULL); + + OS_THREAD_DUMMY_RETURN; +} #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 48a204ff327..fbed6137cd7 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -694,8 +694,9 @@ recv_synchronize_groups( recovered_lsn */ log_group_set_fields(group, recovered_lsn); - } + ut_a(log_sys); + } /* Copy the checkpoint info to the groups; remember that we have incremented checkpoint_no by one, and the info will not be written over the max checkpoint info, thus making the preservation of max @@ -1144,7 +1145,9 @@ recv_parse_or_apply_log_rec_body( + 0 /*FLST_PREV*/ || offs == PAGE_BTR_IBUF_FREE_LIST_NODE + PAGE_HEADER + FIL_ADDR_PAGE - + FIL_ADDR_SIZE /*FLST_NEXT*/); + + FIL_ADDR_SIZE /*FLST_NEXT*/ + || offs == + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); break; } } @@ -1371,6 +1374,9 @@ recv_parse_or_apply_log_rec_body( ptr, end_ptr, page, page_zip, index); } break; + case MLOG_FILE_WRITE_CRYPT_DATA: + ptr = fil_parse_write_crypt_data(ptr, end_ptr, block); + break; default: ptr = NULL; recv_sys->found_corrupt_log = TRUE; @@ -3021,6 +3027,7 @@ recv_recovery_from_checkpoint_start_func( ulint max_cp_field; lsn_t checkpoint_lsn; ib_uint64_t checkpoint_no; + 
uint recv_crypt_ver; lsn_t group_scanned_lsn = 0; lsn_t contiguous_lsn; #ifdef UNIV_LOG_ARCHIVE @@ -3080,13 +3087,21 @@ recv_recovery_from_checkpoint_start_func( #ifdef UNIV_LOG_ARCHIVE archived_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN); #endif /* UNIV_LOG_ARCHIVE */ + recv_crypt_ver = mach_read_from_4(buf + LOG_CRYPT_VER); + if (recv_crypt_ver == UNENCRYPTED_KEY_VER) + { + log_init_crypt_msg_and_nonce(); + } else { + ut_memcpy(redo_log_crypt_msg, buf + LOG_CRYPT_MSG, MY_AES_BLOCK_SIZE); + ut_memcpy(aes_ctr_nonce, buf + LOG_CRYPT_IV, MY_AES_BLOCK_SIZE); + } /* Read the first log file header to print a note if this is a recovery from a restored InnoDB Hot Backup */ fil_io(OS_FILE_READ | OS_FILE_LOG, true, max_cp_group->space_id, 0, 0, 0, LOG_FILE_HDR_SIZE, - log_hdr_buf, max_cp_group, 0); + log_hdr_buf, max_cp_group, 0, 0); if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, (byte*)"ibbackup", (sizeof "ibbackup") - 1)) { @@ -3117,7 +3132,7 @@ recv_recovery_from_checkpoint_start_func( fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, max_cp_group->space_id, 0, 0, 0, OS_FILE_LOG_BLOCK_SIZE, - log_hdr_buf, max_cp_group, 0); + log_hdr_buf, max_cp_group, 0, 0); } #ifdef UNIV_LOG_ARCHIVE @@ -3141,7 +3156,10 @@ recv_recovery_from_checkpoint_start_func( recv_sys->scanned_lsn = checkpoint_lsn; recv_sys->scanned_checkpoint_no = 0; recv_sys->recovered_lsn = checkpoint_lsn; - + recv_sys->recv_log_crypt_ver = recv_crypt_ver; + log_init_crypt_key(redo_log_crypt_msg, + recv_sys->recv_log_crypt_ver, + recv_sys->recv_log_crypt_key); srv_start_lsn = checkpoint_lsn; } @@ -3224,7 +3242,6 @@ recv_recovery_from_checkpoint_start_func( group = UT_LIST_GET_NEXT(log_groups, group); } - /* Done with startup scan. Clear the flag. 
*/ recv_log_scan_is_startup_type = FALSE; if (TYPE_CHECKPOINT) { @@ -3312,6 +3329,8 @@ recv_recovery_from_checkpoint_start_func( log_sys->next_checkpoint_lsn = checkpoint_lsn; log_sys->next_checkpoint_no = checkpoint_no + 1; + log_crypt_set_ver_and_key(log_sys->redo_log_crypt_ver, + log_sys->redo_log_crypt_key); #ifdef UNIV_LOG_ARCHIVE log_sys->archived_lsn = archived_lsn; @@ -3342,6 +3361,8 @@ recv_recovery_from_checkpoint_start_func( log_sys->lsn - log_sys->last_checkpoint_lsn); log_sys->next_checkpoint_no = checkpoint_no + 1; + log_crypt_set_ver_and_key(log_sys->redo_log_crypt_ver, + log_sys->redo_log_crypt_key); #ifdef UNIV_LOG_ARCHIVE if (archived_lsn == LSN_MAX) { @@ -3543,6 +3564,16 @@ recv_reset_logs( log_sys->next_checkpoint_no = 0; log_sys->last_checkpoint_lsn = 0; + /* redo_log_crypt_ver will be set by log_checkpoint() to the + latest key version. */ + log_sys->redo_log_crypt_ver = UNENCRYPTED_KEY_VER; + /* + Note: flags (srv_encrypt_log and debug_use_static_keys) + haven't been read and set yet! 
+ So don't use condition such as: + if (srv_encrypt_log && debug_use_static_keys) + */ + log_init_crypt_msg_and_nonce(); #ifdef UNIV_LOG_ARCHIVE log_sys->archived_lsn = log_sys->lsn; @@ -4019,4 +4050,3 @@ byte* recv_dblwr_t::find_page(ulint space_id, ulint page_no) return(result); } - diff --git a/storage/innobase/mtr/mtr0log.cc b/storage/innobase/mtr/mtr0log.cc index 5335cb4c9ef..82df1df63d4 100644 --- a/storage/innobase/mtr/mtr0log.cc +++ b/storage/innobase/mtr/mtr0log.cc @@ -75,7 +75,7 @@ mlog_write_initial_log_record( { byte* log_ptr; - ut_ad(type <= MLOG_BIGGEST_TYPE); + ut_ad(type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(type)); ut_ad(type > MLOG_8BYTES); log_ptr = mlog_open(mtr, 11); @@ -111,7 +111,7 @@ mlog_parse_initial_log_record( } *type = (byte)((ulint)*ptr & ~MLOG_SINGLE_REC_FLAG); - ut_ad(*type <= MLOG_BIGGEST_TYPE); + ut_ad(*type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(*type)); ptr++; @@ -150,8 +150,6 @@ mlog_parse_nbytes( ib_uint64_t dval; ut_a(type <= MLOG_8BYTES); - ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX); - if (end_ptr < ptr + 2) { return(NULL); @@ -160,6 +158,11 @@ mlog_parse_nbytes( offset = mach_read_from_2(ptr); ptr += 2; + ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX || + /* scrubbing changes page type from FIL_PAGE_INDEX to + * FIL_PAGE_TYPE_ALLOCATED (rest of this assertion is below) */ + (type == MLOG_2BYTES && offset == FIL_PAGE_TYPE)); + if (offset >= UNIV_PAGE_SIZE) { recv_sys->found_corrupt_log = TRUE; @@ -219,6 +222,14 @@ mlog_parse_nbytes( } mach_write_to_2(page + offset, val); } + ut_a(!page || !page_zip || + fil_page_get_type(page) != FIL_PAGE_INDEX || + /* scrubbing changes page type from FIL_PAGE_INDEX to + * FIL_PAGE_TYPE_ALLOCATED */ + (type == MLOG_2BYTES && + offset == FIL_PAGE_TYPE && + val == FIL_PAGE_TYPE_ALLOCATED)); + break; case MLOG_4BYTES: if (page) { diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 
869586bcd90..400aa9bff57 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -437,3 +437,36 @@ mtr_print( } # endif /* !UNIV_HOTBACKUP */ #endif /* UNIV_DEBUG */ + +/**********************************************************//** +Releases a buf_page stored in an mtr memo after a +savepoint. */ +UNIV_INTERN +void +mtr_release_buf_page_at_savepoint( +/*=============================*/ + mtr_t* mtr, /*!< in: mtr */ + ulint savepoint, /*!< in: savepoint */ + buf_block_t* block) /*!< in: block to release */ +{ + mtr_memo_slot_t* slot; + dyn_array_t* memo; + + ut_ad(mtr); + ut_ad(mtr->magic_n == MTR_MAGIC_N); + ut_ad(mtr->state == MTR_ACTIVE); + + memo = &(mtr->memo); + + ut_ad(dyn_array_get_data_size(memo) > savepoint); + + slot = (mtr_memo_slot_t*) dyn_array_get_element(memo, savepoint); + + ut_ad(slot->object == block); + ut_ad(slot->type == MTR_MEMO_PAGE_S_FIX || + slot->type == MTR_MEMO_PAGE_X_FIX || + slot->type == MTR_MEMO_BUF_FIX); + + buf_page_release((buf_block_t*) slot->object, slot->type); + slot->object = NULL; +} diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 4db5f183892..f41ddaf2b30 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -43,7 +43,9 @@ Created 10/21/1995 Heikki Tuuri #include "srv0srv.h" #include "srv0start.h" #include "fil0fil.h" +#include "fsp0fsp.h" #include "fil0pagecompress.h" +#include "fil0pageencryption.h" #include "buf0buf.h" #include "srv0mon.h" #include "srv0srv.h" @@ -223,9 +225,17 @@ struct os_aio_slot_t{ freed after the write has been completed */ + byte* page_encryption_page; /*!< Memory allocated for + page encrypted page and + freed after the write + has been completed */ + ibool page_compression; ulint page_compression_level; + ibool page_encryption; + ulint page_encryption_key; + ulint* write_size; /*!< Actual write size initialized after fist successfull trim operation for this page and if @@ -236,9 +246,17 @@ struct 
os_aio_slot_t{ page compressed pages, do not free this */ - ibool page_compress_success; - /*!< TRUE if page compression was - successfull, false if not */ + byte* page_buf2; /*!< Actual page buffer for + page encrypted pages, do not + free this */ + byte* tmp_encryption_buf; /*!< a temporal buffer used by page encryption */ + + ibool page_compression_success; + ibool page_encryption_success; + /*!< TRUE if page compression was + successfull, false if not */ + + lsn_t lsn; /* lsn of the newest modification */ ulint file_block_size;/*!< file block size */ @@ -398,6 +416,19 @@ os_slot_alloc_lzo_mem( os_aio_slot_t* slot); /*!< in: slot structure */ #endif +/**********************************************************************//** +Allocate memory for temporal buffer used for page encryption. This +buffer is freed later. */ +UNIV_INTERN +void +os_slot_alloc_page_buf2( + os_aio_slot_t* slot); /*!< in: slot structure */ +/**********************************************************************//** +Allocate memory for temporal buffer used for page encryption. */ +UNIV_INTERN +void +os_slot_alloc_tmp_encryption_buf( + os_aio_slot_t* slot); /*!< in: slot structure */ /****************************************************************//** Does error handling when a file operation fails. @return TRUE if we should retry the operation */ @@ -2923,14 +2954,6 @@ try_again: os_mutex_exit(os_file_count_mutex); if (ret && len == n) { - /* Note that InnoDB writes files that are not formated - as file spaces and they do not have FIL_PAGE_TYPE - field, thus we must use here information is the actual - file space compressed. 
*/ - if (fil_page_is_compressed((byte *)buf)) { - fil_decompress_page(NULL, (byte *)buf, len, NULL); - } - return(TRUE); } #else /* __WIN__ */ @@ -2943,14 +2966,6 @@ try_again: ret = os_file_pread(file, buf, n, offset); if ((ulint) ret == n) { - /* Note that InnoDB writes files that are not formated - as file spaces and they do not have FIL_PAGE_TYPE - field, thus we must use here information is the actual - file space compressed. */ - if (fil_page_is_compressed((byte *)buf)) { - fil_decompress_page(NULL, (byte *)buf, n, NULL); - } - return(TRUE); } @@ -3066,15 +3081,6 @@ try_again: os_mutex_exit(os_file_count_mutex); if (ret && len == n) { - - /* Note that InnoDB writes files that are not formated - as file spaces and they do not have FIL_PAGE_TYPE - field, thus we must use here information is the actual - file space compressed. */ - if (fil_page_is_compressed((byte *)buf)) { - fil_decompress_page(NULL, (byte *)buf, n, NULL); - } - return(TRUE); } #else /* __WIN__ */ @@ -3087,14 +3093,6 @@ try_again: ret = os_file_pread(file, buf, n, offset); if ((ulint) ret == n) { - /* Note that InnoDB writes files that are not formated - as file spaces and they do not have FIL_PAGE_TYPE - field, thus we must use here information is the actual - file space compressed. 
*/ - if (fil_page_is_compressed((byte *)buf)) { - fil_decompress_page(NULL, (byte *)buf, n, NULL); - } - return(TRUE); } #endif /* __WIN__ */ @@ -4180,6 +4178,7 @@ os_aio_array_free( for (i = 0; i < array->n_slots; i++) { os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i); + if (slot->page_compression_page) { ut_free(slot->page_compression_page); slot->page_compression_page = NULL; @@ -4189,8 +4188,19 @@ os_aio_array_free( ut_free(slot->lzo_mem); slot->lzo_mem = NULL; } + + if (slot->page_encryption_page) { + ut_free(slot->page_encryption_page); + slot->page_encryption_page = NULL; + } + + if (slot->tmp_encryption_buf) { + ut_free(slot->tmp_encryption_buf); + slot->tmp_encryption_buf = NULL; + } } + ut_free(array->slots); ut_free(array); @@ -4532,8 +4542,13 @@ os_aio_array_reserve_slot( actual page size does not decrease. */ ibool page_compression, /*!< in: is page compression used on this file space */ - ulint page_compression_level) /*!< page compression - level to be used */ + ulint page_compression_level, /*!< page compression + level to be used */ + ibool page_encryption, /*!< in: is page encryption used + on this file space */ + ulint page_encryption_key, /*!< page encryption key + to be used */ + lsn_t lsn) /* lsn of the newest modification */ { os_aio_slot_t* slot = NULL; #ifdef WIN_ASYNC_IO @@ -4622,11 +4637,15 @@ found: slot->type = type; slot->buf = static_cast<byte*>(buf); slot->offset = offset; + slot->lsn = lsn; slot->io_already_done = FALSE; - slot->page_compress_success = FALSE; + slot->page_compression_success = FALSE; + slot->page_encryption_success = FALSE; slot->write_size = write_size; slot->page_compression_level = page_compression_level; slot->page_compression = page_compression; + slot->page_encryption_key = page_encryption_key; + slot->page_encryption = page_encryption; if (message1) { slot->file_block_size = fil_node_get_block_size(message1); @@ -4652,7 +4671,8 @@ found: #endif /* Call page compression */ - tmp = 
fil_compress_page(fil_node_get_space_id(slot->message1), + tmp = fil_compress_page( + fil_node_get_space_id(slot->message1), (byte *)buf, slot->page_buf, len, @@ -4667,9 +4687,9 @@ found: len = real_len; buf = slot->page_buf; slot->len = real_len; - slot->page_compress_success = TRUE; + slot->page_compression_success = TRUE; } else { - slot->page_compress_success = FALSE; + slot->page_compression_success = FALSE; } /* Take array mutex back, not sure if this is really needed @@ -4678,6 +4698,35 @@ found: } +// if (srv_encrypt_tables) { + //page_encryption = TRUE; +// } + + /* If the space is page encryption and this is write operation + then we encrypt the page */ + if (message1 && type == OS_FILE_WRITE && page_encryption ) { + /* Release the array mutex while encrypting */ + os_mutex_exit(array->mutex); + + // We allocate memory for page encrypted buffer if and only + // if it is not yet allocated. + os_slot_alloc_page_buf2(slot); + + fil_space_encrypt( + fil_node_get_space_id(slot->message1), + slot->offset, + slot->lsn, + (byte *)buf, + slot->len, + slot->page_buf2, + slot->page_encryption_key); + + slot->page_encryption_success = TRUE; + buf = slot->page_buf2; + + /* Take array mutex back */ + os_mutex_enter(array->mutex); + } #ifdef WIN_ASYNC_IO control = &slot->control; @@ -4963,12 +5012,18 @@ os_aio_func( actual page size does not decrease. 
*/ ibool page_compression, /*!< in: is page compression used on this file space */ - ulint page_compression_level) /*!< page compression + ulint page_compression_level, /*!< page compression level to be used */ + ibool page_encryption, /*!< in: is page encryption used + on this file space */ + ulint page_encryption_key, /*!< page encryption key + to be used */ + lsn_t lsn) /* lsn of the newest modification */ { os_aio_array_t* array; os_aio_slot_t* slot; #ifdef WIN_ASYNC_IO + void* buffer = NULL; ibool retval; BOOL ret = TRUE; DWORD len = (DWORD) n; @@ -4987,6 +5042,7 @@ os_aio_func( ut_ad((n & 0xFFFFFFFFUL) == n); #endif + wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER); @@ -5077,7 +5133,9 @@ try_again: } slot = os_aio_array_reserve_slot(type, array, message1, message2, file, - name, buf, offset, n, write_size, page_compression, page_compression_level); + name, buf, offset, n, write_size, + page_compression, page_compression_level, + page_encryption, page_encryption_key, lsn); if (type == OS_FILE_READ) { if (srv_use_native_aio) { @@ -5104,7 +5162,18 @@ try_again: if (srv_use_native_aio) { os_n_file_writes++; #ifdef WIN_ASYNC_IO - ret = WriteFile(file, buf, (DWORD) n, &len, + if (page_encryption && slot->page_encryption_success) { + buffer = slot->page_buf2; + n = slot->len; + } else { + if (page_compression && slot->page_compression_success) { + buffer = slot->page_buf; + n = slot->len; + } else { + buffer = buf; + } + } + ret = WriteFile(file, buffer, (DWORD) n, &len, &(slot->control)); #elif defined(LINUX_NATIVE_AIO) @@ -5307,21 +5376,29 @@ os_aio_windows_handle( ut_a((slot->len & 0xFFFFFFFFUL) == slot->len); - switch (slot->type) { - case OS_FILE_WRITE: - if (slot->message1 && - slot->page_compression && - slot->page_compress_success && - slot->page_buf) { - ret = WriteFile(slot->file, slot->page_buf, - (DWORD) slot->len, &len, - &(slot->control)); + switch (slot->type) { + case OS_FILE_WRITE: + if (slot->message1 + 
&& slot->page_encryption + && slot->page_encryption_success) { + ret_val = os_file_write(slot->name, + slot->file, + slot->page_buf2, + slot->offset, + slot->len); } else { - ret = WriteFile(slot->file, slot->buf, - (DWORD) slot->len, &len, - &(slot->control)); - } - + if (slot->message1 + && slot->page_compression + && slot->page_compression_success) { + ret = WriteFile(slot->file, slot->page_buf, + (DWORD) slot->len, &len, + &(slot->control)); + } else { + ret = WriteFile(slot->file, slot->buf, + (DWORD) slot->len, &len, + &(slot->control)); + } + } break; case OS_FILE_READ: ret = ReadFile(slot->file, slot->buf, @@ -5353,20 +5430,41 @@ os_aio_windows_handle( } if (slot->type == OS_FILE_READ) { - if(fil_page_is_compressed(slot->buf)) { + if (fil_page_is_compressed_encrypted(slot->buf) || + fil_page_is_encrypted(slot->buf)) { + ut_ad(slot->message1 != NULL); + os_slot_alloc_page_buf2(slot); + os_slot_alloc_tmp_encryption_buf(slot); + + // Decrypt the data + fil_space_decrypt( + fil_node_get_space_id(slot->message1), + slot->buf, + slot->len, + slot->page_buf2); + // Copy decrypted buffer back to buf + memcpy(slot->buf, slot->page_buf2, slot->len); + } + if (fil_page_is_compressed(slot->buf)) { + /* We allocate memory for page compressed buffer if + and only if it is not yet allocated. 
*/ os_slot_alloc_page_buf(slot); - #ifdef HAVE_LZO if (fil_page_is_lzo_compressed(slot->buf)) { os_slot_alloc_lzo_mem(slot); } #endif - - fil_decompress_page(slot->page_buf, slot->buf, slot->len, slot->write_size); + fil_decompress_page( + slot->page_buf, + slot->buf, + slot->len, + slot->write_size); } } else { /* OS_FILE_WRITE */ - if (slot->page_compress_success && fil_page_is_compressed(slot->page_buf)) { + if (slot->page_compression_success && + (fil_page_is_compressed(slot->page_buf) || + fil_page_is_compressed_encrypted(slot->buf))) { if (srv_use_trim && os_fallocate_failed == FALSE) { // Deallocate unused blocks from file system os_file_trim(slot); @@ -5464,9 +5562,27 @@ retry: ut_a(slot->pos < end_pos); if (slot->type == OS_FILE_READ) { - /* If the table is page compressed and this is read, - we decompress before we annouce the read is - complete. For writes, we free the compressed page. */ + /* If the page is page encrypted we decrypt it */ + if (fil_page_is_compressed_encrypted(slot->buf) || + fil_page_is_encrypted(slot->buf)) { + os_slot_alloc_page_buf2(slot); + os_slot_alloc_tmp_encryption_buf(slot); + ut_ad(slot->message1 != NULL); + + // Decrypt the data + fil_space_decrypt( + fil_node_get_space_id(slot->message1), + slot->buf, + slot->len, + slot->page_buf2); + // Copy decrypted buffer back to buf + memcpy(slot->buf, slot->page_buf2, slot->len); + } + + /* If the table is page compressed and this + is read, we decompress before we announce + the read is complete. For writes, we free + the compressed page. */ if (fil_page_is_compressed(slot->buf)) { // We allocate memory for page compressed buffer if and only // if it is not yet allocated. 
@@ -5481,9 +5597,9 @@ retry: } } else { /* OS_FILE_WRITE */ - if (slot->page_compress_success && - fil_page_is_compressed(slot->page_buf)) { - ut_ad(slot->page_compression_page); + if (slot->page_compression_success && + (fil_page_is_compressed(slot->page_buf) || + fil_page_is_compressed_encrypted(slot->buf))) { if (srv_use_trim && os_fallocate_failed == FALSE) { // Deallocate unused blocks from file system os_file_trim(slot); @@ -6509,6 +6625,29 @@ os_file_trim( #endif /* !UNIV_HOTBACKUP */ /**********************************************************************//** +Allocate memory for the temporary buffer used for page encryption. This +buffer is freed later. */ +UNIV_INTERN +void +os_slot_alloc_page_buf2( +/*===================*/ + os_aio_slot_t* slot) /*!< in: slot structure */ +{ + ut_a(slot != NULL); + + if(slot->page_buf2 == NULL) { + byte* cbuf2; + byte* cbuf; + + cbuf2 = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*2)); + cbuf = static_cast<byte *>(ut_align(cbuf2, UNIV_PAGE_SIZE)); + slot->page_encryption_page = static_cast<byte *>(cbuf2); + slot->page_buf2 = static_cast<byte *>(cbuf); + memset(slot->page_encryption_page, 0, UNIV_PAGE_SIZE*2); + } +} + +/**********************************************************************//** Allocate memory for temporal buffer used for page compression. This buffer is freed later. 
*/ UNIV_INTERN @@ -6517,18 +6656,17 @@ os_slot_alloc_page_buf( /*===================*/ os_aio_slot_t* slot) /*!< in: slot structure */ { - byte* cbuf2; - byte* cbuf; - ut_a(slot != NULL); - if (slot->page_compression_page == NULL) { + if (slot->page_buf == NULL) { + byte* cbuf2; + byte* cbuf; /* We allocate extra to avoid memory overwrite on compression */ cbuf2 = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*2)); cbuf = static_cast<byte *>(ut_align(cbuf2, UNIV_PAGE_SIZE)); slot->page_compression_page = static_cast<byte *>(cbuf2); slot->page_buf = static_cast<byte *>(cbuf); - memset(slot->page_compression_page, 0, UNIV_PAGE_SIZE*2); ut_a(slot->page_buf != NULL); + memset(slot->page_compression_page, 0, UNIV_PAGE_SIZE*2); } } @@ -6545,12 +6683,28 @@ os_slot_alloc_lzo_mem( ut_a(slot != NULL); if(slot->lzo_mem == NULL) { slot->lzo_mem = static_cast<byte *>(ut_malloc(LZO1X_1_15_MEM_COMPRESS)); - memset(slot->lzo_mem, 0, LZO1X_1_15_MEM_COMPRESS); ut_a(slot->lzo_mem != NULL); + memset(slot->lzo_mem, 0, LZO1X_1_15_MEM_COMPRESS); } } #endif +/**********************************************************************//** +Allocate memory for the temporary buffer used for page encryption. */ +UNIV_INTERN +void +os_slot_alloc_tmp_encryption_buf( +/*=============================*/ + os_aio_slot_t* slot) /*!< in: slot structure */ +{ + ut_a(slot != NULL); + if (slot->tmp_encryption_buf == NULL) { + slot->tmp_encryption_buf = static_cast<byte *>(ut_malloc(64)); + memset(slot->tmp_encryption_buf, 0, 64); + } +} + + /***********************************************************************//** Try to get number of bytes per sector from file system. 
@return file block size */ diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc index bd5fb36af8f..4aff88818bb 100644 --- a/storage/innobase/page/page0page.cc +++ b/storage/innobase/page/page0page.cc @@ -1087,7 +1087,9 @@ delete_all: last_rec = page_rec_get_prev(page_get_supremum_rec(page)); - if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED)) { + bool scrub = srv_immediate_scrub_data_uncompressed; + if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED) || + scrub) { rec_t* rec2 = rec; /* Calculate the sum of sizes and the number of records */ size = 0; @@ -1104,6 +1106,12 @@ delete_all: size += s; n_recs++; + if (scrub) { + /* scrub record */ + uint recsize = rec_offs_data_size(offsets); + memset(rec2, 0, recsize); + } + rec2 = page_rec_get_next(rec2); } while (!page_rec_is_supremum(rec2)); diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index c513320afc1..d5f766ef51b 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -1990,7 +1990,8 @@ PageConverter::update_header( } mach_write_to_8( - get_frame(block) + FIL_PAGE_FILE_FLUSH_LSN, m_current_lsn); + get_frame(block) + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, + m_current_lsn); /* Write space_id to the tablespace header, page 0. 
*/ mach_write_to_4( diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index 68941b11c05..43446112bca 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -3245,6 +3245,41 @@ run_again: return(err); } +static +void +fil_wait_crypt_bg_threads( + dict_table_t* table) +{ + uint start = time(0); + uint last = start; + + if (table->space != 0) { + fil_space_crypt_mark_space_closing(table->space); + } + + while (table->n_ref_count > 0) { + dict_mutex_exit_for_mysql(); + os_thread_sleep(20000); + dict_mutex_enter_for_mysql(); + uint now = time(0); + if (now >= last + 30) { + fprintf(stderr, + "WARNING: waited %u seconds " + "for ref-count on table: %s space: %u\n", + now - start, table->name, table->space); + last = now; + } + + if (now >= start + 300) { + fprintf(stderr, + "WARNING: after %u seconds, gave up waiting " + "for ref-count on table: %s space: %u\n", + now - start, table->name, table->space); + break; + } + } +} + /*********************************************************************//** Truncates a table for MySQL. @return error code or DB_SUCCESS */ @@ -4055,6 +4090,9 @@ row_drop_table_for_mysql( shouldn't have to. There should never be record locks on a table that is going to be dropped. */ + /* Wait on background threads to stop using table */ + fil_wait_crypt_bg_threads(table); + if (table->n_ref_count == 0) { lock_remove_all_on_table(table, TRUE); ut_a(table->n_rec_locks == 0); diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc index 8580aa45145..7649add4b33 100644 --- a/storage/innobase/row/row0umod.cc +++ b/storage/innobase/row/row0umod.cc @@ -364,8 +364,15 @@ row_undo_mod_clust( } } - ut_ad(rec_get_trx_id(btr_pcur_get_rec(pcur), index) - == node->new_trx_id); + /** + * when scrubbing, and records gets cleared, + * the transaction id is not present afterwards. 
+ * this is safe as: since the record is on free-list + * it can be reallocated at any time after this mtr-commits + * which is just below + */ + ut_ad(srv_immediate_scrub_data_uncompressed || + rec_get_trx_id(btr_pcur_get_rec(pcur), index) == node->new_trx_id); btr_pcur_commit_specify_mtr(pcur, &mtr); diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc index 24cf403c0af..cb7c1ae6367 100644 --- a/storage/innobase/srv/srv0mon.cc +++ b/storage/innobase/srv/srv0mon.cc @@ -985,6 +985,21 @@ static monitor_info_t innodb_counter_info[] = MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR}, + {"compress_pages_page_encrypted", "compression", + "Number of pages encrypted by page encryption", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_ENCRYPTED}, + + {"compress_pages_page_decrypted", "compression", + "Number of pages decrypted by page encryption", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_DECRYPTED}, + + {"compress_pages_page_encryption_error", "compression", + "Number of page encryption errors ", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_ENCRYPTION_ERROR}, + /* ========== Counters for Index ========== */ {"module_index", "index", "Index Manager", MONITOR_MODULE, @@ -1998,6 +2013,15 @@ srv_mon_process_existing_counter( case MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR: value = srv_stats.pages_page_compression_error; break; + case MONITOR_OVLD_PAGES_PAGE_ENCRYPTED: + value = srv_stats.pages_page_encrypted; + break; + case MONITOR_OVLD_PAGES_PAGE_DECRYPTED: + value = srv_stats.pages_page_decrypted; + break; + case MONITOR_OVLD_PAGES_PAGE_ENCRYPTION_ERROR: + value = srv_stats.pages_page_encryption_error; + break; default: ut_error; diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index bcbce3cd53c..7c796efe58f 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -73,7 +73,9 @@ Created 10/8/1995 
Heikki Tuuri #include "mysql/plugin.h" #include "mysql/service_thd_wait.h" +#include "fil0fil.h" #include "fil0pagecompress.h" +#include "btr0scrub.h" #ifdef WITH_WSREP extern int wsrep_debug; @@ -93,6 +95,9 @@ UNIV_INTERN ibool srv_buf_dump_thread_active = FALSE; UNIV_INTERN ibool srv_dict_stats_thread_active = FALSE; +UNIV_INTERN ibool srv_log_scrub_active = FALSE; +UNIV_INTERN my_bool srv_scrub_log = FALSE; + UNIV_INTERN const char* srv_main_thread_op_info = ""; /** Prefix used by MySQL to indicate pre-5.1 table name encoding */ @@ -1426,10 +1431,14 @@ srv_export_innodb_status(void) ulint LRU_len; ulint free_len; ulint flush_list_len; + fil_crypt_stat_t crypt_stat; + btr_scrub_stat_t scrub_stat; buf_get_total_stat(&stat); buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len); buf_get_total_list_size_in_bytes(&buf_pools_list_size); + fil_crypt_total_stat(&crypt_stat); + btr_scrub_total_stat(&scrub_stat); mutex_enter(&srv_innodb_monitor_mutex); @@ -1584,6 +1593,10 @@ srv_export_innodb_status(void) export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op; export_vars.innodb_page_compressed_trim_op_saved = srv_stats.page_compressed_trim_op_saved; export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed; + export_vars.innodb_pages_page_compression_error = srv_stats.pages_page_compression_error; + export_vars.innodb_pages_page_decrypted = srv_stats.pages_page_decrypted; + export_vars.innodb_pages_page_encrypted = srv_stats.pages_page_encrypted; + export_vars.innodb_pages_page_encryption_error = srv_stats.pages_page_encryption_error; export_vars.innodb_defragment_compression_failures = btr_defragment_compression_failures; @@ -1627,6 +1640,30 @@ srv_export_innodb_status(void) export_vars.innodb_sec_rec_cluster_reads_avoided = srv_stats.n_sec_rec_cluster_reads_avoided; + export_vars.innodb_encryption_rotation_pages_read_from_cache = + crypt_stat.pages_read_from_cache; + 
export_vars.innodb_encryption_rotation_pages_read_from_disk = + crypt_stat.pages_read_from_disk; + export_vars.innodb_encryption_rotation_pages_modified = + crypt_stat.pages_modified; + export_vars.innodb_encryption_rotation_pages_flushed = + crypt_stat.pages_flushed; + export_vars.innodb_encryption_rotation_estimated_iops = + crypt_stat.estimated_iops; + + export_vars.innodb_scrub_page_reorganizations = + scrub_stat.page_reorganizations; + export_vars.innodb_scrub_page_splits = + scrub_stat.page_splits; + export_vars.innodb_scrub_page_split_failures_underflow = + scrub_stat.page_split_failures_underflow; + export_vars.innodb_scrub_page_split_failures_out_of_filespace = + scrub_stat.page_split_failures_out_of_filespace; + export_vars.innodb_scrub_page_split_failures_missing_index = + scrub_stat.page_split_failures_missing_index; + export_vars.innodb_scrub_page_split_failures_unknown = + scrub_stat.page_split_failures_unknown; + mutex_exit(&srv_innodb_monitor_mutex); } @@ -2010,6 +2047,8 @@ srv_any_background_threads_are_active(void) thread_active = "buf_dump_thread"; } else if (srv_dict_stats_thread_active) { thread_active = "dict_stats_thread"; + } else if (srv_scrub_log && srv_log_scrub_thread_active) { + thread_active = "log_scrub_thread"; } os_event_set(srv_error_event); @@ -2017,6 +2056,8 @@ srv_any_background_threads_are_active(void) os_event_set(srv_buf_dump_event); os_event_set(lock_sys->timeout_event); os_event_set(dict_stats_event); + if (srv_scrub_log) + os_event_set(log_scrub_event); return(thread_active); } diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 3d0d62c335b..3822a9abf2d 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -99,6 +99,7 @@ Created 2/16/1996 Heikki Tuuri # include "os0sync.h" # include "zlib.h" # include "ut0crc32.h" +# include "btr0scrub.h" /** Log sequence number immediately after startup */ UNIV_INTERN lsn_t srv_start_lsn; @@ -664,7 +665,8 @@ 
create_log_files( fil_space_create( logfilename, SRV_LOG_SPACE_FIRST_ID, fsp_flags_set_page_size(0, UNIV_PAGE_SIZE), - FIL_LOG); + FIL_LOG, + NULL /* no encryption yet */); ut_a(fil_validate()); logfile0 = fil_node_create( @@ -802,6 +804,7 @@ open_or_create_data_files( ulint space; ulint rounded_size_pages; char name[10000]; + fil_space_crypt_t* crypt_data; if (srv_n_data_files >= 1000) { @@ -1021,7 +1024,7 @@ check_first_page: min_arch_log_no, max_arch_log_no, #endif /* UNIV_LOG_ARCHIVE */ min_flushed_lsn, max_flushed_lsn, - ULINT_UNDEFINED); + ULINT_UNDEFINED, &crypt_data); if (check_msg) { @@ -1115,6 +1118,8 @@ check_first_page: } *sum_of_new_sizes += srv_data_file_sizes[i]; + + crypt_data = fil_space_create_crypt_data(); } ret = os_file_close(files[i]); @@ -1122,7 +1127,9 @@ check_first_page: if (i == 0) { flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE); - fil_space_create(name, 0, flags, FIL_TABLESPACE); + fil_space_create(name, 0, flags, FIL_TABLESPACE, + crypt_data); + crypt_data = NULL; } ut_a(fil_validate()); @@ -1268,7 +1275,8 @@ srv_undo_tablespace_open( /* Set the compressed page size to 0 (non-compressed) */ flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE); - fil_space_create(name, space, flags, FIL_TABLESPACE); + fil_space_create(name, space, flags, FIL_TABLESPACE, + NULL /* no encryption */); ut_a(fil_validate()); @@ -2257,7 +2265,8 @@ innobase_start_or_create_for_mysql(void) fil_space_create(logfilename, SRV_LOG_SPACE_FIRST_ID, fsp_flags_set_page_size(0, UNIV_PAGE_SIZE), - FIL_LOG); + FIL_LOG, + NULL /* no encryption yet */); ut_a(fil_validate()); @@ -2313,6 +2322,11 @@ files_checked: dict_stats_thread_init(); } + if (!srv_read_only_mode && srv_scrub_log) { + /* TODO(minliz): have/use log_scrub_thread_init() instead? */ + log_scrub_event = os_event_create(); + } + trx_sys_file_format_init(); trx_sys_create(); @@ -2917,6 +2931,16 @@ files_checked: /* Create the thread that will optimize the FTS sub-system. 
*/ fts_optimize_init(); + + /* Init data for datafile scrub threads */ + btr_scrub_init(); + + /* Create thread(s) that handles key rotation */ + fil_crypt_threads_init(); + + /* Create the log scrub thread */ + if (srv_scrub_log) + os_thread_create(log_scrub_thread, NULL, NULL); } /* Initialize online defragmentation. */ @@ -2982,6 +3006,9 @@ innobase_shutdown_for_mysql(void) fts_optimize_start_shutdown(); fts_optimize_end(); + + /* Shutdown key rotation threads */ + fil_crypt_threads_end(); } /* 1. Flush the buffer pool to disk, write the current lsn to @@ -3090,6 +3117,18 @@ innobase_shutdown_for_mysql(void) if (!srv_read_only_mode) { dict_stats_thread_deinit(); + if (srv_scrub_log) { + /* TODO(minliz): have/use log_scrub_thread_deinit() instead? */ + os_event_free(log_scrub_event); + log_scrub_event = NULL; + } + } + + if (!srv_read_only_mode) { + fil_crypt_threads_cleanup(); + + /* Cleanup data for datafile scrubbing */ + btr_scrub_cleanup(); } #ifdef __WIN__ diff --git a/storage/maria/CMakeLists.txt b/storage/maria/CMakeLists.txt index 0f30f8f3156..09c816aee4e 100644 --- a/storage/maria/CMakeLists.txt +++ b/storage/maria/CMakeLists.txt @@ -15,6 +15,14 @@ INCLUDE(CMakeDependentOption) +INCLUDE_DIRECTORIES( +${SSL_INCLUDE_DIRS} +) + +IF(SSL_DEFINES) +SET_SOURCE_FILES_PROPERTIES(ma_crypt.c PROPERTIES COMPILE_FLAGS ${SSL_DEFINES}) +ENDIF() + SET(ARIA_SOURCES ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c ma_rnext.c ma_rnext_same.c ma_search.c ma_page.c ma_key_recover.c ma_key.c @@ -39,6 +47,7 @@ SET(ARIA_SOURCES ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c ma_checkpoint.c ma_recovery.c ma_commit.c ma_pagecrc.c ha_maria.h maria_def.h ma_recovery_util.c ma_servicethread.c ma_norec.c + ma_crypt.c ) IF(APPLE) @@ -54,7 +63,8 @@ IF(NOT WITH_ARIA_STORAGE_ENGINE) RETURN() ENDIF() -TARGET_LINK_LIBRARIES(aria myisam) +TARGET_LINK_LIBRARIES(aria myisam + mysys mysys_ssl ${LIBCRYPT} ${LIBDL} ${SSL_LIBRARIES}) MYSQL_ADD_EXECUTABLE(aria_ftdump maria_ftdump.c 
COMPONENT Server) TARGET_LINK_LIBRARIES(aria_ftdump aria) diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 8eed173e475..4b95725a743 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -290,6 +290,11 @@ static MYSQL_SYSVAR_BOOL(used_for_temp_tables, "Whether temporary tables should be MyISAM or Aria", 0, 0, 1); +static MYSQL_SYSVAR_BOOL(encrypt_tables, maria_encrypt_tables, 0, + "Encrypt tables (only for tables with ROW_FORMAT=PAGE (default) " + "and not FIXED/DYNAMIC)", + 0, 0, 0); + #ifdef HAVE_PSI_INTERFACE static PSI_mutex_info all_aria_mutexes[]= @@ -3133,6 +3138,11 @@ int ha_maria::create(const char *name, register TABLE *table_arg, ha_create_info->page_checksum == HA_CHOICE_YES) create_flags|= HA_CREATE_PAGE_CHECKSUM; + mysql_mutex_lock(&LOCK_global_system_variables); + mysql_mutex_unlock(&LOCK_global_system_variables); + if (row_type == BLOCK_RECORD && maria_encrypt_tables) + create_flags|= HA_CREATE_ENCRYPTED; + (void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY, (uchar*) thd->query(), thd->query_length()); @@ -3702,6 +3712,7 @@ struct st_mysql_sys_var* system_variables[]= { MYSQL_SYSVAR(stats_method), MYSQL_SYSVAR(sync_log_dir), MYSQL_SYSVAR(used_for_temp_tables), + MYSQL_SYSVAR(encrypt_tables), NULL }; diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c index e1ccceb78cd..d9d4564fc99 100644 --- a/storage/maria/ma_bitmap.c +++ b/storage/maria/ma_bitmap.c @@ -155,7 +155,7 @@ static inline my_bool write_changed_bitmap(MARIA_SHARE *share, my_bool res; DBUG_ENTER("write_changed_bitmap"); DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size); - DBUG_ASSERT(bitmap->file.write_callback != 0); + DBUG_ASSERT(bitmap->file.pre_write_hook != 0); DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable)); /* @@ -238,6 +238,7 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file, sizeof(MARIA_PINNED_PAGE), 1, 1, MYF(0))) return 1; + bitmap->share= share; bitmap->block_size= 
share->block_size; bitmap->file.file= file; _ma_bitmap_set_pagecache_callbacks(&bitmap->file, share); @@ -256,7 +257,7 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file, /* Update size for bits */ /* TODO; Make this dependent of the row size */ - max_page_size= share->block_size - PAGE_OVERHEAD_SIZE + DIR_ENTRY_SIZE; + max_page_size= share->block_size - PAGE_OVERHEAD_SIZE(share) + DIR_ENTRY_SIZE; bitmap->sizes[0]= max_page_size; /* Empty page */ bitmap->sizes[1]= max_page_size - max_page_size * 30 / 100; bitmap->sizes[2]= max_page_size - max_page_size * 60 / 100; @@ -1240,9 +1241,22 @@ static my_bool allocate_head(MARIA_FILE_BITMAP *bitmap, uint size, uchar *data= bitmap->map, *end= data + bitmap->used_size; uchar *best_data= 0; uint best_bits= (uint) -1, UNINIT_VAR(best_pos); + uint first_pattern= 0; /* if doing insert_order */ + MARIA_SHARE *share= bitmap->share; + my_bool insert_order= + MY_TEST(share->base.extra_options & MA_EXTRA_OPTIONS_INSERT_ORDER); DBUG_ENTER("allocate_head"); - DBUG_ASSERT(size <= FULL_PAGE_SIZE(bitmap->block_size)); + DBUG_ASSERT(size <= FULL_PAGE_SIZE(share)); + + if (insert_order) + { + uint last_insert_page= share->last_insert_page; + uint byte= 6 * (last_insert_page / 16); + first_pattern= last_insert_page % 16; + DBUG_ASSERT(data + byte < end); + data+= byte; + } for (; data < end; data+= 6) { @@ -1257,8 +1271,12 @@ static my_bool allocate_head(MARIA_FILE_BITMAP *bitmap, uint size, */ if ((!bits && best_data) || ((bits & 04444444444444444LL) == 04444444444444444LL)) + { + first_pattern= 0; // always restart from 0 when moving to new 6-byte continue; - for (i= 0; i < 16 ; i++, bits >>= 3) + } + for (i= first_pattern, bits >>= (3 * first_pattern); i < 16 ; + i++, bits >>= 3) { uint pattern= (uint) (bits & 7); if (pattern <= min_bits) @@ -1279,6 +1297,7 @@ static my_bool allocate_head(MARIA_FILE_BITMAP *bitmap, uint size, } } } + first_pattern= 0; // always restart from 0 when moving to new 6-byte } if (!best_data) /* Found no 
place */ { @@ -1292,6 +1311,11 @@ static my_bool allocate_head(MARIA_FILE_BITMAP *bitmap, uint size, } found: + if (insert_order) + { + share->last_insert_page= + ((uint) (best_data - bitmap->map)) / 6 * 16 + best_pos; + } fill_block(bitmap, block, best_data, best_pos, best_bits, FULL_HEAD_PAGE); DBUG_RETURN(0); } @@ -1621,7 +1645,7 @@ static my_bool find_tail(MARIA_HA *info, uint length, uint position) MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; MARIA_BITMAP_BLOCK *block; DBUG_ENTER("find_tail"); - DBUG_ASSERT(length <= info->s->block_size - PAGE_OVERHEAD_SIZE); + DBUG_ASSERT(length <= info->s->block_size - PAGE_OVERHEAD_SIZE(info->s)); /* Needed, as there is no error checking in dynamic_element */ if (allocate_dynamic(&info->bitmap_blocks, position)) @@ -1694,7 +1718,7 @@ static my_bool find_mid(MARIA_HA *info, ulong pages, uint position) static my_bool find_blob(MARIA_HA *info, ulong length) { MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; - uint full_page_size= FULL_PAGE_SIZE(info->s->block_size); + uint full_page_size= FULL_PAGE_SIZE(info->s); ulong pages; uint rest_length, used; uint first_block_pos; @@ -1909,7 +1933,7 @@ static my_bool write_rest_of_head(MARIA_HA *info, uint position, ulong rest_length) { MARIA_SHARE *share= info->s; - uint full_page_size= FULL_PAGE_SIZE(share->block_size); + uint full_page_size= FULL_PAGE_SIZE(share); MARIA_BITMAP_BLOCK *block; DBUG_ENTER("write_rest_of_head"); DBUG_PRINT("enter", ("position: %u rest_length: %lu", position, @@ -1995,7 +2019,7 @@ my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row, */ info->bitmap_blocks.elements= ELEMENTS_RESERVED_FOR_MAIN_PART; - max_page_size= (share->block_size - PAGE_OVERHEAD_SIZE); + max_page_size= (share->block_size - PAGE_OVERHEAD_SIZE(share)); mysql_mutex_lock(&share->bitmap.bitmap_lock); @@ -2890,12 +2914,10 @@ int _ma_bitmap_create_first(MARIA_SHARE *share) */ static my_bool -flush_log_for_bitmap(uchar *page __attribute__((unused)), - pgcache_page_no_t page_no 
__attribute__((unused)), - uchar *data_ptr __attribute__((unused))) +flush_log_for_bitmap(PAGECACHE_IO_HOOK_ARGS *args __attribute__ ((unused))) { #ifndef DBUG_OFF - const MARIA_SHARE *share= (MARIA_SHARE*)data_ptr; + const MARIA_SHARE *share= (MARIA_SHARE*)args->data; #endif DBUG_ENTER("flush_log_for_bitmap"); DBUG_ASSERT(share->now_transactional); @@ -2918,22 +2940,23 @@ flush_log_for_bitmap(uchar *page __attribute__((unused)), void _ma_bitmap_set_pagecache_callbacks(PAGECACHE_FILE *file, MARIA_SHARE *share) { + pagecache_file_set_null_hooks(file); file->callback_data= (uchar*) share; file->flush_log_callback= maria_flush_log_for_page_none; - file->write_fail= maria_page_write_failure; + file->post_write_hook= maria_page_write_failure; if (share->temporary) { - file->read_callback= &maria_page_crc_check_none; - file->write_callback= &maria_page_filler_set_none; + file->post_read_hook= &maria_page_crc_check_none; + file->pre_write_hook= &maria_page_filler_set_none; } else { - file->read_callback= &maria_page_crc_check_bitmap; + file->post_read_hook= &maria_page_crc_check_bitmap; if (share->options & HA_OPTION_PAGE_CHECKSUM) - file->write_callback= &maria_page_crc_set_normal; + file->pre_write_hook= &maria_page_crc_set_normal; else - file->write_callback= &maria_page_filler_set_bitmap; + file->pre_write_hook= &maria_page_filler_set_bitmap; if (share->now_transactional) file->flush_log_callback= flush_log_for_bitmap; } diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index fe719888817..bf66424e539 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -340,10 +340,12 @@ static my_bool delete_head_or_tail(MARIA_HA *info, pgcache_page_no_t page, uint record_number, my_bool head, my_bool from_update); #ifndef DBUG_OFF -static void _ma_print_directory(FILE *file, uchar *buff, uint block_size); +static void _ma_print_directory(MARIA_SHARE *share, + FILE *file, uchar *buff, uint block_size); #endif -static uchar 
*store_page_range(uchar *to, MARIA_BITMAP_BLOCK *block, - uint block_size, ulong length, +static uchar *store_page_range(MARIA_SHARE *share, + uchar *to, MARIA_BITMAP_BLOCK *block, + ulong length, uint *tot_ranges); static size_t fill_insert_undo_parts(MARIA_HA *info, const uchar *record, LEX_CUSTRING *log_parts, @@ -523,7 +525,7 @@ my_bool _ma_init_block_record(MARIA_HA *info) /* Reserve some initial space to avoid mallocs during execution */ default_extents= (ELEMENTS_RESERVED_FOR_MAIN_PART + 1 + (AVERAGE_BLOB_SIZE / - FULL_PAGE_SIZE(share->block_size) / + FULL_PAGE_SIZE(share) / BLOB_SEGMENT_MIN_SIZE)); if (my_init_dynamic_array(&info->bitmap_blocks, @@ -616,7 +618,8 @@ static inline uint start_of_next_entry(uchar *dir) */ -static inline uint end_of_previous_entry(uchar *dir, uchar *end) +static inline uint end_of_previous_entry(MARIA_SHARE *share, + uchar *dir, uchar *end) { uchar *pos; for (pos= dir + DIR_ENTRY_SIZE ; pos < end ; pos+= DIR_ENTRY_SIZE) @@ -625,16 +628,17 @@ static inline uint end_of_previous_entry(uchar *dir, uchar *end) if ((offset= uint2korr(pos))) return offset + uint2korr(pos+2); } - return PAGE_HEADER_SIZE; + return PAGE_HEADER_SIZE(share); } #ifndef DBUG_OFF -static void _ma_print_directory(FILE *file, uchar *buff, uint block_size) +static void _ma_print_directory(MARIA_SHARE *share, + FILE *file, uchar *buff, uint block_size) { uint max_entry= (uint) ((uchar *) buff)[DIR_COUNT_OFFSET], row= 0; - uint end_of_prev_row= PAGE_HEADER_SIZE; + uint end_of_prev_row= PAGE_HEADER_SIZE(share); uchar *dir, *end; dir= dir_entry_pos(buff, block_size, max_entry-1); @@ -662,13 +666,14 @@ static void _ma_print_directory(FILE *file, uchar *buff, uint block_size) } -static void check_directory(uchar *buff, uint block_size, uint min_row_length, +static void check_directory(MARIA_SHARE *share, + uchar *buff, uint block_size, uint min_row_length, uint real_empty_size) { uchar *dir, *end; uint max_entry= (uint) buff[DIR_COUNT_OFFSET]; uint start_of_dir, 
deleted; - uint end_of_prev_row= PAGE_HEADER_SIZE; + uint end_of_prev_row= PAGE_HEADER_SIZE(share); uint empty_size_on_page; uint empty_size; uchar free_entry, prev_free_entry; @@ -715,7 +720,7 @@ static void check_directory(uchar *buff, uint block_size, uint min_row_length, DBUG_ASSERT(deleted == 0); } #else -#define check_directory(A,B,C,D) +#define check_directory(A,B,C,D,E) #endif /* DBUG_OFF */ @@ -779,7 +784,7 @@ my_bool enough_free_entries_on_page(MARIA_SHARE *share, @brief Extend a record area to fit a given size block @fn extend_area_on_page() - @param info Handler if head page and 0 if tail page + @param info Handler @param buff Page buffer @param dir Pointer to dir entry in buffer @param rownr Row number we working on @@ -788,6 +793,7 @@ my_bool enough_free_entries_on_page(MARIA_SHARE *share, @param empty_space Total empty space in buffer This is updated with length after dir is allocated and current block freed + @param head_page 1 if head page, 0 for tail page @implementation The logic is as follows (same as in _ma_update_block_record()) @@ -812,20 +818,23 @@ my_bool enough_free_entries_on_page(MARIA_SHARE *share, static my_bool extend_area_on_page(MARIA_HA *info, uchar *buff, uchar *dir, - uint rownr, uint block_size, + uint rownr, uint request_length, uint *empty_space, uint *ret_offset, - uint *ret_length) + uint *ret_length, + my_bool head_page) { uint rec_offset, length, org_rec_length; uint max_entry= (uint) buff[DIR_COUNT_OFFSET]; + MARIA_SHARE *share= info->s; + uint block_size= share->block_size; DBUG_ENTER("extend_area_on_page"); /* We can't check for min length here as we may have called extend_directory() to create a new (empty) entry just before */ - check_directory(buff, block_size, 0, *empty_space); + check_directory(share, buff, block_size, 0, *empty_space); rec_offset= uint2korr(dir); if (rec_offset) @@ -867,7 +876,8 @@ static my_bool extend_area_on_page(MARIA_HA *info, Find first possible position where to put new data. 
*/ old_rec_offset= rec_offset; - rec_offset= end_of_previous_entry(dir, buff + block_size - + rec_offset= end_of_previous_entry(share, + dir, buff + block_size - PAGE_SUFFIX_SIZE); length+= (uint) (old_rec_offset - rec_offset); DBUG_ASSERT(old_rec_offset); @@ -896,9 +906,10 @@ static my_bool extend_area_on_page(MARIA_HA *info, int2store(dir, rec_offset); /* Reset length, as this may be a deleted block */ int2store(dir+2, 0); - _ma_compact_block_page(buff, block_size, rownr, 1, - info ? info->trn->min_read_from: 0, - info ? info->s->base.min_block_length : 0); + _ma_compact_block_page(share, + buff, rownr, 1, + head_page ? info->trn->min_read_from: 0, + head_page ? share->base.min_block_length : 0); rec_offset= uint2korr(dir); length= uint2korr(dir+2); if (length < request_length) @@ -906,7 +917,7 @@ static my_bool extend_area_on_page(MARIA_HA *info, DBUG_PRINT("error", ("Not enough space: " "length: %u request_length: %u", length, request_length)); - _ma_set_fatal_error(info->s, HA_ERR_WRONG_IN_RECORD); + _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); DBUG_RETURN(1); /* Error in block */ } *empty_space= length; /* All space is here */ @@ -918,7 +929,9 @@ static my_bool extend_area_on_page(MARIA_HA *info, *ret_offset= rec_offset; *ret_length= length; - check_directory(buff, block_size, info ? info->s->base.min_block_length : 0, + check_directory(share, + buff, block_size, + head_page ? share->base.min_block_length : 0, *empty_space - length); DBUG_RETURN(0); } @@ -984,14 +997,15 @@ static uint empty_space_on_page(uchar *buff, uint block_size) @brief Ensure we have space for new directory entries @fn make_space_for_directory() + @param info Handler @param buff Page buffer - @param block_size Block size for pages @param max_entry Number of current entries in directory @param count Number of new entries to be added to directory @param first_dir First directory entry on page @param empty_space Total empty space in buffer. 
It's updated to reflect the new empty space @param first_pos Store position to last data byte on page here + @param head_page 1 if head page, 0 for tail page. @note This function is inline as the argument passing is the biggest @@ -1004,11 +1018,13 @@ static uint empty_space_on_page(uchar *buff, uint block_size) static inline my_bool make_space_for_directory(MARIA_HA *info, - uchar *buff, uint block_size, uint max_entry, + uchar *buff, uint max_entry, uint count, uchar *first_dir, uint *empty_space, - uint *first_pos) + uint *first_pos, + my_bool head_page) { uint length_needed= DIR_ENTRY_SIZE * count; + MARIA_SHARE *share= info->s; /* The following is not true only in the case and UNDO is used to reinsert @@ -1022,9 +1038,10 @@ make_space_for_directory(MARIA_HA *info, if ((uint) (first_dir - buff) < *first_pos + length_needed) { /* Create place for directory */ - _ma_compact_block_page(buff, block_size, max_entry - 1, 0, - info ? info->trn->min_read_from : 0, - info ? info->s->base.min_block_length : 0); + _ma_compact_block_page(share, + buff, max_entry - 1, 0, + head_page ? info->trn->min_read_from : 0, + head_page ? share->base.min_block_length : 0); *first_pos= (uint2korr(first_dir) + uint2korr(first_dir + 2)); *empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); if (*empty_space < length_needed) @@ -1040,7 +1057,7 @@ make_space_for_directory(MARIA_HA *info, } } else - *first_pos= PAGE_HEADER_SIZE; + *first_pos= PAGE_HEADER_SIZE(share); /* Reduce directory entry size from free space size */ (*empty_space)-= length_needed; @@ -1054,13 +1071,14 @@ make_space_for_directory(MARIA_HA *info, SYNOPSIS find_free_position() - info Handler if head page and 0 otherwise + info Handler buff Page block_size Size of page res_rownr Store index to free position here res_length Store length of found segment here empty_space Store length of empty space on disk here. This is all empty space, including the found block. + @param head_page 1 if head page, 0 for tail page. 
NOTES If there is a free directory entry (entry with position == 0), @@ -1088,11 +1106,13 @@ make_space_for_directory(MARIA_HA *info, static uchar *find_free_position(MARIA_HA *info, uchar *buff, uint block_size, uint *res_rownr, - uint *res_length, uint *empty_space) + uint *res_length, uint *empty_space, + my_bool head_page) { uint max_entry, free_entry; uint length, first_pos; uchar *dir, *first_dir; + MARIA_SHARE *share= info->s; DBUG_ENTER("find_free_position"); max_entry= (uint) buff[DIR_COUNT_OFFSET]; @@ -1119,7 +1139,8 @@ static uchar *find_free_position(MARIA_HA *info, next_entry[2]= END_OF_DIR_FREE_LIST; /* Backlink */ } - first_pos= end_of_previous_entry(dir, buff + block_size - + first_pos= end_of_previous_entry(share, + dir, buff + block_size - PAGE_SUFFIX_SIZE); length= start_of_next_entry(dir) - first_pos; int2store(dir, first_pos); /* Update dir entry */ @@ -1127,8 +1148,8 @@ static uchar *find_free_position(MARIA_HA *info, *res_rownr= free_entry; *res_length= length; - check_directory(buff, block_size, - info ? info->s->base.min_block_length : 0, (uint) -1); + check_directory(share, buff, block_size, + head_page ? share->base.min_block_length : 0, (uint) -1); DBUG_RETURN(dir); } /* No free places in dir; create a new one */ @@ -1137,8 +1158,8 @@ static uchar *find_free_position(MARIA_HA *info, if (max_entry == MAX_ROWS_PER_PAGE) DBUG_RETURN(0); - if (make_space_for_directory(info, buff, block_size, max_entry, 1, - first_dir, empty_space, &first_pos)) + if (make_space_for_directory(info, buff, max_entry, 1, + first_dir, empty_space, &first_pos, head_page)) DBUG_RETURN(0); dir= first_dir - DIR_ENTRY_SIZE; @@ -1149,7 +1170,9 @@ static uchar *find_free_position(MARIA_HA *info, *res_rownr= max_entry; *res_length= length; - check_directory(buff, block_size, info ? info->s->base.min_block_length : 0, + check_directory(share, + buff, block_size, + head_page ? 
share->base.min_block_length : 0, *empty_space); DBUG_RETURN(dir); } @@ -1159,13 +1182,14 @@ static uchar *find_free_position(MARIA_HA *info, @brief Enlarge page directory to hold more entries @fn extend_directory() - @param info Handler if head page and 0 otherwise + @param info Handler @param buff Page buffer @param block_size Block size @param max_entry Number of directory entries on page @param new_entry Position for new entry @param empty_space Total empty space in buffer. It's updated to reflect the new empty space + @param head_page 1 if head page, 0 for tail page. @note This is only called on UNDO when we want to expand the directory @@ -1180,10 +1204,11 @@ static uchar *find_free_position(MARIA_HA *info, static my_bool extend_directory(MARIA_HA *info, uchar *buff, uint block_size, uint max_entry, uint new_entry, - uint *empty_space) + uint *empty_space, my_bool head_page) { uint length, first_pos; uchar *dir, *first_dir; + MARIA_SHARE *share= info->s; DBUG_ENTER("extend_directory"); /* @@ -1193,9 +1218,9 @@ static my_bool extend_directory(MARIA_HA *info, uchar *buff, uint block_size, */ first_dir= dir_entry_pos(buff, block_size, max_entry) + DIR_ENTRY_SIZE; - if (make_space_for_directory(info, buff, block_size, max_entry, + if (make_space_for_directory(info, buff, max_entry, new_entry - max_entry + 1, - first_dir, empty_space, &first_pos)) + first_dir, empty_space, &first_pos, head_page)) DBUG_RETURN(1); /* Set the new directory entry to cover the max possible length */ @@ -1229,8 +1254,9 @@ static my_bool extend_directory(MARIA_HA *info, uchar *buff, uint block_size, } } - check_directory(buff, block_size, - info ? MY_MIN(info->s->base.min_block_length, length) : 0, + check_directory(share, + buff, block_size, + head_page ? 
MY_MIN(share->base.min_block_length, length) : 0, *empty_space); DBUG_RETURN(0); } @@ -1432,25 +1458,27 @@ static void calc_record_size(MARIA_HA *info, const uchar *record, @param min_read_from If <> 0, remove all trid's that are less than this */ -void _ma_compact_block_page(uchar *buff, uint block_size, uint rownr, +void _ma_compact_block_page(MARIA_SHARE *share, + uchar *buff, uint rownr, my_bool extend_block, TrID min_read_from, uint min_row_length) { uint max_entry= (uint) buff[DIR_COUNT_OFFSET]; uint page_pos, next_free_pos, start_of_found_block, diff, end_of_found_block; uint freed_size= 0; + uint block_size= share->block_size; uchar *dir, *end; DBUG_ENTER("_ma_compact_block_page"); DBUG_PRINT("enter", ("rownr: %u min_read_from: %lu", rownr, (ulong) min_read_from)); DBUG_ASSERT(max_entry > 0 && - max_entry < (block_size - PAGE_HEADER_SIZE - + max_entry < (block_size - PAGE_HEADER_SIZE(share) - PAGE_SUFFIX_SIZE) / DIR_ENTRY_SIZE); /* Move all entries before and including rownr up to start of page */ dir= dir_entry_pos(buff, block_size, rownr); end= dir_entry_pos(buff, block_size, 0); - page_pos= next_free_pos= start_of_found_block= PAGE_HEADER_SIZE; + page_pos= next_free_pos= start_of_found_block= PAGE_HEADER_SIZE(share); diff= 0; for (; dir <= end ; end-= DIR_ENTRY_SIZE) { @@ -1634,9 +1662,10 @@ void _ma_compact_block_page(uchar *buff, uint block_size, uint rownr, } buff[PAGE_TYPE_OFFSET]&= ~(uchar) PAGE_CAN_BE_COMPACTED; } - check_directory(buff, block_size, min_row_length, + check_directory(share, buff, block_size, min_row_length, extend_block ? 
0 : (uint) -1); - DBUG_EXECUTE("directory", _ma_print_directory(DBUG_FILE, buff, block_size);); + DBUG_EXECUTE("directory", _ma_print_directory(share, + DBUG_FILE, buff, block_size);); DBUG_VOID_RETURN; } @@ -1661,7 +1690,7 @@ static void make_empty_page(MARIA_HA *info, uchar *buff, uint page_type, uint block_size= info->s->block_size; DBUG_ENTER("make_empty_page"); - bzero(buff, PAGE_HEADER_SIZE); + bzero(buff, PAGE_HEADER_SIZE(info->s)); #if !defined(DONT_ZERO_PAGE_BLOCKS) || defined(HAVE_valgrind) /* @@ -1670,7 +1699,8 @@ static void make_empty_page(MARIA_HA *info, uchar *buff, uint page_type, The code does not assume the block is zeroed. */ if (page_type != BLOB_PAGE) - bzero(buff+ PAGE_HEADER_SIZE, block_size - PAGE_HEADER_SIZE); + bzero(buff+ PAGE_HEADER_SIZE(info->s), + block_size - PAGE_HEADER_SIZE(info->s)); #endif buff[PAGE_TYPE_OFFSET]= (uchar) page_type; buff[DIR_COUNT_OFFSET]= (int) create_dir_entry; @@ -1679,7 +1709,7 @@ static void make_empty_page(MARIA_HA *info, uchar *buff, uint page_type, { /* Create directory entry to point to start of page with size 0 */ buff+= block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE; - int2store(buff, PAGE_HEADER_SIZE); + int2store(buff, PAGE_HEADER_SIZE(info->s)); int2store(buff+2, 0); } DBUG_VOID_RETURN; @@ -1738,8 +1768,8 @@ static my_bool get_head_or_tail_page(MARIA_HA *info, /* New page */ make_empty_page(info, buff, page_type, 1); res->buff= buff; - res->empty_space= res->length= (block_size - PAGE_OVERHEAD_SIZE); - res->data= (buff + PAGE_HEADER_SIZE); + res->empty_space= res->length= (block_size - PAGE_OVERHEAD_SIZE(share)); + res->data= (buff + PAGE_HEADER_SIZE(share)); res->dir= res->data + res->length; res->rownr= 0; DBUG_ASSERT(length <= res->length); @@ -1759,16 +1789,17 @@ static my_bool get_head_or_tail_page(MARIA_HA *info, DBUG_ASSERT((uint) (res->buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == page_type); - if (!(dir= find_free_position(page_type == HEAD_PAGE ? 
info : 0, - res->buff, block_size, &res->rownr, - &res->length, &res->empty_space))) + if (!(dir= find_free_position(info, res->buff, block_size, &res->rownr, + &res->length, &res->empty_space, + page_type == HEAD_PAGE))) goto crashed; if (res->length < length) { if (res->empty_space + res->length >= length) { - _ma_compact_block_page(res->buff, block_size, res->rownr, 1, + _ma_compact_block_page(share, + res->buff, res->rownr, 1, (page_type == HEAD_PAGE ? info->trn->min_read_from : 0), (page_type == HEAD_PAGE ? @@ -1839,7 +1870,7 @@ static my_bool get_rowpos_in_head_or_tail_page(MARIA_HA *info, { /* New page */ make_empty_page(info, buff, page_type, 0); - res->empty_space= block_size - PAGE_HEADER_SIZE - PAGE_SUFFIX_SIZE; + res->empty_space= block_size - PAGE_HEADER_SIZE(share) - PAGE_SUFFIX_SIZE; } else { @@ -1861,8 +1892,9 @@ static my_bool get_rowpos_in_head_or_tail_page(MARIA_HA *info, max_entry= (uint) buff[DIR_COUNT_OFFSET]; if (max_entry <= rownr) { - if (extend_directory(page_type == HEAD_PAGE ? info : 0, buff, block_size, - max_entry, rownr, &res->empty_space)) + if (extend_directory(info, buff, block_size, + max_entry, rownr, &res->empty_space, + page_type == HEAD_PAGE)) goto err; } @@ -1872,9 +1904,9 @@ static my_bool get_rowpos_in_head_or_tail_page(MARIA_HA *info, */ dir= dir_entry_pos(buff, block_size, rownr); - if (extend_area_on_page(page_type == HEAD_PAGE ? 
info : 0, buff, dir, - rownr, block_size, length, - &res->empty_space, &rec_offset, &max_length)) + if (extend_area_on_page(info, buff, dir, rownr, length, + &res->empty_space, &rec_offset, &max_length, + page_type == HEAD_PAGE)) goto err; res->buff= buff; @@ -2085,7 +2117,7 @@ static my_bool write_full_pages(MARIA_HA *info, pgcache_page_no_t page; MARIA_SHARE *share= info->s; uint block_size= share->block_size; - uint data_size= FULL_PAGE_SIZE(block_size); + uint data_size= FULL_PAGE_SIZE(share); uchar *buff= info->keyread_buff; uint page_count, sub_blocks; my_off_t position, max_position; @@ -2126,8 +2158,10 @@ static my_bool write_full_pages(MARIA_HA *info, } lsn_store(buff, lsn); buff[PAGE_TYPE_OFFSET]= (uchar) BLOB_PAGE; + bzero(buff + LSN_SIZE + PAGE_TYPE_SIZE, + FULL_PAGE_HEADER_SIZE(share) - (LSN_SIZE + PAGE_TYPE_SIZE)); copy_length= MY_MIN(data_size, length); - memcpy(buff + LSN_SIZE + PAGE_TYPE_SIZE, data, copy_length); + memcpy(buff + FULL_PAGE_HEADER_SIZE(share), data, copy_length); length-= copy_length; /* @@ -2163,7 +2197,6 @@ static my_bool write_full_pages(MARIA_HA *info, store_page_range() to Store data here block Where pages are to be written - block_size block size length Length of data to be written Normally this is full pages, except for the last tail block that may only partly fit the last page. 
@@ -2182,11 +2215,12 @@ static my_bool write_full_pages(MARIA_HA *info, # end position for 'to' */ -static uchar *store_page_range(uchar *to, MARIA_BITMAP_BLOCK *block, - uint block_size, ulong length, +static uchar *store_page_range(MARIA_SHARE *share, + uchar *to, MARIA_BITMAP_BLOCK *block, + ulong length, uint *tot_ranges) { - uint data_size= FULL_PAGE_SIZE(block_size); + uint data_size= FULL_PAGE_SIZE(share); ulong pages_left= (length + data_size -1) / data_size; uint page_count, ranges, empty_space; uchar *to_start; @@ -2853,7 +2887,8 @@ static my_bool write_block_record(MARIA_HA *info, head_block->empty_space= 0; /* Page is full */ head_block->used|= BLOCKUSED_USED; - check_directory(page_buff, share->block_size, share->base.min_block_length, + check_directory(share, + page_buff, share->block_size, share->base.min_block_length, (uint) -1); /* @@ -2889,7 +2924,7 @@ static my_bool write_block_record(MARIA_HA *info, uint length; length= column->length - portable_sizeof_char_ptr; memcpy(&blob_pos, record + column->offset + length, sizeof(char*)); - length= *blob_lengths % FULL_PAGE_SIZE(block_size); /* tail size */ + length= *blob_lengths % FULL_PAGE_SIZE(share); /* tail size */ if (length != *blob_lengths) blob_full_pages_exists= 1; if (write_tail(info, block + block->sub_blocks-1, @@ -2956,7 +2991,7 @@ static my_bool write_block_record(MARIA_HA *info, we find the empty page block. 
*/ while (data_length >= (length= (cur_block->page_count * - FULL_PAGE_SIZE(block_size))) && + FULL_PAGE_SIZE(share))) && cur_block->page_count) { #ifdef SANITY_CHECKS @@ -3016,7 +3051,7 @@ static my_bool write_block_record(MARIA_HA *info, } else { - DBUG_ASSERT(data_length < length - FULL_PAGE_SIZE(block_size)); + DBUG_ASSERT(data_length < length - FULL_PAGE_SIZE(share)); DBUG_PRINT("info", ("Splitting blocks into full and tail")); cur_block[1].page= (cur_block->page + cur_block->page_count - 1); cur_block[1].page_count= 1; /* Avoid DBUG_ASSERT */ @@ -3052,7 +3087,7 @@ static my_bool write_block_record(MARIA_HA *info, ulong block_length= (ulong) (tmp_data - info->rec_buff); uchar *extent_data; - length= (uint) (block_length % FULL_PAGE_SIZE(block_size)); + length= (uint) (block_length % FULL_PAGE_SIZE(share)); if (write_tail(info, head_tail_block, info->rec_buff + block_length - length, length)) @@ -3216,7 +3251,8 @@ static my_bool write_block_record(MARIA_HA *info, /* Full head page */ translog_size_t block_length= (translog_size_t) (tmp_data - info->rec_buff); - log_pos= store_page_range(log_pos, head_block+1, block_size, + log_pos= store_page_range(share, + log_pos, head_block+1, (ulong) block_length, &extents); log_array_pos->str= info->rec_buff; log_array_pos->length= block_length; @@ -3245,7 +3281,7 @@ static my_bool write_block_record(MARIA_HA *info, reflect this */ if (tmp_block[tmp_block->sub_blocks - 1].used & BLOCKUSED_TAIL) - blob_length-= (blob_length % FULL_PAGE_SIZE(block_size)); + blob_length-= (blob_length % FULL_PAGE_SIZE(share)); if (blob_length) { memcpy((void*) &log_array_pos->str, @@ -3256,7 +3292,8 @@ static my_bool write_block_record(MARIA_HA *info, log_array_pos++; sub_extents++; - log_pos= store_page_range(log_pos, tmp_block, block_size, + log_pos= store_page_range(share, + log_pos, tmp_block, blob_length, &extents); } tmp_block+= tmp_block->sub_blocks; @@ -3418,7 +3455,7 @@ static my_bool write_block_record(MARIA_HA *info, /* remove tail 
part */ blob_length= *blob_lengths; if (block[block->sub_blocks - 1].used & BLOCKUSED_TAIL) - blob_length-= (blob_length % FULL_PAGE_SIZE(block_size)); + blob_length-= (blob_length % FULL_PAGE_SIZE(share)); if (blob_length && write_full_pages(info, lsn, block, blob_pos, blob_length)) @@ -3734,9 +3771,9 @@ static my_bool _ma_update_block_record2(MARIA_HA *info, */ block.org_bitmap_value= _ma_free_size_to_head_pattern(&share->bitmap, org_empty_size); - if (extend_area_on_page(info, buff, dir, rownr, block_size, + if (extend_area_on_page(info, buff, dir, rownr, new_row->total_length, &org_empty_size, - &rec_offset, &length)) + &rec_offset, &length, 1)) { errpos= 1; goto err; @@ -3803,7 +3840,8 @@ static my_bool _ma_update_block_record2(MARIA_HA *info, (new_row->total_length <= head_length && org_empty_size + head_length >= new_row->total_length))) { - _ma_compact_block_page(buff, block_size, rownr, 1, + _ma_compact_block_page(share, + buff, rownr, 1, info->trn->min_read_from, share->base.min_block_length); org_empty_size= 0; @@ -3914,9 +3952,9 @@ static my_bool _ma_update_at_original_place(MARIA_HA *info, of the row */ empty_size= org_empty_size; - if (extend_area_on_page(info, buff, dir, rownr, block_size, + if (extend_area_on_page(info, buff, dir, rownr, length_on_head_page, &empty_size, - &rec_offset, &length)) + &rec_offset, &length, 1)) goto err; row_pos.buff= buff; @@ -3991,7 +4029,6 @@ my_bool _ma_update_block_record(MARIA_HA *info, MARIA_RECORD_POS record_pos, SYNOPSIS delete_dir_entry() buff Page buffer - block_size Block size record_number Record number to delete empty_space Empty space on page after delete @@ -4001,9 +4038,11 @@ my_bool _ma_update_block_record(MARIA_HA *info, MARIA_RECORD_POS record_pos, 1 Page is now empty */ -static int delete_dir_entry(uchar *buff, uint block_size, uint record_number, +static int delete_dir_entry(MARIA_SHARE *share, + uchar *buff, uint record_number, uint *empty_space_res) { + uint block_size= share->block_size; uint 
number_of_records= (uint) buff[DIR_COUNT_OFFSET]; uint length, empty_space; uchar *dir; @@ -4023,7 +4062,7 @@ static int delete_dir_entry(uchar *buff, uint block_size, uint record_number, } #endif - check_directory(buff, block_size, 0, (uint) -1); + check_directory(share, buff, block_size, 0, (uint) -1); empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); dir= dir_entry_pos(buff, block_size, record_number); length= uint2korr(dir + 2); /* Length of entry we just deleted */ @@ -4099,7 +4138,7 @@ static int delete_dir_entry(uchar *buff, uint block_size, uint record_number, *empty_space_res= empty_space; - check_directory(buff, block_size, 0, empty_space); + check_directory(share, buff, block_size, 0, empty_space); DBUG_RETURN(0); } @@ -4161,7 +4200,7 @@ static my_bool delete_head_or_tail(MARIA_HA *info, lock_at_unpin= PAGECACHE_LOCK_READ_UNLOCK; } - res= delete_dir_entry(buff, share->block_size, record_number, &empty_space); + res= delete_dir_entry(share, buff, record_number, &empty_space); if (res < 0) DBUG_RETURN(1); if (res == 0) /* after our deletion, page is still not empty */ @@ -4378,9 +4417,10 @@ err: In this case *end_of_data is set. 
*/ -static uchar *get_record_position(uchar *buff, uint block_size, +static uchar *get_record_position(MARIA_SHARE *share, uchar *buff, uint record_number, uchar **end_of_data) { + uint block_size= share->block_size; uint number_of_records= (uint) buff[DIR_COUNT_OFFSET]; uchar *dir; uchar *data; @@ -4388,8 +4428,8 @@ static uchar *get_record_position(uchar *buff, uint block_size, #ifdef SANITY_CHECKS if (record_number >= number_of_records || - record_number > ((block_size - PAGE_HEADER_SIZE - PAGE_SUFFIX_SIZE) / - DIR_ENTRY_SIZE)) + record_number > ((block_size - PAGE_HEADER_SIZE(share) - PAGE_SUFFIX_SIZE) + / DIR_ENTRY_SIZE)) { DBUG_PRINT("error", ("Wrong row number: record_number: %u number_of_records: %u", @@ -4402,7 +4442,7 @@ static uchar *get_record_position(uchar *buff, uint block_size, offset= uint2korr(dir); length= uint2korr(dir + 2); #ifdef SANITY_CHECKS - if (offset < PAGE_HEADER_SIZE || + if (offset < PAGE_HEADER_SIZE(share) || offset + length > (block_size - number_of_records * DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE)) @@ -4532,7 +4572,7 @@ static uchar *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent, extent->page_count--; *end_of_data= buff + share->block_size - PAGE_SUFFIX_SIZE; info->cur_row.full_page_count++; /* For maria_chk */ - DBUG_RETURN(extent->data_start= buff + LSN_SIZE + PAGE_TYPE_SIZE); + DBUG_RETURN(extent->data_start= buff + FULL_PAGE_HEADER_SIZE(share)); } /* Found tail */ @@ -4542,7 +4582,7 @@ static uchar *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent, extent->tail_row_nr); info->cur_row.tail_count++; /* For maria_chk */ - if (!(data= get_record_position(buff, share->block_size, + if (!(data= get_record_position(share, buff, extent->tail_row_nr, end_of_data))) goto crashed; @@ -5013,7 +5053,7 @@ static my_bool read_row_extent_info(MARIA_HA *info, uchar *buff, uchar *extents, *end; DBUG_ENTER("read_row_extent_info"); - if (!(data= get_record_position(buff, share->block_size, + if (!(data= 
get_record_position(share, buff, record_number, &end_of_data))) DBUG_RETURN(1); /* Wrong in record */ @@ -5107,7 +5147,6 @@ int _ma_read_block_record(MARIA_HA *info, uchar *record, MARIA_SHARE *share= info->s; uchar *data, *end_of_data, *buff; uint offset; - uint block_size= share->block_size; int ret; DBUG_ENTER("_ma_read_block_record"); DBUG_PRINT("enter", ("rowid: %lu page: %lu rownr: %u", @@ -5123,7 +5162,7 @@ int _ma_read_block_record(MARIA_HA *info, uchar *record, PAGECACHE_LOCK_LEFT_UNLOCKED, 0))) DBUG_RETURN(my_errno); DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == HEAD_PAGE); - if (!(data= get_record_position(buff, block_size, offset, &end_of_data))) + if (!(data= get_record_position(share, buff, offset, &end_of_data))) { DBUG_ASSERT(!maria_assert_if_crashed_table); DBUG_PRINT("error", ("Wrong directory entry in data block")); @@ -5413,10 +5452,11 @@ restart_record_read: info->scan.dir-= DIR_ENTRY_SIZE; /* Point to next row to process */ #ifdef SANITY_CHECKS if (end_of_data > info->scan.dir_end || - offset < PAGE_HEADER_SIZE || length < share->base.min_block_length) + offset < PAGE_HEADER_SIZE(share) || + length < share->base.min_block_length) { DBUG_ASSERT(!(end_of_data > info->scan.dir_end)); - DBUG_ASSERT(!(offset < PAGE_HEADER_SIZE)); + DBUG_ASSERT(!(offset < PAGE_HEADER_SIZE(share))); DBUG_ASSERT(!(length < share->base.min_block_length)); goto err; } @@ -6318,8 +6358,8 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, buff= info->keyread_buff; info->keyread_buff_used= 1; make_empty_page(info, buff, page_type, 1); - empty_space= (block_size - PAGE_OVERHEAD_SIZE); - rec_offset= PAGE_HEADER_SIZE; + empty_space= (block_size - PAGE_OVERHEAD_SIZE(share)); + rec_offset= PAGE_HEADER_SIZE(share); dir= buff+ block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE; } else @@ -6377,10 +6417,10 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, goto crashed_file; } make_empty_page(info, buff, page_type, 0); - 
empty_space= block_size - PAGE_HEADER_SIZE - PAGE_SUFFIX_SIZE; - (void) extend_directory(page_type == HEAD_PAGE ? info: 0, buff, - block_size, 0, rownr, &empty_space); - rec_offset= PAGE_HEADER_SIZE; + empty_space= block_size - PAGE_HEADER_SIZE(share) - PAGE_SUFFIX_SIZE; + (void) extend_directory(info, buff, block_size, 0, rownr, &empty_space, + page_type == HEAD_PAGE); + rec_offset= PAGE_HEADER_SIZE(share); dir= dir_entry_pos(buff, block_size, rownr); empty_space+= uint2korr(dir+2); } @@ -6396,14 +6436,13 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, if (max_entry <= rownr) { /* Add directory entry first in directory and data last on page */ - if (extend_directory(page_type == HEAD_PAGE ? info : 0, buff, - block_size, max_entry, rownr, &empty_space)) + if (extend_directory(info, buff, block_size, max_entry, rownr, + &empty_space, page_type == HEAD_PAGE)) goto crashed_file; } - if (extend_area_on_page(page_type == HEAD_PAGE ? info : 0, buff, - dir, rownr, block_size, + if (extend_area_on_page(info, buff, dir, rownr, (uint) data_length, &empty_space, - &rec_offset, &length)) + &rec_offset, &length, page_type == HEAD_PAGE)) goto crashed_file; } } @@ -6488,7 +6527,6 @@ uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn, MARIA_SHARE *share= info->s; pgcache_page_no_t page; uint rownr, empty_space; - uint block_size= share->block_size; uchar *buff; int result; uint error; @@ -6535,7 +6573,7 @@ uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn, DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == (uchar) page_type); - if (delete_dir_entry(buff, block_size, rownr, &empty_space) < 0) + if (delete_dir_entry(share, buff, rownr, &empty_space) < 0) { _ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); goto err; @@ -6753,7 +6791,7 @@ uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info, { MARIA_SHARE *share= info->s; const uchar *data; - uint data_size= FULL_PAGE_SIZE(share->block_size); + uint data_size= 
FULL_PAGE_SIZE(share); uint blob_count, ranges; uint16 sid; pgcache_page_no_t first_page2= ULONGLONG_MAX, last_page2= 0; @@ -6885,6 +6923,8 @@ uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info, */ lsn_store(buff, lsn); buff[PAGE_TYPE_OFFSET]= BLOB_PAGE; + bzero(buff + LSN_SIZE + PAGE_TYPE_SIZE, + FULL_PAGE_HEADER_SIZE(share) - (LSN_SIZE + PAGE_TYPE_SIZE)); if (data_on_page != data_size) { @@ -6895,7 +6935,7 @@ uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info, bzero(buff + share->block_size - PAGE_SUFFIX_SIZE - empty_space, empty_space); } - memcpy(buff+ PAGE_TYPE_OFFSET + 1, data, data_on_page); + memcpy(buff + FULL_PAGE_HEADER_SIZE(share), data, data_on_page); if (pagecache_write(share->pagecache, &info->dfile, page, 0, buff, PAGECACHE_PLAIN_PAGE, @@ -7499,7 +7539,7 @@ void maria_ignore_trids(MARIA_HA *info) /* The following functions are useful to call from debugger */ -void _ma_print_block_info(uchar *buff) +void _ma_print_block_info(MARIA_SHARE *share, uchar *buff) { LSN lsn= lsn_korr(buff); @@ -7512,6 +7552,6 @@ void _ma_print_block_info(uchar *buff) printf("Start of directory: %lu\n", maria_block_size - PAGE_SUFFIX_SIZE - (uint) buff[DIR_COUNT_OFFSET] * DIR_ENTRY_SIZE); - _ma_print_directory(stdout, buff, maria_block_size); + _ma_print_directory(share, stdout, buff, maria_block_size); } #endif diff --git a/storage/maria/ma_blockrec.h b/storage/maria/ma_blockrec.h index 40ca2591236..1c2ba57bf07 100644 --- a/storage/maria/ma_blockrec.h +++ b/storage/maria/ma_blockrec.h @@ -23,14 +23,27 @@ #define EMPTY_SPACE_SIZE 2 /* Stores empty space on page */ #define PAGE_TYPE_SIZE 1 #define PAGE_SUFFIX_SIZE 4 /* Bytes for checksum */ -#define PAGE_HEADER_SIZE (LSN_SIZE + DIR_COUNT_SIZE + DIR_FREE_SIZE +\ +#define PAGE_HEADER_SIZE_RAW (LSN_SIZE + DIR_COUNT_SIZE + DIR_FREE_SIZE + \ EMPTY_SPACE_SIZE + PAGE_TYPE_SIZE) -#define PAGE_OVERHEAD_SIZE (PAGE_HEADER_SIZE + DIR_ENTRY_SIZE + \ + +#define PAGE_HEADER_SIZE(share) (PAGE_HEADER_SIZE_RAW + \ + 
(share)->crypt_page_header_space) + +#define PAGE_OVERHEAD_SIZE_RAW (PAGE_HEADER_SIZE_RAW + DIR_ENTRY_SIZE + \ PAGE_SUFFIX_SIZE) +#define PAGE_OVERHEAD_SIZE(share) (PAGE_OVERHEAD_SIZE_RAW + \ + (share)->crypt_page_header_space) + #define BLOCK_RECORD_POINTER_SIZE 6 -#define FULL_PAGE_SIZE(block_size) ((block_size) - LSN_SIZE - \ - PAGE_TYPE_SIZE - PAGE_SUFFIX_SIZE) +#define FULL_PAGE_HEADER_SIZE(share) (LSN_SIZE + PAGE_TYPE_SIZE + \ + (share)->crypt_page_header_space) +#define FULL_PAGE_SIZE(share) ((share)->block_size - \ + FULL_PAGE_HEADER_SIZE(share) - \ + PAGE_SUFFIX_SIZE) + +#define FULL_PAGE_SIZE2(block_size, crypt_size) \ + ((block_size) - (LSN_SIZE + PAGE_TYPE_SIZE + PAGE_SUFFIX_SIZE + (crypt_size))) #define ROW_EXTENT_PAGE_SIZE 5 #define ROW_EXTENT_COUNT_SIZE 2 @@ -68,6 +81,9 @@ enum en_page_type { UNALLOCATED_PAGE, HEAD_PAGE, TAIL_PAGE, BLOB_PAGE, MAX_PAGE_ #define DIR_COUNT_OFFSET (LSN_SIZE+PAGE_TYPE_SIZE) #define DIR_FREE_OFFSET (DIR_COUNT_OFFSET+DIR_COUNT_SIZE) #define EMPTY_SPACE_OFFSET (DIR_FREE_OFFSET+DIR_FREE_SIZE) + /* for encryption */ +#define KEY_VERSION_OFFSET (EMPTY_SPACE_OFFSET+EMPTY_SPACE_SIZE) +#define FULL_PAGE_KEY_VERSION_OFFSET (PAGE_TYPE_OFFSET + PAGE_TYPE_SIZE) /* Bits used for flag uchar (one byte, first in record) */ #define ROW_FLAG_TRANSID 1 @@ -176,7 +192,7 @@ my_bool _ma_write_block_record(MARIA_HA *info, const uchar *record); my_bool _ma_write_abort_block_record(MARIA_HA *info); my_bool _ma_compare_block_record(register MARIA_HA *info, register const uchar *record); -void _ma_compact_block_page(uchar *buff, uint block_size, uint rownr, +void _ma_compact_block_page(MARIA_SHARE *share, uchar *buff, uint rownr, my_bool extend_block, TrID min_read_from, uint min_row_length); my_bool enough_free_entries_on_page(MARIA_SHARE *share, uchar *page_buff); diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index 0a89babb205..e582f0e166e 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -1583,7 +1583,7 @@ 
static int check_page_layout(HA_CHECK *param, MARIA_HA *info, block_size= info->s->block_size; empty= 0; - last_row_end= PAGE_HEADER_SIZE; + last_row_end= PAGE_HEADER_SIZE(info->s); *real_rows_found= 0; /* Check free directory list */ @@ -1936,7 +1936,7 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, row_count= page_buff[DIR_COUNT_OFFSET]; empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET); param->used+= block_size - empty_space; - param->link_used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE + + param->link_used+= (PAGE_HEADER_SIZE(info->s) + PAGE_SUFFIX_SIZE + row_count * DIR_ENTRY_SIZE); if (empty_space < share->bitmap.sizes[3]) param->lost+= empty_space; @@ -1950,7 +1950,7 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, row_count= page_buff[DIR_COUNT_OFFSET]; empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET); param->used+= block_size - empty_space; - param->link_used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE + + param->link_used+= (PAGE_HEADER_SIZE(info->s) + PAGE_SUFFIX_SIZE + row_count * DIR_ENTRY_SIZE); if (empty_space < share->bitmap.sizes[6]) param->lost+= empty_space; @@ -1964,7 +1964,7 @@ static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, full_page_count++; full_dir= 0; empty_space= block_size; /* for error reporting */ - param->link_used+= (LSN_SIZE + PAGE_TYPE_SIZE); + param->link_used+= FULL_PAGE_HEADER_SIZE(info->s); param->used+= block_size; break; } @@ -3191,17 +3191,23 @@ err2: /** - @brief put CRC on the page + @brief write a page directly to index file - @param buff reference on the page buffer. - @param pos position of the page in the file. 
- @param length length of the page */ -static void put_crc(uchar *buff, my_off_t pos, MARIA_SHARE *share) +static int write_page(MARIA_SHARE *share, File file, + uchar *buff, uint block_size, + my_off_t pos, int myf_rw) { - maria_page_crc_set_index(buff, (pgcache_page_no_t) (pos / share->block_size), - (uchar*) share); + int res; + PAGECACHE_IO_HOOK_ARGS args; + args.page= buff; + args.pageno= (pgcache_page_no_t) (pos / share->block_size); + args.data= (uchar*) share; + (* share->kfile.pre_write_hook)(&args); + res= my_pwrite(file, args.page, block_size, pos, myf_rw); + (* share->kfile.post_write_hook)(res, &args); + return res; } @@ -3290,9 +3296,8 @@ static int sort_one_index(HA_CHECK *param, MARIA_HA *info, /* Fill block with zero and write it to the new index file */ length= page.size; bzero(buff+length,keyinfo->block_length-length); - put_crc(buff, new_page_pos, share); - if (my_pwrite(new_file, buff,(uint) keyinfo->block_length, - new_page_pos,MYF(MY_NABP | MY_WAIT_IF_FULL))) + if (write_page(share, new_file, buff, keyinfo->block_length, + new_page_pos, MYF(MY_NABP | MY_WAIT_IF_FULL))) { _ma_check_print_error(param,"Can't write indexblock, error: %d",my_errno); goto err; @@ -3483,7 +3488,8 @@ static my_bool maria_zerofill_data(HA_CHECK *param, MARIA_HA *info, { my_bool is_head_page= (page_type == HEAD_PAGE); dir= dir_entry_pos(buff, block_size, max_entry - 1); - _ma_compact_block_page(buff, block_size, max_entry -1, 0, + _ma_compact_block_page(share, + buff, max_entry -1, 0, is_head_page ? ~(TrID) 0 : 0, is_head_page ? 
share->base.min_block_length : 0); @@ -5791,9 +5797,8 @@ static int sort_insert_key(MARIA_SORT_PARAM *sort_param, } else { - put_crc(anc_buff, filepos, share); - if (my_pwrite(share->kfile.file, anc_buff, - (uint) keyinfo->block_length, filepos, param->myf_rw)) + if (write_page(share, share->kfile.file, anc_buff, + keyinfo->block_length, filepos, param->myf_rw)) DBUG_RETURN(1); } DBUG_DUMP("buff", anc_buff, _ma_get_page_used(share, anc_buff)); @@ -5921,9 +5926,8 @@ int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param) } else { - put_crc(key_block->buff, filepos, info->s); - if (my_pwrite(info->s->kfile.file, key_block->buff, - (uint) keyinfo->block_length,filepos, myf_rw)) + if (write_page(info->s, info->s->kfile.file, key_block->buff, + keyinfo->block_length, filepos, myf_rw)) goto err; } DBUG_DUMP("buff",key_block->buff,length); @@ -6700,7 +6704,8 @@ static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info, info->scan.dir-= DIR_ENTRY_SIZE; /* Point to previous row */ if (end_of_data > info->scan.dir_end || - offset < PAGE_HEADER_SIZE || length < share->base.min_block_length) + offset < PAGE_HEADER_SIZE(info->s) || + length < share->base.min_block_length) { _ma_check_print_info(sort_info->param, "Wrong directory entry %3u at page %s", diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c index 4ed4e813cc9..ce24efc7014 100644 --- a/storage/maria/ma_checkpoint.c +++ b/storage/maria/ma_checkpoint.c @@ -36,7 +36,7 @@ #include "ma_checkpoint.h" #include "ma_loghandler_lsn.h" #include "ma_servicethread.h" - +#include "ma_crypt.h" /** @brief type of checkpoint currently running */ static CHECKPOINT_LEVEL checkpoint_in_progress= CHECKPOINT_NONE; @@ -1109,6 +1109,7 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon) mysql_mutex_destroy(&share->intern_lock); mysql_mutex_unlock(&share->close_lock); mysql_mutex_destroy(&share->close_lock); + ma_crypt_free(share); my_free(share); } else @@ -1222,6 +1223,7 @@ err: { 
/* maria_close() left us to free the share */ mysql_mutex_destroy(&share->intern_lock); + ma_crypt_free(share); my_free(share); } else diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c index 4532b029126..022da39002b 100644 --- a/storage/maria/ma_close.c +++ b/storage/maria/ma_close.c @@ -21,6 +21,7 @@ */ #include "maria_def.h" +#include "ma_crypt.h" int maria_close(register MARIA_HA *info) { @@ -240,6 +241,7 @@ int maria_close(register MARIA_HA *info) } if (share_can_be_freed) { + ma_crypt_free(share); (void) mysql_mutex_destroy(&share->intern_lock); (void) mysql_mutex_destroy(&share->close_lock); (void) mysql_cond_destroy(&share->key_del_cond); diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c index 1176b2037b5..b7fcb6b7058 100644 --- a/storage/maria/ma_create.c +++ b/storage/maria/ma_create.c @@ -20,6 +20,7 @@ #include <my_bit.h> #include "ma_blockrec.h" #include "trnman_public.h" +#include "ma_crypt.h" #if defined(MSDOS) || defined(__WIN__) #ifdef __WIN__ @@ -72,6 +73,9 @@ int maria_create(const char *name, enum data_file_type datafile_type, my_bool forced_packed; myf sync_dir= 0; uchar *log_data= NULL; + my_bool encrypted= MY_TEST(flags & HA_CREATE_ENCRYPTED); + my_bool insert_order= MY_TEST(flags & HA_INSERT_ORDER); + uint crypt_page_header_space= 0; DBUG_ENTER("maria_create"); DBUG_PRINT("enter", ("keys: %u columns: %u uniques: %u flags: %u", keys, columns, uniques, flags)); @@ -140,6 +144,12 @@ int maria_create(const char *name, enum data_file_type datafile_type, forced_packed= 0; column_nr= 0; + if (encrypted) + { + DBUG_ASSERT(datafile_type == BLOCK_RECORD); + crypt_page_header_space= ma_crypt_get_data_page_header_space(); + } + for (column= columndef, end_column= column + columns ; column != end_column ; column++) @@ -160,7 +170,8 @@ int maria_create(const char *name, enum data_file_type datafile_type, if (type == FIELD_SKIP_PRESPACE) type= column->type= FIELD_NORMAL; /* SKIP_PRESPACE not supported */ if (type == 
FIELD_NORMAL && - column->length > FULL_PAGE_SIZE(maria_block_size)) + column->length > FULL_PAGE_SIZE2(maria_block_size, + crypt_page_header_space)) { /* FIELD_NORMAL can't be split over many blocks, convert to a CHAR */ type= column->type= FIELD_SKIP_ENDSPACE; @@ -256,6 +267,19 @@ int maria_create(const char *name, enum data_file_type datafile_type, datafile_type= BLOCK_RECORD; } + if (encrypted) + { + /* + datafile_type is set (finally?) + update encryption that is only supported for BLOCK_RECORD + */ + if (datafile_type != BLOCK_RECORD) + { + encrypted= FALSE; + crypt_page_header_space= 0; + } + } + if (datafile_type == DYNAMIC_RECORD) options|= HA_OPTION_PACK_RECORD; /* Must use packed records */ @@ -340,9 +364,9 @@ int maria_create(const char *name, enum data_file_type datafile_type, { if (datafile_type == BLOCK_RECORD) { - uint rows_per_page= ((maria_block_size - PAGE_OVERHEAD_SIZE) / - (min_pack_length + extra_header_size + - DIR_ENTRY_SIZE)); + uint rows_per_page= + ((maria_block_size - PAGE_OVERHEAD_SIZE_RAW - crypt_page_header_space) + / (min_pack_length + extra_header_size + DIR_ENTRY_SIZE)); ulonglong data_file_length= ci->data_file_length; if (!data_file_length) data_file_length= ((((ulonglong) 1 << ((BLOCK_RECORD_POINTER_SIZE-1) * @@ -665,7 +689,20 @@ int maria_create(const char *name, enum data_file_type datafile_type, (key_segs + unique_key_parts)*HA_KEYSEG_SIZE+ columns*(MARIA_COLUMNDEF_SIZE + 2)); - DBUG_PRINT("info", ("info_length: %u", info_length)); + if (encrypted) + { + share.base.extra_options|= MA_EXTRA_OPTIONS_ENCRYPTED; + + /* store crypt data in info */ + info_length+= ma_crypt_get_file_length(); + } + + if (insert_order) + { + share.base.extra_options|= MA_EXTRA_OPTIONS_INSERT_ORDER; + } + + DBUG_PRINT("info", ("info_length: %u", info_length)); /* There are only 16 bits for the total header length. 
*/ if (info_length > 65535) { @@ -1003,6 +1040,13 @@ int maria_create(const char *name, enum data_file_type datafile_type, if (_ma_column_nr_write(file, column_array, columns)) goto err; + if (encrypted) + { + if (ma_crypt_create(&share) || + ma_crypt_write(&share, file)) + goto err; + } + if ((kfile_size_before_extension= mysql_file_tell(file,MYF(0))) == MY_FILEPOS_ERROR) goto err; #ifndef DBUG_OFF @@ -1178,6 +1222,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, mysql_mutex_unlock(&THR_LOCK_maria); res= 0; my_free((char*) rec_per_key_part); + ma_crypt_free(&share); errpos=0; if (mysql_file_close(file,MYF(0))) res= my_errno; @@ -1208,6 +1253,7 @@ err_no_lock: MY_UNPACK_FILENAME | MY_APPEND_EXT), sync_dir); } + ma_crypt_free(&share); my_free(log_data); my_free(rec_per_key_part); DBUG_RETURN(my_errno=save_errno); /* return the fatal errno */ diff --git a/storage/maria/ma_crypt.c b/storage/maria/ma_crypt.c new file mode 100644 index 00000000000..89defe8cec9 --- /dev/null +++ b/storage/maria/ma_crypt.c @@ -0,0 +1,461 @@ +/* Copyright 2013 Google Inc. All Rights Reserved. */ + +#include <my_global.h> +#include "ma_crypt.h" +#include "maria_def.h" +#include "ma_blockrec.h" +#include <my_crypt.h> + +#define CRYPT_SCHEME_1 1 +#define CRYPT_SCHEME_1_ID_LEN 4 /* 4 bytes for counter-block */ +#define CRYPT_SCHEME_1_IV_LEN 16 +#define CRYPT_SCHEME_1_KEY_VERSION_SIZE 4 + +struct st_maria_crypt_data +{ + uchar type; + uchar iv_length; + uchar iv[1]; // var size +}; + +static +void +fatal(const char * fmt, ...) 
+{ + va_list args; + va_start(args,fmt); + vfprintf(stderr, fmt, args); + va_end(args); + abort(); +} + +uint +ma_crypt_get_data_page_header_space() +{ + return CRYPT_SCHEME_1_KEY_VERSION_SIZE; +} + +uint +ma_crypt_get_index_page_header_space(MARIA_SHARE *share) +{ + if (share->base.born_transactional) + { + return CRYPT_SCHEME_1_KEY_VERSION_SIZE; + } + else + { + /* if the index is not transactional, we add 7 bytes LSN anyway + to be used for counter block + */ + return LSN_STORE_SIZE + CRYPT_SCHEME_1_KEY_VERSION_SIZE; + } +} + +uint +ma_crypt_get_file_length() +{ + return 2 + CRYPT_SCHEME_1_IV_LEN + CRYPT_SCHEME_1_ID_LEN; +} + +int +ma_crypt_create(MARIA_SHARE* share) +{ + const uint iv_length= CRYPT_SCHEME_1_IV_LEN + CRYPT_SCHEME_1_ID_LEN; + const uint sz= sizeof(MARIA_CRYPT_DATA) + iv_length; + MARIA_CRYPT_DATA *crypt_data= (MARIA_CRYPT_DATA*)my_malloc(sz, MYF(0)); + bzero(crypt_data, sz); + crypt_data->type= CRYPT_SCHEME_1; + crypt_data->iv_length= iv_length; + my_random_bytes(crypt_data->iv, iv_length); + share->crypt_data= crypt_data; + share->crypt_page_header_space= CRYPT_SCHEME_1_KEY_VERSION_SIZE; + return 0; +} + +void +ma_crypt_free(MARIA_SHARE* share) +{ + if (share->crypt_data != NULL) + { + my_free(share->crypt_data); + share->crypt_data= NULL; + } +} + +int +ma_crypt_write(MARIA_SHARE* share, File file) +{ + uchar buff[2]; + MARIA_CRYPT_DATA *crypt_data= share->crypt_data; + if (crypt_data == 0) + return 0; + + buff[0] = crypt_data->type; + buff[1] = crypt_data->iv_length; + + if (mysql_file_write(file, buff, 2, MYF(MY_NABP))) + return 1; + + if (mysql_file_write(file, crypt_data->iv, crypt_data->iv_length, + MYF(MY_NABP))) + return 1; + + return 0; +} + +uchar* +ma_crypt_read(MARIA_SHARE* share, uchar *buff) +{ + uchar type= buff[0]; + uchar iv_length= buff[1]; + if (share->crypt_data == NULL) + { + /* opening a table */ + const uint sz= sizeof(MARIA_CRYPT_DATA) + iv_length; + MARIA_CRYPT_DATA *crypt_data= (MARIA_CRYPT_DATA*)my_malloc(sz, MYF(0)); 
+ + crypt_data->type= type; + crypt_data->iv_length= iv_length; + memcpy(crypt_data->iv, buff + 2, iv_length); + share->crypt_data= crypt_data; + } + else + { + /* creating a table */ + assert(type == share->crypt_data->type); + assert(iv_length == share->crypt_data->iv_length); + } + /* currently only supported type */ + if (type != CRYPT_SCHEME_1) + { + fatal("Unsupported crypt scheme! type: %d iv_length: %d\n", + type, iv_length); + } + + share->crypt_page_header_space= CRYPT_SCHEME_1_KEY_VERSION_SIZE; + return buff + 2 + iv_length; +} + +static void ma_encrypt(MARIA_CRYPT_DATA *crypt_data, + const uchar *src, uchar *dst, uint size, + uint pageno, LSN lsn, uint *key_version); +static void ma_decrypt(MARIA_CRYPT_DATA *crypt_data, + const uchar *src, uchar *dst, uint size, + uint pageno, LSN lsn, uint key_version); + +static my_bool ma_crypt_pre_read_hook(PAGECACHE_IO_HOOK_ARGS *args) +{ + MARIA_SHARE *share= (MARIA_SHARE*) args->data; + uchar *crypt_buf= my_malloc(share->block_size, MYF(0)); + if (crypt_buf == NULL) + { + args->crypt_buf= NULL; /* for post-hook */ + return 1; + } + + /* swap pointers to read into crypt_buf */ + args->crypt_buf= args->page; + args->page= crypt_buf; + + return 0; +} + +static my_bool ma_crypt_data_post_read_hook(int res, + PAGECACHE_IO_HOOK_ARGS *args) +{ + MARIA_SHARE *share= (MARIA_SHARE*) args->data; + const uint size= share->block_size; + const uchar page_type= args->page[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK; + const uint32 key_version_offset= (page_type <= TAIL_PAGE) ? + KEY_VERSION_OFFSET : FULL_PAGE_KEY_VERSION_OFFSET; + + if (res == 0) + { + const uchar *src= args->page; + uchar* dst= args->crypt_buf; + uint pageno= (uint)args->pageno; + LSN lsn= lsn_korr(src); + const uint head= (page_type <= TAIL_PAGE) ? 
+ PAGE_HEADER_SIZE(share) : FULL_PAGE_HEADER_SIZE(share); + const uint tail= CRC_SIZE; + const uint32 key_version= uint4korr(src + key_version_offset); + + /* 1 - copy head */ + memcpy(dst, src, head); + /* 2 - decrypt page */ + ma_decrypt(share->crypt_data, + src + head, dst + head, size - (head + tail), pageno, lsn, + key_version); + /* 3 - copy tail */ + memcpy(dst + size - tail, src + size - tail, tail); + /* 4 clear key version to get correct crc */ + int4store(dst + key_version_offset, 0); + } + + if (args->crypt_buf != NULL) + { + uchar *tmp= args->page; + args->page= args->crypt_buf; + args->crypt_buf= NULL; + my_free(tmp); + } + + return maria_page_crc_check_data(res, args); +} + +static void store_rand_lsn(uchar * page) +{ + LSN lsn = 0; + lsn+= rand(); + lsn<<= 32; + lsn+= rand(); + lsn_store(page, lsn); +} + +static my_bool ma_crypt_data_pre_write_hook(PAGECACHE_IO_HOOK_ARGS *args) +{ + MARIA_SHARE *share= (MARIA_SHARE*) args->data; + const uint size= share->block_size; + uint key_version; + uchar *crypt_buf= my_malloc(share->block_size, MYF(0)); + + if (crypt_buf == NULL) + { + args->crypt_buf= NULL; /* for post-hook */ + return 1; + } + + if (!share->now_transactional) + { + /* store a random number instead of LSN (for counter block) */ + store_rand_lsn(args->page); + } + + maria_page_crc_set_normal(args); + + { + const uchar *src= args->page; + uchar* dst= crypt_buf; + uint pageno= (uint)args->pageno; + LSN lsn= lsn_korr(src); + const uchar page_type= src[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK; + const uint head= (page_type <= TAIL_PAGE) ? + PAGE_HEADER_SIZE(share) : FULL_PAGE_HEADER_SIZE(share); + const uint tail= CRC_SIZE; + const uint32 key_version_offset= (page_type <= TAIL_PAGE) ? 
+ KEY_VERSION_OFFSET : FULL_PAGE_KEY_VERSION_OFFSET; + + DBUG_ASSERT(page_type < MAX_PAGE_TYPE); + + /* 1 - copy head */ + memcpy(dst, src, head); + /* 2 - encrypt page */ + ma_encrypt(share->crypt_data, + src + head, dst + head, size - (head + tail), pageno, lsn, + &key_version); + /* 3 - copy tail */ + memcpy(dst + size - tail, src + size - tail, tail); + /* 4 - store key version */ + int4store(dst + key_version_offset, key_version); + } + + /* swap pointers to instead write out the encrypted block */ + args->crypt_buf= args->page; + args->page= crypt_buf; + + return 0; +} + +static void ma_crypt_post_write_hook(int res, + PAGECACHE_IO_HOOK_ARGS *args) +{ + if (args->crypt_buf != NULL) + { + uchar *tmp= args->page; + args->page= args->crypt_buf; + args->crypt_buf= NULL; + my_free(tmp); + } + + maria_page_write_failure(res, args); +} + +void ma_crypt_set_data_pagecache_callbacks(PAGECACHE_FILE *file, + MARIA_SHARE *share + __attribute__((unused))) +{ + /* Only use encryption if we have defined it */ + if (likely(current_aes_dynamic_method != MY_AES_ALGORITHM_NONE)) + { + file->pre_read_hook= ma_crypt_pre_read_hook; + file->post_read_hook= ma_crypt_data_post_read_hook; + file->pre_write_hook= ma_crypt_data_pre_write_hook; + file->post_write_hook= ma_crypt_post_write_hook; + } +} + +static my_bool ma_crypt_index_post_read_hook(int res, + PAGECACHE_IO_HOOK_ARGS *args) +{ + MARIA_SHARE *share= (MARIA_SHARE*) args->data; + const uint block_size= share->block_size; + const uint page_used= _ma_get_page_used(share, args->page); + + if (res == 0 && page_used <= block_size - CRC_SIZE) + { + const uchar *src= args->page; + uchar* dst= args->crypt_buf; + uint pageno= (uint)args->pageno; + LSN lsn= lsn_korr(src); + const uint head= share->keypage_header; + const uint tail= CRC_SIZE; + const uint32 key_version= _ma_get_key_version(share, src); + /* page_used includes header (but not trailer) */ + const uint size= page_used - head; + + /* 1 - copy head */ + memcpy(dst, src, 
head); + /* 2 - decrypt page */ + ma_decrypt(share->crypt_data, + src + head, dst + head, size, pageno, lsn, key_version); + /* 3 - copy tail */ + memcpy(dst + block_size - tail, src + block_size - tail, tail); + /* 4 clear key version to get correct crc */ + _ma_store_key_version(share, dst, 0); + } + + if (args->crypt_buf != NULL) + { + uchar *tmp= args->page; + args->page= args->crypt_buf; + args->crypt_buf= NULL; + my_free(tmp); + } + + return maria_page_crc_check_index(res, args); +} + +static my_bool ma_crypt_index_pre_write_hook(PAGECACHE_IO_HOOK_ARGS *args) +{ + MARIA_SHARE *share= (MARIA_SHARE*) args->data; + const uint block_size= share->block_size; + const uint page_used= _ma_get_page_used(share, args->page); + uint key_version; + uchar *crypt_buf= my_malloc(block_size, MYF(0)); + if (crypt_buf == NULL) + { + args->crypt_buf= NULL; /* for post-hook */ + return 1; + } + + if (!share->now_transactional) + { + /* store a random number instead of LSN (for counter block) */ + store_rand_lsn(args->page); + } + + maria_page_crc_set_index(args); + + { + const uchar *src= args->page; + uchar* dst= crypt_buf; + uint pageno= (uint)args->pageno; + LSN lsn= lsn_korr(src); + const uint head= share->keypage_header; + const uint tail= CRC_SIZE; + /* page_used includes header (but not trailer) */ + const uint size= page_used - head; + + /* 1 - copy head */ + memcpy(dst, src, head); + /* 2 - encrypt page */ + ma_encrypt(share->crypt_data, + src + head, dst + head, size, pageno, lsn, &key_version); + /* 3 - copy tail */ + memcpy(dst + block_size - tail, src + block_size - tail, tail); + /* 4 - store key version */ + _ma_store_key_version(share, dst, key_version); + } + + /* swap pointers to instead write out the encrypted block */ + args->crypt_buf= args->page; + args->page= crypt_buf; + + return 0; +} + +void ma_crypt_set_index_pagecache_callbacks(PAGECACHE_FILE *file, + MARIA_SHARE *share + __attribute__((unused))) +{ + file->pre_read_hook= ma_crypt_pre_read_hook; + 
file->post_read_hook= ma_crypt_index_post_read_hook; + file->pre_write_hook= ma_crypt_index_pre_write_hook; + file->post_write_hook= ma_crypt_post_write_hook; +} + +#define COUNTER_LEN MY_AES_BLOCK_SIZE + +static void ma_encrypt(MARIA_CRYPT_DATA *crypt_data, + const uchar *src, uchar *dst, uint size, + uint pageno, LSN lsn, + uint *key_version) +{ + int rc; + uint32 dstlen; + uchar counter[COUNTER_LEN]; + uchar *key= crypt_data->iv; + + // create counter block + memcpy(counter + 0, crypt_data->iv + CRYPT_SCHEME_1_IV_LEN, 4); + int4store(counter + 4, pageno); + int8store(counter + 8, lsn); + + rc = my_aes_encrypt_dynamic(src, size, + dst, &dstlen, + key, sizeof(crypt_data->iv), + counter, sizeof(counter), + 1); + + DBUG_ASSERT(rc == AES_OK); + DBUG_ASSERT(dstlen == size); + if (! (rc == AES_OK && dstlen == size)) + { + fatal("failed to encrypt! rc: %d, dstlen: %d size: %d\n", + rc, dstlen, (int)size); + } + + *key_version= 1; +} + +static void ma_decrypt(MARIA_CRYPT_DATA *crypt_data, + const uchar *src, uchar *dst, uint size, + uint pageno, LSN lsn, + uint key_version) +{ + int rc; + uint32 dstlen; + uchar counter[COUNTER_LEN]; + uchar *key= crypt_data->iv; + + // create counter block + memcpy(counter + 0, crypt_data->iv + CRYPT_SCHEME_1_IV_LEN, 4); + int4store(counter + 4, pageno); + int8store(counter + 8, lsn); + + rc = my_aes_decrypt_dynamic(src, size, + dst, &dstlen, + key, sizeof(crypt_data->iv), + counter, sizeof(counter), + 1); + + DBUG_ASSERT(rc == AES_OK); + DBUG_ASSERT(dstlen == size); + if (! (rc == AES_OK && dstlen == size)) + { + fatal("failed to decrypt! rc: %d, dstlen: %d size: %d\n", + rc, dstlen, (int)size); + } + + (void)key_version; +} diff --git a/storage/maria/ma_crypt.h b/storage/maria/ma_crypt.h new file mode 100644 index 00000000000..76752e19449 --- /dev/null +++ b/storage/maria/ma_crypt.h @@ -0,0 +1,26 @@ +/* Copyright 2013 Google Inc. All Rights Reserved. 
*/ + +#ifndef _ma_crypt_h +#define _ma_crypt_h + +#include <my_global.h> + +struct st_maria_share; +struct st_pagecache_file; + +uint ma_crypt_get_data_page_header_space();/* bytes in data/index page header */ +uint ma_crypt_get_index_page_header_space(struct st_maria_share *); +uint ma_crypt_get_file_length(); /* bytes needed in file */ +int ma_crypt_create(struct st_maria_share *); /* create encryption data */ +int ma_crypt_write(struct st_maria_share *, File); /* write encryption data */ +uchar* ma_crypt_read(struct st_maria_share *, uchar *buff); /* read crypt data*/ + +void ma_crypt_set_data_pagecache_callbacks(struct st_pagecache_file *file, + struct st_maria_share *share); + +void ma_crypt_set_index_pagecache_callbacks(struct st_pagecache_file *file, + struct st_maria_share *share); + +void ma_crypt_free(struct st_maria_share *share); + +#endif diff --git a/storage/maria/ma_delete.c b/storage/maria/ma_delete.c index 31773ef2dfc..bcea1d4054d 100644 --- a/storage/maria/ma_delete.c +++ b/storage/maria/ma_delete.c @@ -1505,7 +1505,7 @@ my_bool _ma_log_delete(MARIA_PAGE *ma_page, const uchar *key_pos, /* Store keypage_flag */ *log_pos++= KEY_OP_SET_PAGEFLAG; - *log_pos++= ma_page->buff[KEYPAGE_TRANSFLAG_OFFSET]; + *log_pos++= _ma_get_keypage_flag(info->s, ma_page->buff); log_pos[0]= KEY_OP_OFFSET; int2store(log_pos+1, offset); diff --git a/storage/maria/ma_key_recover.c b/storage/maria/ma_key_recover.c index ae9427981ea..e0a7bd35322 100644 --- a/storage/maria/ma_key_recover.c +++ b/storage/maria/ma_key_recover.c @@ -340,7 +340,7 @@ my_bool _ma_log_prefix(MARIA_PAGE *ma_page, uint changed_length, /* Store keypage_flag */ *log_pos++= KEY_OP_SET_PAGEFLAG; - *log_pos++= buff[KEYPAGE_TRANSFLAG_OFFSET]; + *log_pos++= _ma_get_keypage_flag(info->s, buff); if (move_length < 0) { @@ -424,7 +424,7 @@ my_bool _ma_log_suffix(MARIA_PAGE *ma_page, uint org_length, uint new_length) /* Store keypage_flag */ *log_pos++= KEY_OP_SET_PAGEFLAG; - *log_pos++= 
buff[KEYPAGE_TRANSFLAG_OFFSET]; + *log_pos++= _ma_get_keypage_flag(info->s, buff); if ((diff= (int) (new_length - org_length)) < 0) { @@ -526,7 +526,7 @@ my_bool _ma_log_add(MARIA_PAGE *ma_page, /* Store keypage_flag */ *log_pos++= KEY_OP_SET_PAGEFLAG; - *log_pos++= buff[KEYPAGE_TRANSFLAG_OFFSET]; + *log_pos++= _ma_get_keypage_flag(info->s, buff); /* Don't overwrite page boundary @@ -667,7 +667,7 @@ void _ma_log_key_changes(MARIA_PAGE *ma_page, LEX_CUSTRING *log_array, uint org_length; ha_checksum crc; - DBUG_ASSERT(ma_page->flag == (uint) ma_page->buff[KEYPAGE_TRANSFLAG_OFFSET]); + DBUG_ASSERT(ma_page->flag == (uint) _ma_get_keypage_flag(share, ma_page->buff)); /* We have to change length as the page may have been shortened */ org_length= _ma_get_page_used(share, ma_page->buff); @@ -1155,7 +1155,7 @@ uint _ma_apply_redo_index(MARIA_HA *info, } case KEY_OP_SET_PAGEFLAG: DBUG_PRINT("redo", ("key_op_set_pageflag")); - buff[KEYPAGE_TRANSFLAG_OFFSET]= *header++; + _ma_store_keypage_flag(share, buff, *header++); break; case KEY_OP_COMPACT_PAGE: { diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index edf819cee4d..1e7cc9483a2 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -420,12 +420,7 @@ static ulonglong flush_start= 0; /* an array that maps id of a MARIA_SHARE to this MARIA_SHARE */ static MARIA_SHARE **id_to_share= NULL; -static my_bool translog_dummy_callback(uchar *page, - pgcache_page_no_t page_no, - uchar* data_ptr); -static my_bool translog_page_validator(uchar *page, - pgcache_page_no_t page_no, - uchar* data_ptr); +static my_bool translog_page_validator(int res, PAGECACHE_IO_HOOK_ARGS *args); static my_bool translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner); static uint32 translog_first_file(TRANSLOG_ADDRESS horizon, int is_protected); @@ -1565,17 +1560,6 @@ static my_bool translog_close_log_file(TRANSLOG_FILE *file) /** - @brief Dummy function for write failure (the log to not use - pagecache 
writing) -*/ - -void translog_dummy_write_failure(uchar *data __attribute__((unused))) -{ - return; -} - - -/** @brief Initializes TRANSLOG_FILE structure @param file reference on the file to initialize @@ -1586,10 +1570,11 @@ void translog_dummy_write_failure(uchar *data __attribute__((unused))) static void translog_file_init(TRANSLOG_FILE *file, uint32 number, my_bool is_sync) { - pagecache_file_init(file->handler, &translog_page_validator, - &translog_dummy_callback, - &translog_dummy_write_failure, - maria_flush_log_for_page_none, file); + pagecache_file_set_null_hooks(&file->handler); + file->handler.post_read_hook= translog_page_validator; + file->handler.flush_log_callback= maria_flush_log_for_page_none; + file->handler.callback_data= (uchar*)file; + file->number= number; file->was_recovered= 0; file->is_sync= is_sync; @@ -2786,19 +2771,6 @@ static my_bool translog_recover_page_up_to_sector(uchar *page, uint16 offset) /** - @brief Dummy write callback. -*/ - -static my_bool -translog_dummy_callback(uchar *page __attribute__((unused)), - pgcache_page_no_t page_no __attribute__((unused)), - uchar* data_ptr __attribute__((unused))) -{ - return 0; -} - - -/** @brief Checks and removes sector protection. @param page reference on the page content. 
@@ -2874,14 +2846,14 @@ translog_check_sector_protection(uchar *page, TRANSLOG_FILE *file) @retval 1 Error */ -static my_bool translog_page_validator(uchar *page, - pgcache_page_no_t page_no, - uchar* data_ptr) +static my_bool translog_page_validator(int res, PAGECACHE_IO_HOOK_ARGS *args) { + uchar *page= args->page; + pgcache_page_no_t page_no= args->pageno; uint this_page_page_overhead; uint flags; uchar *page_pos; - TRANSLOG_FILE *data= (TRANSLOG_FILE *) data_ptr; + TRANSLOG_FILE *data= (TRANSLOG_FILE *) args->data; #ifndef DBUG_OFF pgcache_page_no_t offset= page_no * TRANSLOG_PAGE_SIZE; #endif @@ -2889,6 +2861,11 @@ static my_bool translog_page_validator(uchar *page, data->was_recovered= 0; + if (res) + { + DBUG_RETURN(1); + } + if ((pgcache_page_no_t) uint3korr(page) != page_no || (uint32) uint3korr(page + 3) != data->number) { @@ -3153,9 +3130,11 @@ restart: This IF should be true because we use in-memory data which supposed to be correct. */ - if (translog_page_validator(buffer, - LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE, - (uchar*) &file_copy)) + PAGECACHE_IO_HOOK_ARGS args; + args.page= buffer; + args.pageno= LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE; + args.data= (uchar*) &file_copy; + if (translog_page_validator(0, &args)) { DBUG_ASSERT(0); buffer= NULL; @@ -9105,8 +9084,8 @@ static void dump_datapage(uchar *buffer, File handler) } } tfile.number= file; + bzero(&tfile.handler, sizeof(tfile.handler)); tfile.handler.file= handler; - pagecache_file_init(tfile.handler, NULL, NULL, NULL, NULL, NULL); tfile.was_recovered= 0; tfile.is_sync= 1; if (translog_check_sector_protection(buffer, &tfile)) diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index 1d274d796be..c83ad70914e 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -20,6 +20,7 @@ #include "ma_rt_index.h" #include "ma_blockrec.h" #include <m_ctype.h> +#include "ma_crypt.h" #if defined(MSDOS) || defined(__WIN__) #ifdef __WIN__ @@ -596,6 +597,12 @@ MARIA_HA *maria_open(const 
char *name, int mode, uint open_flags) LSN_STORE_SIZE + TRANSID_SIZE : 0) + KEYPAGE_KEYID_SIZE + KEYPAGE_FLAG_SIZE + KEYPAGE_USED_SIZE); + + if (MY_TEST(share->base.extra_options & MA_EXTRA_OPTIONS_ENCRYPTED)) + { + share->keypage_header+= ma_crypt_get_index_page_header_space(share); + } + { HA_KEYSEG *pos=share->keyparts; uint32 ftkey_nr= 1; @@ -829,6 +836,11 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) disk_pos= _ma_column_nr_read(disk_pos, share->column_nr, share->base.fields); + if (MY_TEST(share->base.extra_options & MA_EXTRA_OPTIONS_ENCRYPTED)) + { + disk_pos= ma_crypt_read(share, disk_pos); + } + if ((share->data_file_type == BLOCK_RECORD || share->data_file_type == COMPRESSED_RECORD)) { @@ -1040,6 +1052,7 @@ err: (*share->once_end)(share); /* fall through */ case 4: + ma_crypt_free(share); my_free(share); /* fall through */ case 3: @@ -1819,24 +1832,31 @@ uchar *_ma_column_nr_read(uchar *ptr, uint16 *offsets, uint columns) void _ma_set_data_pagecache_callbacks(PAGECACHE_FILE *file, MARIA_SHARE *share) { + pagecache_file_set_null_hooks(file); file->callback_data= (uchar*) share; file->flush_log_callback= &maria_flush_log_for_page_none; /* Do nothing */ + file->post_write_hook= maria_page_write_failure; if (share->temporary) { - file->read_callback= &maria_page_crc_check_none; - file->write_callback= &maria_page_filler_set_none; + file->post_read_hook= &maria_page_crc_check_none; + file->pre_write_hook= &maria_page_filler_set_none; } else { - file->read_callback= &maria_page_crc_check_data; + file->post_read_hook= &maria_page_crc_check_data; if (share->options & HA_OPTION_PAGE_CHECKSUM) - file->write_callback= &maria_page_crc_set_normal; + file->pre_write_hook= &maria_page_crc_set_normal; else - file->write_callback= &maria_page_filler_set_normal; + file->pre_write_hook= &maria_page_filler_set_normal; if (share->now_transactional) file->flush_log_callback= maria_flush_log_for_page; } + + if (MY_TEST(share->base.extra_options & 
MA_EXTRA_OPTIONS_ENCRYPTED)) + { + ma_crypt_set_data_pagecache_callbacks(file, share); + } } @@ -1851,26 +1871,32 @@ void _ma_set_data_pagecache_callbacks(PAGECACHE_FILE *file, void _ma_set_index_pagecache_callbacks(PAGECACHE_FILE *file, MARIA_SHARE *share) { + pagecache_file_set_null_hooks(file); file->callback_data= (uchar*) share; file->flush_log_callback= &maria_flush_log_for_page_none; /* Do nothing */ - file->write_fail= maria_page_write_failure; + file->post_write_hook= maria_page_write_failure; if (share->temporary) { - file->read_callback= &maria_page_crc_check_none; - file->write_callback= &maria_page_filler_set_none; + file->post_read_hook= &maria_page_crc_check_none; + file->pre_write_hook= &maria_page_filler_set_none; } else { - file->read_callback= &maria_page_crc_check_index; + file->post_read_hook= &maria_page_crc_check_index; if (share->options & HA_OPTION_PAGE_CHECKSUM) - file->write_callback= &maria_page_crc_set_index; + file->pre_write_hook= &maria_page_crc_set_index; else - file->write_callback= &maria_page_filler_set_normal; + file->pre_write_hook= &maria_page_filler_set_normal; if (share->now_transactional) file->flush_log_callback= maria_flush_log_for_page; } + + if (MY_TEST(share->base.extra_options & MA_EXTRA_OPTIONS_ENCRYPTED)) + { + ma_crypt_set_index_pagecache_callbacks(file, share); + } } diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c index ac66fdf6c57..eeb87d2b7a0 100644 --- a/storage/maria/ma_pagecache.c +++ b/storage/maria/ma_pagecache.c @@ -625,6 +625,8 @@ static my_bool pagecache_fwrite(PAGECACHE *pagecache, __attribute__((unused)), myf flags) { + int res; + PAGECACHE_IO_HOOK_ARGS args; DBUG_ENTER("pagecache_fwrite"); DBUG_ASSERT(type != PAGECACHE_READ_UNKNOWN_PAGE); @@ -648,24 +650,26 @@ static my_bool pagecache_fwrite(PAGECACHE *pagecache, } #endif + /* initialize hooks args */ + args.page= buffer; + args.pageno= pageno; + args.data= filedesc->callback_data; + /* Todo: Integrate this with write_callback 
so we have only one callback */ - if ((*filedesc->flush_log_callback)(buffer, pageno, filedesc->callback_data)) + if ((*filedesc->flush_log_callback)(&args)) DBUG_RETURN(1); - DBUG_PRINT("info", ("write_callback: 0x%lx data: 0x%lx", - (ulong) filedesc->write_callback, + DBUG_PRINT("info", ("pre_write_hook: 0x%lx data: 0x%lx", + (ulong) filedesc->pre_write_hook, (ulong) filedesc->callback_data)); - if ((*filedesc->write_callback)(buffer, pageno, filedesc->callback_data)) + if ((*filedesc->pre_write_hook)(&args)) { DBUG_PRINT("error", ("write callback problem")); DBUG_RETURN(1); } - if (my_pwrite(filedesc->file, buffer, pagecache->block_size, - ((my_off_t) pageno << pagecache->shift), flags)) - { - (*filedesc->write_fail)(filedesc->callback_data); - DBUG_RETURN(1); - } - DBUG_RETURN(0); + res= my_pwrite(filedesc->file, args.page, pagecache->block_size, + ((my_off_t) pageno << pagecache->shift), flags); + (*filedesc->post_write_hook)(res, &args); + DBUG_RETURN(res); } @@ -2689,6 +2693,7 @@ static void read_block(PAGECACHE *pagecache, if (primary) { size_t error; + PAGECACHE_IO_HOOK_ARGS args; /* This code is executed only by threads that submitted primary requests @@ -2701,10 +2706,18 @@ static void read_block(PAGECACHE *pagecache, They will register in block->wqueue[COND_FOR_REQUESTED]. 
*/ pagecache_pthread_mutex_unlock(&pagecache->cache_lock); - error= pagecache_fread(pagecache, &block->hash_link->file, - block->buffer, - block->hash_link->pageno, - pagecache->readwrite_flags); + args.page= block->buffer; + args.pageno= block->hash_link->pageno; + args.data= block->hash_link->file.callback_data; + error= (*block->hash_link->file.pre_read_hook)(&args); + if (!error) + { + error= pagecache_fread(pagecache, &block->hash_link->file, + args.page, + block->hash_link->pageno, + pagecache->readwrite_flags); + } + error= (*block->hash_link->file.post_read_hook)(error != 0, &args); pagecache_pthread_mutex_lock(&pagecache->cache_lock); if (error) { @@ -2716,16 +2729,6 @@ static void read_block(PAGECACHE *pagecache, else { block->status|= PCBLOCK_READ; - if ((*block->hash_link->file.read_callback)(block->buffer, - block->hash_link->pageno, - block->hash_link-> - file.callback_data)) - { - DBUG_PRINT("error", ("read callback problem")); - block->status|= PCBLOCK_ERROR; - block->error= (int16) my_errno; - my_debug_put_break_here(); - } } DBUG_PRINT("read_block", ("primary request: new page in cache")); @@ -3504,9 +3507,21 @@ no_key_cache: /* Key cache is not used */ /* We can't use mutex here as the key cache may not be initialized */ pagecache->global_cache_r_requests++; pagecache->global_cache_read++; - if (pagecache_fread(pagecache, file, buff, pageno, - pagecache->readwrite_flags)) - error= 1; + + { + PAGECACHE_IO_HOOK_ARGS args; + args.page= buff; + args.pageno= pageno; + args.data= file->callback_data; + error= (* file->pre_read_hook)(&args); + if (!error) + { + error= pagecache_fread(pagecache, file, args.page, pageno, + pagecache->readwrite_flags) != 0; + } + error= (* file->post_read_hook)(error, &args); + } + DBUG_RETURN(error ? 
(uchar*) 0 : buff); } @@ -3597,17 +3612,16 @@ static my_bool pagecache_delete_internal(PAGECACHE *pagecache, } else { + PAGECACHE_IO_HOOK_ARGS args; PAGECACHE_FILE *filedesc= &block->hash_link->file; + args.page= block->buffer; + args.pageno= block->hash_link->pageno; + args.data= filedesc->callback_data; /* We are not going to write the page but have to call callbacks */ - DBUG_PRINT("info", ("flush_callback :0x%lx" - "write_callback: 0x%lx data: 0x%lx", + DBUG_PRINT("info", ("flush_callback :0x%lx data: 0x%lx", (ulong) filedesc->flush_log_callback, - (ulong) filedesc->write_callback, (ulong) filedesc->callback_data)); - if ((*filedesc->flush_log_callback) - (block->buffer, block->hash_link->pageno, filedesc->callback_data) || - (*filedesc->write_callback) - (block->buffer, block->hash_link->pageno, filedesc->callback_data)) + if ((*filedesc->flush_log_callback)(&args)) { DBUG_PRINT("error", ("flush or write callback problem")); error= 1; @@ -4077,23 +4091,6 @@ restart: /* Copy data from buff */ memcpy(block->buffer + offset, buff, size); block->status= PCBLOCK_READ; - /* - The read_callback can change the page content (removing page - protection) so it have to be called - */ - DBUG_PRINT("info", ("read_callback: 0x%lx data: 0x%lx", - (ulong) block->hash_link->file.read_callback, - (ulong) block->hash_link->file.callback_data)); - if ((*block->hash_link->file.read_callback)(block->buffer, - block->hash_link->pageno, - block->hash_link-> - file.callback_data)) - { - DBUG_PRINT("error", ("read callback problem")); - block->status|= PCBLOCK_ERROR; - block->error= (int16) my_errno; - my_debug_put_break_here(); - } KEYCACHE_DBUG_PRINT("key_cache_insert", ("Page injection")); /* Signal that all pending requests for this now can be processed. 
*/ @@ -4181,14 +4178,21 @@ no_key_cache: if (offset != 0 || size != pagecache->block_size) { uchar *page_buffer= (uchar *) alloca(pagecache->block_size); + PAGECACHE_IO_HOOK_ARGS args; + args.page= page_buffer; + args.pageno= pageno; + args.data= file->callback_data; pagecache->global_cache_read++; - if ((error= (pagecache_fread(pagecache, file, - page_buffer, - pageno, - pagecache->readwrite_flags) != 0))) - goto end; - if ((file->read_callback)(page_buffer, pageno, file->callback_data)) + error= (*file->pre_read_hook)(&args); + if (!error) + { + error= pagecache_fread(pagecache, file, + page_buffer, + pageno, + pagecache->readwrite_flags) != 0; + } + if ((*file->post_read_hook)(error, &args)) { DBUG_PRINT("error", ("read callback problem")); error= 1; @@ -5251,3 +5255,37 @@ void pagecache_debug_log_close(void) #endif /* defined(PAGECACHE_DEBUG_LOG) */ #endif /* defined(PAGECACHE_DEBUG) */ + +/** + @brief null hooks +*/ + +static my_bool null_pre_hook(PAGECACHE_IO_HOOK_ARGS *args + __attribute__((unused))) +{ + return 0; +} + +static my_bool null_post_read_hook(int res, PAGECACHE_IO_HOOK_ARGS *args + __attribute__((unused))) +{ + return res != 0; +} + +static void null_post_write_hook(int res __attribute__((unused)), + PAGECACHE_IO_HOOK_ARGS *args + __attribute__((unused))) +{ + return; +} + +void +pagecache_file_set_null_hooks(PAGECACHE_FILE *file) +{ + file->pre_read_hook= null_pre_hook; + file->post_read_hook= null_post_read_hook; + file->pre_write_hook= null_pre_hook; + file->post_write_hook= null_post_write_hook; + file->flush_log_callback= null_pre_hook; + file->callback_data= NULL; +} diff --git a/storage/maria/ma_pagecache.h b/storage/maria/ma_pagecache.h index f7ddb2fe716..e212a7b7029 100644 --- a/storage/maria/ma_pagecache.h +++ b/storage/maria/ma_pagecache.h @@ -76,20 +76,32 @@ enum pagecache_write_mode /* page number for maria */ typedef ulonglong pgcache_page_no_t; +/* args for read/write hooks */ +typedef struct st_pagecache_io_hook_args +{ + uchar * 
page; + pgcache_page_no_t pageno; + uchar * data; + + uchar *crypt_buf; /* when using encryption */ +} PAGECACHE_IO_HOOK_ARGS; + /* file descriptor for Maria */ typedef struct st_pagecache_file { File file; + /** Cannot be NULL */ - my_bool (*read_callback)(uchar *page, pgcache_page_no_t offset, - uchar *data); + my_bool (*pre_read_hook)(PAGECACHE_IO_HOOK_ARGS *args); + my_bool (*post_read_hook)(int error, PAGECACHE_IO_HOOK_ARGS *args); + /** Cannot be NULL */ - my_bool (*write_callback)(uchar *page, pgcache_page_no_t offset, - uchar *data); - void (*write_fail)(uchar *data); + my_bool (*pre_write_hook)(PAGECACHE_IO_HOOK_ARGS *args); + void (*post_write_hook)(int error, PAGECACHE_IO_HOOK_ARGS *args); + /** Cannot be NULL */ - my_bool (*flush_log_callback)(uchar *page, pgcache_page_no_t offset, - uchar *data); + my_bool (*flush_log_callback)(PAGECACHE_IO_HOOK_ARGS *args); + uchar *callback_data; } PAGECACHE_FILE; @@ -270,12 +282,8 @@ extern void pagecache_set_write_on_delete_by_link(PAGECACHE_BLOCK_LINK *block); /* PCFLUSH_ERROR and PCFLUSH_PINNED. 
*/ #define PCFLUSH_PINNED_AND_ERROR (PCFLUSH_ERROR|PCFLUSH_PINNED) -#define pagecache_file_init(F,RC,WC,WF,GLC,D) \ - do{ \ - (F).read_callback= (RC); (F).write_callback= (WC); \ - (F).write_fail= (WF); \ - (F).flush_log_callback= (GLC); (F).callback_data= (uchar*)(D); \ - } while(0) +// initialize file with empty hooks +void pagecache_file_set_null_hooks(PAGECACHE_FILE*); #define flush_pagecache_blocks(A,B,C) \ flush_pagecache_blocks_with_filter(A,B,C,NULL,NULL) diff --git a/storage/maria/ma_pagecrc.c b/storage/maria/ma_pagecrc.c index d3522fa4e88..940feb8576b 100644 --- a/storage/maria/ma_pagecrc.c +++ b/storage/maria/ma_pagecrc.c @@ -128,11 +128,11 @@ static my_bool maria_page_crc_check(uchar *page, @retval 0 OK */ -my_bool maria_page_crc_set_normal(uchar *page, - pgcache_page_no_t page_no, - uchar *data_ptr) +my_bool maria_page_crc_set_normal(PAGECACHE_IO_HOOK_ARGS *args) { - MARIA_SHARE *share= (MARIA_SHARE *)data_ptr; + uchar *page= args->page; + pgcache_page_no_t page_no= args->pageno; + MARIA_SHARE *share= (MARIA_SHARE *)args->data; int data_length= share->block_size - CRC_SIZE; uint32 crc= maria_page_crc((uint32) page_no, page, data_length); DBUG_ENTER("maria_page_crc_set_normal"); @@ -154,11 +154,11 @@ my_bool maria_page_crc_set_normal(uchar *page, @retval 0 OK */ -my_bool maria_page_crc_set_index(uchar *page, - pgcache_page_no_t page_no, - uchar *data_ptr) +my_bool maria_page_crc_set_index(PAGECACHE_IO_HOOK_ARGS *args) { - MARIA_SHARE *share= (MARIA_SHARE *)data_ptr; + uchar *page= args->page; + pgcache_page_no_t page_no= args->pageno; + MARIA_SHARE *share= (MARIA_SHARE *)args->data; int data_length= _ma_get_page_used(share, page); uint32 crc= maria_page_crc((uint32) page_no, page, data_length); DBUG_ENTER("maria_page_crc_set_index"); @@ -185,11 +185,16 @@ my_bool maria_page_crc_set_index(uchar *page, @retval 1 Error */ -my_bool maria_page_crc_check_data(uchar *page, - pgcache_page_no_t page_no, - uchar *data_ptr) +my_bool maria_page_crc_check_data(int 
res, PAGECACHE_IO_HOOK_ARGS *args) { - MARIA_SHARE *share= (MARIA_SHARE *)data_ptr; + uchar *page= args->page; + pgcache_page_no_t page_no= args->pageno; + MARIA_SHARE *share= (MARIA_SHARE *)args->data; + if (res) + { + return 1; + } + return (maria_page_crc_check(page, (uint32) page_no, share, MARIA_NO_CRC_NORMAL_PAGE, share->block_size - CRC_SIZE)); @@ -207,11 +212,15 @@ my_bool maria_page_crc_check_data(uchar *page, @retval 1 Error */ -my_bool maria_page_crc_check_bitmap(uchar *page, - pgcache_page_no_t page_no, - uchar *data_ptr) +my_bool maria_page_crc_check_bitmap(int res, PAGECACHE_IO_HOOK_ARGS *args) { - MARIA_SHARE *share= (MARIA_SHARE *)data_ptr; + uchar *page= args->page; + pgcache_page_no_t page_no= args->pageno; + MARIA_SHARE *share= (MARIA_SHARE *)args->data; + if (res) + { + return 1; + } return (maria_page_crc_check(page, (uint32) page_no, share, MARIA_NO_CRC_BITMAP_PAGE, share->block_size - CRC_SIZE)); @@ -229,12 +238,16 @@ my_bool maria_page_crc_check_bitmap(uchar *page, @retval 1 Error */ -my_bool maria_page_crc_check_index(uchar *page, - pgcache_page_no_t page_no, - uchar *data_ptr) +my_bool maria_page_crc_check_index(int res, PAGECACHE_IO_HOOK_ARGS *args) { - MARIA_SHARE *share= (MARIA_SHARE *)data_ptr; + uchar *page= args->page; + pgcache_page_no_t page_no= args->pageno; + MARIA_SHARE *share= (MARIA_SHARE *)args->data; uint length= _ma_get_page_used(share, page); + if (res) + { + return 1; + } if (length > share->block_size - CRC_SIZE) { DBUG_PRINT("error", ("Wrong page length: %u", length)); @@ -253,12 +266,11 @@ my_bool maria_page_crc_check_index(uchar *page, @retval 1 Error */ -my_bool maria_page_crc_check_none(uchar *page __attribute__((unused)), - pgcache_page_no_t page_no - __attribute__((unused)), - uchar *data_ptr __attribute__((unused))) +my_bool maria_page_crc_check_none(int res, + PAGECACHE_IO_HOOK_ARGS *args + __attribute__((unused))) { - return 0; + return res != 0; } @@ -272,14 +284,16 @@ my_bool maria_page_crc_check_none(uchar 
*page __attribute__((unused)), @retval 0 OK */ -my_bool maria_page_filler_set_normal(uchar *page, - pgcache_page_no_t page_no - __attribute__((unused)), - uchar *data_ptr) +my_bool maria_page_filler_set_normal(PAGECACHE_IO_HOOK_ARGS *args) { + uchar *page= args->page; +#ifndef DBUG_OFF + pgcache_page_no_t page_no= args->pageno; +#endif + MARIA_SHARE *share= (MARIA_SHARE *)args->data; DBUG_ENTER("maria_page_filler_set_normal"); DBUG_ASSERT(page_no != 0); /* Catches some simple bugs */ - int4store_aligned(page + ((MARIA_SHARE *)data_ptr)->block_size - CRC_SIZE, + int4store_aligned(page + share->block_size - CRC_SIZE, MARIA_NO_CRC_NORMAL_PAGE); DBUG_RETURN(0); } @@ -295,13 +309,12 @@ my_bool maria_page_filler_set_normal(uchar *page, @retval 0 OK */ -my_bool maria_page_filler_set_bitmap(uchar *page, - pgcache_page_no_t page_no - __attribute__((unused)), - uchar *data_ptr) +my_bool maria_page_filler_set_bitmap(PAGECACHE_IO_HOOK_ARGS *args) { + uchar *page= args->page; + MARIA_SHARE *share= (MARIA_SHARE *)args->data; DBUG_ENTER("maria_page_filler_set_bitmap"); - int4store_aligned(page + ((MARIA_SHARE *)data_ptr)->block_size - CRC_SIZE, + int4store_aligned(page + share->block_size - CRC_SIZE, MARIA_NO_CRC_BITMAP_PAGE); DBUG_RETURN(0); } @@ -313,13 +326,13 @@ my_bool maria_page_filler_set_bitmap(uchar *page, @retval 0 OK */ -my_bool maria_page_filler_set_none(uchar *page __attribute__((unused)), - pgcache_page_no_t page_no - __attribute__((unused)), - uchar *data_ptr __attribute__((unused))) +my_bool maria_page_filler_set_none(PAGECACHE_IO_HOOK_ARGS *args + __attribute__((unused))) { #ifdef HAVE_valgrind - int4store_aligned(page + ((MARIA_SHARE *)data_ptr)->block_size - CRC_SIZE, + uchar *page= args->page; + MARIA_SHARE *share= (MARIA_SHARE *)args->data; + int4store_aligned(page + share->block_size - CRC_SIZE, 0); #endif return 0; @@ -332,9 +345,10 @@ my_bool maria_page_filler_set_none(uchar *page __attribute__((unused)), @param data_ptr Write callback data pointer 
(pointer to MARIA_SHARE) */ -void maria_page_write_failure(uchar* data_ptr) +void maria_page_write_failure(int error, PAGECACHE_IO_HOOK_ARGS *args) { - maria_mark_crashed_share((MARIA_SHARE *)data_ptr); + if (error) + maria_mark_crashed_share((MARIA_SHARE *)args->data); } @@ -349,13 +363,11 @@ void maria_page_write_failure(uchar* data_ptr) @retval 1 error */ -my_bool maria_flush_log_for_page(uchar *page, - pgcache_page_no_t page_no - __attribute__((unused)), - uchar *data_ptr __attribute__((unused))) +my_bool maria_flush_log_for_page(PAGECACHE_IO_HOOK_ARGS *args) { LSN lsn; - MARIA_SHARE *share= (MARIA_SHARE*) data_ptr; + uchar *page= args->page; + MARIA_SHARE *share= (MARIA_SHARE *)args->data; DBUG_ENTER("maria_flush_log_for_page"); /* share is 0 here only in unittest */ DBUG_ASSERT(!share || share->page_type == PAGECACHE_LSN_PAGE); @@ -372,10 +384,14 @@ my_bool maria_flush_log_for_page(uchar *page, } -my_bool maria_flush_log_for_page_none(uchar *page __attribute__((unused)), - pgcache_page_no_t page_no - __attribute__((unused)), - uchar *data_ptr __attribute__((unused))) +my_bool maria_flush_log_for_page_none(PAGECACHE_IO_HOOK_ARGS *args + __attribute__((unused))) +{ + return 0; +} + +my_bool maria_page_null_pre_read_hook(PAGECACHE_IO_HOOK_ARGS *args + __attribute__((unused))) { return 0; } diff --git a/storage/maria/ma_static.c b/storage/maria/ma_static.c index 35ad7d5a96a..2877f05c8dc 100644 --- a/storage/maria/ma_static.c +++ b/storage/maria/ma_static.c @@ -40,6 +40,7 @@ my_bool maria_in_ha_maria= FALSE; /* If used from ha_maria or not */ my_bool maria_recovery_changed_data= 0, maria_recovery_verbose= 0; my_bool maria_assert_if_crashed_table= 0; my_bool maria_checkpoint_disabled= 0; +my_bool maria_encrypt_tables= 0; mysql_mutex_t THR_LOCK_maria; #ifdef DONT_USE_RW_LOCKS diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c index 629b774706e..bab07a48fde 100644 --- a/storage/maria/ma_write.c +++ b/storage/maria/ma_write.c @@ -392,8 +392,12 @@ err: 
else fatal_error= 1; - if ((*share->write_record_abort)(info)) - fatal_error= 1; + if (filepos != HA_OFFSET_ERROR) + { + if ((*share->write_record_abort)(info)) + fatal_error= 1; + } + if (fatal_error) { maria_print_error(info->s, HA_ERR_CRASHED); @@ -970,6 +974,7 @@ int _ma_split_page(MARIA_HA *info, MARIA_KEY *key, MARIA_PAGE *split_page, int move_length, uchar *key_buff, my_bool insert_last_key) { + uint keynr; uint length,a_length,key_ref_length,t_length,nod_flag,key_length; uint page_length, split_length, page_flag; uchar *key_pos,*pos, *after_key; @@ -1045,10 +1050,8 @@ int _ma_split_page(MARIA_HA *info, MARIA_KEY *key, MARIA_PAGE *split_page, page_store_info(share, &new_page); /* Copy key number */ - new_page.buff[share->keypage_header - KEYPAGE_USED_SIZE - - KEYPAGE_KEYID_SIZE - KEYPAGE_FLAG_SIZE]= - split_page->buff[share->keypage_header - KEYPAGE_USED_SIZE - - KEYPAGE_KEYID_SIZE - KEYPAGE_FLAG_SIZE]; + keynr= _ma_get_keynr(share, split_page->buff); + _ma_store_keynr(share, new_page.buff, keynr); res= 2; /* Middle key up */ if (share->now_transactional && _ma_log_new(&new_page, 0)) @@ -1494,8 +1497,7 @@ static int _ma_balance_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, page_store_info(share, &extra_page); /* Copy key number */ - extra_buff[share->keypage_header - KEYPAGE_USED_SIZE - KEYPAGE_KEYID_SIZE - - KEYPAGE_FLAG_SIZE]= keyinfo->key_nr; + _ma_store_keynr(share, extra_buff, keyinfo->key_nr); /* move first largest keys to new page */ pos= right_page->buff + right_length-extra_length; @@ -2051,7 +2053,7 @@ static my_bool _ma_log_split(MARIA_PAGE *ma_page, /* Store keypage_flag */ *log_pos++= KEY_OP_SET_PAGEFLAG; - *log_pos++= ma_page->buff[KEYPAGE_TRANSFLAG_OFFSET]; + *log_pos++= _ma_get_keypage_flag(info->s, ma_page->buff); if (new_length <= offset || !key_pos) { @@ -2218,7 +2220,7 @@ static my_bool _ma_log_del_prefix(MARIA_PAGE *ma_page, /* Store keypage_flag */ *log_pos++= KEY_OP_SET_PAGEFLAG; - *log_pos++= ma_page->buff[KEYPAGE_TRANSFLAG_OFFSET]; + 
*log_pos++= _ma_get_keypage_flag(info->s, ma_page->buff); if (offset < diff_length + info->s->keypage_header) { @@ -2342,7 +2344,7 @@ static my_bool _ma_log_key_middle(MARIA_PAGE *ma_page, /* Store keypage_flag */ *log_pos++= KEY_OP_SET_PAGEFLAG; - *log_pos++= ma_page->buff[KEYPAGE_TRANSFLAG_OFFSET]; + *log_pos++= _ma_get_keypage_flag(info->s, ma_page->buff); log_pos[0]= KEY_OP_DEL_SUFFIX; int2store(log_pos+1, data_deleted_last); diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index b878aaa0f7d..086b7a3775d 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -235,6 +235,10 @@ typedef struct st_maria_state_info MARIA_MAX_POINTER_LENGTH) #define MARIA_DELETE_KEY_NR 255 /* keynr for deleted blocks */ + /* extra options */ +#define MA_EXTRA_OPTIONS_ENCRYPTED (1 << 0) +#define MA_EXTRA_OPTIONS_INSERT_ORDER (1 << 1) + /* Basic information of the Maria table. This is stored on disk and not changed (unless we do DLL changes). @@ -317,6 +321,7 @@ typedef struct st_maria_pack typedef struct st_maria_file_bitmap { + struct st_maria_share *share; uchar *map; pgcache_page_no_t page; /* Page number for current bitmap */ pgcache_page_no_t last_bitmap_page; /* Last possible bitmap page */ @@ -345,6 +350,8 @@ typedef struct st_maria_file_bitmap #define MARIA_CHECKPOINT_SHOULD_FREE_ME 2 #define MARIA_CHECKPOINT_SEEN_IN_LOOP 4 +typedef struct st_maria_crypt_data MARIA_CRYPT_DATA; + typedef struct st_maria_share { /* Shared between opens */ MARIA_STATE_INFO state; @@ -505,6 +512,17 @@ typedef struct st_maria_share MARIA_FILE_BITMAP bitmap; mysql_rwlock_t mmap_lock; LSN lsn_of_file_id; /**< LSN of its last LOGREC_FILE_ID */ + + /** + Crypt data + */ + uint crypt_page_header_space; + MARIA_CRYPT_DATA *crypt_data; + + /** + Keep of track of last insert page, used to implement insert order + */ + uint last_insert_page; } MARIA_SHARE; @@ -723,14 +741,13 @@ struct st_maria_handler #define KEYPAGE_USED_SIZE 2 #define KEYPAGE_KEYID_SIZE 1 #define 
KEYPAGE_FLAG_SIZE 1 +#define KEYPAGE_KEY_VERSION_SIZE 4 /* encryption */ #define KEYPAGE_CHECKSUM_SIZE 4 #define MAX_KEYPAGE_HEADER_SIZE (LSN_STORE_SIZE + KEYPAGE_USED_SIZE + \ KEYPAGE_KEYID_SIZE + KEYPAGE_FLAG_SIZE + \ - TRANSID_SIZE) + TRANSID_SIZE + KEYPAGE_KEY_VERSION_SIZE) #define KEYPAGE_FLAG_ISNOD 1 #define KEYPAGE_FLAG_HAS_TRANSID 2 -/* Position to KEYPAGE_FLAG for transactional tables */ -#define KEYPAGE_TRANSFLAG_OFFSET LSN_STORE_SIZE + TRANSID_SIZE + KEYPAGE_KEYID_SIZE #define _ma_get_page_used(share,x) \ ((uint) mi_uint2korr((x) + (share)->keypage_header - KEYPAGE_USED_SIZE)) @@ -751,6 +768,18 @@ struct st_maria_handler (page)->flag|= KEYPAGE_FLAG_HAS_TRANSID; \ (page)->buff[(share)->keypage_header - KEYPAGE_USED_SIZE - KEYPAGE_FLAG_SIZE]= (page)->flag; +#define KEYPAGE_KEY_VERSION(share, x) ((x) + \ + (share)->keypage_header - \ + (KEYPAGE_USED_SIZE + \ + KEYPAGE_FLAG_SIZE + \ + KEYPAGE_KEYID_SIZE + \ + KEYPAGE_KEY_VERSION_SIZE)) + +#define _ma_get_key_version(share,x) \ + ((uint) uint4korr(KEYPAGE_KEY_VERSION((share), (x)))) + +#define _ma_store_key_version(share,x,kv) \ + int4store(KEYPAGE_KEY_VERSION((share), (x)), (kv)) /* TODO: write int4store_aligned as *((uint32 *) (T))= (uint32) (A) for @@ -1379,40 +1408,19 @@ void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn); #define MARIA_NO_CRC_NORMAL_PAGE 0xffffffff #define MARIA_NO_CRC_BITMAP_PAGE 0xfffffffe -extern my_bool maria_page_crc_set_index(uchar *page, - pgcache_page_no_t page_no, - uchar *data_ptr); -extern my_bool maria_page_crc_set_normal(uchar *page, - pgcache_page_no_t page_no, - uchar *data_ptr); -extern my_bool maria_page_crc_check_bitmap(uchar *page, - pgcache_page_no_t page_no, - uchar *data_ptr); -extern my_bool maria_page_crc_check_data(uchar *page, - pgcache_page_no_t page_no, - uchar *data_ptr); -extern my_bool maria_page_crc_check_index(uchar *page, - pgcache_page_no_t page_no, - uchar *data_ptr); -extern my_bool maria_page_crc_check_none(uchar *page, - pgcache_page_no_t 
page_no, - uchar *data_ptr); -extern my_bool maria_page_filler_set_bitmap(uchar *page, - pgcache_page_no_t page_no, - uchar *data_ptr); -extern my_bool maria_page_filler_set_normal(uchar *page, - pgcache_page_no_t page_no, - uchar *data_ptr); -extern my_bool maria_page_filler_set_none(uchar *page, - pgcache_page_no_t page_no, - uchar *data_ptr); -extern void maria_page_write_failure(uchar* data_ptr); -extern my_bool maria_flush_log_for_page(uchar *page, - pgcache_page_no_t page_no, - uchar *data_ptr); -extern my_bool maria_flush_log_for_page_none(uchar *page, - pgcache_page_no_t page_no, - uchar *data_ptr); +extern my_bool maria_page_crc_set_index(PAGECACHE_IO_HOOK_ARGS *args); +extern my_bool maria_page_crc_set_normal(PAGECACHE_IO_HOOK_ARGS *args); +extern my_bool maria_page_crc_check_bitmap(int, PAGECACHE_IO_HOOK_ARGS *args); +extern my_bool maria_page_crc_check_data(int, PAGECACHE_IO_HOOK_ARGS *args); +extern my_bool maria_page_crc_check_index(int, PAGECACHE_IO_HOOK_ARGS *args); +extern my_bool maria_page_crc_check_none(int, PAGECACHE_IO_HOOK_ARGS *args); +extern my_bool maria_page_filler_set_bitmap(PAGECACHE_IO_HOOK_ARGS *args); +extern my_bool maria_page_filler_set_normal(PAGECACHE_IO_HOOK_ARGS *args); +extern my_bool maria_page_filler_set_none(PAGECACHE_IO_HOOK_ARGS *args); +extern void maria_page_write_failure(int error, PAGECACHE_IO_HOOK_ARGS *args); +extern my_bool maria_flush_log_for_page(PAGECACHE_IO_HOOK_ARGS *args); +extern my_bool maria_flush_log_for_page_none(PAGECACHE_IO_HOOK_ARGS *args); + extern PAGECACHE *maria_log_pagecache; extern void ma_set_index_cond_func(MARIA_HA *info, index_cond_func_t func, void *func_arg); diff --git a/storage/maria/unittest/ma_pagecache_consist.c b/storage/maria/unittest/ma_pagecache_consist.c index 5f0e25b5bf4..2c505428dab 100644 --- a/storage/maria/unittest/ma_pagecache_consist.c +++ b/storage/maria/unittest/ma_pagecache_consist.c @@ -77,30 +77,6 @@ static uint flush_divider= 1000; #endif /*TEST_HIGH_CONCURENCY*/ 
-/** - @brief Dummy pagecache callback. -*/ - -static my_bool -dummy_callback(uchar *page __attribute__((unused)), - pgcache_page_no_t page_no __attribute__((unused)), - uchar* data_ptr __attribute__((unused))) -{ - return 0; -} - - -/** - @brief Dummy pagecache callback. -*/ - -static void -dummy_fail_callback(uchar* data_ptr __attribute__((unused))) -{ - return; -} - - /* Get pseudo-random length of the field in (0;limit) @@ -392,8 +368,8 @@ int main(int argc __attribute__((unused)), errno); exit(1); } - pagecache_file_init(file1, &dummy_callback, &dummy_callback, - &dummy_fail_callback, &dummy_callback, NULL); + + pagecache_file_set_null_hooks(&file1); DBUG_PRINT("info", ("file1: %d", file1.file)); if (my_chmod(file1_name, 0777, MYF(MY_WME))) exit(1); diff --git a/storage/maria/unittest/ma_pagecache_rwconsist.c b/storage/maria/unittest/ma_pagecache_rwconsist.c index 1a268db6ad5..dbeb3a98052 100644 --- a/storage/maria/unittest/ma_pagecache_rwconsist.c +++ b/storage/maria/unittest/ma_pagecache_rwconsist.c @@ -49,30 +49,6 @@ static uint read_sleep_limit= 3; static uint report_divisor= 50; /** - @brief Dummy pagecache callback. -*/ - -static my_bool -dummy_callback(uchar *page __attribute__((unused)), - pgcache_page_no_t page_no __attribute__((unused)), - uchar* data_ptr __attribute__((unused))) -{ - return 0; -} - - -/** - @brief Dummy pagecache callback. 
-*/ - -static void -dummy_fail_callback(uchar* data_ptr __attribute__((unused))) -{ - return; -} - - -/** @brief Checks page consistency @param buff pointer to the page content @@ -262,8 +238,7 @@ int main(int argc __attribute__((unused)), errno); exit(1); } - pagecache_file_init(file1, &dummy_callback, &dummy_callback, - &dummy_fail_callback, &dummy_callback, NULL); + pagecache_file_set_null_hooks(&file1); DBUG_PRINT("info", ("file1: %d", file1.file)); if (my_chmod(file1_name, 0777, MYF(MY_WME))) exit(1); diff --git a/storage/maria/unittest/ma_pagecache_rwconsist2.c b/storage/maria/unittest/ma_pagecache_rwconsist2.c index 751c045a879..c06395d0fb3 100644 --- a/storage/maria/unittest/ma_pagecache_rwconsist2.c +++ b/storage/maria/unittest/ma_pagecache_rwconsist2.c @@ -55,30 +55,6 @@ static uint number_of_write_tests= 1000; static uint report_divisor= 50; /** - @brief Dummy pagecache callback. -*/ - -static my_bool -dummy_callback(uchar *page __attribute__((unused)), - pgcache_page_no_t page_no __attribute__((unused)), - uchar* data_ptr __attribute__((unused))) -{ - return 0; -} - - -/** - @brief Dummy pagecache callback. 
-*/ - -static void -dummy_fail_callback(uchar* data_ptr __attribute__((unused))) -{ - return; -} - - -/** @brief Checks page consistency @param buff pointer to the page content @@ -258,8 +234,7 @@ int main(int argc __attribute__((unused)), errno); exit(1); } - pagecache_file_init(file1, &dummy_callback, &dummy_callback, - &dummy_fail_callback, &dummy_callback, NULL); + pagecache_file_set_null_hooks(&file1); DBUG_PRINT("info", ("file1: %d", file1.file)); if (my_chmod(file1_name, 0777, MYF(MY_WME))) exit(1); diff --git a/storage/maria/unittest/ma_pagecache_single.c b/storage/maria/unittest/ma_pagecache_single.c index 64f6782f20f..6ae6f5b87a4 100644 --- a/storage/maria/unittest/ma_pagecache_single.c +++ b/storage/maria/unittest/ma_pagecache_single.c @@ -97,30 +97,6 @@ static struct file_desc simple_delete_flush_test_file[]= }; -/** - @brief Dummy pagecache callback. -*/ - -static my_bool -dummy_callback(uchar *page __attribute__((unused)), - pgcache_page_no_t page_no __attribute__((unused)), - uchar* data_ptr __attribute__((unused))) -{ - return 0; -} - - -/** - @brief Dummy pagecache callback. -*/ - -static void -dummy_fail_callback(uchar* data_ptr __attribute__((unused))) -{ - return; -} - - /* Recreate and reopen a file for test @@ -786,8 +762,7 @@ int main(int argc __attribute__((unused)), errno); exit(1); } - pagecache_file_init(file1, &dummy_callback, &dummy_callback, - &dummy_fail_callback, &dummy_callback, NULL); + pagecache_file_set_null_hooks(&file1); my_close(tmp_file, MYF(0)); my_delete(file2_name, MYF(0)); diff --git a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c index e634506628a..e296b24cdb3 100644 --- a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c +++ b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c @@ -38,30 +38,6 @@ static char file1_name[FN_REFLEN], first_translog_file[FN_REFLEN]; static PAGECACHE_FILE file1; -/** - @brief Dummy pagecache callback. 
-*/ - -static my_bool -dummy_callback(uchar *page __attribute__((unused)), - pgcache_page_no_t page_no __attribute__((unused)), - uchar* data_ptr __attribute__((unused))) -{ - return 0; -} - - -/** - @brief Dummy pagecache callback. -*/ - -static void -dummy_fail_callback(uchar* data_ptr __attribute__((unused))) -{ - return; -} - - int main(int argc __attribute__((unused)), char *argv[]) { uchar long_tr_id[6]; @@ -151,8 +127,9 @@ int main(int argc __attribute__((unused)), char *argv[]) errno); exit(1); } - pagecache_file_init(file1, &dummy_callback, &dummy_callback, - &dummy_fail_callback, maria_flush_log_for_page, NULL); + pagecache_file_set_null_hooks(&file1); + file1.flush_log_callback= maria_flush_log_for_page; + if (my_chmod(file1_name, 0777, MYF(MY_WME))) exit(1); diff --git a/storage/xtradb/CMakeLists.txt b/storage/xtradb/CMakeLists.txt index 093f8f64e20..121d7914083 100644 --- a/storage/xtradb/CMakeLists.txt +++ b/storage/xtradb/CMakeLists.txt @@ -30,6 +30,7 @@ MYSQL_CHECK_BZIP2() # OS tests IF(UNIX) + IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H) IF (XTRADB_PREFER_STATIC_LIBAIO) @@ -336,6 +337,7 @@ SET(INNOBASE_SOURCES btr/btr0btr.cc btr/btr0cur.cc btr/btr0pcur.cc + btr/btr0scrub.cc btr/btr0sea.cc btr/btr0defragment.cc buf/buf0buddy.cc @@ -360,7 +362,9 @@ SET(INNOBASE_SOURCES eval/eval0eval.cc eval/eval0proc.cc fil/fil0fil.cc - fil/fil0pagecompress.cc + fil/fil0pagecompress.cc + fil/fil0pageencryption.cc + fil/fil0crypt.cc fsp/fsp0fsp.cc fut/fut0fut.cc fut/fut0lst.cc @@ -387,6 +391,7 @@ SET(INNOBASE_SOURCES log/log0log.cc log/log0online.cc log/log0recv.cc + log/log0crypt.cc mach/mach0data.cc mem/mem0mem.cc mem/mem0pool.cc diff --git a/storage/xtradb/btr/btr0btr.cc b/storage/xtradb/btr/btr0btr.cc index a5ed71a55d4..2f248c98651 100644 --- a/storage/xtradb/btr/btr0btr.cc +++ b/storage/xtradb/btr/btr0btr.cc @@ -1157,9 +1157,27 @@ btr_page_alloc_low( reservation for free extents, and thus we know that a page can be 
allocated: */ - return(fseg_alloc_free_page_general( - seg_header, hint_page_no, file_direction, - TRUE, mtr, init_mtr)); + buf_block_t* block = fseg_alloc_free_page_general( + seg_header, hint_page_no, file_direction, + TRUE, mtr, init_mtr); + +#ifdef UNIV_DEBUG_SCRUBBING + if (block != NULL) { + fprintf(stderr, + "alloc %lu:%lu to index: %lu root: %lu\n", + buf_block_get_page_no(block), + buf_block_get_space(block), + index->id, + dict_index_get_page(index)); + } else { + fprintf(stderr, + "failed alloc index: %lu root: %lu\n", + index->id, + dict_index_get_page(index)); + } +#endif /* UNIV_DEBUG_SCRUBBING */ + + return block; } /**************************************************************//** @@ -1307,6 +1325,7 @@ btr_page_free_low( dict_index_t* index, /*!< in: index tree */ buf_block_t* block, /*!< in: block to be freed, x-latched */ ulint level, /*!< in: page level */ + bool blob, /*!< in: blob page */ mtr_t* mtr) /*!< in: mtr */ { fseg_header_t* seg_header; @@ -1319,6 +1338,76 @@ btr_page_free_low( buf_block_modify_clock_inc(block); btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block)); + if (blob) { + ut_a(level == 0); + } + + bool scrub = srv_immediate_scrub_data_uncompressed; + /* scrub page */ + if (scrub && blob) { + /* blob page: scrub entire page */ + // TODO(jonaso): scrub only what is actually needed + page_t* page = buf_block_get_frame(block); + memset(page + PAGE_HEADER, 0, + UNIV_PAGE_SIZE - PAGE_HEADER); +#ifdef UNIV_DEBUG_SCRUBBING + fprintf(stderr, + "btr_page_free_low: scrub blob page %lu/%lu\n", + buf_block_get_space(block), + buf_block_get_page_no(block)); +#endif /* UNIV_DEBUG_SCRUBBING */ + } else if (scrub) { + /* scrub records on page */ + + /* TODO(jonaso): in theory we could clear full page + * but, since page still remains in buffer pool, and + * gets flushed etc. Lots of routines validates consistency + * of it. 
And in order to remain structurally consistent + * we clear each record by it own + * + * NOTE: The TODO below mentions removing page from buffer pool + * and removing redo entries, once that is done, clearing full + * pages should be possible + */ + uint cnt = 0; + uint bytes = 0; + page_t* page = buf_block_get_frame(block); + mem_heap_t* heap = NULL; + ulint* offsets = NULL; + rec_t* rec = page_rec_get_next(page_get_infimum_rec(page)); + while (!page_rec_is_supremum(rec)) { + offsets = rec_get_offsets(rec, index, + offsets, ULINT_UNDEFINED, + &heap); + uint size = rec_offs_data_size(offsets); + memset(rec, 0, size); + rec = page_rec_get_next(rec); + cnt++; + bytes += size; + } +#ifdef UNIV_DEBUG_SCRUBBING + fprintf(stderr, + "btr_page_free_low: scrub %lu/%lu - " + "%u records %u bytes\n", + buf_block_get_space(block), + buf_block_get_page_no(block), + cnt, bytes); +#endif /* UNIV_DEBUG_SCRUBBING */ + if (heap) { + mem_heap_free(heap); + } + } + +#ifdef UNIV_DEBUG_SCRUBBING + if (scrub == false) { + fprintf(stderr, + "btr_page_free_low %lu/%lu blob: %u\n", + buf_block_get_space(block), + buf_block_get_page_no(block), + blob); + } +#endif /* UNIV_DEBUG_SCRUBBING */ + if (dict_index_is_ibuf(index)) { btr_page_free_for_ibuf(index, block, mtr); @@ -1334,6 +1423,14 @@ btr_page_free_low( seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; } + if (scrub) { + /** + * Reset page type so that scrub thread won't try to scrub it + */ + mlog_write_ulint(buf_block_get_frame(block) + FIL_PAGE_TYPE, + FIL_PAGE_TYPE_ALLOCATED, MLOG_2BYTES, mtr); + } + fseg_free_page(seg_header, buf_block_get_space(block), buf_block_get_page_no(block), mtr); @@ -1363,7 +1460,7 @@ btr_page_free( ulint level = btr_page_get_level(page, mtr); ut_ad(fil_page_get_type(block->frame) == FIL_PAGE_INDEX); - btr_page_free_low(index, block, level, mtr); + btr_page_free_low(index, block, level, false, mtr); } /**************************************************************//** @@ -2320,9 +2417,14 @@ 
btr_root_raise_and_insert( ibuf_reset_free_bits(new_block); } - /* Reposition the cursor to the child node */ - page_cur_search(new_block, index, tuple, - PAGE_CUR_LE, page_cursor); + if (tuple != NULL) { + /* Reposition the cursor to the child node */ + page_cur_search(new_block, index, tuple, + PAGE_CUR_LE, page_cursor); + } else { + /* Set cursor to first record on child node */ + page_cur_set_before_first(new_block, page_cursor); + } /* Split the child and insert tuple */ return(btr_page_split_and_insert(flags, cursor, offsets, heap, @@ -2998,6 +3100,9 @@ function must always succeed, we cannot reverse it: therefore enough free disk space (2 pages) must be guaranteed to be available before this function is called. +NOTE: jonaso added support for calling function with tuple == NULL +which cause it to only split a page. + @return inserted record */ UNIV_INTERN rec_t* @@ -3075,7 +3180,7 @@ func_start: half-page */ insert_left = FALSE; - if (n_iterations > 0) { + if (tuple != NULL && n_iterations > 0) { direction = FSP_UP; hint_page_no = page_no + 1; split_rec = btr_page_get_split_rec(cursor, tuple, n_ext); @@ -3136,7 +3241,12 @@ func_start: *offsets = rec_get_offsets(split_rec, cursor->index, *offsets, n_uniq, heap); - insert_left = cmp_dtuple_rec(tuple, split_rec, *offsets) < 0; + if (tuple != NULL) { + insert_left = cmp_dtuple_rec( + tuple, split_rec, *offsets) < 0; + } else { + insert_left = 1; + } if (!insert_left && new_page_zip && n_iterations > 0) { /* If a compressed page has already been split, @@ -3170,8 +3280,10 @@ insert_empty: on the appropriate half-page, we may release the tree x-latch. We can then move the records after releasing the tree latch, thus reducing the tree latch contention. */ - - if (split_rec) { + if (tuple == NULL) { + insert_will_fit = 1; + } + else if (split_rec) { insert_will_fit = !new_page_zip && btr_page_insert_fits(cursor, split_rec, offsets, tuple, n_ext, heap); @@ -3292,6 +3404,11 @@ insert_empty: /* 6. 
The split and the tree modification is now completed. Decide the page where the tuple should be inserted */ + if (tuple == NULL) { + rec = NULL; + goto func_exit; + } + if (insert_left) { insert_block = left_block; } else { @@ -3379,6 +3496,9 @@ func_exit: ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index)); ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index)); + if (tuple == NULL) { + ut_ad(rec == NULL); + } ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets)); return(rec); } diff --git a/storage/xtradb/btr/btr0cur.cc b/storage/xtradb/btr/btr0cur.cc index 323bb468527..e48e1454db4 100644 --- a/storage/xtradb/btr/btr0cur.cc +++ b/storage/xtradb/btr/btr0cur.cc @@ -4940,11 +4940,11 @@ alloc_another: change when B-tree nodes are split or merged. */ mlog_write_ulint(page - + FIL_PAGE_FILE_FLUSH_LSN, + + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, space_id, MLOG_4BYTES, &mtr); mlog_write_ulint(page - + FIL_PAGE_FILE_FLUSH_LSN + 4, + + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4, rec_page_no, MLOG_4BYTES, &mtr); @@ -4952,9 +4952,10 @@ alloc_another: memset(page + page_zip_get_size(page_zip) - c_stream.avail_out, 0, c_stream.avail_out); - mlog_log_string(page + FIL_PAGE_FILE_FLUSH_LSN, + mlog_log_string(page + + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, page_zip_get_size(page_zip) - - FIL_PAGE_FILE_FLUSH_LSN, + - FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, &mtr); /* Copy the page to compressed storage, because it will be flushed to disk @@ -5127,7 +5128,7 @@ func_exit: ut_ad(btr_blob_op_is_update(op)); for (i = 0; i < n_freed_pages; i++) { - btr_page_free_low(index, freed_pages[i], 0, alloc_mtr); + btr_page_free_low(index, freed_pages[i], 0, true, alloc_mtr); } } @@ -5361,7 +5362,7 @@ btr_free_externally_stored_field( } next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT); - btr_page_free_low(index, ext_block, 0, &mtr); + btr_page_free_low(index, ext_block, 0, true, &mtr); if (page_zip != NULL) { mach_write_to_4(field_ref + 
BTR_EXTERN_PAGE_NO, @@ -5392,7 +5393,7 @@ btr_free_externally_stored_field( because we did not store it on the page (we save the space overhead from an index page header. */ - btr_page_free_low(index, ext_block, 0, &mtr); + btr_page_free_low(index, ext_block, 0, true, &mtr); mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO, next_page_no, diff --git a/storage/xtradb/btr/btr0scrub.cc b/storage/xtradb/btr/btr0scrub.cc new file mode 100644 index 00000000000..d53b478e429 --- /dev/null +++ b/storage/xtradb/btr/btr0scrub.cc @@ -0,0 +1,898 @@ +// Copyright (c) 2014, Google Inc. + +/**************************************************//** +@file btr/btr0scrub.cc +Scrubbing of btree pages + +*******************************************************/ + +#include "btr0btr.h" +#include "btr0cur.h" +#include "btr0scrub.h" +#include "ibuf0ibuf.h" +#include "fsp0fsp.h" +#include "dict0dict.h" +#include "mtr0mtr.h" + +/* used when trying to acquire dict-lock */ +UNIV_INTERN bool fil_crypt_is_closing(ulint space); + +/** +* scrub data at delete time (e.g purge thread) +*/ +my_bool srv_immediate_scrub_data_uncompressed = false; + +/** +* background scrub uncompressed data +* +* if srv_immediate_scrub_data_uncompressed is enabled +* this is only needed to handle "old" data +*/ +my_bool srv_background_scrub_data_uncompressed = false; + +/** +* backgrounds scrub compressed data +* +* reorganize compressed page for scrubbing +* (only way to scrub compressed data) +*/ +my_bool srv_background_scrub_data_compressed = false; + +/* check spaces once per hour */ +UNIV_INTERN uint srv_background_scrub_data_check_interval = (60 * 60); + +/* default to scrub spaces that hasn't been scrubbed in a week */ +UNIV_INTERN uint srv_background_scrub_data_interval = (7 * 24 * 60 * 60); + +/** +* statistics for scrubbing by background threads +*/ +static btr_scrub_stat_t scrub_stat; +static ib_mutex_t scrub_stat_mutex; +#ifdef UNIV_PFS_MUTEX +UNIV_INTERN mysql_pfs_key_t scrub_stat_mutex_key; +#endif + +#ifdef 
UNIV_DEBUG +/** +* srv_scrub_force_testing +* +* - force scrubbing using background threads even for uncompressed tables +* - force pessimistic scrubbing (page split) even if not needed +* (see test_pessimistic_scrub_pct) +*/ +my_bool srv_scrub_force_testing = true; + +/** +* Force pessimistic scrubbing in 50% of the cases (UNIV_DEBUG only) +*/ +static int test_pessimistic_scrub_pct = 50; + +#endif +static uint scrub_compression_level = page_zip_level; + +/**************************************************************//** +Log a scrubbing failure */ +static +void +log_scrub_failure( +/*===============*/ + btr_scrub_t* scrub_data, /*!< in: data to store statistics on */ + buf_block_t* block, /*!< in: block */ + dberr_t err) /*!< in: error */ +{ + const char* reason = "unknown"; + switch(err) { + case DB_UNDERFLOW: + reason = "too few records on page"; + scrub_data->scrub_stat.page_split_failures_underflow++; + break; + case DB_INDEX_CORRUPT: + reason = "unable to find index!"; + scrub_data->scrub_stat.page_split_failures_missing_index++; + break; + case DB_OUT_OF_FILE_SPACE: + reason = "out of filespace"; + scrub_data->scrub_stat.page_split_failures_out_of_filespace++; + break; + default: + ut_ad(0); + reason = "unknown"; + scrub_data->scrub_stat.page_split_failures_unknown++; + } + fprintf(stderr, + "InnoDB: Warning: Failed to scrub page %lu in space %lu : %s\n", + buf_block_get_page_no(block), + buf_block_get_space(block), + reason); +} + +/**************************************************************** +Lock dict mutexes */ +static +bool +btr_scrub_lock_dict_func(ulint space, bool lock_to_close_table, + const char * file, uint line) +{ + uint start = time(0); + uint last = start; + + while (mutex_enter_nowait_func(&(dict_sys->mutex), file, line)) { + /* if we lock to close a table, we wait forever + * if we don't lock to close a table, we check if space + * is closing, and then instead give up + */ + if (lock_to_close_table == false) { + if 
(fil_crypt_is_closing(space)) { + return false; + } + } + os_thread_sleep(250000); + + uint now = time(0); + if (now >= last + 30) { + fprintf(stderr, + "WARNING: %s:%u waited %u seconds for" + " dict_sys lock, space: %lu" + " lock_to_close_table: %u\n", + file, line, now - start, space, + lock_to_close_table); + + last = now; + } + } + + ut_ad(mutex_own(&dict_sys->mutex)); + return true; +} + +#define btr_scrub_lock_dict(space, lock_to_close_table) \ + btr_scrub_lock_dict_func(space, lock_to_close_table, __FILE__, __LINE__) + +/**************************************************************** +Unlock dict mutexes */ +static +void +btr_scrub_unlock_dict() +{ + dict_mutex_exit_for_mysql(); +} + +/**************************************************************** +Release reference to table +*/ +static +void +btr_scrub_table_close( +/*==================*/ + dict_table_t* table) /*!< in: table */ +{ + bool dict_locked = true; + bool try_drop = false; + table->stats_bg_flag &= ~BG_SCRUB_IN_PROGRESS; + dict_table_close(table, dict_locked, try_drop); +} + +/**************************************************************** +Release reference to table +*/ +static +void +btr_scrub_table_close_for_thread( + btr_scrub_t *scrub_data) +{ + if (scrub_data->current_table == NULL) + return; + + bool lock_for_close = true; + btr_scrub_lock_dict(scrub_data->space, lock_for_close); + + /* perform the actual closing */ + btr_scrub_table_close(scrub_data->current_table); + + btr_scrub_unlock_dict(); + + scrub_data->current_table = NULL; + scrub_data->current_index = NULL; +} + +/**************************************************************//** +Check if scrubbing is turned ON or OFF */ +static +bool +check_scrub_setting( +/*=====================*/ + btr_scrub_t* scrub_data) /*!< in: scrub data */ +{ + if (scrub_data->compressed) + return srv_background_scrub_data_compressed; + else + return srv_background_scrub_data_uncompressed; +} + +#define IBUF_INDEX_ID (DICT_IBUF_ID_MIN + 
IBUF_SPACE_ID) + +/**************************************************************//** +Check if a page needs scrubbing */ +UNIV_INTERN +int +btr_page_needs_scrubbing( +/*=====================*/ + btr_scrub_t* scrub_data, /*!< in: scrub data */ + buf_block_t* block, /*!< in: block to check, latched */ + btr_scrub_page_allocation_status_t allocated) /*!< in: is block known + to be allocated */ +{ + /** + * Check if scrubbing has been turned OFF. + * + * at start of space, we check if scrubbing is ON or OFF + * here we only check if scrubbing is turned OFF. + * + * Motivation is that it's only valueable to have a full table (space) + * scrubbed. + */ + if (!check_scrub_setting(scrub_data)) { + bool before_value = scrub_data->scrubbing; + scrub_data->scrubbing = false; + + if (before_value == true) { + /* we toggle scrubbing from on to off */ + return BTR_SCRUB_TURNED_OFF; + } + } + + if (scrub_data->scrubbing == false) { + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + + page_t* page = buf_block_get_frame(block); + uint type = fil_page_get_type(page); + + if (allocated == BTR_SCRUB_PAGE_ALLOCATED) { + if (type != FIL_PAGE_INDEX) { + /* this function is called from fil-crypt-threads. + * these threads iterate all pages of all tablespaces + * and don't know about fil_page_type. + * But scrubbing is only needed for index-pages. */ + + /** + * NOTE: scrubbing is also needed for UNDO pages, + * but they are scrubbed at purge-time, since they are + * uncompressed + */ + + /* if encountering page type not needing scrubbing + release reference to table object */ + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + + if (page_has_garbage(page) == false) { + /* no garbage (from deleted/shrunken records) */ + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + + } else if (allocated == BTR_SCRUB_PAGE_FREE || + allocated == BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN) { + + if (! 
(type == FIL_PAGE_INDEX || + type == FIL_PAGE_TYPE_BLOB || + type == FIL_PAGE_TYPE_ZBLOB || + type == FIL_PAGE_TYPE_ZBLOB2)) { + + /** + * If this is a dropped page, we also need to scrub + * BLOB pages + */ + + /* if encountering page type not needing scrubbing + release reference to table object */ + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + } + + if (btr_page_get_index_id(page) == IBUF_INDEX_ID) { + /* skip ibuf */ + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + + return BTR_SCRUB_PAGE; +} + +/**************************************************************** +Handle a skipped page +*/ +UNIV_INTERN +void +btr_scrub_skip_page( +/*==================*/ + btr_scrub_t* scrub_data, /*!< in: data with scrub state */ + int needs_scrubbing) /*!< in: return code from + btr_page_needs_scrubbing */ +{ + switch(needs_scrubbing) { + case BTR_SCRUB_SKIP_PAGE: + /* nothing todo */ + return; + case BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE: + btr_scrub_table_close_for_thread(scrub_data); + return; + case BTR_SCRUB_TURNED_OFF: + case BTR_SCRUB_SKIP_PAGE_AND_COMPLETE_SPACE: + btr_scrub_complete_space(scrub_data); + return; + } + + /* unknown value. 
should not happen */ + ut_a(0); +} + +/**************************************************************** +Try to scrub a page using btr_page_reorganize_low +return DB_SUCCESS on success or DB_OVERFLOW on failure */ +static +dberr_t +btr_optimistic_scrub( +/*==================*/ + btr_scrub_t* scrub_data, /*!< in: data with scrub state */ + buf_block_t* block, /*!< in: block to scrub */ + dict_index_t* index, /*!< in: index */ + mtr_t* mtr) /*!< in: mtr */ +{ +#ifdef UNIV_DEBUG + if (srv_scrub_force_testing && + page_get_n_recs(buf_block_get_frame(block)) > 2 && + (rand() % 100) < test_pessimistic_scrub_pct) { + + fprintf(stderr, + "scrub: simulate btr_page_reorganize failed %lu:%lu " + " table: %llu:%s index: %llu:%s get_n_recs(): %lu\n", + buf_block_get_space(block), + buf_block_get_page_no(block), + (ulonglong)scrub_data->current_table->id, + scrub_data->current_table->name, + (ulonglong)scrub_data->current_index->id, + scrub_data->current_index->name, + page_get_n_recs(buf_block_get_frame(block))); + return DB_OVERFLOW; + } +#endif + + page_cur_t cur; + page_cur_set_before_first(block, &cur); + bool recovery = false; + if (!btr_page_reorganize_low(recovery, scrub_compression_level, + &cur, index, mtr)) { + return DB_OVERFLOW; + } + + /* We play safe and reset the free bits */ + if (!dict_index_is_clust(index) && + page_is_leaf(buf_block_get_frame(block))) { + + ibuf_reset_free_bits(block); + } + + scrub_data->scrub_stat.page_reorganizations++; + return DB_SUCCESS; +} + +/**************************************************************** +Try to scrub a page by splitting it +return DB_SUCCESS on success +DB_UNDERFLOW if page has too few records +DB_OUT_OF_FILE_SPACE if we can't find space for split */ +static +dberr_t +btr_pessimistic_scrub( +/*==================*/ + btr_scrub_t* scrub_data, /*!< in: data with scrub state */ + buf_block_t* block, /*!< in: block to scrub */ + dict_index_t* index, /*!< in: index */ + mtr_t* mtr) /*!< in: mtr */ +{ + page_t* page = 
buf_block_get_frame(block); + if (page_get_n_recs(page) < 2) { + /** + * There is no way we can split a page with < 2 records + */ + log_scrub_failure(scrub_data, block, DB_UNDERFLOW); + return DB_UNDERFLOW; + } + + /** + * Splitting page needs new space, allocate it here + * so that splitting won't fail due to this */ + ulint n_extents = 3; + ulint n_reserved = 0; + if (!fsp_reserve_free_extents(&n_reserved, index->space, + n_extents, FSP_NORMAL, mtr)) { + log_scrub_failure(scrub_data, block, + DB_OUT_OF_FILE_SPACE); + return DB_OUT_OF_FILE_SPACE; + } + + /* read block variables */ + ulint space = buf_block_get_space(block); + ulint page_no = buf_block_get_page_no(block); + ulint zip_size = buf_block_get_zip_size(block); + ulint left_page_no = btr_page_get_prev(page, mtr); + ulint right_page_no = btr_page_get_next(page, mtr); + + /** + * When splitting page, we need X-latches on left/right brothers + * see e.g btr_cur_latch_leaves + */ + + if (left_page_no != FIL_NULL) { + /** + * pages needs to be locked left-to-right, release block + * and re-lock. 
We still have x-lock on index + * so this should be safe + */ + mtr_release_buf_page_at_savepoint(mtr, scrub_data->savepoint, + block); + + buf_block_t* get_block = btr_block_get( + space, zip_size, left_page_no, + RW_X_LATCH, index, mtr); + get_block->check_index_page_at_flush = TRUE; + + /** + * Refetch block and re-initialize page + */ + block = btr_block_get( + space, zip_size, page_no, + RW_X_LATCH, index, mtr); + + page = buf_block_get_frame(block); + + /** + * structure should be unchanged + */ + ut_a(left_page_no == btr_page_get_prev(page, mtr)); + ut_a(right_page_no == btr_page_get_next(page, mtr)); + } + + if (right_page_no != FIL_NULL) { + buf_block_t* get_block = btr_block_get( + space, zip_size, right_page_no, + RW_X_LATCH, index, mtr); + get_block->check_index_page_at_flush = TRUE; + } + + /* arguments to btr_page_split_and_insert */ + mem_heap_t* heap = NULL; + dtuple_t* entry = NULL; + ulint* offsets = NULL; + ulint n_ext = 0; + ulint flags = BTR_MODIFY_TREE; + + /** + * position a cursor on first record on page + */ + rec_t* rec = page_rec_get_next(page_get_infimum_rec(page)); + btr_cur_t cursor; + btr_cur_position(index, rec, block, &cursor); + + /** + * call split page with NULL as argument for entry to insert + */ + if (dict_index_get_page(index) == buf_block_get_page_no(block)) { + /* The page is the root page + * NOTE: ibuf_reset_free_bits is called inside + * btr_root_raise_and_insert */ + rec = btr_root_raise_and_insert( + flags, &cursor, &offsets, &heap, entry, n_ext, mtr); + } else { + /* We play safe and reset the free bits + * NOTE: need to call this prior to btr_page_split_and_insert */ + if (!dict_index_is_clust(index) && + page_is_leaf(buf_block_get_frame(block))) { + + ibuf_reset_free_bits(block); + } + + rec = btr_page_split_and_insert( + flags, &cursor, &offsets, &heap, entry, n_ext, mtr); + } + + if (heap) { + mem_heap_free(heap); + } + + if (n_reserved > 0) { + fil_space_release_free_extents(index->space, n_reserved); + } + + 
scrub_data->scrub_stat.page_splits++; + return DB_SUCCESS; +} + +/**************************************************************** +Location index by id for a table +return index or NULL */ +static +dict_index_t* +find_index( +/*========*/ + dict_table_t* table, /*!< in: table */ + index_id_t index_id) /*!< in: index id */ +{ + if (table != NULL) { + dict_index_t* index = dict_table_get_first_index(table); + while (index != NULL) { + if (index->id == index_id) + return index; + index = dict_table_get_next_index(index); + } + } + + return NULL; +} + +/**************************************************************** +Check if table should be scrubbed +*/ +static +bool +btr_scrub_table_needs_scrubbing( +/*============================*/ + dict_table_t* table) /*!< in: table */ +{ + if (table == NULL) + return false; + + if (table->stats_bg_flag & BG_STAT_SHOULD_QUIT) { + return false; + } + + if (table->to_be_dropped) { + return false; + } + + if (table->corrupted) { + return false; + } + + return true; +} + +/**************************************************************** +Check if index should be scrubbed +*/ +static +bool +btr_scrub_index_needs_scrubbing( +/*============================*/ + dict_index_t* index) /*!< in: index */ +{ + if (index == NULL) + return false; + + if (dict_index_is_ibuf(index)) { + return false; + } + + if (dict_index_is_online_ddl(index)) { + return false; + } + + return true; +} + +/**************************************************************** +Get table and index and store it on scrub_data +*/ +static +void +btr_scrub_get_table_and_index( +/*=========================*/ + btr_scrub_t* scrub_data, /*!< in/out: scrub data */ + index_id_t index_id) /*!< in: index id */ +{ + /* first check if it's an index to current table */ + scrub_data->current_index = find_index(scrub_data->current_table, + index_id); + + if (scrub_data->current_index != NULL) { + /* yes it was */ + return; + } + + if (!btr_scrub_lock_dict(scrub_data->space, false)) { 
+ btr_scrub_complete_space(scrub_data); + return; + } + + /* close current table (if any) */ + if (scrub_data->current_table != NULL) { + btr_scrub_table_close(scrub_data->current_table); + scrub_data->current_table = NULL; + } + + /* argument to dict_table_open_on_index_id */ + bool dict_locked = true; + + /* open table based on index_id */ + dict_table_t* table = dict_table_open_on_index_id( + index_id, + dict_locked); + + if (table != NULL) { + /* mark table as being scrubbed */ + table->stats_bg_flag |= BG_SCRUB_IN_PROGRESS; + + if (!btr_scrub_table_needs_scrubbing(table)) { + btr_scrub_table_close(table); + btr_scrub_unlock_dict(); + return; + } + } + + btr_scrub_unlock_dict(); + scrub_data->current_table = table; + scrub_data->current_index = find_index(table, index_id); +} + +/**************************************************************** +Handle free page */ +UNIV_INTERN +int +btr_scrub_free_page( +/*====================*/ + btr_scrub_t* scrub_data, /*!< in/out: scrub data */ + buf_block_t* block, /*!< in: block to scrub */ + mtr_t* mtr) /*!< in: mtr */ +{ + // TODO(jonaso): scrub only what is actually needed + + { + /* note: perform both the memset and setting of FIL_PAGE_TYPE + * wo/ logging. 
so that if we crash before page is flushed + * it will be found by scrubbing thread again + */ + memset(buf_block_get_frame(block) + PAGE_HEADER, 0, + UNIV_PAGE_SIZE - PAGE_HEADER); + + mach_write_to_2(buf_block_get_frame(block) + FIL_PAGE_TYPE, + FIL_PAGE_TYPE_ALLOCATED); + } + + ulint compact = 1; + page_create(block, mtr, compact); + + mtr_commit(mtr); + + /* page doesn't need further processing => SKIP + * and close table/index so that we don't keep references too long */ + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; +} + +/**************************************************************** +Recheck if a page needs scrubbing, and if it does load appropriate +table and index */ +UNIV_INTERN +int +btr_scrub_recheck_page( +/*====================*/ + btr_scrub_t* scrub_data, /*!< inut: scrub data */ + buf_block_t* block, /*!< in: block */ + btr_scrub_page_allocation_status_t allocated, /*!< in: is block + allocated or free */ + mtr_t* mtr) /*!< in: mtr */ +{ + /* recheck if page needs scrubbing (knowing allocation status) */ + int needs_scrubbing = btr_page_needs_scrubbing( + scrub_data, block, allocated); + + if (needs_scrubbing != BTR_SCRUB_PAGE) { + mtr_commit(mtr); + return needs_scrubbing; + } + + if (allocated == BTR_SCRUB_PAGE_FREE) { + /** we don't need to load table/index for free pages + * so scrub directly here */ + /* mtr is committed inside btr_scrub_page_free */ + return btr_scrub_free_page(scrub_data, + block, + mtr); + } + + page_t* page = buf_block_get_frame(block); + index_id_t index_id = btr_page_get_index_id(page); + + if (scrub_data->current_index == NULL || + scrub_data->current_index->id != index_id) { + + /** + * commit mtr (i.e release locks on block) + * and try to get table&index potentially loading it + * from disk + */ + mtr_commit(mtr); + btr_scrub_get_table_and_index(scrub_data, index_id); + } else { + /* we already have correct index + * commit mtr so that we can lock index before fetching page + */ + mtr_commit(mtr); + } + + /* check 
if table is about to be dropped */ + if (!btr_scrub_table_needs_scrubbing(scrub_data->current_table)) { + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + + /* check if index is scrubbable */ + if (!btr_scrub_index_needs_scrubbing(scrub_data->current_index)) { + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + + mtr_start(mtr); + mtr_x_lock(dict_index_get_lock(scrub_data->current_index), mtr); + /** set savepoint for X-latch of block */ + scrub_data->savepoint = mtr_set_savepoint(mtr); + return BTR_SCRUB_PAGE; +} + +/**************************************************************** +Perform actual scrubbing of page */ +UNIV_INTERN +int +btr_scrub_page( +/*============*/ + btr_scrub_t* scrub_data, /*!< in/out: scrub data */ + buf_block_t* block, /*!< in: block */ + btr_scrub_page_allocation_status_t allocated, /*!< in: is block + allocated or free */ + mtr_t* mtr) /*!< in: mtr */ +{ + /* recheck if page needs scrubbing (knowing allocation status) */ + int needs_scrubbing = btr_page_needs_scrubbing( + scrub_data, block, allocated); + if (needs_scrubbing != BTR_SCRUB_PAGE) { + mtr_commit(mtr); + return needs_scrubbing; + } + + if (allocated == BTR_SCRUB_PAGE_FREE) { + /* mtr is committed inside btr_scrub_page_free */ + return btr_scrub_free_page(scrub_data, + block, + mtr); + } + + /* check that table/index still match now that they are loaded */ + + if (scrub_data->current_table->space != scrub_data->space) { + /* this is truncate table */ + mtr_commit(mtr); + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + + if (scrub_data->current_index->space != scrub_data->space) { + /* this is truncate table */ + mtr_commit(mtr); + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + + if (scrub_data->current_index->page == FIL_NULL) { + /* this is truncate table */ + mtr_commit(mtr); + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + + if (btr_page_get_index_id(buf_block_get_frame(block)) != + scrub_data->current_index->id) { + /* page has been reallocated to new index */ + 
mtr_commit(mtr); + return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; + } + + /* check if I can scrub (reorganize) page wo/ overflow */ + if (btr_optimistic_scrub(scrub_data, + block, + scrub_data->current_index, + mtr) != DB_SUCCESS) { + + /** + * Can't reorganize page...need to split it + */ + btr_pessimistic_scrub(scrub_data, + block, + scrub_data->current_index, + mtr); + } + mtr_commit(mtr); + + return BTR_SCRUB_SKIP_PAGE; // no further action needed +} + +/**************************************************************//** +Start iterating a space */ +UNIV_INTERN +bool +btr_scrub_start_space( +/*===================*/ + ulint space, /*!< in: space */ + btr_scrub_t* scrub_data) /*!< in/out: scrub data */ +{ + scrub_data->space = space; + scrub_data->current_table = NULL; + scrub_data->current_index = NULL; + + scrub_data->compressed = fil_space_get_zip_size(space) > 0; + scrub_data->scrubbing = check_scrub_setting(scrub_data); + return scrub_data->scrubbing; +} + +/*********************************************************************** +Update global statistics with thread statistics */ +static +void +btr_scrub_update_total_stat(btr_scrub_t *scrub_data) +{ + mutex_enter(&scrub_stat_mutex); + scrub_stat.page_reorganizations += + scrub_data->scrub_stat.page_reorganizations; + scrub_stat.page_splits += + scrub_data->scrub_stat.page_splits; + scrub_stat.page_split_failures_underflow += + scrub_data->scrub_stat.page_split_failures_underflow; + scrub_stat.page_split_failures_out_of_filespace += + scrub_data->scrub_stat.page_split_failures_out_of_filespace; + scrub_stat.page_split_failures_missing_index += + scrub_data->scrub_stat.page_split_failures_missing_index; + scrub_stat.page_split_failures_unknown += + scrub_data->scrub_stat.page_split_failures_unknown; + mutex_exit(&scrub_stat_mutex); + + // clear stat + memset(&scrub_data->scrub_stat, 0, sizeof(scrub_data->scrub_stat)); +} + +/**************************************************************//** +Complete iterating a 
space */ +UNIV_INTERN +bool +btr_scrub_complete_space( +/*=====================*/ + btr_scrub_t* scrub_data) /*!< in/out: scrub data */ +{ + btr_scrub_table_close_for_thread(scrub_data); + btr_scrub_update_total_stat(scrub_data); + return scrub_data->scrubbing; +} + +/********************************************************************* +Return scrub statistics */ +void +btr_scrub_total_stat(btr_scrub_stat_t *stat) +{ + mutex_enter(&scrub_stat_mutex); + *stat = scrub_stat; + mutex_exit(&scrub_stat_mutex); +} + +/********************************************************************* +Init global variables */ +UNIV_INTERN +void +btr_scrub_init() +{ + mutex_create(scrub_stat_mutex_key, + &scrub_stat_mutex, SYNC_NO_ORDER_CHECK); + + memset(&scrub_stat, 0, sizeof(scrub_stat)); +} + +/********************************************************************* +Cleanup globals */ +UNIV_INTERN +void +btr_scrub_cleanup() +{ + mutex_free(&scrub_stat_mutex); +} diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc index 6c6b67ad774..7ae1aba31d7 100644 --- a/storage/xtradb/buf/buf0buf.cc +++ b/storage/xtradb/buf/buf0buf.cc @@ -56,6 +56,10 @@ Created 11/5/1995 Heikki Tuuri #include "buf0checksum.h" #include "trx0trx.h" #include "srv0start.h" +#include "ut0byte.h" + +#include "fil0pageencryption.h" + /* prototypes for new functions added to ha_innodb.cc */ trx_t* innobase_get_trx(); @@ -570,12 +574,13 @@ buf_page_is_corrupted( ulint zip_size) /*!< in: size of compressed page; 0 for uncompressed pages */ { + ulint page_encrypted = fil_page_is_encrypted(read_buf); ulint checksum_field1; ulint checksum_field2; ibool crc32_inited = FALSE; ib_uint32_t crc32 = ULINT32_UNDEFINED; - if (!zip_size + if (!page_encrypted && !zip_size && memcmp(read_buf + FIL_PAGE_LSN + 4, read_buf + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) { @@ -628,6 +633,9 @@ buf_page_is_corrupted( if (zip_size) { return(!page_zip_verify_checksum(read_buf, zip_size)); } + if (page_encrypted) { 
+ return (FALSE); + } checksum_field1 = mach_read_from_4( read_buf + FIL_PAGE_SPACE_OR_CHKSUM); @@ -1070,6 +1078,9 @@ buf_block_init( block->page.state = BUF_BLOCK_NOT_USED; block->page.buf_fix_count = 0; block->page.io_fix = BUF_IO_NONE; + block->page.crypt_buf = NULL; + block->page.crypt_buf_free = NULL; + block->page.key_version = 0; block->modify_clock = 0; @@ -3459,11 +3470,13 @@ page is not in the buffer pool it is not loaded and NULL is returned. Suitable for using when holding the lock_sys_t::mutex. @return pointer to a page or NULL */ UNIV_INTERN -const buf_block_t* +buf_block_t* buf_page_try_get_func( /*==================*/ ulint space_id,/*!< in: tablespace id */ ulint page_no,/*!< in: page number */ + ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */ + bool possibly_freed, const char* file, /*!< in: file name */ ulint line, /*!< in: line where called */ mtr_t* mtr) /*!< in: mini-transaction */ @@ -3501,8 +3514,12 @@ buf_page_try_get_func( buf_block_buf_fix_inc(block, file, line); mutex_exit(&block->mutex); - fix_type = MTR_MEMO_PAGE_S_FIX; - success = rw_lock_s_lock_nowait(&block->lock, file, line); + if (rw_latch == RW_S_LATCH) { + fix_type = MTR_MEMO_PAGE_S_FIX; + success = rw_lock_s_lock_nowait(&block->lock, file, line); + } else { + success = false; + } if (!success) { /* Let us try to get an X-latch. 
If the current thread @@ -3527,9 +3544,11 @@ buf_page_try_get_func( ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - mutex_enter(&block->mutex); - ut_a(!block->page.file_page_was_freed); - mutex_exit(&block->mutex); + if (!possibly_freed) { + mutex_enter(&block->mutex); + ut_a(!block->page.file_page_was_freed); + mutex_exit(&block->mutex); + } #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); @@ -3559,6 +3578,10 @@ buf_page_init_low( bpage->newest_modification = 0; bpage->oldest_modification = 0; bpage->write_size = 0; + bpage->crypt_buf = NULL; + bpage->crypt_buf_free = NULL; + bpage->key_version = 0; + HASH_INVALIDATE(bpage, hash); bpage->is_corrupt = FALSE; #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG @@ -4083,7 +4106,7 @@ buf_page_create( Then InnoDB could in a crash recovery print a big, false, corruption warning if the stamp contains an lsn bigger than the ib_logfile lsn. */ - memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8); + memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(++buf_dbg_counter % 5771 || buf_validate()); @@ -4287,6 +4310,16 @@ buf_page_io_complete( ulint read_space_id; byte* frame; + if (!buf_page_decrypt_after_read(bpage)) { + /* encryption error! 
*/ + if (buf_page_get_zip_size(bpage)) { + frame = bpage->zip.data; + } else { + frame = ((buf_block_t*) bpage)->frame; + } + goto corrupt; + } + if (buf_page_get_zip_size(bpage)) { frame = bpage->zip.data; os_atomic_increment_ulint(&buf_pool->n_pend_unzip, 1); @@ -4464,6 +4497,9 @@ corrupt: bpage->offset, buf_page_get_zip_size(bpage), update_ibuf_bitmap); } + } else { + /* io_type == BUF_IO_WRITE */ + buf_page_encrypt_after_write(bpage); } if (io_type == BUF_IO_WRITE @@ -5741,3 +5777,181 @@ buf_pool_mutex_exit( ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); mutex_exit(&buf_pool->LRU_list_mutex); } + +/********************************************************************//** +Encrypts a buffer page right before it's flushed to disk +*/ +byte* +buf_page_encrypt_before_write( +/*==========================*/ + buf_page_t* bpage, /*!< in/out: buffer page to be flushed */ + const byte* src_frame) /*!< in: src frame */ +{ + if (bpage->offset == 0) { + /* Page 0 of a tablespace is not encrypted */ + ut_ad(bpage->key_version == 0); + return const_cast<byte*>(src_frame); + } + + if (fil_space_check_encryption_write(bpage->space) == false) { + /* An unencrypted table */ + bpage->key_version = 0; + return const_cast<byte*>(src_frame); + } + + if (bpage->space == TRX_SYS_SPACE && bpage->offset == TRX_SYS_PAGE_NO) { + /* don't encrypt page as it contains address to dblwr buffer */ + bpage->key_version = 0; + return const_cast<byte*>(src_frame); + } + + ulint zip_size = buf_page_get_zip_size(bpage); + + /** + * TODO(jonaso): figure out more clever malloc strategy + * + * This implementation does a malloc/free per iop for encrypted + * tablespaces. 
Alternative strategies that have been considered are + * + * 1) use buf_block_alloc (i.e. alloc from buffer pool) + * this does not work as buf_block_alloc will then be called + * when needing to flush a page, which might be triggered + * due to shortage of memory in buffer pool + * 2) allocate a buffer per fil_node_t + * this would break abstraction layers and has therefore not been + * considered a lot. + */ + + bpage->crypt_buf_free = (byte*)malloc(UNIV_PAGE_SIZE*2); + byte *dst_frame = bpage->crypt_buf = (byte *)ut_align(bpage->crypt_buf_free, UNIV_PAGE_SIZE); + + // encrypt page content + fil_space_encrypt(bpage->space, bpage->offset, + bpage->newest_modification, + src_frame, zip_size, dst_frame, 0); + + unsigned key_version = + mach_read_from_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + ut_ad(key_version == 0 || key_version >= bpage->key_version); + bpage->key_version = key_version; + + // return dst_frame which will be written + return dst_frame; +} + +/********************************************************************//** +Release memory after encrypted page has been written to disk +*/ +ibool +buf_page_encrypt_after_write( +/*=========================*/ + buf_page_t* bpage) /*!< in/out: buffer page flushed */ +{ + if (bpage->crypt_buf_free != NULL) { + free(bpage->crypt_buf_free); + bpage->crypt_buf_free = NULL; + bpage->crypt_buf = NULL; + } + return (TRUE); +} + +/********************************************************************//** +Allocates memory to read in an encrypted page +*/ +byte* +buf_page_decrypt_before_read( +/*=========================*/ + buf_page_t* bpage) /*!< in/out: buffer page to be read */ +{ + ulint zip_size = buf_page_get_zip_size(bpage); + + if (bpage->offset == 0) { + /* File header pages are not encrypted */ +unencrypted: + if (zip_size) + return bpage->zip.data; + else + return ((buf_block_t*) bpage)->frame; + } + + if (fil_space_check_encryption_read(bpage->space) == false) { + goto unencrypted; + } + + // 
allocate buffer to read data into + bpage->crypt_buf_free = (byte*)malloc(UNIV_PAGE_SIZE*2); + bpage->crypt_buf = (byte*)ut_align(bpage->crypt_buf_free, UNIV_PAGE_SIZE); + return bpage->crypt_buf; +} + +/********************************************************************//** +Decrypt page after it has been read from disk +*/ +ibool +buf_page_decrypt_after_read( +/*========================*/ + buf_page_t* bpage) /*!< in/out: buffer page read from disk */ +{ + ut_ad(bpage->key_version == 0); + if (bpage->offset == 0) { + /* File header pages are not encrypted */ + ut_a(bpage->crypt_buf == NULL); + return (TRUE); + } + + ulint zip_size = buf_page_get_zip_size(bpage); + ulint size = (zip_size) ? zip_size : UNIV_PAGE_SIZE; + + byte* dst_frame = (zip_size) ? bpage->zip.data : + ((buf_block_t*) bpage)->frame; + const byte* src_frame = bpage->crypt_buf != NULL ? + bpage->crypt_buf : dst_frame; + + unsigned key_version = + mach_read_from_4(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + + if (key_version == 0) { + /* the page we read is unencrypted */ + if (dst_frame != src_frame) { + /* but we had allocated a crypt_buf */ + memcpy(dst_frame, src_frame, size); + } + } else { + /* the page we read is encrypted */ + if (dst_frame == src_frame) { + /* but we had NOT allocated a crypt buf + * malloc a buffer, copy page to it + * and then decrypt from that into real page*/ + bpage->crypt_buf_free = (byte *)malloc(UNIV_PAGE_SIZE*2); + src_frame = bpage->crypt_buf = (byte*)ut_align(bpage->crypt_buf_free, UNIV_PAGE_SIZE); + memcpy(bpage->crypt_buf, dst_frame, size); + } + /* decrypt from src_frame to dst_frame */ + fil_space_decrypt(bpage->space, + src_frame, size, dst_frame); + } + bpage->key_version = key_version; + + if (bpage->crypt_buf_free != NULL) { + // free temp page + free(bpage->crypt_buf_free); + bpage->crypt_buf = NULL; + bpage->crypt_buf_free = NULL; + } + return (TRUE); +} + +/********************************************************************//** +Release 
memory allocated for decryption +*/ +void +buf_page_decrypt_cleanup( +/*=====================*/ + buf_page_t* bpage) /*!< in/out: buffer page */ +{ + if (bpage->crypt_buf != NULL) { + free(bpage->crypt_buf_free); + bpage->crypt_buf = NULL; + bpage->crypt_buf_free = NULL; + } +} diff --git a/storage/xtradb/buf/buf0checksum.cc b/storage/xtradb/buf/buf0checksum.cc index ec79bbe6be9..451fef2f82e 100644 --- a/storage/xtradb/buf/buf0checksum.cc +++ b/storage/xtradb/buf/buf0checksum.cc @@ -63,7 +63,8 @@ buf_calc_page_crc32( there we store the old formula checksum. */ checksum = ut_crc32(page + FIL_PAGE_OFFSET, - FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET) + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + - FIL_PAGE_OFFSET) ^ ut_crc32(page + FIL_PAGE_DATA, UNIV_PAGE_SIZE - FIL_PAGE_DATA - FIL_PAGE_END_LSN_OLD_CHKSUM); @@ -93,7 +94,8 @@ buf_calc_page_new_checksum( there we store the old formula checksum. */ checksum = ut_fold_binary(page + FIL_PAGE_OFFSET, - FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET) + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + - FIL_PAGE_OFFSET) + ut_fold_binary(page + FIL_PAGE_DATA, UNIV_PAGE_SIZE - FIL_PAGE_DATA - FIL_PAGE_END_LSN_OLD_CHKSUM); @@ -118,7 +120,7 @@ buf_calc_page_old_checksum( { ulint checksum; - checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN); + checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); checksum = checksum & 0xFFFFFFFFUL; diff --git a/storage/xtradb/buf/buf0dblwr.cc b/storage/xtradb/buf/buf0dblwr.cc index 0f59375b256..7b5cd4c12c2 100644 --- a/storage/xtradb/buf/buf0dblwr.cc +++ b/storage/xtradb/buf/buf0dblwr.cc @@ -50,6 +50,8 @@ UNIV_INTERN buf_dblwr_t* buf_dblwr = NULL; /** Set to TRUE when the doublewrite buffer is being created */ UNIV_INTERN ibool buf_dblwr_being_created = FALSE; +#define TRX_SYS_DOUBLEWRITE_BLOCKS 2 + /****************************************************************//** Determines if a page number is located inside the doublewrite buffer. 
@return TRUE if the location is inside the two blocks of the @@ -136,7 +138,7 @@ buf_dblwr_init( /* There are two blocks of same size in the doublewrite buffer. */ - buf_size = 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; + buf_size = TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; /* There must be atleast one buffer for single page writes and one buffer for batch writes. */ @@ -216,7 +218,7 @@ start_again: "Doublewrite buffer not found: creating new"); if (buf_pool_get_curr_size() - < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + < ((TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + FSP_EXTENT_SIZE / 2 + 100) * UNIV_PAGE_SIZE)) { @@ -252,7 +254,7 @@ start_again: fseg_header = doublewrite + TRX_SYS_DOUBLEWRITE_FSEG; prev_page_no = 0; - for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + FSP_EXTENT_SIZE / 2; i++) { new_block = fseg_alloc_free_page( fseg_header, prev_page_no + 1, FSP_UP, &mtr); @@ -375,7 +377,7 @@ buf_dblwr_init_or_load_pages( /* We do the file i/o past the buffer pool */ - unaligned_read_buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); + unaligned_read_buf = static_cast<byte*>(ut_malloc(3 * UNIV_PAGE_SIZE)); read_buf = static_cast<byte*>( ut_align(unaligned_read_buf, UNIV_PAGE_SIZE)); @@ -387,6 +389,14 @@ buf_dblwr_init_or_load_pages( doublewrite = read_buf + TRX_SYS_DOUBLEWRITE; + if (mach_read_from_4(read_buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) != 0) { + fil_space_decrypt((ulint)TRX_SYS_SPACE, + read_buf, + UNIV_PAGE_SIZE, /* page size */ + read_buf + UNIV_PAGE_SIZE); + doublewrite = read_buf + UNIV_PAGE_SIZE + TRX_SYS_DOUBLEWRITE; + } + if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC) == TRX_SYS_DOUBLEWRITE_MAGIC_N) { /* The doublewrite buffer has been created */ @@ -429,7 +439,7 @@ buf_dblwr_init_or_load_pages( page = buf; - for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) { + for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 
TRX_SYS_DOUBLEWRITE_BLOCKS; i++) { ulint source_page_no; @@ -516,9 +526,9 @@ buf_dblwr_process() zip_size ? zip_size : UNIV_PAGE_SIZE, read_buf, NULL, 0); - /* Check if the page is corrupt */ - - if (buf_page_is_corrupted(true, read_buf, zip_size)) { + if (fil_space_verify_crypt_checksum(read_buf, zip_size)) { + /* page is encrypted and checksum is OK */ + } else if (buf_page_is_corrupted(true, read_buf, zip_size)) { fprintf(stderr, "InnoDB: Warning: database page" @@ -529,8 +539,11 @@ buf_dblwr_process() " the doublewrite buffer.\n", (ulong) space_id, (ulong) page_no); - if (buf_page_is_corrupted(true, - page, zip_size)) { + if (fil_space_verify_crypt_checksum(page, zip_size)) { + /* the doublewrite buffer page is encrypted and OK */ + } else if (buf_page_is_corrupted(true, + page, + zip_size)) { fprintf(stderr, "InnoDB: Dump of the page:\n"); buf_page_print( @@ -593,7 +606,27 @@ buf_dblwr_process() } fil_flush_file_spaces(FIL_TABLESPACE); - ut_free(unaligned_read_buf); + + { + fprintf(stderr, + "Clear dblwr buffer after completing " + "processing of it...\n"); + + size_t bytes = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE; + byte *unaligned_buf = static_cast<byte*>( + ut_malloc(bytes + UNIV_PAGE_SIZE - 1)); + + byte *buf = static_cast<byte*>( + ut_align(unaligned_buf, UNIV_PAGE_SIZE)); + memset(buf, 0, bytes); + + fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, + buf_dblwr->block1, 0, bytes, buf, NULL, NULL); + fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, + buf_dblwr->block2, 0, bytes, buf, NULL, NULL); + + ut_free(unaligned_buf); + } } /****************************************************************//** @@ -665,7 +698,7 @@ buf_dblwr_update( break; case BUF_FLUSH_SINGLE_PAGE: { - const ulint size = 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; + const ulint size = TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; ulint i; mutex_enter(&buf_dblwr->mutex); for (i = srv_doublewrite_batch_size; i < size; ++i) { @@ -792,12 +825,14 @@ 
buf_dblwr_write_block_to_datafile( ? OS_FILE_WRITE : OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER; + void * frame = buf_page_get_frame(bpage); + if (bpage->zip.data) { fil_io(flags, sync, buf_page_get_space(bpage), buf_page_get_zip_size(bpage), buf_page_get_page_no(bpage), 0, buf_page_get_zip_size(bpage), - (void*) bpage->zip.data, + frame, (void*) bpage, 0); return; @@ -810,7 +845,7 @@ buf_dblwr_write_block_to_datafile( fil_io(flags, sync, buf_block_get_space(block), 0, buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE, - (void*) block->frame, (void*) block, + frame, (void*) block, (ulint *)&bpage->write_size); } @@ -1003,13 +1038,14 @@ try_again: } zip_size = buf_page_get_zip_size(bpage); + void * frame = buf_page_get_frame(bpage); if (zip_size) { UNIV_MEM_ASSERT_RW(bpage->zip.data, zip_size); /* Copy the compressed page and clear the rest. */ memcpy(buf_dblwr->write_buf + UNIV_PAGE_SIZE * buf_dblwr->first_free, - bpage->zip.data, zip_size); + frame, zip_size); memset(buf_dblwr->write_buf + UNIV_PAGE_SIZE * buf_dblwr->first_free + zip_size, 0, UNIV_PAGE_SIZE - zip_size); @@ -1020,7 +1056,7 @@ try_again: memcpy(buf_dblwr->write_buf + UNIV_PAGE_SIZE * buf_dblwr->first_free, - ((buf_block_t*) bpage)->frame, UNIV_PAGE_SIZE); + frame, UNIV_PAGE_SIZE); } buf_dblwr->buf_block_arr[buf_dblwr->first_free] = bpage; @@ -1071,7 +1107,7 @@ buf_dblwr_write_single_page( /* total number of slots available for single page flushes starts from srv_doublewrite_batch_size to the end of the buffer. */ - size = 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; + size = TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; ut_a(size > srv_doublewrite_batch_size); n_slots = size - srv_doublewrite_batch_size; @@ -1142,9 +1178,11 @@ retry: bytes in the doublewrite page with zeros. 
*/ zip_size = buf_page_get_zip_size(bpage); + void * frame = buf_page_get_frame(bpage); + if (zip_size) { memcpy(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i, - bpage->zip.data, zip_size); + frame, zip_size); memset(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i + zip_size, 0, UNIV_PAGE_SIZE - zip_size); @@ -1157,7 +1195,7 @@ retry: doublewrite buffer */ fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, offset, 0, UNIV_PAGE_SIZE, - (void*) ((buf_block_t*) bpage)->frame, + frame, NULL, 0); } diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index cbfba081425..571aaa2151c 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -758,7 +758,7 @@ buf_flush_update_zip_checksum( srv_checksum_algorithm))); mach_write_to_8(page + FIL_PAGE_LSN, lsn); - memset(page + FIL_PAGE_FILE_FLUSH_LSN, 0, 8); + memset(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum); } @@ -936,7 +936,7 @@ buf_flush_write_block_low( mach_write_to_8(frame + FIL_PAGE_LSN, bpage->newest_modification); - memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8); + memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); break; case BUF_BLOCK_FILE_PAGE: frame = bpage->zip.data; @@ -951,6 +951,8 @@ buf_flush_write_block_low( break; } + frame = buf_page_encrypt_before_write(bpage, frame); + if (!srv_use_doublewrite_buf || !buf_dblwr) { fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, sync, buf_page_get_space(bpage), zip_size, diff --git a/storage/xtradb/buf/buf0rea.cc b/storage/xtradb/buf/buf0rea.cc index 7a79958c136..1bb861cf820 100644 --- a/storage/xtradb/buf/buf0rea.cc +++ b/storage/xtradb/buf/buf0rea.cc @@ -222,6 +222,8 @@ not_to_recover: ut_ad(buf_page_in_file(bpage)); ut_ad(!mutex_own(&buf_pool_from_bpage(bpage)->LRU_list_mutex)); + byte* frame = buf_page_decrypt_before_read(bpage, zip_size); + if (sync) { thd_wait_begin(NULL, THD_WAIT_DISKIO); } @@ -230,14 +232,14 @@ not_to_recover: *err = 
_fil_io(OS_FILE_READ | wake_later | ignore_nonexistent_pages, sync, space, zip_size, offset, 0, zip_size, - bpage->zip.data, bpage, 0, trx); + frame, bpage, 0, trx); } else { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); *err = _fil_io(OS_FILE_READ | wake_later | ignore_nonexistent_pages, sync, space, 0, offset, 0, UNIV_PAGE_SIZE, - ((buf_block_t*) bpage)->frame, bpage, &bpage->write_size, trx); + frame, bpage, &bpage->write_size, trx); } if (sync) { @@ -245,6 +247,7 @@ not_to_recover: } if (*err != DB_SUCCESS) { + buf_page_decrypt_cleanup(bpage); if (ignore_nonexistent_pages || *err == DB_TABLESPACE_DELETED) { buf_read_page_handle_error(bpage); return(0); diff --git a/storage/xtradb/dict/dict0dict.cc b/storage/xtradb/dict/dict0dict.cc index 2d6242a04cb..58e57fa77fc 100644 --- a/storage/xtradb/dict/dict0dict.cc +++ b/storage/xtradb/dict/dict0dict.cc @@ -321,10 +321,10 @@ dict_get_db_name_len( Reserves the dictionary system mutex for MySQL. */ UNIV_INTERN void -dict_mutex_enter_for_mysql(void) +dict_mutex_enter_for_mysql_func(const char * file, ulint line) /*============================*/ { - mutex_enter(&(dict_sys->mutex)); + mutex_enter_func(&(dict_sys->mutex), file, line); } /********************************************************************//** diff --git a/storage/xtradb/dict/dict0load.cc b/storage/xtradb/dict/dict0load.cc index 874614bfb5c..fb33c31ff92 100644 --- a/storage/xtradb/dict/dict0load.cc +++ b/storage/xtradb/dict/dict0load.cc @@ -1153,6 +1153,12 @@ loop: space_id, name); } + /* We need to read page 0 to get (optional) IV + regardless if encryptions is turned on or not, + since if it's off we should decrypt a potentially + already encrypted table */ + bool read_page_0 = true; + /* We set the 2nd param (fix_dict = true) here because we already have an x-lock on dict_operation_lock and dict_sys->mutex. Besides, @@ -1160,7 +1166,7 @@ loop: If the filepath is not known, it will need to be discovered. 
*/ dberr_t err = fil_open_single_table_tablespace( - false, srv_read_only_mode ? false : true, + read_page_0, srv_read_only_mode ? false : true, space_id, dict_tf_to_fsp_flags(flags), name, filepath); @@ -2642,6 +2648,99 @@ check_rec: return(table); } +/***********************************************************************//** +Loads a table id based on the index id. +@return true if found */ +static +bool +dict_load_table_id_on_index_id( +/*==================*/ + index_id_t index_id, /*!< in: index id */ + table_id_t* table_id) /*!< out: table id */ +{ + /* check hard coded indexes */ + switch(index_id) { + case DICT_TABLES_ID: + case DICT_COLUMNS_ID: + case DICT_INDEXES_ID: + case DICT_FIELDS_ID: + *table_id = index_id; + return true; + case DICT_TABLE_IDS_ID: + /* The following is a secondary index on SYS_TABLES */ + *table_id = DICT_TABLES_ID; + return true; + } + + bool found = false; + mtr_t mtr; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + /* NOTE that the operation of this function is protected by + the dictionary mutex, and therefore no deadlocks can occur + with other dictionary operations. 
*/ + + mtr_start(&mtr); + + btr_pcur_t pcur; + const rec_t* rec = dict_startscan_system(&pcur, &mtr, SYS_INDEXES); + + while (rec) { + ulint len; + const byte* field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_INDEXES__ID, &len); + ut_ad(len == 8); + + /* Check if the index id is the one searched for */ + if (index_id == mach_read_from_8(field)) { + found = true; + /* Now we get the table id */ + const byte* field = rec_get_nth_field_old( + rec, + DICT_FLD__SYS_INDEXES__TABLE_ID, + &len); + *table_id = mach_read_from_8(field); + break; + } + mtr_commit(&mtr); + mtr_start(&mtr); + rec = dict_getnext_system(&pcur, &mtr); + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + return(found); +} + +UNIV_INTERN +dict_table_t* +dict_table_open_on_index_id( +/*==================*/ + index_id_t index_id, /*!< in: index id */ + bool dict_locked) /*!< in: dict locked */ +{ + if (!dict_locked) { + mutex_enter(&dict_sys->mutex); + } + + ut_ad(mutex_own(&dict_sys->mutex)); + table_id_t table_id; + dict_table_t * table = NULL; + if (dict_load_table_id_on_index_id(index_id, &table_id)) { + bool local_dict_locked = true; + table = dict_table_open_on_id(table_id, + local_dict_locked, + DICT_TABLE_OP_LOAD_TABLESPACE); + } + + if (!dict_locked) { + mutex_exit(&dict_sys->mutex); + } + return table; +} + /********************************************************************//** This function is called when the database is booted. 
Loads system table index definitions except for the clustered index which is added to the diff --git a/storage/xtradb/dict/dict0stats_bg.cc b/storage/xtradb/dict/dict0stats_bg.cc index 2cf8aff1e30..ef3a8c89c08 100644 --- a/storage/xtradb/dict/dict0stats_bg.cc +++ b/storage/xtradb/dict/dict0stats_bg.cc @@ -413,7 +413,7 @@ dict_stats_process_entry_from_recalc_pool() return; } - table->stats_bg_flag = BG_STAT_IN_PROGRESS; + table->stats_bg_flag |= BG_STAT_IN_PROGRESS; mutex_exit(&dict_sys->mutex); @@ -440,7 +440,7 @@ dict_stats_process_entry_from_recalc_pool() mutex_enter(&dict_sys->mutex); - table->stats_bg_flag = BG_STAT_NONE; + table->stats_bg_flag &= ~BG_STAT_IN_PROGRESS; dict_table_close(table, TRUE, FALSE); diff --git a/storage/xtradb/fil/fil0crypt.cc b/storage/xtradb/fil/fil0crypt.cc new file mode 100644 index 00000000000..e34297f4f86 --- /dev/null +++ b/storage/xtradb/fil/fil0crypt.cc @@ -0,0 +1,2433 @@ +#include "fil0fil.h" +#include "srv0srv.h" +#include "srv0start.h" +#include "mach0data.h" +#include "log0recv.h" +#include "mtr0mtr.h" +#include "mtr0log.h" +#include "page0zip.h" +#include "ut0ut.h" +#include "btr0scrub.h" +#include "fsp0fsp.h" +#include "fil0pagecompress.h" +#include "fil0pageencryption.h" + +#include <my_crypt.h> +#include <my_crypt_key_management.h> + +#include <my_aes.h> +#include <math.h> + + +/** Mutex for keys */ +UNIV_INTERN ib_mutex_t fil_crypt_key_mutex; + +#ifdef UNIV_PFS_MUTEX +UNIV_INTERN mysql_pfs_key_t fil_crypt_key_mutex_key; +#endif + +/** Is encryption enabled/disabled */ +UNIV_INTERN my_bool srv_encrypt_tables = FALSE; + +/** No of key rotation threads requested */ +UNIV_INTERN uint srv_n_fil_crypt_threads = 0; + +/** No of key rotation threads started */ +static uint srv_n_fil_crypt_threads_started = 0; + +/** At this age or older a space/page will be rotated */ +UNIV_INTERN uint srv_fil_crypt_rotate_key_age = 1; + +/** Event to signal FROM the key rotation threads. 
*/ +UNIV_INTERN os_event_t fil_crypt_event; + +/** Event to signal TO the key rotation threads. */ +UNIV_INTERN os_event_t fil_crypt_threads_event; + +/** Event for waking up threads throttle */ +UNIV_INTERN os_event_t fil_crypt_throttle_sleep_event; + +/** Mutex for key rotation threads */ +UNIV_INTERN ib_mutex_t fil_crypt_threads_mutex; + +#ifdef UNIV_PFS_MUTEX +UNIV_INTERN mysql_pfs_key_t fil_crypt_threads_mutex_key; +#endif + +/** Variable ensuring only 1 thread at a time does initial conversion */ +static bool fil_crypt_start_converting = false; + +/** Variables for throttling */ +UNIV_INTERN uint srv_n_fil_crypt_iops = 100; // 10ms per iop +static uint srv_alloc_time = 3; // allocate iops for 3s at a time +static uint n_fil_crypt_iops_allocated = 0; + +/** Variables for scrubbing */ +extern uint srv_background_scrub_data_interval; +extern uint srv_background_scrub_data_check_interval; + +#define DEBUG_KEYROTATION_THROTTLING 0 + +/** Statistics variables */ +static fil_crypt_stat_t crypt_stat; +static ib_mutex_t crypt_stat_mutex; + +#ifdef UNIV_PFS_MUTEX +UNIV_INTERN mysql_pfs_key_t fil_crypt_stat_mutex_key; +#endif + +/** + * key for crypt data mutex +*/ +#ifdef UNIV_PFS_MUTEX +UNIV_INTERN mysql_pfs_key_t fil_crypt_data_mutex_key; +#endif + +/** +* Magic pattern in start of crypt data on page 0 +*/ +#define MAGIC_SZ 6 + +static const unsigned char CRYPT_MAGIC[MAGIC_SZ] = { + 's', 0xE, 0xC, 'R', 'E', 't' }; + +static const unsigned char EMPTY_PATTERN[MAGIC_SZ] = { + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +/** + * CRYPT_SCHEME_UNENCRYPTED + * + * Used as intermediate state when converting a space from unencrypted + * to encrypted + */ +#define CRYPT_SCHEME_UNENCRYPTED 0 + +/** + * CRYPT_SCHEME_1 + * + * L = AES_ECB(KEY, IV) + * CRYPT(PAGE) = AES_CTR(KEY=L, IV=C, PAGE) + */ +#define CRYPT_SCHEME_1 1 +#define CRYPT_SCHEME_1_IV_LEN 16 +// cached L given key_version +struct key_struct +{ + uint key_version; + byte key[CRYPT_SCHEME_1_IV_LEN]; +}; + +struct 
fil_space_rotate_state_t +{ + time_t start_time; // time when rotation started + ulint active_threads; // active threads in space + ulint next_offset; // next "free" offset + ulint max_offset; // max offset needing to be rotated + uint min_key_version_found; // min key version found but not rotated + lsn_t end_lsn; // max lsn created when rotating this space + bool starting; // initial write of IV + bool flushing; // space is being flushed at end of rotate + struct { + bool is_active; // is scrubbing active in this space + time_t last_scrub_completed; // when was last scrub completed + } scrubbing; +}; + +struct fil_space_crypt_struct +{ + ulint type; // CRYPT_SCHEME + uint keyserver_requests; // no of key requests to key server + uint key_count; // No of initialized key-structs + key_struct keys[3]; // cached L = AES_ECB(KEY, IV) + uint min_key_version; // min key version for this space + ulint page0_offset; // byte offset on page 0 for crypt data + + ib_mutex_t mutex; // mutex protecting following variables + bool closing; // is tablespace being closed + fil_space_rotate_state_t rotate_state; + + uint iv_length; // length of IV + byte iv[1]; // IV-data +}; + +/********************************************************************* +Init space crypt */ +UNIV_INTERN +void +fil_space_crypt_init() +{ + mutex_create(fil_crypt_key_mutex_key, + &fil_crypt_key_mutex, SYNC_NO_ORDER_CHECK); + + fil_crypt_throttle_sleep_event = os_event_create(); + + mutex_create(fil_crypt_stat_mutex_key, + &crypt_stat_mutex, SYNC_NO_ORDER_CHECK); + memset(&crypt_stat, 0, sizeof(crypt_stat)); +} + +/********************************************************************* +Cleanup space crypt */ +UNIV_INTERN +void +fil_space_crypt_cleanup() +{ + os_event_free(fil_crypt_throttle_sleep_event); +} + +/****************************************************************** +Get key bytes for a space/key-version */ +static +void +fil_crypt_get_key(byte *dst, uint* key_length, + fil_space_crypt_t* 
crypt_data, uint version, bool page_encrypted) +{ + unsigned char keybuf[MY_AES_MAX_KEY_LENGTH]; + unsigned char iv[CRYPT_SCHEME_1_IV_LEN]; + ulint iv_len = sizeof(iv); + + if (!page_encrypted) { + mutex_enter(&crypt_data->mutex); + + // Check if we already have key + for (uint i = 0; i < crypt_data->key_count; i++) { + if (crypt_data->keys[i].key_version == version) { + memcpy(dst, crypt_data->keys[i].key, + sizeof(crypt_data->keys[i].key)); + mutex_exit(&crypt_data->mutex); + return; + } + } + // Not found! + crypt_data->keyserver_requests++; + + // Rotate keys to make room for a new one; shift from the last slot down so each entry is copied before it is overwritten + for (uint i = array_elements(crypt_data->keys) - 1; i > 0; i--) { + crypt_data->keys[i] = crypt_data->keys[i - 1]; + } + } + else + { + // load iv + int rc = GetCryptoIV(version, (unsigned char*)iv, iv_len); + fprintf(stderr, " %d\n",rc); + + if (rc != CRYPT_KEY_OK) { + ib_logf(IB_LOG_LEVEL_FATAL, + "IV %d can not be found. Reason=%d", version, rc); + ut_error; + } + } + + if (HasCryptoKey(version)) { + *key_length = GetCryptoKeySize(version); + + int rc = GetCryptoKey(version, (unsigned char*)keybuf, *key_length); + + if (rc != CRYPT_KEY_OK) { + ib_logf(IB_LOG_LEVEL_FATAL, + "Key %d can not be found. Reason=%d", version, rc); + ut_error; + } + } else { + ib_logf(IB_LOG_LEVEL_FATAL, + "Key %d not found", version); + ut_error; + } + + + // do ctr key initialization + if (current_aes_dynamic_method == MY_AES_ALGORITHM_CTR) + { + // Now compute L by encrypting IV using this key + const unsigned char* src = page_encrypted ? iv : crypt_data->iv; + const int srclen = page_encrypted ? iv_len : crypt_data->iv_length; + unsigned char* buf = page_encrypted ? keybuf : crypt_data->keys[0].key; + uint32 buflen = page_encrypted ? 
*key_length : sizeof(crypt_data->keys[0].key); + + // call ecb explicit + my_aes_encrypt_dynamic_type func = get_aes_encrypt_func(MY_AES_ALGORITHM_ECB); + int rc = (*func)(src, srclen, + buf, &buflen, + (unsigned char*)keybuf, *key_length, + NULL, 0, + 1); + + if (rc != AES_OK) { + ib_logf(IB_LOG_LEVEL_FATAL, + "Unable to encrypt key-block " + " src: %p srclen: %d buf: %p buflen: %d." + " return-code: %d. Can't continue!\n", + src, srclen, buf, buflen, rc); + ut_error; + } + + if (!page_encrypted) { + crypt_data->keys[0].key_version = version; + crypt_data->key_count++; + + if (crypt_data->key_count > array_elements(crypt_data->keys)) { + crypt_data->key_count = array_elements(crypt_data->keys); + } + } + + // set the key size to the aes block size because this encrypted data is the key + *key_length = MY_AES_BLOCK_SIZE; + memcpy(dst, buf, buflen); + } + else + { + // otherwise keybuf contains the right key + memcpy(dst, keybuf, *key_length); + } + + if (!page_encrypted) { + mutex_exit(&crypt_data->mutex); + } +} + +/****************************************************************** +Get key bytes for a space/latest(key-version) */ +static inline +void +fil_crypt_get_latest_key(byte *dst, uint* key_length, + fil_space_crypt_t* crypt_data, uint *version) +{ + if (srv_encrypt_tables) { + // used for key rotation - get the next key id from the key provider + int rc = GetLatestCryptoKeyVersion(); + + // if no new key was created use the last one + if (rc >= 0) + { + *version = rc; + } + + return fil_crypt_get_key(dst, key_length, crypt_data, *version, false); + } else { + return fil_crypt_get_key(dst, key_length, NULL, *version, true); + } +} + +/****************************************************************** +Create a fil_space_crypt_t object */ +UNIV_INTERN +fil_space_crypt_t* +fil_space_create_crypt_data() +{ + const uint iv_length = CRYPT_SCHEME_1_IV_LEN; + const uint sz = sizeof(fil_space_crypt_t) + iv_length; + fil_space_crypt_t* crypt_data = + 
static_cast<fil_space_crypt_t*>(malloc(sz)); + memset(crypt_data, 0, sz); + + if (srv_encrypt_tables == FALSE) { + crypt_data->type = CRYPT_SCHEME_UNENCRYPTED; + crypt_data->min_key_version = 0; + } else { + crypt_data->type = CRYPT_SCHEME_1; + crypt_data->min_key_version = GetLatestCryptoKeyVersion(); + } + + mutex_create(fil_crypt_data_mutex_key, + &crypt_data->mutex, SYNC_NO_ORDER_CHECK); + crypt_data->iv_length = iv_length; + my_random_bytes(crypt_data->iv, iv_length); + return crypt_data; +} + +/****************************************************************** +Compare two crypt objects */ +UNIV_INTERN +int +fil_space_crypt_compare(const fil_space_crypt_t* crypt_data1, + const fil_space_crypt_t* crypt_data2) +{ + ut_a(crypt_data1->type == CRYPT_SCHEME_UNENCRYPTED || + crypt_data1->type == CRYPT_SCHEME_1); + ut_a(crypt_data2->type == CRYPT_SCHEME_UNENCRYPTED || + crypt_data2->type == CRYPT_SCHEME_1); + + ut_a(crypt_data1->iv_length == CRYPT_SCHEME_1_IV_LEN); + ut_a(crypt_data2->iv_length == CRYPT_SCHEME_1_IV_LEN); + + /* no support for changing iv (yet?) */ + ut_a(memcmp(crypt_data1->iv, crypt_data2->iv, + crypt_data1->iv_length) == 0); + + return 0; +} + +/****************************************************************** +Read crypt data from a page (0) */ +UNIV_INTERN +fil_space_crypt_t* +fil_space_read_crypt_data(ulint space, const byte* page, ulint offset) +{ + if (memcmp(page + offset, EMPTY_PATTERN, MAGIC_SZ) == 0) { + /* crypt is not stored */ + return NULL; + } + + if (memcmp(page + offset, CRYPT_MAGIC, MAGIC_SZ) != 0) { + fprintf(stderr, + "Warning: found potentially bogus bytes on " + "page 0 offset %lu for space %lu : " + "[ %.2x %.2x %.2x %.2x %.2x %.2x ]. " + "Assuming space is not encrypted!\n", + offset, space, + page[offset + 0], + page[offset + 1], + page[offset + 2], + page[offset + 3], + page[offset + 4], + page[offset + 5]); + return NULL; + } + + ulint type = mach_read_from_1(page + offset + MAGIC_SZ + 0); + + if (! 
(type == CRYPT_SCHEME_UNENCRYPTED || + type == CRYPT_SCHEME_1)) { + fprintf(stderr, + "Found non sensible crypt scheme: %lu for space %lu " + " offset: %lu bytes: " + "[ %.2x %.2x %.2x %.2x %.2x %.2x ]\n", + type, space, offset, + page[offset + 0 + MAGIC_SZ], + page[offset + 1 + MAGIC_SZ], + page[offset + 2 + MAGIC_SZ], + page[offset + 3 + MAGIC_SZ], + page[offset + 4 + MAGIC_SZ], + page[offset + 5 + MAGIC_SZ]); + ut_error; + } + + ulint iv_length = mach_read_from_1(page + offset + MAGIC_SZ + 1); + if (! (iv_length == CRYPT_SCHEME_1_IV_LEN)) { + fprintf(stderr, + "Found non sensible iv length: %lu for space %lu " + " offset: %lu type: %lu bytes: " + "[ %.2x %.2x %.2x %.2x %.2x %.2x ]\n", + iv_length, space, offset, type, + page[offset + 0 + MAGIC_SZ], + page[offset + 1 + MAGIC_SZ], + page[offset + 2 + MAGIC_SZ], + page[offset + 3 + MAGIC_SZ], + page[offset + 4 + MAGIC_SZ], + page[offset + 5 + MAGIC_SZ]); + ut_error; + } + + uint min_key_version = mach_read_from_4 + (page + offset + MAGIC_SZ + 2 + iv_length); + + const uint sz = sizeof(fil_space_crypt_t) + iv_length; + fil_space_crypt_t* crypt_data = static_cast<fil_space_crypt_t*>( + malloc(sz)); + memset(crypt_data, 0, sz); + + crypt_data->type = type; + crypt_data->min_key_version = min_key_version; + crypt_data->page0_offset = offset; + mutex_create(fil_crypt_data_mutex_key, + &crypt_data->mutex, SYNC_NO_ORDER_CHECK); + crypt_data->iv_length = iv_length; + memcpy(crypt_data->iv, page + offset + MAGIC_SZ + 2, iv_length); + + return crypt_data; +} + +/****************************************************************** +Free a crypt data object */ +UNIV_INTERN +void +fil_space_destroy_crypt_data(fil_space_crypt_t **crypt_data) +{ + if (crypt_data != NULL && (*crypt_data) != NULL) { + /* lock (and unlock) mutex to make sure no one has it locked + * currently */ + mutex_enter(& (*crypt_data)->mutex); + mutex_exit(& (*crypt_data)->mutex); + mutex_free(& (*crypt_data)->mutex); + free(*crypt_data); + (*crypt_data) = 
NULL; + } +} + +/****************************************************************** +Write crypt data to a page (0) */ +static +void +fil_space_write_crypt_data_low(fil_space_crypt_t *crypt_data, + ulint type, + byte* page, ulint offset, + ulint maxsize, mtr_t* mtr) +{ + ut_a(offset > 0 && offset < UNIV_PAGE_SIZE); + ulint space_id = mach_read_from_4( + page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + const uint len = crypt_data->iv_length; + const uint min_key_version = crypt_data->min_key_version; + crypt_data->page0_offset = offset; + ut_a(2 + len + 4 + MAGIC_SZ < maxsize); + + /* + redo log this as bytewise updates to page 0 + followed by an MLOG_FILE_WRITE_CRYPT_DATA + (that will during recovery update fil_space_t) + */ + mlog_write_string(page + offset, CRYPT_MAGIC, MAGIC_SZ, mtr); + mlog_write_ulint(page + offset + MAGIC_SZ + 0, type, MLOG_1BYTE, mtr); + mlog_write_ulint(page + offset + MAGIC_SZ + 1, len, MLOG_1BYTE, mtr); + mlog_write_string(page + offset + MAGIC_SZ + 2, crypt_data->iv, len, + mtr); + mlog_write_ulint(page + offset + MAGIC_SZ + 2 + len, min_key_version, + MLOG_4BYTES, mtr); + + byte* log_ptr = mlog_open(mtr, 11 + 12 + len); + if (log_ptr != NULL) { + log_ptr = mlog_write_initial_log_record_fast( + page, + MLOG_FILE_WRITE_CRYPT_DATA, + log_ptr, mtr); + mach_write_to_4(log_ptr, space_id); + log_ptr += 4; + mach_write_to_2(log_ptr, offset); + log_ptr += 2; + mach_write_to_1(log_ptr, type); + log_ptr += 1; + mach_write_to_1(log_ptr, len); + log_ptr += 1; + mach_write_to_4(log_ptr, min_key_version); + log_ptr += 4; + mlog_close(mtr, log_ptr); + + mlog_catenate_string(mtr, crypt_data->iv, len); + } +} + +/****************************************************************** +Write crypt data to a page (0) */ +UNIV_INTERN +void +fil_space_write_crypt_data(ulint space, byte* page, ulint offset, + ulint maxsize, mtr_t* mtr) +{ + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) { + return; + } + + 
fil_space_write_crypt_data_low(crypt_data, crypt_data->type, + page, offset, maxsize, mtr); +} + +/****************************************************************** +Parse a MLOG_FILE_WRITE_CRYPT_DATA log entry */ +UNIV_INTERN +byte* +fil_parse_write_crypt_data(byte* ptr, byte* end_ptr, + buf_block_t* block) +{ + /* check that redo log entry is complete */ + uint entry_size = + 4 + // size of space_id + 2 + // size of offset + 1 + // size of type + 1 + // size of iv-len + 4; // size of min_key_version + if (end_ptr - ptr < entry_size) + return NULL; + + ulint space_id = mach_read_from_4(ptr); + ptr += 4; + uint offset = mach_read_from_2(ptr); + ptr += 2; + uint type = mach_read_from_1(ptr); + ptr += 1; + uint len = mach_read_from_1(ptr); + ptr += 1; + + ut_a(type == CRYPT_SCHEME_UNENCRYPTED || + type == CRYPT_SCHEME_1); // only supported + ut_a(len == CRYPT_SCHEME_1_IV_LEN); // only supported + uint min_key_version = mach_read_from_4(ptr); + ptr += 4; + + if (end_ptr - ptr < len) + return NULL; + + fil_space_crypt_t* crypt_data = fil_space_create_crypt_data(); + crypt_data->page0_offset = offset; + crypt_data->min_key_version = min_key_version; + memcpy(crypt_data->iv, ptr, len); + ptr += len; + + /* update fil_space memory cache with crypt_data */ + fil_space_set_crypt_data(space_id, crypt_data); + + return ptr; +} + +/****************************************************************** +Clear crypt data from a page (0) */ +UNIV_INTERN +void +fil_space_clear_crypt_data(byte* page, ulint offset) +{ + //TODO(jonaso): pass crypt-data and read len from there + ulint len = CRYPT_SCHEME_1_IV_LEN; + ulint size = + sizeof(CRYPT_MAGIC) + + 1 + // type + 1 + // len + len + // iv + 4; // min key version + memset(page + offset, 0, size); +} + +/********************************************************************* +Check if page shall be encrypted before write */ +UNIV_INTERN +bool +fil_space_check_encryption_write( +/*==============================*/ + ulint space) /*!< in: 
tablespace id */ +{ + if (srv_encrypt_tables == FALSE) + return false; + + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) + return false; + + if (crypt_data->type == CRYPT_SCHEME_UNENCRYPTED) + return false; + + return true; +} + +/****************************************************************** +Encrypt a page */ +UNIV_INTERN +void +fil_space_encrypt(ulint space, ulint offset, lsn_t lsn, + const byte* src_frame, ulint zip_size, byte* dst_frame, ulint encryption_key) +{ + fil_space_crypt_t* crypt_data; + ulint page_size = (zip_size) ? zip_size : UNIV_PAGE_SIZE; + + // get key (L) + uint key_version; + byte key[MY_AES_MAX_KEY_LENGTH]; + uint key_length; + + if (srv_encrypt_tables) { + crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) { + //TODO: Is this really needed ? + memcpy(dst_frame, src_frame, page_size); + return; + } + fil_crypt_get_latest_key(key, &key_length, crypt_data, &key_version); + } else { + key_version = encryption_key; + fil_crypt_get_latest_key(key, &key_length, NULL, (uint*)&key_version); + } + + + /* Load the iv or counter (depending to the encryption algorithm used) */ + unsigned char iv[MY_AES_BLOCK_SIZE]; + + if (current_aes_dynamic_method == MY_AES_ALGORITHM_CTR) + { + // create counter block (C) + mach_write_to_4(iv + 0, space); + ulint space_offset = mach_read_from_4( + src_frame + FIL_PAGE_OFFSET); + mach_write_to_4(iv + 4, space_offset); + mach_write_to_8(iv + 8, lsn); + } + else + { + // take the iv from the key provider + + int load_iv_rc = GetCryptoIV(key_version, (uchar *) iv, sizeof(iv)); + + // if the iv can not be loaded the whole page can not be encrypted + if (load_iv_rc != CRYPT_KEY_OK) + { + ib_logf(IB_LOG_LEVEL_FATAL, + "Unable to decrypt data-block. " + " Can not load iv for key %d" + " return-code: %d. 
Can't continue!\n", + key_version, load_iv_rc); + + ut_error; + } + } + + + ibool page_compressed = (mach_read_from_2(src_frame+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED); + ibool page_encrypted = fil_space_is_page_encrypted(space); + + ulint compression_alg = mach_read_from_8(src_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + + ulint orig_page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE); + if (orig_page_type==FIL_PAGE_TYPE_FSP_HDR + || orig_page_type==FIL_PAGE_TYPE_XDES + || orig_page_type== FIL_PAGE_PAGE_ENCRYPTED + || orig_page_type== FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { + memcpy(dst_frame, src_frame, page_size); + return; + } + + // copy page header + memcpy(dst_frame, src_frame, FIL_PAGE_DATA); + + + if (page_encrypted && !page_compressed) { + // key id + mach_write_to_2(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, + key_version); + // original page type + mach_write_to_2(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 2, + orig_page_type); + // new page type + mach_write_to_2(dst_frame+FIL_PAGE_TYPE, FIL_PAGE_PAGE_ENCRYPTED); + } else { + // store key version + mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, + key_version); + } + + // encrypt page data + ulint unencrypted_bytes = FIL_PAGE_DATA + FIL_PAGE_DATA_END; + ulint srclen = page_size - unencrypted_bytes; + const byte* src = src_frame + FIL_PAGE_DATA; + byte* dst = dst_frame + FIL_PAGE_DATA; + uint32 dstlen; + + if (page_compressed) { + srclen = page_size - FIL_PAGE_DATA; + } + + int rc = (* my_aes_encrypt_dynamic)(src, srclen, + dst, &dstlen, + (unsigned char*)key, key_length, + (unsigned char*)iv, sizeof(iv), + 1); + + if (! ((rc == AES_OK) && ((ulint) dstlen == srclen))) { + ib_logf(IB_LOG_LEVEL_FATAL, + "Unable to encrypt data-block " + " src: %p srclen: %ld buf: %p buflen: %d." + " return-code: %d. 
Can't continue!\n", + src, (long)srclen, + dst, dstlen, rc); + ut_error; + } + + if (!page_compressed) { + // copy page trailer + memcpy(dst_frame + page_size - FIL_PAGE_DATA_END, + src_frame + page_size - FIL_PAGE_DATA_END, + FIL_PAGE_DATA_END); + + /* handle post encryption checksum */ + ib_uint32_t checksum = 0; + srv_checksum_algorithm_t algorithm = + static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm); + + if (zip_size == 0) { + switch (algorithm) { + case SRV_CHECKSUM_ALGORITHM_CRC32: + case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: + checksum = buf_calc_page_crc32(dst_frame); + break; + case SRV_CHECKSUM_ALGORITHM_INNODB: + case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: + checksum = (ib_uint32_t) buf_calc_page_new_checksum( + dst_frame); + break; + case SRV_CHECKSUM_ALGORITHM_NONE: + case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: + checksum = BUF_NO_CHECKSUM_MAGIC; + break; + /* no default so the compiler will emit a warning + * if new enum is added and not handled here */ + } + } else { + checksum = page_zip_calc_checksum(dst_frame, zip_size, + algorithm); + } + + // store the post-encryption checksum after the key-version + mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4, + checksum); + } else { + /* Page compressed and encrypted tables have different + FIL_HEADER */ + ulint page_len = log10((double)page_size)/log10((double)2); + /* Set up the correct page type */ + mach_write_to_2(dst_frame+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED); + /* Set up the compression algorithm */ + mach_write_to_2(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+4, orig_page_type); + /* Set up the compressed size */ + mach_write_to_1(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+6, page_len); + /* Set up the compression method */ + mach_write_to_1(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+7, compression_alg); + } + +} + +/********************************************************************* +Check if extra buffer shall be allocated for 
decrypting after read */ +UNIV_INTERN +bool +fil_space_check_encryption_read( +/*==============================*/ + ulint space) /*!< in: tablespace id */ +{ + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) + return false; + + if (crypt_data->type == CRYPT_SCHEME_UNENCRYPTED) + return false; + + return true; +} + +/****************************************************************** +Decrypt a page */ +UNIV_INTERN +bool +fil_space_decrypt(fil_space_crypt_t* crypt_data, + const byte* src_frame, ulint page_size, byte* dst_frame) +{ + ulint page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE); + // key version + uint key_version; + bool page_encrypted = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED + || page_type == FIL_PAGE_PAGE_ENCRYPTED); + + bool page_compressed = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED + || page_type == FIL_PAGE_PAGE_COMPRESSED); + + ulint orig_page_type=0; + + if (page_type == FIL_PAGE_PAGE_ENCRYPTED) { + key_version = mach_read_from_2( + src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + orig_page_type = mach_read_from_2( + src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 2); + } else { + key_version = mach_read_from_4( + src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + } + + if (key_version == 0 && !page_encrypted) { + //TODO: is this really needed ? 
+ memcpy(dst_frame, src_frame, page_size); + return false; /* page not decrypted */ + } + + // read space & offset & lsn + ulint space = mach_read_from_4( + src_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + ulint offset = mach_read_from_4( + src_frame + FIL_PAGE_OFFSET); + ib_uint64_t lsn = mach_read_from_8(src_frame + FIL_PAGE_LSN); + + // copy page header + memcpy(dst_frame, src_frame, FIL_PAGE_DATA); + + if (page_type == FIL_PAGE_PAGE_ENCRYPTED) { + // orig page type + mach_write_to_2(dst_frame+FIL_PAGE_TYPE, orig_page_type); + } + + + // get key + byte key[MY_AES_MAX_KEY_LENGTH]; + uint key_length; + fil_crypt_get_key(key, &key_length, crypt_data, key_version, page_encrypted); + + // get the iv + unsigned char iv[MY_AES_BLOCK_SIZE]; + + if (current_aes_dynamic_method == MY_AES_ALGORITHM_CTR) + { + // create counter block + + mach_write_to_4(iv + 0, space); + mach_write_to_4(iv + 4, offset); + mach_write_to_8(iv + 8, lsn); + } + else + { + // take the iv from the key provider + + int load_iv_rc = GetCryptoIV(key_version, (uchar *) iv, sizeof(iv)); + + // if the iv can not be loaded the whole page can not be decrypted + if (load_iv_rc != CRYPT_KEY_OK) + { + ib_logf(IB_LOG_LEVEL_FATAL, + "Unable to decrypt data-block. " + " Can not load iv for key %d" + " return-code: %d. 
Can't continue!\n", + key_version, load_iv_rc); + + return AES_KEY_CREATION_FAILED; + } + } + + const byte* src = src_frame + FIL_PAGE_DATA; + byte* dst = dst_frame + FIL_PAGE_DATA; + uint32 dstlen; + ulint srclen = page_size - (FIL_PAGE_DATA + FIL_PAGE_DATA_END); + + ulint compressed_len; + ulint compression_method; + + if (page_compressed) { + orig_page_type = mach_read_from_2(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+4); + compressed_len = mach_read_from_1(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+6); + compression_method = mach_read_from_1(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+7); + } + + if (page_encrypted && !page_compressed) { + orig_page_type = mach_read_from_2(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+2); + } + + if (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { + srclen = pow((double)2, (double)((int)compressed_len)) - FIL_PAGE_DATA; + } + + int rc = (* my_aes_decrypt_dynamic)(src, srclen, + dst, &dstlen, + (unsigned char*)key, key_length, + (unsigned char*)iv, sizeof(iv), + 1); + + if (! ((rc == AES_OK) && ((ulint) dstlen == srclen))) { + ib_logf(IB_LOG_LEVEL_FATAL, + "Unable to decrypt data-block " + " src: %p srclen: %ld buf: %p buflen: %d." + " return-code: %d. 
Can't continue!\n", + src, (long)srclen, + dst, dstlen, rc); + ut_error; + } + + if (page_type != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { + // copy page trailer + memcpy(dst_frame + page_size - FIL_PAGE_DATA_END, + src_frame + page_size - FIL_PAGE_DATA_END, + FIL_PAGE_DATA_END); + + // clear key-version & crypt-checksum from dst + memset(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); + } else { + /* For page compressed tables we set up the FIL_HEADER again */ + /* setting original page type */ + mach_write_to_2(dst_frame + FIL_PAGE_TYPE, orig_page_type); + /* page_compression uses BUF_NO_CHECKSUM_MAGIC as checksum */ + mach_write_to_4(dst_frame + FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC); + /* Set up the flush lsn to be compression algorithm */ + mach_write_to_8(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, compression_method); + } + + return true; /* page was decrypted */ +} + +/****************************************************************** +Decrypt a page */ +UNIV_INTERN +void +fil_space_decrypt(ulint space, + const byte* src_frame, ulint page_size, byte* dst_frame) +{ + fil_space_decrypt(fil_space_get_crypt_data(space), + src_frame, page_size, dst_frame); +} + +/********************************************************************* +Verify checksum for a page (iff it's encrypted) +NOTE: currently this function can only be run in single threaded mode +as it modifies srv_checksum_algorithm (temporarily) +@return true if page is encrypted AND OK, false otherwise */ +bool +fil_space_verify_crypt_checksum(const byte* src_frame, ulint zip_size) +{ + // key version + uint key_version = mach_read_from_4( + src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + + if (key_version == 0) { + return false; // unencrypted page + } + + /* "trick" the normal checksum routines by storing the post-encryption + * checksum into the normal checksum field allowing for reuse of + * the normal routines */ + + // post encryption checksum + ib_uint32_t 
stored_post_encryption = mach_read_from_4( + src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4); + + // save pre encryption checksum for restore in end of this function + ib_uint32_t stored_pre_encryption = mach_read_from_4( + src_frame + FIL_PAGE_SPACE_OR_CHKSUM); + + ib_uint32_t checksum_field2 = mach_read_from_4( + src_frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM); + + /** prepare frame for usage of normal checksum routines */ + mach_write_to_4(const_cast<byte*>(src_frame) + FIL_PAGE_SPACE_OR_CHKSUM, + stored_post_encryption); + + /* NOTE: this function is (currently) only run when restoring + * dblwr-buffer, server is single threaded so it's safe to modify + * srv_checksum_algorithm */ + srv_checksum_algorithm_t save_checksum_algorithm = + (srv_checksum_algorithm_t)srv_checksum_algorithm; + if (zip_size == 0 && + (save_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB || + save_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_INNODB)) { + /* handle ALGORITHM_INNODB specially, + * "downgrade" to ALGORITHM_INNODB and store BUF_NO_CHECKSUM_MAGIC + * checksum_field2 is sort of pointless anyway... 
+ */ + srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_INNODB; + mach_write_to_4(const_cast<byte*>(src_frame) + + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, + BUF_NO_CHECKSUM_MAGIC); + } + + /* verify checksums */ + ibool corrupted = buf_page_is_corrupted(false, src_frame, zip_size); + + /** restore frame & algorithm */ + srv_checksum_algorithm = save_checksum_algorithm; + + mach_write_to_4(const_cast<byte*>(src_frame) + + FIL_PAGE_SPACE_OR_CHKSUM, + stored_pre_encryption); + + mach_write_to_4(const_cast<byte*>(src_frame) + + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, + checksum_field2); + + if (!corrupted) { + return true; // page was encrypted and checksum matched + } else { + return false; // page was encrypted but checksum didn't match + } +} + +/***********************************************************************/ + +/** A copy of global key state */ +struct key_state_t { + key_state_t() : key_version(0), + rotate_key_age(srv_fil_crypt_rotate_key_age) {} + bool operator==(const key_state_t& other) const { + return key_version == other.key_version && + rotate_key_age == other.rotate_key_age; + } + uint key_version; + uint rotate_key_age; +}; + +/*********************************************************************** +Copy global key state */ +static void +fil_crypt_get_key_state( + key_state_t *new_state) +{ + if (srv_encrypt_tables == TRUE) { + new_state->key_version = GetLatestCryptoKeyVersion(); + new_state->rotate_key_age = srv_fil_crypt_rotate_key_age; + ut_a(new_state->key_version > 0); + } else { + new_state->key_version = 0; + new_state->rotate_key_age = 0; + } +} + +/*********************************************************************** +Check if a key needs rotation given a key_state */ +static bool +fil_crypt_needs_rotation(uint key_version, const key_state_t *key_state) +{ + // TODO(jonaso): Add support for rotating encrypted => unencrypted + + if (key_version == 0 && key_state->key_version != 0) { + /* this is rotation unencrypted => 
encrypted + * ignore rotate_key_age */ + return true; + } + + if (key_state->key_version == 0 && key_version != 0) { + /* this is rotation encrypted => unencrypted */ + return true; + } + + /* this is rotation encrypted => encrypted, + * only reencrypt if key is sufficiently old */ + if (key_version + key_state->rotate_key_age < key_state->key_version) + return true; + + return false; +} + +/*********************************************************************** +Check if a space is closing (i.e just before drop) */ +UNIV_INTERN bool +fil_crypt_is_closing(ulint space) +{ + bool closing; + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + mutex_enter(&crypt_data->mutex); + closing = crypt_data->closing; + mutex_exit(&crypt_data->mutex); + return closing; +} + +/*********************************************************************** +Start encrypting a space +@return true if a pending op (fil_inc_pending_ops/fil_decr_pending_ops) is held +*/ +static bool +fil_crypt_start_encrypting_space(ulint space, bool *recheck) { + + /* we have a pending op when entering function */ + bool pending_op = true; + + mutex_enter(&fil_crypt_threads_mutex); + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + if (crypt_data != NULL || fil_crypt_start_converting) { + /* someone beat us to it */ + if (fil_crypt_start_converting) + *recheck = true; + + mutex_exit(&fil_crypt_threads_mutex); + return pending_op; + } + + /* NOTE: we need to write and flush page 0 before publishing + * the crypt data. 
This so that after restart there is no + * risk of finding encrypted pages without having + * crypt data in page 0 */ + + /* 1 - create crypt data */ + crypt_data = fil_space_create_crypt_data(); + if (crypt_data == NULL) { + mutex_exit(&fil_crypt_threads_mutex); + return pending_op; + } + + crypt_data->type = CRYPT_SCHEME_UNENCRYPTED; + crypt_data->min_key_version = 0; // all pages are unencrypted + crypt_data->rotate_state.start_time = time(0); + crypt_data->rotate_state.starting = true; + crypt_data->rotate_state.active_threads = 1; + + mutex_enter(&crypt_data->mutex); + fil_space_set_crypt_data(space, crypt_data); + mutex_exit(&crypt_data->mutex); + + fil_crypt_start_converting = true; + mutex_exit(&fil_crypt_threads_mutex); + + do + { + if (fil_crypt_is_closing(space) || + fil_tablespace_is_being_deleted(space)) + break; + + mtr_t mtr; + mtr_start(&mtr); + + /* 2 - get page 0 */ + ulint offset = 0; + ulint zip_size = fil_space_get_zip_size(space); + buf_block_t* block = buf_page_get_gen(space, zip_size, offset, + RW_X_LATCH, + NULL, + BUF_GET, + __FILE__, __LINE__, + &mtr); + + if (fil_crypt_is_closing(space) || + fil_tablespace_is_being_deleted(space)) { + mtr_commit(&mtr); + break; + } + + /* 3 - compute location to store crypt data */ + byte* frame = buf_block_get_frame(block); + ulint maxsize; + crypt_data->page0_offset = + fsp_header_get_crypt_offset(zip_size, &maxsize); + + /* 4 - write crypt data to page 0 */ + fil_space_write_crypt_data_low(crypt_data, + CRYPT_SCHEME_1, + frame, + crypt_data->page0_offset, + maxsize, &mtr); + + mtr_commit(&mtr); + + if (fil_crypt_is_closing(space) || + fil_tablespace_is_being_deleted(space)) { + break; + } + + /* record lsn of update */ + lsn_t end_lsn = mtr.end_lsn; + + /* 4 - sync tablespace before publishing crypt data */ + + /* release "lock" while syncing */ + fil_decr_pending_ops(space); + pending_op = false; + + bool success = false; + ulint n_pages = 0; + ulint sum_pages = 0; + do { + success = 
buf_flush_list(ULINT_MAX, end_lsn, &n_pages); + buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); + sum_pages += n_pages; + } while (!success && + !fil_crypt_is_closing(space) && + !fil_tablespace_is_being_deleted(space)); + + /* try to reacquire pending op */ + if (fil_inc_pending_ops(space, true)) + break; + + /* pending op reacquired! */ + pending_op = true; + + if (fil_crypt_is_closing(space) || + fil_tablespace_is_being_deleted(space)) { + break; + } + + /* 5 - publish crypt data */ + mutex_enter(&fil_crypt_threads_mutex); + mutex_enter(&crypt_data->mutex); + crypt_data->type = CRYPT_SCHEME_1; + ut_a(crypt_data->rotate_state.active_threads == 1); + crypt_data->rotate_state.active_threads = 0; + crypt_data->rotate_state.starting = false; + + fil_crypt_start_converting = false; + mutex_exit(&crypt_data->mutex); + mutex_exit(&fil_crypt_threads_mutex); + + return pending_op; + } while (0); + + mutex_enter(&crypt_data->mutex); + ut_a(crypt_data->rotate_state.active_threads == 1); + crypt_data->rotate_state.active_threads = 0; + mutex_exit(&crypt_data->mutex); + + mutex_enter(&fil_crypt_threads_mutex); + fil_crypt_start_converting = false; + mutex_exit(&fil_crypt_threads_mutex); + + return pending_op; +} + +/*********************************************************************** +Check if space needs rotation given a key_state */ +static bool +fil_crypt_space_needs_rotation(uint space, const key_state_t *key_state, + bool *recheck) +{ + if (fil_space_get_type(space) != FIL_TABLESPACE) + return false; + + if (fil_inc_pending_ops(space, true)) { + /* tablespace being dropped */ + return false; + } + + /* keep track of if we have pending op */ + bool pending_op = true; + + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) { + /** + * space has no crypt data + * start encrypting it... 
+ */ + pending_op = fil_crypt_start_encrypting_space(space, recheck); + crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) { + if (pending_op) { + fil_decr_pending_ops(space); + } + return false; + } + } + + mutex_enter(&crypt_data->mutex); + do { + /* prevent threads from starting to rotate space */ + if (crypt_data->rotate_state.starting) { + /* recheck this space later */ + *recheck = true; + break; + } + + /* prevent threads from starting to rotate space */ + if (crypt_data->closing) + break; + + if (crypt_data->rotate_state.flushing) + break; + + bool need_key_rotation = fil_crypt_needs_rotation( + crypt_data->min_key_version, key_state); + + time_t diff = time(0) - crypt_data->rotate_state.scrubbing. + last_scrub_completed; + bool need_scrubbing = + diff >= srv_background_scrub_data_interval; + + if (need_key_rotation == false && need_scrubbing == false) + break; + + mutex_exit(&crypt_data->mutex); + /* NOTE! fil_decr_pending_ops is performed outside */ + return true; + } while (0); + + mutex_exit(&crypt_data->mutex); + if (pending_op) { + fil_decr_pending_ops(space); + } + return false; +} + +/** State of a rotation thread */ +struct rotate_thread_t { + explicit rotate_thread_t(uint no) { + memset(this, 0, sizeof(* this)); + thread_no = no; + first = true; + estimated_max_iops = 20; + } + + uint thread_no; + bool first; /*!< is position before first space */ + ulint space; /*!< current space */ + ulint offset; /*!< current offset */ + ulint batch; /*!< #pages to rotate */ + uint min_key_version_found;/*!< min key version found but not rotated */ + lsn_t end_lsn; /*!< max lsn when rotating this space */ + + uint estimated_max_iops; /*!< estimation of max iops */ + uint allocated_iops; /*!< allocated iops */ + uint cnt_waited; /*!< #times waited during this slot */ + uint sum_waited_us; /*!< wait time during this slot */ + + fil_crypt_stat_t crypt_stat; // statistics + + btr_scrub_t scrub_data; /* thread local data used by 
btr_scrub-functions + * when iterating pages of tablespace */ + + /* check if this thread should shutdown */ + bool should_shutdown() const { + return ! (srv_shutdown_state == SRV_SHUTDOWN_NONE && + thread_no < srv_n_fil_crypt_threads); + } +}; + +/*********************************************************************** +Update global statistics with thread statistics */ +static void +fil_crypt_update_total_stat(rotate_thread_t *state) +{ + mutex_enter(&crypt_stat_mutex); + crypt_stat.pages_read_from_cache += + state->crypt_stat.pages_read_from_cache; + crypt_stat.pages_read_from_disk += + state->crypt_stat.pages_read_from_disk; + crypt_stat.pages_modified += state->crypt_stat.pages_modified; + crypt_stat.pages_flushed += state->crypt_stat.pages_flushed; + // remote old estimate + crypt_stat.estimated_iops -= state->crypt_stat.estimated_iops; + // add new estimate + crypt_stat.estimated_iops += state->estimated_max_iops; + mutex_exit(&crypt_stat_mutex); + + // make new estimate "current" estimate + memset(&state->crypt_stat, 0, sizeof(state->crypt_stat)); + // record our old (current) estimate + state->crypt_stat.estimated_iops = state->estimated_max_iops; +} + +/*********************************************************************** +Allocate iops to thread from global setting, +used before starting to rotate a space */ +static bool +fil_crypt_alloc_iops(rotate_thread_t *state) +{ + ut_ad(state->allocated_iops == 0); + + uint max_iops = state->estimated_max_iops; + mutex_enter(&fil_crypt_threads_mutex); + if (n_fil_crypt_iops_allocated >= srv_n_fil_crypt_iops) { + /* this can happen when user decreases srv_fil_crypt_iops */ + mutex_exit(&fil_crypt_threads_mutex); + return false; + } + + uint alloc = srv_n_fil_crypt_iops - n_fil_crypt_iops_allocated; + if (alloc > max_iops) + alloc = max_iops; + + n_fil_crypt_iops_allocated += alloc; + mutex_exit(&fil_crypt_threads_mutex); + + state->allocated_iops = alloc; + + return alloc > 0; +} + 
+/*********************************************************************** +Reallocate iops to thread, +used when inside a space */ +static void +fil_crypt_realloc_iops(rotate_thread_t *state) +{ + ut_a(state->allocated_iops > 0); + + if (10 * state->cnt_waited > state->batch) { + /* if we waited more than 10% re-estimate max_iops */ + uint avg_wait_time_us = + state->sum_waited_us / state->cnt_waited; + +#if DEBUG_KEYROTATION_THROTTLING + fprintf(stderr, + "thr_no: %u - update estimated_max_iops from %u to %u\n", + state->thread_no, + state->estimated_max_iops, + 1000000 / avg_wait_time_us); +#endif + if (avg_wait_time_us == 0) + avg_wait_time_us = 1; // prevent division by zero + + state->estimated_max_iops = 1000000 / avg_wait_time_us; + state->cnt_waited = 0; + state->sum_waited_us = 0; + } else { +#if DEBUG_KEYROTATION_THROTTLING + fprintf(stderr, + "thr_no: %u only waited %lu%% skip re-estimate\n", + state->thread_no, + (100 * state->cnt_waited) / state->batch); +#endif + } + + if (state->estimated_max_iops <= state->allocated_iops) { + /* return extra iops */ + uint extra = state->allocated_iops - state->estimated_max_iops; + + if (extra > 0) { + mutex_enter(&fil_crypt_threads_mutex); + if (n_fil_crypt_iops_allocated < extra) { + /* unknown bug! + * crash in debug + * keep n_fil_crypt_iops_allocated unchanged + * in release */ + ut_ad(0); + extra = 0; + } + n_fil_crypt_iops_allocated -= extra; + state->allocated_iops -= extra; + + if (state->allocated_iops == 0) { + /* no matter how slow io system seems to be + * never decrease allocated_iops to 0... 
*/ + state->allocated_iops ++; + n_fil_crypt_iops_allocated ++; + } + mutex_exit(&fil_crypt_threads_mutex); + os_event_set(fil_crypt_threads_event); + } + } else { + /* see if there are more to get */ + mutex_enter(&fil_crypt_threads_mutex); + if (n_fil_crypt_iops_allocated < srv_n_fil_crypt_iops) { + /* there are extra iops free */ + uint extra = srv_n_fil_crypt_iops - + n_fil_crypt_iops_allocated; + if (state->allocated_iops + extra > + state->estimated_max_iops) { + /* but don't alloc more than our max */ + extra = state->estimated_max_iops - + state->allocated_iops; + } + n_fil_crypt_iops_allocated += extra; + state->allocated_iops += extra; +#if DEBUG_KEYROTATION_THROTTLING + fprintf(stderr, + "thr_no: %u increased iops from %u to %u\n", + state->thread_no, + state->allocated_iops - extra, + state->allocated_iops); +#endif + } + mutex_exit(&fil_crypt_threads_mutex); + } + + fil_crypt_update_total_stat(state); +} + +/*********************************************************************** +Return allocated iops to global */ +static void +fil_crypt_return_iops(rotate_thread_t *state) +{ + if (state->allocated_iops > 0) { + uint iops = state->allocated_iops; + mutex_enter(&fil_crypt_threads_mutex); + if (n_fil_crypt_iops_allocated < iops) { + /* unknown bug! 
+ * crash in debug + * keep n_fil_crypt_iops_allocated unchanged + * in release */ + ut_ad(0); + iops = 0; + } + n_fil_crypt_iops_allocated -= iops; + mutex_exit(&fil_crypt_threads_mutex); + state->allocated_iops = 0; + os_event_set(fil_crypt_threads_event); + } + + fil_crypt_update_total_stat(state); +} + +/*********************************************************************** +Search for a space needing rotation */ +bool +fil_crypt_find_space_to_rotate( + const key_state_t *key_state, + rotate_thread_t *state, + bool *recheck) +{ + /* we need iops to start rotating */ + while (!state->should_shutdown() && !fil_crypt_alloc_iops(state)) { + os_event_reset(fil_crypt_threads_event); + os_event_wait_time(fil_crypt_threads_event, 1000000); + } + + if (state->should_shutdown()) + return false; + + if (state->first) { + state->first = false; + state->space = fil_get_first_space(); + } else { + state->space = fil_get_next_space(state->space); + } + + while (!state->should_shutdown() && state->space != ULINT_UNDEFINED) { + + ulint space = state->space; + if (fil_crypt_space_needs_rotation(space, key_state, recheck)) { + /* init state->min_key_version_found before + * starting on a space */ + state->min_key_version_found = key_state->key_version; + return true; + } + + state->space = fil_get_next_space(space); + } + + /* if we didn't find any space return iops */ + fil_crypt_return_iops(state); + + return false; + +} + +/*********************************************************************** +Start rotating a space */ +static +void +fil_crypt_start_rotate_space( + const key_state_t *key_state, + rotate_thread_t *state) +{ + ulint space = state->space; + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + mutex_enter(&crypt_data->mutex); + if (crypt_data->rotate_state.active_threads == 0) { + /* only first thread needs to init */ + crypt_data->rotate_state.next_offset = 1; // skip page 0 + /* no need to rotate beyond current max + * if space extends, it will 
be encrypted with newer version */ + crypt_data->rotate_state.max_offset = fil_space_get_size(space); + + crypt_data->rotate_state.end_lsn = 0; + crypt_data->rotate_state.min_key_version_found = + key_state->key_version; + + crypt_data->rotate_state.start_time = time(0); + } + + /* count active threads in space */ + crypt_data->rotate_state.active_threads++; + + /* Initialize thread local state */ + state->end_lsn = crypt_data->rotate_state.end_lsn; + state->min_key_version_found = + crypt_data->rotate_state.min_key_version_found; + + /* inform scrubbing */ + crypt_data->rotate_state.scrubbing.is_active = + btr_scrub_start_space(space, &state->scrub_data); + + mutex_exit(&crypt_data->mutex); +} + +/*********************************************************************** +Search for batch of pages needing rotation */ +static +bool +fil_crypt_find_page_to_rotate( + const key_state_t *key_state, + rotate_thread_t *state) +{ + ulint batch = srv_alloc_time * state->allocated_iops; + ulint space = state->space; + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + mutex_enter(&crypt_data->mutex); + if (crypt_data->closing == false && + crypt_data->rotate_state.next_offset < + crypt_data->rotate_state.max_offset) { + + state->offset = crypt_data->rotate_state.next_offset; + ulint remaining = crypt_data->rotate_state.max_offset - + crypt_data->rotate_state.next_offset; + + if (batch <= remaining) + state->batch = batch; + else + state->batch = remaining; + + crypt_data->rotate_state.next_offset += batch; + mutex_exit(&crypt_data->mutex); + return true; + } + + mutex_exit(&crypt_data->mutex); + return false; +} + +/*********************************************************************** +Check if a page is uninitialized (doesn't need to be rotated) */ +static bool +fil_crypt_is_page_uninitialized(const byte* frame, uint zip_size) +{ + if (zip_size) { + ulint stored_checksum = mach_read_from_4( + frame + FIL_PAGE_SPACE_OR_CHKSUM); + /* empty pages aren't 
encrypted */ + if (stored_checksum == 0) { + return true; + } + } else { + ulint size = UNIV_PAGE_SIZE; + ulint checksum_field1 = mach_read_from_4( + frame + FIL_PAGE_SPACE_OR_CHKSUM); + ulint checksum_field2 = mach_read_from_4( + frame + size - FIL_PAGE_END_LSN_OLD_CHKSUM); + /* empty pages are not encrypted */ + if (checksum_field1 == 0 && checksum_field2 == 0 + && mach_read_from_4(frame + FIL_PAGE_LSN) == 0) { + return true; + } + } + return false; +} + +#define fil_crypt_get_page_throttle(state,space,zip_size,offset,mtr,sleeptime_ms) \ + fil_crypt_get_page_throttle_func(state, space, zip_size, offset, mtr, \ + sleeptime_ms, __FILE__, __LINE__) + +/*********************************************************************** +Get a page and compute sleep time */ +static +buf_block_t* +fil_crypt_get_page_throttle_func(rotate_thread_t *state, + ulint space, uint zip_size, ulint offset, + mtr_t *mtr, + ulint *sleeptime_ms, + const char *file, + ulint line) +{ + buf_block_t* block = buf_page_try_get_func(space, offset, RW_X_LATCH, + true, + file, line, mtr); + if (block != NULL) { + /* page was in buffer pool */ + state->crypt_stat.pages_read_from_cache++; + return block; + } + + state->crypt_stat.pages_read_from_disk++; + + ullint start = ut_time_us(NULL); + block = buf_page_get_gen(space, zip_size, offset, + RW_X_LATCH, + NULL, BUF_GET_POSSIBLY_FREED, + file, line, mtr); + ullint end = ut_time_us(NULL); + + if (end < start) { + end = start; // safety... 
+ } + + state->cnt_waited++; + state->sum_waited_us += (end - start); + + /* average page load */ + ulint add_sleeptime_ms = 0; + ulint avg_wait_time_us = state->sum_waited_us / state->cnt_waited; + ulint alloc_wait_us = 1000000 / state->allocated_iops; + if (avg_wait_time_us < alloc_wait_us) { + /* we reading faster than we allocated */ + add_sleeptime_ms = (alloc_wait_us - avg_wait_time_us) / 1000; + } else { + /* if page load time is longer than we want, skip sleeping */ + } + + *sleeptime_ms += add_sleeptime_ms; + return block; +} + + +/*********************************************************************** +Get block and allocation status + +note: innodb locks fil_space_latch and then block when allocating page +but locks block and then fil_space_latch when freeing page. +*/ +static +buf_block_t* +btr_scrub_get_block_and_allocation_status( + rotate_thread_t *state, + ulint space, + ulint zip_size, + ulint offset, + mtr_t *mtr, + btr_scrub_page_allocation_status_t *allocation_status, + ulint *sleeptime_ms) +{ + mtr_t local_mtr; + buf_block_t *block = NULL; + mtr_start(&local_mtr); + *allocation_status = fsp_page_is_free(space, offset, &local_mtr) ? 
+ BTR_SCRUB_PAGE_FREE : + BTR_SCRUB_PAGE_ALLOCATED; + + if (*allocation_status == BTR_SCRUB_PAGE_FREE) { + /* this is easy case, we lock fil_space_latch first and + then block */ + block = fil_crypt_get_page_throttle(state, + space, zip_size, + offset, mtr, + sleeptime_ms); + mtr_commit(&local_mtr); + } else { + /* page is allocated according to xdes */ + + /* release fil_space_latch *before* fetching block */ + mtr_commit(&local_mtr); + + /* NOTE: when we have locked dict_index_get_lock(), + * it's safe to release fil_space_latch and then fetch block + * as dict_index_get_lock() is needed to make tree modifications + * such as free-ing a page + */ + + block = fil_crypt_get_page_throttle(state, + space, zip_size, + offset, mtr, + sleeptime_ms); + } + + return block; +} + + +/*********************************************************************** +Rotate one page */ +static +void +fil_crypt_rotate_page( + const key_state_t *key_state, + rotate_thread_t *state) +{ + ulint space = state->space; + ulint offset = state->offset; + const uint zip_size = fil_space_get_zip_size(space); + ulint sleeptime_ms = 0; + + /* check if tablespace is closing before reading page */ + if (fil_crypt_is_closing(space)) + return; + + if (space == TRX_SYS_SPACE && offset == TRX_SYS_PAGE_NO) { + /* don't encrypt this as it contains address to dblwr buffer */ + return; + } + + mtr_t mtr; + mtr_start(&mtr); + buf_block_t* block = fil_crypt_get_page_throttle(state, + space, zip_size, + offset, &mtr, + &sleeptime_ms); + + bool modified = false; + int needs_scrubbing = BTR_SCRUB_SKIP_PAGE; + lsn_t block_lsn = block->page.newest_modification; + uint kv = block->page.key_version; + + /* check if tablespace is closing after reading page */ + if (!fil_crypt_is_closing(space)) { + byte* frame = buf_block_get_frame(block); + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + + if (kv == 0 && + fil_crypt_is_page_uninitialized(frame, zip_size)) { + ; + } else if 
(fil_crypt_needs_rotation(kv, key_state)) { + + /* page can be "fresh" i.e never written in case + * kv == 0 or it should have a key version at least + * as big as the space minimum key version*/ + ut_a(kv == 0 || kv >= crypt_data->min_key_version); + + modified = true; + + /* force rotation by dummy updating page */ + mlog_write_ulint(frame + + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, + space, MLOG_4BYTES, &mtr); + + /* update block */ + block->page.key_version = key_state->key_version; + + /* statistics */ + state->crypt_stat.pages_modified++; + } else { + ut_a(kv >= crypt_data->min_key_version || + (kv == 0 && key_state->key_version == 0)); + + if (kv < state->min_key_version_found) { + state->min_key_version_found = kv; + } + } + + needs_scrubbing = btr_page_needs_scrubbing( + &state->scrub_data, block, + BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN); + } + + mtr_commit(&mtr); + lsn_t end_lsn = mtr.end_lsn; + + if (needs_scrubbing == BTR_SCRUB_PAGE) { + mtr_start(&mtr); + /* + * refetch page and allocation status + */ + btr_scrub_page_allocation_status_t allocated; + block = btr_scrub_get_block_and_allocation_status( + state, space, zip_size, offset, &mtr, + &allocated, + &sleeptime_ms); + + /* get required table/index and index-locks */ + needs_scrubbing = btr_scrub_recheck_page( + &state->scrub_data, block, allocated, &mtr); + + if (needs_scrubbing == BTR_SCRUB_PAGE) { + /* we need to refetch it once more now that we have + * index locked */ + block = btr_scrub_get_block_and_allocation_status( + state, space, zip_size, offset, &mtr, + &allocated, + &sleeptime_ms); + + needs_scrubbing = btr_scrub_page(&state->scrub_data, + block, allocated, + &mtr); + } + + /* NOTE: mtr is committed inside btr_scrub_recheck_page() + * and/or btr_scrub_page. This is to make sure that + * locks & pages are latched in corrected order, + * the mtr is in some circumstances restarted. 
+ * (mtr_commit() + mtr_start()) + */ + } + + if (needs_scrubbing != BTR_SCRUB_PAGE) { + /* if page didn't need scrubbing it might be that cleanups + are needed. do those outside of any mtr to prevent deadlocks. + + the information what kinds of cleanups that are needed are + encoded inside the needs_scrubbing, but this is opaque to + this function (except the value BTR_SCRUB_PAGE) */ + btr_scrub_skip_page(&state->scrub_data, needs_scrubbing); + } + + if (needs_scrubbing == BTR_SCRUB_TURNED_OFF) { + /* if we just detected that scrubbing was turned off + * update global state to reflect this */ + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + mutex_enter(&crypt_data->mutex); + crypt_data->rotate_state.scrubbing.is_active = false; + mutex_exit(&crypt_data->mutex); + } + + if (modified) { + /* if we modified page, we take lsn from mtr */ + ut_a(end_lsn > state->end_lsn); + ut_a(end_lsn > block_lsn); + state->end_lsn = end_lsn; + } else { + /* if we did not modify page, check for max lsn */ + if (block_lsn > state->end_lsn) { + state->end_lsn = block_lsn; + } + } + + if (sleeptime_ms) { + os_event_reset(fil_crypt_throttle_sleep_event); + os_event_wait_time(fil_crypt_throttle_sleep_event, + 1000 * sleeptime_ms); + } +} + +/*********************************************************************** +Rotate a batch of pages */ +static +void +fil_crypt_rotate_pages( + const key_state_t *key_state, + rotate_thread_t *state) +{ + ulint space = state->space; + ulint end = state->offset + state->batch; + for (; state->offset < end; state->offset++) { + + /* we can't rotate pages in dblwr buffer as + * it's not possible to read those due to lots of asserts + * in buffer pool. 
+ * + * However since these are only (short-lived) copies of + * real pages, they will be updated anyway when the + * real page is updated + */ + if (space == TRX_SYS_SPACE && + buf_dblwr_page_inside(state->offset)) { + continue; + } + + fil_crypt_rotate_page(key_state, state); + } +} + +/*********************************************************************** +Flush rotated pages and then update page 0 */ +static +void +fil_crypt_flush_space(rotate_thread_t *state, ulint space) +{ + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + + /* flush tablespace pages so that there are no pages left with old key */ + lsn_t end_lsn = crypt_data->rotate_state.end_lsn; + if (end_lsn > 0 && !fil_crypt_is_closing(space)) { + bool success = false; + ulint n_pages = 0; + ulint sum_pages = 0; + ullint start = ut_time_us(NULL); + do { + success = buf_flush_list(ULINT_MAX, end_lsn, &n_pages); + buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); + sum_pages += n_pages; + } while (!success && !fil_crypt_is_closing(space)); + ullint end = ut_time_us(NULL); + if (sum_pages && end > start) { + state->cnt_waited += sum_pages; + state->sum_waited_us += (end - start); + + /* statistics */ + state->crypt_stat.pages_flushed += sum_pages; + } + } + + if (crypt_data->min_key_version == 0) { + crypt_data->type = CRYPT_SCHEME_UNENCRYPTED; + } + + /* update page 0 */ + if (!fil_crypt_is_closing(space)) { + mtr_t mtr; + mtr_start(&mtr); + ulint offset = 0; // page 0 + const uint zip_size = fil_space_get_zip_size(space); + buf_block_t* block = buf_page_get_gen(space, zip_size, offset, + RW_X_LATCH, NULL, BUF_GET, + __FILE__, __LINE__, &mtr); + byte* frame = buf_block_get_frame(block); + fil_space_write_crypt_data(space, frame, + crypt_data->page0_offset, + ULINT_MAX, &mtr); + mtr_commit(&mtr); + } +} + +/*********************************************************************** +Complete rotating a space */ +static +void +fil_crypt_complete_rotate_space( + const key_state_t *key_state, + 
rotate_thread_t *state) +{ + ulint space = state->space; + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + mutex_enter(&crypt_data->mutex); + + /** + * Update crypt data state with state from thread + */ + if (state->min_key_version_found < + crypt_data->rotate_state.min_key_version_found) { + crypt_data->rotate_state.min_key_version_found = + state->min_key_version_found; + } + + if (state->end_lsn > crypt_data->rotate_state.end_lsn) { + crypt_data->rotate_state.end_lsn = state->end_lsn; + } + + ut_a(crypt_data->rotate_state.active_threads > 0); + crypt_data->rotate_state.active_threads--; + bool last = crypt_data->rotate_state.active_threads == 0; + + /** + * check if space is fully done + * this as when threads shutdown, it could be that we "complete" + * iterating before we have scanned the full space. + */ + bool done = crypt_data->rotate_state.next_offset >= + crypt_data->rotate_state.max_offset; + + /** + * we should flush space if we're last thread AND + * the iteration is done + */ + bool should_flush = last && done; + + if (should_flush) { + /* we're the last active thread */ + crypt_data->rotate_state.flushing = true; + crypt_data->min_key_version = + crypt_data->rotate_state.min_key_version_found; + } + + /* inform scrubbing */ + crypt_data->rotate_state.scrubbing.is_active = false; + mutex_exit(&crypt_data->mutex); + + /* all threads must call btr_scrub_complete_space wo/ mutex held */ + if (btr_scrub_complete_space(&state->scrub_data) == true) { + if (should_flush) { + /* only last thread updates last_scrub_completed */ + mutex_enter(&crypt_data->mutex); + crypt_data->rotate_state.scrubbing. 
+ last_scrub_completed = time(0); + mutex_exit(&crypt_data->mutex); + } + } + + if (should_flush) { + fil_crypt_flush_space(state, space); + + mutex_enter(&crypt_data->mutex); + crypt_data->rotate_state.flushing = false; + mutex_exit(&crypt_data->mutex); + } +} + +/*********************************************************************//** +A thread which monitors global key state and rotates tablespaces accordingly +@return a dummy parameter */ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(fil_crypt_thread)( +/*===============================*/ + void* arg __attribute__((unused))) /*!< in: a dummy parameter required + * by os_thread_create */ +{ + UT_NOT_USED(arg); + + mutex_enter(&fil_crypt_threads_mutex); + uint thread_no = srv_n_fil_crypt_threads_started; + srv_n_fil_crypt_threads_started++; + mutex_exit(&fil_crypt_threads_mutex); + os_event_set(fil_crypt_event); /* signal that we started */ + + /* state of this thread */ + rotate_thread_t thr(thread_no); + + /* if we find a space that is starting, skip over it and recheck it later */ + bool recheck = false; + + key_state_t key_state; + fil_crypt_get_key_state(&key_state); + + /* make sure that thread always checks all tablespace when starting. 
+ * + * by decreasing key_version, loop that waits for change in key-state + * should exit immediately causing thread to check all spaces when starting */ + key_state.key_version--; + + while (!thr.should_shutdown()) { + + key_state_t new_state; + fil_crypt_get_key_state(&new_state); + + time_t wait_start = time(0); + while (!thr.should_shutdown() && key_state == new_state) { + + /* wait for key state changes + * i.e either new key version of change or + * new rotate_key_age */ + os_event_reset(fil_crypt_threads_event); + os_event_wait_time(fil_crypt_threads_event, 1000000); + fil_crypt_get_key_state(&new_state); + + if (recheck) { + /* check recheck here, after sleep, so + * that we don't busy loop while when one thread is starting + * a space*/ + break; + } + + time_t waited = time(0) - wait_start; + if (waited >= srv_background_scrub_data_check_interval) + break; + } + + recheck = false; + thr.first = true; // restart from first tablespace + key_state = new_state; // save for next loop + + /* iterate all spaces searching for those needing rotation */ + while (!thr.should_shutdown() && + fil_crypt_find_space_to_rotate(&new_state, &thr, &recheck)) { + + /* we found a space to rotate */ + fil_crypt_start_rotate_space(&new_state, &thr); + + /* decrement pending ops that was incremented in + * fil_crypt_space_needs_rotation + * (called from fil_crypt_find_space_to_rotate), + * this makes sure that tablespace won't be dropped + * just after we decided to start processing it. 
*/ + fil_decr_pending_ops(thr.space); + + /* iterate all pages (cooperativly with other threads) */ + while (!thr.should_shutdown() && + fil_crypt_find_page_to_rotate(&new_state, &thr)) { + + /* rotate a (set) of pages */ + fil_crypt_rotate_pages(&new_state, &thr); + + /* realloc iops */ + fil_crypt_realloc_iops(&thr); + } + + /* complete rotation */ + fil_crypt_complete_rotate_space(&new_state, &thr); + + /* refresh key state */ + fil_crypt_get_key_state(&new_state); + + /* return iops */ + fil_crypt_return_iops(&thr); + } + } + + /* return iops if shutting down */ + fil_crypt_return_iops(&thr); + + mutex_enter(&fil_crypt_threads_mutex); + srv_n_fil_crypt_threads_started--; + mutex_exit(&fil_crypt_threads_mutex); + os_event_set(fil_crypt_event); /* signal that we stopped */ + + /* We count the number of threads in os_thread_exit(). A created + thread should always use that to exit and not use return() to exit. */ + + os_thread_exit(NULL); + + OS_THREAD_DUMMY_RETURN; +} + +/********************************************************************* +Adjust thread count for key rotation */ +UNIV_INTERN +void +fil_crypt_set_thread_cnt(uint new_cnt) { + if (new_cnt > srv_n_fil_crypt_threads) { + uint add = new_cnt - srv_n_fil_crypt_threads; + srv_n_fil_crypt_threads = new_cnt; + for (uint i = 0; i < add; i++) { + os_thread_create(fil_crypt_thread, NULL, NULL); + } + } else if (new_cnt < srv_n_fil_crypt_threads) { + srv_n_fil_crypt_threads = new_cnt; + os_event_set(fil_crypt_threads_event); + } + + while(srv_n_fil_crypt_threads_started != srv_n_fil_crypt_threads) { + os_event_reset(fil_crypt_event); + os_event_wait_time(fil_crypt_event, 1000000); + } +} + +/********************************************************************* +Adjust max key age */ +UNIV_INTERN +void +fil_crypt_set_rotate_key_age(uint val) +{ + srv_fil_crypt_rotate_key_age = val; + os_event_set(fil_crypt_threads_event); +} + +/********************************************************************* +Adjust 
rotation iops */ +UNIV_INTERN +void +fil_crypt_set_rotation_iops(uint val) +{ + srv_n_fil_crypt_iops = val; + os_event_set(fil_crypt_threads_event); +} + +/********************************************************************* +Init threads for key rotation */ +UNIV_INTERN +void +fil_crypt_threads_init() +{ + fil_crypt_event = os_event_create(); + fil_crypt_threads_event = os_event_create(); + mutex_create(fil_crypt_threads_mutex_key, + &fil_crypt_threads_mutex, SYNC_NO_ORDER_CHECK); + + uint cnt = srv_n_fil_crypt_threads; + srv_n_fil_crypt_threads = 0; + fil_crypt_set_thread_cnt(cnt); +} + +/********************************************************************* +End threads for key rotation */ +UNIV_INTERN +void +fil_crypt_threads_end() +{ + /* stop threads */ + fil_crypt_set_thread_cnt(0); +} + +/********************************************************************* +Clean up key rotation threads resources */ +UNIV_INTERN +void +fil_crypt_threads_cleanup() { + os_event_free(fil_crypt_event); + os_event_free(fil_crypt_threads_event); +} + +/********************************************************************* +Mark a space as closing */ +UNIV_INTERN +void +fil_space_crypt_mark_space_closing( + ulint space) +{ + mutex_enter(&fil_crypt_threads_mutex); + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) { + mutex_exit(&fil_crypt_threads_mutex); + return; + } + + mutex_enter(&crypt_data->mutex); + mutex_exit(&fil_crypt_threads_mutex); + crypt_data->closing = true; + mutex_exit(&crypt_data->mutex); +} + +/********************************************************************* +Wait for crypt threads to stop accessing space */ +UNIV_INTERN +void +fil_space_crypt_close_tablespace( + ulint space) +{ + mutex_enter(&fil_crypt_threads_mutex); + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) { + mutex_exit(&fil_crypt_threads_mutex); + return; + } + + uint start = time(0); + uint last = start; + 
mutex_enter(&crypt_data->mutex); + mutex_exit(&fil_crypt_threads_mutex); + crypt_data->closing = true; + uint cnt = crypt_data->rotate_state.active_threads; + bool flushing = crypt_data->rotate_state.flushing; + while (cnt > 0 || flushing) { + mutex_exit(&crypt_data->mutex); + /* release dict mutex so that scrub threads can release their + * table references */ + dict_mutex_exit_for_mysql(); + /* wakeup throttle (all) sleepers */ + os_event_set(fil_crypt_throttle_sleep_event); + os_thread_sleep(20000); + dict_mutex_enter_for_mysql(); + mutex_enter(&crypt_data->mutex); + cnt = crypt_data->rotate_state.active_threads; + flushing = crypt_data->rotate_state.flushing; + + uint now = time(0); + if (now >= last + 30) { + fprintf(stderr, + "WARNING: " + "waited %u seconds to drop space: %lu\n", + now - start, space); + last = now; + } + } + mutex_exit(&crypt_data->mutex); +} + +/********************************************************************* +Get crypt status for a space (used by information_schema) +return 0 if crypt data present */ +int +fil_space_crypt_get_status( +/*==================*/ + ulint id, /*!< in: space id */ + struct fil_space_crypt_status_t* status) /*!< out: status */ +{ + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(id); + + if (crypt_data != NULL) { + status->space = id; + status->scheme = crypt_data->type; + mutex_enter(&crypt_data->mutex); + status->keyserver_requests = crypt_data->keyserver_requests; + status->min_key_version = crypt_data->min_key_version; + if (crypt_data->rotate_state.active_threads > 0 || + crypt_data->rotate_state.flushing) { + status->rotating = true; + status->flushing = + crypt_data->rotate_state.flushing; + status->rotate_next_page_number = + crypt_data->rotate_state.next_offset; + status->rotate_max_page_number = + crypt_data->rotate_state.max_offset; + } else { + status->rotating = false; + } + mutex_exit(&crypt_data->mutex); + } else { + memset(status, 0, sizeof(*status)); + } + + if (srv_encrypt_tables == 
TRUE) { + status->current_key_version = GetLatestCryptoKeyVersion(); + } else { + status->current_key_version = 0; + } + return crypt_data == NULL ? 1 : 0; +} + +/********************************************************************* +Return crypt statistics */ +void +fil_crypt_total_stat(fil_crypt_stat_t *stat) +{ + mutex_enter(&crypt_stat_mutex); + *stat = crypt_stat; + mutex_exit(&crypt_stat_mutex); +} + +/********************************************************************* +Get scrub status for a space (used by information_schema) +return 0 if data found */ +int +fil_space_get_scrub_status( +/*==================*/ + ulint id, /*!< in: space id */ + struct fil_space_scrub_status_t* status) /*!< out: status */ +{ + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(id); + memset(status, 0, sizeof(*status)); + if (crypt_data != NULL) { + status->space = id; + status->compressed = fil_space_get_zip_size(id) > 0; + mutex_enter(&crypt_data->mutex); + status->last_scrub_completed = + crypt_data->rotate_state.scrubbing.last_scrub_completed; + if (crypt_data->rotate_state.active_threads > 0 && + crypt_data->rotate_state.scrubbing.is_active) { + status->scrubbing = true; + status->current_scrub_started = + crypt_data->rotate_state.start_time; + status->current_scrub_active_threads = + crypt_data->rotate_state.active_threads; + status->current_scrub_page_number = + crypt_data->rotate_state.next_offset; + status->current_scrub_max_page_number = + crypt_data->rotate_state.max_offset; + } else { + status->scrubbing = false; + } + mutex_exit(&crypt_data->mutex); + } else { + memset(status, 0, sizeof(*status)); + } + + return crypt_data == NULL ? 
1 : 0; +} diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index 08487f595ed..e4be4f6910c 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -56,6 +56,10 @@ Created 10/25/1995 Heikki Tuuri static ulint srv_data_read, srv_data_written; #endif /* !UNIV_HOTBACKUP */ #include "fil0pagecompress.h" + +#include "fil0pageencryption.h" +#include "fsp0pageencryption.h" + #include "zlib.h" #ifdef __linux__ #include <linux/fs.h> @@ -645,8 +649,23 @@ fil_node_open_file( success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE, space->flags); + if (fil_page_encryption_status(page)) { + /* if page is (still) encrypted, write an error and return. + * Otherwise the server would crash if decrypting is not possible. + * This may be the case, if the key file could not be + * opened on server startup. + */ + ib_logf(IB_LOG_LEVEL_ERROR, + "InnoDB: can not decrypt page, because " + "keys could not be read.\n" + ); + return false; + + } + space_id = fsp_header_get_space_id(page); flags = fsp_header_get_flags(page); + page_size = fsp_flags_get_page_size(flags); atomic_writes = fsp_flags_get_atomic_writes(flags); @@ -1157,7 +1176,8 @@ fil_space_create( const char* name, /*!< in: space name */ ulint id, /*!< in: space id */ ulint flags, /*!< in: tablespace flags */ - ulint purpose)/*!< in: FIL_TABLESPACE, or FIL_LOG if log */ + ulint purpose,/*!< in: FIL_TABLESPACE, or FIL_LOG if log */ + fil_space_crypt_t* crypt_data) /*!< in: crypt data */ { fil_space_t* space; @@ -1165,6 +1185,21 @@ fil_space_create( ut_a(fil_system); + if (fsp_flags_is_page_encrypted(flags)) { + if (!HasCryptoKey(fsp_flags_get_page_encryption_key(flags))) { + /* by returning here it should be avoided that + * the server crashes, if someone tries to access an + * encrypted table and the encryption key is not available. + * The the table is treaded as non-existent. 
+ */ + ib_logf(IB_LOG_LEVEL_WARN, + "Tablespace '%s' can not be opened, because " + " encryption key can not be found (space id: %lu, key %lu)\n" + , name, (ulong) id, fsp_flags_get_page_encryption_key(flags)); + return (FALSE); + } + } + /* Look for a matching tablespace and if found free it. */ do { mutex_enter(&fil_system->mutex); @@ -1253,6 +1288,8 @@ fil_space_create( UT_LIST_ADD_LAST(space_list, fil_system->space_list, space); + space->crypt_data = crypt_data; + mutex_exit(&fil_system->mutex); return(TRUE); @@ -1387,6 +1424,8 @@ fil_space_free( rw_lock_free(&(space->latch)); + fil_space_destroy_crypt_data(&(space->crypt_data)); + mem_free(space->name); mem_free(space); @@ -1620,6 +1659,8 @@ fil_init( UT_LIST_INIT(fil_system->LRU); fil_system->max_n_open = max_n_open; + + fil_space_crypt_init(); } /*******************************************************************//** @@ -1827,7 +1868,8 @@ fil_write_lsn_and_arch_no_to_file( err = fil_read(TRUE, space, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL, 0); if (err == DB_SUCCESS) { - mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn); + mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, + lsn); err = fil_write(TRUE, space, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL, 0); @@ -1909,6 +1951,7 @@ fil_check_first_page( { ulint space_id; ulint flags; + ulint page_is_encrypted; if (srv_force_recovery >= SRV_FORCE_IGNORE_CORRUPT) { return(NULL); @@ -1916,12 +1959,23 @@ fil_check_first_page( space_id = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page); flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page); - - if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) { - fprintf(stderr, "InnoDB: Error: Current page size %lu != page size on page %lu\n", - UNIV_PAGE_SIZE, fsp_flags_get_page_size(flags)); - - return("innodb-page-size mismatch"); + /* Note: the 1st page is usually not encrypted. 
If the Key Provider + or the encryption key is not available, the + check for reading the first page should intentionally fail + with "can not decrypt" message. */ + page_is_encrypted = fil_page_encryption_status(page); + if (page_is_encrypted == PAGE_ENCRYPTION_KEY_MISSING && page_is_encrypted) { + page_is_encrypted = 1; + } else { + page_is_encrypted = 0; + if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) { + fprintf(stderr, + "InnoDB: Error: Current page size %lu != " + " page size on page %lu\n", + UNIV_PAGE_SIZE, fsp_flags_get_page_size(flags)); + + return("innodb-page-size mismatch"); + } } if (!space_id && !flags) { @@ -1937,9 +1991,17 @@ fil_check_first_page( } } - if (buf_page_is_corrupted( + if (!page_is_encrypted && buf_page_is_corrupted( false, page, fsp_flags_get_zip_size(flags))) { return("checksum mismatch"); + } else { + if (page_is_encrypted) { + /* this error message is interpreted by the calling method, which is + * executed if the server starts in recovery mode. 
+ */ + return(MSG_CANNOT_DECRYPT); + + } } if (page_get_space_id(page) == space_id @@ -1969,8 +2031,9 @@ fil_read_first_page( lsn values in data files */ lsn_t* max_flushed_lsn, /*!< out: max of flushed lsn values in data files */ - ulint orig_space_id) /*!< in: original file space + ulint orig_space_id, /*!< in: original file space id */ + fil_space_crypt_t** crypt_data) /*< out: crypt data */ { byte* buf; byte* page; @@ -2008,7 +2071,16 @@ fil_read_first_page( check_msg = fil_check_first_page(page); } - flushed_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN); + flushed_lsn = mach_read_from_8(page + + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + + if (crypt_data) { + ulint space = fsp_header_get_space_id(page); + ulint offset = + fsp_header_get_crypt_offset( + fsp_flags_get_zip_size(*flags), NULL); + *crypt_data = fil_space_read_crypt_data(space, page, offset); + } ut_free(buf); @@ -2487,6 +2559,9 @@ fil_check_pending_operations( *space = 0; + /* Wait for crypt threads to stop accessing space */ + fil_space_crypt_close_tablespace(id); + mutex_enter(&fil_system->mutex); fil_space_t* sp = fil_space_get_by_id(id); if (sp) { @@ -3468,7 +3543,8 @@ fil_create_new_single_table_tablespace( } } - success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE); + success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE, + fil_space_create_crypt_data()); if (!success || !fil_node_create(path, size, space_id, FALSE)) { err = DB_ERROR; goto error_exit_1; @@ -3596,6 +3672,7 @@ fil_open_single_table_tablespace( ulint tablespaces_found = 0; ulint valid_tablespaces_found = 0; ulint atomic_writes = 0; + fil_space_crypt_t* crypt_data = NULL; #ifdef UNIV_SYNC_DEBUG ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); @@ -3694,7 +3771,7 @@ fil_open_single_table_tablespace( if (def.success) { def.check_msg = fil_read_first_page( def.file, FALSE, &def.flags, &def.id, - &def.lsn, &def.lsn, id); + &def.lsn, &def.lsn, id, &def.crypt_data); def.valid = 
!def.check_msg; /* Validate this single-table-tablespace with SYS_TABLES, @@ -3716,7 +3793,7 @@ fil_open_single_table_tablespace( if (remote.success) { remote.check_msg = fil_read_first_page( remote.file, FALSE, &remote.flags, &remote.id, - &remote.lsn, &remote.lsn, id); + &remote.lsn, &remote.lsn, id, &remote.crypt_data); remote.valid = !remote.check_msg; /* Validate this single-table-tablespace with SYS_TABLES, @@ -3739,7 +3816,7 @@ fil_open_single_table_tablespace( if (dict.success) { dict.check_msg = fil_read_first_page( dict.file, FALSE, &dict.flags, &dict.id, - &dict.lsn, &dict.lsn, id); + &dict.lsn, &dict.lsn, id, &dict.crypt_data); dict.valid = !dict.check_msg; /* Validate this single-table-tablespace with SYS_TABLES, @@ -3892,9 +3969,17 @@ fil_open_single_table_tablespace( } skip_validate: + if (remote.success) + crypt_data = remote.crypt_data; + else if (dict.success) + crypt_data = dict.crypt_data; + else if (def.success) + crypt_data = def.crypt_data; + if (err != DB_SUCCESS) { ; // Don't load the tablespace into the cache - } else if (!fil_space_create(tablename, id, flags, FIL_TABLESPACE)) { + } else if (!fil_space_create(tablename, id, flags, FIL_TABLESPACE, + crypt_data)) { err = DB_ERROR; } else { /* We do not measure the size of the file, that is why @@ -3914,15 +3999,25 @@ cleanup_and_exit: if (remote.filepath) { mem_free(remote.filepath); } + if (remote.crypt_data && remote.crypt_data != crypt_data) { + fil_space_destroy_crypt_data(&remote.crypt_data); + } if (dict.success) { os_file_close(dict.file); } if (dict.filepath) { mem_free(dict.filepath); } + if (dict.crypt_data && dict.crypt_data != crypt_data) { + fil_space_destroy_crypt_data(&dict.crypt_data); + } if (def.success) { os_file_close(def.file); } + if (def.crypt_data && def.crypt_data != crypt_data) { + fil_space_destroy_crypt_data(&def.crypt_data); + } + mem_free(def.filepath); return(err); @@ -4139,13 +4234,22 @@ fil_validate_single_table_tablespace( check_first_page: fsp->success = 
TRUE; + fsp->encryption_error = 0; if (const char* check_msg = fil_read_first_page( fsp->file, FALSE, &fsp->flags, &fsp->id, - &fsp->lsn, &fsp->lsn, ULINT_UNDEFINED)) { + &fsp->lsn, &fsp->lsn, ULINT_UNDEFINED, &fsp->crypt_data)) { ib_logf(IB_LOG_LEVEL_ERROR, "%s in tablespace %s (table %s)", check_msg, fsp->filepath, tablename); fsp->success = FALSE; + if (strncmp(check_msg, MSG_CANNOT_DECRYPT, strlen(check_msg))==0) { + /* by returning here, it should be avoided, that the server crashes, + * if started in recovery mode and can not decrypt tables, if + * the key file can not be read. + */ + fsp->encryption_error = 1; + return; + } } if (!fsp->success) { @@ -4299,6 +4403,14 @@ fil_load_single_table_tablespace( } if (!def.success && !remote.success) { + + if (def.encryption_error || remote.encryption_error) { + fprintf(stderr, + "InnoDB: Error: could not open single-table" + " tablespace file %s. Encryption error!\n", def.filepath); + return; + } + /* The following call prints an error message */ os_file_get_last_error(true); fprintf(stderr, @@ -4482,7 +4594,8 @@ will_not_choose: mutex_exit(&fil_system->mutex); #endif /* UNIV_HOTBACKUP */ ibool file_space_create_success = fil_space_create( - tablename, fsp->id, fsp->flags, FIL_TABLESPACE); + tablename, fsp->id, fsp->flags, FIL_TABLESPACE, + fsp->crypt_data); if (!file_space_create_success) { if (srv_force_recovery > 0) { @@ -5133,7 +5246,7 @@ retry: success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC, node->name, node->handle, buf, offset, page_size * n_pages, - node, NULL, space_id, NULL, 0, 0, 0); + node, NULL, space_id, NULL, 0, 0, 0, 0, 0); #endif /* UNIV_HOTBACKUP */ if (success) { os_has_said_disk_full = FALSE; @@ -5526,6 +5639,8 @@ _fil_io( ibool ignore_nonexistent_pages; ibool page_compressed = FALSE; ulint page_compression_level = 0; + ibool page_encrypted; + ulint page_encryption_key; is_log = type & OS_FILE_LOG; type = type & ~OS_FILE_LOG; @@ -5595,6 +5710,11 @@ _fil_io( page_compressed = 
fsp_flags_is_page_compressed(space->flags); page_compression_level = fsp_flags_get_page_compression_level(space->flags); + + page_encrypted = fsp_flags_is_page_encrypted(space->flags); + page_encryption_key = fsp_flags_get_page_encryption_key(space->flags); + + /* If we are deleting a tablespace we don't allow any read operations on that. However, we do allow write operations. */ if (space == 0 || (type == OS_FILE_READ && space->stop_new_ops)) { @@ -5739,9 +5859,23 @@ _fil_io( } /* Queue the aio request */ - ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, - offset, len, node, message, space_id, trx, - page_compressed, page_compression_level, write_size); + ret = os_aio( + type, + mode | wake_later, + node->name, + node->handle, + buf, + offset, + len, + node, + message, + space_id, + trx, + page_compressed, + page_compression_level, + write_size, + page_encrypted, + page_encryption_key); #else /* In mysqlbackup do normal i/o, not aio */ @@ -6180,6 +6314,8 @@ void fil_close(void) /*===========*/ { + fil_space_crypt_cleanup(); + #ifndef UNIV_HOTBACKUP /* The mutex should already have been freed. 
*/ ut_ad(fil_system->mutex.magic_n == 0); @@ -6229,6 +6365,8 @@ struct fil_iterator_t { ulint n_io_buffers; /*!< Number of pages to use for IO */ byte* io_buffer; /*!< Buffer to use for IO */ + fil_space_crypt_t *crypt_data; /*!< Crypt data (if encrypted) */ + byte* crypt_io_buffer; /*!< IO buffer when encrypted */ }; /********************************************************************//** @@ -6291,7 +6429,12 @@ fil_iterate( ut_ad(n_bytes > 0); ut_ad(!(n_bytes % iter.page_size)); - if (!os_file_read(iter.file, io_buffer, offset, + byte* readptr = io_buffer; + if (iter.crypt_data != NULL) { + readptr = iter.crypt_io_buffer; + } + + if (!os_file_read(iter.file, readptr, offset, (ulint) n_bytes, fil_space_is_page_compressed(space_id))) { @@ -6306,6 +6449,18 @@ fil_iterate( for (ulint i = 0; i < n_pages_read; ++i) { + if (iter.crypt_data != NULL) { + bool decrypted = fil_space_decrypt( + iter.crypt_data, + readptr + i * iter.page_size, // src + iter.page_size, + io_buffer + i * iter.page_size); // dst + if (decrypted) { + /* write back unencrypted page */ + updated = true; + } + } + buf_block_set_file_page(block, space_id, page_no++); dberr_t err; @@ -6448,6 +6603,13 @@ fil_tablespace_iterate( iter.n_io_buffers = n_io_buffers; iter.page_size = callback.get_page_size(); + ulint crypt_data_offset = fsp_header_get_crypt_offset( + callback.get_zip_size(), 0); + + /* read (optional) crypt data */ + iter.crypt_data = fil_space_read_crypt_data( + 0, page, crypt_data_offset); + /* Compressed pages can't be optimised for block IO for now. We do the IMPORT page by page. 
*/ @@ -6456,6 +6618,14 @@ fil_tablespace_iterate( ut_a(iter.page_size == callback.get_zip_size()); } + /** If tablespace is encrypted, it needs extra buffers */ + if (iter.crypt_data != NULL) { + /* decrease io buffers so that memory + * consumption doesnt double + * note: the +1 is to avoid n_io_buffers getting down to 0 */ + iter.n_io_buffers = (iter.n_io_buffers + 1) / 2; + } + /** Add an extra page for compressed page scratch area. */ void* io_buffer = mem_alloc( @@ -6464,9 +6634,45 @@ fil_tablespace_iterate( iter.io_buffer = static_cast<byte*>( ut_align(io_buffer, UNIV_PAGE_SIZE)); + void* crypt_io_buffer = NULL; + if (iter.crypt_data != NULL) { + crypt_io_buffer = mem_alloc( + iter.n_io_buffers * UNIV_PAGE_SIZE); + iter.crypt_io_buffer = static_cast<byte*>( + crypt_io_buffer); + } + err = fil_iterate(iter, &block, callback); mem_free(io_buffer); + + if (iter.crypt_data != NULL) { + /* clear crypt data from page 0 and write it back */ + os_file_read(file, page, 0, UNIV_PAGE_SIZE, 0); + fil_space_clear_crypt_data(page, crypt_data_offset); + lsn_t lsn = mach_read_from_8(page + FIL_PAGE_LSN); + if (callback.get_zip_size() == 0) { + buf_flush_init_for_writing( + page, 0, lsn); + } else { + buf_flush_update_zip_checksum( + page, callback.get_zip_size(), lsn); + } + + if (!os_file_write( + iter.filepath, iter.file, page, + 0, iter.page_size)) { + + ib_logf(IB_LOG_LEVEL_ERROR, + "os_file_write() failed"); + + return(DB_IO_ERROR); + } + + mem_free(crypt_io_buffer); + iter.crypt_io_buffer = NULL; + fil_space_destroy_crypt_data(&iter.crypt_data); + } } if (err == DB_SUCCESS) { @@ -6700,6 +6906,16 @@ fil_space_name( } /*******************************************************************//** +Return space flags */ +ulint +fil_space_flags( +/*===========*/ + fil_space_t* space) /*!< in: space */ +{ + return (space->flags); +} + +/*******************************************************************//** Return page type name */ const char* fil_get_page_type_name( @@ -6752,3 
+6968,153 @@ fil_node_get_block_size(
 {
 	return (node->file_block_size);
 }
+
+/******************************************************************
+Get id of first tablespace or ULINT_UNDEFINED if none.
+Scans fil_system->space_list under fil_system->mutex and picks the
+first space whose stop_new_ops flag is not set.
+@return space id, or ULINT_UNDEFINED if no such space exists */
+UNIV_INTERN
+ulint
+fil_get_first_space()
+{
+	ulint		out_id = ULINT_UNDEFINED;
+	fil_space_t*	space;
+
+	mutex_enter(&fil_system->mutex);
+
+	space = UT_LIST_GET_FIRST(fil_system->space_list);
+	if (space != NULL) {
+		do
+		{
+			if (!space->stop_new_ops) {
+				out_id = space->id;
+				break;
+			}
+			space = UT_LIST_GET_NEXT(space_list, space);
+		} while (space != NULL);
+	}
+
+	mutex_exit(&fil_system->mutex);
+
+	return out_id;
+}
+
+/******************************************************************
+Get id of next tablespace or ULINT_UNDEFINED if none.
+If a space with the given id exists, the scan of fil_system->space_list
+starts at its successor; otherwise it starts at the head of the list and
+only spaces with a larger id qualify. Spaces that have stop_new_ops set
+are skipped.
+@return space id, or ULINT_UNDEFINED if no such space exists */
+UNIV_INTERN
+ulint
+fil_get_next_space(ulint id)
+{
+	bool		found;
+	fil_space_t*	space;
+	ulint		out_id = ULINT_UNDEFINED;
+
+	mutex_enter(&fil_system->mutex);
+
+	space = fil_space_get_by_id(id);
+	if (space == NULL) {
+		/* we didn't find it...search for space with space->id > id,
+		starting with the very first space in the list */
+		found = false;
+		space = UT_LIST_GET_FIRST(fil_system->space_list);
+	} else {
+		/* we found it, take next available space */
+		found = true;
+		space = UT_LIST_GET_NEXT(space_list, space);
+	}
+
+	/* space is now the first candidate; it is NULL when the list is
+	empty or when id was the last space, so it must be tested before
+	it is dereferenced */
+	for (; space != NULL; space = UT_LIST_GET_NEXT(space_list, space)) {
+
+		if (!found && space->id <= id)
+			continue;
+
+		if (!space->stop_new_ops) {
+			/* only the id is handed out; no reference on the
+			space is taken here */
+			out_id = space->id;
+			break;
+		}
+	}
+
+	mutex_exit(&fil_system->mutex);
+
+	return out_id;
+}
+
+/******************************************************************
+Get crypt data for a tablespace.
+Looks the space up by id under fil_system->mutex.
+@return space->crypt_data, or NULL if no space with this id exists */
+UNIV_INTERN
+fil_space_crypt_t*
+fil_space_get_crypt_data(
+/*==================*/
+	ulint	id)	/*!< in: space id */
+{
+	fil_space_t*		space;
+	fil_space_crypt_t*	crypt_data = NULL;
+
+	ut_ad(fil_system);
+
+	mutex_enter(&fil_system->mutex);
+
+	space = fil_space_get_by_id(id);
+	if (space != NULL) {
+		crypt_data = space->crypt_data;
+	}
+
+	mutex_exit(&fil_system->mutex);
+
+	return(crypt_data);
+}
+
+/****************************************************************** +Get crypt data for a tablespace */ +UNIV_INTERN +void +fil_space_set_crypt_data( +/*==================*/ + ulint id, /*!< in: space id */ + fil_space_crypt_t* crypt_data) /*!< in: crypt data */ +{ + fil_space_t* space; + fil_space_crypt_t* old_crypt_data = NULL; + + ut_ad(fil_system); + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + if (space != NULL) { + + if (space->crypt_data != NULL) { + ut_a(!fil_space_crypt_compare(crypt_data, + space->crypt_data)); + old_crypt_data = space->crypt_data; + } + + space->crypt_data = crypt_data; + } else { + /* there is a small risk that tablespace has been deleted */ + old_crypt_data = crypt_data; + } + + mutex_exit(&fil_system->mutex); + + if (old_crypt_data != NULL) { + /* first assign space->crypt_data + * then destroy old_crypt_data when no new references to + * it can be created. + */ + fil_space_destroy_crypt_data(&old_crypt_data); + } +} diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc index c1d476126c6..fa25d8875ae 100644 --- a/storage/xtradb/fil/fil0pagecompress.cc +++ b/storage/xtradb/fil/fil0pagecompress.cc @@ -269,14 +269,24 @@ fil_compress_page( int level = 0; ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE; ulint write_size=0; - ulint comp_method = innodb_compression_algorithm; /* Cache to avoid - change during - function execution */ + /* Cache to avoid change during function execution */ + ulint comp_method = innodb_compression_algorithm; + ulint orig_page_type; + ut_ad(buf); ut_ad(out_buf); ut_ad(len); ut_ad(out_len); + /* read original page type */ + orig_page_type = mach_read_from_2(buf + FIL_PAGE_TYPE); + + /* Let's not compress file space header or + extent descriptor */ + if ((orig_page_type == FIL_PAGE_TYPE_FSP_HDR) || (orig_page_type == FIL_PAGE_TYPE_XDES) ) { + *out_len = len; + return (buf); + } level = compression_level; 
ut_ad(fil_space_is_page_compressed(space_id)); @@ -419,7 +429,7 @@ fil_compress_page( /* Set up the correct page type */ mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED); /* Set up the flush lsn to be compression algorithm */ - mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, comp_method); + mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, comp_method); /* Set up the actual payload lenght */ mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size); @@ -428,7 +438,7 @@ fil_compress_page( ut_ad(fil_page_is_compressed(out_buf)); ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC); ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size); - ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == (ulint)comp_method); + ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) == (ulint)comp_method); /* Verify that page can be decompressed */ { @@ -470,7 +480,6 @@ fil_compress_page( space_id, fil_space_name(space), len, write_size); #endif /* UNIV_PAGECOMPRESS_DEBUG */ - srv_stats.page_compression_saved.add((len - write_size)); srv_stats.pages_page_compressed.inc(); @@ -552,7 +561,7 @@ fil_decompress_page( } /* Get compression algorithm */ - compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN); + compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); /* Get the actual size of compressed page */ actual_size = mach_read_from_2(buf+FIL_PAGE_DATA); @@ -722,5 +731,3 @@ fil_decompress_page( ut_free(in_buf); } } - - diff --git a/storage/xtradb/fil/fil0pageencryption.cc b/storage/xtradb/fil/fil0pageencryption.cc new file mode 100644 index 00000000000..49c42615e19 --- /dev/null +++ b/storage/xtradb/fil/fil0pageencryption.cc @@ -0,0 +1,628 @@ +/***************************************************************************** + +Copyright (C) 2014 eperi GmbH. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/***************************************************************** + @file fil/fil0pageencryption.cc + Implementation for page encryption file spaces. + + Created 08/25/2014 Ludger Göckel eperi-GmbH + Modified 11/26/2014 Jan Lindström MariaDB Corporation + ***********************************************************************/ + +#include "fil0fil.h" +#include "fil0pageencryption.h" +#include "fsp0pageencryption.h" +#include "my_dbug.h" +#include "page0zip.h" + +#include "buf0checksum.h" +#include <my_global.h> +#include <my_aes.h> +#include <math.h> + +/* + * derived from libFLAC, which is gpl v2 + */ +byte crc_table[] = { + 0x00,0x07,0x0E,0x09,0x1C,0x1B,0x12,0x15,0x38,0x3F,0x36,0x31,0x24,0x23,0x2A,0x2D,0x70,0x77,0x7E,0x79, + 0x6C,0x6B,0x62,0x65,0x48,0x4F,0x46,0x41,0x54,0x53,0x5A,0x5D,0xE0,0xE7,0xEE,0xE9,0xFC,0xFB,0xF2,0xF5, + 0xD8,0xDF,0xD6,0xD1,0xC4,0xC3,0xCA,0xCD,0x90,0x97,0x9E,0x99,0x8C,0x8B,0x82,0x85,0xA8,0xAF,0xA6,0xA1, + 0xB4,0xB3,0xBA,0xBD,0xC7,0xC0,0xC9,0xCE,0xDB,0xDC,0xD5,0xD2,0xFF,0xF8,0xF1,0xF6,0xE3,0xE4,0xED,0xEA, + 0xB7,0xB0,0xB9,0xBE,0xAB,0xAC,0xA5,0xA2,0x8F,0x88,0x81,0x86,0x93,0x94,0x9D,0x9A,0x27,0x20,0x29,0x2E, + 0x3B,0x3C,0x35,0x32,0x1F,0x18,0x11,0x16,0x03,0x04,0x0D,0x0A,0x57,0x50,0x59,0x5E,0x4B,0x4C,0x45,0x42, + 
0x6F,0x68,0x61,0x66,0x73,0x74,0x7D,0x7A,0x89,0x8E,0x87,0x80,0x95,0x92,0x9B,0x9C,0xB1,0xB6,0xBF,0xB8, + 0xAD,0xAA,0xA3,0xA4,0xF9,0xFE,0xF7,0xF0,0xE5,0xE2,0xEB,0xEC,0xC1,0xC6,0xCF,0xC8,0xDD,0xDA,0xD3,0xD4, + 0x69,0x6E,0x67,0x60,0x75,0x72,0x7B,0x7C,0x51,0x56,0x5F,0x58,0x4D,0x4A,0x43,0x44,0x19,0x1E,0x17,0x10, + 0x05,0x02,0x0B,0x0C,0x21,0x26,0x2F,0x28,0x3D,0x3A,0x33,0x34,0x4E,0x49,0x40,0x47,0x52,0x55,0x5C,0x5B, + 0x76,0x71,0x78,0x7F,0x6A,0x6D,0x64,0x63,0x3E,0x39,0x30,0x37,0x22,0x25,0x2C,0x2B,0x06,0x01,0x08,0x0F, + 0x1A,0x1D,0x14,0x13,0xAE,0xA9,0xA0,0xA7,0xB2,0xB5,0xBC,0xBB,0x96,0x91,0x98,0x9F,0x8A,0x8D,0x84,0x83, + 0xDE,0xD9,0xD0,0xD7,0xC2,0xC5,0xCC,0xCB,0xE6,0xE1,0xE8,0xEF,0xFA,0xFD,0xF4,0xF3 + +}; + +/****************************************************************//** +Calculate checksum for encrypted pages +@return checksum */ +static +byte +fil_page_encryption_calc_checksum( +/*==============================*/ + unsigned char* buf, /*!<in: buffer where to calculate checksum */ + ulint len) /*!<in: buffer length */ +{ + byte crc = 0; + for (ulint i = 0; i < len; i++) { + crc = crc_table[(crc ^ buf[i]) & 0xff]; + } + return crc; +} + +/****************************************************************//** +Recalculate checksum for encrypted pages */ +static +void +do_check_sum( +/*=========*/ + ulint page_size, /*!< in: page size */ + ulint zip_size, /*!< in: compressed page size */ + byte* buf) /*!< in: buffer */ +{ + ib_uint32_t checksum = 0; + + if (zip_size) { + checksum = page_zip_calc_checksum(buf,zip_size, + static_cast<srv_checksum_algorithm_t>( + srv_checksum_algorithm)); + + mach_write_to_4(buf + FIL_PAGE_SPACE_OR_CHKSUM, checksum); + return; + } + + switch ((srv_checksum_algorithm_t) srv_checksum_algorithm) { + case SRV_CHECKSUM_ALGORITHM_CRC32: + case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: + checksum = buf_calc_page_crc32(buf); + break; + case SRV_CHECKSUM_ALGORITHM_INNODB: + case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: + checksum = (ib_uint32_t) 
buf_calc_page_new_checksum(buf); + break; + case SRV_CHECKSUM_ALGORITHM_NONE: + case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: + checksum = BUF_NO_CHECKSUM_MAGIC; + break; + /* no default so the compiler will emit a warning if new enum + is added and not handled here */ + } + + mach_write_to_4(buf + FIL_PAGE_SPACE_OR_CHKSUM, checksum); + + /* old style checksum is omitted */ +} + +/****************************************************************//** + For page encrypted pages encrypt the page before actual write + operation. + + Note, that FIL_PAGE_TYPE_FSP_HDR and FIL_PAGE_TYPE_XDES type pages + are not encrypted! + + Pages are encrypted with AES/CBC/NoPadding algorithm. + + "No padding" is used to ensure, that the encrypted page does not + exceed the page size. If "no padding" is used, the input for encryption + must be of size (multiple * AES blocksize). AES Blocksize is usually 16 + (bytes). + + Everything in the page is encrypted except for the 38 byte FIL header. + Since the length of the payload is not a multiple of the AES blocksize, + and to ensure that every byte of the payload is encrypted, two encryption + operations are done. Each time with a block of adequate size as input. + 1st block contains everything from beginning of payload bytes except for + the remainder. 2nd block is of size 64 and contains the remainder and + the last (64 - sizeof(remainder)) bytes of the encrypted 1st block. + + Each encrypted page receives a new page type for PAGE_ENCRYPTION. + The original page type (2 bytes) is stored in the Checksum header of the + page (position FIL_PAGE_SPACE_OR_CHKSUM). Additionally the encryption + key identifier is stored in the Checksum Header. This uses 1 byte. + Checksum verification for encrypted pages is disabled. This checksum + should be restored after decryption. + + To be able to verify decryption in a later stage, a 1-byte checksum at + position 4 of the FIL_PAGE_SPACE_OR_CHKSUM header is stored. 
+ For page compressed table pages the log base 2 of the length of the + encrypted data is stored. + + @return encrypted page or original page if encryption failed to be + written*/ +UNIV_INTERN +byte* +fil_encrypt_page( +/*==============*/ + ulint space_id, /*!< in: tablespace id of the table. */ + byte* buf, /*!< in: buffer from which to write; in aio + this must be appropriately aligned */ + byte* out_buf, /*!< out: encrypted buffer */ + ulint len, /*!< in: length of input buffer.*/ + ulint encryption_key, /*!< in: encryption key */ + ulint* out_len, /*!< out: actual length of encrypted page */ + ulint* errorCode, /*!< out: an error code. set, + if page is intentionally not encrypted */ + byte* tmp_encryption_buf) /*!< in: temporary buffer or NULL */ +{ + + int err = AES_OK; + int key = 0; + uint32 data_size = 0; + ulint orig_page_type = 0; + uint32 write_size = 0; + fil_space_t* space = NULL; + byte* tmp_buf = NULL; + ulint page_len = 0; + ulint offset = 0; + + ut_ad(buf);ut_ad(out_buf); + key = encryption_key; + + *errorCode = AES_OK; + + ut_ad(fil_space_is_page_encrypted(space_id)); + fil_system_enter(); + space = fil_space_get_by_id(space_id); + fil_system_exit(); + +#ifdef UNIV_DEBUG_PAGEENCRYPTION + ulint pageno = mach_read_from_4(buf + FIL_PAGE_OFFSET); + fprintf(stderr, + "InnoDB: Note: Preparing for encryption for space %lu name %s len %lu, page no %lu\n", + space_id, fil_space_name(space), len, pageno); +#endif /* UNIV_DEBUG_PAGEENCRYPTION */ + + /* read original page type */ + orig_page_type = mach_read_from_2(buf + FIL_PAGE_TYPE); + + /* Do not encrypt file space header or extend descriptor */ + if ((orig_page_type == FIL_PAGE_TYPE_FSP_HDR) + || (orig_page_type == FIL_PAGE_TYPE_XDES) ) { + *errorCode = PAGE_ENCRYPTION_WILL_NOT_ENCRYPT; + *out_len = len; + return (buf); + } + + if (FIL_PAGE_PAGE_COMPRESSED == orig_page_type) { + page_len = log10((double)len)/log10((double)2); + } + + byte checksum_byte = fil_page_encryption_calc_checksum(buf + 
FIL_PAGE_DATA, len - FIL_PAGE_DATA); + + /* data_size bytes will be encrypted at first. + * data_size will be the length of the cipher text since no padding is used.*/ + data_size = ((len - FIL_PAGE_DATA - FIL_PAGE_DATA_END) / MY_AES_BLOCK_SIZE) * MY_AES_BLOCK_SIZE; + + + unsigned char rkey[GetCryptoKeySize(encryption_key)]; + uint key_len = sizeof(rkey); + + unsigned char iv[16]; + uint iv_len = sizeof(iv); + + if (!HasCryptoKey(encryption_key)) { + err = PAGE_ENCRYPTION_KEY_MISSING; + } else { + int rc; + + rc = GetCryptoKey(encryption_key, rkey, key_len); + if (rc != AES_OK) + { + err = PAGE_ENCRYPTION_KEY_MISSING; + } + + rc = GetCryptoIV(encryption_key, iv, iv_len); + if (rc != AES_OK) + { + err = PAGE_ENCRYPTION_KEY_MISSING; + } + } + + /* 1st encryption: data_size bytes starting from FIL_PAGE_DATA */ + if (err == AES_OK) { + err = my_aes_encrypt_dynamic( + (uchar*) buf + FIL_PAGE_DATA, + data_size, + (uchar *) out_buf + FIL_PAGE_DATA, + &write_size, + (const unsigned char *) rkey, + key_len, + (const unsigned char *) iv, + iv_len, + 1); + + ut_ad(write_size == data_size); + + if (err == AES_OK) { + /* copy remaining bytes from input buffer to output buffer. 
+ * Note, that this copies the final 8 bytes of a + * page, which consists of the + * Old-style checksum and the "Low 32 bits of LSN */ + memcpy(out_buf + FIL_PAGE_DATA + data_size, + buf + FIL_PAGE_DATA + data_size , + len - FIL_PAGE_DATA -data_size); + + if (tmp_encryption_buf == NULL) { + //create temporary buffer for 2nd encryption + tmp_buf = static_cast<byte *>(ut_malloc(64)); + } else { + tmp_buf = tmp_encryption_buf; + } + + /* 2nd encryption: 64 bytes from out_buf, + result length is 64 bytes */ + err = my_aes_encrypt_dynamic((uchar*)out_buf + len -offset -64, + 64, + (uchar*)tmp_buf, + &write_size, + (const unsigned char *)rkey, + key_len, + (const unsigned char *)iv, + iv_len, 1); + ut_ad(write_size == 64); + + /* copy 64 bytes from 2nd encryption to out_buf*/ + memcpy(out_buf + len - offset -64, tmp_buf, 64); + } + + } + + /* error handling */ + if (err != AES_OK) { + /* If an error occurred we leave the actual page as it was */ + + fprintf(stderr, + "InnoDB: Warning: Encryption failed for space %lu " + "name %s len %lu rt %d write %lu, error: %d\n", + space_id, fil_space_name(space), len, err, (ulint)data_size, err); + fflush(stderr); + srv_stats.pages_page_encryption_error.inc(); + *out_len = len; + + /* free temporary buffer */ + if (tmp_buf!=NULL && tmp_encryption_buf == NULL) { + ut_free(tmp_buf); + } + *errorCode = err; + + return (buf); + } + + /* Set up the page header. Copied from input buffer*/ + memcpy(out_buf, buf, FIL_PAGE_DATA); + + /* Set up the correct page type */ + mach_write_to_2(out_buf + FIL_PAGE_TYPE, FIL_PAGE_PAGE_ENCRYPTED); + + /* The 1st checksum field is used to store original page type, etc. + * checksum check for page encrypted pages is omitted. + */ + + /* Set up the encryption key. Written to the 1st byte of + the checksum header field. This header is currently used to store data. */ + mach_write_to_1(out_buf + FIL_PAGE_SPACE_OR_CHKSUM, key); + + /* store original page type. 
Written to 2nd and 3rd byte + of the checksum header field */ + mach_write_to_2(out_buf + FIL_PAGE_SPACE_OR_CHKSUM + 1, orig_page_type); + + if (FIL_PAGE_PAGE_COMPRESSED == orig_page_type) { + /* set byte 4 of checksum field to page length (ln(len)) */ + memset(out_buf + FIL_PAGE_SPACE_OR_CHKSUM + 3, page_len, 1); + } else { + /* set byte 4 of checksum field to checksum byte */ + memset(out_buf + FIL_PAGE_SPACE_OR_CHKSUM + 3, checksum_byte, 1); + } + +#ifdef UNIV_DEBUG + /* Verify */ + ut_ad(fil_page_is_encrypted(out_buf)); + +#endif /* UNIV_DEBUG */ + + srv_stats.pages_page_encrypted.inc(); + *out_len = len; + + /* free temporary buffer */ + if (tmp_buf!=NULL && tmp_encryption_buf == NULL) { + ut_free(tmp_buf); + } + + return (out_buf); +} + +/****************************************************************//** + For page encrypted pages decrypt the page after actual read + operation. + + See fil_encrypt_page for details, how the encryption works. + + If the decryption can be verified, original page should be completely restored. + This includes original page type, 4-byte checksum field at page start. + If it is not a page compressed table's page, decryption is verified against + a 1-byte checksum built over the plain data bytes. If this verification + fails, an error state is returned. + + @return decrypted page */ +ulint +fil_decrypt_page( +/*=============*/ + byte* page_buf, /*!< in: preallocated buffer or NULL */ + byte* buf, /*!< in/out: buffer from which to read; in aio + this must be appropriately aligned */ + ulint len, /*!< in: length buffer, which should be decrypted.*/ + ulint* write_size, /*!< out: size of the decrypted + data. 
If no error occurred equal to len */ + ibool* page_compressed,/*!<out: is page compressed.*/ + byte* tmp_encryption_buf) /*!< in: temporary buffer or NULL */ +{ + int err = AES_OK; + ulint page_decryption_key; + uint32 data_size = 0; + ulint orig_page_type = 0; + uint32 tmp_write_size = 0; + ulint offset = 0; + byte *in_buf = NULL; + byte *tmp_buf = NULL; + fil_space_t* space = NULL; + + ulint page_compression_flag = 0; + + ut_ad(buf); + ut_ad(len); + + /* Before actual decrypt, make sure that page type is correct */ + ulint current_page_type = mach_read_from_2(buf + FIL_PAGE_TYPE); + + if ((current_page_type == FIL_PAGE_TYPE_FSP_HDR) + || (current_page_type == FIL_PAGE_TYPE_XDES)) { + /* assumed as unencrypted */ + if (write_size!=NULL) { + *write_size = len; + } + return AES_OK; + } + + if (current_page_type != FIL_PAGE_PAGE_ENCRYPTED) { + + fprintf(stderr, "InnoDB: Corruption: We try to decrypt corrupted page\n" + "InnoDB: CRC %lu type %lu.\n" + "InnoDB: len %lu\n", + mach_read_from_4(buf + FIL_PAGE_SPACE_OR_CHKSUM), + mach_read_from_2(buf + FIL_PAGE_TYPE), len); + + fflush(stderr); + return PAGE_ENCRYPTION_WRONG_PAGE_TYPE; + } + + /* 1st checksum field is used to store original page type, etc. + * checksum check for page encrypted pages is omitted. 
+ */ + + /* read page encryption key */ + page_decryption_key = mach_read_from_1(buf + FIL_PAGE_SPACE_OR_CHKSUM); + + /* Get the page type */ + orig_page_type = mach_read_from_2(buf + FIL_PAGE_SPACE_OR_CHKSUM + 1); + + /* read checksum byte */ + byte stored_checksum_byte = mach_read_from_1(buf + FIL_PAGE_SPACE_OR_CHKSUM + 3); + + if (FIL_PAGE_PAGE_COMPRESSED == orig_page_type) { + if (page_compressed != NULL) { + *page_compressed = 1L; + } + page_compression_flag = 1; + len = pow((double)2, (double)((int)stored_checksum_byte)); + offset = 0; + } + + data_size = ((len - FIL_PAGE_DATA - FIL_PAGE_DATA_END) / MY_AES_BLOCK_SIZE) * MY_AES_BLOCK_SIZE; + + + unsigned char rkey[GetCryptoKeySize(page_decryption_key)]; + uint key_len = sizeof(rkey); + + unsigned char iv[16]; + uint iv_len = sizeof(iv); + + if (!HasCryptoKey(page_decryption_key)) { + err = PAGE_ENCRYPTION_KEY_MISSING; + } else { + int rc; + + rc = GetCryptoKey(page_decryption_key, rkey, key_len); + if (rc != AES_OK) + { + err = PAGE_ENCRYPTION_KEY_MISSING; + } + + rc = GetCryptoIV(page_decryption_key, iv, iv_len); + if (rc != AES_OK) + { + err = PAGE_ENCRYPTION_KEY_MISSING; + } + } + + + if (err != AES_OK) { + /* surely key could not be determined. 
*/ + fprintf(stderr, "InnoDB: Corruption: Page is marked as encrypted\n" + "InnoDB: but decrypt failed with error %d, encryption key %d.\n", + err, (int)page_decryption_key); + fflush(stderr); + + return err; + } + + if (tmp_encryption_buf == NULL) { + tmp_buf= static_cast<byte *>(ut_malloc(64)); + } else { + tmp_buf = tmp_encryption_buf; + } + + // If no buffer was given, we need to allocate temporal buffer + if (page_buf == NULL) { +#ifdef UNIV_PAGECOMPRESS_DEBUG + fprintf(stderr, + "InnoDB: Note: FIL: Encryption buffer not given, allocating...\n"); +#endif /* UNIV_PAGECOMPRESS_DEBUG */ + in_buf = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*2)); + } else { + in_buf = page_buf; + } + + /* 1st decryption: 64 bytes */ + /* 64 bytes from data area are copied to temporary buffer. + * These are the last 64 of the (encrypted) payload */ + memcpy(tmp_buf, buf + len - offset - 64, 64); + + err = my_aes_decrypt_dynamic( + (const uchar*) tmp_buf, + 64, + (uchar *) in_buf + len - offset - 64, + &tmp_write_size, + (const unsigned char *) rkey, + key_len, + (const unsigned char *) iv, + iv_len, + 1); + + ut_ad(tmp_write_size == 64); + + /* If decrypt fails it means that page is corrupted or has an unknown key */ + if (err != AES_OK) { + fprintf(stderr, "InnoDB: Corruption: Page is marked as encrypted\n" + "InnoDB: but decrypt failed with error %d.\n" + "InnoDB: size %lu len %lu, key %d\n", err, (ulint)data_size, + len, (int)page_decryption_key); + fflush(stderr); + + if (tmp_encryption_buf == NULL) { + ut_free(tmp_buf); + } + + if (page_buf == NULL) { + ut_free(in_buf); + } + return err; + } + + ut_ad(tmp_write_size == 64); + + /* copy 1st part of payload from buf to in_buf */ + /* do not override result of 1st decryption */ + memcpy(in_buf + FIL_PAGE_DATA, buf + FIL_PAGE_DATA, len -offset -64 - FIL_PAGE_DATA); + + + /* Decrypt rest of the page */ + err = my_aes_decrypt_dynamic((uchar*) in_buf + FIL_PAGE_DATA, + data_size, + (uchar *) buf + FIL_PAGE_DATA, + &tmp_write_size, + 
(const unsigned char *)&rkey, + key_len, + (const unsigned char *)&iv, + iv_len, + 1); + + ut_ad(tmp_write_size = data_size); + + /* copy remaining bytes from in_buf to buf. + */ + ulint bytes_to_copy = len - FIL_PAGE_DATA - data_size - offset; + memcpy(buf + FIL_PAGE_DATA + data_size, in_buf + FIL_PAGE_DATA + data_size, bytes_to_copy); + + /* apart from header data everything is now in in_buf */ + + if (tmp_encryption_buf == NULL) { + ut_free(tmp_buf); + } + +#ifdef UNIV_PAGEENCRIPTION_DEBUG + fprintf(stderr, "InnoDB: Note: Decryption succeeded for len %lu\n", len); + fflush(stderr); +#endif + + if (page_buf == NULL) { + ut_free(in_buf); + } + + /* setting original page type */ + mach_write_to_2(buf + FIL_PAGE_TYPE, orig_page_type); + + ulint pageno = mach_read_from_4(buf + FIL_PAGE_OFFSET); + ulint flags = 0; + ulint zip_size = 0; + + /* please note, that page with number 0 is not encrypted */ + if (pageno == 0 ) { + flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + buf); + } else { + ulint space_id = mach_read_from_4(buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + fil_system_enter(); + space = fil_space_get_by_id(space_id); + flags = fil_space_flags(space); + fil_system_exit(); + } + + if (!(page_compression_flag)) { + zip_size = fsp_flags_get_zip_size(flags); + } + + if (write_size!=NULL) { + *write_size = len; + } + + if (!(page_compression_flag)) { + byte checksum_byte = fil_page_encryption_calc_checksum(buf + FIL_PAGE_DATA, len - FIL_PAGE_DATA); + + if (checksum_byte != stored_checksum_byte) { + err = PAGE_ENCRYPTION_WRONG_KEY; + fprintf(stderr, "InnoDB: Corruption: Page is marked as encrypted\n" + "InnoDB: but decryption verification failed with error %d," + " encryption key %d.\n", + err, (int)page_decryption_key); + fflush(stderr); + return err; + } + + /* calc check sums and write to the buffer, if page is not of type PAGE_COMPRESSED. 
+ * if the decryption is verified, it is assumed that the + * original page was restored, re-calculating the original + * checksums should be ok + */ + do_check_sum(len, zip_size, buf); + } else { + /* page_compression uses BUF_NO_CHECKSUM_MAGIC as checksum */ + mach_write_to_4(buf + FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC); + } + + srv_stats.pages_page_decrypted.inc(); + + return err; +} + + diff --git a/storage/xtradb/fsp/fsp0fsp.cc b/storage/xtradb/fsp/fsp0fsp.cc index c06d4213d73..0fffe60253e 100644 --- a/storage/xtradb/fsp/fsp0fsp.cc +++ b/storage/xtradb/fsp/fsp0fsp.cc @@ -769,7 +769,12 @@ fsp_header_init( } else { fsp_fill_free_list(TRUE, space, header, mtr); } + + ulint maxsize = 0; + ulint offset = fsp_header_get_crypt_offset(zip_size, &maxsize); + fil_space_write_crypt_data(space, page, offset, maxsize, mtr); } + #endif /* !UNIV_HOTBACKUP */ /**********************************************************************//** @@ -4149,3 +4154,61 @@ fsp_print( fprintf(stderr, "NUMBER of file segments: %lu\n", (ulong) n_segs); } #endif /* !UNIV_HOTBACKUP */ + +/**********************************************************************//** +Compute offset after xdes where crypt data can be stored +@return offset */ +ulint +fsp_header_get_crypt_offset( +/*========================*/ + ulint zip_size, /*!< in: zip_size */ + ulint* max_size) /*!< out: free space available for crypt data */ +{ + ulint pageno = 0; + /* compute first page_no that will have xdes stored on page != 0*/ + for (ulint i = 0; + (pageno = xdes_calc_descriptor_page(zip_size, i)) == 0; ) + i++; + + /* use pageno prior to this...i.e last page on page 0 */ + ut_ad(pageno > 0); + pageno--; + + ulint iv_offset = XDES_ARR_OFFSET + + XDES_SIZE * (1 + xdes_calc_descriptor_index(zip_size, pageno)); + + if (max_size != NULL) { + /* return how much free space there is available on page */ + *max_size = (zip_size ? 
zip_size : UNIV_PAGE_SIZE) - + (FSP_HEADER_OFFSET + iv_offset + FIL_PAGE_DATA_END); + } + + return FSP_HEADER_OFFSET + iv_offset; +} + +/**********************************************************************//** +Checks if a single page is free. +@return true if free */ +UNIV_INTERN +bool +fsp_page_is_free_func( +/*==============*/ + ulint space, /*!< in: space id */ + ulint page_no, /*!< in: page offset */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + const char *file, + ulint line) +{ + ulint flags; + + ut_ad(mtr); + + mtr_x_lock_func(fil_space_get_latch(space, &flags), file, line, mtr); + ulint zip_size = fsp_flags_get_zip_size(flags); + + xdes_t* descr = xdes_get_descriptor(space, zip_size, page_no, mtr); + ut_a(descr); + + return xdes_mtr_get_bit( + descr, XDES_FREE_BIT, page_no % FSP_EXTENT_SIZE, mtr); +} diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 411bc381972..d65e2e321f2 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -107,6 +107,7 @@ this program; if not, write to the Free Software Foundation, Inc., #include "page0zip.h" #include "fil0pagecompress.h" + #define thd_get_trx_isolation(X) ((enum_tx_isolation)thd_tx_isolation(X)) #ifdef MYSQL_DYNAMIC_PLUGIN @@ -244,6 +245,20 @@ static char* internal_innobase_data_file_path = NULL; static char* innodb_version_str = (char*) INNODB_VERSION_STR; +extern my_bool srv_encrypt_tables; +extern uint srv_n_fil_crypt_threads; +extern uint srv_fil_crypt_rotate_key_age; +extern uint srv_n_fil_crypt_iops; + +extern my_bool srv_immediate_scrub_data_uncompressed; +extern my_bool srv_background_scrub_data_uncompressed; +extern my_bool srv_background_scrub_data_compressed; +extern uint srv_background_scrub_data_interval; +extern uint srv_background_scrub_data_check_interval; +#ifdef UNIV_DEBUG +extern my_bool srv_scrub_force_testing; +#endif + /** Possible values for system variable "innodb_stats_method". 
The values are defined the same as its corresponding MyISAM system variable "myisam_stats_method"(see "myisam_stats_method_names"), for better usability */ @@ -616,6 +631,12 @@ ha_create_table_option innodb_table_option_list[]= HA_TOPTION_NUMBER("PAGE_COMPRESSION_LEVEL", page_compression_level, ULINT_UNDEFINED, 0, 9, 1), /* With this option user can enable atomic writes feature for this table */ HA_TOPTION_ENUM("ATOMIC_WRITES", atomic_writes, "DEFAULT,ON,OFF", 0), + /* With this option the user can enable page encryption for the table */ + HA_TOPTION_BOOL("PAGE_ENCRYPTION", page_encryption, 0), + + /* With this option the user defines the key identifier using for the encryption */ + HA_TOPTION_NUMBER("PAGE_ENCRYPTION_KEY", page_encryption_key, ULINT_UNDEFINED, 1, 255, 1), + HA_TOPTION_END }; @@ -990,6 +1011,14 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_page_compressed_trim_op_saved, SHOW_LONGLONG}, {"num_pages_page_decompressed", (char*) &export_vars.innodb_pages_page_decompressed, SHOW_LONGLONG}, + {"num_pages_page_compression_error", + (char*) &export_vars.innodb_pages_page_compression_error, SHOW_LONGLONG}, + {"num_pages_page_encrypted", + (char*) &export_vars.innodb_pages_page_encrypted, SHOW_LONGLONG}, + {"num_pages_page_decrypted", + (char*) &export_vars.innodb_pages_page_decrypted, SHOW_LONGLONG}, + {"num_pages_page_encryption_error", + (char*) &export_vars.innodb_pages_page_encryption_error, SHOW_LONGLONG}, {"have_lz4", (char*) &innodb_have_lz4, SHOW_BOOL}, {"have_lzo", @@ -1022,6 +1051,42 @@ static SHOW_VAR innodb_status_variables[]= { {"secondary_index_triggered_cluster_reads_avoided", (char*) &export_vars.innodb_sec_rec_cluster_reads_avoided, SHOW_LONG}, + /* Encryption */ + {"encryption_rotation_pages_read_from_cache", + (char*) &export_vars.innodb_encryption_rotation_pages_read_from_cache, + SHOW_LONG}, + {"encryption_rotation_pages_read_from_disk", + (char*) &export_vars.innodb_encryption_rotation_pages_read_from_disk, 
+ SHOW_LONG}, + {"encryption_rotation_pages_modified", + (char*) &export_vars.innodb_encryption_rotation_pages_modified, + SHOW_LONG}, + {"encryption_rotation_pages_flushed", + (char*) &export_vars.innodb_encryption_rotation_pages_flushed, + SHOW_LONG}, + {"encryption_rotation_estimated_iops", + (char*) &export_vars.innodb_encryption_rotation_estimated_iops, + SHOW_LONG}, + + /* Scrubbing feature */ + {"scrub_background_page_reorganizations", + (char*) &export_vars.innodb_scrub_page_reorganizations, + SHOW_LONG}, + {"scrub_background_page_splits", + (char*) &export_vars.innodb_scrub_page_splits, + SHOW_LONG}, + {"scrub_background_page_split_failures_underflow", + (char*) &export_vars.innodb_scrub_page_split_failures_underflow, + SHOW_LONG}, + {"scrub_background_page_split_failures_out_of_filespace", + (char*) &export_vars.innodb_scrub_page_split_failures_out_of_filespace, + SHOW_LONG}, + {"scrub_background_page_split_failures_missing_index", + (char*) &export_vars.innodb_scrub_page_split_failures_missing_index, + SHOW_LONG}, + {"scrub_background_page_split_failures_unknown", + (char*) &export_vars.innodb_scrub_page_split_failures_unknown, + SHOW_LONG}, {NullS, NullS, SHOW_LONG} }; @@ -3518,7 +3583,7 @@ innobase_init( goto error; } } - + #ifndef HAVE_LZ4 if (innodb_compression_algorithm == PAGE_LZ4_ALGORITHM) { sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" @@ -3586,6 +3651,7 @@ innobase_init( srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir : default_path); + /* Set default InnoDB data file size to 12 MB and let it be auto-extending. Thus users can use InnoDB in >= 4.0 without having to specify any startup options. */ @@ -11475,6 +11541,8 @@ innobase_table_flags( modified by another thread while the table is being created.
*/ const ulint default_compression_level = page_zip_level; + const ulint default_encryption_key = 1; + *flags = 0; *flags2 = 0; @@ -11673,7 +11741,10 @@ index_bad: options->page_compressed, (ulint)options->page_compression_level == ULINT_UNDEFINED ? default_compression_level : options->page_compression_level, - options->atomic_writes); + options->atomic_writes, + options->page_encryption, + (ulint)options->page_encryption_key == ULINT_UNDEFINED ? + default_encryption_key : options->page_encryption_key); if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { *flags2 |= DICT_TF2_TEMPORARY; @@ -11710,6 +11781,17 @@ ha_innobase::check_table_options( ha_table_option_struct *options= table->s->option_struct; atomic_writes_t awrites = (atomic_writes_t)options->atomic_writes; + if (options->page_encryption) { + if (!use_tablespace) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_ENCRYPTION requires" + " innodb_file_per_table."); + return "PAGE_ENCRYPTION"; + } + } + /* Check page compression requirements */ if (options->page_compressed) { @@ -11782,6 +11864,33 @@ ha_innobase::check_table_options( } } + if ((ulint)options->page_encryption_key != ULINT_UNDEFINED) { + if (options->page_encryption == false) { + /* ignore this to allow alter table without changing page_encryption_key ...*/ + } + + if (options->page_encryption_key < 1 || options->page_encryption_key > 255) { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: invalid PAGE_ENCRYPTION_KEY = %lu." 
+ " Valid values are [1..255]", + options->page_encryption_key); + return "PAGE_ENCRYPTION_KEY"; + } + + if (!HasCryptoKey(options->page_encryption_key)) { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_ENCRYPTION_KEY encryption key %lu not available", + options->page_encryption_key + ); + return "PAGE_ENCRYPTION_KEY"; + + } + } + /* Check atomic writes requirements */ if (awrites == ATOMIC_WRITES_ON || (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) { @@ -18566,6 +18675,57 @@ innodb_status_output_update( os_event_set(srv_monitor_event); } +/****************************************************************** +Update the system variable innodb_encryption_threads */ +static +void +innodb_encryption_threads_update( +/*=========================*/ + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to + system variable */ + void* var_ptr,/*!< out: where the + formal string goes */ + const void* save) /*!< in: immediate result + from check function */ +{ + fil_crypt_set_thread_cnt(*static_cast<const uint*>(save)); +} + +/****************************************************************** +Update the system variable innodb_encryption_rotate_key_age */ +static +void +innodb_encryption_rotate_key_age_update( +/*=========================*/ + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to + system variable */ + void* var_ptr,/*!< out: where the + formal string goes */ + const void* save) /*!< in: immediate result + from check function */ +{ + fil_crypt_set_rotate_key_age(*static_cast<const uint*>(save)); +} + +/****************************************************************** +Update the system variable innodb_encryption_rotation_iops */ +static +void +innodb_encryption_rotation_iops_update( +/*=========================*/ + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to + system variable */ + 
void* var_ptr,/*!< out: where the + formal string goes */ + const void* save) /*!< in: immediate result + from check function */ +{ + fil_crypt_set_rotation_iops(*static_cast<const uint*>(save)); +} + static SHOW_VAR innodb_status_variables_export[]= { {"Innodb", (char*) &show_innodb_vars, SHOW_FUNC}, {NullS, NullS, SHOW_LONG} @@ -20115,6 +20275,108 @@ static MYSQL_SYSVAR_ULONG(fatal_semaphore_wait_threshold, srv_fatal_semaphore_wa UINT_MAX32, /* Maximum setting */ 0); +static MYSQL_SYSVAR_BOOL(encrypt_tables, srv_encrypt_tables, 0, + "Encrypt all tables in the storage engine", + 0, 0, 0); + +static MYSQL_SYSVAR_UINT(encryption_threads, srv_n_fil_crypt_threads, + PLUGIN_VAR_RQCMDARG, + "No of threads performing background key rotation and " + "scrubbing", + NULL, + innodb_encryption_threads_update, + srv_n_fil_crypt_threads, 0, UINT_MAX32, 0); + +static MYSQL_SYSVAR_UINT(encryption_rotate_key_age, + srv_fil_crypt_rotate_key_age, + PLUGIN_VAR_RQCMDARG, + "Rotate any page having a key older than this", + NULL, + innodb_encryption_rotate_key_age_update, + srv_fil_crypt_rotate_key_age, 0, UINT_MAX32, 0); + +static MYSQL_SYSVAR_UINT(encryption_rotation_iops, srv_n_fil_crypt_iops, + PLUGIN_VAR_RQCMDARG, + "Use this many iops for background key rotation", + NULL, + innodb_encryption_rotation_iops_update, + srv_n_fil_crypt_iops, 0, UINT_MAX32, 0); + +static MYSQL_SYSVAR_BOOL(scrub_log, srv_scrub_log, + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, + "Enable redo log scrubbing", + 0, 0, 0); + +/* + If innodb_scrub_log is on, logs will be scrubbed in less than + (((innodb_log_file_size * innodb_log_files_in_group) / 512 ) / + ((1000 * 86400) / innodb_scrub_log_interval)) + days. + In above formula, the first line calculates the number of log blocks to scrub, + and the second line calculates the number of log blocks scrubbed in one day. 
+*/ +static MYSQL_SYSVAR_ULONGLONG(scrub_log_interval, innodb_scrub_log_interval, + PLUGIN_VAR_OPCMDARG, + "Innodb redo log scrubbing interval in ms", + NULL, NULL, + 2000, /* default */ + 10, /* min */ + ULONGLONG_MAX, 0);/* max */ + +static MYSQL_SYSVAR_BOOL(encrypt_log, srv_encrypt_log, + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, + "Enable redo log encryption/decryption.", + NULL, NULL, FALSE); + +static MYSQL_SYSVAR_BOOL(immediate_scrub_data_uncompressed, + srv_immediate_scrub_data_uncompressed, + 0, + "Enable scrubbing of data", + NULL, NULL, FALSE); + +static MYSQL_SYSVAR_BOOL(background_scrub_data_uncompressed, + srv_background_scrub_data_uncompressed, + 0, + "Enable scrubbing of uncompressed data by " + "background threads (same as encryption_threads)", + NULL, NULL, FALSE); + +static MYSQL_SYSVAR_BOOL(background_scrub_data_compressed, + srv_background_scrub_data_compressed, + 0, + "Enable scrubbing of compressed data by " + "background threads (same as encryption_threads)", + NULL, NULL, FALSE); + +static MYSQL_SYSVAR_UINT(background_scrub_data_check_interval, + srv_background_scrub_data_check_interval, + 0, + "check if spaces needs scrubbing every " + "innodb_background_scrub_data_check_interval " + "seconds", + NULL, NULL, + srv_background_scrub_data_check_interval, + 1, + UINT_MAX32, 0); + +static MYSQL_SYSVAR_UINT(background_scrub_data_interval, + srv_background_scrub_data_interval, + 0, + "scrub spaces that were last scrubbed longer than " + " innodb_background_scrub_data_interval seconds ago", + NULL, NULL, + srv_background_scrub_data_interval, + 1, + UINT_MAX32, 0); + +#ifdef UNIV_DEBUG +static MYSQL_SYSVAR_BOOL(scrub_force_testing, + srv_scrub_force_testing, + 0, + "Perform extra scrubbing to increase test exposure", + NULL, NULL, FALSE); +#endif /* UNIV_DEBUG */ + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(log_block_size), MYSQL_SYSVAR(additional_mem_pool_size), @@ -20325,12 +20587,30 @@ static struct 
st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(locking_fake_changes), MYSQL_SYSVAR(use_stacktrace), MYSQL_SYSVAR(force_primary_key), + MYSQL_SYSVAR(fatal_semaphore_wait_threshold), + /* Table page compression feature */ MYSQL_SYSVAR(use_trim), MYSQL_SYSVAR(compression_algorithm), MYSQL_SYSVAR(mtflush_threads), MYSQL_SYSVAR(use_mtflush), + /* Encryption feature */ + MYSQL_SYSVAR(encrypt_tables), + MYSQL_SYSVAR(encryption_threads), + MYSQL_SYSVAR(encryption_rotate_key_age), + MYSQL_SYSVAR(encryption_rotation_iops), + MYSQL_SYSVAR(scrub_log), + MYSQL_SYSVAR(scrub_log_interval), + MYSQL_SYSVAR(encrypt_log), + /* Scrubbing feature */ + MYSQL_SYSVAR(immediate_scrub_data_uncompressed), + MYSQL_SYSVAR(background_scrub_data_uncompressed), + MYSQL_SYSVAR(background_scrub_data_compressed), + MYSQL_SYSVAR(background_scrub_data_interval), + MYSQL_SYSVAR(background_scrub_data_check_interval), +#ifdef UNIV_DEBUG + MYSQL_SYSVAR(scrub_force_testing), +#endif - MYSQL_SYSVAR(fatal_semaphore_wait_threshold), NULL }; @@ -20340,7 +20620,7 @@ maria_declare_plugin(xtradb) &innobase_storage_engine, innobase_hton_name, plugin_author, - "Percona-XtraDB, Supports transactions, row-level locking, and foreign keys", + "Percona-XtraDB, Supports transactions, row-level locking, foreign keys and encryption for tables", PLUGIN_LICENSE_GPL, innobase_init, /* Plugin Init */ NULL, /* Plugin Deinit */ @@ -20381,7 +20661,9 @@ i_s_innodb_sys_foreign, i_s_innodb_sys_foreign_cols, i_s_innodb_sys_tablespaces, i_s_innodb_sys_datafiles, -i_s_innodb_changed_pages +i_s_innodb_changed_pages, +i_s_innodb_tablespaces_encryption, +i_s_innodb_tablespaces_scrubbing maria_declare_plugin_end; /** @brief Initialize the default value of innodb_commit_concurrency.
diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index e649fc09b1d..0acf77da28f 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -58,7 +58,7 @@ typedef struct st_innobase_share { /** Prebuilt structures in an InnoDB table handle used within MySQL */ struct row_prebuilt_t; -/** Engine specific table options are definined using this struct */ +/** Engine specific table options are defined using this struct */ struct ha_table_option_struct { bool page_compressed; /*!< Table is using page compression @@ -71,6 +71,9 @@ struct ha_table_option_struct srv_use_atomic_writes=1. Atomic writes are not used if value OFF.*/ + bool page_encryption; /*!< Flag for an encrypted table */ + /* Following can't be unsigned as it's compared with ULINT_UNDEFINED */ + int page_encryption_key; /*!< ID of the encryption key */ }; /** The class defining a handle to an Innodb table */ diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc index 250c1019e09..7e8e6e4598e 100644 --- a/storage/xtradb/handler/handler0alter.cc +++ b/storage/xtradb/handler/handler0alter.cc @@ -281,6 +281,13 @@ ha_innobase::check_if_supported_inplace_alter( ER_ALTER_OPERATION_NOT_SUPPORTED_REASON); DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); } + + if (new_options->page_encryption != old_options->page_encryption || + new_options->page_encryption_key != old_options->page_encryption_key) { + ha_alter_info->unsupported_reason = innobase_get_err_msg( + ER_ALTER_OPERATION_NOT_SUPPORTED_REASON); + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } } if (ha_alter_info->handler_flags diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc index 589117d0858..99b1f486862 100644 --- a/storage/xtradb/handler/i_s.cc +++ b/storage/xtradb/handler/i_s.cc @@ -4455,10 +4455,14 @@ i_s_innodb_stats_fill( info->pages_written_rate)); if (info->n_page_get_delta) { - OK(fields[IDX_BUF_STATS_HIT_RATE]->store( 
- static_cast<double>( - 1000 - (1000 * info->page_read_delta - / info->n_page_get_delta)))); + if (info->page_read_delta <= info->n_page_get_delta) { + OK(fields[IDX_BUF_STATS_HIT_RATE]->store( + static_cast<double>( + 1000 - (1000 * info->page_read_delta + / info->n_page_get_delta)))); + } else { + OK(fields[IDX_BUF_STATS_HIT_RATE]->store(0)); + } OK(fields[IDX_BUF_STATS_MADE_YOUNG_PCT]->store( static_cast<double>( @@ -8410,3 +8414,581 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_changed_pages = STRUCT_FLD(system_vars, NULL), INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE }; + +/** TABLESPACES_ENCRYPTION ********************************************/ +/* Fields of the table INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION */ +static ST_FIELD_INFO innodb_tablespaces_encryption_fields_info[] = +{ +#define TABLESPACES_ENCRYPTION_SPACE 0 + {STRUCT_FLD(field_name, "SPACE"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_ENCRYPTION_NAME 1 + {STRUCT_FLD(field_name, "NAME"), + STRUCT_FLD(field_length, MAX_FULL_NAME_LEN + 1), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_ENCRYPTION_ENCRYPTION_SCHEME 2 + {STRUCT_FLD(field_name, "ENCRYPTION_SCHEME"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_ENCRYPTION_KEYSERVER_REQUESTS 3 + {STRUCT_FLD(field_name, "KEYSERVER_REQUESTS"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + 
STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_ENCRYPTION_MIN_KEY_VERSION 4 + {STRUCT_FLD(field_name, "MIN_KEY_VERSION"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_ENCRYPTION_CURRENT_KEY_VERSION 5 + {STRUCT_FLD(field_name, "CURRENT_KEY_VERSION"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER 6 + {STRUCT_FLD(field_name, "KEY_ROTATION_PAGE_NUMBER"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER 7 + {STRUCT_FLD(field_name, "KEY_ROTATION_MAX_PAGE_NUMBER"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + END_OF_ST_FIELD_INFO +}; + +/**********************************************************************//** +Function to fill INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION +with information collected by scanning SYS_TABLESPACES table and then use +fil_space() +@return 0 on success */ +static +int +i_s_dict_fill_tablespaces_encryption( +/*==========================*/ + THD* thd, /*!< in: thread */ + 
ulint space, /*!< in: space ID */ + const char* name, /*!< in: tablespace name */ + TABLE* table_to_fill) /*!< in/out: fill this table */ +{ + Field** fields; + struct fil_space_crypt_status_t status; + + DBUG_ENTER("i_s_dict_fill_tablespaces_encryption"); + + fields = table_to_fill->field; + + fil_space_crypt_get_status(space, &status); + OK(fields[TABLESPACES_ENCRYPTION_SPACE]->store(space)); + + OK(field_store_string(fields[TABLESPACES_ENCRYPTION_NAME], + name)); + + OK(fields[TABLESPACES_ENCRYPTION_ENCRYPTION_SCHEME]->store( + status.scheme)); + OK(fields[TABLESPACES_ENCRYPTION_KEYSERVER_REQUESTS]->store( + status.keyserver_requests)); + OK(fields[TABLESPACES_ENCRYPTION_MIN_KEY_VERSION]->store( + status.min_key_version)); + OK(fields[TABLESPACES_ENCRYPTION_CURRENT_KEY_VERSION]->store( + status.current_key_version)); + if (status.rotating) { + fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER]->set_notnull(); + OK(fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER]->store( + status.rotate_next_page_number)); + fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER]->set_notnull(); + OK(fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER]->store( + status.rotate_max_page_number)); + } else { + fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER] + ->set_null(); + fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER] + ->set_null(); + } + OK(schema_table_store_record(thd, table_to_fill)); + + DBUG_RETURN(0); +} +/*******************************************************************//** +Function to populate INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION table. +Loop through each record in TABLESPACES_ENCRYPTION, and extract the column +information and fill the INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION table. 
+@return 0 on success */ +static +int +i_s_tablespaces_encryption_fill_table( +/*===========================*/ + THD* thd, /*!< in: thread */ + TABLE_LIST* tables, /*!< in/out: tables to fill */ + Item* ) /*!< in: condition (not used) */ +{ + btr_pcur_t pcur; + const rec_t* rec; + mem_heap_t* heap; + mtr_t mtr; + bool found_space_0 = false; + + DBUG_ENTER("i_s_tablespaces_encryption_fill_table"); + + /* deny access to user without PROCESS_ACL privilege. NOTE(review): the check below tests SUPER_ACL, not PROCESS_ACL; confirm which privilege is intended and make the comment and the code agree */ + if (check_global_access(thd, SUPER_ACL)) { + DBUG_RETURN(0); + } + + heap = mem_heap_create(1000); + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + + rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES); + + while (rec) { + const char* err_msg; + ulint space; + const char* name; + ulint flags; + + /* Extract necessary information from a SYS_TABLESPACES row */ + err_msg = dict_process_sys_tablespaces( + heap, rec, &space, &name, &flags); + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + + /* NOTE(review): space is declared without an initializer and is read here before err_msg is tested; confirm dict_process_sys_tablespaces() always assigns it, even on error */ if (space == 0) { + found_space_0 = true; + } + + if (!err_msg) { + i_s_dict_fill_tablespaces_encryption( + thd, space, name, tables->table); + } else { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_CANT_FIND_SYSTEM_REC, "%s", + err_msg); + } + + mem_heap_empty(heap); + + /* Get the next record */ + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + rec = dict_getnext_system(&pcur, &mtr); + } + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + mem_heap_free(heap); + + if (found_space_0 == false) { + /* space 0 does for whatever unknown reason not show up + * in iteration above, add it manually */ + ulint space = 0; + const char* name = NULL; + i_s_dict_fill_tablespaces_encryption( + thd, space, name, tables->table); + } + + DBUG_RETURN(0); +} +/*******************************************************************//** +Bind the dynamic table INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION +@return 0 on success */ +static +int +innodb_tablespaces_encryption_init( +/*========================*/ +
+ void* p) /*!< in/out: table schema object */ +{ + ST_SCHEMA_TABLE* schema; + + DBUG_ENTER("innodb_tablespaces_encryption_init"); + + schema = (ST_SCHEMA_TABLE*) p; + + schema->fields_info = innodb_tablespaces_encryption_fields_info; + schema->fill_table = i_s_tablespaces_encryption_fill_table; + + DBUG_RETURN(0); +} + +UNIV_INTERN struct st_maria_plugin i_s_innodb_tablespaces_encryption = +{ + /* the plugin type (a MYSQL_XXX_PLUGIN value) */ + /* int */ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + + /* pointer to type-specific plugin descriptor */ + /* void* */ + STRUCT_FLD(info, &i_s_info), + + /* plugin name */ + /* const char* */ + STRUCT_FLD(name, "INNODB_TABLESPACES_ENCRYPTION"), + + /* plugin author (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(author, "Google Inc"), + + /* general descriptive text (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(descr, "InnoDB TABLESPACES_ENCRYPTION"), + + /* the plugin license (PLUGIN_LICENSE_XXX) */ + /* int */ + STRUCT_FLD(license, PLUGIN_LICENSE_BSD), + + /* the function to invoke when plugin is loaded */ + /* int (*)(void*); */ + STRUCT_FLD(init, innodb_tablespaces_encryption_init), + + /* the function to invoke when plugin is unloaded */ + /* int (*)(void*); */ + STRUCT_FLD(deinit, i_s_common_deinit), + + /* plugin version (for SHOW PLUGINS) */ + /* unsigned int */ + STRUCT_FLD(version, INNODB_VERSION_SHORT), + + /* struct st_mysql_show_var* */ + STRUCT_FLD(status_vars, NULL), + + /* struct st_mysql_sys_var** */ + STRUCT_FLD(system_vars, NULL), + + INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE +}; + +/** TABLESPACES_SCRUBBING ********************************************/ +/* Fields of the table INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING */ +static ST_FIELD_INFO innodb_tablespaces_scrubbing_fields_info[] = +{ +#define TABLESPACES_SCRUBBING_SPACE 0 + {STRUCT_FLD(field_name, "SPACE"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, 
MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_SCRUBBING_NAME 1 + {STRUCT_FLD(field_name, "NAME"), + STRUCT_FLD(field_length, MAX_FULL_NAME_LEN + 1), + STRUCT_FLD(field_type, MYSQL_TYPE_STRING), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_SCRUBBING_COMPRESSED 2 + {STRUCT_FLD(field_name, "COMPRESSED"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED 3 + {STRUCT_FLD(field_name, "LAST_SCRUB_COMPLETED"), + STRUCT_FLD(field_length, 0), + STRUCT_FLD(field_type, MYSQL_TYPE_DATETIME), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_SCRUBBING_CURRENT_SCRUB_STARTED 4 + {STRUCT_FLD(field_name, "CURRENT_SCRUB_STARTED"), + STRUCT_FLD(field_length, 0), + STRUCT_FLD(field_type, MYSQL_TYPE_DATETIME), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_SCRUBBING_CURRENT_SCRUB_ACTIVE_THREADS 5 + {STRUCT_FLD(field_name, "CURRENT_SCRUB_ACTIVE_THREADS"), + STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_SCRUBBING_CURRENT_SCRUB_PAGE_NUMBER 6 + {STRUCT_FLD(field_name, "CURRENT_SCRUB_PAGE_NUMBER"), + STRUCT_FLD(field_length, 
MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + +#define TABLESPACES_SCRUBBING_CURRENT_SCRUB_MAX_PAGE_NUMBER 7 + {STRUCT_FLD(field_name, "CURRENT_SCRUB_MAX_PAGE_NUMBER"), + STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), + STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), + STRUCT_FLD(value, 0), + STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), + STRUCT_FLD(old_name, ""), + STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, + + END_OF_ST_FIELD_INFO +}; + +/**********************************************************************//** +Function to fill INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING +with information collected by scanning SYS_TABLESPACES table and then use +fil_space() +@return 0 on success */ +static +int +i_s_dict_fill_tablespaces_scrubbing( +/*==========================*/ + THD* thd, /*!< in: thread */ + ulint space, /*!< in: space ID */ + const char* name, /*!< in: tablespace name */ + TABLE* table_to_fill) /*!< in/out: fill this table */ +{ + Field** fields; + struct fil_space_scrub_status_t status; + + DBUG_ENTER("i_s_dict_fill_tablespaces_scrubbing"); + + fields = table_to_fill->field; + + fil_space_get_scrub_status(space, &status); + OK(fields[TABLESPACES_SCRUBBING_SPACE]->store(space)); + + OK(field_store_string(fields[TABLESPACES_SCRUBBING_NAME], + name)); + + OK(fields[TABLESPACES_SCRUBBING_COMPRESSED]->store( + status.compressed ? 
1 : 0)); + + if (status.last_scrub_completed == 0) { + fields[TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED]->set_null(); + } else { + fields[TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED] + ->set_notnull(); + OK(field_store_time_t( + fields[TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED], + status.last_scrub_completed)); + } + + int field_numbers[] = { + TABLESPACES_SCRUBBING_CURRENT_SCRUB_STARTED, + TABLESPACES_SCRUBBING_CURRENT_SCRUB_ACTIVE_THREADS, + TABLESPACES_SCRUBBING_CURRENT_SCRUB_PAGE_NUMBER, + TABLESPACES_SCRUBBING_CURRENT_SCRUB_MAX_PAGE_NUMBER }; + if (status.scrubbing) { + for (uint i = 0; i < array_elements(field_numbers); i++) { + fields[field_numbers[i]]->set_notnull(); + } + + OK(field_store_time_t( + fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_STARTED], + status.current_scrub_started)); + OK(fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_ACTIVE_THREADS] + ->store(status.current_scrub_active_threads)); + OK(fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_PAGE_NUMBER] + ->store(status.current_scrub_page_number)); + OK(fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_MAX_PAGE_NUMBER] + ->store(status.current_scrub_max_page_number)); + } else { + for (uint i = 0; i < array_elements(field_numbers); i++) { + fields[field_numbers[i]]->set_null(); + } + } + OK(schema_table_store_record(thd, table_to_fill)); + + DBUG_RETURN(0); +} +/*******************************************************************//** +Function to populate INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING table. +Loop through each record in TABLESPACES_SCRUBBING, and extract the column +information and fill the INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING table. 
+@return 0 on success */ +static +int +i_s_tablespaces_scrubbing_fill_table( +/*===========================*/ + THD* thd, /*!< in: thread */ + TABLE_LIST* tables, /*!< in/out: tables to fill */ + Item* ) /*!< in: condition (not used) */ +{ + btr_pcur_t pcur; + const rec_t* rec; + mem_heap_t* heap; + mtr_t mtr; + bool found_space_0 = false; + + DBUG_ENTER("i_s_tablespaces_scrubbing_fill_table"); + + /* deny access to user without SUPER_ACL privilege */ + if (check_global_access(thd, SUPER_ACL)) { + DBUG_RETURN(0); + } + + heap = mem_heap_create(1000); + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + + rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES); + + while (rec) { + const char* err_msg; + ulint space; + const char* name; + ulint flags; + + /* Extract necessary information from a SYS_TABLESPACES row */ + err_msg = dict_process_sys_tablespaces( + heap, rec, &space, &name, &flags); + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + + if (space == 0) { + found_space_0 = true; + } + + if (!err_msg) { + i_s_dict_fill_tablespaces_scrubbing( + thd, space, name, tables->table); + } else { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_CANT_FIND_SYSTEM_REC, "%s", + err_msg); + } + + mem_heap_empty(heap); + + /* Get the next record */ + mutex_enter(&dict_sys->mutex); + mtr_start(&mtr); + rec = dict_getnext_system(&pcur, &mtr); + } + + mtr_commit(&mtr); + mutex_exit(&dict_sys->mutex); + mem_heap_free(heap); + + if (found_space_0 == false) { + /* space 0 does for what ever unknown reason not show up + * in iteration above, add it manually */ + ulint space = 0; + const char* name = NULL; + i_s_dict_fill_tablespaces_scrubbing( + thd, space, name, tables->table); + } + + DBUG_RETURN(0); +} +/*******************************************************************//** +Bind the dynamic table INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING +@return 0 on success */ +static +int +innodb_tablespaces_scrubbing_init( +/*========================*/ + void* 
p) /*!< in/out: table schema object */ +{ + ST_SCHEMA_TABLE* schema; + + DBUG_ENTER("innodb_tablespaces_scrubbing_init"); + + schema = (ST_SCHEMA_TABLE*) p; + + schema->fields_info = innodb_tablespaces_scrubbing_fields_info; + schema->fill_table = i_s_tablespaces_scrubbing_fill_table; + + DBUG_RETURN(0); +} + +UNIV_INTERN struct st_maria_plugin i_s_innodb_tablespaces_scrubbing = +{ + /* the plugin type (a MYSQL_XXX_PLUGIN value) */ + /* int */ + STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), + + /* pointer to type-specific plugin descriptor */ + /* void* */ + STRUCT_FLD(info, &i_s_info), + + /* plugin name */ + /* const char* */ + STRUCT_FLD(name, "INNODB_TABLESPACES_SCRUBBING"), + + /* plugin author (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(author, "Google Inc"), + + /* general descriptive text (for SHOW PLUGINS) */ + /* const char* */ + STRUCT_FLD(descr, "InnoDB TABLESPACES_SCRUBBING"), + + /* the plugin license (PLUGIN_LICENSE_XXX) */ + /* int */ + STRUCT_FLD(license, PLUGIN_LICENSE_BSD), + + /* the function to invoke when plugin is loaded */ + /* int (*)(void*); */ + STRUCT_FLD(init, innodb_tablespaces_scrubbing_init), + + /* the function to invoke when plugin is unloaded */ + /* int (*)(void*); */ + STRUCT_FLD(deinit, i_s_common_deinit), + + /* plugin version (for SHOW PLUGINS) */ + /* unsigned int */ + STRUCT_FLD(version, INNODB_VERSION_SHORT), + + /* struct st_mysql_show_var* */ + STRUCT_FLD(status_vars, NULL), + + /* struct st_mysql_sys_var** */ + STRUCT_FLD(system_vars, NULL), + + /* Maria extension */ + STRUCT_FLD(version_info, INNODB_VERSION_STR), + STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE) +}; diff --git a/storage/xtradb/handler/i_s.h b/storage/xtradb/handler/i_s.h index 8d33e9bce7d..f141af40c87 100644 --- a/storage/xtradb/handler/i_s.h +++ b/storage/xtradb/handler/i_s.h @@ -60,5 +60,7 @@ extern struct st_mysql_plugin i_s_innodb_sys_foreign_cols; extern struct st_mysql_plugin i_s_innodb_sys_tablespaces; extern struct 
st_mysql_plugin i_s_innodb_sys_datafiles; extern struct st_mysql_plugin i_s_innodb_changed_pages; +extern struct st_maria_plugin i_s_innodb_tablespaces_encryption; +extern struct st_maria_plugin i_s_innodb_tablespaces_scrubbing; #endif /* i_s_h */ diff --git a/storage/xtradb/include/btr0btr.h b/storage/xtradb/include/btr0btr.h index 001e1af7d2d..2d2595ef2ab 100644 --- a/storage/xtradb/include/btr0btr.h +++ b/storage/xtradb/include/btr0btr.h @@ -734,6 +734,7 @@ btr_page_free_low( dict_index_t* index, /*!< in: index tree */ buf_block_t* block, /*!< in: block to be freed, x-latched */ ulint level, /*!< in: page level */ + bool blob, /*!< in: blob page */ mtr_t* mtr) /*!< in: mtr */ __attribute__((nonnull)); /*************************************************************//** @@ -870,4 +871,8 @@ btr_lift_page_up( #include "btr0btr.ic" #endif +/**************************************************************** +Global variable controlling if scrubbing should be performed */ +extern my_bool srv_immediate_scrub_data_uncompressed; + #endif diff --git a/storage/xtradb/include/btr0scrub.h b/storage/xtradb/include/btr0scrub.h new file mode 100644 index 00000000000..608266c206d --- /dev/null +++ b/storage/xtradb/include/btr0scrub.h @@ -0,0 +1,166 @@ +// Copyright 2014 Google + +#ifndef btr0scrub_h +#define btr0scrub_h + +#include "univ.i" + +#include "dict0dict.h" +#include "data0data.h" +#include "page0cur.h" +#include "mtr0mtr.h" +#include "btr0types.h" + +/** + * enum describing page allocation status + */ +enum btr_scrub_page_allocation_status_t { + BTR_SCRUB_PAGE_FREE, + BTR_SCRUB_PAGE_ALLOCATED, + BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN +}; + +/** +* constants returned by btr_page_needs_scrubbing & btr_scrub_recheck_page +*/ +#define BTR_SCRUB_PAGE 1 /* page should be scrubbed */ +#define BTR_SCRUB_SKIP_PAGE 2 /* no scrub & no action */ +#define BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE 3 /* no scrub & close table */ +#define BTR_SCRUB_SKIP_PAGE_AND_COMPLETE_SPACE 4 /* no scrub & 
complete space */ +#define BTR_SCRUB_TURNED_OFF 5 /* we detected that scrubbing + was disabled by global + variable */ + +/**************************************************************//** +struct for keeping scrub statistics. */ +struct btr_scrub_stat_t { + /* page reorganizations */ + ulint page_reorganizations; + /* page splits */ + ulint page_splits; + /* scrub failures */ + ulint page_split_failures_underflow; + ulint page_split_failures_out_of_filespace; + ulint page_split_failures_missing_index; + ulint page_split_failures_unknown; +}; + +/**************************************************************//** +struct for thread local scrub state. */ +struct btr_scrub_t { + + /* current space */ + ulint space; + + /* is scrubbing enabled for this space */ + bool scrubbing; + + /* is current space compressed */ + bool compressed; + + dict_table_t* current_table; + dict_index_t* current_index; + /* savepoint for X_LATCH of block */ + ulint savepoint; + + /* statistic counters */ + btr_scrub_stat_t scrub_stat; +}; + +/********************************************************************* +Init scrub global variables */ +UNIV_INTERN +void +btr_scrub_init(); + +/********************************************************************* +Cleanup scrub globals */ +UNIV_INTERN +void +btr_scrub_cleanup(); + +/*********************************************************************** +Return crypt statistics */ +UNIV_INTERN +void +btr_scrub_total_stat( +/*==================*/ + btr_scrub_stat_t *stat); /*!< out: stats to update */ + +/**************************************************************//** +Check if a page needs scrubbing +* @return BTR_SCRUB_PAGE if page should be scrubbed +* else btr_scrub_skip_page should be called +* with this return value (and without any latches held) +*/ +UNIV_INTERN +int +btr_page_needs_scrubbing( +/*=====================*/ + btr_scrub_t* scrub_data, /*!< in: scrub data */ + buf_block_t* block, /*!< in: block to check, latched */ + 
btr_scrub_page_allocation_status_t allocated); /*!< in: is block + allocated, free or + unknown */ + +/**************************************************************** +Recheck if a page needs scrubbing, and if it does load appropriate +table and index +* @return BTR_SCRUB_PAGE if page should be scrubbed +* else btr_scrub_skip_page should be called +* with this return value (and without any latches held) +*/ +UNIV_INTERN +int +btr_scrub_recheck_page( +/*====================*/ + btr_scrub_t* scrub_data, /*!< inut: scrub data */ + buf_block_t* block, /*!< in: block */ + btr_scrub_page_allocation_status_t allocated, /*!< in: is block + allocated or free */ + mtr_t* mtr); /*!< in: mtr */ + +/**************************************************************** +Perform actual scrubbing of page */ +UNIV_INTERN +int +btr_scrub_page( +/*============*/ + btr_scrub_t* scrub_data, /*!< in/out: scrub data */ + buf_block_t* block, /*!< in: block */ + btr_scrub_page_allocation_status_t allocated, /*!< in: is block + allocated or free */ + mtr_t* mtr); /*!< in: mtr */ + +/**************************************************************** +Perform cleanup needed for a page not needing scrubbing */ +UNIV_INTERN +void +btr_scrub_skip_page( +/*============*/ + btr_scrub_t* scrub_data, /*!< in/out: scrub data */ + int needs_scrubbing); /*!< in: return value from + btr_page_needs_scrubbing or + btr_scrub_recheck_page which encodes what kind + of cleanup is needed */ + +/**************************************************************** +Start iterating a space +* @return true if scrubbing is turned on */ +UNIV_INTERN +bool +btr_scrub_start_space( +/*===================*/ + ulint space, /*!< in: space */ + btr_scrub_t* scrub_data); /*!< in/out: scrub data */ + +/**************************************************************** +Complete iterating a space +* @return true if space was scrubbed */ +UNIV_INTERN +bool +btr_scrub_complete_space( +/*=====================*/ + btr_scrub_t* scrub_data); 
/*!< in/out: scrub data */ + +#endif diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index 15f36e4343c..10b296f6cd4 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -376,11 +376,13 @@ Given a tablespace id and page number tries to get that page. If the page is not in the buffer pool it is not loaded and NULL is returned. Suitable for using when holding the lock_sys_t::mutex. */ UNIV_INTERN -const buf_block_t* +buf_block_t* buf_page_try_get_func( /*==================*/ ulint space_id,/*!< in: tablespace id */ ulint page_no,/*!< in: page number */ + ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */ + bool possibly_freed, /*!< in: don't mind if page is freed */ const char* file, /*!< in: file name */ ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mini-transaction */ @@ -392,7 +394,8 @@ not loaded. Suitable for using when holding the lock_sys_t::mutex. @param mtr in: mini-transaction @return the page if in buffer pool, NULL if not */ #define buf_page_try_get(space_id, page_no, mtr) \ - buf_page_try_get_func(space_id, page_no, __FILE__, __LINE__, mtr); + buf_page_try_get_func(space_id, page_no, RW_S_LATCH, false, \ + __FILE__, __LINE__, mtr); /********************************************************************//** Get read access to a compressed page (usually of type @@ -1462,6 +1465,53 @@ buf_own_zip_mutex_for_page( __attribute__((nonnull,warn_unused_result)); #endif /* UNIV_DEBUG */ +/********************************************************************//** +The hook that is called just before a page is written to disk. +The function encrypts the content of the page and returns a pointer +to a frame that will be written instead of the real frame. 
*/ +byte* +buf_page_encrypt_before_write( +/*==========================*/ + buf_page_t* page, /*!< in/out: buffer page to be flushed */ + const byte* frame); + +/********************************************************************** +The hook that is called after page is written to disk. +The function releases any resources needed for encryption that was allocated +in buf_page_encrypt_before_write */ +ibool +buf_page_encrypt_after_write( +/*=========================*/ + buf_page_t* page); /*!< in/out: buffer page that was flushed */ + +/********************************************************************//** +The hook that is called just before a page is read from disk. +The function allocates memory that is used to temporarily store disk content +before getting decrypted */ +byte* +buf_page_decrypt_before_read( +/*=========================*/ + buf_page_t* page, /*!< in/out: buffer page read from disk */ + ulint zip_size); /*!< in: compressed page size, or 0 */ + +/********************************************************************//** +The hook that is called just after a page is read from disk. +The function decrypt disk content into buf_page_t and releases the +temporary buffer that was allocated in buf_page_decrypt_before_read */ +ibool +buf_page_decrypt_after_read( +/*========================*/ + buf_page_t* page); /*!< in/out: buffer page read from disk */ + +/********************************************************************//** +Release memory allocated for page decryption. +Only used in scenarios where read fails, e.g due to tablespace being dropped */ +void +buf_page_decrypt_cleanup( +/*=====================*/ + buf_page_t* page); /*!< in/out: buffer page read from disk */ + + /** The common buffer control block structure for compressed and uncompressed frames */ @@ -1536,6 +1586,14 @@ struct buf_page_t{ if written again we check is TRIM operation needed. 
*/ + unsigned key_version; /*!< key version for this block */ + byte* crypt_buf; /*!< for encryption the data needs to be + copied to a separate buffer before it's + encrypted&written. this as a page can be + read while it's being flushed */ + byte* crypt_buf_free; /*!< for encryption, allocated buffer + that is then alligned */ + #ifndef UNIV_HOTBACKUP buf_page_t* hash; /*!< node used in chaining to buf_pool->page_hash or diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic index 10f0e02cb8f..8917c8f2fb5 100644 --- a/storage/xtradb/include/buf0buf.ic +++ b/storage/xtradb/include/buf0buf.ic @@ -917,6 +917,35 @@ buf_block_free( mutex_exit(&block->mutex); } + +/********************************************************************//** +Get crypt buffer. */ +UNIV_INLINE +byte* +buf_page_get_crypt_buffer( +/*=========================*/ + const buf_page_t* bpage) /*!< in: buffer pool page */ +{ + return bpage->crypt_buf; +} + +/********************************************************************//** +Get buf frame. 
*/ +UNIV_INLINE +void * +buf_page_get_frame( +/*=========================*/ + const buf_page_t* bpage) /*!< in: buffer pool page */ +{ + if (bpage->crypt_buf) { + return buf_page_get_crypt_buffer(bpage); + } else if (bpage->zip.data) { + return bpage->zip.data; + } else { + return ((buf_block_t*) bpage)->frame; + } +} + #endif /* !UNIV_HOTBACKUP */ /*********************************************************************//** diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index 585c5566f32..ef8c9878297 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -139,6 +139,17 @@ dict_table_open_on_id( ibool dict_locked, /*!< in: TRUE=data dictionary locked */ dict_table_op_t table_op) /*!< in: operation to perform */ __attribute__((warn_unused_result)); + +/**********************************************************************//** +Returns a table object based on table id. +@return table, NULL if does not exist */ +UNIV_INTERN +dict_table_t* +dict_table_open_on_index_id( +/*==================*/ + table_id_t table_id, /*!< in: table id */ + bool dict_locked) /*!< in: TRUE=data dictionary locked */ + __attribute__((warn_unused_result)); /********************************************************************//** Decrements the count of open handles to a table. */ UNIV_INTERN @@ -918,8 +929,10 @@ dict_tf_set( pages */ ulint page_compression_level, /*!< in: table page compression level */ - ulint atomic_writes) /*!< in: table atomic + ulint atomic_writes, /*!< in: table atomic writes option value*/ + bool page_encrypted,/*!< in: table uses page encryption */ + ulint page_encryption_key) /*!< in: page encryption key */ __attribute__((nonnull)); /********************************************************************//** Convert a 32 bit integer table flags to the 32 bit integer that is @@ -1445,8 +1458,12 @@ dict_index_calc_min_rec_len( Reserves the dictionary system mutex for MySQL. 
*/ UNIV_INTERN void -dict_mutex_enter_for_mysql(void); +dict_mutex_enter_for_mysql_func(const char * file, ulint line); /*============================*/ + +#define dict_mutex_enter_for_mysql() \ + dict_mutex_enter_for_mysql_func(__FILE__, __LINE__) + /********************************************************************//** Releases the dictionary system mutex for MySQL. */ UNIV_INTERN diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic index 676d09b7268..c480b3c6216 100644 --- a/storage/xtradb/include/dict0dict.ic +++ b/storage/xtradb/include/dict0dict.ic @@ -543,6 +543,9 @@ dict_tf_is_valid( ulint data_dir = DICT_TF_HAS_DATA_DIR(flags); ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(flags); + ulint page_encryption = DICT_TF_GET_PAGE_ENCRYPTION(flags); + ulint page_encryption_key = DICT_TF_GET_PAGE_ENCRYPTION_KEY(flags); + /* Make sure there are no bits that we do not know about. */ if (unused != 0) { @@ -553,10 +556,12 @@ dict_tf_is_valid( "InnoDB: compact %ld atomic_blobs %ld\n" "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" "InnoDB: page_compression %ld page_compression_level %ld\n" - "InnoDB: atomic_writes %ld\n", + "InnoDB: atomic_writes %ld\n" + "InnoDB: page_encryption %ld page_encryption_key %ld\n", unused, compact, atomic_blobs, unused, data_dir, zip_ssize, - page_compression, page_compression_level, atomic_writes + page_compression, page_compression_level, atomic_writes, + page_encryption, page_encryption_key ); return(false); @@ -856,7 +861,9 @@ dict_tf_set( pages */ ulint page_compression_level, /*!< in: table page compression level */ - ulint atomic_writes) /*!< in: table atomic writes setup */ + ulint atomic_writes, /*!< in: table atomic writes setup */ + bool page_encrypted, /*!< in: table uses page encryption */ + ulint page_encryption_key /*!< in: page encryption key */) { atomic_writes_t awrites = (atomic_writes_t)atomic_writes; @@ -897,6 +904,11 @@ dict_tf_set( *flags |= (atomic_writes << 
DICT_TF_POS_ATOMIC_WRITES); ut_a(dict_tf_get_atomic_writes(*flags) == awrites); + + if (page_encrypted) { + *flags |= (1 << DICT_TF_POS_PAGE_ENCRYPTION) + | (page_encryption_key << DICT_TF_POS_PAGE_ENCRYPTION_KEY); + } } /********************************************************************//** @@ -919,6 +931,11 @@ dict_tf_to_fsp_flags( ulint fsp_flags; ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags); ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags); + + ulint page_encryption = DICT_TF_GET_PAGE_ENCRYPTION(table_flags); + /* Keys are limited to 255 values */ + ulint page_encryption_key = DICT_TF_GET_PAGE_ENCRYPTION_KEY(table_flags); + ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags); DBUG_EXECUTE_IF("dict_tf_to_fsp_flags_failure", @@ -946,6 +963,14 @@ dict_tf_to_fsp_flags( if page compression is used for this table. */ fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(fsp_flags, page_compression_level); + /* In addition, tablespace flags also contain if the page + encryption is used for this table. */ + fsp_flags |= FSP_FLAGS_SET_PAGE_ENCRYPTION(fsp_flags, page_encryption); + + /* In addition, tablespace flags also contain page encryption key if the page + encryption is used for this table. 
*/ + fsp_flags |= FSP_FLAGS_SET_PAGE_ENCRYPTION_KEY(fsp_flags, page_encryption_key); + /* In addition, tablespace flags also contain flag if atomic writes is used for this table */ fsp_flags |= FSP_FLAGS_SET_ATOMIC_WRITES(fsp_flags, atomic_writes); @@ -987,6 +1012,9 @@ dict_sys_tables_type_to_tf( | DICT_TF_MASK_PAGE_COMPRESSION | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL | DICT_TF_MASK_ATOMIC_WRITES + | DICT_TF_MASK_PAGE_ENCRYPTION + | DICT_TF_MASK_PAGE_ENCRYPTION_KEY + ); return(flags); @@ -1022,7 +1050,9 @@ dict_tf_to_sys_tables_type( | DICT_TF_MASK_DATA_DIR | DICT_TF_MASK_PAGE_COMPRESSION | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL - | DICT_TF_MASK_ATOMIC_WRITES); + | DICT_TF_MASK_ATOMIC_WRITES + | DICT_TF_MASK_PAGE_ENCRYPTION + | DICT_TF_MASK_PAGE_ENCRYPTION_KEY); return(type); } diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h index a5c58aaab8c..b691e28f0b5 100644 --- a/storage/xtradb/include/dict0mem.h +++ b/storage/xtradb/include/dict0mem.h @@ -137,6 +137,12 @@ Width of the page compression flag #define DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL 4 /** +Width of the page encryption flag +*/ +#define DICT_TF_WIDTH_PAGE_ENCRYPTION 1 +#define DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY 8 + +/** Width of atomic writes flag DEFAULT=0, ON = 1, OFF = 2 */ @@ -149,7 +155,9 @@ DEFAULT=0, ON = 1, OFF = 2 + DICT_TF_WIDTH_DATA_DIR \ + DICT_TF_WIDTH_PAGE_COMPRESSION \ + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL \ - + DICT_TF_WIDTH_ATOMIC_WRITES) + + DICT_TF_WIDTH_ATOMIC_WRITES \ + + DICT_TF_WIDTH_PAGE_ENCRYPTION \ + + DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY) /** A mask of all the known/used bits in table flags */ #define DICT_TF_BIT_MASK (~(~0 << DICT_TF_BITS)) @@ -174,9 +182,16 @@ DEFAULT=0, ON = 1, OFF = 2 /** Zero relative shift position of the ATOMIC_WRITES field */ #define DICT_TF_POS_ATOMIC_WRITES (DICT_TF_POS_PAGE_COMPRESSION_LEVEL \ + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL) + +/** Zero relative shift position of the PAGE_ENCRYPTION field */ +#define 
DICT_TF_POS_PAGE_ENCRYPTION (DICT_TF_POS_ATOMIC_WRITES \ + + DICT_TF_WIDTH_ATOMIC_WRITES) +/** Zero relative shift position of the PAGE_ENCRYPTION_KEY field */ +#define DICT_TF_POS_PAGE_ENCRYPTION_KEY (DICT_TF_POS_PAGE_ENCRYPTION \ + + DICT_TF_WIDTH_PAGE_ENCRYPTION) /** Zero relative shift position of the start of the UNUSED bits */ -#define DICT_TF_POS_UNUSED (DICT_TF_POS_ATOMIC_WRITES \ - + DICT_TF_WIDTH_ATOMIC_WRITES) +#define DICT_TF_POS_UNUSED (DICT_TF_POS_PAGE_ENCRYPTION_KEY \ + + DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY) /** Bit mask of the COMPACT field */ #define DICT_TF_MASK_COMPACT \ @@ -206,6 +221,14 @@ DEFAULT=0, ON = 1, OFF = 2 #define DICT_TF_MASK_ATOMIC_WRITES \ ((~(~0 << DICT_TF_WIDTH_ATOMIC_WRITES)) \ << DICT_TF_POS_ATOMIC_WRITES) +/** Bit mask of the PAGE_ENCRYPTION field */ +#define DICT_TF_MASK_PAGE_ENCRYPTION \ + ((~(~0L << DICT_TF_WIDTH_PAGE_ENCRYPTION)) \ + << DICT_TF_POS_PAGE_ENCRYPTION) +/** Bit mask of the PAGE_ENCRYPTION_KEY field */ +#define DICT_TF_MASK_PAGE_ENCRYPTION_KEY \ + ((~(~0L << DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY)) \ + << DICT_TF_POS_PAGE_ENCRYPTION_KEY) /** Return the value of the COMPACT field */ #define DICT_TF_GET_COMPACT(flags) \ @@ -223,6 +246,17 @@ DEFAULT=0, ON = 1, OFF = 2 #define DICT_TF_HAS_DATA_DIR(flags) \ ((flags & DICT_TF_MASK_DATA_DIR) \ >> DICT_TF_POS_DATA_DIR) + +/** Return the contents of the PAGE_ENCRYPTION field */ +#define DICT_TF_GET_PAGE_ENCRYPTION(flags) \ + ((flags & DICT_TF_MASK_PAGE_ENCRYPTION) \ + >> DICT_TF_POS_PAGE_ENCRYPTION) +/** Return the contents of the PAGE_ENCRYPTION KEY field */ +#define DICT_TF_GET_PAGE_ENCRYPTION_KEY(flags) \ + ((flags & DICT_TF_MASK_PAGE_ENCRYPTION_KEY) \ + >> DICT_TF_POS_PAGE_ENCRYPTION_KEY) + + /** Return the contents of the UNUSED bits */ #define DICT_TF_GET_UNUSED(flags) \ (flags >> DICT_TF_POS_UNUSED) @@ -1204,20 +1238,29 @@ struct dict_table_t{ calculation; this counter is not protected by any latch, because this is only used for heuristics */ -#define BG_STAT_NONE 0 
-#define BG_STAT_IN_PROGRESS (1 << 0) + +#define BG_STAT_IN_PROGRESS ((byte)(1 << 0)) /*!< BG_STAT_IN_PROGRESS is set in stats_bg_flag when the background stats code is working on this table. The DROP TABLE code waits for this to be cleared before proceeding. */ -#define BG_STAT_SHOULD_QUIT (1 << 1) +#define BG_STAT_SHOULD_QUIT ((byte)(1 << 1)) /*!< BG_STAT_SHOULD_QUIT is set in stats_bg_flag when DROP TABLE starts waiting on BG_STAT_IN_PROGRESS to be cleared, the background stats thread will detect this and will eventually quit sooner */ - byte stats_bg_flag; +#define BG_SCRUB_IN_PROGRESS ((byte)(1 << 2)) + /*!< BG_SCRUB_IN_PROGRESS is set in + stats_bg_flag when the background + scrub code is working on this table. The DROP + TABLE code waits for this to be cleared + before proceeding. */ + +#define BG_IN_PROGRESS (BG_STAT_IN_PROGRESS | BG_SCRUB_IN_PROGRESS) + + byte stats_bg_flag; /*!< see BG_STAT_* above. Writes are covered by dict_sys->mutex. Dirty reads are possible. */ diff --git a/storage/xtradb/include/dict0pagecompress.ic b/storage/xtradb/include/dict0pagecompress.ic index 811976434a8..a71b2b34b07 100644 --- a/storage/xtradb/include/dict0pagecompress.ic +++ b/storage/xtradb/include/dict0pagecompress.ic @@ -42,6 +42,8 @@ dict_tf_verify_flags( ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags); ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags); ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags); + ulint page_encryption = DICT_TF_GET_PAGE_ENCRYPTION(table_flags); + ulint page_encryption_key = DICT_TF_GET_PAGE_ENCRYPTION_KEY(table_flags); ulint post_antelope = FSP_FLAGS_GET_POST_ANTELOPE(fsp_flags); ulint zip_ssize = FSP_FLAGS_GET_ZIP_SSIZE(fsp_flags); ulint fsp_atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(fsp_flags); @@ -50,6 +52,9 @@ dict_tf_verify_flags( ulint fsp_page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(fsp_flags); ulint fsp_page_compression_level = 
FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(fsp_flags); ulint fsp_atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(fsp_flags); + ulint fsp_page_encryption = FSP_FLAGS_GET_PAGE_ENCRYPTION(fsp_flags); + ulint fsp_page_encryption_key = FSP_FLAGS_GET_PAGE_ENCRYPTION_KEY(fsp_flags); + DBUG_EXECUTE_IF("dict_tf_verify_flags_failure", return(ULINT_UNDEFINED);); @@ -107,6 +112,27 @@ dict_tf_verify_flags( return (FALSE); } + if (page_encryption != fsp_page_encryption) { + fprintf(stderr, + "InnoDB: Error: table flags has page_encryption %ld" + " in the data dictionary\n" + "InnoDB: but the flags in file has page_encryption %ld\n", + page_encryption, fsp_page_encryption); + + return (FALSE); + } + + if (page_encryption_key != fsp_page_encryption_key) { + fprintf(stderr, + "InnoDB: Error: table flags has page_encryption_key %ld" + " in the data dictionary\n" + "InnoDB: but the flags in file has page_encryption_key %ld\n", + page_encryption_key, fsp_page_encryption_key); + + return (FALSE); + } + + return(TRUE); } diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index 7be56206734..f271522bbe9 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -26,7 +26,7 @@ Created 10/25/1995 Heikki Tuuri #ifndef fil0fil_h #define fil0fil_h - +#define MSG_CANNOT_DECRYPT "can not decrypt" #include "univ.i" #ifndef UNIV_INNOCHECKSUM @@ -123,16 +123,20 @@ extern fil_addr_t fil_addr_null; MySQL/InnoDB 5.1.7 or later, the contents of this field is valid for all uncompressed pages. 
*/ -#define FIL_PAGE_FILE_FLUSH_LSN 26 /*!< this is only defined for the - first page in a system tablespace - data file (ibdata*, not *.ibd): - the file has been flushed to disk - at least up to this lsn */ +#define FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION 26 /*!< for the first page + in a system tablespace data file + (ibdata*, not *.ibd): the file has + been flushed to disk at least up + to this lsn + for other pages: a 32-bit key version + used to encrypt the page + 32-bit checksum + or 64 bits of zero if no encryption + */ /** If page type is FIL_PAGE_COMPRESSED then the 8 bytes starting at FIL_PAGE_FILE_FLUSH_LSN are broken down as follows: */ /** Control information version format (u8) */ -static const ulint FIL_PAGE_VERSION = FIL_PAGE_FILE_FLUSH_LSN; +static const ulint FIL_PAGE_VERSION = FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION; /** Compression algorithm (u8) */ static const ulint FIL_PAGE_ALGORITHM_V1 = FIL_PAGE_VERSION + 1; @@ -155,9 +159,6 @@ static const ulint FIL_PAGE_COMPRESS_SIZE_V1 = FIL_PAGE_ORIGINAL_SIZE_V1 + 2; #define FIL_PAGE_COMPRESSED_SIZE 2 /*!< Number of bytes used to store actual payload data size on compressed pages. */ -#define FIL_PAGE_COMPRESSION_ZLIB 1 /*!< Compressin algorithm ZLIB. */ -#define FIL_PAGE_COMPRESSION_LZ4 2 /*!< Compressin algorithm LZ4. 
*/ - /* @} */ /** File page trailer @{ */ #define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /*!< the low 4 bytes of this are used @@ -168,7 +169,10 @@ static const ulint FIL_PAGE_COMPRESS_SIZE_V1 = FIL_PAGE_ORIGINAL_SIZE_V1 + 2; /* @} */ /** File page types (values of FIL_PAGE_TYPE) @{ */ +#define FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED 35631 /* page compressed + + encrypted page */ #define FIL_PAGE_PAGE_COMPRESSED 34354 /*!< Page compressed page */ +#define FIL_PAGE_PAGE_ENCRYPTED 34355 /*!< Page encrypted page */ #define FIL_PAGE_INDEX 17855 /*!< B-tree node */ #define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */ #define FIL_PAGE_INODE 3 /*!< Index node */ @@ -206,6 +210,9 @@ extern ulint fil_n_pending_tablespace_flushes; /** Number of files currently open */ extern ulint fil_n_file_opened; +/* structure containing encryption specification */ +typedef struct fil_space_crypt_struct fil_space_crypt_t; + struct fsp_open_info { ibool success; /*!< Has the tablespace been opened? */ const char* check_msg; /*!< fil_check_first_page() message */ @@ -215,6 +222,8 @@ struct fsp_open_info { lsn_t lsn; /*!< Flushed LSN from header page */ ulint id; /*!< Space ID */ ulint flags; /*!< Tablespace flags */ + ulint encryption_error; /*!< if an encryption error occurs */ + fil_space_crypt_t* crypt_data; /*!< crypt data */ }; struct fil_space_t; @@ -330,6 +339,7 @@ struct fil_space_t { ibool is_corrupt; UT_LIST_NODE_T(fil_space_t) space_list; /*!< list of all spaces */ + fil_space_crypt_t* crypt_data; ulint magic_n;/*!< FIL_SPACE_MAGIC_N */ }; @@ -472,7 +482,9 @@ fil_space_create( ulint id, /*!< in: space id */ ulint zip_size,/*!< in: compressed page size, or 0 for uncompressed tablespaces */ - ulint purpose);/*!< in: FIL_TABLESPACE, or FIL_LOG if log */ + ulint purpose, /*!< in: FIL_TABLESPACE, or FIL_LOG if log */ + fil_space_crypt_t* crypt_data); /*!< in: crypt data */ + /*******************************************************************//** Assigns a new space id for a new single-table 
tablespace. This works simply by incrementing the global counter. If 4 billion id's is not enough, we may need @@ -607,8 +619,10 @@ fil_read_first_page( lsn values in data files */ lsn_t* max_flushed_lsn, /*!< out: max of flushed lsn values in data files */ - ulint orig_space_id) /*!< in: file space id or + ulint orig_space_id, /*!< in: file space id or ULINT_UNDEFINED */ + fil_space_crypt_t** crypt_data) /*!< out: crypt data */ + __attribute__((warn_unused_result)); /*******************************************************************//** Increments the count of pending operation, if space is not being deleted. @@ -1283,8 +1297,267 @@ char* fil_space_name( /*===========*/ fil_space_t* space); /*!< in: space */ + +/****************************************************************** +Get id of first tablespace or ULINT_UNDEFINED if none */ +UNIV_INTERN +ulint +fil_get_first_space(); + +/****************************************************************** +Get id of next tablespace or ULINT_UNDEFINED if none */ +UNIV_INTERN +ulint +fil_get_next_space( + ulint id); /*!< in: space id */ + +/********************************************************************* +Init global resources needed for tablespace encryption/decryption */ +void +fil_space_crypt_init(); + +/********************************************************************* +Cleanup global resources needed for tablespace encryption/decryption */ +void +fil_space_crypt_cleanup(); + +/********************************************************************* +Create crypt data, i.e data that is used for a single tablespace */ +fil_space_crypt_t * +fil_space_create_crypt_data(); + +/********************************************************************* +Destroy crypt data */ +UNIV_INTERN +void +fil_space_destroy_crypt_data( +/*=========================*/ + fil_space_crypt_t **crypt_data); /*!< in/out: crypt data */ + +/********************************************************************* +Get crypt data for a space*/ 
+fil_space_crypt_t * +fil_space_get_crypt_data( +/*======================*/ + ulint space); /*!< in: tablespace id */ + +/********************************************************************* +Set crypt data for a space*/ +void +fil_space_set_crypt_data( +/*======================*/ + ulint space, /*!< in: tablespace id */ + fil_space_crypt_t* crypt_data); /*!< in: crypt data */ + +/********************************************************************* +Compare crypt data*/ +int +fil_space_crypt_compare( +/*======================*/ + const fil_space_crypt_t* crypt_data1, /*!< in: crypt data */ + const fil_space_crypt_t* crypt_data2); /*!< in: crypt data */ + +/********************************************************************* +Read crypt data from buffer page */ +fil_space_crypt_t * +fil_space_read_crypt_data( +/*======================*/ + ulint space, /*!< in: tablespace id */ + const byte* page, /*!< in: buffer page */ + ulint offset); /*!< in: offset where crypt data is stored */ + +/********************************************************************* +Write crypt data to buffer page */ +void +fil_space_write_crypt_data( +/*=======================*/ + ulint space, /*!< in: tablespace id */ + byte* page, /*!< in: buffer page */ + ulint offset, /*!< in: offset where to store data */ + ulint maxsize, /*!< in: max space available to store crypt data in */ + mtr_t * mtr); /*!< in: mini-transaction */ + +/********************************************************************* +Clear crypt data from page 0 (used for import tablespace) */ +void +fil_space_clear_crypt_data( +/*======================*/ + byte* page, /*!< in: buffer page */ + ulint offset); /*!< in: offset where crypt data is stored */ + +/********************************************************************* +Parse crypt data log record */ +byte* +fil_parse_write_crypt_data( +/*=======================*/ + byte* ptr, /*!< in: start of log record */ + byte* end_ptr, /*!< in: end of log record */ + 
buf_block_t*); /*!< in: buffer page to apply record to */ + +/********************************************************************* +Check if extra buffer shall be allocated for decrypting after read */ +UNIV_INTERN +bool +fil_space_check_encryption_read( +/*==============================*/ + ulint space); /*!< in: tablespace id */ + +/********************************************************************* +Check if page shall be encrypted before write */ +UNIV_INTERN +bool +fil_space_check_encryption_write( +/*==============================*/ + ulint space); /*!< in: tablespace id */ + +/********************************************************************* +Encrypt buffer page */ +void +fil_space_encrypt( +/*===============*/ + ulint space, /*!< in: tablespace id */ + ulint offset, /*!< in: page no */ + lsn_t lsn, /*!< in: page lsn */ + const byte* src_frame,/*!< in: page frame */ + ulint size, /*!< in: size of data to encrypt */ + byte* dst_frame, /*!< in: where to encrypt to */ + ulint page_encryption_key); /*!< in: page encryption key id if page + encrypted */ + +/********************************************************************* +Decrypt buffer page */ +void +fil_space_decrypt( +/*===============*/ + ulint space, /*!< in: tablespace id */ + const byte* src_frame,/*!< in: page frame */ + ulint page_size, /*!< in: size of data to encrypt */ + byte* dst_frame); /*!< in: where to decrypt to */ + + +/********************************************************************* +Decrypt buffer page +@return true if page was encrypted */ +bool +fil_space_decrypt( +/*===============*/ + fil_space_crypt_t* crypt_data, /*!< in: crypt data */ + const byte* src_frame,/*!< in: page frame */ + ulint page_size, /*!< in: page size */ + byte* dst_frame); /*!< in: where to decrypt to */ + +/********************************************************************* +fil_space_verify_crypt_checksum +NOTE: currently this function can only be run in single threaded mode +as it modifies 
srv_checksum_algorithm (temporarily) +@return true if page is encrypted AND OK, false otherwise */ +bool +fil_space_verify_crypt_checksum( +/*===============*/ + const byte* src_frame,/*!< in: page frame */ + ulint zip_size); /*!< in: size of data to encrypt */ + +/********************************************************************* +Init threads for key rotation */ +void +fil_crypt_threads_init(); + +/********************************************************************* +Set thread count (e.g start or stops threads) used for key rotation */ +void +fil_crypt_set_thread_cnt( +/*=====================*/ + uint new_cnt); /*!< in: requested #threads */ + +/********************************************************************* +End threads for key rotation */ +void +fil_crypt_threads_end(); + +/********************************************************************* +Cleanup resources for threads for key rotation */ +void +fil_crypt_threads_cleanup(); + +/********************************************************************* +Set rotate key age */ +void +fil_crypt_set_rotate_key_age( +/*=====================*/ + uint rotate_age); /*!< in: requested rotate age */ + +/********************************************************************* +Set rotation threads iops */ +void +fil_crypt_set_rotation_iops( +/*=====================*/ + uint iops); /*!< in: requested iops */ + +/********************************************************************* +Mark a space as closing */ +UNIV_INTERN +void +fil_space_crypt_mark_space_closing( +/*===============*/ + ulint space); /*!< in: tablespace id */ + +/********************************************************************* +Wait for crypt threads to stop accessing space */ +UNIV_INTERN +void +fil_space_crypt_close_tablespace( +/*===============*/ + ulint space); /*!< in: tablespace id */ + +/** Struct for retreiving info about encryption */ +struct fil_space_crypt_status_t { + ulint space; /*!< tablespace id */ + ulint scheme; /*!< encryption 
scheme */ + uint min_key_version; /*!< min key version */ + uint current_key_version;/*!< current key version */ + uint keyserver_requests;/*!< no of key requests to key server */ + bool rotating; /*!< is key rotation ongoing */ + bool flushing; /*!< is flush at end of rotation ongoing */ + ulint rotate_next_page_number; /*!< next page if key rotating */ + ulint rotate_max_page_number; /*!< max page if key rotating */ +}; + +/********************************************************************* +Get crypt status for a space +@return 0 if crypt data found */ +int +fil_space_crypt_get_status( +/*==================*/ + ulint id, /*!< in: space id */ + struct fil_space_crypt_status_t * status); /*!< out: status */ + +/** Struct for retrieving statistics about encryption key rotation */ +struct fil_crypt_stat_t { + ulint pages_read_from_cache; + ulint pages_read_from_disk; + ulint pages_modified; + ulint pages_flushed; + ulint estimated_iops; +}; + +/********************************************************************* +Get crypt rotation statistics */ +void +fil_crypt_total_stat( +/*==================*/ + fil_crypt_stat_t* stat); /*!< out: crypt stat */ + #endif +/*******************************************************************//** +Return space flags */ +ulint +fil_space_flags( +/*===========*/ + fil_space_t* space); /*!< in: space */ + + + /****************************************************************//** Does error handling when a file operation fails.
@return TRUE if we should retry the operation */ @@ -1305,4 +1578,25 @@ fil_get_page_type_name( /*===================*/ ulint page_type); /*!< in: FIL_PAGE_TYPE */ +/** Struct for retrieving info about scrubbing */ +struct fil_space_scrub_status_t { + ulint space; /*!< tablespace id */ + bool compressed; /*!< is space compressed */ + time_t last_scrub_completed; /*!< when was last scrub completed */ + bool scrubbing; /*!< is scrubbing ongoing */ + time_t current_scrub_started; /*!< when started current scrubbing */ + ulint current_scrub_active_threads; /*!< current scrub active threads */ + ulint current_scrub_page_number; /*!< current scrub page no */ + ulint current_scrub_max_page_number; /*!< current scrub max page no */ +}; + +/********************************************************************* +Get scrub status for a space +@return 0 if no scrub info found */ +int +fil_space_get_scrub_status( +/*==================*/ + ulint id, /*!< in: space id */ + struct fil_space_scrub_status_t * status); /*!< out: status */ + #endif /* fil0fil_h */ diff --git a/storage/xtradb/include/fil0pageencryption.h b/storage/xtradb/include/fil0pageencryption.h new file mode 100644 index 00000000000..c0c48df4e34 --- /dev/null +++ b/storage/xtradb/include/fil0pageencryption.h @@ -0,0 +1,102 @@ +/***************************************************************************** + +Copyright (C) 2014 eperi GmbH. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +#ifndef fil0pageencryption_h +#define fil0pageencryption_h + +#define PAGE_ENCRYPTION_WRONG_KEY 1 +#define PAGE_ENCRYPTION_WRONG_PAGE_TYPE 2 +#define PAGE_ENCRYPTION_ERROR 3 +#define PAGE_ENCRYPTION_KEY_MISSING 4 +#define PAGE_ENCRYPTION_OK 0 +#define PAGE_ENCRYPTION_WILL_NOT_ENCRYPT 5 + +#include "fsp0fsp.h" +#include "fsp0pageencryption.h" + +/******************************************************************//** +@file include/fil0pageencryption.h +Helper functions for encryption/decryption page data on to table space. + +Created 08/25/2014 +***********************************************************************/ + + +/******************************PAGE_ENCRYPTION_ERROR*************************************//** +Returns the page encryption flag of the space, or false if the space +is not encrypted. The tablespace must be cached in the memory cache. +@return true if page encrypted, false if not or space not found */ +ibool +fil_space_is_page_encrypted( +/*=========================*/ + ulint id); /*!< in: space id */ + + +/*******************************************************************//** +Find out whether the page is page encrypted +@return true if page is page encrypted, false if not */ +UNIV_INLINE +ibool +fil_page_is_encrypted( +/*===================*/ + const byte *buf); /*!< in: page */ + + +/*******************************************************************//** +Find out whether the page can be decrypted +@return true if page can be decrypted, false if not. 
*/ +UNIV_INLINE +ulint +fil_page_encryption_status( +/*===================*/ + const byte *buf); /*!< in: page */ + + +/****************************************************************//** +For page encrypted pages encrypt the page before actual write +operation. +@return encrypted page to be written*/ +byte* +fil_encrypt_page( +/*==============*/ + ulint space_id, /*!< in: tablespace id of the table. */ + byte* buf, /*!< in: buffer from which to write; in aio + this must be appropriately aligned */ + byte* out_buf, /*!< out: encrypted buffer */ + ulint len, /*!< in: length of input buffer.*/ + ulint encryption_key, /*!< in: encryption key */ + ulint* out_len, /*!< out: actual length of encrypted page */ + ulint* errorCode, /*!< out: an error code. set, if page is intentionally not encrypted */ + byte* tmp_encryption_buf); /*!< in: temporary buffer or NULL */ + +/****************************************************************//** +For page encrypted pages decrypt the page after actual read +operation. +@return decrypted page */ +ulint +fil_decrypt_page( +/*================*/ + byte* page_buf, /*!< in: preallocated buffer or NULL */ + byte* buf, /*!< in/out: buffer from which to read; in aio + this must be appropriately aligned */ + ulint len, /*!< in: length buffer, which should be decrypted.*/ + ulint* write_size, /*!< out: size of the decrypted data. If no error occurred equal to len */ + ibool* page_compressed,/*!<out: is page compressed.*/ + byte* tmp_encryption_buf); /*!< in: temporary buffer or NULL */ + +#endif // fil0pageencryption_h diff --git a/storage/xtradb/include/fsp0fsp.h b/storage/xtradb/include/fsp0fsp.h index 6fe44a0ef16..57e5b9490b1 100644 --- a/storage/xtradb/include/fsp0fsp.h +++ b/storage/xtradb/include/fsp0fsp.h @@ -57,6 +57,11 @@ is found in a remote location, not the default data directory. 
*/ /** Number of flag bits used to indicate the page compression and compression level */ #define FSP_FLAGS_WIDTH_PAGE_COMPRESSION 1 #define FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL 4 + +/** Number of flag bits used to indicate the page compression and compression level */ +#define FSP_FLAGS_WIDTH_PAGE_ENCRYPTION 1 +#define FSP_FLAGS_WIDTH_PAGE_ENCRYPTION_KEY 8 + /** Number of flag bits used to indicate atomic writes for this tablespace */ #define FSP_FLAGS_WIDTH_ATOMIC_WRITES 2 @@ -68,7 +73,9 @@ is found in a remote location, not the default data directory. */ + FSP_FLAGS_WIDTH_DATA_DIR \ + FSP_FLAGS_WIDTH_PAGE_COMPRESSION \ + FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL \ - + FSP_FLAGS_WIDTH_ATOMIC_WRITES) + + FSP_FLAGS_WIDTH_ATOMIC_WRITES \ + + FSP_FLAGS_WIDTH_PAGE_ENCRYPTION \ + + FSP_FLAGS_WIDTH_PAGE_ENCRYPTION_KEY) /** A mask of all the known/used bits in tablespace flags */ #define FSP_FLAGS_MASK (~(~0 << FSP_FLAGS_WIDTH)) @@ -92,15 +99,21 @@ dictionary */ /** Zero relative shift position of the ATOMIC_WRITES field */ #define FSP_FLAGS_POS_ATOMIC_WRITES (FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL \ + FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL) - /** Zero relative shift position of the PAGE_SSIZE field */ -#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_ATOMIC_WRITES \ +/** Zero relative shift position of the PAGE_ENCRYPTION field */ +#define FSP_FLAGS_POS_PAGE_ENCRYPTION (FSP_FLAGS_POS_ATOMIC_WRITES \ + FSP_FLAGS_WIDTH_ATOMIC_WRITES) +/** Zero relative shift position of the PAGE_ENCRYPTION_KEY field */ +#define FSP_FLAGS_POS_PAGE_ENCRYPTION_KEY (FSP_FLAGS_POS_PAGE_ENCRYPTION \ + + FSP_FLAGS_WIDTH_PAGE_ENCRYPTION) + /** Zero relative shift position of the PAGE_SSIZE field */ +#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_PAGE_ENCRYPTION_KEY \ + + FSP_FLAGS_WIDTH_PAGE_ENCRYPTION_KEY) /** Zero relative shift position of the start of the DATA DIR bits */ #define FSP_FLAGS_POS_DATA_DIR (FSP_FLAGS_POS_PAGE_SSIZE \ + FSP_FLAGS_WIDTH_PAGE_SSIZE) /** Zero relative shift position of 
the start of the UNUSED bits */ -#define FSP_FLAGS_POS_UNUSED (FSP_FLAGS_POS_DATA_DIR \ - + FSP_FLAGS_WIDTH_DATA_DIR) +#define FSP_FLAGS_POS_UNUSED (FSP_FLAGS_POS_DATA_DIR\ + + FSP_FLAGS_WIDTH_DATA_DIR) /** Bit mask of the POST_ANTELOPE field */ #define FSP_FLAGS_MASK_POST_ANTELOPE \ @@ -130,12 +143,20 @@ dictionary */ #define FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL \ ((~(~0 << FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL)) \ << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL) +/** Bit mask of the PAGE_ENCRYPTION field */ +#define FSP_FLAGS_MASK_PAGE_ENCRYPTION \ + ((~(~0 << FSP_FLAGS_WIDTH_PAGE_ENCRYPTION)) \ + << FSP_FLAGS_POS_PAGE_ENCRYPTION) +/** Bit mask of the PAGE_ENCRYPTION_KEY field */ +#define FSP_FLAGS_MASK_PAGE_ENCRYPTION_KEY \ + ((~(~0 << FSP_FLAGS_WIDTH_PAGE_ENCRYPTION_KEY)) \ + << FSP_FLAGS_POS_PAGE_ENCRYPTION_KEY) + /** Bit mask of the ATOMIC_WRITES field */ #define FSP_FLAGS_MASK_ATOMIC_WRITES \ ((~(~0 << FSP_FLAGS_WIDTH_ATOMIC_WRITES)) \ << FSP_FLAGS_POS_ATOMIC_WRITES) - /** Return the value of the POST_ANTELOPE field */ #define FSP_FLAGS_GET_POST_ANTELOPE(flags) \ ((flags & FSP_FLAGS_MASK_POST_ANTELOPE) \ @@ -171,6 +192,14 @@ dictionary */ #define FSP_FLAGS_GET_ATOMIC_WRITES(flags) \ ((flags & FSP_FLAGS_MASK_ATOMIC_WRITES) \ >> FSP_FLAGS_POS_ATOMIC_WRITES) +/** Return the value of the PAGE_ENCRYPTION field */ +#define FSP_FLAGS_GET_PAGE_ENCRYPTION(flags) \ + ((flags & FSP_FLAGS_MASK_PAGE_ENCRYPTION) \ + >> FSP_FLAGS_POS_PAGE_ENCRYPTION) +/** Return the value of the PAGE_ENCRYPTION_KEY field */ +#define FSP_FLAGS_GET_PAGE_ENCRYPTION_KEY(flags) \ + ((flags & FSP_FLAGS_MASK_PAGE_ENCRYPTION_KEY) \ + >> FSP_FLAGS_POS_PAGE_ENCRYPTION_KEY) /** Set a PAGE_SSIZE into the correct bits in a given tablespace flags. */ @@ -186,6 +215,14 @@ tablespace flags. */ tablespace flags. */ #define FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(flags, level) \ (flags | (level << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL)) + +/** Set a PAGE_ENCRYPTION into the correct bits in a given tablespace flags. 
*/ +#define FSP_FLAGS_SET_PAGE_ENCRYPTION(flags, encryption) \ + (flags | (encryption << FSP_FLAGS_POS_PAGE_ENCRYPTION)) +/** Set a PAGE_ENCRYPTION_KEY into the correct bits in a given tablespace flags. */ +#define FSP_FLAGS_SET_PAGE_ENCRYPTION_KEY(flags, encryption_key) \ + (flags | (encryption_key << FSP_FLAGS_POS_PAGE_ENCRYPTION_KEY)) + /** Set a ATOMIC_WRITES into the correct bits in a given tablespace flags. */ #define FSP_FLAGS_SET_ATOMIC_WRITES(flags, atomics) \ @@ -800,6 +837,31 @@ fsp_flags_get_page_size( /*====================*/ ulint flags); /*!< in: tablespace flags */ +/*********************************************************************/ +/* @return offset into fsp header where crypt data is stored */ +UNIV_INTERN +ulint +fsp_header_get_crypt_offset( +/*========================*/ + ulint zip_size, /*!< in: zip_size */ + ulint* max_size); /*!< out: free space after offset */ + +#define fsp_page_is_free(space,page,mtr) \ + fsp_page_is_free_func(space,page,mtr, __FILE__, __LINE__) + +/**********************************************************************//** +Checks if a single page is free. 
+@return true if free */ +UNIV_INTERN +bool +fsp_page_is_free_func( +/*==============*/ + ulint space, /*!< in: space id */ + ulint page, /*!< in: page offset */ + mtr_t* mtr, /*!< in/out: mini-transaction */ + const char *file, + ulint line); + #ifndef UNIV_NONINL #include "fsp0fsp.ic" #endif diff --git a/storage/xtradb/include/fsp0pagecompress.ic b/storage/xtradb/include/fsp0pagecompress.ic index 4dde042e19e..3f7a1e6cf9e 100644 --- a/storage/xtradb/include/fsp0pagecompress.ic +++ b/storage/xtradb/include/fsp0pagecompress.ic @@ -193,5 +193,5 @@ fil_page_is_lzo_compressed( byte *buf) /*!< in: page */ { return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED && - mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN) == PAGE_LZO_ALGORITHM); + mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) == PAGE_LZO_ALGORITHM); } diff --git a/storage/xtradb/include/fsp0pageencryption.h b/storage/xtradb/include/fsp0pageencryption.h new file mode 100644 index 00000000000..631aa72211c --- /dev/null +++ b/storage/xtradb/include/fsp0pageencryption.h @@ -0,0 +1,66 @@ +/***************************************************************************** + + Copyright (C) 2014 eperi GmbH. All Rights Reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/******************************************************************/ + +/******************************************************************//** +@file include/fsp0pageencryption.h +Helper functions for extracting/storing page encryption information to file space. + +Created 08/28/2014 +***********************************************************************/ + +#ifndef FSP0PAGEENCRYPTION_H_ +#define FSP0PAGEENCRYPTION_H_ + +#define FIL_PAGE_ENCRYPTION_AES_128 16 /*!< Encryption algorithm AES-128. */ +#define FIL_PAGE_ENCRYPTION_AES_196 24 /*!< Encryption algorithm AES-192 (24-byte key). */ +#define FIL_PAGE_ENCRYPTION_AES_256 32 /*!< Encryption algorithm AES-256. */ + +#define FIL_PAGE_ENCRYPTED_SIZE 2 /*!< Number of bytes used to store + actual payload data size on encrypted + pages. */ + +/********************************************************************//** +Determine if the tablespace is page encrypted from dict_table_t::flags. +@return TRUE if page encrypted, FALSE if not page encrypted */ +UNIV_INLINE +ibool +fsp_flags_is_page_encrypted( +/*=========================*/ + ulint flags); /*!< in: tablespace flags */ + + +/********************************************************************//** +Extract the page encryption key from tablespace flags. +A tablespace has only one physical page encryption key +whether that page is encrypted or not. +@return page encryption key of the file-per-table tablespace, +or zero if the table is not encrypted.
*/ +UNIV_INLINE +ulint +fsp_flags_get_page_encryption_key( +/*=================================*/ + ulint flags); /*!< in: tablespace flags */ + + +#ifndef UNIV_NONINL +#include "fsp0pageencryption.ic" +#endif + + +#endif /* FSP0PAGEENCRYPTION_H_ */ diff --git a/storage/xtradb/include/fsp0pageencryption.ic b/storage/xtradb/include/fsp0pageencryption.ic new file mode 100644 index 00000000000..7ff002b203e --- /dev/null +++ b/storage/xtradb/include/fsp0pageencryption.ic @@ -0,0 +1,166 @@ +/***************************************************************************** + + Copyright (C) 2014 eperi GmbH. All Rights Reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +/******************************************************************//** +@file include/fsp0pageencryption.ic +Implementation for helper functions for encrypting/decrypting pages +and atomic writes information to file space. + +Created 08/28/2014 +***********************************************************************/ + +#include "fsp0fsp.h" +#include "fil0pageencryption.h" + + + + +/********************************************************************//** +Determine if the tablespace is page encrypted from dict_table_t::flags. 
+@return TRUE if page encrypted, FALSE if not page encrypted */ +UNIV_INLINE +ibool +fsp_flags_is_page_encrypted( +/*=========================*/ + ulint flags) /*!< in: tablespace flags */ +{ + return(FSP_FLAGS_GET_PAGE_ENCRYPTION(flags)); +} + +/********************************************************************//** +Extract the page encryption key from tablespace flags. +A tablespace has only one physical page encryption key +whether that page is encrypted or not. +@return page encryption key of the file-per-table tablespace, +or zero if the table is not encrypted. */ +UNIV_INLINE +ulint +fsp_flags_get_page_encryption_key( +/*=================================*/ + ulint flags) /*!< in: tablespace flags */ +{ + return(FSP_FLAGS_GET_PAGE_ENCRYPTION_KEY(flags)); +} + + +/*******************************************************************//** +Returns the page encryption flag of the space, or false if the space +is not encrypted. The tablespace must be cached in the memory cache. +@return true if page encrypted, false if not or space not found */ +UNIV_INLINE +ibool +fil_space_is_page_encrypted( +/*=========================*/ + ulint id) /*!< in: space id */ +{ + ulint flags; + + flags = fil_space_get_flags(id); + + if (flags && flags != ULINT_UNDEFINED) { + + return(fsp_flags_is_page_encrypted(flags)); + } + + return(flags); +} + +/*******************************************************************//** +Returns the page encryption key of the space, or 0 if the space +is not encrypted. The tablespace must be cached in the memory cache. 
+@return page encryption key, 0 if the space flags are 0, ULINT_UNDEFINED if space not found */ +UNIV_INLINE +ulint +fil_space_get_page_encryption_key( +/*=================================*/ + ulint id) /*!< in: space id */ +{ + ulint flags; + + flags = fil_space_get_flags(id); + + if (flags && flags != ULINT_UNDEFINED) { + + return(fsp_flags_get_page_encryption_key(flags)); + } + + return(flags); +} + + + +/*******************************************************************//** +Find out whether the page is page encrypted +@return true if page is page encrypted, false if not */ +UNIV_INLINE +ibool +fil_page_is_encrypted( +/*===================*/ + const byte *buf) /*!< in: page */ +{ + return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_ENCRYPTED); +} + +/*******************************************************************//** +Find out whether the page is first compressed and then encrypted +@return true if page is page compressed+encrypted, false if not */ +UNIV_INLINE +ibool +fil_page_is_compressed_encrypted( +/*=============================*/ + const byte *buf) /*!< in: page */ +{ + return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED); +} + +/*******************************************************************//** +Find out whether the page can be decrypted. +This is the case, if the page is already decrypted and is not the first page of the table space. +If the page is already decrypted it is not of the FIL_PAGE_PAGE_ENCRYPTED type. +If it is the first page of the table space, it is assumed that a page can be decrypted if the +key found in the flags (part of the 1st page) can be read from the key provider. +The case, if the key changed, is currently not caught. +The function for decrypting the page should already be executed before this.
+@return PAGE_ENCRYPTION_KEY_MISSING if key provider is available, but key is not available + PAGE_ENCRYPTION_ERROR if other error occurred + 0 if decryption should be possible +*/ +UNIV_INLINE +ulint +fil_page_encryption_status( +/*===================*/ + const byte *buf) /*!< in: page */ +{ + ulint page_type = mach_read_from_2(buf+FIL_PAGE_TYPE); + if (page_type == FIL_PAGE_TYPE_FSP_HDR) { + ulint flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + buf); + if (fsp_flags_is_page_encrypted(flags)) { + if (!HasCryptoKey(fsp_flags_get_page_encryption_key(flags))) { + /* accessing table would surely fail, because no key or no key provider available */ + return PAGE_ENCRYPTION_KEY_MISSING; + } + } + } + if(page_type == FIL_PAGE_PAGE_ENCRYPTED) { + ulint key = mach_read_from_1(buf + FIL_PAGE_SPACE_OR_CHKSUM); + if (!HasCryptoKey(key)) { + return PAGE_ENCRYPTION_KEY_MISSING; + } + return PAGE_ENCRYPTION_ERROR; + } + return 0; +} diff --git a/storage/xtradb/include/fts0fts.h b/storage/xtradb/include/fts0fts.h index a2996ecacc8..f00d754ac66 100644 --- a/storage/xtradb/include/fts0fts.h +++ b/storage/xtradb/include/fts0fts.h @@ -715,6 +715,34 @@ fts_drop_index_tables( dict_index_t* index) /*!< in: Index to drop */ __attribute__((nonnull, warn_unused_result)); +/****************************************************************** +Wait for background threads to stop using FTS index +*/ +UNIV_INTERN +void +fts_wait_bg_to_stop_using_index( +/*======================*/ + trx_t* trx, /*!< in: transaction */ + dict_index_t* index, /*!< in: FTS Index */ + bool drop_table); /*!< in: in addition to stop + using index, also prevent + threads from start using it, + used by drop table */ + +/****************************************************************** +Wait for background threads to stop using any FTS index of the table +*/ +UNIV_INTERN +void +fts_wait_bg_to_stop_using_table( +/*======================*/ + trx_t* trx, /*!< in: transaction */ + dict_table_t* table, /*!< in: 
table to stop threads */ + bool drop_table); /*!< in: in addition to stop + using table, also prevent + threads from start using it, + used by drop table */ + /******************************************************************//** Remove the table from the OPTIMIZER's list. We do wait for acknowledgement from the consumer of the message. */ diff --git a/storage/xtradb/include/log0crypt.h b/storage/xtradb/include/log0crypt.h new file mode 100644 index 00000000000..188e82397a2 --- /dev/null +++ b/storage/xtradb/include/log0crypt.h @@ -0,0 +1,85 @@ +/**************************************************//** +@file include/log0crypt.h +Innodb log encrypt/decrypt + +Created 11/25/2013 Minli Zhu +*******************************************************/ +#ifndef log0crypt_h +#define log0crypt_h + +#include "univ.i" +#include "ut0byte.h" +#include "ut0lst.h" +#include "ut0rnd.h" +#include "my_aes.h" +#include "my_crypt_key_management.h" // for key version and key + +#define PURPOSE_BYTE_LEN (MY_AES_BLOCK_SIZE - 1) /* parenthesized so the expansion is safe inside larger expressions */ +#define PURPOSE_BYTE_OFFSET 0 +#define UNENCRYPTED_KEY_VER 0 + +/* If true, enable redo log encryption. */ +extern my_bool srv_encrypt_log; +/* Plain text used by AES_ECB to generate redo log crypt key. */ +extern byte redo_log_crypt_msg[MY_AES_BLOCK_SIZE]; +/* IV to concatenate with counter used by AES_CTR for redo log crypto. */ +extern byte aes_ctr_nonce[MY_AES_BLOCK_SIZE]; + +/*********************************************************************//** +Generate a 128-bit random message used to generate redo log crypto key. +Init AES-CTR iv/nonce with random number. +It is called only when clean startup (i.e., redo logs do not exist). */ +UNIV_INTERN +void +log_init_crypt_msg_and_nonce(void); +/*===============================*/ +/*********************************************************************//** +Init log_sys redo log crypto. 
*/ +UNIV_INTERN +void +log_init_crypt_key( +/*===============*/ + const byte* crypt_msg, /*< in: crypt msg */ + const uint crypt_ver, /*< in: mysqld key version */ + byte* crypt_key); /*< out: crypt struct with key and iv */ +/*********************************************************************//** +Encrypt log blocks. */ +UNIV_INTERN +Crypt_result +log_blocks_encrypt( +/*===============*/ + const byte* blocks, /*!< in: blocks before encryption */ + const ulint size, /*!< in: size of blocks, must be multiple of a log block */ + byte* dst_blocks); /*!< out: blocks after encryption */ + +/*********************************************************************//** +Decrypt log blocks. */ +UNIV_INTERN +Crypt_result +log_blocks_decrypt( +/*===============*/ + const byte* blocks, /*!< in: blocks before decryption */ + const ulint size, /*!< in: size of blocks, must be multiple of a log block */ + byte* dst_blocks); /*!< out: blocks after decryption */ + +/*********************************************************************//** +Set next checkpoint's key version to latest one, and generate current +key. Key version 0 means no encryption. */ +UNIV_INTERN +void +log_crypt_set_ver_and_key( +/*======================*/ + uint& key_ver, /*!< out: latest key version */ + byte* crypt_key); /*!< out: crypto key */ + +/*********************************************************************//** +Writes the crypto (version, msg and iv) info, which has been used for +log blocks with lsn <= this checkpoint's lsn, to a log header's +checkpoint buf. 
*/ +UNIV_INTERN +void +log_crypt_write_checkpoint_buf( +/*===========================*/ + byte* buf); /*!< in/out: checkpoint buffer */ + +#endif // log0crypt.h diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h index f130c8de423..cda4a692aec 100644 --- a/storage/xtradb/include/log0log.h +++ b/storage/xtradb/include/log0log.h @@ -40,6 +40,7 @@ Created 12/9/1995 Heikki Tuuri #include "sync0sync.h" #include "sync0rw.h" #endif /* !UNIV_HOTBACKUP */ +#include "log0crypt.h" /* Type used for all log sequence number storage and arithmetics */ typedef ib_uint64_t lsn_t; @@ -745,8 +746,20 @@ extern log_t* log_sys; is valid */ #endif #define LOG_CHECKPOINT_OFFSET_HIGH32 (16 + LOG_CHECKPOINT_ARRAY_END) -#define LOG_CHECKPOINT_SIZE (20 + LOG_CHECKPOINT_ARRAY_END) - +#define LOG_CRYPT_VER (20 + LOG_CHECKPOINT_ARRAY_END) + /*!< 32-bit key version. Corresponding + key has been used for log records with + lsn <= the checkpoint's lsn */ +#define LOG_CRYPT_MSG (24 + LOG_CHECKPOINT_ARRAY_END) + /*!< a 128-bit value used to + derive crypto key for redo log. 
+ It is generated via the concatenation + of 1 purpose byte T (0x02) and a + 15-byte random number.*/ +#define LOG_CRYPT_IV (40 + LOG_CHECKPOINT_ARRAY_END) + /*!< a 128-bit random number used as + AES-CTR iv/nonce for redo log */ +#define LOG_CHECKPOINT_SIZE (56 + LOG_CHECKPOINT_ARRAY_END) /* Offsets of a log file header */ #define LOG_GROUP_ID 0 /* log group number */ @@ -854,6 +867,10 @@ struct log_t{ lsn_t lsn; /*!< log sequence number */ ulint buf_free; /*!< first free offset within the log buffer */ + uint redo_log_crypt_ver; + /*!< 32-bit crypto ver */ + byte redo_log_crypt_key[MY_AES_BLOCK_SIZE]; + /*!< crypto key to encrypt redo log */ #ifndef UNIV_HOTBACKUP ib_prio_mutex_t mutex; /*!< mutex protecting the log */ @@ -1083,6 +1100,22 @@ struct log_t{ /* @} */ #endif /* UNIV_LOG_ARCHIVE */ +extern os_event_t log_scrub_event; +/* log scrubbing interval in ms */ +extern ulonglong innodb_scrub_log_interval; + +/*****************************************************************//** +This is the main thread for log scrub. It waits for an event and +when waked up fills current log block with dummy records and +sleeps again. 
+@return this function does not return, it calls os_thread_exit() */ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(log_scrub_thread)( +/*===============================*/ + void* arg); /*!< in: a dummy parameter + required by os_thread_create */ + #ifndef UNIV_NONINL #include "log0log.ic" #endif diff --git a/storage/xtradb/include/log0recv.h b/storage/xtradb/include/log0recv.h index 805b6c66768..9a3ecb3604d 100644 --- a/storage/xtradb/include/log0recv.h +++ b/storage/xtradb/include/log0recv.h @@ -471,6 +471,11 @@ struct recv_sys_t{ scan find a corrupt log block, or a corrupt log record, or there is a log parsing buffer overflow */ + uint recv_log_crypt_ver; + /*!< mysqld key version to generate redo + log crypt key for recovery */ + byte recv_log_crypt_key[MY_AES_BLOCK_SIZE]; + /*!< crypto key to decrypt redo log for recovery */ #ifdef UNIV_LOG_ARCHIVE log_group_t* archive_group; /*!< in archive recovery: the log group whose diff --git a/storage/xtradb/include/mtr0log.ic b/storage/xtradb/include/mtr0log.ic index bc49f655294..d508d30fafe 100644 --- a/storage/xtradb/include/mtr0log.ic +++ b/storage/xtradb/include/mtr0log.ic @@ -192,7 +192,7 @@ mlog_write_initial_log_record_fast( ulint offset; ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX)); - ut_ad(type <= MLOG_BIGGEST_TYPE); + ut_ad(type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(type)); ut_ad(ptr && log_ptr); page = (const byte*) ut_align_down(ptr, UNIV_PAGE_SIZE); diff --git a/storage/xtradb/include/mtr0mtr.h b/storage/xtradb/include/mtr0mtr.h index 827039953be..c9199153138 100644 --- a/storage/xtradb/include/mtr0mtr.h +++ b/storage/xtradb/include/mtr0mtr.h @@ -189,6 +189,14 @@ For 1 - 8 bytes, the flag value must give the length also! 
@{ */ page */ #define MLOG_BIGGEST_TYPE ((byte)53) /*!< biggest value (used in assertions) */ + +#define MLOG_FILE_WRITE_CRYPT_DATA ((byte)100) /*!< log record for + writing/updating crypt data of + a tablespace */ + +#define EXTRA_CHECK_MLOG_NUMBER(x) \ + ((x) == MLOG_FILE_WRITE_CRYPT_DATA) + /* @} */ /** @name Flags for MLOG_FILE operations @@ -251,6 +259,18 @@ mtr_release_s_latch_at_savepoint( #else /* !UNIV_HOTBACKUP */ # define mtr_release_s_latch_at_savepoint(mtr,savepoint,lock) ((void) 0) #endif /* !UNIV_HOTBACKUP */ + +/**********************************************************//** +Releases a buf_page stored in an mtr memo after a +savepoint. */ +UNIV_INTERN +void +mtr_release_buf_page_at_savepoint( +/*=============================*/ + mtr_t* mtr, /*!< in: mtr */ + ulint savepoint, /*!< in: savepoint */ + buf_block_t* block); /*!< in: block to release */ + /***************************************************************//** Gets the logging mode of a mini-transaction. @return logging mode: MTR_LOG_NONE, ... */ diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index 89a1d8c0fe4..c92cfa186fc 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -323,10 +323,12 @@ The wrapper functions have the prefix of "innodb_". 
*/ # define os_aio(type, mode, name, file, buf, offset, \ n, message1, message2, space_id, \ - trx, page_compressed, page_compression_level, write_size) \ + trx, page_compressed, page_compression_level, write_size, \ + page_encryption, page_encryption_key) \ pfs_os_aio_func(type, mode, name, file, buf, offset, \ n, message1, message2, space_id, trx, \ page_compressed, page_compression_level, write_size, \ + page_encryption, page_encryption_key, \ __FILE__, __LINE__) # define os_file_read(file, buf, offset, n, compressed) \ @@ -375,10 +377,12 @@ to original un-instrumented file I/O APIs */ # define os_aio(type, mode, name, file, buf, offset, n, message1, \ message2, space_id, trx, \ - page_compressed, page_compression_level, write_size) \ + page_compressed, page_compression_level, write_size, \ + page_encryption, page_encryption_key) \ os_aio_func(type, mode, name, file, buf, offset, n, \ message1, message2, space_id, trx, \ - page_compressed, page_compression_level, write_size) + page_compressed, page_compression_level, write_size, \ + page_encryption, page_encryption_key) # define os_file_read(file, buf, offset, n, compressed) \ os_file_read_func(file, buf, offset, n, NULL, compressed) @@ -806,6 +810,10 @@ pfs_os_aio_func( operation for this page and if initialized we do not trim again if actual page size does not decrease. 
*/ + ibool page_encryption, /*!< in: is page encryption used + on this file space */ + ulint page_encryption_key, /*!< page encryption + key to be used */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ /*******************************************************************//** @@ -1188,11 +1196,15 @@ os_aio_func( on this file space */ ulint page_compression_level, /*!< page compression level to be used */ - ulint* write_size);/*!< in/out: Actual write size initialized + ulint* write_size,/*!< in/out: Actual write size initialized after fist successfull trim operation for this page and if initialized we do not trim again if actual page size does not decrease. */ + ibool page_encryption, /*!< in: is page encryption used + on this file space */ + ulint page_encryption_key); /*!< page encryption key + to be used */ /************************************************************************//** Wakes up all async i/o threads so that they know to exit themselves in diff --git a/storage/xtradb/include/os0file.ic b/storage/xtradb/include/os0file.ic index 61300387e1b..c715d52cc13 100644 --- a/storage/xtradb/include/os0file.ic +++ b/storage/xtradb/include/os0file.ic @@ -229,6 +229,11 @@ pfs_os_aio_func( operation for this page and if initialized we do not trim again if actual page size does not decrease. 
*/ + ibool page_encryption, /*!< in: is page encryption used + on this file space */ + ulint page_encryption_key, /*!< page encryption + key to be used */ + const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -245,7 +250,8 @@ pfs_os_aio_func( result = os_aio_func(type, mode, name, file, buf, offset, n, message1, message2, space_id, trx, - page_compression, page_compression_level, write_size); + page_compression, page_compression_level, write_size , + page_encryption, page_encryption_key); register_pfs_file_io_end(locker, n); diff --git a/storage/xtradb/include/page0page.ic b/storage/xtradb/include/page0page.ic index 4a22a32112f..efa5e855eb7 100644 --- a/storage/xtradb/include/page0page.ic +++ b/storage/xtradb/include/page0page.ic @@ -38,6 +38,8 @@ Created 2/2/1994 Heikki Tuuri #define UNIV_INLINE #endif +extern my_bool srv_immediate_scrub_data_uncompressed; + /************************************************************//** Gets the start of a page. 
@return start of the page */ @@ -1157,6 +1159,13 @@ page_mem_free( ut_ad(rec_offs_validate(rec, index, offsets)); free = page_header_get_ptr(page, PAGE_FREE); + bool scrub = srv_immediate_scrub_data_uncompressed; + if (scrub) { + /* scrub record */ + uint size = rec_offs_data_size(offsets); + memset(rec, 0, size); + } + page_rec_set_next(rec, free); page_header_set_ptr(page, page_zip, PAGE_FREE, rec); diff --git a/storage/xtradb/include/srv0mon.h b/storage/xtradb/include/srv0mon.h index f13bd34cf5d..70868463448 100644 --- a/storage/xtradb/include/srv0mon.h +++ b/storage/xtradb/include/srv0mon.h @@ -328,6 +328,11 @@ enum monitor_id_t { MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED, MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR, + /* New monitor variables for page encryption */ + MONITOR_OVLD_PAGES_PAGE_ENCRYPTED, + MONITOR_OVLD_PAGES_PAGE_DECRYPTED, + MONITOR_OVLD_PAGES_PAGE_ENCRYPTION_ERROR, + /* Index related counters */ MONITOR_MODULE_INDEX, MONITOR_INDEX_SPLIT, diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index d5594230484..55d17625dfe 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -133,6 +133,12 @@ struct srv_stats_t { ulint_ctr_64_t pages_page_decompressed; /* Number of page compression errors */ ulint_ctr_64_t pages_page_compression_error; + /* Number of pages encrypted with page encryption */ + ulint_ctr_64_t pages_page_encrypted; + /* Number of pages decrypted with page encryption */ + ulint_ctr_64_t pages_page_decrypted; + /* Number of page encryption errors */ + ulint_ctr_64_t pages_page_encryption_error; /** Number of data read in total (in bytes) */ ulint_ctr_1_t data_read; @@ -568,6 +574,11 @@ extern ibool srv_buf_dump_thread_active; /* TRUE during the lifetime of the stats thread */ extern ibool srv_dict_stats_thread_active; +/* TRUE if enable log scrubbing */ +extern my_bool srv_scrub_log; +/* TRUE during the lifetime of the log scrub thread */ +extern ibool srv_log_scrub_thread_active; 
+ extern ulong srv_n_spin_wait_rounds; extern ulong srv_n_free_tickets_to_enter; extern ulong srv_thread_sleep_delay; @@ -673,6 +684,9 @@ extern my_bool srv_print_all_deadlocks; extern my_bool srv_cmp_per_index_enabled; +/* is encryption enabled */ +extern my_bool srv_encrypt_tables; + /** Status variables to be passed to MySQL */ extern struct export_var_t export_vars; @@ -1218,9 +1232,29 @@ struct export_var_t{ compression */ ib_int64_t innodb_pages_page_compression_error;/*!< Number of page compression errors */ + ib_int64_t innodb_pages_page_encrypted;/*!< Number of pages + encrypted by page encryption */ + ib_int64_t innodb_pages_page_decrypted;/*!< Number of pages + decrypted by page encryption */ + ib_int64_t innodb_pages_page_encryption_error;/*!< Number of page + encryption errors */ ulint innodb_sec_rec_cluster_reads; /*!< srv_sec_rec_cluster_reads */ - ulint innodb_sec_rec_cluster_reads_avoided; /*!< srv_sec_rec_cluster_reads_avoided */ + ulint innodb_sec_rec_cluster_reads_avoided; + /*!< srv_sec_rec_cluster_reads_avoided */ + + ulint innodb_encryption_rotation_pages_read_from_cache; + ulint innodb_encryption_rotation_pages_read_from_disk; + ulint innodb_encryption_rotation_pages_modified; + ulint innodb_encryption_rotation_pages_flushed; + ulint innodb_encryption_rotation_estimated_iops; + + ulint innodb_scrub_page_reorganizations; + ulint innodb_scrub_page_splits; + ulint innodb_scrub_page_split_failures_underflow; + ulint innodb_scrub_page_split_failures_out_of_filespace; + ulint innodb_scrub_page_split_failures_missing_index; + ulint innodb_scrub_page_split_failures_unknown; }; /** Thread slot in the thread table. 
*/ diff --git a/storage/xtradb/log/log0crypt.cc b/storage/xtradb/log/log0crypt.cc new file mode 100644 index 00000000000..43da1f10c19 --- /dev/null +++ b/storage/xtradb/log/log0crypt.cc @@ -0,0 +1,268 @@ +/**************************************************//** +@file log0crypt.cc +Innodb log encrypt/decrypt + +Created 11/25/2013 Minli Zhu +*******************************************************/ +#include "log0crypt.h" +#include "log0log.h" +#include "srv0start.h" // for srv_start_lsn +#include "log0recv.h" // for recv_sys + +/* If true, enable redo log encryption. */ +UNIV_INTERN my_bool srv_encrypt_log = FALSE; +/* + Sub system type for InnoDB redo log crypto. + Set and used to validate crypto msg. +*/ +static const byte redo_log_purpose_byte = 0x02; +/* Plain text used by AES_ECB to generate redo log crypt key. */ +byte redo_log_crypt_msg[MY_AES_BLOCK_SIZE] = {0}; +/* IV to concatenate with counter used by AES_CTR for redo log + * encryption/decryption. */ +byte aes_ctr_nonce[MY_AES_BLOCK_SIZE] = {0}; + +/*********************************************************************//** +Generate a 128-bit value used to generate crypt key for redo log. +It is generated via the concatenation of 1 purpose byte (0x02) and 15-byte +random number. +Init AES-CTR iv/nonce with random number. +It is called when: +- redo logs do not exist when start up, or +- transition from without crypto. +Note: +We should not use flags and conditions such as: + (srv_encrypt_log && + debug_use_static_keys && + GetLatestCryptoKeyVersion() == UNENCRYPTED_KEY_VER) +because they haven't been read and set yet in the situation of resetting +redo logs. 
+*/ +UNIV_INTERN +void +log_init_crypt_msg_and_nonce(void) +/*==============================*/ +{ +#ifndef HAVE_EncryptAes128Ctr + return; +#else + mach_write_to_1(redo_log_crypt_msg, redo_log_purpose_byte); + if (my_random_bytes(redo_log_crypt_msg + 1, PURPOSE_BYTE_LEN) != AES_OK) + { + fprintf(stderr, + "\nInnodb redo log crypto: generate " + "%u-byte random number as crypto msg failed.\n", + PURPOSE_BYTE_LEN); + abort(); + } + + if (my_random_bytes(aes_ctr_nonce, MY_AES_BLOCK_SIZE) != AES_OK) + { + fprintf(stderr, + "\nInnodb redo log crypto: generate " + "%u-byte random number as AES_CTR nonce failed.\n", + MY_AES_BLOCK_SIZE); + abort(); + } +#endif +} + +/*********************************************************************//** +Generate crypt key from crypt msg. */ +UNIV_INTERN +void +log_init_crypt_key( +/*===============*/ + const byte* crypt_msg, /*< in: crypt msg */ + const uint crypt_ver, /*< in: key version */ + byte* key) /*< out: crypt key*/ +{ +#ifndef HAVE_EncryptAes128Ctr + return; +#else + if (crypt_ver == UNENCRYPTED_KEY_VER) + { + fprintf(stderr, "\nInnodb redo log crypto: unencrypted key ver.\n\n"); + memset(key, 0, MY_AES_BLOCK_SIZE); + return; + } + + if (crypt_msg[PURPOSE_BYTE_OFFSET] != redo_log_purpose_byte) + { + fprintf(stderr, + "\nInnodb redo log crypto: msg type mismatched. 
" + "Expected: %x; Actual: %x\n", + redo_log_purpose_byte, crypt_msg[PURPOSE_BYTE_OFFSET]); + abort(); + } + + byte mysqld_key[MY_AES_BLOCK_SIZE] = {0}; + if (GetCryptoKey(crypt_ver, mysqld_key, MY_AES_BLOCK_SIZE)) + { + fprintf(stderr, + "\nInnodb redo log crypto: getting mysqld crypto key " + "from key version failed.\n"); + abort(); + } + + int dst_len; + int rc = EncryptAes128Ecb(mysqld_key, //key + crypt_msg, MY_AES_BLOCK_SIZE, //src, srclen + key, &dst_len); //dst, &dstlen + if (rc != AES_OK || dst_len != MY_AES_BLOCK_SIZE) + { + fprintf(stderr, + "\nInnodb redo log crypto: getting redo log crypto key " + "failed.\n"); + abort(); + } +#endif +} + +/*********************************************************************//** +Get a log block's start lsn. +@return a log block's start lsn */ +static inline +lsn_t +log_block_get_start_lsn( +/*====================*/ + lsn_t lsn, /*!< in: checkpoint lsn */ + ulint log_block_no) /*!< in: log block number */ +{ + lsn_t start_lsn = + (lsn & (lsn_t)0xffffffff00000000ULL) | + (((log_block_no - 1) & (lsn_t)0x3fffffff) << 9); + return start_lsn; +} + +/*********************************************************************//** +Call AES CTR to encrypt/decrypt log blocks. 
*/ +static +Crypt_result +log_blocks_crypt( +/*=============*/ + const byte* block, /*!< in: blocks before encrypt/decrypt*/ + const ulint size, /*!< in: size of block, must be multiple of a log block*/ + byte* dst_block, /*!< out: blocks after encrypt/decrypt */ + const bool is_encrypt) /*!< in: encrypt or decrypt*/ +{ +#ifndef HAVE_EncryptAes128Ctr + ut_error; // We should never be called +#else + byte *log_block = (byte*)block; + Crypt_result rc = AES_OK; + int src_len, dst_len; + byte aes_ctr_counter[MY_AES_BLOCK_SIZE]; + ulint log_block_no; + byte *key; + lsn_t lsn, log_block_start_lsn; /* lsn_t keeps the full 64-bit LSN written into the counter below */ + if (is_encrypt) + { + ut_a(log_sys && log_sys->redo_log_crypt_ver != UNENCRYPTED_KEY_VER); + key = (byte *)(log_sys->redo_log_crypt_key); + lsn = log_sys->lsn; + + } else { + ut_a(recv_sys && recv_sys->recv_log_crypt_ver != UNENCRYPTED_KEY_VER); + key = (byte *)(recv_sys->recv_log_crypt_key); + lsn = srv_start_lsn; + } + ut_a(size % OS_FILE_LOG_BLOCK_SIZE == 0); + src_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE; + for (ulint i = 0; i < size ; i += OS_FILE_LOG_BLOCK_SIZE) + { + log_block_no = log_block_get_hdr_no(log_block); + log_block_start_lsn = log_block_get_start_lsn(lsn, log_block_no); + + // Assume log block header is not encrypted + memcpy(dst_block, log_block, LOG_BLOCK_HDR_SIZE); + + // aes_ctr_counter = nonce(3-byte) + start lsn to a log block + // (8-byte) + lbn (4-byte) + abn + // (1-byte, only 5 bits are used). "+" means concatenate. 
+ bzero(aes_ctr_counter, MY_AES_BLOCK_SIZE); + memcpy(aes_ctr_counter, &aes_ctr_nonce, 3); + mach_write_to_8(aes_ctr_counter + 3, log_block_start_lsn); + mach_write_to_4(aes_ctr_counter + 11, log_block_no); + bzero(aes_ctr_counter + 15, 1); + rc = EncryptAes128Ctr(key, aes_ctr_counter, MY_AES_BLOCK_SIZE, // key, counter, block size + log_block + LOG_BLOCK_HDR_SIZE, src_len, // src, src_len + dst_block + LOG_BLOCK_HDR_SIZE, &dst_len); // dst, dst_len + ut_a(rc == CRYPT_OK); + ut_a(dst_len == src_len); + log_block += OS_FILE_LOG_BLOCK_SIZE; + dst_block += OS_FILE_LOG_BLOCK_SIZE; + } + + return rc; +#endif +} + +/*********************************************************************//** +Encrypt log blocks. */ +UNIV_INTERN +Crypt_result +log_blocks_encrypt( +/*===============*/ + const byte* block, /*!< in: blocks before encryption */ + const ulint size, /*!< in: size of blocks, must be multiple of a log block */ + byte* dst_block) /*!< out: blocks after encryption */ +{ + return log_blocks_crypt(block, size, dst_block, true); +} + +/*********************************************************************//** +Decrypt log blocks. */ +UNIV_INTERN +Crypt_result +log_blocks_decrypt( +/*===============*/ + const byte* block, /*!< in: blocks before decryption */ + const ulint size, /*!< in: size of blocks, must be multiple of a log block */ + byte* dst_block) /*!< out: blocks after decryption */ +{ + return log_blocks_crypt(block, size, dst_block, false); +} + +/*********************************************************************//** +Set next checkpoint's key version to latest one, and generate current +key. Key version 0 means no encryption. 
*/ +UNIV_INTERN +void +log_crypt_set_ver_and_key( +/*======================*/ + uint& key_ver, /*!< out: latest key version */ + byte* crypt_key) /*!< out: crypto key */ +{ + if (!srv_encrypt_log || + (key_ver = GetLatestCryptoKeyVersion()) == UNENCRYPTED_KEY_VER) + { + key_ver = UNENCRYPTED_KEY_VER; + memset(crypt_key, 0, MY_AES_BLOCK_SIZE); + return; + } + log_init_crypt_key(redo_log_crypt_msg, key_ver, crypt_key); +} + +/*********************************************************************//** +Writes the crypto (version, msg and iv) info, which has been used for +log blocks with lsn <= this checkpoint's lsn, to a log header's +checkpoint buf. */ +UNIV_INTERN +void +log_crypt_write_checkpoint_buf( +/*===========================*/ + byte* buf) /*!< in/out: checkpoint buffer */ +{ + ut_a(log_sys); + mach_write_to_4(buf + LOG_CRYPT_VER, log_sys->redo_log_crypt_ver); + if (!srv_encrypt_log || + log_sys->redo_log_crypt_ver == UNENCRYPTED_KEY_VER) { + memset(buf + LOG_CRYPT_MSG, 0, MY_AES_BLOCK_SIZE); + memset(buf + LOG_CRYPT_IV, 0, MY_AES_BLOCK_SIZE); + return; + } + ut_a(redo_log_crypt_msg[PURPOSE_BYTE_OFFSET] == redo_log_purpose_byte); + memcpy(buf + LOG_CRYPT_MSG, redo_log_crypt_msg, MY_AES_BLOCK_SIZE); + memcpy(buf + LOG_CRYPT_IV, aes_ctr_nonce, MY_AES_BLOCK_SIZE); +} diff --git a/storage/xtradb/log/log0log.cc b/storage/xtradb/log/log0log.cc index 031926fd91f..9b33c7858bc 100644 --- a/storage/xtradb/log/log0log.cc +++ b/storage/xtradb/log/log0log.cc @@ -90,6 +90,10 @@ UNIV_INTERN log_t* log_sys = NULL; UNIV_INTERN log_checksum_func_t log_checksum_algorithm_ptr = log_block_calc_checksum_innodb; +/* Next log block number to do dummy record filling if no log records written +for a while */ +static ulint next_lbn_to_pad = 0; + #ifdef UNIV_PFS_RWLOCK UNIV_INTERN mysql_pfs_key_t checkpoint_lock_key; # ifdef UNIV_LOG_ARCHIVE @@ -629,10 +633,9 @@ function_exit: return(lsn); } -#ifdef UNIV_LOG_ARCHIVE /******************************************************//** Pads the 
current log block full with dummy log records. Used in producing -consistent archived log files. */ +consistent archived log files and scrubbing redo log. */ static void log_pad_current_log_block(void) @@ -667,7 +670,6 @@ log_pad_current_log_block(void) ut_a(lsn % OS_FILE_LOG_BLOCK_SIZE == LOG_BLOCK_HDR_SIZE); } -#endif /* UNIV_LOG_ARCHIVE */ /******************************************************//** Calculates the data capacity of a log group, when the log file headers are not @@ -1003,6 +1005,7 @@ log_init(void) /*----------------------------*/ log_sys->next_checkpoint_no = 0; + log_sys->redo_log_crypt_ver = UNENCRYPTED_KEY_VER; log_sys->last_checkpoint_lsn = log_sys->lsn; log_sys->n_pending_checkpoint_writes = 0; @@ -1048,7 +1051,7 @@ log_init(void) log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE); log_sys->buf_free = LOG_BLOCK_HDR_SIZE; - log_sys->lsn = LOG_START_LSN + LOG_BLOCK_HDR_SIZE; + log_sys->lsn = LOG_START_LSN + LOG_BLOCK_HDR_SIZE; // TODO(minliz): ensure various LOG_START_LSN? MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE, log_sys->lsn - log_sys->last_checkpoint_lsn); @@ -1401,6 +1404,36 @@ log_block_store_checksum( } /******************************************************//** +Encrypt one or more log block before it is flushed to disk +@return true if encryption succeeds. 
*/ +static +bool +log_group_encrypt_before_write( +/*===========================*/ + const log_group_t* group, /*!< in: log group to be flushed */ + byte* block, /*!< in/out: pointer to a log block */ + const ulint size) /*!< in: size of log blocks */ + +{ + Crypt_result result = AES_OK; + + ut_ad(size % OS_FILE_LOG_BLOCK_SIZE == 0); + byte* dst_frame = (byte*)malloc(size); + if (dst_frame == NULL) { return(false); } /* out of memory: report failure instead of writing through NULL */ + //encrypt log blocks content + result = log_blocks_encrypt(block, size, dst_frame); + + if (result == AES_OK) + { + ut_ad(block[0] == dst_frame[0]); + memcpy(block, dst_frame, size); + } + free(dst_frame); + + return (result == AES_OK); +} + +/******************************************************//** Writes a buffer to a log file group. */ UNIV_INTERN void @@ -1506,6 +1539,15 @@ loop: ut_a(next_offset / UNIV_PAGE_SIZE <= ULINT_MAX); + if (srv_encrypt_log && + log_sys->redo_log_crypt_ver != UNENCRYPTED_KEY_VER && + !log_group_encrypt_before_write(group, buf, write_len)) + { + fprintf(stderr, + "\nInnodb redo log encryption failed.\n"); + abort(); + } + fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0, (ulint) (next_offset / UNIV_PAGE_SIZE), (ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf, @@ -2023,6 +2065,8 @@ log_group_checkpoint( mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no); mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn); + log_crypt_write_checkpoint_buf(buf); + lsn_offset = log_group_calc_lsn_offset(log_sys->next_checkpoint_lsn, group); mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_LOW32, @@ -2141,6 +2185,8 @@ log_reset_first_header_and_checkpoint( mach_write_to_8(buf + LOG_CHECKPOINT_NO, 0); mach_write_to_8(buf + LOG_CHECKPOINT_LSN, lsn); + log_crypt_write_checkpoint_buf(buf); + mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_LOW32, LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE); mach_write_to_4(buf + 
LOG_CHECKPOINT_OFFSET_HIGH32, 0); @@ -2295,7 +2341,6 @@ log_checkpoint( } log_sys->next_checkpoint_lsn = oldest_lsn; - #ifdef 
UNIV_DEBUG if (log_debug_writes) { fprintf(stderr, "Making checkpoint no " @@ -2307,6 +2352,10 @@ log_checkpoint( log_groups_write_checkpoint_info(); + /* generate key version and key used to encrypt next log block */ + log_crypt_set_ver_and_key(log_sys->redo_log_crypt_ver, + log_sys->redo_log_crypt_key); + MONITOR_INC(MONITOR_NUM_CHECKPOINT); mutex_exit(&(log_sys->mutex)); @@ -2506,6 +2555,33 @@ loop: } /******************************************************//** +Decrypt a specified log segment after they are read from a log file to a buffer. +@return true if decryption succeeds. */ +static +bool +log_group_decrypt_after_read( +/*==========================*/ + const log_group_t* group, /*!< in: log group to be read from */ + byte* frame, /*!< in/out: log segment */ + const ulint size) /*!< in: log segment size */ +{ + Crypt_result result; + ut_ad(size % OS_FILE_LOG_BLOCK_SIZE == 0); + byte* dst_frame = (byte*)malloc(size); + if (dst_frame == NULL) { return(false); } /* out of memory: report failure instead of writing through NULL */ + // decrypt log blocks content + result = log_blocks_decrypt(frame, size, dst_frame); + + if (result == AES_OK) + { + memcpy(frame, dst_frame, size); + } + free(dst_frame); + + return (result == AES_OK); +} + +/******************************************************//** Reads a specified log segment to a buffer. Optionally releases the log mutex before the I/O. */ UNIV_INTERN @@ -2565,6 +2641,13 @@ loop: (ulint) (source_offset % UNIV_PAGE_SIZE), len, buf, (type == LOG_ARCHIVE) ? 
&log_archive_io : NULL, 0); + if (recv_sys->recv_log_crypt_ver != UNENCRYPTED_KEY_VER && + !log_group_decrypt_after_read(group, buf, len)) + { + fprintf(stderr, "Innodb redo log decryption failed.\n"); + abort(); + } + start_lsn += len; buf += len; @@ -2857,6 +2940,14 @@ loop: MONITOR_INC(MONITOR_LOG_IO); + if (srv_encrypt_log && + log_sys->redo_log_crypt_ver != UNENCRYPTED_KEY_VER && + !log_group_encrypt_before_write(group, buf, len)) + { + fprintf(stderr, "Innodb redo log encryption failed.\n"); + abort(); + } + fil_io(OS_FILE_WRITE | OS_FILE_LOG, false, group->archive_space_id, 0, (ulint) (next_offset / UNIV_PAGE_SIZE), @@ -4074,4 +4165,62 @@ log_mem_free(void) log_sys = NULL; } } + +/** Event to wake up the log scrub thread */ +UNIV_INTERN os_event_t log_scrub_event = NULL; + +UNIV_INTERN ibool srv_log_scrub_thread_active = FALSE; + +/*****************************************************************//* +If no log record has been written for a while, fill current log +block with dummy records. */ +static +void +log_scrub() +/*=========*/ +{ + ulint cur_lbn = log_block_convert_lsn_to_no(log_sys->lsn); + if (next_lbn_to_pad == cur_lbn) + { + log_pad_current_log_block(); + } + next_lbn_to_pad = log_block_convert_lsn_to_no(log_sys->lsn); +} + +/* log scrubbing interval in ms. */ +UNIV_INTERN ulonglong innodb_scrub_log_interval; + +/*****************************************************************//** +This is the main thread for log scrub. It waits for an event and +when waked up fills current log block with dummy records and +sleeps again. 
+@return this function does not return, it calls os_thread_exit() */ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(log_scrub_thread)( +/*===============================*/ + void* arg __attribute__((unused))) /*!< in: a dummy parameter + required by os_thread_create */ +{ + ut_ad(!srv_read_only_mode); + + srv_log_scrub_thread_active = TRUE; + + while(srv_shutdown_state == SRV_SHUTDOWN_NONE) + { + os_event_wait_time(log_scrub_event, innodb_scrub_log_interval * 1000); + + log_scrub(); + + os_event_reset(log_scrub_event); + } + + srv_log_scrub_thread_active = FALSE; + + /* We count the number of threads in os_thread_exit(). A created + thread should always use that to exit and not use return() to exit. */ + os_thread_exit(NULL); + + OS_THREAD_DUMMY_RETURN; +} #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc index 7bda744704b..3491e246ba1 100644 --- a/storage/xtradb/log/log0recv.cc +++ b/storage/xtradb/log/log0recv.cc @@ -699,8 +699,9 @@ recv_synchronize_groups( recovered_lsn */ log_group_set_fields(group, recovered_lsn); - } + ut_a(log_sys); + } /* Copy the checkpoint info to the groups; remember that we have incremented checkpoint_no by one, and the info will not be written over the max checkpoint info, thus making the preservation of max @@ -1209,7 +1210,9 @@ recv_parse_or_apply_log_rec_body( + 0 /*FLST_PREV*/ || offs == PAGE_BTR_IBUF_FREE_LIST_NODE + PAGE_HEADER + FIL_ADDR_PAGE - + FIL_ADDR_SIZE /*FLST_NEXT*/); + + FIL_ADDR_SIZE /*FLST_NEXT*/ + || offs == + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); break; } } @@ -1437,6 +1440,9 @@ recv_parse_or_apply_log_rec_body( ptr, end_ptr, page, page_zip, index); } break; + case MLOG_FILE_WRITE_CRYPT_DATA: + ptr = fil_parse_write_crypt_data(ptr, end_ptr, block); + break; default: ptr = NULL; recv_sys->found_corrupt_log = TRUE; @@ -3090,6 +3096,7 @@ recv_recovery_from_checkpoint_start_func( ulint log_hdr_log_block_size; lsn_t checkpoint_lsn; ib_uint64_t checkpoint_no; 
+ uint recv_crypt_ver; lsn_t group_scanned_lsn = 0; lsn_t contiguous_lsn; #ifdef UNIV_LOG_ARCHIVE @@ -3154,6 +3161,14 @@ recv_recovery_from_checkpoint_start_func( #ifdef UNIV_LOG_ARCHIVE archived_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN); #endif /* UNIV_LOG_ARCHIVE */ + recv_crypt_ver = mach_read_from_4(buf + LOG_CRYPT_VER); + if (recv_crypt_ver == UNENCRYPTED_KEY_VER) + { + log_init_crypt_msg_and_nonce(); + } else { + ut_memcpy(redo_log_crypt_msg, buf + LOG_CRYPT_MSG, MY_AES_BLOCK_SIZE); + ut_memcpy(aes_ctr_nonce, buf + LOG_CRYPT_IV, MY_AES_BLOCK_SIZE); + } /* Read the first log file header to print a note if this is a recovery from a restored InnoDB Hot Backup */ @@ -3232,7 +3247,10 @@ recv_recovery_from_checkpoint_start_func( recv_sys->scanned_lsn = checkpoint_lsn; recv_sys->scanned_checkpoint_no = 0; recv_sys->recovered_lsn = checkpoint_lsn; - + recv_sys->recv_log_crypt_ver = recv_crypt_ver; + log_init_crypt_key(redo_log_crypt_msg, + recv_sys->recv_log_crypt_ver, + recv_sys->recv_log_crypt_key); srv_start_lsn = checkpoint_lsn; } @@ -3315,7 +3333,6 @@ recv_recovery_from_checkpoint_start_func( group = UT_LIST_GET_NEXT(log_groups, group); } - /* Done with startup scan. Clear the flag. 
*/ recv_log_scan_is_startup_type = FALSE; if (TYPE_CHECKPOINT) { @@ -3403,6 +3420,8 @@ recv_recovery_from_checkpoint_start_func( log_sys->next_checkpoint_lsn = checkpoint_lsn; log_sys->next_checkpoint_no = checkpoint_no + 1; + log_crypt_set_ver_and_key(log_sys->redo_log_crypt_ver, + log_sys->redo_log_crypt_key); #ifdef UNIV_LOG_ARCHIVE log_sys->archived_lsn = archived_lsn; @@ -3433,6 +3452,8 @@ recv_recovery_from_checkpoint_start_func( log_sys->lsn - log_sys->last_checkpoint_lsn); log_sys->next_checkpoint_no = checkpoint_no + 1; + log_crypt_set_ver_and_key(log_sys->redo_log_crypt_ver, + log_sys->redo_log_crypt_key); #ifdef UNIV_LOG_ARCHIVE if (archived_lsn == LSN_MAX) { @@ -3634,6 +3655,16 @@ recv_reset_logs( log_sys->next_checkpoint_no = 0; log_sys->last_checkpoint_lsn = 0; + /* redo_log_crypt_ver will be set by log_checkpoint() to the + latest key version. */ + log_sys->redo_log_crypt_ver = UNENCRYPTED_KEY_VER; + /* + Note: flags (srv_encrypt_log and debug_use_static_keys) + haven't been read and set yet! 
+ So don't use condition such as: + if (srv_encrypt_log && debug_use_static_keys) + */ + log_init_crypt_msg_and_nonce(); #ifdef UNIV_LOG_ARCHIVE log_sys->archived_lsn = log_sys->lsn; @@ -4112,4 +4143,3 @@ byte* recv_dblwr_t::find_page(ulint space_id, ulint page_no) return(result); } - diff --git a/storage/xtradb/mtr/mtr0log.cc b/storage/xtradb/mtr/mtr0log.cc index 0660c819240..db39ebeafa4 100644 --- a/storage/xtradb/mtr/mtr0log.cc +++ b/storage/xtradb/mtr/mtr0log.cc @@ -75,7 +75,7 @@ mlog_write_initial_log_record( { byte* log_ptr; - ut_ad(type <= MLOG_BIGGEST_TYPE); + ut_ad(type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(type)); ut_ad(type > MLOG_8BYTES); log_ptr = mlog_open(mtr, 11); @@ -111,7 +111,7 @@ mlog_parse_initial_log_record( } *type = (byte)((ulint)*ptr & ~MLOG_SINGLE_REC_FLAG); - ut_ad(*type <= MLOG_BIGGEST_TYPE); + ut_ad(*type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(*type)); ptr++; @@ -150,8 +150,6 @@ mlog_parse_nbytes( ib_uint64_t dval; ut_a(type <= MLOG_8BYTES); - ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX); - if (end_ptr < ptr + 2) { return(NULL); @@ -160,6 +158,11 @@ mlog_parse_nbytes( offset = mach_read_from_2(ptr); ptr += 2; + ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX || + /* scrubbing changes page type from FIL_PAGE_INDEX to + * FIL_PAGE_TYPE_ALLOCATED (rest of this assertion is below) */ + (type == MLOG_2BYTES && offset == FIL_PAGE_TYPE)); + if (offset >= UNIV_PAGE_SIZE) { recv_sys->found_corrupt_log = TRUE; @@ -219,6 +222,14 @@ mlog_parse_nbytes( } mach_write_to_2(page + offset, val); } + ut_a(!page || !page_zip || + fil_page_get_type(page) != FIL_PAGE_INDEX || + /* scrubbing changes page type from FIL_PAGE_INDEX to + * FIL_PAGE_TYPE_ALLOCATED */ + (type == MLOG_2BYTES && + offset == FIL_PAGE_TYPE && + val == FIL_PAGE_TYPE_ALLOCATED)); + break; case MLOG_4BYTES: if (page) { diff --git a/storage/xtradb/mtr/mtr0mtr.cc b/storage/xtradb/mtr/mtr0mtr.cc index a4877f175a1..4010fcce7ed 
100644 --- a/storage/xtradb/mtr/mtr0mtr.cc +++ b/storage/xtradb/mtr/mtr0mtr.cc @@ -440,3 +440,36 @@ mtr_print( } # endif /* !UNIV_HOTBACKUP */ #endif /* UNIV_DEBUG */ + +/**********************************************************//** +Releases a buf_page stored in an mtr memo after a +savepoint. */ +UNIV_INTERN +void +mtr_release_buf_page_at_savepoint( +/*=============================*/ + mtr_t* mtr, /*!< in: mtr */ + ulint savepoint, /*!< in: savepoint */ + buf_block_t* block) /*!< in: block to release */ +{ + mtr_memo_slot_t* slot; + dyn_array_t* memo; + + ut_ad(mtr); + ut_ad(mtr->magic_n == MTR_MAGIC_N); + ut_ad(mtr->state == MTR_ACTIVE); + + memo = &(mtr->memo); + + ut_ad(dyn_array_get_data_size(memo) > savepoint); + + slot = (mtr_memo_slot_t*) dyn_array_get_element(memo, savepoint); + + ut_ad(slot->object == block); + ut_ad(slot->type == MTR_MEMO_PAGE_S_FIX || + slot->type == MTR_MEMO_PAGE_X_FIX || + slot->type == MTR_MEMO_BUF_FIX); + + buf_page_release((buf_block_t*) slot->object, slot->type); + slot->object = NULL; +} diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index 6b9b2270cef..13bafeb048b 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -43,7 +43,9 @@ Created 10/21/1995 Heikki Tuuri #include "srv0srv.h" #include "srv0start.h" #include "fil0fil.h" +#include "fsp0fsp.h" #include "fil0pagecompress.h" +#include "fil0pageencryption.h" #include "buf0buf.h" #include "btr0types.h" #include "trx0trx.h" @@ -204,7 +206,7 @@ struct os_aio_slot_t{ #ifdef WIN_ASYNC_IO OVERLAPPED control; /*!< Windows control block for the aio request, MUST be first element in the structure*/ - void *arr; /*!< Array this slot belongs to*/ + void *arr; /*!< Array this slot belongs to*/ #endif ibool is_read; /*!< TRUE if a read operation */ @@ -237,9 +239,17 @@ struct os_aio_slot_t{ freed after the write has been completed */ + byte* page_encryption_page; /*!< Memory allocated for + page encrypted page and + freed after the write 
+ has been completed */ + ibool page_compression; ulint page_compression_level; + ibool page_encryption; + ulint page_encryption_key; + ulint* write_size; /*!< Actual write size initialized after fist successfull trim operation for this page and if @@ -250,9 +260,15 @@ struct os_aio_slot_t{ page compressed pages, do not free this */ - ibool page_compress_success; - /*!< TRUE if page compression was - successfull, false if not */ + byte* page_buf2; /*!< Actual page buffer for + page encrypted pages, do not + free this */ + byte* tmp_encryption_buf; /*!< a temporal buffer used by page encryption */ + + ibool page_compression_success; + ibool page_encryption_success; + /*!< TRUE if page compression was + successfull, false if not */ ulint file_block_size;/*!< file block size */ @@ -398,6 +414,19 @@ os_slot_alloc_lzo_mem( os_aio_slot_t* slot); /*!< in: slot structure */ #endif +/**********************************************************************//** +Allocate memory for temporal buffer used for page encryption. This +buffer is freed later. */ +UNIV_INTERN +void +os_slot_alloc_page_buf2( + os_aio_slot_t* slot); /*!< in: slot structure */ +/**********************************************************************//** +Allocate memory for temporal buffer used for page encryption. */ +UNIV_INTERN +void +os_slot_alloc_tmp_encryption_buf( + os_aio_slot_t* slot); /*!< in: slot structure */ /****************************************************************//** Does error handling when a file operation fails. @return TRUE if we should retry the operation */ @@ -508,19 +537,19 @@ os_get_os_version(void) /* Windows : Handling synchronous IO on files opened asynchronously. -If file is opened for asynchronous IO (FILE_FLAG_OVERLAPPED) and also bound to +If file is opened for asynchronous IO (FILE_FLAG_OVERLAPPED) and also bound to a completion port, then every IO on this file would normally be enqueued to the completion port. Sometimes however we would like to do a synchronous IO. 
This is possible if we initialitze have overlapped.hEvent with a valid event and set its lowest order bit to 1 (see MSDN ReadFile and WriteFile description for more info) -We'll create this special event once for each thread and store in thread local +We'll create this special event once for each thread and store in thread local storage. */ /***********************************************************************//** -Initialize tls index.for event handle used for synchronized IO on files that +Initialize tls index.for event handle used for synchronized IO on files that might be opened with FILE_FLAG_OVERLAPPED. */ static void win_init_syncio_event() @@ -3119,6 +3148,21 @@ try_again: os_mutex_exit(os_file_count_mutex); if (ret && len == n) { + /* If page is encrypted we need to decrypt it first */ + if (fil_page_is_compressed_encrypted((byte *)buf) || + fil_page_is_encrypted((byte *)buf)) { + + byte *dst_frm = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE)); + // Decrypt the data + fil_space_decrypt((fil_space_crypt_t* ) NULL, + (byte *)buf, + n, + dst_frm); + // Copy decrypted buffer back to buf + memcpy(buf, dst_frm, n); + ut_free(dst_frm); + } + /* Note that InnoDB writes files that are not formated as file spaces and they do not have FIL_PAGE_TYPE field, thus we must use here information is the actual @@ -3139,6 +3183,20 @@ try_again: ret = os_file_pread(file, buf, n, offset, trx); if ((ulint) ret == n) { + /* If page is encrypted we need to decrypt it first */ + if (fil_page_is_compressed_encrypted((byte *)buf) || + fil_page_is_encrypted((byte *)buf)) { + + byte * dst_frm = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE)); + // Decrypt the data + fil_space_decrypt((fil_space_crypt_t*) NULL, + (byte *)buf, + n, + dst_frm); + // Copy decrypted buffer back to buf + memcpy(buf, dst_frm, n); + ut_free(dst_frm); + } /* Note that InnoDB writes files that are not formated as file spaces and they do not have FIL_PAGE_TYPE @@ -3236,6 +3294,19 @@ try_again: 
os_mutex_exit(os_file_count_mutex); if (ret && len == n) { + /* If page is encrypted we need to decrypt it first */ + if (fil_page_is_compressed_encrypted((byte *)buf) || + fil_page_is_encrypted((byte *)buf)) { + byte * dst_frm = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE)); + // Decrypt the data + fil_space_decrypt((fil_space_crypt_t* ) NULL, + (byte *)buf, + n, + dst_frm); + // Copy decrypted buffer back to buf + memcpy(buf, dst_frm, n); + ut_free(dst_frm); + } /* Note that InnoDB writes files that are not formated as file spaces and they do not have FIL_PAGE_TYPE @@ -3257,6 +3328,19 @@ try_again: ret = os_file_pread(file, buf, n, offset, NULL); if ((ulint) ret == n) { + /* If the page is encrypted we need to decrypt it first */ + if (fil_page_is_compressed_encrypted((byte *)buf) || + fil_page_is_encrypted((byte *)buf)) { + byte * dst_frm = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE)); + // Decrypt the data + fil_space_decrypt((fil_space_crypt_t* ) NULL, + (byte *)buf, + n, + dst_frm); + // Copy decrypted buffer back to buf + memcpy(buf, dst_frm, n); + ut_free(dst_frm); + } /* Note that InnoDB writes files that are not formated as file spaces and they do not have FIL_PAGE_TYPE @@ -4286,6 +4370,7 @@ os_aio_array_free( for (i = 0; i < array->n_slots; i++) { os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i); + if (slot->page_compression_page) { ut_free(slot->page_compression_page); slot->page_compression_page = NULL; @@ -4295,8 +4380,19 @@ os_aio_array_free( ut_free(slot->lzo_mem); slot->lzo_mem = NULL; } + + if (slot->page_encryption_page) { + ut_free(slot->page_encryption_page); + slot->page_encryption_page = NULL; + } + + if (slot->tmp_encryption_buf) { + ut_free(slot->tmp_encryption_buf); + slot->tmp_encryption_buf = NULL; + } } + ut_free(array->slots); ut_free(array); @@ -4646,8 +4742,12 @@ os_aio_array_reserve_slot( on this file space */ ulint page_compression_level, /*!< page compression level to be used */ + ibool page_encryption, /*!< in: is 
page encryption used + on this file space */ + ulint page_encryption_key, /*!< page encryption key + to be used */ ulint* write_size)/*!< in/out: Actual write size initialized - after fist successfull trim + after first successfull trim operation for this page and if initialized we do not trim again if actual page size does not decrease. */ @@ -4742,10 +4842,14 @@ found: slot->io_already_done = FALSE; slot->space_id = space_id; - slot->page_compress_success = FALSE; + slot->page_compression_success = FALSE; + slot->page_encryption_success = FALSE; + slot->write_size = write_size; slot->page_compression_level = page_compression_level; slot->page_compression = page_compression; + slot->page_encryption_key = page_encryption_key; + slot->page_encryption = page_encryption; if (message1) { slot->file_block_size = fil_node_get_block_size(message1); @@ -4786,15 +4890,42 @@ found: len = real_len; buf = slot->page_buf; slot->len = real_len; - slot->page_compress_success = TRUE; + slot->page_compression_success = TRUE; } else { - slot->page_compress_success = FALSE; + slot->page_compression_success = FALSE; } /* Take array mutex back */ os_mutex_enter(array->mutex); } + /* If the space is page encryption and this is write operation + then we encrypt the page */ + if (message1 && type == OS_FILE_WRITE && page_encryption) { + /* Release the array mutex while encrypting */ + os_mutex_exit(array->mutex); + + // We allocate memory for page encrypted buffer if and only + // if it is not yet allocated. 
+ os_slot_alloc_page_buf2(slot); + os_slot_alloc_tmp_encryption_buf(slot); + /* ctr not yet supported in xtradb, lsn is null*/ + fil_space_encrypt( + fil_node_get_space_id(slot->message1), + slot->offset, + 0, /* QQ: Needs to be fixed to slot->lsn */ + (byte *)buf, + slot->len, + slot->page_buf2, + slot->page_encryption_key); + + slot->page_encryption_success = TRUE; + buf = slot->page_buf2; + + /* Take array mutex back */ + os_mutex_enter(array->mutex); + } + #ifdef WIN_ASYNC_IO control = &slot->control; control->Offset = (DWORD) offset & 0xFFFFFFFF; @@ -5075,15 +5206,21 @@ os_aio_func( on this file space */ ulint page_compression_level, /*!< page compression level to be used */ - ulint* write_size)/*!< in/out: Actual write size initialized + ulint* write_size,/*!< in/out: Actual write size initialized after fist successfull trim operation for this page and if initialized we do not trim again if actual page size does not decrease. */ + ibool page_encryption, /*!< in: is page encryption used + on this file space */ + ulint page_encryption_key) /*!< page encryption key + to be used */ + { os_aio_array_t* array; os_aio_slot_t* slot; #ifdef WIN_ASYNC_IO + void* buffer = NULL; DWORD len = (DWORD) n; BOOL ret; #endif @@ -5098,6 +5235,7 @@ os_aio_func( ut_ad((n & 0xFFFFFFFFUL) == n); #endif + wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER); @@ -5184,8 +5322,8 @@ try_again: slot = os_aio_array_reserve_slot(type, array, message1, message2, file, name, buf, offset, n, space_id, - page_compression, page_compression_level, write_size); - + page_compression, page_compression_level, + page_encryption, page_encryption_key, write_size); if (type == OS_FILE_READ) { if (srv_use_native_aio) { os_n_file_reads++; @@ -5213,7 +5351,16 @@ try_again: if (srv_use_native_aio) { os_n_file_writes++; #ifdef WIN_ASYNC_IO - ret = WriteFile(file, buf, (DWORD) n, &len, + if (page_encryption && slot->page_encryption_success) { + buffer = 
slot->page_buf2; + n = slot->len; + } else if (page_compression && slot->page_compression_success) { + buffer = slot->page_buf; + n = slot->len; + } else { + buffer = buf; + } + ret = WriteFile(file, buffer, (DWORD) n, &len, &(slot->control)); if(!ret && GetLastError() != ERROR_IO_PENDING) @@ -5296,7 +5443,7 @@ os_aio_windows_handle( HANDLE port = READ_SEGMENT(segment)? read_completion_port : completion_port; for(;;) { - ret = GetQueuedCompletionStatus(port, &len, &key, + ret = GetQueuedCompletionStatus(port, &len, &key, (OVERLAPPED **)&slot, INFINITE); /* If shutdown key was received, repost the shutdown message and exit */ @@ -5311,19 +5458,19 @@ os_aio_windows_handle( if(WRITE_SEGMENT(segment)&& slot->type == OS_FILE_READ) { /* - Redirect read completions to the dedicated completion port + Redirect read completions to the dedicated completion port and thread. We need to split read and write threads. If we do not - do that, and just allow all io threads process all IO, it is possible + do that, and just allow all io threads process all IO, it is possible to get stuck in a deadlock in buffer pool code, - Currently, the problem is solved this way - "write io" threads + Currently, the problem is solved this way - "write io" threads always get all completion notifications, from both async reads and writes. Write completion is handled in the same thread that gets it. Read completion is forwarded via PostQueueCompletionStatus()) to the second completion port dedicated solely to reads. One of the "read io" threads waiting on this port will finally handle the IO. - Forwarding IO completion this way costs a context switch , and this + Forwarding IO completion this way costs a context switch , and this seems tolerable since asynchronous reads are by far less frequent. 
*/ ut_a(PostQueuedCompletionStatus(read_completion_port, len, key, @@ -5371,14 +5518,18 @@ os_aio_windows_handle( switch (slot->type) { case OS_FILE_WRITE: - if (slot->message1 && slot->page_compression && slot->page_compress_success && slot->page_buf) { - ret_val = os_file_write(slot->name, slot->file, slot->page_buf, - slot->offset, slot->len); - } else { - - ret_val = os_file_write(slot->name, slot->file, slot->buf, - slot->offset, slot->len); - } + if (slot->message1 && slot->page_encryption && slot->page_encryption_success) { + ret_val = os_file_write(slot->name, slot->file, slot->page_buf2, + slot->offset, slot->len); + } else { + if (slot->message1 && slot->page_compression && slot->page_compression_success) { + ret_val = os_file_write(slot->name, slot->file, slot->page_buf, + slot->offset, slot->len); + } else { + ret_val = os_file_write(slot->name, slot->file, slot->buf, + slot->offset, slot->len); + } + } break; case OS_FILE_READ: ret_val = os_file_read(slot->file, slot->buf, @@ -5408,6 +5559,22 @@ os_aio_windows_handle( } if (slot->type == OS_FILE_READ) { + if (fil_page_is_compressed_encrypted(slot->buf) || + fil_page_is_encrypted(slot->buf)) { + ut_ad(slot->message1 != NULL); + os_slot_alloc_page_buf2(slot); + os_slot_alloc_tmp_encryption_buf(slot); + + // Decrypt the data + fil_space_decrypt( + fil_node_get_space_id(slot->message1), + slot->buf, + slot->len, + slot->page_buf2); + // Copy decrypted buffer back to buf + memcpy(slot->buf, slot->page_buf2, slot->len); + } + if (fil_page_is_compressed(slot->buf)) { os_slot_alloc_page_buf(slot); @@ -5416,11 +5583,13 @@ os_aio_windows_handle( os_slot_alloc_lzo_mem(slot); } #endif - fil_decompress_page(slot->page_buf, slot->buf, slot->len, slot->write_size); + fil_decompress_page(slot->page_buf, slot->buf, + slot->len, slot->write_size); } } else { /* OS_FILE_WRITE */ - if (slot->page_compress_success && fil_page_is_compressed(slot->page_buf)) { + if (slot->page_compression_success && + 
fil_page_is_compressed(slot->page_buf)) { if (srv_use_trim && os_fallocate_failed == FALSE) { // Deallocate unused blocks from file system os_file_trim(slot); @@ -5518,9 +5687,26 @@ retry: ut_a(slot->pos < end_pos); if (slot->type == OS_FILE_READ) { - /* If the table is page compressed and this is read, - we decompress before we annouce the read is - complete. For writes, we free the compressed page. */ + /* If the page is page encrypted we decrypt */ + if (fil_page_is_compressed_encrypted(slot->buf) || + fil_page_is_encrypted(slot->buf)) { + os_slot_alloc_page_buf2(slot); + os_slot_alloc_tmp_encryption_buf(slot); + ut_ad(slot->message1 != NULL); + + // Decrypt the data + fil_space_decrypt(fil_node_get_space_id(slot->message1), + slot->buf, + slot->len, + slot->page_buf2); + // Copy decrypted buffer back to buf + memcpy(slot->buf, slot->page_buf2, slot->len); + } + + /* If the table is page compressed and this + is read, we decompress before we announce + the read is complete. For writes, we free + the compressed page. */ if (fil_page_is_compressed(slot->buf)) { // We allocate memory for page compressed buffer if and only // if it is not yet allocated. @@ -5534,7 +5720,7 @@ retry: } } else { /* OS_FILE_WRITE */ - if (slot->page_compress_success && + if (slot->page_compression_success && fil_page_is_compressed(slot->page_buf)) { ut_ad(slot->page_compression_page); if (srv_use_trim && os_fallocate_failed == FALSE) { @@ -6600,6 +6786,29 @@ os_file_trim( } /**********************************************************************//** +Allocate memory for temporal buffer used for page encryption. This +buffer is freed later. 
*/ +UNIV_INTERN +void +os_slot_alloc_page_buf2( +/*===================*/ + os_aio_slot_t* slot) /*!< in: slot structure */ +{ + ut_a(slot != NULL); + + if(slot->page_buf2 == NULL) { + byte* cbuf2; + byte* cbuf; + + cbuf2 = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*2)); + cbuf = static_cast<byte *>(ut_align(cbuf2, UNIV_PAGE_SIZE)); + slot->page_encryption_page = static_cast<byte *>(cbuf2); + slot->page_buf2 = static_cast<byte *>(cbuf); + memset(slot->page_encryption_page, 0, UNIV_PAGE_SIZE*2); + } +} + +/**********************************************************************//** Allocate memory for temporal buffer used for page compression. This buffer is freed later. */ UNIV_INTERN @@ -6608,18 +6817,18 @@ os_slot_alloc_page_buf( /*===================*/ os_aio_slot_t* slot) /*!< in: slot structure */ { - byte* cbuf2; - byte* cbuf; - ut_a(slot != NULL); - if (slot->page_compression_page == NULL) { - /* We allocate extra to avoid memory overwrite on compression */ + if (slot->page_buf == NULL) { + byte* cbuf2; + byte* cbuf; + /* We allocate extra to avoid memory overwrite on + compression */ cbuf2 = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*2)); cbuf = static_cast<byte *>(ut_align(cbuf2, UNIV_PAGE_SIZE)); slot->page_compression_page = static_cast<byte *>(cbuf2); slot->page_buf = static_cast<byte *>(cbuf); - memset(slot->page_compression_page, 0, UNIV_PAGE_SIZE*2); ut_a(slot->page_buf != NULL); + memset(slot->page_compression_page, 0, UNIV_PAGE_SIZE*2); } } @@ -6636,12 +6845,27 @@ os_slot_alloc_lzo_mem( ut_a(slot != NULL); if(slot->lzo_mem == NULL) { slot->lzo_mem = static_cast<byte *>(ut_malloc(LZO1X_1_15_MEM_COMPRESS)); - memset(slot->lzo_mem, 0, LZO1X_1_15_MEM_COMPRESS); ut_a(slot->lzo_mem != NULL); + memset(slot->lzo_mem, 0, LZO1X_1_15_MEM_COMPRESS); } } #endif +/**********************************************************************//** +Allocate memory for temporal buffer used for page encryption. 
*/ +UNIV_INTERN +void +os_slot_alloc_tmp_encryption_buf( +/*=============================*/ + os_aio_slot_t* slot) /*!< in: slot structure */ +{ + ut_a(slot != NULL); + if (slot->tmp_encryption_buf == NULL) { + slot->tmp_encryption_buf = static_cast<byte *>(ut_malloc(64)); + memset(slot->tmp_encryption_buf, 0, 64); + } +} + /***********************************************************************//** Try to get number of bytes per sector from file system. @return file block size */ diff --git a/storage/xtradb/page/page0page.cc b/storage/xtradb/page/page0page.cc index bd5fb36af8f..4aff88818bb 100644 --- a/storage/xtradb/page/page0page.cc +++ b/storage/xtradb/page/page0page.cc @@ -1087,7 +1087,9 @@ delete_all: last_rec = page_rec_get_prev(page_get_supremum_rec(page)); - if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED)) { + bool scrub = srv_immediate_scrub_data_uncompressed; + if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED) || + scrub) { rec_t* rec2 = rec; /* Calculate the sum of sizes and the number of records */ size = 0; @@ -1104,6 +1106,12 @@ delete_all: size += s; n_recs++; + if (scrub) { + /* scrub record */ + uint recsize = rec_offs_data_size(offsets); + memset(rec2, 0, recsize); + } + rec2 = page_rec_get_next(rec2); } while (!page_rec_is_supremum(rec2)); diff --git a/storage/xtradb/row/row0import.cc b/storage/xtradb/row/row0import.cc index c513320afc1..d5f766ef51b 100644 --- a/storage/xtradb/row/row0import.cc +++ b/storage/xtradb/row/row0import.cc @@ -1990,7 +1990,8 @@ PageConverter::update_header( } mach_write_to_8( - get_frame(block) + FIL_PAGE_FILE_FLUSH_LSN, m_current_lsn); + get_frame(block) + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, + m_current_lsn); /* Write space_id to the tablespace header, page 0. 
*/ mach_write_to_4( diff --git a/storage/xtradb/row/row0mysql.cc b/storage/xtradb/row/row0mysql.cc index d4757a1de01..cddc4a94aca 100644 --- a/storage/xtradb/row/row0mysql.cc +++ b/storage/xtradb/row/row0mysql.cc @@ -3258,6 +3258,41 @@ run_again: return(err); } +static +void +fil_wait_crypt_bg_threads( + dict_table_t* table) +{ + uint start = time(0); + uint last = start; + + if (table->space != 0) { + fil_space_crypt_mark_space_closing(table->space); + } + + while (table->n_ref_count > 0) { + dict_mutex_exit_for_mysql(); + os_thread_sleep(20000); + dict_mutex_enter_for_mysql(); + uint now = time(0); + if (now >= last + 30) { + fprintf(stderr, + "WARNING: waited %u seconds " + "for ref-count on table: %s space: %u\n", + now - start, table->name, table->space); + last = now; + } + + if (now >= start + 300) { + fprintf(stderr, + "WARNING: after %u seconds, gave up waiting " + "for ref-count on table: %s space: %u\n", + now - start, table->name, table->space); + break; + } + } +} + /*********************************************************************//** Truncates a table for MySQL. @return error code or DB_SUCCESS */ @@ -4068,6 +4103,9 @@ row_drop_table_for_mysql( shouldn't have to. There should never be record locks on a table that is going to be dropped. */ + /* Wait on background threads to stop using table */ + fil_wait_crypt_bg_threads(table); + if (table->n_ref_count == 0) { lock_remove_all_on_table(table, TRUE); ut_a(table->n_rec_locks == 0); diff --git a/storage/xtradb/row/row0umod.cc b/storage/xtradb/row/row0umod.cc index 6a23e1a6c12..675d4fa3632 100644 --- a/storage/xtradb/row/row0umod.cc +++ b/storage/xtradb/row/row0umod.cc @@ -334,8 +334,15 @@ row_undo_mod_clust( } } - ut_ad(rec_get_trx_id(btr_pcur_get_rec(pcur), index) - == node->new_trx_id); + /** + * when scrubbing, and records gets cleared, + * the transaction id is not present afterwards. 
+ * this is safe as: since the record is on free-list + * it can be reallocated at any time after this mtr-commits + * which is just below + */ + ut_ad(srv_immediate_scrub_data_uncompressed || + rec_get_trx_id(btr_pcur_get_rec(pcur), index) == node->new_trx_id); btr_pcur_commit_specify_mtr(pcur, &mtr); diff --git a/storage/xtradb/srv/srv0mon.cc b/storage/xtradb/srv/srv0mon.cc index 02dfcb75cad..5bac6214c5b 100644 --- a/storage/xtradb/srv/srv0mon.cc +++ b/storage/xtradb/srv/srv0mon.cc @@ -985,6 +985,21 @@ static monitor_info_t innodb_counter_info[] = MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR}, + {"compress_pages_page_encrypted", "compression", + "Number of pages encrypted by page encryption", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_ENCRYPTED}, + + {"compress_pages_page_decrypted", "compression", + "Number of pages decrypted by page encryption", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_DECRYPTED}, + + {"compress_pages_page_encryption_error", "compression", + "Number of page encryption errors ", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_ENCRYPTION_ERROR}, + /* ========== Counters for Index ========== */ {"module_index", "index", "Index Manager", MONITOR_MODULE, @@ -1998,6 +2013,15 @@ srv_mon_process_existing_counter( case MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR: value = srv_stats.pages_page_compression_error; break; + case MONITOR_OVLD_PAGES_PAGE_ENCRYPTED: + value = srv_stats.pages_page_encrypted; + break; + case MONITOR_OVLD_PAGES_PAGE_DECRYPTED: + value = srv_stats.pages_page_decrypted; + break; + case MONITOR_OVLD_PAGES_PAGE_ENCRYPTION_ERROR: + value = srv_stats.pages_page_encryption_error; + break; default: ut_error; diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index 9d4c21a6ee9..e050e9d5044 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -74,8 +74,10 @@ Created 10/8/1995 Heikki Tuuri 
#include "btr0defragment.h" #include "mysql/plugin.h" #include "mysql/service_thd_wait.h" +#include "fil0fil.h" #include "fil0pagecompress.h" #include <my_rdtsc.h> +#include "btr0scrub.h" /* prototypes of new functions added to ha_innodb.cc for kill_idle_transaction */ ibool innobase_thd_is_idle(const void* thd); @@ -111,6 +113,9 @@ UNIV_INTERN ibool srv_buf_dump_thread_active = FALSE; UNIV_INTERN ibool srv_dict_stats_thread_active = FALSE; +UNIV_INTERN ibool srv_log_scrub_active = FALSE; +UNIV_INTERN my_bool srv_scrub_log = FALSE; + UNIV_INTERN const char* srv_main_thread_op_info = ""; /** Prefix used by MySQL to indicate pre-5.1 table name encoding */ @@ -1703,10 +1708,14 @@ srv_export_innodb_status(void) ulint mem_adaptive_hash, mem_dictionary; read_view_t* oldest_view; ulint i; + fil_crypt_stat_t crypt_stat; + btr_scrub_stat_t scrub_stat; buf_get_total_stat(&stat); buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len); buf_get_total_list_size_in_bytes(&buf_pools_list_size); + fil_crypt_total_stat(&crypt_stat); + btr_scrub_total_stat(&scrub_stat); mem_adaptive_hash = 0; @@ -1973,6 +1982,10 @@ srv_export_innodb_status(void) export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op; export_vars.innodb_page_compressed_trim_op_saved = srv_stats.page_compressed_trim_op_saved; export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed; + export_vars.innodb_pages_page_compression_error = srv_stats.pages_page_compression_error; + export_vars.innodb_pages_page_decrypted = srv_stats.pages_page_decrypted; + export_vars.innodb_pages_page_encrypted = srv_stats.pages_page_encrypted; + export_vars.innodb_pages_page_encryption_error = srv_stats.pages_page_encryption_error; export_vars.innodb_defragment_compression_failures = btr_defragment_compression_failures; @@ -2016,6 +2029,30 @@ srv_export_innodb_status(void) export_vars.innodb_sec_rec_cluster_reads_avoided = srv_stats.n_sec_rec_cluster_reads_avoided; + 
export_vars.innodb_encryption_rotation_pages_read_from_cache = + crypt_stat.pages_read_from_cache; + export_vars.innodb_encryption_rotation_pages_read_from_disk = + crypt_stat.pages_read_from_disk; + export_vars.innodb_encryption_rotation_pages_modified = + crypt_stat.pages_modified; + export_vars.innodb_encryption_rotation_pages_flushed = + crypt_stat.pages_flushed; + export_vars.innodb_encryption_rotation_estimated_iops = + crypt_stat.estimated_iops; + + export_vars.innodb_scrub_page_reorganizations = + scrub_stat.page_reorganizations; + export_vars.innodb_scrub_page_splits = + scrub_stat.page_splits; + export_vars.innodb_scrub_page_split_failures_underflow = + scrub_stat.page_split_failures_underflow; + export_vars.innodb_scrub_page_split_failures_out_of_filespace = + scrub_stat.page_split_failures_out_of_filespace; + export_vars.innodb_scrub_page_split_failures_missing_index = + scrub_stat.page_split_failures_missing_index; + export_vars.innodb_scrub_page_split_failures_unknown = + scrub_stat.page_split_failures_unknown; + mutex_exit(&srv_innodb_monitor_mutex); } @@ -2430,6 +2467,8 @@ srv_any_background_threads_are_active(void) thread_active = "buf_dump_thread"; } else if (srv_dict_stats_thread_active) { thread_active = "dict_stats_thread"; + } else if (srv_scrub_log && srv_log_scrub_thread_active) { + thread_active = "log_scrub_thread"; } os_event_set(srv_error_event); @@ -2437,6 +2476,8 @@ srv_any_background_threads_are_active(void) os_event_set(srv_buf_dump_event); os_event_set(lock_sys->timeout_event); os_event_set(dict_stats_event); + if (srv_scrub_log) + os_event_set(log_scrub_event); return(thread_active); } diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index d0b4181fe8c..0bd1faf323e 100644 --- a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -71,6 +71,7 @@ Created 2/16/1996 Heikki Tuuri #include "buf0flu.h" #include "btr0defragment.h" #include "ut0timer.h" +#include "btr0scrub.h" #ifndef 
UNIV_HOTBACKUP # include "trx0rseg.h" @@ -682,7 +683,8 @@ create_log_files( fil_space_create( logfilename, SRV_LOG_SPACE_FIRST_ID, fsp_flags_set_page_size(0, UNIV_PAGE_SIZE), - FIL_LOG); + FIL_LOG, + NULL /* no encryption yet */); ut_a(fil_validate()); logfile0 = fil_node_create( @@ -704,7 +706,7 @@ create_log_files( #ifdef UNIV_LOG_ARCHIVE /* Create the file space object for archived logs. */ fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1, - 0, FIL_LOG); + 0, FIL_LOG, NULL /* no encryption yet */); #endif log_group_init(0, srv_n_log_files, srv_log_file_size * UNIV_PAGE_SIZE, @@ -830,6 +832,7 @@ open_or_create_data_files( ulint space; ulint rounded_size_pages; char name[10000]; + fil_space_crypt_t* crypt_data; if (srv_n_data_files >= 1000) { @@ -1045,7 +1048,7 @@ skip_size_check: check_first_page: check_msg = fil_read_first_page( files[i], one_opened, &flags, &space, - min_flushed_lsn, max_flushed_lsn, ULINT_UNDEFINED); + min_flushed_lsn, max_flushed_lsn, ULINT_UNDEFINED, &crypt_data); if (check_msg) { @@ -1139,6 +1142,8 @@ check_first_page: } *sum_of_new_sizes += srv_data_file_sizes[i]; + + crypt_data = fil_space_create_crypt_data(); } ret = os_file_close(files[i]); @@ -1146,7 +1151,9 @@ check_first_page: if (i == 0) { flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE); - fil_space_create(name, 0, flags, FIL_TABLESPACE); + fil_space_create(name, 0, flags, FIL_TABLESPACE, + crypt_data); + crypt_data = NULL; } ut_a(fil_validate()); @@ -1292,7 +1299,8 @@ srv_undo_tablespace_open( /* Set the compressed page size to 0 (non-compressed) */ flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE); - fil_space_create(name, space, flags, FIL_TABLESPACE); + fil_space_create(name, space, flags, FIL_TABLESPACE, + NULL /* no encryption */); ut_a(fil_validate()); @@ -2325,7 +2333,8 @@ innobase_start_or_create_for_mysql(void) fil_space_create(logfilename, SRV_LOG_SPACE_FIRST_ID, fsp_flags_set_page_size(0, UNIV_PAGE_SIZE), - FIL_LOG); + FIL_LOG, + NULL /* no encryption 
yet */); ut_a(fil_validate()); @@ -2347,7 +2356,7 @@ innobase_start_or_create_for_mysql(void) /* Create the file space object for archived logs. Under MySQL, no archiving ever done. */ fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1, - 0, FIL_LOG); + 0, FIL_LOG, NULL /* no encryption yet */); #endif /* UNIV_LOG_ARCHIVE */ log_group_init(0, i, srv_log_file_size * UNIV_PAGE_SIZE, SRV_LOG_SPACE_FIRST_ID, @@ -2381,6 +2390,11 @@ files_checked: dict_stats_thread_init(); } + if (!srv_read_only_mode && srv_scrub_log) { + /* TODO(minliz): have/use log_scrub_thread_init() instead? */ + log_scrub_event = os_event_create(); + } + trx_sys_file_format_init(); trx_sys_create(); @@ -3007,6 +3021,16 @@ files_checked: /* Create the thread that will optimize the FTS sub-system. */ fts_optimize_init(); + + /* Init data for datafile scrub threads */ + btr_scrub_init(); + + /* Create thread(s) that handles key rotation */ + fil_crypt_threads_init(); + + /* Create the log scrub thread */ + if (srv_scrub_log) + os_thread_create(log_scrub_thread, NULL, NULL); } /* Initialize online defragmentation. */ @@ -3072,6 +3096,9 @@ innobase_shutdown_for_mysql(void) fts_optimize_start_shutdown(); fts_optimize_end(); + + /* Shutdown key rotation threads */ + fil_crypt_threads_end(); } /* 1. Flush the buffer pool to disk, write the current lsn to @@ -3180,6 +3207,18 @@ innobase_shutdown_for_mysql(void) if (!srv_read_only_mode) { dict_stats_thread_deinit(); + if (srv_scrub_log) { + /* TODO(minliz): have/use log_scrub_thread_deinit() instead? 
*/ + os_event_free(log_scrub_event); + log_scrub_event = NULL; + } + } + + if (!srv_read_only_mode) { + fil_crypt_threads_cleanup(); + + /* Cleanup data for datafile scrubbing */ + btr_scrub_cleanup(); } #ifdef __WIN__ diff --git a/tests/test_innodb_log_encryption.sh b/tests/test_innodb_log_encryption.sh new file mode 100644 index 00000000000..774afa76d3b --- /dev/null +++ b/tests/test_innodb_log_encryption.sh @@ -0,0 +1,119 @@ +#!/bin/bash +################################################################## +# - restart server with default (no encryption, no key ver and key). +# - verify various inputs will not corrupt. +# - verify transition from without crypto to with crypto and vice versa work. +# - verify transition from various key versions, including dynamically change it +# after startup, work. +################################################################## +TEST_INSTANCE_NAME="test" +TEST_INSTANCE_DIR="/tmp/instance" +TEST_INSTANCE_PATH=${TEST_INSTANCE_DIR}/${TEST_INSTANCE_NAME} +TEST_INSTANCE_SOCK_FILENAME=mysql.sock +TEST_INSTANCE_SOCK=${TEST_INSTANCE_PATH}/${TEST_INSTANCE_SOCK_FILENAME} +TEST_INSTANCE_ERR_FILE=${TEST_INSTANCE_PATH}/mysql.err +TEST_INSTANCE_DATA_DIR=${TEST_INSTANCE_PATH}/datadir + +google/instance restart ${TEST_INSTANCE_NAME} + +MYSQLD_EXTRA_ARGS="--debug_use_static_keys" google/instance restart ${TEST_INSTANCE_NAME} + +MYSQLD_EXTRA_ARGS="--innodb_encrypt_log=1" google/instance restart ${TEST_INSTANCE_NAME} + +MYSQLD_EXTRA_ARGS="--debug_use_static_keys --innodb_encrypt_log=1" google/instance restart ${TEST_INSTANCE_NAME} + +MYSQLD_EXTRA_ARGS="--debug_use_static_keys --debug_crypto_key_version=11" google/instance restart ${TEST_INSTANCE_NAME} + +MYSQLD_EXTRA_ARGS="--debug_use_static_keys --debug_crypto_key_version=12 --innodb_encrypt_log=1" google/instance restart ${TEST_INSTANCE_NAME} + +MYSQLD_EXTRA_ARGS="--debug_use_static_keys --debug_crypto_key_version=123 --innodb_encrypt_log=1" google/instance restart ${TEST_INSTANCE_NAME} + +# 
-- manually create a database sbtest +# mysql> create database sbtest; + +sysbench --test=oltp --oltp-table-size=1000 --mysql-user=root --mysql-socket=${TEST_INSTANCE_SOCK} prepare & + +sysbench --num-threads=10 --test=oltp --oltp-table-size=1000 --mysql-user=root --mysql-socket=${TEST_INSTANCE_SOCK} run & + +# -- change key version through mysql client +# mysql -S ${TEST_INSTANCE_SOCK} k -u root +# mysql> set global debug_crypto_key_version=7; +# ps aux | grep mysqld +# -- simulate a fast shutdown +# kill <mysqld's pid> + +MYSQLD_EXTRA_ARGS="--debug_use_static_keys" google/instance restart ${TEST_INSTANCE_NAME} + +google/instance restart ${TEST_INSTANCE_NAME} + +grep -n corrupt ${TEST_INSTANCE_ERR_FILE} | tail -100 + +################################################################## +# - clean shutdown. +# - remove InnoDB redo log files. +# - start the server with encryption on. +# - verify no corruption. 
+################################################################## +google/instance stop ${TEST_INSTANCE_NAME} +mv ${TEST_INSTANCE_DATA_DIR}/ib_logfile0 ${TEST_INSTANCE_DATA_DIR}/ib_logfile0.2 +mv ${TEST_INSTANCE_DATA_DIR}/ib_logfile1 ${TEST_INSTANCE_DATA_DIR}/ib_logfile1.2 +google/instance start ${TEST_INSTANCE_NAME} +grep -n corrupt ${TEST_INSTANCE_ERR_FILE} | tail -100 +################################################################## +# - verify fresh start of mysqld instance with encryption off. +################################################################## +google/instance stop ${TEST_INSTANCE_NAME} +mv ${TEST_INSTANCE_DIR} ${TEST_INSTANCE_DIR}.200 +google/instance start ${TEST_INSTANCE_NAME} +grep -n corrupt ${TEST_INSTANCE_ERR_FILE} | tail -100 +################################################################## +# - verify fresh start of mysqld instance with encryption on. +################################################################## +google/instance stop ${TEST_INSTANCE_NAME} +mv ${TEST_INSTANCE_DIR} ${TEST_INSTANCE_DIR}.300 +MYSQLD_EXTRA_ARGS="--debug_use_static_keys --debug_crypto_key_version=888 --innodb_encrypt_log=1" google/instance start ${TEST_INSTANCE_NAME} +grep -n corrupt ${TEST_INSTANCE_ERR_FILE} | tail -100 +################################################################## +# - fast shutdown. +# - remove InnoDB redo log files. +# - start the server with encryption on. +# - verify no corruption. 
+################################################################## +google/instance stop ${TEST_INSTANCE_NAME} +mv ${TEST_INSTANCE_DATA_DIR}/ib_logfile0 ${TEST_INSTANCE_DATA_DIR}/ib_logfile0.3 +mv ${TEST_INSTANCE_DATA_DIR}/ib_logfile1 ${TEST_INSTANCE_DATA_DIR}/ib_logfile1.3 +MYSQLD_EXTRA_ARGS="--debug_use_static_keys --debug_crypto_key_version=999 --innodb_encrypt_log=1" google/instance start ${TEST_INSTANCE_NAME} +grep -n corrupt ${TEST_INSTANCE_ERR_FILE} | tail -100 +################################################################## +# - fast shutdown while running workload. +# - remove InnoDB redo log files. +# - start the server with encryption on. +# - verify no corruption. +################################################################## +# -- manually create a database sbtest +# mysql> create database sbtest; +sysbench --test=oltp --oltp-table-size=1000 --mysql-user=root --mysql-socket=${TEST_INSTANCE_SOCK} prepare & +sysbench --num-threads=10 --test=oltp --oltp-table-size=1000 --mysql-user=root --mysql-socket=${TEST_INSTANCE_SOCK} run & +google/instance stop ${TEST_INSTANCE_NAME} +mv ${TEST_INSTANCE_DATA_DIR}/ib_logfile0 ${TEST_INSTANCE_DATA_DIR}/ib_logfile0.4 +mv ${TEST_INSTANCE_DATA_DIR}/ib_logfile1 ${TEST_INSTANCE_DATA_DIR}/ib_logfile1.4 +MYSQLD_EXTRA_ARGS="--debug_use_static_keys --debug_crypto_key_version=333 --innodb_encrypt_log=1" google/instance start ${TEST_INSTANCE_NAME} +grep -n corrupt ${TEST_INSTANCE_ERR_FILE} | tail -100 +################################################################## +# - clean up +################################################################## +google/instance stop ${TEST_INSTANCE_NAME} +MYSQLD_EXTRA_ARGS="--debug_use_static_keys" google/instance start ${TEST_INSTANCE_NAME} +google/instance stop ${TEST_INSTANCE_NAME} |