diff options
Diffstat (limited to 'storage/xtradb/fil')
-rw-r--r-- | storage/xtradb/fil/fil0crypt.cc | 2433 | ||||
-rw-r--r-- | storage/xtradb/fil/fil0fil.cc | 396 | ||||
-rw-r--r-- | storage/xtradb/fil/fil0pagecompress.cc | 25 | ||||
-rw-r--r-- | storage/xtradb/fil/fil0pageencryption.cc | 628 |
4 files changed, 3450 insertions, 32 deletions
diff --git a/storage/xtradb/fil/fil0crypt.cc b/storage/xtradb/fil/fil0crypt.cc new file mode 100644 index 00000000000..e34297f4f86 --- /dev/null +++ b/storage/xtradb/fil/fil0crypt.cc @@ -0,0 +1,2433 @@ +#include "fil0fil.h" +#include "srv0srv.h" +#include "srv0start.h" +#include "mach0data.h" +#include "log0recv.h" +#include "mtr0mtr.h" +#include "mtr0log.h" +#include "page0zip.h" +#include "ut0ut.h" +#include "btr0scrub.h" +#include "fsp0fsp.h" +#include "fil0pagecompress.h" +#include "fil0pageencryption.h" + +#include <my_crypt.h> +#include <my_crypt_key_management.h> + +#include <my_aes.h> +#include <math.h> + + +/** Mutex for keys */ +UNIV_INTERN ib_mutex_t fil_crypt_key_mutex; + +#ifdef UNIV_PFS_MUTEX +UNIV_INTERN mysql_pfs_key_t fil_crypt_key_mutex_key; +#endif + +/** Is encryption enabled/disabled */ +UNIV_INTERN my_bool srv_encrypt_tables = FALSE; + +/** No of key rotation threads requested */ +UNIV_INTERN uint srv_n_fil_crypt_threads = 0; + +/** No of key rotation threads started */ +static uint srv_n_fil_crypt_threads_started = 0; + +/** At this age or older a space/page will be rotated */ +UNIV_INTERN uint srv_fil_crypt_rotate_key_age = 1; + +/** Event to signal FROM the key rotation threads. */ +UNIV_INTERN os_event_t fil_crypt_event; + +/** Event to signal TO the key rotation threads. */ +UNIV_INTERN os_event_t fil_crypt_threads_event; + +/** Event for waking up threads throttle */ +UNIV_INTERN os_event_t fil_crypt_throttle_sleep_event; + +/** Mutex for key rotation threads */ +UNIV_INTERN ib_mutex_t fil_crypt_threads_mutex; + +#ifdef UNIV_PFS_MUTEX +UNIV_INTERN mysql_pfs_key_t fil_crypt_threads_mutex_key; +#endif + +/** Variable ensuring only 1 thread at time does initial conversion */ +static bool fil_crypt_start_converting = false; + +/** Variables for throttling */ +UNIV_INTERN uint srv_n_fil_crypt_iops = 100; // 10ms per iop +static uint srv_alloc_time = 3; // allocate iops for 3s at a time +static uint n_fil_crypt_iops_allocated = 0; + +/** Variables for scrubbing */ +extern uint srv_background_scrub_data_interval; +extern uint srv_background_scrub_data_check_interval; + +#define DEBUG_KEYROTATION_THROTTLING 0 + +/** Statistics variables */ +static fil_crypt_stat_t crypt_stat; +static ib_mutex_t crypt_stat_mutex; + +#ifdef UNIV_PFS_MUTEX +UNIV_INTERN mysql_pfs_key_t fil_crypt_stat_mutex_key; +#endif + +/** + * key for crypt data mutex +*/ +#ifdef UNIV_PFS_MUTEX +UNIV_INTERN mysql_pfs_key_t fil_crypt_data_mutex_key; +#endif + +/** +* Magic pattern in start of crypt data on page 0 +*/ +#define MAGIC_SZ 6 + +static const unsigned char CRYPT_MAGIC[MAGIC_SZ] = { + 's', 0xE, 0xC, 'R', 'E', 't' }; + +static const unsigned char EMPTY_PATTERN[MAGIC_SZ] = { + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; + +/** + * CRYPT_SCHEME_UNENCRYPTED + * + * Used as intermediate state when convering a space from unencrypted + * to encrypted + */ +#define CRYPT_SCHEME_UNENCRYPTED 0 + +/** + * CRYPT_SCHEME_1 + * + * L = AES_ECB(KEY, IV) + * CRYPT(PAGE) = AES_CRT(KEY=L, IV=C, PAGE) + */ +#define CRYPT_SCHEME_1 1 +#define CRYPT_SCHEME_1_IV_LEN 16 +// cached L given key_version +struct key_struct +{ + uint key_version; + byte key[CRYPT_SCHEME_1_IV_LEN]; +}; + +struct fil_space_rotate_state_t +{ + time_t start_time; // time when rotation started + ulint active_threads; // active threads in space + ulint next_offset; // next "free" offset + ulint max_offset; // max offset needing to be rotated + uint min_key_version_found; // min key version found but not rotated + lsn_t end_lsn; // max lsn created when rotating this space + bool starting; // initial write of IV + bool flushing; // space is being flushed at end of rotate + struct { + bool is_active; // is scrubbing active in this space + time_t last_scrub_completed; // when was last scrub completed + } scrubbing; +}; + +struct fil_space_crypt_struct +{ + ulint type; // CRYPT_SCHEME + uint keyserver_requests; // no of key requests to key server + uint key_count; // No of initalized key-structs + key_struct keys[3]; // cached L = AES_ECB(KEY, IV) + uint min_key_version; // min key version for this space + ulint page0_offset; // byte offset on page 0 for crypt data + + ib_mutex_t mutex; // mutex protecting following variables + bool closing; // is tablespace being closed + fil_space_rotate_state_t rotate_state; + + uint iv_length; // length of IV + byte iv[1]; // IV-data +}; + +/********************************************************************* +Init space crypt */ +UNIV_INTERN +void +fil_space_crypt_init() +{ + mutex_create(fil_crypt_key_mutex_key, + &fil_crypt_key_mutex, SYNC_NO_ORDER_CHECK); + + fil_crypt_throttle_sleep_event = os_event_create(); + + mutex_create(fil_crypt_stat_mutex_key, + &crypt_stat_mutex, SYNC_NO_ORDER_CHECK); + memset(&crypt_stat, 0, sizeof(crypt_stat)); +} + +/********************************************************************* +Cleanup space crypt */ +UNIV_INTERN +void +fil_space_crypt_cleanup() +{ + os_event_free(fil_crypt_throttle_sleep_event); +} + +/****************************************************************** +Get key bytes for a space/key-version */ +static +void +fil_crypt_get_key(byte *dst, uint* key_length, + fil_space_crypt_t* crypt_data, uint version, bool page_encrypted) +{ + unsigned char keybuf[MY_AES_MAX_KEY_LENGTH]; + unsigned char iv[CRYPT_SCHEME_1_IV_LEN]; + ulint iv_len = sizeof(iv); + + if (!page_encrypted) { + mutex_enter(&crypt_data->mutex); + + // Check if we already have key + for (uint i = 0; i < crypt_data->key_count; i++) { + if (crypt_data->keys[i].key_version == version) { + memcpy(dst, crypt_data->keys[i].key, + sizeof(crypt_data->keys[i].key)); + mutex_exit(&crypt_data->mutex); + return; + } + } + // Not found! + crypt_data->keyserver_requests++; + + // Rotate keys to make room for a new + for (uint i = 1; i < array_elements(crypt_data->keys); i++) { + crypt_data->keys[i] = crypt_data->keys[i - 1]; + } + } + else + { + // load iv + int rc = GetCryptoIV(version, (unsigned char*)iv, iv_len); + fprintf(stderr, " %d\n",rc); + + if (rc != CRYPT_KEY_OK) { + ib_logf(IB_LOG_LEVEL_FATAL, + "IV %d can not be found. Reason=%d", version, rc); + ut_error; + } + } + + if (HasCryptoKey(version)) { + *key_length = GetCryptoKeySize(version); + + int rc = GetCryptoKey(version, (unsigned char*)keybuf, *key_length); + + if (rc != CRYPT_KEY_OK) { + ib_logf(IB_LOG_LEVEL_FATAL, + "Key %d can not be found. Reason=%d", version, rc); + ut_error; + } + } else { + ib_logf(IB_LOG_LEVEL_FATAL, + "Key %d not found", version); + ut_error; + } + + + // do ctr key initialization + if (current_aes_dynamic_method == MY_AES_ALGORITHM_CTR) + { + // Now compute L by encrypting IV using this key + const unsigned char* src = page_encrypted ? iv : crypt_data->iv; + const int srclen = page_encrypted ? iv_len : crypt_data->iv_length; + unsigned char* buf = page_encrypted ? keybuf : crypt_data->keys[0].key; + uint32 buflen = page_encrypted ? *key_length : sizeof(crypt_data->keys[0].key); + + // call ecb explicit + my_aes_encrypt_dynamic_type func = get_aes_encrypt_func(MY_AES_ALGORITHM_ECB); + int rc = (*func)(src, srclen, + buf, &buflen, + (unsigned char*)keybuf, *key_length, + NULL, 0, + 1); + + if (rc != AES_OK) { + ib_logf(IB_LOG_LEVEL_FATAL, + "Unable to encrypt key-block " + " src: %p srclen: %d buf: %p buflen: %d." + " return-code: %d. Can't continue!\n", + src, srclen, buf, buflen, rc); + ut_error; + } + + if (!page_encrypted) { + crypt_data->keys[0].key_version = version; + crypt_data->key_count++; + + if (crypt_data->key_count > array_elements(crypt_data->keys)) { + crypt_data->key_count = array_elements(crypt_data->keys); + } + } + + // set the key size to the aes block size because this encrypted data is the key + *key_length = MY_AES_BLOCK_SIZE; + memcpy(dst, buf, buflen); + } + else + { + // otherwise keybuf contains the right key + memcpy(dst, keybuf, *key_length); + } + + if (!page_encrypted) { + mutex_exit(&crypt_data->mutex); + } +} + +/****************************************************************** +Get key bytes for a space/latest(key-version) */ +static inline +void +fil_crypt_get_latest_key(byte *dst, uint* key_length, + fil_space_crypt_t* crypt_data, uint *version) +{ + if (srv_encrypt_tables) { + // used for key rotation - get the next key id from the key provider + int rc = GetLatestCryptoKeyVersion(); + + // if no new key was created use the last one + if (rc >= 0) + { + *version = rc; + } + + return fil_crypt_get_key(dst, key_length, crypt_data, *version, false); + } else { + return fil_crypt_get_key(dst, key_length, NULL, *version, true); + } +} + +/****************************************************************** +Create a fil_space_crypt_t object */ +UNIV_INTERN +fil_space_crypt_t* +fil_space_create_crypt_data() +{ + const uint iv_length = CRYPT_SCHEME_1_IV_LEN; + const uint sz = sizeof(fil_space_crypt_t) + iv_length; + fil_space_crypt_t* crypt_data = + static_cast<fil_space_crypt_t*>(malloc(sz)); + memset(crypt_data, 0, sz); + + if (srv_encrypt_tables == FALSE) { + crypt_data->type = CRYPT_SCHEME_UNENCRYPTED; + crypt_data->min_key_version = 0; + } else { + crypt_data->type = CRYPT_SCHEME_1; + crypt_data->min_key_version = GetLatestCryptoKeyVersion(); + } + + mutex_create(fil_crypt_data_mutex_key, + &crypt_data->mutex, SYNC_NO_ORDER_CHECK); + crypt_data->iv_length = iv_length; + my_random_bytes(crypt_data->iv, iv_length); + return crypt_data; +} + +/****************************************************************** +Compare two crypt objects */ +UNIV_INTERN +int +fil_space_crypt_compare(const fil_space_crypt_t* crypt_data1, + const fil_space_crypt_t* crypt_data2) +{ + ut_a(crypt_data1->type == CRYPT_SCHEME_UNENCRYPTED || + crypt_data1->type == CRYPT_SCHEME_1); + ut_a(crypt_data2->type == CRYPT_SCHEME_UNENCRYPTED || + crypt_data2->type == CRYPT_SCHEME_1); + + ut_a(crypt_data1->iv_length == CRYPT_SCHEME_1_IV_LEN); + ut_a(crypt_data2->iv_length == CRYPT_SCHEME_1_IV_LEN); + + /* no support for changing iv (yet?) */ + ut_a(memcmp(crypt_data1->iv, crypt_data2->iv, + crypt_data1->iv_length) == 0); + + return 0; +} + +/****************************************************************** +Read crypt data from a page (0) */ +UNIV_INTERN +fil_space_crypt_t* +fil_space_read_crypt_data(ulint space, const byte* page, ulint offset) +{ + if (memcmp(page + offset, EMPTY_PATTERN, MAGIC_SZ) == 0) { + /* crypt is not stored */ + return NULL; + } + + if (memcmp(page + offset, CRYPT_MAGIC, MAGIC_SZ) != 0) { + fprintf(stderr, + "Warning: found potentially bogus bytes on " + "page 0 offset %lu for space %lu : " + "[ %.2x %.2x %.2x %.2x %.2x %.2x ]. " + "Assuming space is not encrypted!\n", + offset, space, + page[offset + 0], + page[offset + 1], + page[offset + 2], + page[offset + 3], + page[offset + 4], + page[offset + 5]); + return NULL; + } + + ulint type = mach_read_from_1(page + offset + MAGIC_SZ + 0); + + if (! (type == CRYPT_SCHEME_UNENCRYPTED || + type == CRYPT_SCHEME_1)) { + fprintf(stderr, + "Found non sensible crypt scheme: %lu for space %lu " + " offset: %lu bytes: " + "[ %.2x %.2x %.2x %.2x %.2x %.2x ]\n", + type, space, offset, + page[offset + 0 + MAGIC_SZ], + page[offset + 1 + MAGIC_SZ], + page[offset + 2 + MAGIC_SZ], + page[offset + 3 + MAGIC_SZ], + page[offset + 4 + MAGIC_SZ], + page[offset + 5 + MAGIC_SZ]); + ut_error; + } + + ulint iv_length = mach_read_from_1(page + offset + MAGIC_SZ + 1); + if (! (iv_length == CRYPT_SCHEME_1_IV_LEN)) { + fprintf(stderr, + "Found non sensible iv length: %lu for space %lu " + " offset: %lu type: %lu bytes: " + "[ %.2x %.2x %.2x %.2x %.2x %.2x ]\n", + iv_length, space, offset, type, + page[offset + 0 + MAGIC_SZ], + page[offset + 1 + MAGIC_SZ], + page[offset + 2 + MAGIC_SZ], + page[offset + 3 + MAGIC_SZ], + page[offset + 4 + MAGIC_SZ], + page[offset + 5 + MAGIC_SZ]); + ut_error; + } + + uint min_key_version = mach_read_from_4 + (page + offset + MAGIC_SZ + 2 + iv_length); + + const uint sz = sizeof(fil_space_crypt_t) + iv_length; + fil_space_crypt_t* crypt_data = static_cast<fil_space_crypt_t*>( + malloc(sz)); + memset(crypt_data, 0, sz); + + crypt_data->type = type; + crypt_data->min_key_version = min_key_version; + crypt_data->page0_offset = offset; + mutex_create(fil_crypt_data_mutex_key, + &crypt_data->mutex, SYNC_NO_ORDER_CHECK); + crypt_data->iv_length = iv_length; + memcpy(crypt_data->iv, page + offset + MAGIC_SZ + 2, iv_length); + + return crypt_data; +} + +/****************************************************************** +Free a crypt data object */ +UNIV_INTERN +void +fil_space_destroy_crypt_data(fil_space_crypt_t **crypt_data) +{ + if (crypt_data != NULL && (*crypt_data) != NULL) { + /* lock (and unlock) mutex to make sure no one has it locked + * currently */ + mutex_enter(& (*crypt_data)->mutex); + mutex_exit(& (*crypt_data)->mutex); + mutex_free(& (*crypt_data)->mutex); + free(*crypt_data); + (*crypt_data) = NULL; + } +} + +/****************************************************************** +Write crypt data to a page (0) */ +static +void +fil_space_write_crypt_data_low(fil_space_crypt_t *crypt_data, + ulint type, + byte* page, ulint offset, + ulint maxsize, mtr_t* mtr) +{ + ut_a(offset > 0 && offset < UNIV_PAGE_SIZE); + ulint space_id = mach_read_from_4( + page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + const uint len = crypt_data->iv_length; + const uint min_key_version = crypt_data->min_key_version; + crypt_data->page0_offset = offset; + ut_a(2 + len + 4 + MAGIC_SZ < maxsize); + + /* + redo log this as bytewise updates to page 0 + followed by an MLOG_FILE_WRITE_CRYPT_DATA + (that will during recovery update fil_space_t) + */ + mlog_write_string(page + offset, CRYPT_MAGIC, MAGIC_SZ, mtr); + mlog_write_ulint(page + offset + MAGIC_SZ + 0, type, MLOG_1BYTE, mtr); + mlog_write_ulint(page + offset + MAGIC_SZ + 1, len, MLOG_1BYTE, mtr); + mlog_write_string(page + offset + MAGIC_SZ + 2, crypt_data->iv, len, + mtr); + mlog_write_ulint(page + offset + MAGIC_SZ + 2 + len, min_key_version, + MLOG_4BYTES, mtr); + + byte* log_ptr = mlog_open(mtr, 11 + 12 + len); + if (log_ptr != NULL) { + log_ptr = mlog_write_initial_log_record_fast( + page, + MLOG_FILE_WRITE_CRYPT_DATA, + log_ptr, mtr); + mach_write_to_4(log_ptr, space_id); + log_ptr += 4; + mach_write_to_2(log_ptr, offset); + log_ptr += 2; + mach_write_to_1(log_ptr, type); + log_ptr += 1; + mach_write_to_1(log_ptr, len); + log_ptr += 1; + mach_write_to_4(log_ptr, min_key_version); + log_ptr += 4; + mlog_close(mtr, log_ptr); + + mlog_catenate_string(mtr, crypt_data->iv, len); + } +} + +/****************************************************************** +Write crypt data to a page (0) */ +UNIV_INTERN +void +fil_space_write_crypt_data(ulint space, byte* page, ulint offset, + ulint maxsize, mtr_t* mtr) +{ + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) { + return; + } + + fil_space_write_crypt_data_low(crypt_data, crypt_data->type, + page, offset, maxsize, mtr); +} + +/****************************************************************** +Parse a MLOG_FILE_WRITE_CRYPT_DATA log entry */ +UNIV_INTERN +byte* +fil_parse_write_crypt_data(byte* ptr, byte* end_ptr, + buf_block_t* block) +{ + /* check that redo log entry is complete */ + uint entry_size = + 4 + // size of space_id + 2 + // size of offset + 1 + // size of type + 1 + // size of iv-len + 4; // size of min_key_version + if (end_ptr - ptr < entry_size) + return NULL; + + ulint space_id = mach_read_from_4(ptr); + ptr += 4; + uint offset = mach_read_from_2(ptr); + ptr += 2; + uint type = mach_read_from_1(ptr); + ptr += 1; + uint len = mach_read_from_1(ptr); + ptr += 1; + + ut_a(type == CRYPT_SCHEME_UNENCRYPTED || + type == CRYPT_SCHEME_1); // only supported + ut_a(len == CRYPT_SCHEME_1_IV_LEN); // only supported + uint min_key_version = mach_read_from_4(ptr); + ptr += 4; + + if (end_ptr - ptr < len) + return NULL; + + fil_space_crypt_t* crypt_data = fil_space_create_crypt_data(); + crypt_data->page0_offset = offset; + crypt_data->min_key_version = min_key_version; + memcpy(crypt_data->iv, ptr, len); + ptr += len; + + /* update fil_space memory cache with crypt_data */ + fil_space_set_crypt_data(space_id, crypt_data); + + return ptr; +} + +/****************************************************************** +Clear crypt data from a page (0) */ +UNIV_INTERN +void +fil_space_clear_crypt_data(byte* page, ulint offset) +{ + //TODO(jonaso): pass crypt-data and read len from there + ulint len = CRYPT_SCHEME_1_IV_LEN; + ulint size = + sizeof(CRYPT_MAGIC) + + 1 + // type + 1 + // len + len + // iv + 4; // min key version + memset(page + offset, 0, size); +} + +/********************************************************************* +Check if page shall be encrypted before write */ +UNIV_INTERN +bool +fil_space_check_encryption_write( +/*==============================*/ + ulint space) /*!< in: tablespace id */ +{ + if (srv_encrypt_tables == FALSE) + return false; + + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) + return false; + + if (crypt_data->type == CRYPT_SCHEME_UNENCRYPTED) + return false; + + return true; +} + +/****************************************************************** +Encrypt a page */ +UNIV_INTERN +void +fil_space_encrypt(ulint space, ulint offset, lsn_t lsn, + const byte* src_frame, ulint zip_size, byte* dst_frame, ulint encryption_key) +{ + fil_space_crypt_t* crypt_data; + ulint page_size = (zip_size) ? zip_size : UNIV_PAGE_SIZE; + + // get key (L) + uint key_version; + byte key[MY_AES_MAX_KEY_LENGTH]; + uint key_length; + + if (srv_encrypt_tables) { + crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) { + //TODO: Is this really needed ? + memcpy(dst_frame, src_frame, page_size); + return; + } + fil_crypt_get_latest_key(key, &key_length, crypt_data, &key_version); + } else { + key_version = encryption_key; + fil_crypt_get_latest_key(key, &key_length, NULL, (uint*)&key_version); + } + + + /* Load the iv or counter (depending to the encryption algorithm used) */ + unsigned char iv[MY_AES_BLOCK_SIZE]; + + if (current_aes_dynamic_method == MY_AES_ALGORITHM_CTR) + { + // create counter block (C) + mach_write_to_4(iv + 0, space); + ulint space_offset = mach_read_from_4( + src_frame + FIL_PAGE_OFFSET); + mach_write_to_4(iv + 4, space_offset); + mach_write_to_8(iv + 8, lsn); + } + else + { + // take the iv from the key provider + + int load_iv_rc = GetCryptoIV(key_version, (uchar *) iv, sizeof(iv)); + + // if the iv can not be loaded the whole page can not be encrypted + if (load_iv_rc != CRYPT_KEY_OK) + { + ib_logf(IB_LOG_LEVEL_FATAL, + "Unable to decrypt data-block. " + " Can not load iv for key %d" + " return-code: %d. Can't continue!\n", + key_version, load_iv_rc); + + ut_error; + } + } + + + ibool page_compressed = (mach_read_from_2(src_frame+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED); + ibool page_encrypted = fil_space_is_page_encrypted(space); + + ulint compression_alg = mach_read_from_8(src_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + + ulint orig_page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE); + if (orig_page_type==FIL_PAGE_TYPE_FSP_HDR + || orig_page_type==FIL_PAGE_TYPE_XDES + || orig_page_type== FIL_PAGE_PAGE_ENCRYPTED + || orig_page_type== FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { + memcpy(dst_frame, src_frame, page_size); + return; + } + + // copy page header + memcpy(dst_frame, src_frame, FIL_PAGE_DATA); + + + if (page_encrypted && !page_compressed) { + // key id + mach_write_to_2(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, + key_version); + // original page type + mach_write_to_2(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 2, + orig_page_type); + // new page type + mach_write_to_2(dst_frame+FIL_PAGE_TYPE, FIL_PAGE_PAGE_ENCRYPTED); + } else { + // store key version + mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, + key_version); + } + + // encrypt page data + ulint unencrypted_bytes = FIL_PAGE_DATA + FIL_PAGE_DATA_END; + ulint srclen = page_size - unencrypted_bytes; + const byte* src = src_frame + FIL_PAGE_DATA; + byte* dst = dst_frame + FIL_PAGE_DATA; + uint32 dstlen; + + if (page_compressed) { + srclen = page_size - FIL_PAGE_DATA; + } + + int rc = (* my_aes_encrypt_dynamic)(src, srclen, + dst, &dstlen, + (unsigned char*)key, key_length, + (unsigned char*)iv, sizeof(iv), + 1); + + if (! ((rc == AES_OK) && ((ulint) dstlen == srclen))) { + ib_logf(IB_LOG_LEVEL_FATAL, + "Unable to encrypt data-block " + " src: %p srclen: %ld buf: %p buflen: %d." + " return-code: %d. Can't continue!\n", + src, (long)srclen, + dst, dstlen, rc); + ut_error; + } + + if (!page_compressed) { + // copy page trailer + memcpy(dst_frame + page_size - FIL_PAGE_DATA_END, + src_frame + page_size - FIL_PAGE_DATA_END, + FIL_PAGE_DATA_END); + + /* handle post encryption checksum */ + ib_uint32_t checksum = 0; + srv_checksum_algorithm_t algorithm = + static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm); + + if (zip_size == 0) { + switch (algorithm) { + case SRV_CHECKSUM_ALGORITHM_CRC32: + case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: + checksum = buf_calc_page_crc32(dst_frame); + break; + case SRV_CHECKSUM_ALGORITHM_INNODB: + case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: + checksum = (ib_uint32_t) buf_calc_page_new_checksum( + dst_frame); + break; + case SRV_CHECKSUM_ALGORITHM_NONE: + case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: + checksum = BUF_NO_CHECKSUM_MAGIC; + break; + /* no default so the compiler will emit a warning + * if new enum is added and not handled here */ + } + } else { + checksum = page_zip_calc_checksum(dst_frame, zip_size, + algorithm); + } + + // store the post-encryption checksum after the key-version + mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4, + checksum); + } else { + /* Page compressed and encrypted tables have different + FIL_HEADER */ + ulint page_len = log10((double)page_size)/log10((double)2); + /* Set up the correct page type */ + mach_write_to_2(dst_frame+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED); + /* Set up the compression algorithm */ + mach_write_to_2(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+4, orig_page_type); + /* Set up the compressed size */ + mach_write_to_1(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+6, page_len); + /* Set up the compression method */ + mach_write_to_1(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+7, compression_alg); + } + +} + +/********************************************************************* +Check if extra buffer shall be allocated for decrypting after read */ +UNIV_INTERN +bool +fil_space_check_encryption_read( +/*==============================*/ + ulint space) /*!< in: tablespace id */ +{ + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) + return false; + + if (crypt_data->type == CRYPT_SCHEME_UNENCRYPTED) + return false; + + return true; +} + +/****************************************************************** +Decrypt a page */ +UNIV_INTERN +bool +fil_space_decrypt(fil_space_crypt_t* crypt_data, + const byte* src_frame, ulint page_size, byte* dst_frame) +{ + ulint page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE); + // key version + uint key_version; + bool page_encrypted = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED + || page_type == FIL_PAGE_PAGE_ENCRYPTED); + + bool page_compressed = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED + || page_type == FIL_PAGE_PAGE_COMPRESSED); + + ulint orig_page_type=0; + + if (page_type == FIL_PAGE_PAGE_ENCRYPTED) { + key_version = mach_read_from_2( + src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + orig_page_type = mach_read_from_2( + src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 2); + } else { + key_version = mach_read_from_4( + src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + } + + if (key_version == 0 && !page_encrypted) { + //TODO: is this really needed ? + memcpy(dst_frame, src_frame, page_size); + return false; /* page not decrypted */ + } + + // read space & offset & lsn + ulint space = mach_read_from_4( + src_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + ulint offset = mach_read_from_4( + src_frame + FIL_PAGE_OFFSET); + ib_uint64_t lsn = mach_read_from_8(src_frame + FIL_PAGE_LSN); + + // copy page header + memcpy(dst_frame, src_frame, FIL_PAGE_DATA); + + if (page_type == FIL_PAGE_PAGE_ENCRYPTED) { + // orig page type + mach_write_to_2(dst_frame+FIL_PAGE_TYPE, orig_page_type); + } + + + // get key + byte key[MY_AES_MAX_KEY_LENGTH]; + uint key_length; + fil_crypt_get_key(key, &key_length, crypt_data, key_version, page_encrypted); + + // get the iv + unsigned char iv[MY_AES_BLOCK_SIZE]; + + if (current_aes_dynamic_method == MY_AES_ALGORITHM_CTR) + { + // create counter block + + mach_write_to_4(iv + 0, space); + mach_write_to_4(iv + 4, offset); + mach_write_to_8(iv + 8, lsn); + } + else + { + // take the iv from the key provider + + int load_iv_rc = GetCryptoIV(key_version, (uchar *) iv, sizeof(iv)); + + // if the iv can not be loaded the whole page can not be decrypted + if (load_iv_rc != CRYPT_KEY_OK) + { + ib_logf(IB_LOG_LEVEL_FATAL, + "Unable to decrypt data-block. " + " Can not load iv for key %d" + " return-code: %d. Can't continue!\n", + key_version, load_iv_rc); + + return AES_KEY_CREATION_FAILED; + } + } + + const byte* src = src_frame + FIL_PAGE_DATA; + byte* dst = dst_frame + FIL_PAGE_DATA; + uint32 dstlen; + ulint srclen = page_size - (FIL_PAGE_DATA + FIL_PAGE_DATA_END); + + ulint compressed_len; + ulint compression_method; + + if (page_compressed) { + orig_page_type = mach_read_from_2(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+4); + compressed_len = mach_read_from_1(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+6); + compression_method = mach_read_from_1(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+7); + } + + if (page_encrypted && !page_compressed) { + orig_page_type = mach_read_from_2(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+2); + } + + if (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { + srclen = pow((double)2, (double)((int)compressed_len)) - FIL_PAGE_DATA; + } + + int rc = (* my_aes_decrypt_dynamic)(src, srclen, + dst, &dstlen, + (unsigned char*)key, key_length, + (unsigned char*)iv, sizeof(iv), + 1); + + if (! ((rc == AES_OK) && ((ulint) dstlen == srclen))) { + ib_logf(IB_LOG_LEVEL_FATAL, + "Unable to decrypt data-block " + " src: %p srclen: %ld buf: %p buflen: %d." + " return-code: %d. Can't continue!\n", + src, (long)srclen, + dst, dstlen, rc); + ut_error; + } + + if (page_type != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { + // copy page trailer + memcpy(dst_frame + page_size - FIL_PAGE_DATA_END, + src_frame + page_size - FIL_PAGE_DATA_END, + FIL_PAGE_DATA_END); + + // clear key-version & crypt-checksum from dst + memset(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); + } else { + /* For page compressed tables we set up the FIL_HEADER again */ + /* setting original page type */ + mach_write_to_2(dst_frame + FIL_PAGE_TYPE, orig_page_type); + /* page_compression uses BUF_NO_CHECKSUM_MAGIC as checksum */ + mach_write_to_4(dst_frame + FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC); + /* Set up the flush lsn to be compression algorithm */ + mach_write_to_8(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, compression_method); + } + + return true; /* page was decrypted */ +} + +/****************************************************************** +Decrypt a page */ +UNIV_INTERN +void +fil_space_decrypt(ulint space, + const byte* src_frame, ulint page_size, byte* dst_frame) +{ + fil_space_decrypt(fil_space_get_crypt_data(space), + src_frame, page_size, dst_frame); +} + +/********************************************************************* +Verify checksum for a page (iff it's encrypted) +NOTE: currently this function can only be run in single threaded mode +as it modifies srv_checksum_algorithm (temporarily) +@return true if page is encrypted AND OK, false otherwise */ +bool +fil_space_verify_crypt_checksum(const byte* src_frame, ulint zip_size) +{ + // key version + uint key_version = mach_read_from_4( + src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + + if (key_version == 0) { + return false; // unencrypted page + } + + /* "trick" the normal checksum routines by storing the post-encryption + * checksum into the normal checksum field allowing for reuse of + * the normal routines */ + + // post encryption checksum + ib_uint32_t stored_post_encryption = mach_read_from_4( + src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4); + + // save pre encryption checksum for restore in end of this function + ib_uint32_t stored_pre_encryption = mach_read_from_4( + src_frame + FIL_PAGE_SPACE_OR_CHKSUM); + + ib_uint32_t checksum_field2 = mach_read_from_4( + src_frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM); + + /** prepare frame for usage of normal checksum routines */ + mach_write_to_4(const_cast<byte*>(src_frame) + FIL_PAGE_SPACE_OR_CHKSUM, + stored_post_encryption); + + /* NOTE: this function is (currently) only run when restoring + * dblwr-buffer, server is single threaded so it's safe to modify + * srv_checksum_algorithm */ + srv_checksum_algorithm_t save_checksum_algorithm = + (srv_checksum_algorithm_t)srv_checksum_algorithm; + if (zip_size == 0 && + (save_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB || + save_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_INNODB)) { + /* handle ALGORITHM_INNODB specially, + * "downgrade" to ALGORITHM_INNODB and store BUF_NO_CHECKSUM_MAGIC + * checksum_field2 is sort of pointless anyway... + */ + srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_INNODB; + mach_write_to_4(const_cast<byte*>(src_frame) + + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, + BUF_NO_CHECKSUM_MAGIC); + } + + /* verify checksums */ + ibool corrupted = buf_page_is_corrupted(false, src_frame, zip_size); + + /** restore frame & algorithm */ + srv_checksum_algorithm = save_checksum_algorithm; + + mach_write_to_4(const_cast<byte*>(src_frame) + + FIL_PAGE_SPACE_OR_CHKSUM, + stored_pre_encryption); + + mach_write_to_4(const_cast<byte*>(src_frame) + + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, + checksum_field2); + + if (!corrupted) { + return true; // page was encrypted and checksum matched + } else { + return false; // page was encrypted but checksum didn't match + } +} + +/***********************************************************************/ + +/** A copy of global key state */ +struct key_state_t { + key_state_t() : key_version(0), + rotate_key_age(srv_fil_crypt_rotate_key_age) {} + bool operator==(const key_state_t& other) const { + return key_version == other.key_version && + rotate_key_age == other.rotate_key_age; + } + uint key_version; + uint rotate_key_age; +}; + +/*********************************************************************** +Copy global key state */ +static void +fil_crypt_get_key_state( + key_state_t *new_state) +{ + if (srv_encrypt_tables == TRUE) { + new_state->key_version = GetLatestCryptoKeyVersion(); + new_state->rotate_key_age = srv_fil_crypt_rotate_key_age; + ut_a(new_state->key_version > 0); + } else { + new_state->key_version = 0; + new_state->rotate_key_age = 0; + } +} + +/*********************************************************************** +Check if a key needs rotation given a key_state */ +static bool +fil_crypt_needs_rotation(uint key_version, const key_state_t *key_state) +{ + // TODO(jonaso): Add support for rotating encrypted => unencrypted + + if (key_version == 0 && key_state->key_version != 0) { + /* this is rotation unencrypted => encrypted + * ignore rotate_key_age */ + return true; + } + + if (key_state->key_version == 0 && key_version != 0) { + /* this is rotation encrypted => unencrypted */ + return true; + } + + /* this is rotation encrypted => encrypted, + * only reencrypt if key is sufficiently old */ + if (key_version + key_state->rotate_key_age < key_state->key_version) + return true; + + return false; +} + +/*********************************************************************** +Check if a space is closing (i.e just before drop) */ +UNIV_INTERN bool +fil_crypt_is_closing(ulint space) +{ + bool closing; + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + mutex_enter(&crypt_data->mutex); + closing = crypt_data->closing; + mutex_exit(&crypt_data->mutex); + return closing; +} + +/*********************************************************************** +Start encrypting a space +@return true if a pending op (fil_inc_pending_ops/fil_decr_pending_ops) is held +*/ +static bool +fil_crypt_start_encrypting_space(ulint space, bool *recheck) { + + /* we have a pending op when entering function */ + bool pending_op = true; + + mutex_enter(&fil_crypt_threads_mutex); + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + if (crypt_data != NULL || fil_crypt_start_converting) { + /* someone beat us to it */ + if (fil_crypt_start_converting) + *recheck = true; + + mutex_exit(&fil_crypt_threads_mutex); + return pending_op; + } + + /* NOTE: we need to write and flush page 0 before publishing + * the crypt data. This so that after restart there is no + * risk of finding encrypted pages without having + * crypt data in page 0 */ + + /* 1 - create crypt data */ + crypt_data = fil_space_create_crypt_data(); + if (crypt_data == NULL) { + mutex_exit(&fil_crypt_threads_mutex); + return pending_op; + } + + crypt_data->type = CRYPT_SCHEME_UNENCRYPTED; + crypt_data->min_key_version = 0; // all pages are unencrypted + crypt_data->rotate_state.start_time = time(0); + crypt_data->rotate_state.starting = true; + crypt_data->rotate_state.active_threads = 1; + + mutex_enter(&crypt_data->mutex); + fil_space_set_crypt_data(space, crypt_data); + mutex_exit(&crypt_data->mutex); + + fil_crypt_start_converting = true; + mutex_exit(&fil_crypt_threads_mutex); + + do + { + if (fil_crypt_is_closing(space) || + fil_tablespace_is_being_deleted(space)) + break; + + mtr_t mtr; + mtr_start(&mtr); + + /* 2 - get page 0 */ + ulint offset = 0; + ulint zip_size = fil_space_get_zip_size(space); + buf_block_t* block = buf_page_get_gen(space, zip_size, offset, + RW_X_LATCH, + NULL, + BUF_GET, + __FILE__, __LINE__, + &mtr); + + if (fil_crypt_is_closing(space) || + fil_tablespace_is_being_deleted(space)) { + mtr_commit(&mtr); + break; + } + + /* 3 - compute location to store crypt data */ + byte* frame = buf_block_get_frame(block); + ulint maxsize; + crypt_data->page0_offset = + fsp_header_get_crypt_offset(zip_size, &maxsize); + + /* 4 - write crypt data to page 0 */ + fil_space_write_crypt_data_low(crypt_data, + CRYPT_SCHEME_1, + frame, + crypt_data->page0_offset, + maxsize, &mtr); + + mtr_commit(&mtr); + + if (fil_crypt_is_closing(space) || + fil_tablespace_is_being_deleted(space)) { + break; + } + + /* record lsn of update */ + lsn_t end_lsn = mtr.end_lsn; + + /* 4 - sync tablespace before publishing crypt data */ + + /* release "lock" while syncing */ + fil_decr_pending_ops(space); + pending_op = false; + + bool success = false; + ulint n_pages = 0; + ulint sum_pages = 0; + do { + success = buf_flush_list(ULINT_MAX, end_lsn, &n_pages); + buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); + sum_pages += n_pages; + } while (!success && + !fil_crypt_is_closing(space) && + !fil_tablespace_is_being_deleted(space)); + + /* try to reacquire pending op */ + if (fil_inc_pending_ops(space, true)) + break; + + /* pending op reacquired! */ + pending_op = true; + + if (fil_crypt_is_closing(space) || + fil_tablespace_is_being_deleted(space)) { + break; + } + + /* 5 - publish crypt data */ + mutex_enter(&fil_crypt_threads_mutex); + mutex_enter(&crypt_data->mutex); + crypt_data->type = CRYPT_SCHEME_1; + ut_a(crypt_data->rotate_state.active_threads == 1); + crypt_data->rotate_state.active_threads = 0; + crypt_data->rotate_state.starting = false; + + fil_crypt_start_converting = false; + mutex_exit(&crypt_data->mutex); + mutex_exit(&fil_crypt_threads_mutex); + + return pending_op; + } while (0); + + mutex_enter(&crypt_data->mutex); + ut_a(crypt_data->rotate_state.active_threads == 1); + crypt_data->rotate_state.active_threads = 0; + mutex_exit(&crypt_data->mutex); + + mutex_enter(&fil_crypt_threads_mutex); + fil_crypt_start_converting = false; + mutex_exit(&fil_crypt_threads_mutex); + + return pending_op; +} + +/*********************************************************************** +Check if space needs rotation given a key_state */ +static bool +fil_crypt_space_needs_rotation(uint space, const key_state_t *key_state, + bool *recheck) +{ + if (fil_space_get_type(space) != FIL_TABLESPACE) + return false; + + if (fil_inc_pending_ops(space, true)) { + /* tablespace being dropped */ + return false; + } + + /* keep track of if we have pending op */ + bool pending_op = true; + + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) { + /** + * space has no crypt data + * start encrypting it... + */ + pending_op = fil_crypt_start_encrypting_space(space, recheck); + crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) { + if (pending_op) { + fil_decr_pending_ops(space); + } + return false; + } + } + + mutex_enter(&crypt_data->mutex); + do { + /* prevent threads from starting to rotate space */ + if (crypt_data->rotate_state.starting) { + /* recheck this space later */ + *recheck = true; + break; + } + + /* prevent threads from starting to rotate space */ + if (crypt_data->closing) + break; + + if (crypt_data->rotate_state.flushing) + break; + + bool need_key_rotation = fil_crypt_needs_rotation( + crypt_data->min_key_version, key_state); + + time_t diff = time(0) - crypt_data->rotate_state.scrubbing. + last_scrub_completed; + bool need_scrubbing = + diff >= srv_background_scrub_data_interval; + + if (need_key_rotation == false && need_scrubbing == false) + break; + + mutex_exit(&crypt_data->mutex); + /* NOTE! fil_decr_pending_ops is performed outside */ + return true; + } while (0); + + mutex_exit(&crypt_data->mutex); + if (pending_op) { + fil_decr_pending_ops(space); + } + return false; +} + +/** State of a rotation thread */ +struct rotate_thread_t { + explicit rotate_thread_t(uint no) { + memset(this, 0, sizeof(* this)); + thread_no = no; + first = true; + estimated_max_iops = 20; + } + + uint thread_no; + bool first; /*!< is position before first space */ + ulint space; /*!< current space */ + ulint offset; /*!< current offset */ + ulint batch; /*!< #pages to rotate */ + uint min_key_version_found;/*!< min key version found but not rotated */ + lsn_t end_lsn; /*!< max lsn when rotating this space */ + + uint estimated_max_iops; /*!< estimation of max iops */ + uint allocated_iops; /*!< allocated iops */ + uint cnt_waited; /*!< #times waited during this slot */ + uint sum_waited_us; /*!< wait time during this slot */ + + fil_crypt_stat_t crypt_stat; // statistics + + btr_scrub_t scrub_data; /* thread local data used by btr_scrub-functions + * when iterating pages of tablespace */ + + /* check if this thread should shutdown */ + bool should_shutdown() const { + return ! (srv_shutdown_state == SRV_SHUTDOWN_NONE && + thread_no < srv_n_fil_crypt_threads); + } +}; + +/*********************************************************************** +Update global statistics with thread statistics */ +static void +fil_crypt_update_total_stat(rotate_thread_t *state) +{ + mutex_enter(&crypt_stat_mutex); + crypt_stat.pages_read_from_cache += + state->crypt_stat.pages_read_from_cache; + crypt_stat.pages_read_from_disk += + state->crypt_stat.pages_read_from_disk; + crypt_stat.pages_modified += state->crypt_stat.pages_modified; + crypt_stat.pages_flushed += state->crypt_stat.pages_flushed; + // remote old estimate + crypt_stat.estimated_iops -= state->crypt_stat.estimated_iops; + // add new estimate + crypt_stat.estimated_iops += state->estimated_max_iops; + mutex_exit(&crypt_stat_mutex); + + // make new estimate "current" estimate + memset(&state->crypt_stat, 0, sizeof(state->crypt_stat)); + // record our old (current) estimate + state->crypt_stat.estimated_iops = state->estimated_max_iops; +} + +/*********************************************************************** +Allocate iops to thread from global setting, +used before starting to rotate a space */ +static bool +fil_crypt_alloc_iops(rotate_thread_t *state) +{ + ut_ad(state->allocated_iops == 0); + + uint max_iops = state->estimated_max_iops; + mutex_enter(&fil_crypt_threads_mutex); + if (n_fil_crypt_iops_allocated >= srv_n_fil_crypt_iops) { + /* this can happen when user decreases srv_fil_crypt_iops */ + mutex_exit(&fil_crypt_threads_mutex); + return false; + } + + uint alloc = srv_n_fil_crypt_iops - n_fil_crypt_iops_allocated; + if (alloc > max_iops) + alloc = max_iops; + + n_fil_crypt_iops_allocated += alloc; + mutex_exit(&fil_crypt_threads_mutex); + + state->allocated_iops = alloc; + + return alloc > 0; +} + +/*********************************************************************** +Reallocate iops to thread, +used when inside a space */ +static void +fil_crypt_realloc_iops(rotate_thread_t *state) +{ + ut_a(state->allocated_iops > 0); + + if (10 * state->cnt_waited > state->batch) { + /* if we waited more than 10% re-estimate max_iops */ + uint avg_wait_time_us = + state->sum_waited_us / state->cnt_waited; + +#if DEBUG_KEYROTATION_THROTTLING + fprintf(stderr, + "thr_no: %u - update estimated_max_iops from %u to %u\n", + state->thread_no, + state->estimated_max_iops, + 1000000 / avg_wait_time_us); +#endif + if (avg_wait_time_us == 0) + avg_wait_time_us = 1; // prevent division by zero + + state->estimated_max_iops = 1000000 / avg_wait_time_us; + state->cnt_waited = 0; + state->sum_waited_us = 0; + } else { +#if DEBUG_KEYROTATION_THROTTLING + fprintf(stderr, + "thr_no: %u only waited %lu%% skip re-estimate\n", + state->thread_no, + (100 * state->cnt_waited) / state->batch); +#endif + } + + if (state->estimated_max_iops <= state->allocated_iops) { + /* return extra iops */ + uint extra = state->allocated_iops - state->estimated_max_iops; + + if (extra > 0) { + mutex_enter(&fil_crypt_threads_mutex); + if (n_fil_crypt_iops_allocated < extra) { + /* unknown bug! + * crash in debug + * keep n_fil_crypt_iops_allocated unchanged + * in release */ + ut_ad(0); + extra = 0; + } + n_fil_crypt_iops_allocated -= extra; + state->allocated_iops -= extra; + + if (state->allocated_iops == 0) { + /* no matter how slow io system seems to be + * never decrease allocated_iops to 0... */ + state->allocated_iops ++; + n_fil_crypt_iops_allocated ++; + } + mutex_exit(&fil_crypt_threads_mutex); + os_event_set(fil_crypt_threads_event); + } + } else { + /* see if there are more to get */ + mutex_enter(&fil_crypt_threads_mutex); + if (n_fil_crypt_iops_allocated < srv_n_fil_crypt_iops) { + /* there are extra iops free */ + uint extra = srv_n_fil_crypt_iops - + n_fil_crypt_iops_allocated; + if (state->allocated_iops + extra > + state->estimated_max_iops) { + /* but don't alloc more than our max */ + extra = state->estimated_max_iops - + state->allocated_iops; + } + n_fil_crypt_iops_allocated += extra; + state->allocated_iops += extra; +#if DEBUG_KEYROTATION_THROTTLING + fprintf(stderr, + "thr_no: %u increased iops from %u to %u\n", + state->thread_no, + state->allocated_iops - extra, + state->allocated_iops); +#endif + } + mutex_exit(&fil_crypt_threads_mutex); + } + + fil_crypt_update_total_stat(state); +} + +/*********************************************************************** +Return allocated iops to global */ +static void +fil_crypt_return_iops(rotate_thread_t *state) +{ + if (state->allocated_iops > 0) { + uint iops = state->allocated_iops; + mutex_enter(&fil_crypt_threads_mutex); + if (n_fil_crypt_iops_allocated < iops) { + /* unknown bug! + * crash in debug + * keep n_fil_crypt_iops_allocated unchanged + * in release */ + ut_ad(0); + iops = 0; + } + n_fil_crypt_iops_allocated -= iops; + mutex_exit(&fil_crypt_threads_mutex); + state->allocated_iops = 0; + os_event_set(fil_crypt_threads_event); + } + + fil_crypt_update_total_stat(state); +} + +/*********************************************************************** +Search for a space needing rotation */ +bool +fil_crypt_find_space_to_rotate( + const key_state_t *key_state, + rotate_thread_t *state, + bool *recheck) +{ + /* we need iops to start rotating */ + while (!state->should_shutdown() && !fil_crypt_alloc_iops(state)) { + os_event_reset(fil_crypt_threads_event); + os_event_wait_time(fil_crypt_threads_event, 1000000); + } + + if (state->should_shutdown()) + return false; + + if (state->first) { + state->first = false; + state->space = fil_get_first_space(); + } else { + state->space = fil_get_next_space(state->space); + } + + while (!state->should_shutdown() && state->space != ULINT_UNDEFINED) { + + ulint space = state->space; + if (fil_crypt_space_needs_rotation(space, key_state, recheck)) { + /* init state->min_key_version_found before + * starting on a space */ + state->min_key_version_found = key_state->key_version; + return true; + } + + state->space = fil_get_next_space(space); + } + + /* if we didn't find any space return iops */ + fil_crypt_return_iops(state); + + return false; + +} + +/*********************************************************************** +Start rotating a space */ +static +void +fil_crypt_start_rotate_space( + const key_state_t *key_state, + rotate_thread_t *state) +{ + ulint space = state->space; + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + mutex_enter(&crypt_data->mutex); + if (crypt_data->rotate_state.active_threads == 0) { + /* only first thread needs to init */ + crypt_data->rotate_state.next_offset = 1; // skip page 0 + /* no need to rotate beyond current max + * if space extends, it will be encrypted with newer version */ + crypt_data->rotate_state.max_offset = fil_space_get_size(space); + + crypt_data->rotate_state.end_lsn = 0; + crypt_data->rotate_state.min_key_version_found = + key_state->key_version; + + crypt_data->rotate_state.start_time = time(0); + } + + /* count active threads in space */ + crypt_data->rotate_state.active_threads++; + + /* Initialize thread local state */ + state->end_lsn = crypt_data->rotate_state.end_lsn; + state->min_key_version_found = + crypt_data->rotate_state.min_key_version_found; + + /* inform scrubbing */ + crypt_data->rotate_state.scrubbing.is_active = + btr_scrub_start_space(space, &state->scrub_data); + + mutex_exit(&crypt_data->mutex); +} + +/*********************************************************************** +Search for batch of pages needing rotation */ +static +bool +fil_crypt_find_page_to_rotate( + const key_state_t *key_state, + rotate_thread_t *state) +{ + ulint batch = srv_alloc_time * state->allocated_iops; + ulint space = state->space; + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + mutex_enter(&crypt_data->mutex); + if (crypt_data->closing == false && + crypt_data->rotate_state.next_offset < + crypt_data->rotate_state.max_offset) { + + state->offset = crypt_data->rotate_state.next_offset; + ulint remaining = crypt_data->rotate_state.max_offset - + crypt_data->rotate_state.next_offset; + + if (batch <= remaining) + state->batch = batch; + else + state->batch = remaining; + + crypt_data->rotate_state.next_offset += batch; + mutex_exit(&crypt_data->mutex); + return true; + } + + mutex_exit(&crypt_data->mutex); + return false; +} + +/*********************************************************************** +Check if a page is uninitialized (doesn't need to be rotated) */ +static bool +fil_crypt_is_page_uninitialized(const byte* frame, uint zip_size) +{ + if (zip_size) { + ulint stored_checksum = mach_read_from_4( + frame + FIL_PAGE_SPACE_OR_CHKSUM); + /* empty pages aren't encrypted */ + if (stored_checksum == 0) { + return true; + } + } else { + ulint size = UNIV_PAGE_SIZE; + ulint checksum_field1 = mach_read_from_4( + frame + FIL_PAGE_SPACE_OR_CHKSUM); + ulint checksum_field2 = mach_read_from_4( + frame + size - FIL_PAGE_END_LSN_OLD_CHKSUM); + /* empty pages are not encrypted */ + if (checksum_field1 == 0 && checksum_field2 == 0 + && mach_read_from_4(frame + FIL_PAGE_LSN) == 0) { + return true; + } + } + return false; +} + +#define fil_crypt_get_page_throttle(state,space,zip_size,offset,mtr,sleeptime_ms) \ + fil_crypt_get_page_throttle_func(state, space, zip_size, offset, mtr, \ + sleeptime_ms, __FILE__, __LINE__) + +/*********************************************************************** +Get a page and compute sleep time */ +static +buf_block_t* +fil_crypt_get_page_throttle_func(rotate_thread_t *state, + ulint space, uint zip_size, ulint offset, + mtr_t *mtr, + ulint *sleeptime_ms, + const char *file, + ulint line) +{ + buf_block_t* block = buf_page_try_get_func(space, offset, RW_X_LATCH, + true, + file, line, mtr); + if (block != NULL) { + /* page was in buffer pool */ + state->crypt_stat.pages_read_from_cache++; + return block; + } + + state->crypt_stat.pages_read_from_disk++; + + ullint start = ut_time_us(NULL); + block = buf_page_get_gen(space, zip_size, offset, + RW_X_LATCH, + NULL, BUF_GET_POSSIBLY_FREED, + file, line, mtr); + ullint end = ut_time_us(NULL); + + if (end < start) { + end = start; // safety... + } + + state->cnt_waited++; + state->sum_waited_us += (end - start); + + /* average page load */ + ulint add_sleeptime_ms = 0; + ulint avg_wait_time_us = state->sum_waited_us / state->cnt_waited; + ulint alloc_wait_us = 1000000 / state->allocated_iops; + if (avg_wait_time_us < alloc_wait_us) { + /* we reading faster than we allocated */ + add_sleeptime_ms = (alloc_wait_us - avg_wait_time_us) / 1000; + } else { + /* if page load time is longer than we want, skip sleeping */ + } + + *sleeptime_ms += add_sleeptime_ms; + return block; +} + + +/*********************************************************************** +Get block and allocation status + +note: innodb locks fil_space_latch and then block when allocating page +but locks block and then fil_space_latch when freeing page. +*/ +static +buf_block_t* +btr_scrub_get_block_and_allocation_status( + rotate_thread_t *state, + ulint space, + ulint zip_size, + ulint offset, + mtr_t *mtr, + btr_scrub_page_allocation_status_t *allocation_status, + ulint *sleeptime_ms) +{ + mtr_t local_mtr; + buf_block_t *block = NULL; + mtr_start(&local_mtr); + *allocation_status = fsp_page_is_free(space, offset, &local_mtr) ? + BTR_SCRUB_PAGE_FREE : + BTR_SCRUB_PAGE_ALLOCATED; + + if (*allocation_status == BTR_SCRUB_PAGE_FREE) { + /* this is easy case, we lock fil_space_latch first and + then block */ + block = fil_crypt_get_page_throttle(state, + space, zip_size, + offset, mtr, + sleeptime_ms); + mtr_commit(&local_mtr); + } else { + /* page is allocated according to xdes */ + + /* release fil_space_latch *before* fetching block */ + mtr_commit(&local_mtr); + + /* NOTE: when we have locked dict_index_get_lock(), + * it's safe to release fil_space_latch and then fetch block + * as dict_index_get_lock() is needed to make tree modifications + * such as free-ing a page + */ + + block = fil_crypt_get_page_throttle(state, + space, zip_size, + offset, mtr, + sleeptime_ms); + } + + return block; +} + + +/*********************************************************************** +Rotate one page */ +static +void +fil_crypt_rotate_page( + const key_state_t *key_state, + rotate_thread_t *state) +{ + ulint space = state->space; + ulint offset = state->offset; + const uint zip_size = fil_space_get_zip_size(space); + ulint sleeptime_ms = 0; + + /* check if tablespace is closing before reading page */ + if (fil_crypt_is_closing(space)) + return; + + if (space == TRX_SYS_SPACE && offset == TRX_SYS_PAGE_NO) { + /* don't encrypt this as it contains address to dblwr buffer */ + return; + } + + mtr_t mtr; + mtr_start(&mtr); + buf_block_t* block = fil_crypt_get_page_throttle(state, + space, zip_size, + offset, &mtr, + &sleeptime_ms); + + bool modified = false; + int needs_scrubbing = BTR_SCRUB_SKIP_PAGE; + lsn_t block_lsn = block->page.newest_modification; + uint kv = block->page.key_version; + + /* check if tablespace is closing after reading page */ + if (!fil_crypt_is_closing(space)) { + byte* frame = buf_block_get_frame(block); + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + + if (kv == 0 && + fil_crypt_is_page_uninitialized(frame, zip_size)) { + ; + } else if (fil_crypt_needs_rotation(kv, key_state)) { + + /* page can be "fresh" i.e never written in case + * kv == 0 or it should have a key version at least + * as big as the space minimum key version*/ + ut_a(kv == 0 || kv >= crypt_data->min_key_version); + + modified = true; + + /* force rotation by dummy updating page */ + mlog_write_ulint(frame + + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, + space, MLOG_4BYTES, &mtr); + + /* update block */ + block->page.key_version = key_state->key_version; + + /* statistics */ + state->crypt_stat.pages_modified++; + } else { + ut_a(kv >= crypt_data->min_key_version || + (kv == 0 && key_state->key_version == 0)); + + if (kv < state->min_key_version_found) { + state->min_key_version_found = kv; + } + } + + needs_scrubbing = btr_page_needs_scrubbing( + &state->scrub_data, block, + BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN); + } + + mtr_commit(&mtr); + lsn_t end_lsn = mtr.end_lsn; + + if (needs_scrubbing == BTR_SCRUB_PAGE) { + mtr_start(&mtr); + /* + * refetch page and allocation status + */ + btr_scrub_page_allocation_status_t allocated; + block = btr_scrub_get_block_and_allocation_status( + state, space, zip_size, offset, &mtr, + &allocated, + &sleeptime_ms); + + /* get required table/index and index-locks */ + needs_scrubbing = btr_scrub_recheck_page( + &state->scrub_data, block, allocated, &mtr); + + if (needs_scrubbing == BTR_SCRUB_PAGE) { + /* we need to refetch it once more now that we have + * index locked */ + block = btr_scrub_get_block_and_allocation_status( + state, space, zip_size, offset, &mtr, + &allocated, + &sleeptime_ms); + + needs_scrubbing = btr_scrub_page(&state->scrub_data, + block, allocated, + &mtr); + } + + /* NOTE: mtr is committed inside btr_scrub_recheck_page() + * and/or btr_scrub_page. This is to make sure that + * locks & pages are latched in corrected order, + * the mtr is in some circumstances restarted. + * (mtr_commit() + mtr_start()) + */ + } + + if (needs_scrubbing != BTR_SCRUB_PAGE) { + /* if page didn't need scrubbing it might be that cleanups + are needed. do those outside of any mtr to prevent deadlocks. + + the information what kinds of cleanups that are needed are + encoded inside the needs_scrubbing, but this is opaque to + this function (except the value BTR_SCRUB_PAGE) */ + btr_scrub_skip_page(&state->scrub_data, needs_scrubbing); + } + + if (needs_scrubbing == BTR_SCRUB_TURNED_OFF) { + /* if we just detected that scrubbing was turned off + * update global state to reflect this */ + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + mutex_enter(&crypt_data->mutex); + crypt_data->rotate_state.scrubbing.is_active = false; + mutex_exit(&crypt_data->mutex); + } + + if (modified) { + /* if we modified page, we take lsn from mtr */ + ut_a(end_lsn > state->end_lsn); + ut_a(end_lsn > block_lsn); + state->end_lsn = end_lsn; + } else { + /* if we did not modify page, check for max lsn */ + if (block_lsn > state->end_lsn) { + state->end_lsn = block_lsn; + } + } + + if (sleeptime_ms) { + os_event_reset(fil_crypt_throttle_sleep_event); + os_event_wait_time(fil_crypt_throttle_sleep_event, + 1000 * sleeptime_ms); + } +} + +/*********************************************************************** +Rotate a batch of pages */ +static +void +fil_crypt_rotate_pages( + const key_state_t *key_state, + rotate_thread_t *state) +{ + ulint space = state->space; + ulint end = state->offset + state->batch; + for (; state->offset < end; state->offset++) { + + /* we can't rotate pages in dblwr buffer as + * it's not possible to read those due to lots of asserts + * in buffer pool. + * + * However since these are only (short-lived) copies of + * real pages, they will be updated anyway when the + * real page is updated + */ + if (space == TRX_SYS_SPACE && + buf_dblwr_page_inside(state->offset)) { + continue; + } + + fil_crypt_rotate_page(key_state, state); + } +} + +/*********************************************************************** +Flush rotated pages and then update page 0 */ +static +void +fil_crypt_flush_space(rotate_thread_t *state, ulint space) +{ + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + + /* flush tablespace pages so that there are no pages left with old key */ + lsn_t end_lsn = crypt_data->rotate_state.end_lsn; + if (end_lsn > 0 && !fil_crypt_is_closing(space)) { + bool success = false; + ulint n_pages = 0; + ulint sum_pages = 0; + ullint start = ut_time_us(NULL); + do { + success = buf_flush_list(ULINT_MAX, end_lsn, &n_pages); + buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); + sum_pages += n_pages; + } while (!success && !fil_crypt_is_closing(space)); + ullint end = ut_time_us(NULL); + if (sum_pages && end > start) { + state->cnt_waited += sum_pages; + state->sum_waited_us += (end - start); + + /* statistics */ + state->crypt_stat.pages_flushed += sum_pages; + } + } + + if (crypt_data->min_key_version == 0) { + crypt_data->type = CRYPT_SCHEME_UNENCRYPTED; + } + + /* update page 0 */ + if (!fil_crypt_is_closing(space)) { + mtr_t mtr; + mtr_start(&mtr); + ulint offset = 0; // page 0 + const uint zip_size = fil_space_get_zip_size(space); + buf_block_t* block = buf_page_get_gen(space, zip_size, offset, + RW_X_LATCH, NULL, BUF_GET, + __FILE__, __LINE__, &mtr); + byte* frame = buf_block_get_frame(block); + fil_space_write_crypt_data(space, frame, + crypt_data->page0_offset, + ULINT_MAX, &mtr); + mtr_commit(&mtr); + } +} + +/*********************************************************************** +Complete rotating a space */ +static +void +fil_crypt_complete_rotate_space( + const key_state_t *key_state, + rotate_thread_t *state) +{ + ulint space = state->space; + fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space); + mutex_enter(&crypt_data->mutex); + + /** + * Update crypt data state with state from thread + */ + if (state->min_key_version_found < + crypt_data->rotate_state.min_key_version_found) { + crypt_data->rotate_state.min_key_version_found = + state->min_key_version_found; + } + + if (state->end_lsn > crypt_data->rotate_state.end_lsn) { + crypt_data->rotate_state.end_lsn = state->end_lsn; + } + + ut_a(crypt_data->rotate_state.active_threads > 0); + crypt_data->rotate_state.active_threads--; + bool last = crypt_data->rotate_state.active_threads == 0; + + /** + * check if space is fully done + * this as when threads shutdown, it could be that we "complete" + * iterating before we have scanned the full space. + */ + bool done = crypt_data->rotate_state.next_offset >= + crypt_data->rotate_state.max_offset; + + /** + * we should flush space if we're last thread AND + * the iteration is done + */ + bool should_flush = last && done; + + if (should_flush) { + /* we're the last active thread */ + crypt_data->rotate_state.flushing = true; + crypt_data->min_key_version = + crypt_data->rotate_state.min_key_version_found; + } + + /* inform scrubbing */ + crypt_data->rotate_state.scrubbing.is_active = false; + mutex_exit(&crypt_data->mutex); + + /* all threads must call btr_scrub_complete_space wo/ mutex held */ + if (btr_scrub_complete_space(&state->scrub_data) == true) { + if (should_flush) { + /* only last thread updates last_scrub_completed */ + mutex_enter(&crypt_data->mutex); + crypt_data->rotate_state.scrubbing. + last_scrub_completed = time(0); + mutex_exit(&crypt_data->mutex); + } + } + + if (should_flush) { + fil_crypt_flush_space(state, space); + + mutex_enter(&crypt_data->mutex); + crypt_data->rotate_state.flushing = false; + mutex_exit(&crypt_data->mutex); + } +} + +/*********************************************************************//** +A thread which monitors global key state and rotates tablespaces accordingly +@return a dummy parameter */ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(fil_crypt_thread)( +/*===============================*/ + void* arg __attribute__((unused))) /*!< in: a dummy parameter required + * by os_thread_create */ +{ + UT_NOT_USED(arg); + + mutex_enter(&fil_crypt_threads_mutex); + uint thread_no = srv_n_fil_crypt_threads_started; + srv_n_fil_crypt_threads_started++; + mutex_exit(&fil_crypt_threads_mutex); + os_event_set(fil_crypt_event); /* signal that we started */ + + /* state of this thread */ + rotate_thread_t thr(thread_no); + + /* if we find a space that is starting, skip over it and recheck it later */ + bool recheck = false; + + key_state_t key_state; + fil_crypt_get_key_state(&key_state); + + /* make sure that thread always checks all tablespace when starting. + * + * by decreasing key_version, loop that waits for change in key-state + * should exit immediately causing thread to check all spaces when starting */ + key_state.key_version--; + + while (!thr.should_shutdown()) { + + key_state_t new_state; + fil_crypt_get_key_state(&new_state); + + time_t wait_start = time(0); + while (!thr.should_shutdown() && key_state == new_state) { + + /* wait for key state changes + * i.e either new key version of change or + * new rotate_key_age */ + os_event_reset(fil_crypt_threads_event); + os_event_wait_time(fil_crypt_threads_event, 1000000); + fil_crypt_get_key_state(&new_state); + + if (recheck) { + /* check recheck here, after sleep, so + * that we don't busy loop while when one thread is starting + * a space*/ + break; + } + + time_t waited = time(0) - wait_start; + if (waited >= srv_background_scrub_data_check_interval) + break; + } + + recheck = false; + thr.first = true; // restart from first tablespace + key_state = new_state; // save for next loop + + /* iterate all spaces searching for those needing rotation */ + while (!thr.should_shutdown() && + fil_crypt_find_space_to_rotate(&new_state, &thr, &recheck)) { + + /* we found a space to rotate */ + fil_crypt_start_rotate_space(&new_state, &thr); + + /* decrement pending ops that was incremented in + * fil_crypt_space_needs_rotation + * (called from fil_crypt_find_space_to_rotate), + * this makes sure that tablespace won't be dropped + * just after we decided to start processing it. */ + fil_decr_pending_ops(thr.space); + + /* iterate all pages (cooperativly with other threads) */ + while (!thr.should_shutdown() && + fil_crypt_find_page_to_rotate(&new_state, &thr)) { + + /* rotate a (set) of pages */ + fil_crypt_rotate_pages(&new_state, &thr); + + /* realloc iops */ + fil_crypt_realloc_iops(&thr); + } + + /* complete rotation */ + fil_crypt_complete_rotate_space(&new_state, &thr); + + /* refresh key state */ + fil_crypt_get_key_state(&new_state); + + /* return iops */ + fil_crypt_return_iops(&thr); + } + } + + /* return iops if shutting down */ + fil_crypt_return_iops(&thr); + + mutex_enter(&fil_crypt_threads_mutex); + srv_n_fil_crypt_threads_started--; + mutex_exit(&fil_crypt_threads_mutex); + os_event_set(fil_crypt_event); /* signal that we stopped */ + + /* We count the number of threads in os_thread_exit(). A created + thread should always use that to exit and not use return() to exit. */ + + os_thread_exit(NULL); + + OS_THREAD_DUMMY_RETURN; +} + +/********************************************************************* +Adjust thread count for key rotation */ +UNIV_INTERN +void +fil_crypt_set_thread_cnt(uint new_cnt) { + if (new_cnt > srv_n_fil_crypt_threads) { + uint add = new_cnt - srv_n_fil_crypt_threads; + srv_n_fil_crypt_threads = new_cnt; + for (uint i = 0; i < add; i++) { + os_thread_create(fil_crypt_thread, NULL, NULL); + } + } else if (new_cnt < srv_n_fil_crypt_threads) { + srv_n_fil_crypt_threads = new_cnt; + os_event_set(fil_crypt_threads_event); + } + + while(srv_n_fil_crypt_threads_started != srv_n_fil_crypt_threads) { + os_event_reset(fil_crypt_event); + os_event_wait_time(fil_crypt_event, 1000000); + } +} + +/********************************************************************* +Adjust max key age */ +UNIV_INTERN +void +fil_crypt_set_rotate_key_age(uint val) +{ + srv_fil_crypt_rotate_key_age = val; + os_event_set(fil_crypt_threads_event); +} + +/********************************************************************* +Adjust rotation iops */ +UNIV_INTERN +void +fil_crypt_set_rotation_iops(uint val) +{ + srv_n_fil_crypt_iops = val; + os_event_set(fil_crypt_threads_event); +} + +/********************************************************************* +Init threads for key rotation */ +UNIV_INTERN +void +fil_crypt_threads_init() +{ + fil_crypt_event = os_event_create(); + fil_crypt_threads_event = os_event_create(); + mutex_create(fil_crypt_threads_mutex_key, + &fil_crypt_threads_mutex, SYNC_NO_ORDER_CHECK); + + uint cnt = srv_n_fil_crypt_threads; + srv_n_fil_crypt_threads = 0; + fil_crypt_set_thread_cnt(cnt); +} + +/********************************************************************* +End threads for key rotation */ +UNIV_INTERN +void +fil_crypt_threads_end() +{ + /* stop threads */ + fil_crypt_set_thread_cnt(0); +} + +/********************************************************************* +Clean up key rotation threads resources */ +UNIV_INTERN +void +fil_crypt_threads_cleanup() { + os_event_free(fil_crypt_event); + os_event_free(fil_crypt_threads_event); +} + +/********************************************************************* +Mark a space as closing */ +UNIV_INTERN +void +fil_space_crypt_mark_space_closing( + ulint space) +{ + mutex_enter(&fil_crypt_threads_mutex); + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) { + mutex_exit(&fil_crypt_threads_mutex); + return; + } + + mutex_enter(&crypt_data->mutex); + mutex_exit(&fil_crypt_threads_mutex); + crypt_data->closing = true; + mutex_exit(&crypt_data->mutex); +} + +/********************************************************************* +Wait for crypt threads to stop accessing space */ +UNIV_INTERN +void +fil_space_crypt_close_tablespace( + ulint space) +{ + mutex_enter(&fil_crypt_threads_mutex); + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space); + if (crypt_data == NULL) { + mutex_exit(&fil_crypt_threads_mutex); + return; + } + + uint start = time(0); + uint last = start; + mutex_enter(&crypt_data->mutex); + mutex_exit(&fil_crypt_threads_mutex); + crypt_data->closing = true; + uint cnt = crypt_data->rotate_state.active_threads; + bool flushing = crypt_data->rotate_state.flushing; + while (cnt > 0 || flushing) { + mutex_exit(&crypt_data->mutex); + /* release dict mutex so that scrub threads can release their + * table references */ + dict_mutex_exit_for_mysql(); + /* wakeup throttle (all) sleepers */ + os_event_set(fil_crypt_throttle_sleep_event); + os_thread_sleep(20000); + dict_mutex_enter_for_mysql(); + mutex_enter(&crypt_data->mutex); + cnt = crypt_data->rotate_state.active_threads; + flushing = crypt_data->rotate_state.flushing; + + uint now = time(0); + if (now >= last + 30) { + fprintf(stderr, + "WARNING: " + "waited %u seconds to drop space: %lu\n", + now - start, space); + last = now; + } + } + mutex_exit(&crypt_data->mutex); +} + +/********************************************************************* +Get crypt status for a space (used by information_schema) +return 0 if crypt data present */ +int +fil_space_crypt_get_status( +/*==================*/ + ulint id, /*!< in: space id */ + struct fil_space_crypt_status_t* status) /*!< out: status */ +{ + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(id); + + if (crypt_data != NULL) { + status->space = id; + status->scheme = crypt_data->type; + mutex_enter(&crypt_data->mutex); + status->keyserver_requests = crypt_data->keyserver_requests; + status->min_key_version = crypt_data->min_key_version; + if (crypt_data->rotate_state.active_threads > 0 || + crypt_data->rotate_state.flushing) { + status->rotating = true; + status->flushing = + crypt_data->rotate_state.flushing; + status->rotate_next_page_number = + crypt_data->rotate_state.next_offset; + status->rotate_max_page_number = + crypt_data->rotate_state.max_offset; + } else { + status->rotating = false; + } + mutex_exit(&crypt_data->mutex); + } else { + memset(status, 0, sizeof(*status)); + } + + if (srv_encrypt_tables == TRUE) { + status->current_key_version = GetLatestCryptoKeyVersion(); + } else { + status->current_key_version = 0; + } + return crypt_data == NULL ? 1 : 0; +} + +/********************************************************************* +Return crypt statistics */ +void +fil_crypt_total_stat(fil_crypt_stat_t *stat) +{ + mutex_enter(&crypt_stat_mutex); + *stat = crypt_stat; + mutex_exit(&crypt_stat_mutex); +} + +/********************************************************************* +Get scrub status for a space (used by information_schema) +return 0 if data found */ +int +fil_space_get_scrub_status( +/*==================*/ + ulint id, /*!< in: space id */ + struct fil_space_scrub_status_t* status) /*!< out: status */ +{ + fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(id); + memset(status, 0, sizeof(*status)); + if (crypt_data != NULL) { + status->space = id; + status->compressed = fil_space_get_zip_size(id) > 0; + mutex_enter(&crypt_data->mutex); + status->last_scrub_completed = + crypt_data->rotate_state.scrubbing.last_scrub_completed; + if (crypt_data->rotate_state.active_threads > 0 && + crypt_data->rotate_state.scrubbing.is_active) { + status->scrubbing = true; + status->current_scrub_started = + crypt_data->rotate_state.start_time; + status->current_scrub_active_threads = + crypt_data->rotate_state.active_threads; + status->current_scrub_page_number = + crypt_data->rotate_state.next_offset; + status->current_scrub_max_page_number = + crypt_data->rotate_state.max_offset; + } else { + status->scrubbing = false; + } + mutex_exit(&crypt_data->mutex); + } else { + memset(status, 0, sizeof(*status)); + } + + return crypt_data == NULL ? 1 : 0; +} diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index 08487f595ed..e4be4f6910c 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -56,6 +56,10 @@ Created 10/25/1995 Heikki Tuuri static ulint srv_data_read, srv_data_written; #endif /* !UNIV_HOTBACKUP */ #include "fil0pagecompress.h" + +#include "fil0pageencryption.h" +#include "fsp0pageencryption.h" + #include "zlib.h" #ifdef __linux__ #include <linux/fs.h> @@ -645,8 +649,23 @@ fil_node_open_file( success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE, space->flags); + if (fil_page_encryption_status(page)) { + /* if page is (still) encrypted, write an error and return. + * Otherwise the server would crash if decrypting is not possible. + * This may be the case, if the key file could not be + * opened on server startup. + */ + ib_logf(IB_LOG_LEVEL_ERROR, + "InnoDB: can not decrypt page, because " + "keys could not be read.\n" + ); + return false; + + } + space_id = fsp_header_get_space_id(page); flags = fsp_header_get_flags(page); + page_size = fsp_flags_get_page_size(flags); atomic_writes = fsp_flags_get_atomic_writes(flags); @@ -1157,7 +1176,8 @@ fil_space_create( const char* name, /*!< in: space name */ ulint id, /*!< in: space id */ ulint flags, /*!< in: tablespace flags */ - ulint purpose)/*!< in: FIL_TABLESPACE, or FIL_LOG if log */ + ulint purpose,/*!< in: FIL_TABLESPACE, or FIL_LOG if log */ + fil_space_crypt_t* crypt_data) /*!< in: crypt data */ { fil_space_t* space; @@ -1165,6 +1185,21 @@ fil_space_create( ut_a(fil_system); + if (fsp_flags_is_page_encrypted(flags)) { + if (!HasCryptoKey(fsp_flags_get_page_encryption_key(flags))) { + /* by returning here it should be avoided that + * the server crashes, if someone tries to access an + * encrypted table and the encryption key is not available. + * The the table is treaded as non-existent. + */ + ib_logf(IB_LOG_LEVEL_WARN, + "Tablespace '%s' can not be opened, because " + " encryption key can not be found (space id: %lu, key %lu)\n" + , name, (ulong) id, fsp_flags_get_page_encryption_key(flags)); + return (FALSE); + } + } + /* Look for a matching tablespace and if found free it. */ do { mutex_enter(&fil_system->mutex); @@ -1253,6 +1288,8 @@ fil_space_create( UT_LIST_ADD_LAST(space_list, fil_system->space_list, space); + space->crypt_data = crypt_data; + mutex_exit(&fil_system->mutex); return(TRUE); @@ -1387,6 +1424,8 @@ fil_space_free( rw_lock_free(&(space->latch)); + fil_space_destroy_crypt_data(&(space->crypt_data)); + mem_free(space->name); mem_free(space); @@ -1620,6 +1659,8 @@ fil_init( UT_LIST_INIT(fil_system->LRU); fil_system->max_n_open = max_n_open; + + fil_space_crypt_init(); } /*******************************************************************//** @@ -1827,7 +1868,8 @@ fil_write_lsn_and_arch_no_to_file( err = fil_read(TRUE, space, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL, 0); if (err == DB_SUCCESS) { - mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn); + mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, + lsn); err = fil_write(TRUE, space, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL, 0); @@ -1909,6 +1951,7 @@ fil_check_first_page( { ulint space_id; ulint flags; + ulint page_is_encrypted; if (srv_force_recovery >= SRV_FORCE_IGNORE_CORRUPT) { return(NULL); @@ -1916,12 +1959,23 @@ fil_check_first_page( space_id = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page); flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page); - - if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) { - fprintf(stderr, "InnoDB: Error: Current page size %lu != page size on page %lu\n", - UNIV_PAGE_SIZE, fsp_flags_get_page_size(flags)); - - return("innodb-page-size mismatch"); + /* Note: the 1st page is usually not encrypted. If the Key Provider + or the encryption key is not available, the + check for reading the first page should intentionally fail + with "can not decrypt" message. */ + page_is_encrypted = fil_page_encryption_status(page); + if (page_is_encrypted == PAGE_ENCRYPTION_KEY_MISSING && page_is_encrypted) { + page_is_encrypted = 1; + } else { + page_is_encrypted = 0; + if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) { + fprintf(stderr, + "InnoDB: Error: Current page size %lu != " + " page size on page %lu\n", + UNIV_PAGE_SIZE, fsp_flags_get_page_size(flags)); + + return("innodb-page-size mismatch"); + } } if (!space_id && !flags) { @@ -1937,9 +1991,17 @@ fil_check_first_page( } } - if (buf_page_is_corrupted( + if (!page_is_encrypted && buf_page_is_corrupted( false, page, fsp_flags_get_zip_size(flags))) { return("checksum mismatch"); + } else { + if (page_is_encrypted) { + /* this error message is interpreted by the calling method, which is + * executed if the server starts in recovery mode. + */ + return(MSG_CANNOT_DECRYPT); + + } } if (page_get_space_id(page) == space_id @@ -1969,8 +2031,9 @@ fil_read_first_page( lsn values in data files */ lsn_t* max_flushed_lsn, /*!< out: max of flushed lsn values in data files */ - ulint orig_space_id) /*!< in: original file space + ulint orig_space_id, /*!< in: original file space id */ + fil_space_crypt_t** crypt_data) /*< out: crypt data */ { byte* buf; byte* page; @@ -2008,7 +2071,16 @@ fil_read_first_page( check_msg = fil_check_first_page(page); } - flushed_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN); + flushed_lsn = mach_read_from_8(page + + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + + if (crypt_data) { + ulint space = fsp_header_get_space_id(page); + ulint offset = + fsp_header_get_crypt_offset( + fsp_flags_get_zip_size(*flags), NULL); + *crypt_data = fil_space_read_crypt_data(space, page, offset); + } ut_free(buf); @@ -2487,6 +2559,9 @@ fil_check_pending_operations( *space = 0; + /* Wait for crypt threads to stop accessing space */ + fil_space_crypt_close_tablespace(id); + mutex_enter(&fil_system->mutex); fil_space_t* sp = fil_space_get_by_id(id); if (sp) { @@ -3468,7 +3543,8 @@ fil_create_new_single_table_tablespace( } } - success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE); + success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE, + fil_space_create_crypt_data()); if (!success || !fil_node_create(path, size, space_id, FALSE)) { err = DB_ERROR; goto error_exit_1; @@ -3596,6 +3672,7 @@ fil_open_single_table_tablespace( ulint tablespaces_found = 0; ulint valid_tablespaces_found = 0; ulint atomic_writes = 0; + fil_space_crypt_t* crypt_data = NULL; #ifdef UNIV_SYNC_DEBUG ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); @@ -3694,7 +3771,7 @@ fil_open_single_table_tablespace( if (def.success) { def.check_msg = fil_read_first_page( def.file, FALSE, &def.flags, &def.id, - &def.lsn, &def.lsn, id); + &def.lsn, &def.lsn, id, &def.crypt_data); def.valid = !def.check_msg; /* Validate this single-table-tablespace with SYS_TABLES, @@ -3716,7 +3793,7 @@ fil_open_single_table_tablespace( if (remote.success) { remote.check_msg = fil_read_first_page( remote.file, FALSE, &remote.flags, &remote.id, - &remote.lsn, &remote.lsn, id); + &remote.lsn, &remote.lsn, id, &remote.crypt_data); remote.valid = !remote.check_msg; /* Validate this single-table-tablespace with SYS_TABLES, @@ -3739,7 +3816,7 @@ fil_open_single_table_tablespace( if (dict.success) { dict.check_msg = fil_read_first_page( dict.file, FALSE, &dict.flags, &dict.id, - &dict.lsn, &dict.lsn, id); + &dict.lsn, &dict.lsn, id, &dict.crypt_data); dict.valid = !dict.check_msg; /* Validate this single-table-tablespace with SYS_TABLES, @@ -3892,9 +3969,17 @@ fil_open_single_table_tablespace( } skip_validate: + if (remote.success) + crypt_data = remote.crypt_data; + else if (dict.success) + crypt_data = dict.crypt_data; + else if (def.success) + crypt_data = def.crypt_data; + if (err != DB_SUCCESS) { ; // Don't load the tablespace into the cache - } else if (!fil_space_create(tablename, id, flags, FIL_TABLESPACE)) { + } else if (!fil_space_create(tablename, id, flags, FIL_TABLESPACE, + crypt_data)) { err = DB_ERROR; } else { /* We do not measure the size of the file, that is why @@ -3914,15 +3999,25 @@ cleanup_and_exit: if (remote.filepath) { mem_free(remote.filepath); } + if (remote.crypt_data && remote.crypt_data != crypt_data) { + fil_space_destroy_crypt_data(&remote.crypt_data); + } if (dict.success) { os_file_close(dict.file); } if (dict.filepath) { mem_free(dict.filepath); } + if (dict.crypt_data && dict.crypt_data != crypt_data) { + fil_space_destroy_crypt_data(&dict.crypt_data); + } if (def.success) { os_file_close(def.file); } + if (def.crypt_data && def.crypt_data != crypt_data) { + fil_space_destroy_crypt_data(&def.crypt_data); + } + mem_free(def.filepath); return(err); @@ -4139,13 +4234,22 @@ fil_validate_single_table_tablespace( check_first_page: fsp->success = TRUE; + fsp->encryption_error = 0; if (const char* check_msg = fil_read_first_page( fsp->file, FALSE, &fsp->flags, &fsp->id, - &fsp->lsn, &fsp->lsn, ULINT_UNDEFINED)) { + &fsp->lsn, &fsp->lsn, ULINT_UNDEFINED, &fsp->crypt_data)) { ib_logf(IB_LOG_LEVEL_ERROR, "%s in tablespace %s (table %s)", check_msg, fsp->filepath, tablename); fsp->success = FALSE; + if (strncmp(check_msg, MSG_CANNOT_DECRYPT, strlen(check_msg))==0) { + /* by returning here, it should be avoided, that the server crashes, + * if started in recovery mode and can not decrypt tables, if + * the key file can not be read. + */ + fsp->encryption_error = 1; + return; + } } if (!fsp->success) { @@ -4299,6 +4403,14 @@ fil_load_single_table_tablespace( } if (!def.success && !remote.success) { + + if (def.encryption_error || remote.encryption_error) { + fprintf(stderr, + "InnoDB: Error: could not open single-table" + " tablespace file %s. Encryption error!\n", def.filepath); + return; + } + /* The following call prints an error message */ os_file_get_last_error(true); fprintf(stderr, @@ -4482,7 +4594,8 @@ will_not_choose: mutex_exit(&fil_system->mutex); #endif /* UNIV_HOTBACKUP */ ibool file_space_create_success = fil_space_create( - tablename, fsp->id, fsp->flags, FIL_TABLESPACE); + tablename, fsp->id, fsp->flags, FIL_TABLESPACE, + fsp->crypt_data); if (!file_space_create_success) { if (srv_force_recovery > 0) { @@ -5133,7 +5246,7 @@ retry: success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC, node->name, node->handle, buf, offset, page_size * n_pages, - node, NULL, space_id, NULL, 0, 0, 0); + node, NULL, space_id, NULL, 0, 0, 0, 0, 0); #endif /* UNIV_HOTBACKUP */ if (success) { os_has_said_disk_full = FALSE; @@ -5526,6 +5639,8 @@ _fil_io( ibool ignore_nonexistent_pages; ibool page_compressed = FALSE; ulint page_compression_level = 0; + ibool page_encrypted; + ulint page_encryption_key; is_log = type & OS_FILE_LOG; type = type & ~OS_FILE_LOG; @@ -5595,6 +5710,11 @@ _fil_io( page_compressed = fsp_flags_is_page_compressed(space->flags); page_compression_level = fsp_flags_get_page_compression_level(space->flags); + + page_encrypted = fsp_flags_is_page_encrypted(space->flags); + page_encryption_key = fsp_flags_get_page_encryption_key(space->flags); + + /* If we are deleting a tablespace we don't allow any read operations on that. However, we do allow write operations. */ if (space == 0 || (type == OS_FILE_READ && space->stop_new_ops)) { @@ -5739,9 +5859,23 @@ _fil_io( } /* Queue the aio request */ - ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, - offset, len, node, message, space_id, trx, - page_compressed, page_compression_level, write_size); + ret = os_aio( + type, + mode | wake_later, + node->name, + node->handle, + buf, + offset, + len, + node, + message, + space_id, + trx, + page_compressed, + page_compression_level, + write_size, + page_encrypted, + page_encryption_key); #else /* In mysqlbackup do normal i/o, not aio */ @@ -6180,6 +6314,8 @@ void fil_close(void) /*===========*/ { + fil_space_crypt_cleanup(); + #ifndef UNIV_HOTBACKUP /* The mutex should already have been freed. */ ut_ad(fil_system->mutex.magic_n == 0); @@ -6229,6 +6365,8 @@ struct fil_iterator_t { ulint n_io_buffers; /*!< Number of pages to use for IO */ byte* io_buffer; /*!< Buffer to use for IO */ + fil_space_crypt_t *crypt_data; /*!< Crypt data (if encrypted) */ + byte* crypt_io_buffer; /*!< IO buffer when encrypted */ }; /********************************************************************//** @@ -6291,7 +6429,12 @@ fil_iterate( ut_ad(n_bytes > 0); ut_ad(!(n_bytes % iter.page_size)); - if (!os_file_read(iter.file, io_buffer, offset, + byte* readptr = io_buffer; + if (iter.crypt_data != NULL) { + readptr = iter.crypt_io_buffer; + } + + if (!os_file_read(iter.file, readptr, offset, (ulint) n_bytes, fil_space_is_page_compressed(space_id))) { @@ -6306,6 +6449,18 @@ fil_iterate( for (ulint i = 0; i < n_pages_read; ++i) { + if (iter.crypt_data != NULL) { + bool decrypted = fil_space_decrypt( + iter.crypt_data, + readptr + i * iter.page_size, // src + iter.page_size, + io_buffer + i * iter.page_size); // dst + if (decrypted) { + /* write back unencrypted page */ + updated = true; + } + } + buf_block_set_file_page(block, space_id, page_no++); dberr_t err; @@ -6448,6 +6603,13 @@ fil_tablespace_iterate( iter.n_io_buffers = n_io_buffers; iter.page_size = callback.get_page_size(); + ulint crypt_data_offset = fsp_header_get_crypt_offset( + callback.get_zip_size(), 0); + + /* read (optional) crypt data */ + iter.crypt_data = fil_space_read_crypt_data( + 0, page, crypt_data_offset); + /* Compressed pages can't be optimised for block IO for now. We do the IMPORT page by page. */ @@ -6456,6 +6618,14 @@ fil_tablespace_iterate( ut_a(iter.page_size == callback.get_zip_size()); } + /** If tablespace is encrypted, it needs extra buffers */ + if (iter.crypt_data != NULL) { + /* decrease io buffers so that memory + * consumption doesnt double + * note: the +1 is to avoid n_io_buffers getting down to 0 */ + iter.n_io_buffers = (iter.n_io_buffers + 1) / 2; + } + /** Add an extra page for compressed page scratch area. */ void* io_buffer = mem_alloc( @@ -6464,9 +6634,45 @@ fil_tablespace_iterate( iter.io_buffer = static_cast<byte*>( ut_align(io_buffer, UNIV_PAGE_SIZE)); + void* crypt_io_buffer = NULL; + if (iter.crypt_data != NULL) { + crypt_io_buffer = mem_alloc( + iter.n_io_buffers * UNIV_PAGE_SIZE); + iter.crypt_io_buffer = static_cast<byte*>( + crypt_io_buffer); + } + err = fil_iterate(iter, &block, callback); mem_free(io_buffer); + + if (iter.crypt_data != NULL) { + /* clear crypt data from page 0 and write it back */ + os_file_read(file, page, 0, UNIV_PAGE_SIZE, 0); + fil_space_clear_crypt_data(page, crypt_data_offset); + lsn_t lsn = mach_read_from_8(page + FIL_PAGE_LSN); + if (callback.get_zip_size() == 0) { + buf_flush_init_for_writing( + page, 0, lsn); + } else { + buf_flush_update_zip_checksum( + page, callback.get_zip_size(), lsn); + } + + if (!os_file_write( + iter.filepath, iter.file, page, + 0, iter.page_size)) { + + ib_logf(IB_LOG_LEVEL_ERROR, + "os_file_write() failed"); + + return(DB_IO_ERROR); + } + + mem_free(crypt_io_buffer); + iter.crypt_io_buffer = NULL; + fil_space_destroy_crypt_data(&iter.crypt_data); + } } if (err == DB_SUCCESS) { @@ -6700,6 +6906,16 @@ fil_space_name( } /*******************************************************************//** +Return space flags */ +ulint +fil_space_flags( +/*===========*/ + fil_space_t* space) /*!< in: space */ +{ + return (space->flags); +} + +/*******************************************************************//** Return page type name */ const char* fil_get_page_type_name( @@ -6752,3 +6968,137 @@ fil_node_get_block_size( { return (node->file_block_size); } + +/****************************************************************** +Get id of first tablespace or ULINT_UNDEFINED if none */ +UNIV_INTERN +ulint +fil_get_first_space() +{ + ulint out_id = ULINT_UNDEFINED; + fil_space_t* space; + + mutex_enter(&fil_system->mutex); + + space = UT_LIST_GET_FIRST(fil_system->space_list); + if (space != NULL) { + do + { + if (!space->stop_new_ops) { + out_id = space->id; + break; + } + space = UT_LIST_GET_NEXT(space_list, space); + } while (space != NULL); + } + + mutex_exit(&fil_system->mutex); + + return out_id; +} + +/****************************************************************** +Get id of next tablespace or ULINT_UNDEFINED if none */ +UNIV_INTERN +ulint +fil_get_next_space(ulint id) +{ + bool found; + fil_space_t* space; + ulint out_id = ULINT_UNDEFINED; + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + if (space == NULL) { + /* we didn't find it...search for space with space->id > id */ + found = false; + space = UT_LIST_GET_FIRST(fil_system->space_list); + } else { + /* we found it, take next available space */ + found = true; + } + + while ((space = UT_LIST_GET_NEXT(space_list, space)) != NULL) { + + if (!found && space->id <= id) + continue; + + if (!space->stop_new_ops) { + /* inc reference to prevent drop */ + out_id = space->id; + break; + } + } + + mutex_exit(&fil_system->mutex); + + return out_id; +} + +/****************************************************************** +Get crypt data for a tablespace */ +UNIV_INTERN +fil_space_crypt_t* +fil_space_get_crypt_data( +/*==================*/ + ulint id) /*!< in: space id */ +{ + fil_space_t* space; + fil_space_crypt_t* crypt_data = NULL; + + ut_ad(fil_system); + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + if (space != NULL) { + crypt_data = space->crypt_data; + } + + mutex_exit(&fil_system->mutex); + + return(crypt_data); +} + +/****************************************************************** +Get crypt data for a tablespace */ +UNIV_INTERN +void +fil_space_set_crypt_data( +/*==================*/ + ulint id, /*!< in: space id */ + fil_space_crypt_t* crypt_data) /*!< in: crypt data */ +{ + fil_space_t* space; + fil_space_crypt_t* old_crypt_data = NULL; + + ut_ad(fil_system); + + mutex_enter(&fil_system->mutex); + + space = fil_space_get_by_id(id); + if (space != NULL) { + + if (space->crypt_data != NULL) { + ut_a(!fil_space_crypt_compare(crypt_data, + space->crypt_data)); + old_crypt_data = space->crypt_data; + } + + space->crypt_data = crypt_data; + } else { + /* there is a small risk that tablespace has been deleted */ + old_crypt_data = crypt_data; + } + + mutex_exit(&fil_system->mutex); + + if (old_crypt_data != NULL) { + /* first assign space->crypt_data + * then destroy old_crypt_data when no new references to + * it can be created. + */ + fil_space_destroy_crypt_data(&old_crypt_data); + } +} diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc index c1d476126c6..fa25d8875ae 100644 --- a/storage/xtradb/fil/fil0pagecompress.cc +++ b/storage/xtradb/fil/fil0pagecompress.cc @@ -269,14 +269,24 @@ fil_compress_page( int level = 0; ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE; ulint write_size=0; - ulint comp_method = innodb_compression_algorithm; /* Cache to avoid - change during - function execution */ + /* Cache to avoid change during function execution */ + ulint comp_method = innodb_compression_algorithm; + ulint orig_page_type; + ut_ad(buf); ut_ad(out_buf); ut_ad(len); ut_ad(out_len); + /* read original page type */ + orig_page_type = mach_read_from_2(buf + FIL_PAGE_TYPE); + + /* Let's not compress file space header or + extent descriptor */ + if ((orig_page_type == FIL_PAGE_TYPE_FSP_HDR) || (orig_page_type == FIL_PAGE_TYPE_XDES) ) { + *out_len = len; + return (buf); + } level = compression_level; ut_ad(fil_space_is_page_compressed(space_id)); @@ -419,7 +429,7 @@ fil_compress_page( /* Set up the correct page type */ mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED); /* Set up the flush lsn to be compression algorithm */ - mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, comp_method); + mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, comp_method); /* Set up the actual payload lenght */ mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size); @@ -428,7 +438,7 @@ fil_compress_page( ut_ad(fil_page_is_compressed(out_buf)); ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC); ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size); - ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == (ulint)comp_method); + ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) == (ulint)comp_method); /* Verify that page can be decompressed */ { @@ -470,7 +480,6 @@ fil_compress_page( space_id, fil_space_name(space), len, write_size); #endif /* UNIV_PAGECOMPRESS_DEBUG */ - srv_stats.page_compression_saved.add((len - write_size)); srv_stats.pages_page_compressed.inc(); @@ -552,7 +561,7 @@ fil_decompress_page( } /* Get compression algorithm */ - compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN); + compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); /* Get the actual size of compressed page */ actual_size = mach_read_from_2(buf+FIL_PAGE_DATA); @@ -722,5 +731,3 @@ fil_decompress_page( ut_free(in_buf); } } - - diff --git a/storage/xtradb/fil/fil0pageencryption.cc b/storage/xtradb/fil/fil0pageencryption.cc new file mode 100644 index 00000000000..49c42615e19 --- /dev/null +++ b/storage/xtradb/fil/fil0pageencryption.cc @@ -0,0 +1,628 @@ +/***************************************************************************** + +Copyright (C) 2014 eperi GmbH. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/***************************************************************** + @file fil/fil0pageencryption.cc + Implementation for page encryption file spaces. + + Created 08/25/2014 Ludger Göckel eperi-GmbH + Modified 11/26/2014 Jan Lindström MariaDB Corporation + ***********************************************************************/ + +#include "fil0fil.h" +#include "fil0pageencryption.h" +#include "fsp0pageencryption.h" +#include "my_dbug.h" +#include "page0zip.h" + +#include "buf0checksum.h" +#include <my_global.h> +#include <my_aes.h> +#include <math.h> + +/* + * derived from libFLAC, which is gpl v2 + */ +byte crc_table[] = { + 0x00,0x07,0x0E,0x09,0x1C,0x1B,0x12,0x15,0x38,0x3F,0x36,0x31,0x24,0x23,0x2A,0x2D,0x70,0x77,0x7E,0x79, + 0x6C,0x6B,0x62,0x65,0x48,0x4F,0x46,0x41,0x54,0x53,0x5A,0x5D,0xE0,0xE7,0xEE,0xE9,0xFC,0xFB,0xF2,0xF5, + 0xD8,0xDF,0xD6,0xD1,0xC4,0xC3,0xCA,0xCD,0x90,0x97,0x9E,0x99,0x8C,0x8B,0x82,0x85,0xA8,0xAF,0xA6,0xA1, + 0xB4,0xB3,0xBA,0xBD,0xC7,0xC0,0xC9,0xCE,0xDB,0xDC,0xD5,0xD2,0xFF,0xF8,0xF1,0xF6,0xE3,0xE4,0xED,0xEA, + 0xB7,0xB0,0xB9,0xBE,0xAB,0xAC,0xA5,0xA2,0x8F,0x88,0x81,0x86,0x93,0x94,0x9D,0x9A,0x27,0x20,0x29,0x2E, + 0x3B,0x3C,0x35,0x32,0x1F,0x18,0x11,0x16,0x03,0x04,0x0D,0x0A,0x57,0x50,0x59,0x5E,0x4B,0x4C,0x45,0x42, + 0x6F,0x68,0x61,0x66,0x73,0x74,0x7D,0x7A,0x89,0x8E,0x87,0x80,0x95,0x92,0x9B,0x9C,0xB1,0xB6,0xBF,0xB8, + 0xAD,0xAA,0xA3,0xA4,0xF9,0xFE,0xF7,0xF0,0xE5,0xE2,0xEB,0xEC,0xC1,0xC6,0xCF,0xC8,0xDD,0xDA,0xD3,0xD4, + 0x69,0x6E,0x67,0x60,0x75,0x72,0x7B,0x7C,0x51,0x56,0x5F,0x58,0x4D,0x4A,0x43,0x44,0x19,0x1E,0x17,0x10, + 0x05,0x02,0x0B,0x0C,0x21,0x26,0x2F,0x28,0x3D,0x3A,0x33,0x34,0x4E,0x49,0x40,0x47,0x52,0x55,0x5C,0x5B, + 0x76,0x71,0x78,0x7F,0x6A,0x6D,0x64,0x63,0x3E,0x39,0x30,0x37,0x22,0x25,0x2C,0x2B,0x06,0x01,0x08,0x0F, + 0x1A,0x1D,0x14,0x13,0xAE,0xA9,0xA0,0xA7,0xB2,0xB5,0xBC,0xBB,0x96,0x91,0x98,0x9F,0x8A,0x8D,0x84,0x83, + 0xDE,0xD9,0xD0,0xD7,0xC2,0xC5,0xCC,0xCB,0xE6,0xE1,0xE8,0xEF,0xFA,0xFD,0xF4,0xF3 + +}; + +/****************************************************************//** +Calculate checksum for encrypted pages +@return checksum */ +static +byte +fil_page_encryption_calc_checksum( +/*==============================*/ + unsigned char* buf, /*!<in: buffer where to calculate checksum */ + ulint len) /*!<in: buffer length */ +{ + byte crc = 0; + for (ulint i = 0; i < len; i++) { + crc = crc_table[(crc ^ buf[i]) & 0xff]; + } + return crc; +} + +/****************************************************************//** +Recalculate checksum for encrypted pages */ +static +void +do_check_sum( +/*=========*/ + ulint page_size, /*!< in: page size */ + ulint zip_size, /*!< in: compressed page size */ + byte* buf) /*!< in: buffer */ +{ + ib_uint32_t checksum = 0; + + if (zip_size) { + checksum = page_zip_calc_checksum(buf,zip_size, + static_cast<srv_checksum_algorithm_t>( + srv_checksum_algorithm)); + + mach_write_to_4(buf + FIL_PAGE_SPACE_OR_CHKSUM, checksum); + return; + } + + switch ((srv_checksum_algorithm_t) srv_checksum_algorithm) { + case SRV_CHECKSUM_ALGORITHM_CRC32: + case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: + checksum = buf_calc_page_crc32(buf); + break; + case SRV_CHECKSUM_ALGORITHM_INNODB: + case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: + checksum = (ib_uint32_t) buf_calc_page_new_checksum(buf); + break; + case SRV_CHECKSUM_ALGORITHM_NONE: + case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: + checksum = BUF_NO_CHECKSUM_MAGIC; + break; + /* no default so the compiler will emit a warning if new enum + is added and not handled here */ + } + + mach_write_to_4(buf + FIL_PAGE_SPACE_OR_CHKSUM, checksum); + + /* old style checksum is omitted */ +} + +/****************************************************************//** + For page encrypted pages encrypt the page before actual write + operation. + + Note, that FIL_PAGE_TYPE_FSP_HDR and FIL_PAGE_TYPE_XDES type pages + are not encrypted! + + Pages are encrypted with AES/CBC/NoPadding algorithm. + + "No padding" is used to ensure, that the encrypted page does not + exceed the page size. If "no padding" is used, the input for encryption + must be of size (multiple * AES blocksize). AES Blocksize is usually 16 + (bytes). + + Everything in the page is encrypted except for the 38 byte FIL header. + Since the length of the payload is not a multiple of the AES blocksize, + and to ensure that every byte of the payload is encrypted, two encryption + operations are done. Each time with a block of adequate size as input. + 1st block contains everything from beginning of payload bytes except for + the remainder. 2nd block is of size 64 and contains the remainder and + the last (64 - sizeof(remainder)) bytes of the encrypted 1st block. + + Each encrypted page receives a new page type for PAGE_ENCRYPTION. + The original page type (2 bytes) is stored in the Checksum header of the + page (position FIL_PAGE_SPACE_OR_CHKSUM). Additionally the encryption + key identifier is stored in the Checksum Header. This uses 1 byte. + Checksum verification for encrypted pages is disabled. This checksum + should be restored after decryption. + + To be able to verify decryption in a later stage, a 1-byte checksum at + position 4 of the FIL_PAGE_SPACE_OR_CHKSUM header is stored. + For page compressed table pages the log base 2 of the length of the + encrypted data is stored. + + @return encrypted page or original page if encryption failed to be + written*/ +UNIV_INTERN +byte* +fil_encrypt_page( +/*==============*/ + ulint space_id, /*!< in: tablespace id of the table. */ + byte* buf, /*!< in: buffer from which to write; in aio + this must be appropriately aligned */ + byte* out_buf, /*!< out: encrypted buffer */ + ulint len, /*!< in: length of input buffer.*/ + ulint encryption_key, /*!< in: encryption key */ + ulint* out_len, /*!< out: actual length of encrypted page */ + ulint* errorCode, /*!< out: an error code. set, + if page is intentionally not encrypted */ + byte* tmp_encryption_buf) /*!< in: temporary buffer or NULL */ +{ + + int err = AES_OK; + int key = 0; + uint32 data_size = 0; + ulint orig_page_type = 0; + uint32 write_size = 0; + fil_space_t* space = NULL; + byte* tmp_buf = NULL; + ulint page_len = 0; + ulint offset = 0; + + ut_ad(buf);ut_ad(out_buf); + key = encryption_key; + + *errorCode = AES_OK; + + ut_ad(fil_space_is_page_encrypted(space_id)); + fil_system_enter(); + space = fil_space_get_by_id(space_id); + fil_system_exit(); + +#ifdef UNIV_DEBUG_PAGEENCRYPTION + ulint pageno = mach_read_from_4(buf + FIL_PAGE_OFFSET); + fprintf(stderr, + "InnoDB: Note: Preparing for encryption for space %lu name %s len %lu, page no %lu\n", + space_id, fil_space_name(space), len, pageno); +#endif /* UNIV_DEBUG_PAGEENCRYPTION */ + + /* read original page type */ + orig_page_type = mach_read_from_2(buf + FIL_PAGE_TYPE); + + /* Do not encrypt file space header or extend descriptor */ + if ((orig_page_type == FIL_PAGE_TYPE_FSP_HDR) + || (orig_page_type == FIL_PAGE_TYPE_XDES) ) { + *errorCode = PAGE_ENCRYPTION_WILL_NOT_ENCRYPT; + *out_len = len; + return (buf); + } + + if (FIL_PAGE_PAGE_COMPRESSED == orig_page_type) { + page_len = log10((double)len)/log10((double)2); + } + + byte checksum_byte = fil_page_encryption_calc_checksum(buf + FIL_PAGE_DATA, len - FIL_PAGE_DATA); + + /* data_size bytes will be encrypted at first. + * data_size will be the length of the cipher text since no padding is used.*/ + data_size = ((len - FIL_PAGE_DATA - FIL_PAGE_DATA_END) / MY_AES_BLOCK_SIZE) * MY_AES_BLOCK_SIZE; + + + unsigned char rkey[GetCryptoKeySize(encryption_key)]; + uint key_len = sizeof(rkey); + + unsigned char iv[16]; + uint iv_len = sizeof(iv); + + if (!HasCryptoKey(encryption_key)) { + err = PAGE_ENCRYPTION_KEY_MISSING; + } else { + int rc; + + rc = GetCryptoKey(encryption_key, rkey, key_len); + if (rc != AES_OK) + { + err = PAGE_ENCRYPTION_KEY_MISSING; + } + + rc = GetCryptoIV(encryption_key, iv, iv_len); + if (rc != AES_OK) + { + err = PAGE_ENCRYPTION_KEY_MISSING; + } + } + + /* 1st encryption: data_size bytes starting from FIL_PAGE_DATA */ + if (err == AES_OK) { + err = my_aes_encrypt_dynamic( + (uchar*) buf + FIL_PAGE_DATA, + data_size, + (uchar *) out_buf + FIL_PAGE_DATA, + &write_size, + (const unsigned char *) rkey, + key_len, + (const unsigned char *) iv, + iv_len, + 1); + + ut_ad(write_size == data_size); + + if (err == AES_OK) { + /* copy remaining bytes from input buffer to output buffer. + * Note, that this copies the final 8 bytes of a + * page, which consists of the + * Old-style checksum and the "Low 32 bits of LSN */ + memcpy(out_buf + FIL_PAGE_DATA + data_size, + buf + FIL_PAGE_DATA + data_size , + len - FIL_PAGE_DATA -data_size); + + if (tmp_encryption_buf == NULL) { + //create temporary buffer for 2nd encryption + tmp_buf = static_cast<byte *>(ut_malloc(64)); + } else { + tmp_buf = tmp_encryption_buf; + } + + /* 2nd encryption: 64 bytes from out_buf, + result length is 64 bytes */ + err = my_aes_encrypt_dynamic((uchar*)out_buf + len -offset -64, + 64, + (uchar*)tmp_buf, + &write_size, + (const unsigned char *)rkey, + key_len, + (const unsigned char *)iv, + iv_len, 1); + ut_ad(write_size == 64); + + /* copy 64 bytes from 2nd encryption to out_buf*/ + memcpy(out_buf + len - offset -64, tmp_buf, 64); + } + + } + + /* error handling */ + if (err != AES_OK) { + /* If an error occurred we leave the actual page as it was */ + + fprintf(stderr, + "InnoDB: Warning: Encryption failed for space %lu " + "name %s len %lu rt %d write %lu, error: %d\n", + space_id, fil_space_name(space), len, err, (ulint)data_size, err); + fflush(stderr); + srv_stats.pages_page_encryption_error.inc(); + *out_len = len; + + /* free temporary buffer */ + if (tmp_buf!=NULL && tmp_encryption_buf == NULL) { + ut_free(tmp_buf); + } + *errorCode = err; + + return (buf); + } + + /* Set up the page header. Copied from input buffer*/ + memcpy(out_buf, buf, FIL_PAGE_DATA); + + /* Set up the correct page type */ + mach_write_to_2(out_buf + FIL_PAGE_TYPE, FIL_PAGE_PAGE_ENCRYPTED); + + /* The 1st checksum field is used to store original page type, etc. + * checksum check for page encrypted pages is omitted. + */ + + /* Set up the encryption key. Written to the 1st byte of + the checksum header field. This header is currently used to store data. */ + mach_write_to_1(out_buf + FIL_PAGE_SPACE_OR_CHKSUM, key); + + /* store original page type. Written to 2nd and 3rd byte + of the checksum header field */ + mach_write_to_2(out_buf + FIL_PAGE_SPACE_OR_CHKSUM + 1, orig_page_type); + + if (FIL_PAGE_PAGE_COMPRESSED == orig_page_type) { + /* set byte 4 of checksum field to page length (ln(len)) */ + memset(out_buf + FIL_PAGE_SPACE_OR_CHKSUM + 3, page_len, 1); + } else { + /* set byte 4 of checksum field to checksum byte */ + memset(out_buf + FIL_PAGE_SPACE_OR_CHKSUM + 3, checksum_byte, 1); + } + +#ifdef UNIV_DEBUG + /* Verify */ + ut_ad(fil_page_is_encrypted(out_buf)); + +#endif /* UNIV_DEBUG */ + + srv_stats.pages_page_encrypted.inc(); + *out_len = len; + + /* free temporary buffer */ + if (tmp_buf!=NULL && tmp_encryption_buf == NULL) { + ut_free(tmp_buf); + } + + return (out_buf); +} + +/****************************************************************//** + For page encrypted pages decrypt the page after actual read + operation. + + See fil_encrypt_page for details, how the encryption works. + + If the decryption can be verified, original page should be completely restored. + This includes original page type, 4-byte checksum field at page start. + If it is not a page compressed table's page, decryption is verified against + a 1-byte checksum built over the plain data bytes. If this verification + fails, an error state is returned. + + @return decrypted page */ +ulint +fil_decrypt_page( +/*=============*/ + byte* page_buf, /*!< in: preallocated buffer or NULL */ + byte* buf, /*!< in/out: buffer from which to read; in aio + this must be appropriately aligned */ + ulint len, /*!< in: length buffer, which should be decrypted.*/ + ulint* write_size, /*!< out: size of the decrypted + data. If no error occurred equal to len */ + ibool* page_compressed,/*!<out: is page compressed.*/ + byte* tmp_encryption_buf) /*!< in: temporary buffer or NULL */ +{ + int err = AES_OK; + ulint page_decryption_key; + uint32 data_size = 0; + ulint orig_page_type = 0; + uint32 tmp_write_size = 0; + ulint offset = 0; + byte *in_buf = NULL; + byte *tmp_buf = NULL; + fil_space_t* space = NULL; + + ulint page_compression_flag = 0; + + ut_ad(buf); + ut_ad(len); + + /* Before actual decrypt, make sure that page type is correct */ + ulint current_page_type = mach_read_from_2(buf + FIL_PAGE_TYPE); + + if ((current_page_type == FIL_PAGE_TYPE_FSP_HDR) + || (current_page_type == FIL_PAGE_TYPE_XDES)) { + /* assumed as unencrypted */ + if (write_size!=NULL) { + *write_size = len; + } + return AES_OK; + } + + if (current_page_type != FIL_PAGE_PAGE_ENCRYPTED) { + + fprintf(stderr, "InnoDB: Corruption: We try to decrypt corrupted page\n" + "InnoDB: CRC %lu type %lu.\n" + "InnoDB: len %lu\n", + mach_read_from_4(buf + FIL_PAGE_SPACE_OR_CHKSUM), + mach_read_from_2(buf + FIL_PAGE_TYPE), len); + + fflush(stderr); + return PAGE_ENCRYPTION_WRONG_PAGE_TYPE; + } + + /* 1st checksum field is used to store original page type, etc. + * checksum check for page encrypted pages is omitted. + */ + + /* read page encryption key */ + page_decryption_key = mach_read_from_1(buf + FIL_PAGE_SPACE_OR_CHKSUM); + + /* Get the page type */ + orig_page_type = mach_read_from_2(buf + FIL_PAGE_SPACE_OR_CHKSUM + 1); + + /* read checksum byte */ + byte stored_checksum_byte = mach_read_from_1(buf + FIL_PAGE_SPACE_OR_CHKSUM + 3); + + if (FIL_PAGE_PAGE_COMPRESSED == orig_page_type) { + if (page_compressed != NULL) { + *page_compressed = 1L; + } + page_compression_flag = 1; + len = pow((double)2, (double)((int)stored_checksum_byte)); + offset = 0; + } + + data_size = ((len - FIL_PAGE_DATA - FIL_PAGE_DATA_END) / MY_AES_BLOCK_SIZE) * MY_AES_BLOCK_SIZE; + + + unsigned char rkey[GetCryptoKeySize(page_decryption_key)]; + uint key_len = sizeof(rkey); + + unsigned char iv[16]; + uint iv_len = sizeof(iv); + + if (!HasCryptoKey(page_decryption_key)) { + err = PAGE_ENCRYPTION_KEY_MISSING; + } else { + int rc; + + rc = GetCryptoKey(page_decryption_key, rkey, key_len); + if (rc != AES_OK) + { + err = PAGE_ENCRYPTION_KEY_MISSING; + } + + rc = GetCryptoIV(page_decryption_key, iv, iv_len); + if (rc != AES_OK) + { + err = PAGE_ENCRYPTION_KEY_MISSING; + } + } + + + if (err != AES_OK) { + /* surely key could not be determined. */ + fprintf(stderr, "InnoDB: Corruption: Page is marked as encrypted\n" + "InnoDB: but decrypt failed with error %d, encryption key %d.\n", + err, (int)page_decryption_key); + fflush(stderr); + + return err; + } + + if (tmp_encryption_buf == NULL) { + tmp_buf= static_cast<byte *>(ut_malloc(64)); + } else { + tmp_buf = tmp_encryption_buf; + } + + // If no buffer was given, we need to allocate temporal buffer + if (page_buf == NULL) { +#ifdef UNIV_PAGECOMPRESS_DEBUG + fprintf(stderr, + "InnoDB: Note: FIL: Encryption buffer not given, allocating...\n"); +#endif /* UNIV_PAGECOMPRESS_DEBUG */ + in_buf = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*2)); + } else { + in_buf = page_buf; + } + + /* 1st decryption: 64 bytes */ + /* 64 bytes from data area are copied to temporary buffer. + * These are the last 64 of the (encrypted) payload */ + memcpy(tmp_buf, buf + len - offset - 64, 64); + + err = my_aes_decrypt_dynamic( + (const uchar*) tmp_buf, + 64, + (uchar *) in_buf + len - offset - 64, + &tmp_write_size, + (const unsigned char *) rkey, + key_len, + (const unsigned char *) iv, + iv_len, + 1); + + ut_ad(tmp_write_size == 64); + + /* If decrypt fails it means that page is corrupted or has an unknown key */ + if (err != AES_OK) { + fprintf(stderr, "InnoDB: Corruption: Page is marked as encrypted\n" + "InnoDB: but decrypt failed with error %d.\n" + "InnoDB: size %lu len %lu, key %d\n", err, (ulint)data_size, + len, (int)page_decryption_key); + fflush(stderr); + + if (tmp_encryption_buf == NULL) { + ut_free(tmp_buf); + } + + if (page_buf == NULL) { + ut_free(in_buf); + } + return err; + } + + ut_ad(tmp_write_size == 64); + + /* copy 1st part of payload from buf to in_buf */ + /* do not override result of 1st decryption */ + memcpy(in_buf + FIL_PAGE_DATA, buf + FIL_PAGE_DATA, len -offset -64 - FIL_PAGE_DATA); + + + /* Decrypt rest of the page */ + err = my_aes_decrypt_dynamic((uchar*) in_buf + FIL_PAGE_DATA, + data_size, + (uchar *) buf + FIL_PAGE_DATA, + &tmp_write_size, + (const unsigned char *)&rkey, + key_len, + (const unsigned char *)&iv, + iv_len, + 1); + + ut_ad(tmp_write_size = data_size); + + /* copy remaining bytes from in_buf to buf. + */ + ulint bytes_to_copy = len - FIL_PAGE_DATA - data_size - offset; + memcpy(buf + FIL_PAGE_DATA + data_size, in_buf + FIL_PAGE_DATA + data_size, bytes_to_copy); + + /* apart from header data everything is now in in_buf */ + + if (tmp_encryption_buf == NULL) { + ut_free(tmp_buf); + } + +#ifdef UNIV_PAGEENCRIPTION_DEBUG + fprintf(stderr, "InnoDB: Note: Decryption succeeded for len %lu\n", len); + fflush(stderr); +#endif + + if (page_buf == NULL) { + ut_free(in_buf); + } + + /* setting original page type */ + mach_write_to_2(buf + FIL_PAGE_TYPE, orig_page_type); + + ulint pageno = mach_read_from_4(buf + FIL_PAGE_OFFSET); + ulint flags = 0; + ulint zip_size = 0; + + /* please note, that page with number 0 is not encrypted */ + if (pageno == 0 ) { + flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + buf); + } else { + ulint space_id = mach_read_from_4(buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + fil_system_enter(); + space = fil_space_get_by_id(space_id); + flags = fil_space_flags(space); + fil_system_exit(); + } + + if (!(page_compression_flag)) { + zip_size = fsp_flags_get_zip_size(flags); + } + + if (write_size!=NULL) { + *write_size = len; + } + + if (!(page_compression_flag)) { + byte checksum_byte = fil_page_encryption_calc_checksum(buf + FIL_PAGE_DATA, len - FIL_PAGE_DATA); + + if (checksum_byte != stored_checksum_byte) { + err = PAGE_ENCRYPTION_WRONG_KEY; + fprintf(stderr, "InnoDB: Corruption: Page is marked as encrypted\n" + "InnoDB: but decryption verification failed with error %d," + " encryption key %d.\n", + err, (int)page_decryption_key); + fflush(stderr); + return err; + } + + /* calc check sums and write to the buffer, if page is not of type PAGE_COMPRESSED. + * if the decryption is verified, it is assumed that the + * original page was restored, re-calculating the original + * checksums should be ok + */ + do_check_sum(len, zip_size, buf); + } else { + /* page_compression uses BUF_NO_CHECKSUM_MAGIC as checksum */ + mach_write_to_4(buf + FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC); + } + + srv_stats.pages_page_decrypted.inc(); + + return err; +} + + |