Diffstat (limited to 'storage/xtradb/fil/fil0crypt.cc')
-rw-r--r--  storage/xtradb/fil/fil0crypt.cc  2433
1 files changed, 2433 insertions, 0 deletions
diff --git a/storage/xtradb/fil/fil0crypt.cc b/storage/xtradb/fil/fil0crypt.cc
new file mode 100644
index 00000000000..e34297f4f86
--- /dev/null
+++ b/storage/xtradb/fil/fil0crypt.cc
@@ -0,0 +1,2433 @@
+#include "fil0fil.h"
+#include "srv0srv.h"
+#include "srv0start.h"
+#include "mach0data.h"
+#include "log0recv.h"
+#include "mtr0mtr.h"
+#include "mtr0log.h"
+#include "page0zip.h"
+#include "ut0ut.h"
+#include "btr0scrub.h"
+#include "fsp0fsp.h"
+#include "fil0pagecompress.h"
+#include "fil0pageencryption.h"
+
+#include <my_crypt.h>
+#include <my_crypt_key_management.h>
+
+#include <my_aes.h>
+#include <math.h>
+
+
+/** Mutex for keys */
+UNIV_INTERN ib_mutex_t fil_crypt_key_mutex;
+
+#ifdef UNIV_PFS_MUTEX
+UNIV_INTERN mysql_pfs_key_t fil_crypt_key_mutex_key;
+#endif
+
+/** Is encryption enabled/disabled */
+UNIV_INTERN my_bool srv_encrypt_tables = FALSE;
+
+/** No of key rotation threads requested */
+UNIV_INTERN uint srv_n_fil_crypt_threads = 0;
+
+/** No of key rotation threads started */
+static uint srv_n_fil_crypt_threads_started = 0;
+
+/** At this age or older a space/page will be rotated */
+UNIV_INTERN uint srv_fil_crypt_rotate_key_age = 1;
+
+/** Event to signal FROM the key rotation threads. */
+UNIV_INTERN os_event_t fil_crypt_event;
+
+/** Event to signal TO the key rotation threads. */
+UNIV_INTERN os_event_t fil_crypt_threads_event;
+
+/** Event used by throttled key rotation threads to sleep */
+UNIV_INTERN os_event_t fil_crypt_throttle_sleep_event;
+
+/** Mutex for key rotation threads */
+UNIV_INTERN ib_mutex_t fil_crypt_threads_mutex;
+
+#ifdef UNIV_PFS_MUTEX
+UNIV_INTERN mysql_pfs_key_t fil_crypt_threads_mutex_key;
+#endif
+
+/** Variable ensuring only one thread at a time does the initial conversion */
+static bool fil_crypt_start_converting = false;
+
+/** Variables for throttling */
+UNIV_INTERN uint srv_n_fil_crypt_iops = 100; // 10ms per iop
+static uint srv_alloc_time = 3; // allocate iops for 3s at a time
+static uint n_fil_crypt_iops_allocated = 0;
+
+/** Variables for scrubbing */
+extern uint srv_background_scrub_data_interval;
+extern uint srv_background_scrub_data_check_interval;
+
+#define DEBUG_KEYROTATION_THROTTLING 0
+
+/** Statistics variables */
+static fil_crypt_stat_t crypt_stat;
+static ib_mutex_t crypt_stat_mutex;
+
+#ifdef UNIV_PFS_MUTEX
+UNIV_INTERN mysql_pfs_key_t fil_crypt_stat_mutex_key;
+#endif
+
+/**
+ * Key for the crypt data mutex
+ */
+#ifdef UNIV_PFS_MUTEX
+UNIV_INTERN mysql_pfs_key_t fil_crypt_data_mutex_key;
+#endif
+
+/**
+ * Magic pattern at the start of the crypt data on page 0
+ */
+#define MAGIC_SZ 6
+
+static const unsigned char CRYPT_MAGIC[MAGIC_SZ] = {
+ 's', 0xE, 0xC, 'R', 'E', 't' };
+
+static const unsigned char EMPTY_PATTERN[MAGIC_SZ] = {
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
+
+/**
+ * CRYPT_SCHEME_UNENCRYPTED
+ *
+ * Used as an intermediate state when converting a space from unencrypted
+ * to encrypted
+ */
+#define CRYPT_SCHEME_UNENCRYPTED 0
+
+/**
+ * CRYPT_SCHEME_1
+ *
+ * L = AES_ECB(KEY, IV)
+ * CRYPT(PAGE) = AES_CTR(KEY=L, IV=C, PAGE)
+ */
+#define CRYPT_SCHEME_1 1
+#define CRYPT_SCHEME_1_IV_LEN 16
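+
+/*
+ * Illustrative sketch (not part of the original header comment) of the
+ * counter block C as it is assembled in fil_space_encrypt() and
+ * fil_space_decrypt() below:
+ *
+ *   bytes  0..3   space id
+ *   bytes  4..7   page offset
+ *   bytes  8..15  page LSN
+ *
+ * Because the LSN advances on every write, a (space, offset, lsn)
+ * triple is never reused, keeping the CTR counter unique per page
+ * encryption.
+ */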
+// cached L given key_version
+struct key_struct
+{
+ uint key_version;
+ byte key[CRYPT_SCHEME_1_IV_LEN];
+};
+
+struct fil_space_rotate_state_t
+{
+ time_t start_time; // time when rotation started
+ ulint active_threads; // active threads in space
+ ulint next_offset; // next "free" offset
+ ulint max_offset; // max offset needing to be rotated
+ uint min_key_version_found; // min key version found but not rotated
+ lsn_t end_lsn; // max lsn created when rotating this space
+ bool starting; // initial write of IV
+ bool flushing; // space is being flushed at end of rotate
+ struct {
+ bool is_active; // is scrubbing active in this space
+ time_t last_scrub_completed; // when was last scrub completed
+ } scrubbing;
+};
+
+struct fil_space_crypt_struct
+{
+ ulint type; // CRYPT_SCHEME
+ uint keyserver_requests; // no of key requests to key server
+	uint key_count;		  // No of initialized key-structs
+ key_struct keys[3]; // cached L = AES_ECB(KEY, IV)
+ uint min_key_version; // min key version for this space
+ ulint page0_offset; // byte offset on page 0 for crypt data
+
+ ib_mutex_t mutex; // mutex protecting following variables
+ bool closing; // is tablespace being closed
+ fil_space_rotate_state_t rotate_state;
+
+ uint iv_length; // length of IV
+ byte iv[1]; // IV-data
+};
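+
+/*
+ * For reference, the on-disk layout of this crypt data as stored on
+ * page 0 (written by fil_space_write_crypt_data_low() and read by
+ * fil_space_read_crypt_data(), sizes in bytes):
+ *
+ *   CRYPT_MAGIC(6) | type(1) | iv-len(1) | iv(iv-len) | min_key_version(4)
+ */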
+
+/*********************************************************************
+Init space crypt */
+UNIV_INTERN
+void
+fil_space_crypt_init()
+{
+ mutex_create(fil_crypt_key_mutex_key,
+ &fil_crypt_key_mutex, SYNC_NO_ORDER_CHECK);
+
+ fil_crypt_throttle_sleep_event = os_event_create();
+
+ mutex_create(fil_crypt_stat_mutex_key,
+ &crypt_stat_mutex, SYNC_NO_ORDER_CHECK);
+ memset(&crypt_stat, 0, sizeof(crypt_stat));
+}
+
+/*********************************************************************
+Cleanup space crypt */
+UNIV_INTERN
+void
+fil_space_crypt_cleanup()
+{
+ os_event_free(fil_crypt_throttle_sleep_event);
+}
+
+/******************************************************************
+Get key bytes for a space/key-version */
+static
+void
+fil_crypt_get_key(byte *dst, uint* key_length,
+ fil_space_crypt_t* crypt_data, uint version, bool page_encrypted)
+{
+ unsigned char keybuf[MY_AES_MAX_KEY_LENGTH];
+ unsigned char iv[CRYPT_SCHEME_1_IV_LEN];
+ ulint iv_len = sizeof(iv);
+
+ if (!page_encrypted) {
+ mutex_enter(&crypt_data->mutex);
+
+ // Check if we already have key
+ for (uint i = 0; i < crypt_data->key_count; i++) {
+ if (crypt_data->keys[i].key_version == version) {
+ memcpy(dst, crypt_data->keys[i].key,
+ sizeof(crypt_data->keys[i].key));
+ mutex_exit(&crypt_data->mutex);
+ return;
+ }
+ }
+ // Not found!
+ crypt_data->keyserver_requests++;
+
+		// Shift the cached keys down one slot to make room for the
+		// new key in slot 0. Iterate from the end so each entry is
+		// copied before it is overwritten; a forward copy would
+		// duplicate keys[0] into every slot.
+		for (uint i = array_elements(crypt_data->keys) - 1; i > 0; i--) {
+			crypt_data->keys[i] = crypt_data->keys[i - 1];
+		}
+ }
+ else
+ {
+		// load the iv from the key provider
+		int rc = GetCryptoIV(version, (unsigned char*)iv, iv_len);
+
+		if (rc != CRYPT_KEY_OK) {
+			ib_logf(IB_LOG_LEVEL_FATAL,
+				"IV %d cannot be found. Reason=%d", version, rc);
+ ut_error;
+ }
+ }
+
+ if (HasCryptoKey(version)) {
+ *key_length = GetCryptoKeySize(version);
+
+ int rc = GetCryptoKey(version, (unsigned char*)keybuf, *key_length);
+
+ if (rc != CRYPT_KEY_OK) {
+			ib_logf(IB_LOG_LEVEL_FATAL,
+				"Key %d cannot be found. Reason=%d", version, rc);
+ ut_error;
+ }
+ } else {
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ "Key %d not found", version);
+ ut_error;
+ }
+
+
+ // do ctr key initialization
+ if (current_aes_dynamic_method == MY_AES_ALGORITHM_CTR)
+ {
+ // Now compute L by encrypting IV using this key
+ const unsigned char* src = page_encrypted ? iv : crypt_data->iv;
+ const int srclen = page_encrypted ? iv_len : crypt_data->iv_length;
+ unsigned char* buf = page_encrypted ? keybuf : crypt_data->keys[0].key;
+ uint32 buflen = page_encrypted ? *key_length : sizeof(crypt_data->keys[0].key);
+
+ // call ecb explicit
+ my_aes_encrypt_dynamic_type func = get_aes_encrypt_func(MY_AES_ALGORITHM_ECB);
+ int rc = (*func)(src, srclen,
+ buf, &buflen,
+ (unsigned char*)keybuf, *key_length,
+ NULL, 0,
+ 1);
+
+ if (rc != AES_OK) {
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ "Unable to encrypt key-block "
+ " src: %p srclen: %d buf: %p buflen: %d."
+ " return-code: %d. Can't continue!\n",
+ src, srclen, buf, buflen, rc);
+ ut_error;
+ }
+
+ if (!page_encrypted) {
+ crypt_data->keys[0].key_version = version;
+ crypt_data->key_count++;
+
+ if (crypt_data->key_count > array_elements(crypt_data->keys)) {
+ crypt_data->key_count = array_elements(crypt_data->keys);
+ }
+ }
+
+ // set the key size to the aes block size because this encrypted data is the key
+ *key_length = MY_AES_BLOCK_SIZE;
+ memcpy(dst, buf, buflen);
+ }
+ else
+ {
+ // otherwise keybuf contains the right key
+ memcpy(dst, keybuf, *key_length);
+ }
+
+ if (!page_encrypted) {
+ mutex_exit(&crypt_data->mutex);
+ }
+}
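+
+/*
+ * Summary of the CTR key derivation above: the raw key for `version`
+ * is fetched from the key provider into keybuf, then L = AES_ECB(KEY, IV)
+ * is computed with keybuf as KEY and the stored iv as plaintext, and L
+ * is what is returned in dst (and cached in crypt_data->keys[] in the
+ * tablespace case). Pages are then encrypted with AES_CTR(KEY=L, IV=C)
+ * as described at CRYPT_SCHEME_1.
+ */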
+
+/******************************************************************
+Get key bytes for a space/latest(key-version) */
+static inline
+void
+fil_crypt_get_latest_key(byte *dst, uint* key_length,
+ fil_space_crypt_t* crypt_data, uint *version)
+{
+ if (srv_encrypt_tables) {
+		// used for key rotation - fetch the latest key version
+		// from the key provider
+		int rc = GetLatestCryptoKeyVersion();
+
+		// if no valid version was returned, keep the current one
+ if (rc >= 0)
+ {
+ *version = rc;
+ }
+
+ return fil_crypt_get_key(dst, key_length, crypt_data, *version, false);
+ } else {
+ return fil_crypt_get_key(dst, key_length, NULL, *version, true);
+ }
+}
+
+/******************************************************************
+Create a fil_space_crypt_t object */
+UNIV_INTERN
+fil_space_crypt_t*
+fil_space_create_crypt_data()
+{
+ const uint iv_length = CRYPT_SCHEME_1_IV_LEN;
+ const uint sz = sizeof(fil_space_crypt_t) + iv_length;
+ fil_space_crypt_t* crypt_data =
+ static_cast<fil_space_crypt_t*>(malloc(sz));
+ memset(crypt_data, 0, sz);
+
+ if (srv_encrypt_tables == FALSE) {
+ crypt_data->type = CRYPT_SCHEME_UNENCRYPTED;
+ crypt_data->min_key_version = 0;
+ } else {
+ crypt_data->type = CRYPT_SCHEME_1;
+ crypt_data->min_key_version = GetLatestCryptoKeyVersion();
+ }
+
+ mutex_create(fil_crypt_data_mutex_key,
+ &crypt_data->mutex, SYNC_NO_ORDER_CHECK);
+ crypt_data->iv_length = iv_length;
+ my_random_bytes(crypt_data->iv, iv_length);
+ return crypt_data;
+}
+
+/******************************************************************
+Compare two crypt objects */
+UNIV_INTERN
+int
+fil_space_crypt_compare(const fil_space_crypt_t* crypt_data1,
+ const fil_space_crypt_t* crypt_data2)
+{
+ ut_a(crypt_data1->type == CRYPT_SCHEME_UNENCRYPTED ||
+ crypt_data1->type == CRYPT_SCHEME_1);
+ ut_a(crypt_data2->type == CRYPT_SCHEME_UNENCRYPTED ||
+ crypt_data2->type == CRYPT_SCHEME_1);
+
+ ut_a(crypt_data1->iv_length == CRYPT_SCHEME_1_IV_LEN);
+ ut_a(crypt_data2->iv_length == CRYPT_SCHEME_1_IV_LEN);
+
+ /* no support for changing iv (yet?) */
+ ut_a(memcmp(crypt_data1->iv, crypt_data2->iv,
+ crypt_data1->iv_length) == 0);
+
+ return 0;
+}
+
+/******************************************************************
+Read crypt data from a page (0) */
+UNIV_INTERN
+fil_space_crypt_t*
+fil_space_read_crypt_data(ulint space, const byte* page, ulint offset)
+{
+ if (memcmp(page + offset, EMPTY_PATTERN, MAGIC_SZ) == 0) {
+ /* crypt is not stored */
+ return NULL;
+ }
+
+ if (memcmp(page + offset, CRYPT_MAGIC, MAGIC_SZ) != 0) {
+ fprintf(stderr,
+ "Warning: found potentially bogus bytes on "
+ "page 0 offset %lu for space %lu : "
+ "[ %.2x %.2x %.2x %.2x %.2x %.2x ]. "
+ "Assuming space is not encrypted!\n",
+ offset, space,
+ page[offset + 0],
+ page[offset + 1],
+ page[offset + 2],
+ page[offset + 3],
+ page[offset + 4],
+ page[offset + 5]);
+ return NULL;
+ }
+
+ ulint type = mach_read_from_1(page + offset + MAGIC_SZ + 0);
+
+ if (! (type == CRYPT_SCHEME_UNENCRYPTED ||
+ type == CRYPT_SCHEME_1)) {
+		fprintf(stderr,
+			"Found nonsensical crypt scheme: %lu for space %lu "
+ " offset: %lu bytes: "
+ "[ %.2x %.2x %.2x %.2x %.2x %.2x ]\n",
+ type, space, offset,
+ page[offset + 0 + MAGIC_SZ],
+ page[offset + 1 + MAGIC_SZ],
+ page[offset + 2 + MAGIC_SZ],
+ page[offset + 3 + MAGIC_SZ],
+ page[offset + 4 + MAGIC_SZ],
+ page[offset + 5 + MAGIC_SZ]);
+ ut_error;
+ }
+
+ ulint iv_length = mach_read_from_1(page + offset + MAGIC_SZ + 1);
+ if (! (iv_length == CRYPT_SCHEME_1_IV_LEN)) {
+		fprintf(stderr,
+			"Found nonsensical iv length: %lu for space %lu "
+ " offset: %lu type: %lu bytes: "
+ "[ %.2x %.2x %.2x %.2x %.2x %.2x ]\n",
+ iv_length, space, offset, type,
+ page[offset + 0 + MAGIC_SZ],
+ page[offset + 1 + MAGIC_SZ],
+ page[offset + 2 + MAGIC_SZ],
+ page[offset + 3 + MAGIC_SZ],
+ page[offset + 4 + MAGIC_SZ],
+ page[offset + 5 + MAGIC_SZ]);
+ ut_error;
+ }
+
+ uint min_key_version = mach_read_from_4
+ (page + offset + MAGIC_SZ + 2 + iv_length);
+
+ const uint sz = sizeof(fil_space_crypt_t) + iv_length;
+ fil_space_crypt_t* crypt_data = static_cast<fil_space_crypt_t*>(
+ malloc(sz));
+ memset(crypt_data, 0, sz);
+
+ crypt_data->type = type;
+ crypt_data->min_key_version = min_key_version;
+ crypt_data->page0_offset = offset;
+ mutex_create(fil_crypt_data_mutex_key,
+ &crypt_data->mutex, SYNC_NO_ORDER_CHECK);
+ crypt_data->iv_length = iv_length;
+ memcpy(crypt_data->iv, page + offset + MAGIC_SZ + 2, iv_length);
+
+ return crypt_data;
+}
+
+/******************************************************************
+Free a crypt data object */
+UNIV_INTERN
+void
+fil_space_destroy_crypt_data(fil_space_crypt_t **crypt_data)
+{
+ if (crypt_data != NULL && (*crypt_data) != NULL) {
+ /* lock (and unlock) mutex to make sure no one has it locked
+ * currently */
+ mutex_enter(& (*crypt_data)->mutex);
+ mutex_exit(& (*crypt_data)->mutex);
+ mutex_free(& (*crypt_data)->mutex);
+ free(*crypt_data);
+ (*crypt_data) = NULL;
+ }
+}
+
+/******************************************************************
+Write crypt data to a page (0) */
+static
+void
+fil_space_write_crypt_data_low(fil_space_crypt_t *crypt_data,
+ ulint type,
+ byte* page, ulint offset,
+ ulint maxsize, mtr_t* mtr)
+{
+ ut_a(offset > 0 && offset < UNIV_PAGE_SIZE);
+ ulint space_id = mach_read_from_4(
+ page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+ const uint len = crypt_data->iv_length;
+ const uint min_key_version = crypt_data->min_key_version;
+ crypt_data->page0_offset = offset;
+ ut_a(2 + len + 4 + MAGIC_SZ < maxsize);
+
+ /*
+ redo log this as bytewise updates to page 0
+ followed by an MLOG_FILE_WRITE_CRYPT_DATA
+ (that will during recovery update fil_space_t)
+ */
+ mlog_write_string(page + offset, CRYPT_MAGIC, MAGIC_SZ, mtr);
+ mlog_write_ulint(page + offset + MAGIC_SZ + 0, type, MLOG_1BYTE, mtr);
+ mlog_write_ulint(page + offset + MAGIC_SZ + 1, len, MLOG_1BYTE, mtr);
+ mlog_write_string(page + offset + MAGIC_SZ + 2, crypt_data->iv, len,
+ mtr);
+ mlog_write_ulint(page + offset + MAGIC_SZ + 2 + len, min_key_version,
+ MLOG_4BYTES, mtr);
+
+ byte* log_ptr = mlog_open(mtr, 11 + 12 + len);
+ if (log_ptr != NULL) {
+ log_ptr = mlog_write_initial_log_record_fast(
+ page,
+ MLOG_FILE_WRITE_CRYPT_DATA,
+ log_ptr, mtr);
+ mach_write_to_4(log_ptr, space_id);
+ log_ptr += 4;
+ mach_write_to_2(log_ptr, offset);
+ log_ptr += 2;
+ mach_write_to_1(log_ptr, type);
+ log_ptr += 1;
+ mach_write_to_1(log_ptr, len);
+ log_ptr += 1;
+ mach_write_to_4(log_ptr, min_key_version);
+ log_ptr += 4;
+ mlog_close(mtr, log_ptr);
+
+ mlog_catenate_string(mtr, crypt_data->iv, len);
+ }
+}
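+
+/*
+ * Body of the MLOG_FILE_WRITE_CRYPT_DATA redo record produced above
+ * and consumed by fil_parse_write_crypt_data() (sizes in bytes):
+ *
+ *   space_id(4) | offset(2) | type(1) | iv-len(1) | min_key_version(4) | iv(iv-len)
+ */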
+
+/******************************************************************
+Write crypt data to a page (0) */
+UNIV_INTERN
+void
+fil_space_write_crypt_data(ulint space, byte* page, ulint offset,
+ ulint maxsize, mtr_t* mtr)
+{
+ fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space);
+ if (crypt_data == NULL) {
+ return;
+ }
+
+ fil_space_write_crypt_data_low(crypt_data, crypt_data->type,
+ page, offset, maxsize, mtr);
+}
+
+/******************************************************************
+Parse a MLOG_FILE_WRITE_CRYPT_DATA log entry */
+UNIV_INTERN
+byte*
+fil_parse_write_crypt_data(byte* ptr, byte* end_ptr,
+ buf_block_t* block)
+{
+ /* check that redo log entry is complete */
+ uint entry_size =
+ 4 + // size of space_id
+ 2 + // size of offset
+ 1 + // size of type
+ 1 + // size of iv-len
+ 4; // size of min_key_version
+ if (end_ptr - ptr < entry_size)
+ return NULL;
+
+ ulint space_id = mach_read_from_4(ptr);
+ ptr += 4;
+ uint offset = mach_read_from_2(ptr);
+ ptr += 2;
+ uint type = mach_read_from_1(ptr);
+ ptr += 1;
+ uint len = mach_read_from_1(ptr);
+ ptr += 1;
+
+ ut_a(type == CRYPT_SCHEME_UNENCRYPTED ||
+ type == CRYPT_SCHEME_1); // only supported
+ ut_a(len == CRYPT_SCHEME_1_IV_LEN); // only supported
+ uint min_key_version = mach_read_from_4(ptr);
+ ptr += 4;
+
+ if (end_ptr - ptr < len)
+ return NULL;
+
+ fil_space_crypt_t* crypt_data = fil_space_create_crypt_data();
+ crypt_data->page0_offset = offset;
+ crypt_data->min_key_version = min_key_version;
+ memcpy(crypt_data->iv, ptr, len);
+ ptr += len;
+
+ /* update fil_space memory cache with crypt_data */
+ fil_space_set_crypt_data(space_id, crypt_data);
+
+ return ptr;
+}
+
+/******************************************************************
+Clear crypt data from a page (0) */
+UNIV_INTERN
+void
+fil_space_clear_crypt_data(byte* page, ulint offset)
+{
+ //TODO(jonaso): pass crypt-data and read len from there
+ ulint len = CRYPT_SCHEME_1_IV_LEN;
+ ulint size =
+ sizeof(CRYPT_MAGIC) +
+ 1 + // type
+ 1 + // len
+ len + // iv
+ 4; // min key version
+ memset(page + offset, 0, size);
+}
+
+/*********************************************************************
+Check if page shall be encrypted before write */
+UNIV_INTERN
+bool
+fil_space_check_encryption_write(
+/*==============================*/
+ ulint space) /*!< in: tablespace id */
+{
+ if (srv_encrypt_tables == FALSE)
+ return false;
+
+ fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space);
+ if (crypt_data == NULL)
+ return false;
+
+ if (crypt_data->type == CRYPT_SCHEME_UNENCRYPTED)
+ return false;
+
+ return true;
+}
+
+/******************************************************************
+Encrypt a page */
+UNIV_INTERN
+void
+fil_space_encrypt(ulint space, ulint offset, lsn_t lsn,
+ const byte* src_frame, ulint zip_size, byte* dst_frame, ulint encryption_key)
+{
+ fil_space_crypt_t* crypt_data;
+ ulint page_size = (zip_size) ? zip_size : UNIV_PAGE_SIZE;
+
+ // get key (L)
+ uint key_version;
+ byte key[MY_AES_MAX_KEY_LENGTH];
+ uint key_length;
+
+ if (srv_encrypt_tables) {
+ crypt_data = fil_space_get_crypt_data(space);
+ if (crypt_data == NULL) {
+ //TODO: Is this really needed ?
+ memcpy(dst_frame, src_frame, page_size);
+ return;
+ }
+ fil_crypt_get_latest_key(key, &key_length, crypt_data, &key_version);
+ } else {
+ key_version = encryption_key;
+ fil_crypt_get_latest_key(key, &key_length, NULL, (uint*)&key_version);
+ }
+
+
+	/* Load the iv or counter (depending on the encryption algorithm used) */
+ unsigned char iv[MY_AES_BLOCK_SIZE];
+
+ if (current_aes_dynamic_method == MY_AES_ALGORITHM_CTR)
+ {
+ // create counter block (C)
+ mach_write_to_4(iv + 0, space);
+ ulint space_offset = mach_read_from_4(
+ src_frame + FIL_PAGE_OFFSET);
+ mach_write_to_4(iv + 4, space_offset);
+ mach_write_to_8(iv + 8, lsn);
+ }
+ else
+ {
+ // take the iv from the key provider
+
+ int load_iv_rc = GetCryptoIV(key_version, (uchar *) iv, sizeof(iv));
+
+		// if the iv cannot be loaded, the page cannot be encrypted
+		if (load_iv_rc != CRYPT_KEY_OK)
+		{
+			ib_logf(IB_LOG_LEVEL_FATAL,
+				"Unable to encrypt data-block. "
+				" Cannot load iv for key %d"
+				" return-code: %d. Can't continue!\n",
+				key_version, load_iv_rc);
+
+ ut_error;
+ }
+ }
+
+
+ ibool page_compressed = (mach_read_from_2(src_frame+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED);
+ ibool page_encrypted = fil_space_is_page_encrypted(space);
+
+ ulint compression_alg = mach_read_from_8(src_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+
+ ulint orig_page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE);
+ if (orig_page_type==FIL_PAGE_TYPE_FSP_HDR
+ || orig_page_type==FIL_PAGE_TYPE_XDES
+ || orig_page_type== FIL_PAGE_PAGE_ENCRYPTED
+ || orig_page_type== FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) {
+ memcpy(dst_frame, src_frame, page_size);
+ return;
+ }
+
+ // copy page header
+ memcpy(dst_frame, src_frame, FIL_PAGE_DATA);
+
+
+ if (page_encrypted && !page_compressed) {
+ // key id
+ mach_write_to_2(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
+ key_version);
+ // original page type
+ mach_write_to_2(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 2,
+ orig_page_type);
+ // new page type
+ mach_write_to_2(dst_frame+FIL_PAGE_TYPE, FIL_PAGE_PAGE_ENCRYPTED);
+ } else {
+ // store key version
+ mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
+ key_version);
+ }
+
+ // encrypt page data
+ ulint unencrypted_bytes = FIL_PAGE_DATA + FIL_PAGE_DATA_END;
+ ulint srclen = page_size - unencrypted_bytes;
+ const byte* src = src_frame + FIL_PAGE_DATA;
+ byte* dst = dst_frame + FIL_PAGE_DATA;
+ uint32 dstlen;
+
+ if (page_compressed) {
+ srclen = page_size - FIL_PAGE_DATA;
+ }
+
+ int rc = (* my_aes_encrypt_dynamic)(src, srclen,
+ dst, &dstlen,
+ (unsigned char*)key, key_length,
+ (unsigned char*)iv, sizeof(iv),
+ 1);
+
+ if (! ((rc == AES_OK) && ((ulint) dstlen == srclen))) {
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ "Unable to encrypt data-block "
+ " src: %p srclen: %ld buf: %p buflen: %d."
+ " return-code: %d. Can't continue!\n",
+ src, (long)srclen,
+ dst, dstlen, rc);
+ ut_error;
+ }
+
+ if (!page_compressed) {
+ // copy page trailer
+ memcpy(dst_frame + page_size - FIL_PAGE_DATA_END,
+ src_frame + page_size - FIL_PAGE_DATA_END,
+ FIL_PAGE_DATA_END);
+
+ /* handle post encryption checksum */
+ ib_uint32_t checksum = 0;
+ srv_checksum_algorithm_t algorithm =
+ static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm);
+
+ if (zip_size == 0) {
+ switch (algorithm) {
+ case SRV_CHECKSUM_ALGORITHM_CRC32:
+ case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
+ checksum = buf_calc_page_crc32(dst_frame);
+ break;
+ case SRV_CHECKSUM_ALGORITHM_INNODB:
+ case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
+ checksum = (ib_uint32_t) buf_calc_page_new_checksum(
+ dst_frame);
+ break;
+ case SRV_CHECKSUM_ALGORITHM_NONE:
+ case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
+ checksum = BUF_NO_CHECKSUM_MAGIC;
+ break;
+ /* no default so the compiler will emit a warning
+ * if new enum is added and not handled here */
+ }
+ } else {
+ checksum = page_zip_calc_checksum(dst_frame, zip_size,
+ algorithm);
+ }
+
+ // store the post-encryption checksum after the key-version
+ mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4,
+ checksum);
+ } else {
+		/* Page compressed and encrypted tables have a different
+		FIL_PAGE header layout */
+		ulint page_len = log10((double)page_size)/log10((double)2);
+		/* Set the new page type */
+		mach_write_to_2(dst_frame+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED);
+		/* Store the original page type */
+		mach_write_to_2(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+4, orig_page_type);
+		/* Store log2 of the page size (read back on decrypt) */
+		mach_write_to_1(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+6, page_len);
+		/* Store the compression algorithm */
+		mach_write_to_1(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+7, compression_alg);
+ }
+
+}
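+
+/*
+ * Resulting frame layout for a regular (not page-compressed)
+ * tablespace-encrypted page, summarizing the writes above:
+ *
+ *   [0, FIL_PAGE_DATA)                        header, copied in clear;
+ *       the key version is stored at
+ *       FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION and the
+ *       post-encryption checksum at +4
+ *   [FIL_PAGE_DATA, size - FIL_PAGE_DATA_END) AES-encrypted body
+ *   [size - FIL_PAGE_DATA_END, size)          trailer, copied in clear
+ */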
+
+/*********************************************************************
+Check if extra buffer shall be allocated for decrypting after read */
+UNIV_INTERN
+bool
+fil_space_check_encryption_read(
+/*==============================*/
+ ulint space) /*!< in: tablespace id */
+{
+ fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space);
+ if (crypt_data == NULL)
+ return false;
+
+ if (crypt_data->type == CRYPT_SCHEME_UNENCRYPTED)
+ return false;
+
+ return true;
+}
+
+/******************************************************************
+Decrypt a page */
+UNIV_INTERN
+bool
+fil_space_decrypt(fil_space_crypt_t* crypt_data,
+ const byte* src_frame, ulint page_size, byte* dst_frame)
+{
+ ulint page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE);
+ // key version
+ uint key_version;
+ bool page_encrypted = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
+ || page_type == FIL_PAGE_PAGE_ENCRYPTED);
+
+ bool page_compressed = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
+ || page_type == FIL_PAGE_PAGE_COMPRESSED);
+
+ ulint orig_page_type=0;
+
+ if (page_type == FIL_PAGE_PAGE_ENCRYPTED) {
+ key_version = mach_read_from_2(
+ src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+ orig_page_type = mach_read_from_2(
+ src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 2);
+ } else {
+ key_version = mach_read_from_4(
+ src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+ }
+
+ if (key_version == 0 && !page_encrypted) {
+ //TODO: is this really needed ?
+ memcpy(dst_frame, src_frame, page_size);
+ return false; /* page not decrypted */
+ }
+
+ // read space & offset & lsn
+ ulint space = mach_read_from_4(
+ src_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+ ulint offset = mach_read_from_4(
+ src_frame + FIL_PAGE_OFFSET);
+ ib_uint64_t lsn = mach_read_from_8(src_frame + FIL_PAGE_LSN);
+
+ // copy page header
+ memcpy(dst_frame, src_frame, FIL_PAGE_DATA);
+
+ if (page_type == FIL_PAGE_PAGE_ENCRYPTED) {
+ // orig page type
+ mach_write_to_2(dst_frame+FIL_PAGE_TYPE, orig_page_type);
+ }
+
+
+ // get key
+ byte key[MY_AES_MAX_KEY_LENGTH];
+ uint key_length;
+ fil_crypt_get_key(key, &key_length, crypt_data, key_version, page_encrypted);
+
+ // get the iv
+ unsigned char iv[MY_AES_BLOCK_SIZE];
+
+ if (current_aes_dynamic_method == MY_AES_ALGORITHM_CTR)
+ {
+ // create counter block
+
+ mach_write_to_4(iv + 0, space);
+ mach_write_to_4(iv + 4, offset);
+ mach_write_to_8(iv + 8, lsn);
+ }
+ else
+ {
+ // take the iv from the key provider
+
+ int load_iv_rc = GetCryptoIV(key_version, (uchar *) iv, sizeof(iv));
+
+		// if the iv cannot be loaded, the page cannot be decrypted
+		if (load_iv_rc != CRYPT_KEY_OK)
+		{
+			ib_logf(IB_LOG_LEVEL_FATAL,
+				"Unable to decrypt data-block. "
+				" Cannot load iv for key %d"
+				" return-code: %d. Can't continue!\n",
+				key_version, load_iv_rc);
+
+			/* this function returns bool; the enum value
+			 * AES_KEY_CREATION_FAILED would silently convert to
+			 * true ("page decrypted"), so abort instead, in line
+			 * with the FATAL log level */
+			ut_error;
+ }
+ }
+
+ const byte* src = src_frame + FIL_PAGE_DATA;
+ byte* dst = dst_frame + FIL_PAGE_DATA;
+ uint32 dstlen;
+ ulint srclen = page_size - (FIL_PAGE_DATA + FIL_PAGE_DATA_END);
+
+ ulint compressed_len;
+ ulint compression_method;
+
+ if (page_compressed) {
+ orig_page_type = mach_read_from_2(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+4);
+ compressed_len = mach_read_from_1(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+6);
+ compression_method = mach_read_from_1(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+7);
+ }
+
+ if (page_encrypted && !page_compressed) {
+ orig_page_type = mach_read_from_2(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+2);
+ }
+
+ if (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) {
+ srclen = pow((double)2, (double)((int)compressed_len)) - FIL_PAGE_DATA;
+ }
+
+ int rc = (* my_aes_decrypt_dynamic)(src, srclen,
+ dst, &dstlen,
+ (unsigned char*)key, key_length,
+ (unsigned char*)iv, sizeof(iv),
+ 1);
+
+ if (! ((rc == AES_OK) && ((ulint) dstlen == srclen))) {
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ "Unable to decrypt data-block "
+ " src: %p srclen: %ld buf: %p buflen: %d."
+ " return-code: %d. Can't continue!\n",
+ src, (long)srclen,
+ dst, dstlen, rc);
+ ut_error;
+ }
+
+ if (page_type != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) {
+ // copy page trailer
+ memcpy(dst_frame + page_size - FIL_PAGE_DATA_END,
+ src_frame + page_size - FIL_PAGE_DATA_END,
+ FIL_PAGE_DATA_END);
+
+ // clear key-version & crypt-checksum from dst
+ memset(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
+ } else {
+ /* For page compressed tables we set up the FIL_HEADER again */
+ /* setting original page type */
+ mach_write_to_2(dst_frame + FIL_PAGE_TYPE, orig_page_type);
+ /* page_compression uses BUF_NO_CHECKSUM_MAGIC as checksum */
+ mach_write_to_4(dst_frame + FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC);
+ /* Set up the flush lsn to be compression algorithm */
+ mach_write_to_8(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, compression_method);
+ }
+
+ return true; /* page was decrypted */
+}
+
+/******************************************************************
+Decrypt a page */
+UNIV_INTERN
+void
+fil_space_decrypt(ulint space,
+ const byte* src_frame, ulint page_size, byte* dst_frame)
+{
+ fil_space_decrypt(fil_space_get_crypt_data(space),
+ src_frame, page_size, dst_frame);
+}
+
+/*********************************************************************
+Verify checksum for a page (iff it's encrypted)
+NOTE: currently this function can only be run in single threaded mode
+as it modifies srv_checksum_algorithm (temporarily)
+@return true if page is encrypted AND OK, false otherwise */
+bool
+fil_space_verify_crypt_checksum(const byte* src_frame, ulint zip_size)
+{
+ // key version
+ uint key_version = mach_read_from_4(
+ src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+
+ if (key_version == 0) {
+ return false; // unencrypted page
+ }
+
+ /* "trick" the normal checksum routines by storing the post-encryption
+ * checksum into the normal checksum field allowing for reuse of
+ * the normal routines */
+
+ // post encryption checksum
+ ib_uint32_t stored_post_encryption = mach_read_from_4(
+ src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4);
+
+ // save pre encryption checksum for restore in end of this function
+ ib_uint32_t stored_pre_encryption = mach_read_from_4(
+ src_frame + FIL_PAGE_SPACE_OR_CHKSUM);
+
+ ib_uint32_t checksum_field2 = mach_read_from_4(
+ src_frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM);
+
+ /** prepare frame for usage of normal checksum routines */
+ mach_write_to_4(const_cast<byte*>(src_frame) + FIL_PAGE_SPACE_OR_CHKSUM,
+ stored_post_encryption);
+
+ /* NOTE: this function is (currently) only run when restoring
+ * dblwr-buffer, server is single threaded so it's safe to modify
+ * srv_checksum_algorithm */
+ srv_checksum_algorithm_t save_checksum_algorithm =
+ (srv_checksum_algorithm_t)srv_checksum_algorithm;
+ if (zip_size == 0 &&
+ (save_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB ||
+ save_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_INNODB)) {
+ /* handle ALGORITHM_INNODB specially,
+ * "downgrade" to ALGORITHM_INNODB and store BUF_NO_CHECKSUM_MAGIC
+ * checksum_field2 is sort of pointless anyway...
+ */
+ srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_INNODB;
+ mach_write_to_4(const_cast<byte*>(src_frame) +
+ UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+ BUF_NO_CHECKSUM_MAGIC);
+ }
+
+ /* verify checksums */
+ ibool corrupted = buf_page_is_corrupted(false, src_frame, zip_size);
+
+ /** restore frame & algorithm */
+ srv_checksum_algorithm = save_checksum_algorithm;
+
+ mach_write_to_4(const_cast<byte*>(src_frame) +
+ FIL_PAGE_SPACE_OR_CHKSUM,
+ stored_pre_encryption);
+
+ mach_write_to_4(const_cast<byte*>(src_frame) +
+ UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+ checksum_field2);
+
+ if (!corrupted) {
+ return true; // page was encrypted and checksum matched
+ } else {
+ return false; // page was encrypted but checksum didn't match
+ }
+}
+
+/***********************************************************************/
+
+/** A copy of global key state */
+struct key_state_t {
+ key_state_t() : key_version(0),
+ rotate_key_age(srv_fil_crypt_rotate_key_age) {}
+ bool operator==(const key_state_t& other) const {
+ return key_version == other.key_version &&
+ rotate_key_age == other.rotate_key_age;
+ }
+ uint key_version;
+ uint rotate_key_age;
+};
+
+/***********************************************************************
+Copy global key state */
+static void
+fil_crypt_get_key_state(
+ key_state_t *new_state)
+{
+ if (srv_encrypt_tables == TRUE) {
+ new_state->key_version = GetLatestCryptoKeyVersion();
+ new_state->rotate_key_age = srv_fil_crypt_rotate_key_age;
+ ut_a(new_state->key_version > 0);
+ } else {
+ new_state->key_version = 0;
+ new_state->rotate_key_age = 0;
+ }
+}
+
+/***********************************************************************
+Check if a key needs rotation given a key_state */
+static bool
+fil_crypt_needs_rotation(uint key_version, const key_state_t *key_state)
+{
+ // TODO(jonaso): Add support for rotating encrypted => unencrypted
+
+ if (key_version == 0 && key_state->key_version != 0) {
+ /* this is rotation unencrypted => encrypted
+ * ignore rotate_key_age */
+ return true;
+ }
+
+ if (key_state->key_version == 0 && key_version != 0) {
+ /* this is rotation encrypted => unencrypted */
+ return true;
+ }
+
+ /* this is rotation encrypted => encrypted,
+ * only reencrypt if key is sufficiently old */
+ if (key_version + key_state->rotate_key_age < key_state->key_version)
+ return true;
+
+ return false;
+}
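+
+/*
+ * Example: with srv_fil_crypt_rotate_key_age = 100, a page encrypted
+ * with key version 7 needs rotation once the latest key version
+ * exceeds 107. An unencrypted page (key_version == 0) is rotated as
+ * soon as any key exists, and an encrypted page is rotated back to
+ * unencrypted as soon as encryption is turned off, regardless of
+ * rotate_key_age.
+ */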
+
+/***********************************************************************
+Check if a space is closing (i.e. just before being dropped) */
+UNIV_INTERN bool
+fil_crypt_is_closing(ulint space)
+{
+ bool closing;
+ fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
+ mutex_enter(&crypt_data->mutex);
+ closing = crypt_data->closing;
+ mutex_exit(&crypt_data->mutex);
+ return closing;
+}
+
+/***********************************************************************
+Start encrypting a space
+@return true if a pending op (fil_inc_pending_ops/fil_decr_pending_ops) is held
+*/
+static bool
+fil_crypt_start_encrypting_space(ulint space, bool *recheck)
+{
+ /* we have a pending op when entering function */
+ bool pending_op = true;
+
+ mutex_enter(&fil_crypt_threads_mutex);
+ fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
+ if (crypt_data != NULL || fil_crypt_start_converting) {
+ /* someone beat us to it */
+ if (fil_crypt_start_converting)
+ *recheck = true;
+
+ mutex_exit(&fil_crypt_threads_mutex);
+ return pending_op;
+ }
+
+	/* NOTE: we need to write and flush page 0 before publishing
+	 * the crypt data. This is so that after a restart there is
+	 * no risk of finding encrypted pages without having
+	 * crypt data in page 0 */
+
+ /* 1 - create crypt data */
+ crypt_data = fil_space_create_crypt_data();
+ if (crypt_data == NULL) {
+ mutex_exit(&fil_crypt_threads_mutex);
+ return pending_op;
+ }
+
+ crypt_data->type = CRYPT_SCHEME_UNENCRYPTED;
+ crypt_data->min_key_version = 0; // all pages are unencrypted
+ crypt_data->rotate_state.start_time = time(0);
+ crypt_data->rotate_state.starting = true;
+ crypt_data->rotate_state.active_threads = 1;
+
+ mutex_enter(&crypt_data->mutex);
+ fil_space_set_crypt_data(space, crypt_data);
+ mutex_exit(&crypt_data->mutex);
+
+ fil_crypt_start_converting = true;
+ mutex_exit(&fil_crypt_threads_mutex);
+
+ do
+ {
+ if (fil_crypt_is_closing(space) ||
+ fil_tablespace_is_being_deleted(space))
+ break;
+
+ mtr_t mtr;
+ mtr_start(&mtr);
+
+ /* 2 - get page 0 */
+ ulint offset = 0;
+ ulint zip_size = fil_space_get_zip_size(space);
+ buf_block_t* block = buf_page_get_gen(space, zip_size, offset,
+ RW_X_LATCH,
+ NULL,
+ BUF_GET,
+ __FILE__, __LINE__,
+ &mtr);
+
+ if (fil_crypt_is_closing(space) ||
+ fil_tablespace_is_being_deleted(space)) {
+ mtr_commit(&mtr);
+ break;
+ }
+
+ /* 3 - compute location to store crypt data */
+ byte* frame = buf_block_get_frame(block);
+ ulint maxsize;
+ crypt_data->page0_offset =
+ fsp_header_get_crypt_offset(zip_size, &maxsize);
+
+ /* 4 - write crypt data to page 0 */
+ fil_space_write_crypt_data_low(crypt_data,
+ CRYPT_SCHEME_1,
+ frame,
+ crypt_data->page0_offset,
+ maxsize, &mtr);
+
+ mtr_commit(&mtr);
+
+ if (fil_crypt_is_closing(space) ||
+ fil_tablespace_is_being_deleted(space)) {
+ break;
+ }
+
+ /* record lsn of update */
+ lsn_t end_lsn = mtr.end_lsn;
+
+		/* 5 - sync tablespace before publishing crypt data */
+
+ /* release "lock" while syncing */
+ fil_decr_pending_ops(space);
+ pending_op = false;
+
+ bool success = false;
+ ulint n_pages = 0;
+ ulint sum_pages = 0;
+ do {
+ success = buf_flush_list(ULINT_MAX, end_lsn, &n_pages);
+ buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+ sum_pages += n_pages;
+ } while (!success &&
+ !fil_crypt_is_closing(space) &&
+ !fil_tablespace_is_being_deleted(space));
+
+ /* try to reacquire pending op */
+ if (fil_inc_pending_ops(space, true))
+ break;
+
+ /* pending op reacquired! */
+ pending_op = true;
+
+ if (fil_crypt_is_closing(space) ||
+ fil_tablespace_is_being_deleted(space)) {
+ break;
+ }
+
+		/* 6 - publish crypt data */
+ mutex_enter(&fil_crypt_threads_mutex);
+ mutex_enter(&crypt_data->mutex);
+ crypt_data->type = CRYPT_SCHEME_1;
+ ut_a(crypt_data->rotate_state.active_threads == 1);
+ crypt_data->rotate_state.active_threads = 0;
+ crypt_data->rotate_state.starting = false;
+
+ fil_crypt_start_converting = false;
+ mutex_exit(&crypt_data->mutex);
+ mutex_exit(&fil_crypt_threads_mutex);
+
+ return pending_op;
+ } while (0);
+
+ mutex_enter(&crypt_data->mutex);
+ ut_a(crypt_data->rotate_state.active_threads == 1);
+ crypt_data->rotate_state.active_threads = 0;
+ mutex_exit(&crypt_data->mutex);
+
+ mutex_enter(&fil_crypt_threads_mutex);
+ fil_crypt_start_converting = false;
+ mutex_exit(&fil_crypt_threads_mutex);
+
+ return pending_op;
+}
+
+/***********************************************************************
+Check if space needs rotation given a key_state */
+static bool
+fil_crypt_space_needs_rotation(uint space, const key_state_t *key_state,
+ bool *recheck)
+{
+ if (fil_space_get_type(space) != FIL_TABLESPACE)
+ return false;
+
+ if (fil_inc_pending_ops(space, true)) {
+ /* tablespace being dropped */
+ return false;
+ }
+
+ /* keep track of if we have pending op */
+ bool pending_op = true;
+
+ fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
+ if (crypt_data == NULL) {
+ /**
+ * space has no crypt data
+ * start encrypting it...
+ */
+ pending_op = fil_crypt_start_encrypting_space(space, recheck);
+ crypt_data = fil_space_get_crypt_data(space);
+ if (crypt_data == NULL) {
+ if (pending_op) {
+ fil_decr_pending_ops(space);
+ }
+ return false;
+ }
+ }
+
+ mutex_enter(&crypt_data->mutex);
+ do {
+ /* prevent threads from starting to rotate space */
+ if (crypt_data->rotate_state.starting) {
+ /* recheck this space later */
+ *recheck = true;
+ break;
+ }
+
+ /* prevent threads from starting to rotate space */
+ if (crypt_data->closing)
+ break;
+
+ if (crypt_data->rotate_state.flushing)
+ break;
+
+ bool need_key_rotation = fil_crypt_needs_rotation(
+ crypt_data->min_key_version, key_state);
+
+ time_t diff = time(0) - crypt_data->rotate_state.scrubbing.
+ last_scrub_completed;
+ bool need_scrubbing =
+ diff >= srv_background_scrub_data_interval;
+
+ if (need_key_rotation == false && need_scrubbing == false)
+ break;
+
+ mutex_exit(&crypt_data->mutex);
+ /* NOTE! fil_decr_pending_ops is performed outside */
+ return true;
+ } while (0);
+
+ mutex_exit(&crypt_data->mutex);
+ if (pending_op) {
+ fil_decr_pending_ops(space);
+ }
+ return false;
+}
+
+/** State of a rotation thread */
+struct rotate_thread_t {
+ explicit rotate_thread_t(uint no) {
+ memset(this, 0, sizeof(* this));
+ thread_no = no;
+ first = true;
+ estimated_max_iops = 20;
+ }
+
+ uint thread_no;
+ bool first; /*!< is position before first space */
+ ulint space; /*!< current space */
+ ulint offset; /*!< current offset */
+ ulint batch; /*!< #pages to rotate */
+ uint min_key_version_found;/*!< min key version found but not rotated */
+ lsn_t end_lsn; /*!< max lsn when rotating this space */
+
+ uint estimated_max_iops; /*!< estimation of max iops */
+ uint allocated_iops; /*!< allocated iops */
+ uint cnt_waited; /*!< #times waited during this slot */
+ uint sum_waited_us; /*!< wait time during this slot */
+
+ fil_crypt_stat_t crypt_stat; // statistics
+
+ btr_scrub_t scrub_data; /* thread local data used by btr_scrub-functions
+ * when iterating pages of tablespace */
+
+ /* check if this thread should shutdown */
+ bool should_shutdown() const {
+ return ! (srv_shutdown_state == SRV_SHUTDOWN_NONE &&
+ thread_no < srv_n_fil_crypt_threads);
+ }
+};
+
+/***********************************************************************
+Update global statistics with thread statistics */
+static void
+fil_crypt_update_total_stat(rotate_thread_t *state)
+{
+ mutex_enter(&crypt_stat_mutex);
+ crypt_stat.pages_read_from_cache +=
+ state->crypt_stat.pages_read_from_cache;
+ crypt_stat.pages_read_from_disk +=
+ state->crypt_stat.pages_read_from_disk;
+ crypt_stat.pages_modified += state->crypt_stat.pages_modified;
+ crypt_stat.pages_flushed += state->crypt_stat.pages_flushed;
+	// remove old estimate
+ crypt_stat.estimated_iops -= state->crypt_stat.estimated_iops;
+ // add new estimate
+ crypt_stat.estimated_iops += state->estimated_max_iops;
+ mutex_exit(&crypt_stat_mutex);
+
+ // make new estimate "current" estimate
+ memset(&state->crypt_stat, 0, sizeof(state->crypt_stat));
+ // record our old (current) estimate
+ state->crypt_stat.estimated_iops = state->estimated_max_iops;
+}
+
+/***********************************************************************
+Allocate iops to thread from global setting,
+used before starting to rotate a space */
+static bool
+fil_crypt_alloc_iops(rotate_thread_t *state)
+{
+ ut_ad(state->allocated_iops == 0);
+
+ uint max_iops = state->estimated_max_iops;
+ mutex_enter(&fil_crypt_threads_mutex);
+ if (n_fil_crypt_iops_allocated >= srv_n_fil_crypt_iops) {
+ /* this can happen when user decreases srv_fil_crypt_iops */
+ mutex_exit(&fil_crypt_threads_mutex);
+ return false;
+ }
+
+ uint alloc = srv_n_fil_crypt_iops - n_fil_crypt_iops_allocated;
+ if (alloc > max_iops)
+ alloc = max_iops;
+
+ n_fil_crypt_iops_allocated += alloc;
+ mutex_exit(&fil_crypt_threads_mutex);
+
+ state->allocated_iops = alloc;
+
+ return alloc > 0;
+}
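+
+/*
+ * Worked example of the iops budget: with srv_n_fil_crypt_iops = 100,
+ * none yet allocated, and a thread whose estimated_max_iops is 20
+ * (the initial estimate), the thread is granted 20 iops above. With
+ * srv_alloc_time = 3 this later yields batches of 3 * 20 = 60 pages
+ * in fil_crypt_find_page_to_rotate().
+ */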
+
+/***********************************************************************
+Reallocate iops to thread,
+used when inside a space */
+static void
+fil_crypt_realloc_iops(rotate_thread_t *state)
+{
+ ut_a(state->allocated_iops > 0);
+
+ if (10 * state->cnt_waited > state->batch) {
+ /* if we waited more than 10% re-estimate max_iops */
+ uint avg_wait_time_us =
+ state->sum_waited_us / state->cnt_waited;
+
+		if (avg_wait_time_us == 0)
+			avg_wait_time_us = 1; // prevent division by zero
+
+#if DEBUG_KEYROTATION_THROTTLING
+		fprintf(stderr,
+			"thr_no: %u - update estimated_max_iops from %u to %u\n",
+			state->thread_no,
+			state->estimated_max_iops,
+			1000000 / avg_wait_time_us);
+#endif
+		state->estimated_max_iops = 1000000 / avg_wait_time_us;
+ state->cnt_waited = 0;
+ state->sum_waited_us = 0;
+ } else {
+#if DEBUG_KEYROTATION_THROTTLING
+ fprintf(stderr,
+ "thr_no: %u only waited %lu%% skip re-estimate\n",
+ state->thread_no,
+ (100 * state->cnt_waited) / state->batch);
+#endif
+ }
+
+ if (state->estimated_max_iops <= state->allocated_iops) {
+ /* return extra iops */
+ uint extra = state->allocated_iops - state->estimated_max_iops;
+
+ if (extra > 0) {
+ mutex_enter(&fil_crypt_threads_mutex);
+ if (n_fil_crypt_iops_allocated < extra) {
+ /* unknown bug!
+ * crash in debug
+ * keep n_fil_crypt_iops_allocated unchanged
+ * in release */
+ ut_ad(0);
+ extra = 0;
+ }
+ n_fil_crypt_iops_allocated -= extra;
+ state->allocated_iops -= extra;
+
+ if (state->allocated_iops == 0) {
+ /* no matter how slow io system seems to be
+ * never decrease allocated_iops to 0... */
+ state->allocated_iops ++;
+ n_fil_crypt_iops_allocated ++;
+ }
+ mutex_exit(&fil_crypt_threads_mutex);
+ os_event_set(fil_crypt_threads_event);
+ }
+ } else {
+ /* see if there are more to get */
+ mutex_enter(&fil_crypt_threads_mutex);
+ if (n_fil_crypt_iops_allocated < srv_n_fil_crypt_iops) {
+ /* there are extra iops free */
+ uint extra = srv_n_fil_crypt_iops -
+ n_fil_crypt_iops_allocated;
+ if (state->allocated_iops + extra >
+ state->estimated_max_iops) {
+ /* but don't alloc more than our max */
+ extra = state->estimated_max_iops -
+ state->allocated_iops;
+ }
+ n_fil_crypt_iops_allocated += extra;
+ state->allocated_iops += extra;
+#if DEBUG_KEYROTATION_THROTTLING
+ fprintf(stderr,
+ "thr_no: %u increased iops from %u to %u\n",
+ state->thread_no,
+ state->allocated_iops - extra,
+ state->allocated_iops);
+#endif
+ }
+ mutex_exit(&fil_crypt_threads_mutex);
+ }
+
+ fil_crypt_update_total_stat(state);
+}
+
+/***********************************************************************
+Return allocated iops to global */
+static void
+fil_crypt_return_iops(rotate_thread_t *state)
+{
+ if (state->allocated_iops > 0) {
+ uint iops = state->allocated_iops;
+ mutex_enter(&fil_crypt_threads_mutex);
+ if (n_fil_crypt_iops_allocated < iops) {
+ /* unknown bug!
+ * crash in debug
+ * keep n_fil_crypt_iops_allocated unchanged
+ * in release */
+ ut_ad(0);
+ iops = 0;
+ }
+ n_fil_crypt_iops_allocated -= iops;
+ mutex_exit(&fil_crypt_threads_mutex);
+ state->allocated_iops = 0;
+ os_event_set(fil_crypt_threads_event);
+ }
+
+ fil_crypt_update_total_stat(state);
+}
+
+/***********************************************************************
+Search for a space needing rotation */
+bool
+fil_crypt_find_space_to_rotate(
+ const key_state_t *key_state,
+ rotate_thread_t *state,
+ bool *recheck)
+{
+ /* we need iops to start rotating */
+ while (!state->should_shutdown() && !fil_crypt_alloc_iops(state)) {
+ os_event_reset(fil_crypt_threads_event);
+ os_event_wait_time(fil_crypt_threads_event, 1000000);
+ }
+
+ if (state->should_shutdown())
+ return false;
+
+ if (state->first) {
+ state->first = false;
+ state->space = fil_get_first_space();
+ } else {
+ state->space = fil_get_next_space(state->space);
+ }
+
+ while (!state->should_shutdown() && state->space != ULINT_UNDEFINED) {
+
+ ulint space = state->space;
+ if (fil_crypt_space_needs_rotation(space, key_state, recheck)) {
+ /* init state->min_key_version_found before
+ * starting on a space */
+ state->min_key_version_found = key_state->key_version;
+ return true;
+ }
+
+ state->space = fil_get_next_space(space);
+ }
+
+ /* if we didn't find any space return iops */
+ fil_crypt_return_iops(state);
+
+ return false;
+
+}
+
+/***********************************************************************
+Start rotating a space */
+static
+void
+fil_crypt_start_rotate_space(
+ const key_state_t *key_state,
+ rotate_thread_t *state)
+{
+ ulint space = state->space;
+ fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
+ mutex_enter(&crypt_data->mutex);
+ if (crypt_data->rotate_state.active_threads == 0) {
+ /* only first thread needs to init */
+ crypt_data->rotate_state.next_offset = 1; // skip page 0
+		/* no need to rotate beyond the current max offset;
+		 * if the space grows, new pages will be encrypted
+		 * with the newer key version anyway */
+ crypt_data->rotate_state.max_offset = fil_space_get_size(space);
+
+ crypt_data->rotate_state.end_lsn = 0;
+ crypt_data->rotate_state.min_key_version_found =
+ key_state->key_version;
+
+ crypt_data->rotate_state.start_time = time(0);
+ }
+
+ /* count active threads in space */
+ crypt_data->rotate_state.active_threads++;
+
+ /* Initialize thread local state */
+ state->end_lsn = crypt_data->rotate_state.end_lsn;
+ state->min_key_version_found =
+ crypt_data->rotate_state.min_key_version_found;
+
+ /* inform scrubbing */
+ crypt_data->rotate_state.scrubbing.is_active =
+ btr_scrub_start_space(space, &state->scrub_data);
+
+ mutex_exit(&crypt_data->mutex);
+}
+
+/***********************************************************************
+Search for batch of pages needing rotation */
+static
+bool
+fil_crypt_find_page_to_rotate(
+ const key_state_t *key_state,
+ rotate_thread_t *state)
+{
+ ulint batch = srv_alloc_time * state->allocated_iops;
+ ulint space = state->space;
+ fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
+ mutex_enter(&crypt_data->mutex);
+ if (crypt_data->closing == false &&
+ crypt_data->rotate_state.next_offset <
+ crypt_data->rotate_state.max_offset) {
+
+ state->offset = crypt_data->rotate_state.next_offset;
+ ulint remaining = crypt_data->rotate_state.max_offset -
+ crypt_data->rotate_state.next_offset;
+
+ if (batch <= remaining)
+ state->batch = batch;
+ else
+ state->batch = remaining;
+
+ crypt_data->rotate_state.next_offset += batch;
+ mutex_exit(&crypt_data->mutex);
+ return true;
+ }
+
+ mutex_exit(&crypt_data->mutex);
+ return false;
+}
+
+/***********************************************************************
+Check if a page is uninitialized (doesn't need to be rotated) */
+static bool
+fil_crypt_is_page_uninitialized(const byte* frame, uint zip_size)
+{
+ if (zip_size) {
+ ulint stored_checksum = mach_read_from_4(
+ frame + FIL_PAGE_SPACE_OR_CHKSUM);
+ /* empty pages aren't encrypted */
+ if (stored_checksum == 0) {
+ return true;
+ }
+ } else {
+ ulint size = UNIV_PAGE_SIZE;
+ ulint checksum_field1 = mach_read_from_4(
+ frame + FIL_PAGE_SPACE_OR_CHKSUM);
+ ulint checksum_field2 = mach_read_from_4(
+ frame + size - FIL_PAGE_END_LSN_OLD_CHKSUM);
+ /* empty pages are not encrypted */
+ if (checksum_field1 == 0 && checksum_field2 == 0
+ && mach_read_from_4(frame + FIL_PAGE_LSN) == 0) {
+ return true;
+ }
+ }
+ return false;
+}
+
+#define fil_crypt_get_page_throttle(state,space,zip_size,offset,mtr,sleeptime_ms) \
+ fil_crypt_get_page_throttle_func(state, space, zip_size, offset, mtr, \
+ sleeptime_ms, __FILE__, __LINE__)
+
+/***********************************************************************
+Get a page and compute sleep time */
+static
+buf_block_t*
+fil_crypt_get_page_throttle_func(rotate_thread_t *state,
+ ulint space, uint zip_size, ulint offset,
+ mtr_t *mtr,
+ ulint *sleeptime_ms,
+ const char *file,
+ ulint line)
+{
+ buf_block_t* block = buf_page_try_get_func(space, offset, RW_X_LATCH,
+ true,
+ file, line, mtr);
+ if (block != NULL) {
+ /* page was in buffer pool */
+ state->crypt_stat.pages_read_from_cache++;
+ return block;
+ }
+
+ state->crypt_stat.pages_read_from_disk++;
+
+ ullint start = ut_time_us(NULL);
+ block = buf_page_get_gen(space, zip_size, offset,
+ RW_X_LATCH,
+ NULL, BUF_GET_POSSIBLY_FREED,
+ file, line, mtr);
+ ullint end = ut_time_us(NULL);
+
+ if (end < start) {
+ end = start; // safety...
+ }
+
+ state->cnt_waited++;
+ state->sum_waited_us += (end - start);
+
+ /* average page load */
+ ulint add_sleeptime_ms = 0;
+ ulint avg_wait_time_us = state->sum_waited_us / state->cnt_waited;
+ ulint alloc_wait_us = 1000000 / state->allocated_iops;
+ if (avg_wait_time_us < alloc_wait_us) {
+		/* we are reading faster than our iops allocation allows */
+ add_sleeptime_ms = (alloc_wait_us - avg_wait_time_us) / 1000;
+ } else {
+ /* if page load time is longer than we want, skip sleeping */
+ }
+
+ *sleeptime_ms += add_sleeptime_ms;
+ return block;
+}
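+
+/*
+ * Example of the throttling arithmetic above: at allocated_iops = 100
+ * the target pace is 1000000 / 100 = 10000 us per page; if the average
+ * page load so far took 4000 us, roughly 6 ms of extra sleep per page
+ * is accumulated in *sleeptime_ms to stay within the iops budget.
+ */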
+
+
+/***********************************************************************
+Get block and allocation status
+
+note: innodb locks fil_space_latch and then block when allocating page
+but locks block and then fil_space_latch when freeing page.
+*/
+static
+buf_block_t*
+btr_scrub_get_block_and_allocation_status(
+ rotate_thread_t *state,
+ ulint space,
+ ulint zip_size,
+ ulint offset,
+ mtr_t *mtr,
+ btr_scrub_page_allocation_status_t *allocation_status,
+ ulint *sleeptime_ms)
+{
+ mtr_t local_mtr;
+ buf_block_t *block = NULL;
+ mtr_start(&local_mtr);
+ *allocation_status = fsp_page_is_free(space, offset, &local_mtr) ?
+ BTR_SCRUB_PAGE_FREE :
+ BTR_SCRUB_PAGE_ALLOCATED;
+
+ if (*allocation_status == BTR_SCRUB_PAGE_FREE) {
+		/* this is the easy case: we lock fil_space_latch first
+		and then the block */
+ block = fil_crypt_get_page_throttle(state,
+ space, zip_size,
+ offset, mtr,
+ sleeptime_ms);
+ mtr_commit(&local_mtr);
+ } else {
+ /* page is allocated according to xdes */
+
+ /* release fil_space_latch *before* fetching block */
+ mtr_commit(&local_mtr);
+
+ /* NOTE: when we have locked dict_index_get_lock(),
+ * it's safe to release fil_space_latch and then fetch block
+ * as dict_index_get_lock() is needed to make tree modifications
+		 * such as freeing a page
+ */
+
+ block = fil_crypt_get_page_throttle(state,
+ space, zip_size,
+ offset, mtr,
+ sleeptime_ms);
+ }
+
+ return block;
+}
+
+
+/***********************************************************************
+Rotate one page */
+static
+void
+fil_crypt_rotate_page(
+ const key_state_t *key_state,
+ rotate_thread_t *state)
+{
+ ulint space = state->space;
+ ulint offset = state->offset;
+ const uint zip_size = fil_space_get_zip_size(space);
+ ulint sleeptime_ms = 0;
+
+ /* check if tablespace is closing before reading page */
+ if (fil_crypt_is_closing(space))
+ return;
+
+ if (space == TRX_SYS_SPACE && offset == TRX_SYS_PAGE_NO) {
+		/* don't encrypt this page, as it contains the address of the dblwr buffer */
+ return;
+ }
+
+ mtr_t mtr;
+ mtr_start(&mtr);
+ buf_block_t* block = fil_crypt_get_page_throttle(state,
+ space, zip_size,
+ offset, &mtr,
+ &sleeptime_ms);
+
+ bool modified = false;
+ int needs_scrubbing = BTR_SCRUB_SKIP_PAGE;
+ lsn_t block_lsn = block->page.newest_modification;
+ uint kv = block->page.key_version;
+
+ /* check if tablespace is closing after reading page */
+ if (!fil_crypt_is_closing(space)) {
+ byte* frame = buf_block_get_frame(block);
+ fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
+
+ if (kv == 0 &&
+ fil_crypt_is_page_uninitialized(frame, zip_size)) {
+ ;
+ } else if (fil_crypt_needs_rotation(kv, key_state)) {
+
+			/* the page can be "fresh", i.e. never written, in
+			 * which case kv == 0; otherwise it should have a key
+			 * version at least as big as the space minimum */
+ ut_a(kv == 0 || kv >= crypt_data->min_key_version);
+
+ modified = true;
+
+ /* force rotation by dummy updating page */
+ mlog_write_ulint(frame +
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
+ space, MLOG_4BYTES, &mtr);
+
+ /* update block */
+ block->page.key_version = key_state->key_version;
+
+ /* statistics */
+ state->crypt_stat.pages_modified++;
+ } else {
+ ut_a(kv >= crypt_data->min_key_version ||
+ (kv == 0 && key_state->key_version == 0));
+
+ if (kv < state->min_key_version_found) {
+ state->min_key_version_found = kv;
+ }
+ }
+
+ needs_scrubbing = btr_page_needs_scrubbing(
+ &state->scrub_data, block,
+ BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN);
+ }
+
+ mtr_commit(&mtr);
+ lsn_t end_lsn = mtr.end_lsn;
+
+ if (needs_scrubbing == BTR_SCRUB_PAGE) {
+ mtr_start(&mtr);
+ /*
+ * refetch page and allocation status
+ */
+ btr_scrub_page_allocation_status_t allocated;
+ block = btr_scrub_get_block_and_allocation_status(
+ state, space, zip_size, offset, &mtr,
+ &allocated,
+ &sleeptime_ms);
+
+ /* get required table/index and index-locks */
+ needs_scrubbing = btr_scrub_recheck_page(
+ &state->scrub_data, block, allocated, &mtr);
+
+ if (needs_scrubbing == BTR_SCRUB_PAGE) {
+ /* we need to refetch it once more now that we have
+ * index locked */
+ block = btr_scrub_get_block_and_allocation_status(
+ state, space, zip_size, offset, &mtr,
+ &allocated,
+ &sleeptime_ms);
+
+ needs_scrubbing = btr_scrub_page(&state->scrub_data,
+ block, allocated,
+ &mtr);
+ }
+
+		/* NOTE: the mtr is committed inside btr_scrub_recheck_page()
+		 * and/or btr_scrub_page(). This is to make sure that
+		 * locks & pages are latched in the correct order;
+		 * in some circumstances the mtr is restarted
+		 * (mtr_commit() + mtr_start()).
+		 */
+ }
+
+ if (needs_scrubbing != BTR_SCRUB_PAGE) {
+		/* if the page didn't need scrubbing, cleanups may still
+		be needed. do those outside of any mtr to prevent deadlocks.
+
+		which cleanups are needed is encoded inside needs_scrubbing,
+		but this is opaque to this function (except for the value
+		BTR_SCRUB_PAGE) */
+ btr_scrub_skip_page(&state->scrub_data, needs_scrubbing);
+ }
+
+ if (needs_scrubbing == BTR_SCRUB_TURNED_OFF) {
+ /* if we just detected that scrubbing was turned off
+ * update global state to reflect this */
+ fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
+ mutex_enter(&crypt_data->mutex);
+ crypt_data->rotate_state.scrubbing.is_active = false;
+ mutex_exit(&crypt_data->mutex);
+ }
+
+ if (modified) {
+ /* if we modified page, we take lsn from mtr */
+ ut_a(end_lsn > state->end_lsn);
+ ut_a(end_lsn > block_lsn);
+ state->end_lsn = end_lsn;
+ } else {
+ /* if we did not modify page, check for max lsn */
+ if (block_lsn > state->end_lsn) {
+ state->end_lsn = block_lsn;
+ }
+ }
+
+ if (sleeptime_ms) {
+ os_event_reset(fil_crypt_throttle_sleep_event);
+ os_event_wait_time(fil_crypt_throttle_sleep_event,
+ 1000 * sleeptime_ms);
+ }
+}
+
+/***********************************************************************
+Rotate a batch of pages */
+static
+void
+fil_crypt_rotate_pages(
+ const key_state_t *key_state,
+ rotate_thread_t *state)
+{
+ ulint space = state->space;
+ ulint end = state->offset + state->batch;
+ for (; state->offset < end; state->offset++) {
+
+ /* we can't rotate pages in dblwr buffer as
+ * it's not possible to read those due to lots of asserts
+ * in buffer pool.
+ *
+ * However since these are only (short-lived) copies of
+ * real pages, they will be updated anyway when the
+ * real page is updated
+ */
+ if (space == TRX_SYS_SPACE &&
+ buf_dblwr_page_inside(state->offset)) {
+ continue;
+ }
+
+ fil_crypt_rotate_page(key_state, state);
+ }
+}
+
+/***********************************************************************
+Flush rotated pages and then update page 0 */
+static
+void
+fil_crypt_flush_space(rotate_thread_t *state, ulint space)
+{
+ fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
+
+ /* flush tablespace pages so that there are no pages left with old key */
+ lsn_t end_lsn = crypt_data->rotate_state.end_lsn;
+ if (end_lsn > 0 && !fil_crypt_is_closing(space)) {
+ bool success = false;
+ ulint n_pages = 0;
+ ulint sum_pages = 0;
+ ullint start = ut_time_us(NULL);
+ do {
+ success = buf_flush_list(ULINT_MAX, end_lsn, &n_pages);
+ buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+ sum_pages += n_pages;
+ } while (!success && !fil_crypt_is_closing(space));
+ ullint end = ut_time_us(NULL);
+ if (sum_pages && end > start) {
+ state->cnt_waited += sum_pages;
+ state->sum_waited_us += (end - start);
+
+ /* statistics */
+ state->crypt_stat.pages_flushed += sum_pages;
+ }
+ }
+
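+	/* min_key_version == 0 means all pages were rotated to
+	 * "unencrypted"; record that in the crypt scheme */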
+ if (crypt_data->min_key_version == 0) {
+ crypt_data->type = CRYPT_SCHEME_UNENCRYPTED;
+ }
+
+	/* update page 0 so that the new crypt data survives a restart */
+ if (!fil_crypt_is_closing(space)) {
+ mtr_t mtr;
+ mtr_start(&mtr);
+ ulint offset = 0; // page 0
+ const uint zip_size = fil_space_get_zip_size(space);
+ buf_block_t* block = buf_page_get_gen(space, zip_size, offset,
+ RW_X_LATCH, NULL, BUF_GET,
+ __FILE__, __LINE__, &mtr);
+ byte* frame = buf_block_get_frame(block);
+ fil_space_write_crypt_data(space, frame,
+ crypt_data->page0_offset,
+ ULINT_MAX, &mtr);
+ mtr_commit(&mtr);
+ }
+}
+
+/***********************************************************************
+Complete rotating a space */
+static
+void
+fil_crypt_complete_rotate_space(
+ const key_state_t *key_state,
+ rotate_thread_t *state)
+{
+ ulint space = state->space;
+ fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
+ mutex_enter(&crypt_data->mutex);
+
+ /**
+ * Update crypt data state with state from thread
+ */
+ if (state->min_key_version_found <
+ crypt_data->rotate_state.min_key_version_found) {
+ crypt_data->rotate_state.min_key_version_found =
+ state->min_key_version_found;
+ }
+
+ if (state->end_lsn > crypt_data->rotate_state.end_lsn) {
+ crypt_data->rotate_state.end_lsn = state->end_lsn;
+ }
+
+ ut_a(crypt_data->rotate_state.active_threads > 0);
+ crypt_data->rotate_state.active_threads--;
+ bool last = crypt_data->rotate_state.active_threads == 0;
+
+	/**
+	 * check if the space is fully done;
+	 * needed because when threads shut down, we may "complete"
+	 * iterating before the full space has been scanned.
+	 */
+ bool done = crypt_data->rotate_state.next_offset >=
+ crypt_data->rotate_state.max_offset;
+
+	/**
+	 * we should flush the space if we're the last thread AND
+	 * the iteration is done
+	 */
+ bool should_flush = last && done;
+
+ if (should_flush) {
+ /* we're the last active thread */
+ crypt_data->rotate_state.flushing = true;
+ crypt_data->min_key_version =
+ crypt_data->rotate_state.min_key_version_found;
+ }
+
+ /* inform scrubbing */
+ crypt_data->rotate_state.scrubbing.is_active = false;
+ mutex_exit(&crypt_data->mutex);
+
+	/* all threads must call btr_scrub_complete_space without the mutex held */
+	if (btr_scrub_complete_space(&state->scrub_data)) {
+ if (should_flush) {
+ /* only last thread updates last_scrub_completed */
+ mutex_enter(&crypt_data->mutex);
+ crypt_data->rotate_state.scrubbing.
+ last_scrub_completed = time(0);
+ mutex_exit(&crypt_data->mutex);
+ }
+ }
+
+ if (should_flush) {
+ fil_crypt_flush_space(state, space);
+
+ mutex_enter(&crypt_data->mutex);
+ crypt_data->rotate_state.flushing = false;
+ mutex_exit(&crypt_data->mutex);
+ }
+}
+
+/*********************************************************************//**
+A thread which monitors global key state and rotates tablespaces accordingly
+@return a dummy parameter */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(fil_crypt_thread)(
+/*===============================*/
+ void* arg __attribute__((unused))) /*!< in: a dummy parameter required
+ * by os_thread_create */
+{
+ UT_NOT_USED(arg);
+
+ mutex_enter(&fil_crypt_threads_mutex);
+ uint thread_no = srv_n_fil_crypt_threads_started;
+ srv_n_fil_crypt_threads_started++;
+ mutex_exit(&fil_crypt_threads_mutex);
+ os_event_set(fil_crypt_event); /* signal that we started */
+
+ /* state of this thread */
+ rotate_thread_t thr(thread_no);
+
+ /* if we find a space that is starting, skip over it and recheck it later */
+ bool recheck = false;
+
+ key_state_t key_state;
+ fil_crypt_get_key_state(&key_state);
+
+	/* make sure that the thread always checks all tablespaces when starting.
+	 *
+	 * by decreasing key_version, the loop that waits for a change in key
+	 * state exits immediately, causing the thread to check all spaces on startup */
+ key_state.key_version--;
+
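+	/* main loop: wait for a key state change (or for the scrub check
+	 * interval to elapse), then walk all spaces and rotate those
+	 * that need it */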
+ while (!thr.should_shutdown()) {
+
+ key_state_t new_state;
+ fil_crypt_get_key_state(&new_state);
+
+ time_t wait_start = time(0);
+ while (!thr.should_shutdown() && key_state == new_state) {
+
+			/* wait for key state changes,
+			 * i.e. either a new key version or
+			 * a new rotate_key_age */
+ os_event_reset(fil_crypt_threads_event);
+ os_event_wait_time(fil_crypt_threads_event, 1000000);
+ fil_crypt_get_key_state(&new_state);
+
+ if (recheck) {
+				/* check recheck here, after the sleep, so
+				 * that we don't busy-loop while another thread
+				 * is starting a space */
+ break;
+ }
+
+ time_t waited = time(0) - wait_start;
+ if (waited >= srv_background_scrub_data_check_interval)
+ break;
+ }
+
+ recheck = false;
+ thr.first = true; // restart from first tablespace
+ key_state = new_state; // save for next loop
+
+ /* iterate all spaces searching for those needing rotation */
+ while (!thr.should_shutdown() &&
+ fil_crypt_find_space_to_rotate(&new_state, &thr, &recheck)) {
+
+ /* we found a space to rotate */
+ fil_crypt_start_rotate_space(&new_state, &thr);
+
+			/* decrement the pending ops counter that was incremented in
+			 * fil_crypt_space_needs_rotation
+			 * (called from fil_crypt_find_space_to_rotate);
+			 * the increment made sure that the tablespace wasn't dropped
+			 * just after we decided to start processing it. */
+ fil_decr_pending_ops(thr.space);
+
+			/* iterate all pages (cooperatively with other threads) */
+ while (!thr.should_shutdown() &&
+ fil_crypt_find_page_to_rotate(&new_state, &thr)) {
+
+				/* rotate a batch of pages */
+ fil_crypt_rotate_pages(&new_state, &thr);
+
+ /* realloc iops */
+ fil_crypt_realloc_iops(&thr);
+ }
+
+ /* complete rotation */
+ fil_crypt_complete_rotate_space(&new_state, &thr);
+
+ /* refresh key state */
+ fil_crypt_get_key_state(&new_state);
+
+ /* return iops */
+ fil_crypt_return_iops(&thr);
+ }
+ }
+
+ /* return iops if shutting down */
+ fil_crypt_return_iops(&thr);
+
+ mutex_enter(&fil_crypt_threads_mutex);
+ srv_n_fil_crypt_threads_started--;
+ mutex_exit(&fil_crypt_threads_mutex);
+ os_event_set(fil_crypt_event); /* signal that we stopped */
+
+ /* We count the number of threads in os_thread_exit(). A created
+ thread should always use that to exit and not use return() to exit. */
+
+ os_thread_exit(NULL);
+
+ OS_THREAD_DUMMY_RETURN;
+}
+
+/*********************************************************************
+Adjust thread count for key rotation */
+UNIV_INTERN
+void
+fil_crypt_set_thread_cnt(uint new_cnt)
+{
+ if (new_cnt > srv_n_fil_crypt_threads) {
+ uint add = new_cnt - srv_n_fil_crypt_threads;
+ srv_n_fil_crypt_threads = new_cnt;
+ for (uint i = 0; i < add; i++) {
+ os_thread_create(fil_crypt_thread, NULL, NULL);
+ }
+ } else if (new_cnt < srv_n_fil_crypt_threads) {
+ srv_n_fil_crypt_threads = new_cnt;
+ os_event_set(fil_crypt_threads_event);
+ }
+
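+	/* wait until the number of started threads matches the requested
+	 * count; threads signal fil_crypt_event when they start and stop */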
+	while (srv_n_fil_crypt_threads_started != srv_n_fil_crypt_threads) {
+ os_event_reset(fil_crypt_event);
+ os_event_wait_time(fil_crypt_event, 1000000);
+ }
+}
+
+/*********************************************************************
+Adjust max key age */
+UNIV_INTERN
+void
+fil_crypt_set_rotate_key_age(uint val)
+{
+ srv_fil_crypt_rotate_key_age = val;
+ os_event_set(fil_crypt_threads_event);
+}
+
+/*********************************************************************
+Adjust rotation iops */
+UNIV_INTERN
+void
+fil_crypt_set_rotation_iops(uint val)
+{
+ srv_n_fil_crypt_iops = val;
+ os_event_set(fil_crypt_threads_event);
+}
+
+/*********************************************************************
+Init threads for key rotation */
+UNIV_INTERN
+void
+fil_crypt_threads_init()
+{
+ fil_crypt_event = os_event_create();
+ fil_crypt_threads_event = os_event_create();
+ mutex_create(fil_crypt_threads_mutex_key,
+ &fil_crypt_threads_mutex, SYNC_NO_ORDER_CHECK);
+
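+	/* pretend that no threads are running so that
+	 * fil_crypt_set_thread_cnt() sees the full delta and
+	 * starts all requested threads */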
+ uint cnt = srv_n_fil_crypt_threads;
+ srv_n_fil_crypt_threads = 0;
+ fil_crypt_set_thread_cnt(cnt);
+}
+
+/*********************************************************************
+End threads for key rotation */
+UNIV_INTERN
+void
+fil_crypt_threads_end()
+{
+ /* stop threads */
+ fil_crypt_set_thread_cnt(0);
+}
+
+/*********************************************************************
+Clean up key rotation threads resources */
+UNIV_INTERN
+void
+fil_crypt_threads_cleanup()
+{
+ os_event_free(fil_crypt_event);
+ os_event_free(fil_crypt_threads_event);
+}
+
+/*********************************************************************
+Mark a space as closing */
+UNIV_INTERN
+void
+fil_space_crypt_mark_space_closing(
+ ulint space)
+{
+ mutex_enter(&fil_crypt_threads_mutex);
+ fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space);
+ if (crypt_data == NULL) {
+ mutex_exit(&fil_crypt_threads_mutex);
+ return;
+ }
+
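+	/* hand over from the global mutex to the per-space mutex
+	 * so that crypt_data can't go away while we set the flag */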
+ mutex_enter(&crypt_data->mutex);
+ mutex_exit(&fil_crypt_threads_mutex);
+ crypt_data->closing = true;
+ mutex_exit(&crypt_data->mutex);
+}
+
+/*********************************************************************
+Wait for crypt threads to stop accessing space */
+UNIV_INTERN
+void
+fil_space_crypt_close_tablespace(
+ ulint space)
+{
+ mutex_enter(&fil_crypt_threads_mutex);
+ fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space);
+ if (crypt_data == NULL) {
+ mutex_exit(&fil_crypt_threads_mutex);
+ return;
+ }
+
+ uint start = time(0);
+ uint last = start;
+ mutex_enter(&crypt_data->mutex);
+ mutex_exit(&fil_crypt_threads_mutex);
+ crypt_data->closing = true;
+ uint cnt = crypt_data->rotate_state.active_threads;
+ bool flushing = crypt_data->rotate_state.flushing;
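+	/* wait until all rotation threads have left the space
+	 * and any pending flush of it has completed */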
+ while (cnt > 0 || flushing) {
+ mutex_exit(&crypt_data->mutex);
+ /* release dict mutex so that scrub threads can release their
+ * table references */
+ dict_mutex_exit_for_mysql();
+ /* wakeup throttle (all) sleepers */
+ os_event_set(fil_crypt_throttle_sleep_event);
+ os_thread_sleep(20000);
+ dict_mutex_enter_for_mysql();
+ mutex_enter(&crypt_data->mutex);
+ cnt = crypt_data->rotate_state.active_threads;
+ flushing = crypt_data->rotate_state.flushing;
+
+ uint now = time(0);
+ if (now >= last + 30) {
+ fprintf(stderr,
+ "WARNING: "
+ "waited %u seconds to drop space: %lu\n",
+ now - start, space);
+ last = now;
+ }
+ }
+ mutex_exit(&crypt_data->mutex);
+}
+
+/*********************************************************************
+Get crypt status for a space (used by information_schema)
+return 0 if crypt data present */
+int
+fil_space_crypt_get_status(
+/*==================*/
+ ulint id, /*!< in: space id */
+ struct fil_space_crypt_status_t* status) /*!< out: status */
+{
+ fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(id);
+
+ if (crypt_data != NULL) {
+ status->space = id;
+ status->scheme = crypt_data->type;
+ mutex_enter(&crypt_data->mutex);
+ status->keyserver_requests = crypt_data->keyserver_requests;
+ status->min_key_version = crypt_data->min_key_version;
+ if (crypt_data->rotate_state.active_threads > 0 ||
+ crypt_data->rotate_state.flushing) {
+ status->rotating = true;
+ status->flushing =
+ crypt_data->rotate_state.flushing;
+ status->rotate_next_page_number =
+ crypt_data->rotate_state.next_offset;
+ status->rotate_max_page_number =
+ crypt_data->rotate_state.max_offset;
+ } else {
+ status->rotating = false;
+ }
+ mutex_exit(&crypt_data->mutex);
+ } else {
+ memset(status, 0, sizeof(*status));
+ }
+
+ if (srv_encrypt_tables == TRUE) {
+ status->current_key_version = GetLatestCryptoKeyVersion();
+ } else {
+ status->current_key_version = 0;
+ }
+ return crypt_data == NULL ? 1 : 0;
+}
+
+/*********************************************************************
+Return crypt statistics */
+void
+fil_crypt_total_stat(fil_crypt_stat_t *stat)
+{
+ mutex_enter(&crypt_stat_mutex);
+ *stat = crypt_stat;
+ mutex_exit(&crypt_stat_mutex);
+}
+
+/*********************************************************************
+Get scrub status for a space (used by information_schema)
+return 0 if data found */
+int
+fil_space_get_scrub_status(
+/*==================*/
+ ulint id, /*!< in: space id */
+ struct fil_space_scrub_status_t* status) /*!< out: status */
+{
+ fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(id);
+ memset(status, 0, sizeof(*status));
+ if (crypt_data != NULL) {
+ status->space = id;
+ status->compressed = fil_space_get_zip_size(id) > 0;
+ mutex_enter(&crypt_data->mutex);
+ status->last_scrub_completed =
+ crypt_data->rotate_state.scrubbing.last_scrub_completed;
+ if (crypt_data->rotate_state.active_threads > 0 &&
+ crypt_data->rotate_state.scrubbing.is_active) {
+ status->scrubbing = true;
+ status->current_scrub_started =
+ crypt_data->rotate_state.start_time;
+ status->current_scrub_active_threads =
+ crypt_data->rotate_state.active_threads;
+ status->current_scrub_page_number =
+ crypt_data->rotate_state.next_offset;
+ status->current_scrub_max_page_number =
+ crypt_data->rotate_state.max_offset;
+ } else {
+ status->scrubbing = false;
+ }
+ mutex_exit(&crypt_data->mutex);
+	}
+
+ return crypt_data == NULL ? 1 : 0;
+}