summaryrefslogtreecommitdiff
path: root/storage/xtradb/fil
diff options
context:
space:
mode:
Diffstat (limited to 'storage/xtradb/fil')
-rw-r--r--storage/xtradb/fil/fil0crypt.cc2433
-rw-r--r--storage/xtradb/fil/fil0fil.cc396
-rw-r--r--storage/xtradb/fil/fil0pagecompress.cc25
-rw-r--r--storage/xtradb/fil/fil0pageencryption.cc628
4 files changed, 3450 insertions, 32 deletions
diff --git a/storage/xtradb/fil/fil0crypt.cc b/storage/xtradb/fil/fil0crypt.cc
new file mode 100644
index 00000000000..e34297f4f86
--- /dev/null
+++ b/storage/xtradb/fil/fil0crypt.cc
@@ -0,0 +1,2433 @@
+#include "fil0fil.h"
+#include "srv0srv.h"
+#include "srv0start.h"
+#include "mach0data.h"
+#include "log0recv.h"
+#include "mtr0mtr.h"
+#include "mtr0log.h"
+#include "page0zip.h"
+#include "ut0ut.h"
+#include "btr0scrub.h"
+#include "fsp0fsp.h"
+#include "fil0pagecompress.h"
+#include "fil0pageencryption.h"
+
+#include <my_crypt.h>
+#include <my_crypt_key_management.h>
+
+#include <my_aes.h>
+#include <math.h>
+
+
+/** Mutex for keys */
+UNIV_INTERN ib_mutex_t fil_crypt_key_mutex;
+
+#ifdef UNIV_PFS_MUTEX
+UNIV_INTERN mysql_pfs_key_t fil_crypt_key_mutex_key;
+#endif
+
+/** Is encryption enabled/disabled */
+UNIV_INTERN my_bool srv_encrypt_tables = FALSE;
+
+/** No of key rotation threads requested */
+UNIV_INTERN uint srv_n_fil_crypt_threads = 0;
+
+/** No of key rotation threads started */
+static uint srv_n_fil_crypt_threads_started = 0;
+
+/** At this age or older a space/page will be rotated */
+UNIV_INTERN uint srv_fil_crypt_rotate_key_age = 1;
+
+/** Event to signal FROM the key rotation threads. */
+UNIV_INTERN os_event_t fil_crypt_event;
+
+/** Event to signal TO the key rotation threads. */
+UNIV_INTERN os_event_t fil_crypt_threads_event;
+
+/** Event for waking up threads throttle */
+UNIV_INTERN os_event_t fil_crypt_throttle_sleep_event;
+
+/** Mutex for key rotation threads */
+UNIV_INTERN ib_mutex_t fil_crypt_threads_mutex;
+
+#ifdef UNIV_PFS_MUTEX
+UNIV_INTERN mysql_pfs_key_t fil_crypt_threads_mutex_key;
+#endif
+
+/** Variable ensuring only 1 thread at time does initial conversion */
+static bool fil_crypt_start_converting = false;
+
+/** Variables for throttling */
+UNIV_INTERN uint srv_n_fil_crypt_iops = 100; // 10ms per iop
+static uint srv_alloc_time = 3; // allocate iops for 3s at a time
+static uint n_fil_crypt_iops_allocated = 0;
+
+/** Variables for scrubbing (defined in btr0scrub) */
+extern uint srv_background_scrub_data_interval;
+extern uint srv_background_scrub_data_check_interval;
+
+#define DEBUG_KEYROTATION_THROTTLING 0
+
+/** Statistics variables (protected by crypt_stat_mutex) */
+static fil_crypt_stat_t crypt_stat;
+static ib_mutex_t crypt_stat_mutex;
+
+#ifdef UNIV_PFS_MUTEX
+UNIV_INTERN mysql_pfs_key_t fil_crypt_stat_mutex_key;
+#endif
+
+/**
+ * key for crypt data mutex
+*/
+#ifdef UNIV_PFS_MUTEX
+UNIV_INTERN mysql_pfs_key_t fil_crypt_data_mutex_key;
+#endif
+
+/**
+* Magic pattern in start of crypt data on page 0
+*/
+#define MAGIC_SZ 6
+
+static const unsigned char CRYPT_MAGIC[MAGIC_SZ] = {
+        's', 0xE, 0xC, 'R', 'E', 't' };
+
+/** All-zero pattern meaning "crypt data was never written here" */
+static const unsigned char EMPTY_PATTERN[MAGIC_SZ] = {
+        0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
+
+/**
+ * CRYPT_SCHEME_UNENCRYPTED
+ *
+ * Used as intermediate state when convering a space from unencrypted
+ * to encrypted
+ */
+#define CRYPT_SCHEME_UNENCRYPTED 0
+
+/**
+ * CRYPT_SCHEME_1
+ *
+ * L = AES_ECB(KEY, IV)
+ * CRYPT(PAGE) = AES_CRT(KEY=L, IV=C, PAGE)
+ */
+#define CRYPT_SCHEME_1 1
+#define CRYPT_SCHEME_1_IV_LEN 16
+// cached L given key_version (derived key used for CTR encryption)
+struct key_struct
+{
+        uint key_version;                  // version this entry caches
+        byte key[CRYPT_SCHEME_1_IV_LEN];   // derived key L = AES_ECB(KEY, IV)
+};
+
+/** Per-space state of an ongoing key rotation round */
+struct fil_space_rotate_state_t
+{
+        time_t start_time;      // time when rotation started
+        ulint active_threads;   // active threads in space
+        ulint next_offset;      // next "free" offset
+        ulint max_offset;       // max offset needing to be rotated
+        uint min_key_version_found; // min key version found but not rotated
+        lsn_t end_lsn;          // max lsn created when rotating this space
+        bool starting;          // initial write of IV
+        bool flushing;          // space is being flushed at end of rotate
+        struct {
+                bool is_active; // is scrubbing active in this space
+                time_t last_scrub_completed; // when was last scrub completed
+        } scrubbing;
+};
+
+/** Per-tablespace crypt data, kept in the fil_space memory cache and
+serialized to page 0 (see fil_space_write_crypt_data_low). */
+struct fil_space_crypt_struct
+{
+        ulint type;             // CRYPT_SCHEME
+        uint keyserver_requests; // no of key requests to key server
+        uint key_count;         // No of initalized key-structs
+        key_struct keys[3];     // cached L = AES_ECB(KEY, IV)
+        uint min_key_version;   // min key version for this space
+        ulint page0_offset;     // byte offset on page 0 for crypt data
+
+        ib_mutex_t mutex;       // mutex protecting following variables
+        bool closing;           // is tablespace being closed
+        fil_space_rotate_state_t rotate_state;
+
+        uint iv_length;         // length of IV
+        byte iv[1];             // IV-data; struct is over-allocated by
+                                // iv_length so the IV is stored inline
+                                // (see fil_space_create_crypt_data)
+};
+
+/*********************************************************************
+Init space crypt: create the global key mutex, the statistics mutex
+and the throttle sleep event, and zero the statistics.
+NOTE(review): fil_space_crypt_cleanup() frees only the sleep event;
+the two mutexes created here appear to have no matching mutex_free —
+confirm whether that is intentional at shutdown. */
+UNIV_INTERN
+void
+fil_space_crypt_init()
+{
+        mutex_create(fil_crypt_key_mutex_key,
+                     &fil_crypt_key_mutex, SYNC_NO_ORDER_CHECK);
+
+        fil_crypt_throttle_sleep_event = os_event_create();
+
+        mutex_create(fil_crypt_stat_mutex_key,
+                     &crypt_stat_mutex, SYNC_NO_ORDER_CHECK);
+        memset(&crypt_stat, 0, sizeof(crypt_stat));
+}
+
+/*********************************************************************
+Cleanup space crypt: release the throttle sleep event created by
+fil_space_crypt_init(). */
+UNIV_INTERN
+void
+fil_space_crypt_cleanup()
+{
+        os_event_free(fil_crypt_throttle_sleep_event);
+}
+
+/******************************************************************
+Get key bytes for a space/key-version.
+Fills dst with the key material (for the CTR scheme, the derived key
+L = AES_ECB(KEY, IV)) and sets *key_length.
+@param dst            out: key material (>= MY_AES_MAX_KEY_LENGTH bytes)
+@param key_length     out: length of the key material written to dst
+@param crypt_data     in/out: per-space crypt data holding the key cache;
+                      only used when !page_encrypted
+@param version        in: key version to fetch
+@param page_encrypted in: true for per-page encrypted tables; the IV is
+                      then fetched from the key provider and no per-space
+                      cache is consulted */
+static
+void
+fil_crypt_get_key(byte *dst, uint* key_length,
+        fil_space_crypt_t* crypt_data, uint version, bool page_encrypted)
+{
+        unsigned char keybuf[MY_AES_MAX_KEY_LENGTH];
+        unsigned char iv[CRYPT_SCHEME_1_IV_LEN];
+        ulint iv_len = sizeof(iv);
+
+        if (!page_encrypted) {
+                mutex_enter(&crypt_data->mutex);
+
+                // Check if we already have the derived key cached
+                for (uint i = 0; i < crypt_data->key_count; i++) {
+                        if (crypt_data->keys[i].key_version == version) {
+                                memcpy(dst, crypt_data->keys[i].key,
+                                       sizeof(crypt_data->keys[i].key));
+                                mutex_exit(&crypt_data->mutex);
+                                return;
+                        }
+                }
+                // Not found!
+                crypt_data->keyserver_requests++;
+
+                /* Shift the cache down one slot to make room for the new
+                key in keys[0]. NOTE: must iterate from the end; a forward
+                copy would overwrite every slot with keys[0]. */
+                for (uint i = array_elements(crypt_data->keys) - 1;
+                     i > 0; i--) {
+                        crypt_data->keys[i] = crypt_data->keys[i - 1];
+                }
+        }
+        else
+        {
+                // load iv from the key provider
+                int rc = GetCryptoIV(version, (unsigned char*)iv, iv_len);
+
+                if (rc != CRYPT_KEY_OK) {
+                        ib_logf(IB_LOG_LEVEL_FATAL,
+                                "IV %d can not be found. Reason=%d", version, rc);
+                        ut_error;
+                }
+        }
+
+        if (HasCryptoKey(version)) {
+                *key_length = GetCryptoKeySize(version);
+
+                int rc = GetCryptoKey(version, (unsigned char*)keybuf, *key_length);
+
+                if (rc != CRYPT_KEY_OK) {
+                        ib_logf(IB_LOG_LEVEL_FATAL,
+                                "Key %d can not be found. Reason=%d", version, rc);
+                        ut_error;
+                }
+        } else {
+                ib_logf(IB_LOG_LEVEL_FATAL,
+                        "Key %d not found", version);
+                ut_error;
+        }
+
+        // do ctr key initialization
+        if (current_aes_dynamic_method == MY_AES_ALGORITHM_CTR)
+        {
+                // Now compute L by encrypting IV using this key
+                const unsigned char* src = page_encrypted ? iv : crypt_data->iv;
+                const int srclen = page_encrypted ? iv_len : crypt_data->iv_length;
+                /* NOTE(review): for page_encrypted the output buffer aliases
+                the key buffer (both keybuf) — relies on the ECB
+                implementation consuming the key before writing output */
+                unsigned char* buf = page_encrypted ? keybuf : crypt_data->keys[0].key;
+                uint32 buflen = page_encrypted ? *key_length : sizeof(crypt_data->keys[0].key);
+
+                // call ecb explicit
+                my_aes_encrypt_dynamic_type func = get_aes_encrypt_func(MY_AES_ALGORITHM_ECB);
+                int rc = (*func)(src, srclen,
+                                 buf, &buflen,
+                                 (unsigned char*)keybuf, *key_length,
+                                 NULL, 0,
+                                 1);
+
+                if (rc != AES_OK) {
+                        ib_logf(IB_LOG_LEVEL_FATAL,
+                                "Unable to encrypt key-block "
+                                " src: %p srclen: %d buf: %p buflen: %d."
+                                " return-code: %d. Can't continue!\n",
+                                src, srclen, buf, buflen, rc);
+                        ut_error;
+                }
+
+                if (!page_encrypted) {
+                        crypt_data->keys[0].key_version = version;
+                        crypt_data->key_count++;
+
+                        if (crypt_data->key_count > array_elements(crypt_data->keys)) {
+                                crypt_data->key_count = array_elements(crypt_data->keys);
+                        }
+                }
+
+                // set the key size to the aes block size because this encrypted data is the key
+                *key_length = MY_AES_BLOCK_SIZE;
+                memcpy(dst, buf, buflen);
+        }
+        else
+        {
+                // otherwise keybuf contains the right key
+                memcpy(dst, keybuf, *key_length);
+        }
+
+        if (!page_encrypted) {
+                mutex_exit(&crypt_data->mutex);
+        }
+}
+
+/******************************************************************
+Get key bytes for a space/latest(key-version).
+Resolves *version to the newest available key when encryption is
+enabled, then delegates to fil_crypt_get_key(). */
+static inline
+void
+fil_crypt_get_latest_key(byte *dst, uint* key_length,
+        fil_space_crypt_t* crypt_data, uint *version)
+{
+        if (!srv_encrypt_tables) {
+                /* per-page encrypted table: *version is the key id the
+                caller chose; no per-space crypt data is involved */
+                fil_crypt_get_key(dst, key_length, NULL, *version, true);
+                return;
+        }
+
+        /* key rotation: ask the key provider for the newest version;
+        if none was created, keep using the caller-supplied one */
+        int latest = GetLatestCryptoKeyVersion();
+
+        if (latest >= 0) {
+                *version = latest;
+        }
+
+        fil_crypt_get_key(dst, key_length, crypt_data, *version, false);
+}
+
+/******************************************************************
+Create a fil_space_crypt_t object with a freshly generated random IV.
+The scheme is CRYPT_SCHEME_1 when srv_encrypt_tables is set, otherwise
+CRYPT_SCHEME_UNENCRYPTED.
+@return new crypt data, or NULL if memory allocation failed (callers
+such as fil_crypt_start_encrypting_space already handle NULL) */
+UNIV_INTERN
+fil_space_crypt_t*
+fil_space_create_crypt_data()
+{
+        const uint iv_length = CRYPT_SCHEME_1_IV_LEN;
+        /* over-allocate so the IV fits in the iv[1] tail of the struct */
+        const uint sz = sizeof(fil_space_crypt_t) + iv_length;
+        fil_space_crypt_t* crypt_data =
+                static_cast<fil_space_crypt_t*>(malloc(sz));
+
+        if (crypt_data == NULL) {
+                /* previously the result was memset without a check,
+                crashing on OOM */
+                return NULL;
+        }
+
+        memset(crypt_data, 0, sz);
+
+        if (srv_encrypt_tables == FALSE) {
+                crypt_data->type = CRYPT_SCHEME_UNENCRYPTED;
+                crypt_data->min_key_version = 0;
+        } else {
+                crypt_data->type = CRYPT_SCHEME_1;
+                crypt_data->min_key_version = GetLatestCryptoKeyVersion();
+        }
+
+        mutex_create(fil_crypt_data_mutex_key,
+                     &crypt_data->mutex, SYNC_NO_ORDER_CHECK);
+        crypt_data->iv_length = iv_length;
+        my_random_bytes(crypt_data->iv, iv_length);
+        return crypt_data;
+}
+
+/******************************************************************
+Compare two crypt objects. Both must use a known scheme and the
+fixed IV length; differing IVs are not supported and assert.
+@return 0 (objects considered equal) */
+UNIV_INTERN
+int
+fil_space_crypt_compare(const fil_space_crypt_t* crypt_data1,
+        const fil_space_crypt_t* crypt_data2)
+{
+        const fil_space_crypt_t* objs[2] = { crypt_data1, crypt_data2 };
+
+        for (int i = 0; i < 2; i++) {
+                ut_a(objs[i]->type == CRYPT_SCHEME_UNENCRYPTED ||
+                     objs[i]->type == CRYPT_SCHEME_1);
+                ut_a(objs[i]->iv_length == CRYPT_SCHEME_1_IV_LEN);
+        }
+
+        /* no support for changing iv (yet?) */
+        ut_a(memcmp(crypt_data1->iv, crypt_data2->iv,
+                    crypt_data1->iv_length) == 0);
+
+        return 0;
+}
+
+/******************************************************************
+Read crypt data from a page (0).
+On-disk layout at "offset":
+  CRYPT_MAGIC (6) | type (1) | iv_length (1) | iv (iv_length) |
+  min_key_version (4)
+@return allocated crypt data, or NULL when the area holds the
+EMPTY_PATTERN (crypt data never written) or does not start with
+CRYPT_MAGIC; ut_error on an unsupported type or iv length.
+NOTE(review): the malloc() result is not checked before memset —
+confirm the intended OOM policy. */
+UNIV_INTERN
+fil_space_crypt_t*
+fil_space_read_crypt_data(ulint space, const byte* page, ulint offset)
+{
+        if (memcmp(page + offset, EMPTY_PATTERN, MAGIC_SZ) == 0) {
+                /* crypt is not stored */
+                return NULL;
+        }
+
+        if (memcmp(page + offset, CRYPT_MAGIC, MAGIC_SZ) != 0) {
+                /* garbage where crypt data should be: warn and treat
+                the space as unencrypted rather than failing hard */
+                fprintf(stderr,
+                        "Warning: found potentially bogus bytes on "
+                        "page 0 offset %lu for space %lu : "
+                        "[ %.2x %.2x %.2x %.2x %.2x %.2x ]. "
+                        "Assuming space is not encrypted!\n",
+                        offset, space,
+                        page[offset + 0],
+                        page[offset + 1],
+                        page[offset + 2],
+                        page[offset + 3],
+                        page[offset + 4],
+                        page[offset + 5]);
+                return NULL;
+        }
+
+        ulint type = mach_read_from_1(page + offset + MAGIC_SZ + 0);
+
+        if (! (type == CRYPT_SCHEME_UNENCRYPTED ||
+               type == CRYPT_SCHEME_1)) {
+                fprintf(stderr,
+                        "Found non sensible crypt scheme: %lu for space %lu "
+                        " offset: %lu bytes: "
+                        "[ %.2x %.2x %.2x %.2x %.2x %.2x ]\n",
+                        type, space, offset,
+                        page[offset + 0 + MAGIC_SZ],
+                        page[offset + 1 + MAGIC_SZ],
+                        page[offset + 2 + MAGIC_SZ],
+                        page[offset + 3 + MAGIC_SZ],
+                        page[offset + 4 + MAGIC_SZ],
+                        page[offset + 5 + MAGIC_SZ]);
+                ut_error;
+        }
+
+        ulint iv_length = mach_read_from_1(page + offset + MAGIC_SZ + 1);
+        if (! (iv_length == CRYPT_SCHEME_1_IV_LEN)) {
+                fprintf(stderr,
+                        "Found non sensible iv length: %lu for space %lu "
+                        " offset: %lu type: %lu bytes: "
+                        "[ %.2x %.2x %.2x %.2x %.2x %.2x ]\n",
+                        iv_length, space, offset, type,
+                        page[offset + 0 + MAGIC_SZ],
+                        page[offset + 1 + MAGIC_SZ],
+                        page[offset + 2 + MAGIC_SZ],
+                        page[offset + 3 + MAGIC_SZ],
+                        page[offset + 4 + MAGIC_SZ],
+                        page[offset + 5 + MAGIC_SZ]);
+                ut_error;
+        }
+
+        uint min_key_version = mach_read_from_4
+                (page + offset + MAGIC_SZ + 2 + iv_length);
+
+        /* over-allocate so the IV fits in the iv[1] tail of the struct */
+        const uint sz = sizeof(fil_space_crypt_t) + iv_length;
+        fil_space_crypt_t* crypt_data = static_cast<fil_space_crypt_t*>(
+                malloc(sz));
+        memset(crypt_data, 0, sz);
+
+        crypt_data->type = type;
+        crypt_data->min_key_version = min_key_version;
+        crypt_data->page0_offset = offset;
+        mutex_create(fil_crypt_data_mutex_key,
+                     &crypt_data->mutex, SYNC_NO_ORDER_CHECK);
+        crypt_data->iv_length = iv_length;
+        memcpy(crypt_data->iv, page + offset + MAGIC_SZ + 2, iv_length);
+
+        return crypt_data;
+}
+
+/******************************************************************
+Free a crypt data object and reset the caller's pointer to NULL */
+UNIV_INTERN
+void
+fil_space_destroy_crypt_data(fil_space_crypt_t **crypt_data)
+{
+        if (crypt_data == NULL || *crypt_data == NULL) {
+                return;
+        }
+
+        fil_space_crypt_t* obj = *crypt_data;
+
+        /* lock (and unlock) mutex to make sure no one has it locked
+        currently */
+        mutex_enter(&obj->mutex);
+        mutex_exit(&obj->mutex);
+        mutex_free(&obj->mutex);
+        free(obj);
+        *crypt_data = NULL;
+}
+
+/******************************************************************
+Write crypt data to a page (0).
+Writes MAGIC | type | iv_len | iv | min_key_version at "offset" as
+byte-wise mtr updates, then appends an MLOG_FILE_WRITE_CRYPT_DATA
+record (space_id(4) offset(2) type(1) len(1) min_key_version(4),
+followed by the iv bytes) so that recovery can rebuild the
+fil_space_t crypt data. */
+static
+void
+fil_space_write_crypt_data_low(fil_space_crypt_t *crypt_data,
+        ulint type,
+        byte* page, ulint offset,
+        ulint maxsize, mtr_t* mtr)
+{
+        ut_a(offset > 0 && offset < UNIV_PAGE_SIZE);
+        ulint space_id = mach_read_from_4(
+                page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+        const uint len = crypt_data->iv_length;
+        const uint min_key_version = crypt_data->min_key_version;
+        crypt_data->page0_offset = offset;
+        /* the serialized crypt data must fit in the reserved area */
+        ut_a(2 + len + 4 + MAGIC_SZ < maxsize);
+
+        /*
+          redo log this as bytewise updates to page 0
+          followed by an MLOG_FILE_WRITE_CRYPT_DATA
+          (that will during recovery update fil_space_t)
+        */
+        mlog_write_string(page + offset, CRYPT_MAGIC, MAGIC_SZ, mtr);
+        mlog_write_ulint(page + offset + MAGIC_SZ + 0, type, MLOG_1BYTE, mtr);
+        mlog_write_ulint(page + offset + MAGIC_SZ + 1, len, MLOG_1BYTE, mtr);
+        mlog_write_string(page + offset + MAGIC_SZ + 2, crypt_data->iv, len,
+                          mtr);
+        mlog_write_ulint(page + offset + MAGIC_SZ + 2 + len, min_key_version,
+                         MLOG_4BYTES, mtr);
+
+        /* 11 = worst-case initial log record, 12 = fixed payload size */
+        byte* log_ptr = mlog_open(mtr, 11 + 12 + len);
+        if (log_ptr != NULL) {
+                log_ptr = mlog_write_initial_log_record_fast(
+                        page,
+                        MLOG_FILE_WRITE_CRYPT_DATA,
+                        log_ptr, mtr);
+                mach_write_to_4(log_ptr, space_id);
+                log_ptr += 4;
+                mach_write_to_2(log_ptr, offset);
+                log_ptr += 2;
+                mach_write_to_1(log_ptr, type);
+                log_ptr += 1;
+                mach_write_to_1(log_ptr, len);
+                log_ptr += 1;
+                mach_write_to_4(log_ptr, min_key_version);
+                log_ptr += 4;
+                mlog_close(mtr, log_ptr);
+
+                /* iv bytes are appended after the fixed-size payload */
+                mlog_catenate_string(mtr, crypt_data->iv, len);
+        }
+}
+
+/******************************************************************
+Write crypt data to page 0 of a tablespace, if the space has any */
+UNIV_INTERN
+void
+fil_space_write_crypt_data(ulint space, byte* page, ulint offset,
+        ulint maxsize, mtr_t* mtr)
+{
+        fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space);
+
+        if (crypt_data != NULL) {
+                fil_space_write_crypt_data_low(crypt_data, crypt_data->type,
+                                               page, offset, maxsize, mtr);
+        }
+}
+
+/******************************************************************
+Parse a MLOG_FILE_WRITE_CRYPT_DATA log entry.
+Record layout: space_id(4) offset(2) type(1) iv-len(1)
+min_key_version(4) followed by iv-len bytes of IV.
+@return pointer past the parsed record, or NULL if the record is not
+complete within [ptr, end_ptr) */
+UNIV_INTERN
+byte*
+fil_parse_write_crypt_data(byte* ptr, byte* end_ptr,
+        buf_block_t* block)
+{
+        /* check that redo log entry is complete */
+        uint entry_size =
+                4 + // size of space_id
+                2 + // size of offset
+                1 + // size of type
+                1 + // size of iv-len
+                4;  // size of min_key_version
+        if (end_ptr - ptr < entry_size)
+                return NULL;
+
+        ulint space_id = mach_read_from_4(ptr);
+        ptr += 4;
+        uint offset = mach_read_from_2(ptr);
+        ptr += 2;
+        uint type = mach_read_from_1(ptr);
+        ptr += 1;
+        uint len = mach_read_from_1(ptr);
+        ptr += 1;
+
+        ut_a(type == CRYPT_SCHEME_UNENCRYPTED ||
+             type == CRYPT_SCHEME_1); // only supported
+        ut_a(len == CRYPT_SCHEME_1_IV_LEN); // only supported
+        uint min_key_version = mach_read_from_4(ptr);
+        ptr += 4;
+
+        if (end_ptr - ptr < len)
+                return NULL;
+
+        fil_space_crypt_t* crypt_data = fil_space_create_crypt_data();
+        /* fil_space_create_crypt_data() initializes type (and IV) from
+        the current srv_encrypt_tables setting; overwrite every field
+        with the values recorded in the redo log — in particular the
+        logged type, which was previously parsed but never stored */
+        crypt_data->type = type;
+        crypt_data->page0_offset = offset;
+        crypt_data->min_key_version = min_key_version;
+        memcpy(crypt_data->iv, ptr, len);
+        ptr += len;
+
+        /* update fil_space memory cache with crypt_data */
+        fil_space_set_crypt_data(space_id, crypt_data);
+
+        return ptr;
+}
+
+/******************************************************************
+Clear crypt data from a page (0) by zero-filling the serialized
+area (magic, type, length, IV and min key version). */
+UNIV_INTERN
+void
+fil_space_clear_crypt_data(byte* page, ulint offset)
+{
+        //TODO(jonaso): pass crypt-data and read len from there
+        const ulint iv_len = CRYPT_SCHEME_1_IV_LEN;
+        const ulint type_sz = 1;
+        const ulint len_sz = 1;
+        const ulint key_version_sz = 4;
+
+        memset(page + offset, 0,
+               sizeof(CRYPT_MAGIC) + type_sz + len_sz + iv_len
+               + key_version_sz);
+}
+
+/*********************************************************************
+Check if page shall be encrypted before write.
+@return true when encryption is enabled and the space has crypt data
+of a real (non-placeholder) scheme */
+UNIV_INTERN
+bool
+fil_space_check_encryption_write(
+/*==============================*/
+        ulint space) /*!< in: tablespace id */
+{
+        if (!srv_encrypt_tables) {
+                return false;
+        }
+
+        fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space);
+
+        return crypt_data != NULL
+                && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED;
+}
+
+/******************************************************************
+Encrypt a page.
+Writes an encrypted copy of src_frame into dst_frame. FSP_HDR/XDES
+pages and pages that are already encrypted are copied through
+unchanged. For normal pages the FIL header and trailer remain in
+clear text and a post-encryption checksum is stored after the key
+version; for page_compressed pages everything after the FIL header
+is encrypted and a special header layout is written.
+@param space          in: tablespace id
+@param offset         in: page number. NOTE(review): unused — the page
+                      number is re-read from src_frame instead; confirm
+                      the parameter can be relied on being equal
+@param lsn            in: page lsn, used as part of the CTR counter
+@param src_frame      in: clear-text page
+@param zip_size       in: compressed page size, or 0 for uncompressed
+@param dst_frame      out: encrypted page, page_size bytes
+@param encryption_key in: key id to use when srv_encrypt_tables==FALSE */
+UNIV_INTERN
+void
+fil_space_encrypt(ulint space, ulint offset, lsn_t lsn,
+        const byte* src_frame, ulint zip_size, byte* dst_frame, ulint encryption_key)
+{
+        fil_space_crypt_t* crypt_data;
+        ulint page_size = (zip_size) ? zip_size : UNIV_PAGE_SIZE;
+
+        // get key (L)
+        uint key_version;
+        byte key[MY_AES_MAX_KEY_LENGTH];
+        uint key_length;
+
+        if (srv_encrypt_tables) {
+                crypt_data = fil_space_get_crypt_data(space);
+                if (crypt_data == NULL) {
+                        //TODO: Is this really needed ?
+                        memcpy(dst_frame, src_frame, page_size);
+                        return;
+                }
+                fil_crypt_get_latest_key(key, &key_length, crypt_data, &key_version);
+        } else {
+                /* per-page encrypted table: the caller supplies the key id */
+                key_version = encryption_key;
+                fil_crypt_get_latest_key(key, &key_length, NULL, (uint*)&key_version);
+        }
+
+
+        /* Load the iv or counter (depending to the encryption algorithm used) */
+        unsigned char iv[MY_AES_BLOCK_SIZE];
+
+        if (current_aes_dynamic_method == MY_AES_ALGORITHM_CTR)
+        {
+                // create counter block (C): space id | page no | lsn
+                mach_write_to_4(iv + 0, space);
+                ulint space_offset = mach_read_from_4(
+                        src_frame + FIL_PAGE_OFFSET);
+                mach_write_to_4(iv + 4, space_offset);
+                mach_write_to_8(iv + 8, lsn);
+        }
+        else
+        {
+                // take the iv from the key provider
+
+                int load_iv_rc = GetCryptoIV(key_version, (uchar *) iv, sizeof(iv));
+
+                // if the iv can not be loaded the whole page can not be encrypted
+                if (load_iv_rc != CRYPT_KEY_OK)
+                {
+                        ib_logf(IB_LOG_LEVEL_FATAL,
+                                "Unable to decrypt data-block. "
+                                " Can not load iv for key %d"
+                                " return-code: %d. Can't continue!\n",
+                                key_version, load_iv_rc);
+
+                        ut_error;
+                }
+        }
+
+
+        ibool page_compressed = (mach_read_from_2(src_frame+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED);
+        ibool page_encrypted = fil_space_is_page_encrypted(space);
+
+        /* for page_compressed pages this field holds the compression
+        algorithm of the source page */
+        ulint compression_alg = mach_read_from_8(src_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+
+        ulint orig_page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE);
+        /* never encrypt page 0 metadata pages or already-encrypted pages */
+        if (orig_page_type==FIL_PAGE_TYPE_FSP_HDR
+            || orig_page_type==FIL_PAGE_TYPE_XDES
+            || orig_page_type== FIL_PAGE_PAGE_ENCRYPTED
+            || orig_page_type== FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) {
+                memcpy(dst_frame, src_frame, page_size);
+                return;
+        }
+
+        // copy page header
+        memcpy(dst_frame, src_frame, FIL_PAGE_DATA);
+
+
+        if (page_encrypted && !page_compressed) {
+                // key id (2 bytes, read back by fil_space_decrypt)
+                mach_write_to_2(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
+                                key_version);
+                // original page type
+                mach_write_to_2(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 2,
+                                orig_page_type);
+                // new page type
+                mach_write_to_2(dst_frame+FIL_PAGE_TYPE, FIL_PAGE_PAGE_ENCRYPTED);
+        } else {
+                // store key version (4 bytes)
+                mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
+                                key_version);
+        }
+
+        // encrypt page data
+        ulint unencrypted_bytes = FIL_PAGE_DATA + FIL_PAGE_DATA_END;
+        ulint srclen = page_size - unencrypted_bytes;
+        const byte* src = src_frame + FIL_PAGE_DATA;
+        byte* dst = dst_frame + FIL_PAGE_DATA;
+        uint32 dstlen;
+
+        if (page_compressed) {
+                /* compressed pages have no clear-text trailer */
+                srclen = page_size - FIL_PAGE_DATA;
+        }
+
+        int rc = (* my_aes_encrypt_dynamic)(src, srclen,
+                                            dst, &dstlen,
+                                            (unsigned char*)key, key_length,
+                                            (unsigned char*)iv, sizeof(iv),
+                                            1);
+
+        if (! ((rc == AES_OK) && ((ulint) dstlen == srclen))) {
+                ib_logf(IB_LOG_LEVEL_FATAL,
+                        "Unable to encrypt data-block "
+                        " src: %p srclen: %ld buf: %p buflen: %d."
+                        " return-code: %d. Can't continue!\n",
+                        src, (long)srclen,
+                        dst, dstlen, rc);
+                ut_error;
+        }
+
+        if (!page_compressed) {
+                // copy page trailer
+                memcpy(dst_frame + page_size - FIL_PAGE_DATA_END,
+                       src_frame + page_size - FIL_PAGE_DATA_END,
+                       FIL_PAGE_DATA_END);
+
+                /* handle post encryption checksum */
+                ib_uint32_t checksum = 0;
+                srv_checksum_algorithm_t algorithm =
+                        static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm);
+
+                if (zip_size == 0) {
+                        switch (algorithm) {
+                        case SRV_CHECKSUM_ALGORITHM_CRC32:
+                        case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
+                                checksum = buf_calc_page_crc32(dst_frame);
+                                break;
+                        case SRV_CHECKSUM_ALGORITHM_INNODB:
+                        case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
+                                checksum = (ib_uint32_t) buf_calc_page_new_checksum(
+                                        dst_frame);
+                                break;
+                        case SRV_CHECKSUM_ALGORITHM_NONE:
+                        case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
+                                checksum = BUF_NO_CHECKSUM_MAGIC;
+                                break;
+                        /* no default so the compiler will emit a warning
+                         * if new enum is added and not handled here */
+                        }
+                } else {
+                        checksum = page_zip_calc_checksum(dst_frame, zip_size,
+                                                          algorithm);
+                }
+
+                // store the post-encryption checksum after the key-version
+                mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4,
+                                checksum);
+        } else {
+                /* Page compressed and encrypted tables have different
+                FIL_HEADER */
+                /* page_len = log2(page_size) */
+                ulint page_len = log10((double)page_size)/log10((double)2);
+                /* Set up the correct page type */
+                mach_write_to_2(dst_frame+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED);
+                /* Store the original page type (read back at +4 on decrypt) */
+                mach_write_to_2(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+4, orig_page_type);
+                /* Store log2(page size); decrypt reads this as "compressed_len" */
+                mach_write_to_1(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+6, page_len);
+                /* Set up the compression method */
+                mach_write_to_1(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+7, compression_alg);
+        }
+
+}
+
+/*********************************************************************
+Check if extra buffer shall be allocated for decrypting after read.
+@return true when the space has crypt data of a real scheme */
+UNIV_INTERN
+bool
+fil_space_check_encryption_read(
+/*==============================*/
+        ulint space) /*!< in: tablespace id */
+{
+        const fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space);
+
+        return crypt_data != NULL
+                && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED;
+}
+
+/******************************************************************
+Decrypt a page.
+Mirrors fil_space_encrypt(): reads the key version (2 bytes for
+FIL_PAGE_PAGE_ENCRYPTED, otherwise 4), rebuilds the CTR counter or
+loads the IV, decrypts the payload and restores the FIL header for
+the respective page type.
+@param crypt_data in: per-space crypt data (ignored for per-page
+                  encrypted pages)
+@param src_frame  in: encrypted page
+@param page_size  in: page size in bytes
+@param dst_frame  out: decrypted page
+@return false if the page was not encrypted (plain copy), true if it
+was decrypted.
+NOTE(review): the IV-load failure path returns AES_KEY_CREATION_FAILED
+from a bool function — that implicitly converts to true/false and the
+caller cannot distinguish the error; confirm intended behavior. */
+UNIV_INTERN
+bool
+fil_space_decrypt(fil_space_crypt_t* crypt_data,
+        const byte* src_frame, ulint page_size, byte* dst_frame)
+{
+        ulint page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE);
+        // key version
+        uint key_version;
+        bool page_encrypted = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
+                               || page_type == FIL_PAGE_PAGE_ENCRYPTED);
+
+        bool page_compressed = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
+                                || page_type == FIL_PAGE_PAGE_COMPRESSED);
+
+        ulint orig_page_type=0;
+
+        if (page_type == FIL_PAGE_PAGE_ENCRYPTED) {
+                /* per-page encrypted: 2-byte key id + 2-byte original type */
+                key_version = mach_read_from_2(
+                        src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+                orig_page_type = mach_read_from_2(
+                        src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 2);
+        } else {
+                key_version = mach_read_from_4(
+                        src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+        }
+
+        if (key_version == 0 && !page_encrypted) {
+                //TODO: is this really needed ?
+                memcpy(dst_frame, src_frame, page_size);
+                return false; /* page not decrypted */
+        }
+
+        // read space & offset & lsn (inputs to the CTR counter block)
+        ulint space = mach_read_from_4(
+                src_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+        ulint offset = mach_read_from_4(
+                src_frame + FIL_PAGE_OFFSET);
+        ib_uint64_t lsn = mach_read_from_8(src_frame + FIL_PAGE_LSN);
+
+        // copy page header
+        memcpy(dst_frame, src_frame, FIL_PAGE_DATA);
+
+        if (page_type == FIL_PAGE_PAGE_ENCRYPTED) {
+                // orig page type
+                mach_write_to_2(dst_frame+FIL_PAGE_TYPE, orig_page_type);
+        }
+
+
+        // get key
+        byte key[MY_AES_MAX_KEY_LENGTH];
+        uint key_length;
+        fil_crypt_get_key(key, &key_length, crypt_data, key_version, page_encrypted);
+
+        // get the iv
+        unsigned char iv[MY_AES_BLOCK_SIZE];
+
+        if (current_aes_dynamic_method == MY_AES_ALGORITHM_CTR)
+        {
+                // create counter block (must match fil_space_encrypt)
+
+                mach_write_to_4(iv + 0, space);
+                mach_write_to_4(iv + 4, offset);
+                mach_write_to_8(iv + 8, lsn);
+        }
+        else
+        {
+                // take the iv from the key provider
+
+                int load_iv_rc = GetCryptoIV(key_version, (uchar *) iv, sizeof(iv));
+
+                // if the iv can not be loaded the whole page can not be decrypted
+                if (load_iv_rc != CRYPT_KEY_OK)
+                {
+                        ib_logf(IB_LOG_LEVEL_FATAL,
+                                "Unable to decrypt data-block. "
+                                " Can not load iv for key %d"
+                                " return-code: %d. Can't continue!\n",
+                                key_version, load_iv_rc);
+
+                        return AES_KEY_CREATION_FAILED;
+                }
+        }
+
+        const byte* src = src_frame + FIL_PAGE_DATA;
+        byte* dst = dst_frame + FIL_PAGE_DATA;
+        uint32 dstlen;
+        ulint srclen = page_size - (FIL_PAGE_DATA + FIL_PAGE_DATA_END);
+
+        /* only valid for page_compressed pages (set below) */
+        ulint compressed_len;
+        ulint compression_method;
+
+        if (page_compressed) {
+                orig_page_type = mach_read_from_2(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+4);
+                /* "compressed_len" is log2(page size) as written by
+                fil_space_encrypt, not a byte count */
+                compressed_len = mach_read_from_1(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+6);
+                compression_method = mach_read_from_1(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+7);
+        }
+
+        if (page_encrypted && !page_compressed) {
+                orig_page_type = mach_read_from_2(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+2);
+        }
+
+        if (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) {
+                /* 2^compressed_len == page size; no clear-text trailer */
+                srclen = pow((double)2, (double)((int)compressed_len)) - FIL_PAGE_DATA;
+        }
+
+        int rc = (* my_aes_decrypt_dynamic)(src, srclen,
+                                            dst, &dstlen,
+                                            (unsigned char*)key, key_length,
+                                            (unsigned char*)iv, sizeof(iv),
+                                            1);
+
+        if (! ((rc == AES_OK) && ((ulint) dstlen == srclen))) {
+                ib_logf(IB_LOG_LEVEL_FATAL,
+                        "Unable to decrypt data-block "
+                        " src: %p srclen: %ld buf: %p buflen: %d."
+                        " return-code: %d. Can't continue!\n",
+                        src, (long)srclen,
+                        dst, dstlen, rc);
+                ut_error;
+        }
+
+        if (page_type != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) {
+                // copy page trailer
+                memcpy(dst_frame + page_size - FIL_PAGE_DATA_END,
+                       src_frame + page_size - FIL_PAGE_DATA_END,
+                       FIL_PAGE_DATA_END);
+
+                // clear key-version & crypt-checksum from dst
+                memset(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
+        } else {
+                /* For page compressed tables we set up the FIL_HEADER again */
+                /* setting original page type */
+                mach_write_to_2(dst_frame + FIL_PAGE_TYPE, orig_page_type);
+                /* page_compression uses BUF_NO_CHECKSUM_MAGIC as checksum */
+                mach_write_to_4(dst_frame + FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC);
+                /* Set up the flush lsn to be compression algorithm */
+                mach_write_to_8(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, compression_method);
+        }
+
+        return true; /* page was decrypted */
+}
+
+/******************************************************************
+Decrypt a page, looking up the crypt data by tablespace id. The
+"was decrypted" result of the worker overload is discarded. */
+UNIV_INTERN
+void
+fil_space_decrypt(ulint space,
+        const byte* src_frame, ulint page_size, byte* dst_frame)
+{
+        fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space);
+
+        (void) fil_space_decrypt(crypt_data, src_frame, page_size, dst_frame);
+}
+
+/*********************************************************************
+Verify checksum for a page (iff it's encrypted)
+NOTE: currently this function can only be run in single threaded mode
+as it modifies srv_checksum_algorithm (temporarily)
+@param src_frame in: page to verify (temporarily modified in place,
+                 restored before return)
+@param zip_size  in: compressed page size, or 0
+@return true if page is encrypted AND OK, false otherwise */
+bool
+fil_space_verify_crypt_checksum(const byte* src_frame, ulint zip_size)
+{
+        // key version; 0 means the page is not encrypted
+        uint key_version = mach_read_from_4(
+                src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+
+        if (key_version == 0) {
+                return false; // unencrypted page
+        }
+
+        /* "trick" the normal checksum routines by storing the post-encryption
+         * checksum into the normal checksum field allowing for reuse of
+         * the normal routines */
+
+        // post encryption checksum (stored by fil_space_encrypt)
+        ib_uint32_t stored_post_encryption = mach_read_from_4(
+                src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4);
+
+        // save pre encryption checksum for restore in end of this function
+        ib_uint32_t stored_pre_encryption = mach_read_from_4(
+                src_frame + FIL_PAGE_SPACE_OR_CHKSUM);
+
+        ib_uint32_t checksum_field2 = mach_read_from_4(
+                src_frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM);
+
+        /** prepare frame for usage of normal checksum routines */
+        mach_write_to_4(const_cast<byte*>(src_frame) + FIL_PAGE_SPACE_OR_CHKSUM,
+                        stored_post_encryption);
+
+        /* NOTE: this function is (currently) only run when restoring
+         * dblwr-buffer, server is single threaded so it's safe to modify
+         * srv_checksum_algorithm */
+        srv_checksum_algorithm_t save_checksum_algorithm =
+                (srv_checksum_algorithm_t)srv_checksum_algorithm;
+        if (zip_size == 0 &&
+            (save_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB ||
+             save_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_INNODB)) {
+                /* handle ALGORITHM_INNODB specially,
+                 * "downgrade" to ALGORITHM_INNODB and store BUF_NO_CHECKSUM_MAGIC
+                 * checksum_field2 is sort of pointless anyway...
+                 */
+                srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_INNODB;
+                mach_write_to_4(const_cast<byte*>(src_frame) +
+                                UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+                                BUF_NO_CHECKSUM_MAGIC);
+        }
+
+        /* verify checksums */
+        ibool corrupted = buf_page_is_corrupted(false, src_frame, zip_size);
+
+        /** restore frame & algorithm */
+        srv_checksum_algorithm = save_checksum_algorithm;
+
+        mach_write_to_4(const_cast<byte*>(src_frame) +
+                        FIL_PAGE_SPACE_OR_CHKSUM,
+                        stored_pre_encryption);
+
+        mach_write_to_4(const_cast<byte*>(src_frame) +
+                        UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+                        checksum_field2);
+
+        if (!corrupted) {
+                return true; // page was encrypted and checksum matched
+        } else {
+                return false; // page was encrypted but checksum didn't match
+        }
+}
+
+/***********************************************************************/
+
+/** A copy of global key state, captured so that one rotation round
+works against a consistent snapshot of (latest key version,
+rotate-key age). */
+struct key_state_t {
+        key_state_t() : key_version(0),
+                        rotate_key_age(srv_fil_crypt_rotate_key_age) {}
+        bool operator==(const key_state_t& other) const {
+                return key_version == other.key_version &&
+                        rotate_key_age == other.rotate_key_age;
+        }
+        uint key_version;     // latest key version (0 == encryption off)
+        uint rotate_key_age;  // rotate pages whose key is older than this
+};
+
+/***********************************************************************
+Copy global key state into *new_state. With encryption disabled the
+snapshot reports key version 0 and age 0. */
+static void
+fil_crypt_get_key_state(
+        key_state_t *new_state)
+{
+        if (srv_encrypt_tables != TRUE) {
+                new_state->key_version = 0;
+                new_state->rotate_key_age = 0;
+                return;
+        }
+
+        new_state->key_version = GetLatestCryptoKeyVersion();
+        new_state->rotate_key_age = srv_fil_crypt_rotate_key_age;
+        ut_a(new_state->key_version > 0);
+}
+
+/***********************************************************************
+Check if a key needs rotation given a key_state
+@return true if a page on key_version should be re-encrypted */
+static bool
+fil_crypt_needs_rotation(uint key_version, const key_state_t *key_state)
+{
+        // TODO(jonaso): Add support for rotating encrypted => unencrypted
+
+        const uint latest = key_state->key_version;
+
+        if (key_version == 0) {
+                /* unencrypted page: rotate (encrypt) iff encryption is
+                enabled; rotate_key_age is ignored for this transition */
+                return latest != 0;
+        }
+
+        if (latest == 0) {
+                /* encrypted page while encryption is disabled:
+                rotate (decrypt) */
+                return true;
+        }
+
+        /* encrypted => encrypted: re-encrypt only if the page's key
+        is sufficiently old */
+        return key_version + key_state->rotate_key_age < latest;
+}
+
+/***********************************************************************
+Check if a space is closing (i.e just before drop) */
+UNIV_INTERN bool
+fil_crypt_is_closing(ulint space)
+{
+ bool closing;
+ fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
+ mutex_enter(&crypt_data->mutex);
+ closing = crypt_data->closing;
+ mutex_exit(&crypt_data->mutex);
+ return closing;
+}
+
/***********************************************************************
Start encrypting a space that has no crypt data yet.

Writes crypt data to page 0 and flushes it to disk BEFORE publishing the
CRYPT_SCHEME_1 type, so that after a crash there is no risk of finding
encrypted pages without crypt data on page 0. Only one thread at a time
may be converting any space (guarded by fil_crypt_start_converting).

The caller holds a pending op (fil_inc_pending_ops) on entry; this
function may temporarily release and reacquire it while syncing.
@return true if a pending op (fil_inc_pending_ops/fil_decr_pending_ops) is held
*/
static bool
fil_crypt_start_encrypting_space(ulint space, bool *recheck) {

	/* we have a pending op when entering function */
	bool pending_op = true;

	mutex_enter(&fil_crypt_threads_mutex);
	fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
	if (crypt_data != NULL || fil_crypt_start_converting) {
		/* someone beat us to it */
		if (fil_crypt_start_converting)
			*recheck = true;

		mutex_exit(&fil_crypt_threads_mutex);
		return pending_op;
	}

	/* NOTE: we need to write and flush page 0 before publishing
	 * the crypt data. This so that after restart there is no
	 * risk of finding encrypted pages without having
	 * crypt data in page 0 */

	/* 1 - create crypt data */
	crypt_data = fil_space_create_crypt_data();
	if (crypt_data == NULL) {
		mutex_exit(&fil_crypt_threads_mutex);
		return pending_op;
	}

	/* start in unencrypted state; the real type is published in
	 * step 5 only after page 0 has been flushed */
	crypt_data->type = CRYPT_SCHEME_UNENCRYPTED;
	crypt_data->min_key_version = 0; // all pages are unencrypted
	crypt_data->rotate_state.start_time = time(0);
	crypt_data->rotate_state.starting = true;
	crypt_data->rotate_state.active_threads = 1;

	mutex_enter(&crypt_data->mutex);
	fil_space_set_crypt_data(space, crypt_data);
	mutex_exit(&crypt_data->mutex);

	fil_crypt_start_converting = true;
	mutex_exit(&fil_crypt_threads_mutex);

	/* single-iteration do/while used as a "break on failure" scope;
	 * the code after the loop is the failure/abort cleanup path */
	do
	{
		if (fil_crypt_is_closing(space) ||
		    fil_tablespace_is_being_deleted(space))
			break;

		mtr_t mtr;
		mtr_start(&mtr);

		/* 2 - get page 0 */
		ulint offset = 0;
		ulint zip_size = fil_space_get_zip_size(space);
		buf_block_t* block = buf_page_get_gen(space, zip_size, offset,
						      RW_X_LATCH,
						      NULL,
						      BUF_GET,
						      __FILE__, __LINE__,
						      &mtr);

		if (fil_crypt_is_closing(space) ||
		    fil_tablespace_is_being_deleted(space)) {
			mtr_commit(&mtr);
			break;
		}

		/* 3 - compute location to store crypt data */
		byte* frame = buf_block_get_frame(block);
		ulint maxsize;
		crypt_data->page0_offset =
			fsp_header_get_crypt_offset(zip_size, &maxsize);

		/* 4 - write crypt data to page 0 */
		fil_space_write_crypt_data_low(crypt_data,
					       CRYPT_SCHEME_1,
					       frame,
					       crypt_data->page0_offset,
					       maxsize, &mtr);

		mtr_commit(&mtr);

		if (fil_crypt_is_closing(space) ||
		    fil_tablespace_is_being_deleted(space)) {
			break;
		}

		/* record lsn of update */
		lsn_t end_lsn = mtr.end_lsn;

		/* 4 - sync tablespace before publishing crypt data */

		/* release "lock" while syncing */
		fil_decr_pending_ops(space);
		pending_op = false;

		/* flush the redo/page changes up to end_lsn so that the
		 * crypt data on page 0 is durable before being published */
		bool success = false;
		ulint n_pages = 0;
		ulint sum_pages = 0;
		do {
			success = buf_flush_list(ULINT_MAX, end_lsn, &n_pages);
			buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
			sum_pages += n_pages;
		} while (!success &&
			 !fil_crypt_is_closing(space) &&
			 !fil_tablespace_is_being_deleted(space));

		/* try to reacquire pending op */
		if (fil_inc_pending_ops(space, true))
			break;

		/* pending op reacquired! */
		pending_op = true;

		if (fil_crypt_is_closing(space) ||
		    fil_tablespace_is_being_deleted(space)) {
			break;
		}

		/* 5 - publish crypt data */
		mutex_enter(&fil_crypt_threads_mutex);
		mutex_enter(&crypt_data->mutex);
		crypt_data->type = CRYPT_SCHEME_1;
		ut_a(crypt_data->rotate_state.active_threads == 1);
		crypt_data->rotate_state.active_threads = 0;
		crypt_data->rotate_state.starting = false;

		fil_crypt_start_converting = false;
		mutex_exit(&crypt_data->mutex);
		mutex_exit(&fil_crypt_threads_mutex);

		/* success path returns here, before the cleanup below */
		return pending_op;
	} while (0);

	/* abort path: undo the "starting" state so other threads may
	 * retry the conversion later */
	mutex_enter(&crypt_data->mutex);
	ut_a(crypt_data->rotate_state.active_threads == 1);
	crypt_data->rotate_state.active_threads = 0;
	mutex_exit(&crypt_data->mutex);

	mutex_enter(&fil_crypt_threads_mutex);
	fil_crypt_start_converting = false;
	mutex_exit(&fil_crypt_threads_mutex);

	return pending_op;
}
+
/***********************************************************************
Check if a space needs key rotation and/or scrubbing given a key_state.

On a true return the caller OWNS a pending op on the space (acquired via
fil_inc_pending_ops) and must release it with fil_decr_pending_ops; on a
false return no pending op is held.
@return true if the space needs rotation or scrubbing */
static bool
fil_crypt_space_needs_rotation(uint space, const key_state_t *key_state,
			       bool *recheck)
{
	if (fil_space_get_type(space) != FIL_TABLESPACE)
		return false;

	if (fil_inc_pending_ops(space, true)) {
		/* tablespace being dropped */
		return false;
	}

	/* keep track of if we have pending op */
	bool pending_op = true;

	fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
	if (crypt_data == NULL) {
		/**
		 * space has no crypt data
		 * start encrypting it...
		 */
		/* may release/reacquire the pending op; its return value
		 * tells us whether we still hold one */
		pending_op = fil_crypt_start_encrypting_space(space, recheck);
		crypt_data = fil_space_get_crypt_data(space);
		if (crypt_data == NULL) {
			if (pending_op) {
				fil_decr_pending_ops(space);
			}
			return false;
		}
	}

	mutex_enter(&crypt_data->mutex);
	/* single-iteration do/while: every break falls through to the
	 * "no rotation needed" cleanup below */
	do {
		/* prevent threads from starting to rotate space */
		if (crypt_data->rotate_state.starting) {
			/* recheck this space later */
			*recheck = true;
			break;
		}

		/* prevent threads from starting to rotate space */
		if (crypt_data->closing)
			break;

		if (crypt_data->rotate_state.flushing)
			break;

		bool need_key_rotation = fil_crypt_needs_rotation(
			crypt_data->min_key_version, key_state);

		/* scrubbing is due when the last completed scrub is
		 * older than the configured interval */
		time_t diff = time(0) - crypt_data->rotate_state.scrubbing.
			last_scrub_completed;
		bool need_scrubbing =
			diff >= srv_background_scrub_data_interval;

		if (need_key_rotation == false && need_scrubbing == false)
			break;

		mutex_exit(&crypt_data->mutex);
		/* NOTE! fil_decr_pending_ops is performed outside */
		return true;
	} while (0);

	mutex_exit(&crypt_data->mutex);
	if (pending_op) {
		fil_decr_pending_ops(space);
	}
	return false;
}
+
/** State of a rotation thread */
struct rotate_thread_t {
	/* NOTE(review): memset(this) assumes all members (including
	 * fil_crypt_stat_t and btr_scrub_t) are POD with no vtable —
	 * presumably true here, but verify if members ever change */
	explicit rotate_thread_t(uint no) {
		memset(this, 0, sizeof(* this));
		thread_no = no;
		first = true;
		estimated_max_iops = 20;	/* initial guess; refined by
						fil_crypt_realloc_iops() */
	}

	uint thread_no;
	bool first;		/*!< is position before first space */
	ulint space;		/*!< current space */
	ulint offset;		/*!< current offset */
	ulint batch;		/*!< #pages to rotate */
	uint  min_key_version_found;/*!< min key version found but not rotated */
	lsn_t end_lsn;		/*!< max lsn when rotating this space */

	uint estimated_max_iops; /*!< estimation of max iops */
	uint allocated_iops;	 /*!< allocated iops */
	uint cnt_waited;	 /*!< #times waited during this slot */
	uint sum_waited_us;	 /*!< wait time during this slot */

	fil_crypt_stat_t crypt_stat; // statistics

	btr_scrub_t scrub_data;      /* thread local data used by btr_scrub-functions
				     * when iterating pages of tablespace */

	/* check if this thread should shutdown: either the server is
	 * shutting down or the thread count was reduced below our no */
	bool should_shutdown() const {
		return ! (srv_shutdown_state == SRV_SHUTDOWN_NONE &&
			  thread_no < srv_n_fil_crypt_threads);
	}
};
+
+/***********************************************************************
+Update global statistics with thread statistics */
+static void
+fil_crypt_update_total_stat(rotate_thread_t *state)
+{
+ mutex_enter(&crypt_stat_mutex);
+ crypt_stat.pages_read_from_cache +=
+ state->crypt_stat.pages_read_from_cache;
+ crypt_stat.pages_read_from_disk +=
+ state->crypt_stat.pages_read_from_disk;
+ crypt_stat.pages_modified += state->crypt_stat.pages_modified;
+ crypt_stat.pages_flushed += state->crypt_stat.pages_flushed;
+ // remote old estimate
+ crypt_stat.estimated_iops -= state->crypt_stat.estimated_iops;
+ // add new estimate
+ crypt_stat.estimated_iops += state->estimated_max_iops;
+ mutex_exit(&crypt_stat_mutex);
+
+ // make new estimate "current" estimate
+ memset(&state->crypt_stat, 0, sizeof(state->crypt_stat));
+ // record our old (current) estimate
+ state->crypt_stat.estimated_iops = state->estimated_max_iops;
+}
+
+/***********************************************************************
+Allocate iops to thread from global setting,
+used before starting to rotate a space */
+static bool
+fil_crypt_alloc_iops(rotate_thread_t *state)
+{
+ ut_ad(state->allocated_iops == 0);
+
+ uint max_iops = state->estimated_max_iops;
+ mutex_enter(&fil_crypt_threads_mutex);
+ if (n_fil_crypt_iops_allocated >= srv_n_fil_crypt_iops) {
+ /* this can happen when user decreases srv_fil_crypt_iops */
+ mutex_exit(&fil_crypt_threads_mutex);
+ return false;
+ }
+
+ uint alloc = srv_n_fil_crypt_iops - n_fil_crypt_iops_allocated;
+ if (alloc > max_iops)
+ alloc = max_iops;
+
+ n_fil_crypt_iops_allocated += alloc;
+ mutex_exit(&fil_crypt_threads_mutex);
+
+ state->allocated_iops = alloc;
+
+ return alloc > 0;
+}
+
/***********************************************************************
Reallocate iops to a thread while it is iterating inside a space.

Re-estimates the thread's max iops from observed page-wait times, then
either returns surplus iops to the global pool or grabs more from it.
Also folds thread statistics into the global counters. */
static void
fil_crypt_realloc_iops(rotate_thread_t *state)
{
	ut_a(state->allocated_iops > 0);

	if (10 * state->cnt_waited > state->batch) {
		/* if we waited more than 10% re-estimate max_iops */
		uint avg_wait_time_us =
			state->sum_waited_us / state->cnt_waited;

#if DEBUG_KEYROTATION_THROTTLING
		fprintf(stderr,
			"thr_no: %u - update estimated_max_iops from %u to %u\n",
			state->thread_no,
			state->estimated_max_iops,
			1000000 / avg_wait_time_us);
#endif
		if (avg_wait_time_us == 0)
			avg_wait_time_us = 1; // prevent division by zero

		/* iops ~= inverse of the average page wait time */
		state->estimated_max_iops = 1000000 / avg_wait_time_us;
		state->cnt_waited = 0;
		state->sum_waited_us = 0;
	} else {
#if DEBUG_KEYROTATION_THROTTLING
		fprintf(stderr,
			"thr_no: %u only waited %lu%% skip re-estimate\n",
			state->thread_no,
			(100 * state->cnt_waited) / state->batch);
#endif
	}

	if (state->estimated_max_iops <= state->allocated_iops) {
		/* return extra iops */
		uint extra = state->allocated_iops - state->estimated_max_iops;

		if (extra > 0) {
			mutex_enter(&fil_crypt_threads_mutex);
			if (n_fil_crypt_iops_allocated < extra) {
				/* unknown bug!
				 * crash in debug
				 * keep n_fil_crypt_iops_allocated unchanged
				 * in release */
				ut_ad(0);
				extra = 0;
			}
			n_fil_crypt_iops_allocated -= extra;
			state->allocated_iops -= extra;

			if (state->allocated_iops == 0) {
				/* no matter how slow io system seems to be
				 * never decrease allocated_iops to 0... */
				state->allocated_iops ++;
				n_fil_crypt_iops_allocated ++;
			}
			mutex_exit(&fil_crypt_threads_mutex);
			/* wake threads waiting for iops to become free */
			os_event_set(fil_crypt_threads_event);
		}
	} else {
		/* see if there are more to get */
		mutex_enter(&fil_crypt_threads_mutex);
		if (n_fil_crypt_iops_allocated < srv_n_fil_crypt_iops) {
			/* there are extra iops free */
			uint extra = srv_n_fil_crypt_iops -
				n_fil_crypt_iops_allocated;
			if (state->allocated_iops + extra >
			    state->estimated_max_iops) {
				/* but don't alloc more than our max */
				extra = state->estimated_max_iops -
					state->allocated_iops;
			}
			n_fil_crypt_iops_allocated += extra;
			state->allocated_iops += extra;
#if DEBUG_KEYROTATION_THROTTLING
			fprintf(stderr,
				"thr_no: %u increased iops from %u to %u\n",
				state->thread_no,
				state->allocated_iops - extra,
				state->allocated_iops);
#endif
		}
		mutex_exit(&fil_crypt_threads_mutex);
	}

	fil_crypt_update_total_stat(state);
}
+
+/***********************************************************************
+Return allocated iops to global */
+static void
+fil_crypt_return_iops(rotate_thread_t *state)
+{
+ if (state->allocated_iops > 0) {
+ uint iops = state->allocated_iops;
+ mutex_enter(&fil_crypt_threads_mutex);
+ if (n_fil_crypt_iops_allocated < iops) {
+ /* unknown bug!
+ * crash in debug
+ * keep n_fil_crypt_iops_allocated unchanged
+ * in release */
+ ut_ad(0);
+ iops = 0;
+ }
+ n_fil_crypt_iops_allocated -= iops;
+ mutex_exit(&fil_crypt_threads_mutex);
+ state->allocated_iops = 0;
+ os_event_set(fil_crypt_threads_event);
+ }
+
+ fil_crypt_update_total_stat(state);
+}
+
/***********************************************************************
Search for a space needing rotation, starting from state->space (or
from the first space when state->first is set).

On a true return the thread holds a pending op on state->space and has
iops allocated; on a false return the iops have been returned.
@return true if a space was found */
bool
fil_crypt_find_space_to_rotate(
	const key_state_t *key_state,
	rotate_thread_t *state,
	bool *recheck)
{
	/* we need iops to start rotating */
	while (!state->should_shutdown() && !fil_crypt_alloc_iops(state)) {
		os_event_reset(fil_crypt_threads_event);
		os_event_wait_time(fil_crypt_threads_event, 1000000);
	}

	if (state->should_shutdown())
		return false;

	if (state->first) {
		state->first = false;
		state->space = fil_get_first_space();
	} else {
		state->space = fil_get_next_space(state->space);
	}

	while (!state->should_shutdown() && state->space != ULINT_UNDEFINED) {

		ulint space = state->space;
		/* NOTE: on true, fil_crypt_space_needs_rotation leaves a
		 * pending op held; our caller releases it */
		if (fil_crypt_space_needs_rotation(space, key_state, recheck)) {
			/* init state->min_key_version_found before
			 * starting on a space */
			state->min_key_version_found = key_state->key_version;
			return true;
		}

		state->space = fil_get_next_space(space);
	}

	/* if we didn't find any space return iops */
	fil_crypt_return_iops(state);

	return false;

}
+
/***********************************************************************
Register this thread as an active rotator of state->space and copy the
shared rotate_state into the thread-local state. The first thread to
arrive initializes the shared iteration state (offset range, lsn,
min key version, start time). */
static
void
fil_crypt_start_rotate_space(
	const key_state_t *key_state,
	rotate_thread_t *state)
{
	ulint space = state->space;
	fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
	mutex_enter(&crypt_data->mutex);
	if (crypt_data->rotate_state.active_threads == 0) {
		/* only first thread needs to init */
		crypt_data->rotate_state.next_offset = 1; // skip page 0
		/* no need to rotate beyond current max
		 * if space extends, it will be encrypted with newer version */
		crypt_data->rotate_state.max_offset = fil_space_get_size(space);

		crypt_data->rotate_state.end_lsn = 0;
		crypt_data->rotate_state.min_key_version_found =
			key_state->key_version;

		crypt_data->rotate_state.start_time = time(0);
	}

	/* count active threads in space */
	crypt_data->rotate_state.active_threads++;

	/* Initialize thread local state */
	state->end_lsn = crypt_data->rotate_state.end_lsn;
	state->min_key_version_found =
		crypt_data->rotate_state.min_key_version_found;

	/* inform scrubbing */
	crypt_data->rotate_state.scrubbing.is_active =
		btr_scrub_start_space(space, &state->scrub_data);

	mutex_exit(&crypt_data->mutex);
}
+
+/***********************************************************************
+Search for batch of pages needing rotation */
+static
+bool
+fil_crypt_find_page_to_rotate(
+ const key_state_t *key_state,
+ rotate_thread_t *state)
+{
+ ulint batch = srv_alloc_time * state->allocated_iops;
+ ulint space = state->space;
+ fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
+ mutex_enter(&crypt_data->mutex);
+ if (crypt_data->closing == false &&
+ crypt_data->rotate_state.next_offset <
+ crypt_data->rotate_state.max_offset) {
+
+ state->offset = crypt_data->rotate_state.next_offset;
+ ulint remaining = crypt_data->rotate_state.max_offset -
+ crypt_data->rotate_state.next_offset;
+
+ if (batch <= remaining)
+ state->batch = batch;
+ else
+ state->batch = remaining;
+
+ crypt_data->rotate_state.next_offset += batch;
+ mutex_exit(&crypt_data->mutex);
+ return true;
+ }
+
+ mutex_exit(&crypt_data->mutex);
+ return false;
+}
+
+/***********************************************************************
+Check if a page is uninitialized (doesn't need to be rotated) */
+static bool
+fil_crypt_is_page_uninitialized(const byte* frame, uint zip_size)
+{
+ if (zip_size) {
+ ulint stored_checksum = mach_read_from_4(
+ frame + FIL_PAGE_SPACE_OR_CHKSUM);
+ /* empty pages aren't encrypted */
+ if (stored_checksum == 0) {
+ return true;
+ }
+ } else {
+ ulint size = UNIV_PAGE_SIZE;
+ ulint checksum_field1 = mach_read_from_4(
+ frame + FIL_PAGE_SPACE_OR_CHKSUM);
+ ulint checksum_field2 = mach_read_from_4(
+ frame + size - FIL_PAGE_END_LSN_OLD_CHKSUM);
+ /* empty pages are not encrypted */
+ if (checksum_field1 == 0 && checksum_field2 == 0
+ && mach_read_from_4(frame + FIL_PAGE_LSN) == 0) {
+ return true;
+ }
+ }
+ return false;
+}
+
+#define fil_crypt_get_page_throttle(state,space,zip_size,offset,mtr,sleeptime_ms) \
+ fil_crypt_get_page_throttle_func(state, space, zip_size, offset, mtr, \
+ sleeptime_ms, __FILE__, __LINE__)
+
/***********************************************************************
Get a page (from the buffer pool if possible, else from disk) and
accumulate the sleep time needed to honor the thread's iops budget.

Pages found in the buffer pool are free; only disk reads count as
waits. sleeptime_ms is increased (never reset) by the caller's debt.
@return the buffer block, x-latched in mtr */
static
buf_block_t*
fil_crypt_get_page_throttle_func(rotate_thread_t *state,
				 ulint space, uint zip_size, ulint offset,
				 mtr_t *mtr,
				 ulint *sleeptime_ms,
				 const char *file,
				 ulint line)
{
	buf_block_t* block = buf_page_try_get_func(space, offset, RW_X_LATCH,
						   true,
						   file, line, mtr);
	if (block != NULL) {
		/* page was in buffer pool */
		state->crypt_stat.pages_read_from_cache++;
		return block;
	}

	state->crypt_stat.pages_read_from_disk++;

	/* time the disk read to feed the iops estimation */
	ullint start = ut_time_us(NULL);
	block = buf_page_get_gen(space, zip_size, offset,
				 RW_X_LATCH,
				 NULL, BUF_GET_POSSIBLY_FREED,
				 file, line, mtr);
	ullint end = ut_time_us(NULL);

	if (end < start) {
		end = start; // safety...
	}

	state->cnt_waited++;
	state->sum_waited_us += (end - start);

	/* average page load */
	ulint add_sleeptime_ms = 0;
	ulint avg_wait_time_us = state->sum_waited_us / state->cnt_waited;
	/* target inter-read gap implied by the allocated iops;
	 * allocated_iops > 0 is guaranteed by fil_crypt_alloc_iops/
	 * fil_crypt_realloc_iops */
	ulint alloc_wait_us = 1000000 / state->allocated_iops;
	if (avg_wait_time_us < alloc_wait_us) {
		/* we reading faster than we allocated */
		add_sleeptime_ms = (alloc_wait_us - avg_wait_time_us) / 1000;
	} else {
		/* if page load time is longer than we want, skip sleeping */
	}

	*sleeptime_ms += add_sleeptime_ms;
	return block;
}
+
+
/***********************************************************************
Get block and allocation status.

note: innodb locks fil_space_latch and then block when allocating page
but locks block and then fil_space_latch when freeing page.

The local mtr holding fil_space_latch is therefore committed BEFORE
fetching an allocated block, to keep the latch order consistent and
avoid deadlock; for a free page the latch order already matches.
@return the buffer block, x-latched in mtr */
static
buf_block_t*
btr_scrub_get_block_and_allocation_status(
	rotate_thread_t *state,
	ulint space,
	ulint zip_size,
	ulint offset,
	mtr_t *mtr,
	btr_scrub_page_allocation_status_t *allocation_status,
	ulint *sleeptime_ms)
{
	mtr_t local_mtr;
	buf_block_t *block = NULL;
	mtr_start(&local_mtr);
	/* local_mtr holds fil_space_latch while the xdes bit is read */
	*allocation_status = fsp_page_is_free(space, offset, &local_mtr) ?
		BTR_SCRUB_PAGE_FREE :
		BTR_SCRUB_PAGE_ALLOCATED;

	if (*allocation_status == BTR_SCRUB_PAGE_FREE) {
		/* this is easy case, we lock fil_space_latch first and
		then block */
		block = fil_crypt_get_page_throttle(state,
						    space, zip_size,
						    offset, mtr,
						    sleeptime_ms);
		mtr_commit(&local_mtr);
	} else {
		/* page is allocated according to xdes */

		/* release fil_space_latch *before* fetching block */
		mtr_commit(&local_mtr);

		/* NOTE: when we have locked dict_index_get_lock(),
		 * it's safe to release fil_space_latch and then fetch block
		 * as dict_index_get_lock() is needed to make tree modifications
		 * such as free-ing a page
		 */

		block = fil_crypt_get_page_throttle(state,
						    space, zip_size,
						    offset, mtr,
						    sleeptime_ms);
	}

	return block;
}
+
+
/***********************************************************************
Rotate one page (state->space / state->offset).

Re-encryption is forced by a dummy mlog write of the page's space id,
which marks the page dirty so the next flush writes it with the current
key. The page may also be handed to the scrubbing machinery. Finally
the thread sleeps off any iops debt accumulated while reading. */
static
void
fil_crypt_rotate_page(
	const key_state_t *key_state,
	rotate_thread_t *state)
{
	ulint space = state->space;
	ulint offset = state->offset;
	const uint zip_size = fil_space_get_zip_size(space);
	ulint sleeptime_ms = 0;

	/* check if tablespace is closing before reading page */
	if (fil_crypt_is_closing(space))
		return;

	if (space == TRX_SYS_SPACE && offset == TRX_SYS_PAGE_NO) {
		/* don't encrypt this as it contains address to dblwr buffer */
		return;
	}

	mtr_t mtr;
	mtr_start(&mtr);
	buf_block_t* block = fil_crypt_get_page_throttle(state,
							 space, zip_size,
							 offset, &mtr,
							 &sleeptime_ms);

	bool modified = false;
	int needs_scrubbing = BTR_SCRUB_SKIP_PAGE;
	lsn_t block_lsn = block->page.newest_modification;
	uint kv =  block->page.key_version;

	/* check if tablespace is closing after reading page */
	if (!fil_crypt_is_closing(space)) {
		byte* frame = buf_block_get_frame(block);
		fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);

		if (kv == 0 &&
		    fil_crypt_is_page_uninitialized(frame, zip_size)) {
			/* never-written page: nothing to rotate */
			;
		} else if (fil_crypt_needs_rotation(kv, key_state)) {

			/* page can be "fresh" i.e never written in case
			 * kv == 0 or it should have a key version at least
			 * as big as the space minimum key version*/
			ut_a(kv == 0 || kv >= crypt_data->min_key_version);

			modified = true;

			/* force rotation by dummy updating page */
			mlog_write_ulint(frame +
					 FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
					 space, MLOG_4BYTES, &mtr);

			/* update block */
			block->page.key_version = key_state->key_version;

			/* statistics */
			state->crypt_stat.pages_modified++;
		} else {
			ut_a(kv >= crypt_data->min_key_version ||
			     (kv == 0 && key_state->key_version == 0));

			/* track the oldest key version left unrotated */
			if (kv < state->min_key_version_found) {
				state->min_key_version_found = kv;
			}
		}

		needs_scrubbing = btr_page_needs_scrubbing(
			&state->scrub_data, block,
			BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN);
	}

	mtr_commit(&mtr);
	lsn_t end_lsn = mtr.end_lsn;

	if (needs_scrubbing == BTR_SCRUB_PAGE) {
		mtr_start(&mtr);
		/*
		 * refetch page and allocation status
		 */
		btr_scrub_page_allocation_status_t allocated;
		block = btr_scrub_get_block_and_allocation_status(
			state, space, zip_size, offset, &mtr,
			&allocated,
			&sleeptime_ms);

		/* get required table/index and index-locks */
		needs_scrubbing = btr_scrub_recheck_page(
			&state->scrub_data, block, allocated, &mtr);

		if (needs_scrubbing == BTR_SCRUB_PAGE) {
			/* we need to refetch it once more now that we have
			 * index locked */
			block = btr_scrub_get_block_and_allocation_status(
				state, space, zip_size, offset, &mtr,
				&allocated,
				&sleeptime_ms);

			needs_scrubbing = btr_scrub_page(&state->scrub_data,
							 block, allocated,
							 &mtr);
		}

		/* NOTE: mtr is committed inside btr_scrub_recheck_page()
		 * and/or btr_scrub_page. This is to make sure that
		 * locks & pages are latched in corrected order,
		 * the mtr is in some circumstances restarted.
		 * (mtr_commit() + mtr_start())
		 */
	}

	if (needs_scrubbing != BTR_SCRUB_PAGE) {
		/* if page didn't need scrubbing it might be that cleanups
		are needed. do those outside of any mtr to prevent deadlocks.

		the information what kinds of cleanups that are needed are
		encoded inside the needs_scrubbing, but this is opaque to
		this function (except the value BTR_SCRUB_PAGE) */
		btr_scrub_skip_page(&state->scrub_data, needs_scrubbing);
	}

	if (needs_scrubbing == BTR_SCRUB_TURNED_OFF) {
		/* if we just detected that scrubbing was turned off
		 * update global state to reflect this */
		fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
		mutex_enter(&crypt_data->mutex);
		crypt_data->rotate_state.scrubbing.is_active = false;
		mutex_exit(&crypt_data->mutex);
	}

	if (modified) {
		/* if we modified page, we take lsn from mtr */
		ut_a(end_lsn > state->end_lsn);
		ut_a(end_lsn > block_lsn);
		state->end_lsn = end_lsn;
	} else {
		/* if we did not modify page, check for max lsn */
		if (block_lsn > state->end_lsn) {
			state->end_lsn = block_lsn;
		}
	}

	if (sleeptime_ms) {
		/* sleep off the accumulated iops debt; the event lets
		 * shutdown wake us early */
		os_event_reset(fil_crypt_throttle_sleep_event);
		os_event_wait_time(fil_crypt_throttle_sleep_event,
				   1000 * sleeptime_ms);
	}
}
+
+/***********************************************************************
+Rotate a batch of pages */
+static
+void
+fil_crypt_rotate_pages(
+ const key_state_t *key_state,
+ rotate_thread_t *state)
+{
+ ulint space = state->space;
+ ulint end = state->offset + state->batch;
+ for (; state->offset < end; state->offset++) {
+
+ /* we can't rotate pages in dblwr buffer as
+ * it's not possible to read those due to lots of asserts
+ * in buffer pool.
+ *
+ * However since these are only (short-lived) copies of
+ * real pages, they will be updated anyway when the
+ * real page is updated
+ */
+ if (space == TRX_SYS_SPACE &&
+ buf_dblwr_page_inside(state->offset)) {
+ continue;
+ }
+
+ fil_crypt_rotate_page(key_state, state);
+ }
+}
+
/***********************************************************************
Flush all pages of a space modified during rotation (so no page remains
encrypted with an old key on disk) and then rewrite the crypt data on
page 0 with the new minimum key version. */
static
void
fil_crypt_flush_space(rotate_thread_t *state, ulint space)
{
	fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);

	/* flush tablespace pages so that there are no pages left with old key */
	lsn_t end_lsn = crypt_data->rotate_state.end_lsn;
	if (end_lsn > 0 && !fil_crypt_is_closing(space)) {
		bool success = false;
		ulint n_pages = 0;
		ulint sum_pages = 0;
		ullint start = ut_time_us(NULL);
		do {
			success = buf_flush_list(ULINT_MAX, end_lsn, &n_pages);
			buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
			sum_pages += n_pages;
		} while (!success && !fil_crypt_is_closing(space));
		ullint end = ut_time_us(NULL);
		/* feed the flush cost into the thread's iops estimate */
		if (sum_pages && end > start) {
			state->cnt_waited += sum_pages;
			state->sum_waited_us += (end - start);

			/* statistics */
			state->crypt_stat.pages_flushed += sum_pages;
		}
	}

	/* min_key_version == 0 means every page ended up unencrypted */
	if (crypt_data->min_key_version == 0) {
		crypt_data->type = CRYPT_SCHEME_UNENCRYPTED;
	}

	/* update page 0 */
	if (!fil_crypt_is_closing(space)) {
		mtr_t mtr;
		mtr_start(&mtr);
		ulint offset = 0; // page 0
		const uint zip_size = fil_space_get_zip_size(space);
		buf_block_t* block = buf_page_get_gen(space, zip_size, offset,
						      RW_X_LATCH, NULL, BUF_GET,
						      __FILE__, __LINE__, &mtr);
		byte* frame = buf_block_get_frame(block);
		fil_space_write_crypt_data(space, frame,
					   crypt_data->page0_offset,
					   ULINT_MAX, &mtr);
		mtr_commit(&mtr);
	}
}
+
/***********************************************************************
Complete this thread's participation in rotating a space.

Merges the thread's findings (min key version, end lsn) into the shared
rotate_state; the last thread to leave — and only if the whole offset
range was covered — flushes the space and publishes the new minimum key
version. */
static
void
fil_crypt_complete_rotate_space(
	const key_state_t *key_state,
	rotate_thread_t *state)
{
	ulint space = state->space;
	fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
	mutex_enter(&crypt_data->mutex);

	/**
	 * Update crypt data state with state from thread
	 */
	if (state->min_key_version_found <
	    crypt_data->rotate_state.min_key_version_found) {
		crypt_data->rotate_state.min_key_version_found =
			state->min_key_version_found;
	}

	if (state->end_lsn > crypt_data->rotate_state.end_lsn) {
		crypt_data->rotate_state.end_lsn = state->end_lsn;
	}

	ut_a(crypt_data->rotate_state.active_threads > 0);
	crypt_data->rotate_state.active_threads--;
	bool last = crypt_data->rotate_state.active_threads == 0;

	/**
	 * check if space is fully done
	 * this as when threads shutdown, it could be that we "complete"
	 * iterating before we have scanned the full space.
	 */
	bool done = crypt_data->rotate_state.next_offset >=
		crypt_data->rotate_state.max_offset;

	/**
	 * we should flush space if we're last thread AND
	 * the iteration is done
	 */
	bool should_flush = last && done;

	if (should_flush) {
		/* we're the last active thread */
		/* flushing blocks new rotations until page 0 is updated */
		crypt_data->rotate_state.flushing = true;
		crypt_data->min_key_version =
			crypt_data->rotate_state.min_key_version_found;
	}

	/* inform scrubbing */
	crypt_data->rotate_state.scrubbing.is_active = false;
	mutex_exit(&crypt_data->mutex);

	/* all threads must call btr_scrub_complete_space wo/ mutex held */
	if (btr_scrub_complete_space(&state->scrub_data) == true) {
		if (should_flush) {
			/* only last thread updates last_scrub_completed */
			mutex_enter(&crypt_data->mutex);
			crypt_data->rotate_state.scrubbing.
				last_scrub_completed = time(0);
			mutex_exit(&crypt_data->mutex);
		}
	}

	if (should_flush) {
		fil_crypt_flush_space(state, space);

		mutex_enter(&crypt_data->mutex);
		crypt_data->rotate_state.flushing = false;
		mutex_exit(&crypt_data->mutex);
	}
}
+
/*********************************************************************//**
A thread which monitors global key state and rotates tablespaces accordingly.
Sleeps on fil_crypt_threads_event between key-state changes, then walks
all tablespaces rotating those that need it, cooperating with sibling
threads through the per-space rotate_state.
@return a dummy parameter */
extern "C" UNIV_INTERN
os_thread_ret_t
DECLARE_THREAD(fil_crypt_thread)(
/*===============================*/
	void*	arg __attribute__((unused))) /*!< in: a dummy parameter required
					     * by os_thread_create */
{
	UT_NOT_USED(arg);

	mutex_enter(&fil_crypt_threads_mutex);
	uint thread_no = srv_n_fil_crypt_threads_started;
	srv_n_fil_crypt_threads_started++;
	mutex_exit(&fil_crypt_threads_mutex);
	os_event_set(fil_crypt_event); /* signal that we started */

	/* state of this thread */
	rotate_thread_t thr(thread_no);

	/* if we find a space that is starting, skip over it and recheck it later */
	bool recheck = false;

	key_state_t key_state;
	fil_crypt_get_key_state(&key_state);

	/* make sure that thread always checks all tablespace when starting.
	 *
	 * by decreasing key_version, loop that waits for change in key-state
	 * should exit immediately causing thread to check all spaces when starting */
	key_state.key_version--;

	while (!thr.should_shutdown()) {

		key_state_t new_state;
		fil_crypt_get_key_state(&new_state);

		/* wait here until the key state changes, a recheck is
		 * pending, or the scrub check interval elapses */
		time_t wait_start = time(0);
		while (!thr.should_shutdown() && key_state == new_state) {

			/* wait for key state changes
			 * i.e either new key version of change or
			 * new rotate_key_age */
			os_event_reset(fil_crypt_threads_event);
			os_event_wait_time(fil_crypt_threads_event, 1000000);
			fil_crypt_get_key_state(&new_state);

			if (recheck) {
				/* check recheck here, after sleep, so
				 * that we don't busy loop while when one thread is starting
				 * a space*/
				break;
			}

			time_t waited = time(0) - wait_start;
			if (waited >= srv_background_scrub_data_check_interval)
				break;
		}

		recheck = false;
		thr.first = true;      // restart from first tablespace
		key_state = new_state; // save for next loop

		/* iterate all spaces searching for those needing rotation */
		while (!thr.should_shutdown() &&
		       fil_crypt_find_space_to_rotate(&new_state, &thr, &recheck)) {

			/* we found a space to rotate */
			fil_crypt_start_rotate_space(&new_state, &thr);

			/* decrement pending ops that was incremented in
			 * fil_crypt_space_needs_rotation
			 * (called from fil_crypt_find_space_to_rotate),
			 * this makes sure that tablespace won't be dropped
			 * just after we decided to start processing it. */
			fil_decr_pending_ops(thr.space);

			/* iterate all pages (cooperativly with other threads) */
			while (!thr.should_shutdown() &&
			       fil_crypt_find_page_to_rotate(&new_state, &thr)) {

				/* rotate a (set) of pages */
				fil_crypt_rotate_pages(&new_state, &thr);

				/* realloc iops */
				fil_crypt_realloc_iops(&thr);
			}

			/* complete rotation */
			fil_crypt_complete_rotate_space(&new_state, &thr);

			/* refresh key state */
			fil_crypt_get_key_state(&new_state);

			/* return iops */
			fil_crypt_return_iops(&thr);
		}
	}

	/* return iops if shutting down */
	fil_crypt_return_iops(&thr);

	mutex_enter(&fil_crypt_threads_mutex);
	srv_n_fil_crypt_threads_started--;
	mutex_exit(&fil_crypt_threads_mutex);
	os_event_set(fil_crypt_event); /* signal that we stopped */

	/* We count the number of threads in os_thread_exit(). A created
	thread should always use that to exit and not use return() to exit. */

	os_thread_exit(NULL);

	OS_THREAD_DUMMY_RETURN;
}
+
+/*********************************************************************
+Adjust thread count for key rotation */
+UNIV_INTERN
+void
+fil_crypt_set_thread_cnt(uint new_cnt) {
+	/* Grow or shrink the pool of key rotation threads to new_cnt.
+	When growing, spawn only the additional threads; when shrinking,
+	lower the target and wake all threads so the surplus ones notice
+	and exit on their own. */
+	if (new_cnt > srv_n_fil_crypt_threads) {
+		uint add = new_cnt - srv_n_fil_crypt_threads;
+		srv_n_fil_crypt_threads = new_cnt;
+		for (uint i = 0; i < add; i++) {
+			os_thread_create(fil_crypt_thread, NULL, NULL);
+		}
+	} else if (new_cnt < srv_n_fil_crypt_threads) {
+		srv_n_fil_crypt_threads = new_cnt;
+		os_event_set(fil_crypt_threads_event);
+	}
+
+	/* Block until the started-thread count matches the target; each
+	exiting/starting thread signals fil_crypt_event.
+	NOTE(review): srv_n_fil_crypt_threads_started is read here without
+	holding fil_crypt_threads_mutex — presumably benign because the
+	loop re-checks after a timed wait, but worth confirming. */
+	while(srv_n_fil_crypt_threads_started != srv_n_fil_crypt_threads) {
+		os_event_reset(fil_crypt_event);
+		os_event_wait_time(fil_crypt_event, 1000000);
+	}
+}
+
+/*********************************************************************
+Adjust max key age */
+UNIV_INTERN
+void
+fil_crypt_set_rotate_key_age(uint val)
+{
+	/* Publish the new maximum key age and wake the rotation threads
+	so they re-evaluate which spaces/pages now need re-encryption. */
+	srv_fil_crypt_rotate_key_age = val;
+	os_event_set(fil_crypt_threads_event);
+}
+
+/*********************************************************************
+Adjust rotation iops */
+UNIV_INTERN
+void
+fil_crypt_set_rotation_iops(uint val)
+{
+	/* Publish the new global iops budget for rotation and wake the
+	threads so they re-allocate their per-thread iops share. */
+	srv_n_fil_crypt_iops = val;
+	os_event_set(fil_crypt_threads_event);
+}
+
+/*********************************************************************
+Init threads for key rotation */
+UNIV_INTERN
+void
+fil_crypt_threads_init()
+{
+	/* Create the events and mutex used to coordinate key rotation
+	threads, then start the configured number of threads. */
+	fil_crypt_event = os_event_create();
+	fil_crypt_threads_event = os_event_create();
+	mutex_create(fil_crypt_threads_mutex_key,
+		     &fil_crypt_threads_mutex, SYNC_NO_ORDER_CHECK);
+
+	/* Reset the counter to 0 first so fil_crypt_set_thread_cnt()
+	sees the full delta and actually spawns cnt threads. */
+	uint cnt = srv_n_fil_crypt_threads;
+	srv_n_fil_crypt_threads = 0;
+	fil_crypt_set_thread_cnt(cnt);
+}
+
+/*********************************************************************
+End threads for key rotation */
+UNIV_INTERN
+void
+fil_crypt_threads_end()
+{
+	/* stop threads; fil_crypt_set_thread_cnt() blocks until all
+	rotation threads have actually exited */
+	fil_crypt_set_thread_cnt(0);
+}
+
+/*********************************************************************
+Clean up key rotation threads resources */
+UNIV_INTERN
+void
+fil_crypt_threads_cleanup() {
+	/* Free the events created in fil_crypt_threads_init().
+	NOTE(review): fil_crypt_throttle_sleep_event and
+	fil_crypt_threads_mutex are not released here — confirm they are
+	cleaned up elsewhere (or intentionally leaked at shutdown). */
+	os_event_free(fil_crypt_event);
+	os_event_free(fil_crypt_threads_event);
+}
+
+/*********************************************************************
+Mark a space as closing */
+UNIV_INTERN
+void
+fil_space_crypt_mark_space_closing(
+	ulint	space)
+{
+	/* Flag the space's crypt data as closing so rotation threads stop
+	picking it up. fil_crypt_threads_mutex is held across the lookup so
+	crypt_data cannot be destroyed under us; crypt_data->mutex is
+	acquired before releasing it (lock coupling) to keep it pinned. */
+	mutex_enter(&fil_crypt_threads_mutex);
+	fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space);
+	if (crypt_data == NULL) {
+		/* nothing to mark — space unknown or not encrypted */
+		mutex_exit(&fil_crypt_threads_mutex);
+		return;
+	}
+
+	mutex_enter(&crypt_data->mutex);
+	mutex_exit(&fil_crypt_threads_mutex);
+	crypt_data->closing = true;
+	mutex_exit(&crypt_data->mutex);
+}
+
+/*********************************************************************
+Wait for crypt threads to stop accessing space */
+UNIV_INTERN
+void
+fil_space_crypt_close_tablespace(
+	ulint	space)
+{
+	/* Mark the space closing and then poll until every rotation
+	thread has left it and any flush in progress has completed. */
+	mutex_enter(&fil_crypt_threads_mutex);
+	fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space);
+	if (crypt_data == NULL) {
+		/* no crypt data => no rotation threads to wait for */
+		mutex_exit(&fil_crypt_threads_mutex);
+		return;
+	}
+
+	/* NOTE(review): time(0) returns time_t; storing it in uint
+	truncates on platforms with 64-bit time_t. Harmless for the
+	deltas computed below, but worth confirming. */
+	uint start = time(0);
+	uint last = start;
+	mutex_enter(&crypt_data->mutex);
+	mutex_exit(&fil_crypt_threads_mutex);
+	crypt_data->closing = true;
+	uint cnt = crypt_data->rotate_state.active_threads;
+	bool flushing = crypt_data->rotate_state.flushing;
+	while (cnt > 0 || flushing) {
+		mutex_exit(&crypt_data->mutex);
+		/* release dict mutex so that scrub threads can release their
+		 * table references */
+		dict_mutex_exit_for_mysql();
+		/* wakeup throttle (all) sleepers */
+		os_event_set(fil_crypt_throttle_sleep_event);
+		os_thread_sleep(20000);
+		dict_mutex_enter_for_mysql();
+		mutex_enter(&crypt_data->mutex);
+		cnt = crypt_data->rotate_state.active_threads;
+		flushing = crypt_data->rotate_state.flushing;
+
+		/* warn at most every 30 seconds while the wait drags on */
+		uint now = time(0);
+		if (now >= last + 30) {
+			fprintf(stderr,
+				"WARNING: "
+				"waited %u seconds to drop space: %lu\n",
+				now - start, space);
+			last = now;
+		}
+	}
+	mutex_exit(&crypt_data->mutex);
+}
+
+/*********************************************************************
+Get crypt status for a space (used by information_schema)
+return 0 if crypt data present */
+int
+fil_space_crypt_get_status(
+/*==================*/
+	ulint id,				/*!< in: space id */
+	struct fil_space_crypt_status_t* status)	/*!< out: status */
+{
+	fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(id);
+
+	/* Zero the whole struct up front. Previously, when crypt data
+	existed but no rotation was active, rotate_next_page_number,
+	rotate_max_page_number and flushing were left uninitialized and
+	garbage was reported to information_schema. This also makes the
+	separate memset in the no-crypt-data branch unnecessary. */
+	memset(status, 0, sizeof(*status));
+
+	if (crypt_data != NULL) {
+		status->space = id;
+		status->scheme = crypt_data->type;
+		mutex_enter(&crypt_data->mutex);
+		status->keyserver_requests = crypt_data->keyserver_requests;
+		status->min_key_version = crypt_data->min_key_version;
+		if (crypt_data->rotate_state.active_threads > 0 ||
+		    crypt_data->rotate_state.flushing) {
+			status->rotating = true;
+			status->flushing =
+				crypt_data->rotate_state.flushing;
+			status->rotate_next_page_number =
+				crypt_data->rotate_state.next_offset;
+			status->rotate_max_page_number =
+				crypt_data->rotate_state.max_offset;
+		}
+		mutex_exit(&crypt_data->mutex);
+	}
+
+	/* the current key version is reported even for spaces without
+	crypt data, as long as encryption is enabled */
+	if (srv_encrypt_tables == TRUE) {
+		status->current_key_version = GetLatestCryptoKeyVersion();
+	} else {
+		status->current_key_version = 0;
+	}
+	return crypt_data == NULL ? 1 : 0;
+}
+
+/*********************************************************************
+Return crypt statistics */
+void
+fil_crypt_total_stat(fil_crypt_stat_t *stat)
+{
+	/* Copy the global rotation statistics to *stat; crypt_stat_mutex
+	makes the struct copy consistent w.r.t. concurrent updaters. */
+	mutex_enter(&crypt_stat_mutex);
+	*stat = crypt_stat;
+	mutex_exit(&crypt_stat_mutex);
+}
+
+/*********************************************************************
+Get scrub status for a space (used by information_schema)
+return 0 if data found */
+int
+fil_space_get_scrub_status(
+/*==================*/
+	ulint id,				/*!< in: space id */
+	struct fil_space_scrub_status_t* status)	/*!< out: status */
+{
+	fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(id);
+
+	/* Zero all fields once up front; this already covers the
+	no-crypt-data case, so the previous duplicated memset in an
+	else-branch has been removed. */
+	memset(status, 0, sizeof(*status));
+
+	if (crypt_data != NULL) {
+		status->space = id;
+		status->compressed = fil_space_get_zip_size(id) > 0;
+		mutex_enter(&crypt_data->mutex);
+		status->last_scrub_completed =
+			crypt_data->rotate_state.scrubbing.last_scrub_completed;
+		if (crypt_data->rotate_state.active_threads > 0 &&
+		    crypt_data->rotate_state.scrubbing.is_active) {
+			status->scrubbing = true;
+			status->current_scrub_started =
+				crypt_data->rotate_state.start_time;
+			status->current_scrub_active_threads =
+				crypt_data->rotate_state.active_threads;
+			status->current_scrub_page_number =
+				crypt_data->rotate_state.next_offset;
+			status->current_scrub_max_page_number =
+				crypt_data->rotate_state.max_offset;
+		}
+		mutex_exit(&crypt_data->mutex);
+	}
+
+	return crypt_data == NULL ? 1 : 0;
+}
diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc
index 08487f595ed..e4be4f6910c 100644
--- a/storage/xtradb/fil/fil0fil.cc
+++ b/storage/xtradb/fil/fil0fil.cc
@@ -56,6 +56,10 @@ Created 10/25/1995 Heikki Tuuri
static ulint srv_data_read, srv_data_written;
#endif /* !UNIV_HOTBACKUP */
#include "fil0pagecompress.h"
+
+#include "fil0pageencryption.h"
+#include "fsp0pageencryption.h"
+
#include "zlib.h"
#ifdef __linux__
#include <linux/fs.h>
@@ -645,8 +649,23 @@ fil_node_open_file(
success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE,
space->flags);
+ if (fil_page_encryption_status(page)) {
+ /* if page is (still) encrypted, write an error and return.
+ * Otherwise the server would crash if decrypting is not possible.
+ * This may be the case, if the key file could not be
+ * opened on server startup.
+ */
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "InnoDB: can not decrypt page, because "
+ "keys could not be read.\n"
+ );
+ return false;
+
+ }
+
space_id = fsp_header_get_space_id(page);
flags = fsp_header_get_flags(page);
+
page_size = fsp_flags_get_page_size(flags);
atomic_writes = fsp_flags_get_atomic_writes(flags);
@@ -1157,7 +1176,8 @@ fil_space_create(
const char* name, /*!< in: space name */
ulint id, /*!< in: space id */
ulint flags, /*!< in: tablespace flags */
- ulint purpose)/*!< in: FIL_TABLESPACE, or FIL_LOG if log */
+ ulint purpose,/*!< in: FIL_TABLESPACE, or FIL_LOG if log */
+ fil_space_crypt_t* crypt_data) /*!< in: crypt data */
{
fil_space_t* space;
@@ -1165,6 +1185,21 @@ fil_space_create(
ut_a(fil_system);
+ if (fsp_flags_is_page_encrypted(flags)) {
+ if (!HasCryptoKey(fsp_flags_get_page_encryption_key(flags))) {
+ /* by returning here it should be avoided that
+ * the server crashes, if someone tries to access an
+ * encrypted table and the encryption key is not available.
+ * The the table is treaded as non-existent.
+ */
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Tablespace '%s' can not be opened, because "
+ " encryption key can not be found (space id: %lu, key %lu)\n"
+ , name, (ulong) id, fsp_flags_get_page_encryption_key(flags));
+ return (FALSE);
+ }
+ }
+
/* Look for a matching tablespace and if found free it. */
do {
mutex_enter(&fil_system->mutex);
@@ -1253,6 +1288,8 @@ fil_space_create(
UT_LIST_ADD_LAST(space_list, fil_system->space_list, space);
+ space->crypt_data = crypt_data;
+
mutex_exit(&fil_system->mutex);
return(TRUE);
@@ -1387,6 +1424,8 @@ fil_space_free(
rw_lock_free(&(space->latch));
+ fil_space_destroy_crypt_data(&(space->crypt_data));
+
mem_free(space->name);
mem_free(space);
@@ -1620,6 +1659,8 @@ fil_init(
UT_LIST_INIT(fil_system->LRU);
fil_system->max_n_open = max_n_open;
+
+ fil_space_crypt_init();
}
/*******************************************************************//**
@@ -1827,7 +1868,8 @@ fil_write_lsn_and_arch_no_to_file(
err = fil_read(TRUE, space, 0, sum_of_sizes, 0,
UNIV_PAGE_SIZE, buf, NULL, 0);
if (err == DB_SUCCESS) {
- mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
+ mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
+ lsn);
err = fil_write(TRUE, space, 0, sum_of_sizes, 0,
UNIV_PAGE_SIZE, buf, NULL, 0);
@@ -1909,6 +1951,7 @@ fil_check_first_page(
{
ulint space_id;
ulint flags;
+ ulint page_is_encrypted;
if (srv_force_recovery >= SRV_FORCE_IGNORE_CORRUPT) {
return(NULL);
@@ -1916,12 +1959,23 @@ fil_check_first_page(
space_id = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page);
flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page);
-
- if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) {
- fprintf(stderr, "InnoDB: Error: Current page size %lu != page size on page %lu\n",
- UNIV_PAGE_SIZE, fsp_flags_get_page_size(flags));
-
- return("innodb-page-size mismatch");
+ /* Note: the 1st page is usually not encrypted. If the Key Provider
+ or the encryption key is not available, the
+ check for reading the first page should intentionally fail
+ with "can not decrypt" message. */
+ page_is_encrypted = fil_page_encryption_status(page);
+ if (page_is_encrypted == PAGE_ENCRYPTION_KEY_MISSING && page_is_encrypted) {
+ page_is_encrypted = 1;
+ } else {
+ page_is_encrypted = 0;
+ if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) {
+ fprintf(stderr,
+ "InnoDB: Error: Current page size %lu != "
+ " page size on page %lu\n",
+ UNIV_PAGE_SIZE, fsp_flags_get_page_size(flags));
+
+ return("innodb-page-size mismatch");
+ }
}
if (!space_id && !flags) {
@@ -1937,9 +1991,17 @@ fil_check_first_page(
}
}
- if (buf_page_is_corrupted(
+ if (!page_is_encrypted && buf_page_is_corrupted(
false, page, fsp_flags_get_zip_size(flags))) {
return("checksum mismatch");
+ } else {
+ if (page_is_encrypted) {
+ /* this error message is interpreted by the calling method, which is
+ * executed if the server starts in recovery mode.
+ */
+ return(MSG_CANNOT_DECRYPT);
+
+ }
}
if (page_get_space_id(page) == space_id
@@ -1969,8 +2031,9 @@ fil_read_first_page(
lsn values in data files */
lsn_t* max_flushed_lsn, /*!< out: max of flushed
lsn values in data files */
- ulint orig_space_id) /*!< in: original file space
+ ulint orig_space_id, /*!< in: original file space
id */
+ fil_space_crypt_t** crypt_data) /*< out: crypt data */
{
byte* buf;
byte* page;
@@ -2008,7 +2071,16 @@ fil_read_first_page(
check_msg = fil_check_first_page(page);
}
- flushed_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN);
+ flushed_lsn = mach_read_from_8(page +
+ FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+
+ if (crypt_data) {
+ ulint space = fsp_header_get_space_id(page);
+ ulint offset =
+ fsp_header_get_crypt_offset(
+ fsp_flags_get_zip_size(*flags), NULL);
+ *crypt_data = fil_space_read_crypt_data(space, page, offset);
+ }
ut_free(buf);
@@ -2487,6 +2559,9 @@ fil_check_pending_operations(
*space = 0;
+ /* Wait for crypt threads to stop accessing space */
+ fil_space_crypt_close_tablespace(id);
+
mutex_enter(&fil_system->mutex);
fil_space_t* sp = fil_space_get_by_id(id);
if (sp) {
@@ -3468,7 +3543,8 @@ fil_create_new_single_table_tablespace(
}
}
- success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE);
+ success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE,
+ fil_space_create_crypt_data());
if (!success || !fil_node_create(path, size, space_id, FALSE)) {
err = DB_ERROR;
goto error_exit_1;
@@ -3596,6 +3672,7 @@ fil_open_single_table_tablespace(
ulint tablespaces_found = 0;
ulint valid_tablespaces_found = 0;
ulint atomic_writes = 0;
+ fil_space_crypt_t* crypt_data = NULL;
#ifdef UNIV_SYNC_DEBUG
ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
@@ -3694,7 +3771,7 @@ fil_open_single_table_tablespace(
if (def.success) {
def.check_msg = fil_read_first_page(
def.file, FALSE, &def.flags, &def.id,
- &def.lsn, &def.lsn, id);
+ &def.lsn, &def.lsn, id, &def.crypt_data);
def.valid = !def.check_msg;
/* Validate this single-table-tablespace with SYS_TABLES,
@@ -3716,7 +3793,7 @@ fil_open_single_table_tablespace(
if (remote.success) {
remote.check_msg = fil_read_first_page(
remote.file, FALSE, &remote.flags, &remote.id,
- &remote.lsn, &remote.lsn, id);
+ &remote.lsn, &remote.lsn, id, &remote.crypt_data);
remote.valid = !remote.check_msg;
/* Validate this single-table-tablespace with SYS_TABLES,
@@ -3739,7 +3816,7 @@ fil_open_single_table_tablespace(
if (dict.success) {
dict.check_msg = fil_read_first_page(
dict.file, FALSE, &dict.flags, &dict.id,
- &dict.lsn, &dict.lsn, id);
+ &dict.lsn, &dict.lsn, id, &dict.crypt_data);
dict.valid = !dict.check_msg;
/* Validate this single-table-tablespace with SYS_TABLES,
@@ -3892,9 +3969,17 @@ fil_open_single_table_tablespace(
}
skip_validate:
+ if (remote.success)
+ crypt_data = remote.crypt_data;
+ else if (dict.success)
+ crypt_data = dict.crypt_data;
+ else if (def.success)
+ crypt_data = def.crypt_data;
+
if (err != DB_SUCCESS) {
; // Don't load the tablespace into the cache
- } else if (!fil_space_create(tablename, id, flags, FIL_TABLESPACE)) {
+ } else if (!fil_space_create(tablename, id, flags, FIL_TABLESPACE,
+ crypt_data)) {
err = DB_ERROR;
} else {
/* We do not measure the size of the file, that is why
@@ -3914,15 +3999,25 @@ cleanup_and_exit:
if (remote.filepath) {
mem_free(remote.filepath);
}
+ if (remote.crypt_data && remote.crypt_data != crypt_data) {
+ fil_space_destroy_crypt_data(&remote.crypt_data);
+ }
if (dict.success) {
os_file_close(dict.file);
}
if (dict.filepath) {
mem_free(dict.filepath);
}
+ if (dict.crypt_data && dict.crypt_data != crypt_data) {
+ fil_space_destroy_crypt_data(&dict.crypt_data);
+ }
if (def.success) {
os_file_close(def.file);
}
+ if (def.crypt_data && def.crypt_data != crypt_data) {
+ fil_space_destroy_crypt_data(&def.crypt_data);
+ }
+
mem_free(def.filepath);
return(err);
@@ -4139,13 +4234,22 @@ fil_validate_single_table_tablespace(
check_first_page:
fsp->success = TRUE;
+ fsp->encryption_error = 0;
if (const char* check_msg = fil_read_first_page(
fsp->file, FALSE, &fsp->flags, &fsp->id,
- &fsp->lsn, &fsp->lsn, ULINT_UNDEFINED)) {
+ &fsp->lsn, &fsp->lsn, ULINT_UNDEFINED, &fsp->crypt_data)) {
ib_logf(IB_LOG_LEVEL_ERROR,
"%s in tablespace %s (table %s)",
check_msg, fsp->filepath, tablename);
fsp->success = FALSE;
+ if (strncmp(check_msg, MSG_CANNOT_DECRYPT, strlen(check_msg))==0) {
+ /* by returning here, it should be avoided, that the server crashes,
+ * if started in recovery mode and can not decrypt tables, if
+ * the key file can not be read.
+ */
+ fsp->encryption_error = 1;
+ return;
+ }
}
if (!fsp->success) {
@@ -4299,6 +4403,14 @@ fil_load_single_table_tablespace(
}
if (!def.success && !remote.success) {
+
+ if (def.encryption_error || remote.encryption_error) {
+ fprintf(stderr,
+ "InnoDB: Error: could not open single-table"
+ " tablespace file %s. Encryption error!\n", def.filepath);
+ return;
+ }
+
/* The following call prints an error message */
os_file_get_last_error(true);
fprintf(stderr,
@@ -4482,7 +4594,8 @@ will_not_choose:
mutex_exit(&fil_system->mutex);
#endif /* UNIV_HOTBACKUP */
ibool file_space_create_success = fil_space_create(
- tablename, fsp->id, fsp->flags, FIL_TABLESPACE);
+ tablename, fsp->id, fsp->flags, FIL_TABLESPACE,
+ fsp->crypt_data);
if (!file_space_create_success) {
if (srv_force_recovery > 0) {
@@ -5133,7 +5246,7 @@ retry:
success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
node->name, node->handle, buf,
offset, page_size * n_pages,
- node, NULL, space_id, NULL, 0, 0, 0);
+ node, NULL, space_id, NULL, 0, 0, 0, 0, 0);
#endif /* UNIV_HOTBACKUP */
if (success) {
os_has_said_disk_full = FALSE;
@@ -5526,6 +5639,8 @@ _fil_io(
ibool ignore_nonexistent_pages;
ibool page_compressed = FALSE;
ulint page_compression_level = 0;
+ ibool page_encrypted;
+ ulint page_encryption_key;
is_log = type & OS_FILE_LOG;
type = type & ~OS_FILE_LOG;
@@ -5595,6 +5710,11 @@ _fil_io(
page_compressed = fsp_flags_is_page_compressed(space->flags);
page_compression_level = fsp_flags_get_page_compression_level(space->flags);
+
+ page_encrypted = fsp_flags_is_page_encrypted(space->flags);
+ page_encryption_key = fsp_flags_get_page_encryption_key(space->flags);
+
+
/* If we are deleting a tablespace we don't allow any read
operations on that. However, we do allow write operations. */
if (space == 0 || (type == OS_FILE_READ && space->stop_new_ops)) {
@@ -5739,9 +5859,23 @@ _fil_io(
}
/* Queue the aio request */
- ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
- offset, len, node, message, space_id, trx,
- page_compressed, page_compression_level, write_size);
+ ret = os_aio(
+ type,
+ mode | wake_later,
+ node->name,
+ node->handle,
+ buf,
+ offset,
+ len,
+ node,
+ message,
+ space_id,
+ trx,
+ page_compressed,
+ page_compression_level,
+ write_size,
+ page_encrypted,
+ page_encryption_key);
#else
/* In mysqlbackup do normal i/o, not aio */
@@ -6180,6 +6314,8 @@ void
fil_close(void)
/*===========*/
{
+ fil_space_crypt_cleanup();
+
#ifndef UNIV_HOTBACKUP
/* The mutex should already have been freed. */
ut_ad(fil_system->mutex.magic_n == 0);
@@ -6229,6 +6365,8 @@ struct fil_iterator_t {
ulint n_io_buffers; /*!< Number of pages to use
for IO */
byte* io_buffer; /*!< Buffer to use for IO */
+ fil_space_crypt_t *crypt_data; /*!< Crypt data (if encrypted) */
+ byte* crypt_io_buffer; /*!< IO buffer when encrypted */
};
/********************************************************************//**
@@ -6291,7 +6429,12 @@ fil_iterate(
ut_ad(n_bytes > 0);
ut_ad(!(n_bytes % iter.page_size));
- if (!os_file_read(iter.file, io_buffer, offset,
+ byte* readptr = io_buffer;
+ if (iter.crypt_data != NULL) {
+ readptr = iter.crypt_io_buffer;
+ }
+
+ if (!os_file_read(iter.file, readptr, offset,
(ulint) n_bytes,
fil_space_is_page_compressed(space_id))) {
@@ -6306,6 +6449,18 @@ fil_iterate(
for (ulint i = 0; i < n_pages_read; ++i) {
+ if (iter.crypt_data != NULL) {
+ bool decrypted = fil_space_decrypt(
+ iter.crypt_data,
+ readptr + i * iter.page_size, // src
+ iter.page_size,
+ io_buffer + i * iter.page_size); // dst
+ if (decrypted) {
+ /* write back unencrypted page */
+ updated = true;
+ }
+ }
+
buf_block_set_file_page(block, space_id, page_no++);
dberr_t err;
@@ -6448,6 +6603,13 @@ fil_tablespace_iterate(
iter.n_io_buffers = n_io_buffers;
iter.page_size = callback.get_page_size();
+ ulint crypt_data_offset = fsp_header_get_crypt_offset(
+ callback.get_zip_size(), 0);
+
+ /* read (optional) crypt data */
+ iter.crypt_data = fil_space_read_crypt_data(
+ 0, page, crypt_data_offset);
+
/* Compressed pages can't be optimised for block IO for now.
We do the IMPORT page by page. */
@@ -6456,6 +6618,14 @@ fil_tablespace_iterate(
ut_a(iter.page_size == callback.get_zip_size());
}
+ /** If tablespace is encrypted, it needs extra buffers */
+ if (iter.crypt_data != NULL) {
+ /* decrease io buffers so that memory
+ * consumption doesnt double
+ * note: the +1 is to avoid n_io_buffers getting down to 0 */
+ iter.n_io_buffers = (iter.n_io_buffers + 1) / 2;
+ }
+
/** Add an extra page for compressed page scratch area. */
void* io_buffer = mem_alloc(
@@ -6464,9 +6634,45 @@ fil_tablespace_iterate(
iter.io_buffer = static_cast<byte*>(
ut_align(io_buffer, UNIV_PAGE_SIZE));
+ void* crypt_io_buffer = NULL;
+ if (iter.crypt_data != NULL) {
+ crypt_io_buffer = mem_alloc(
+ iter.n_io_buffers * UNIV_PAGE_SIZE);
+ iter.crypt_io_buffer = static_cast<byte*>(
+ crypt_io_buffer);
+ }
+
err = fil_iterate(iter, &block, callback);
mem_free(io_buffer);
+
+ if (iter.crypt_data != NULL) {
+ /* clear crypt data from page 0 and write it back */
+ os_file_read(file, page, 0, UNIV_PAGE_SIZE, 0);
+ fil_space_clear_crypt_data(page, crypt_data_offset);
+ lsn_t lsn = mach_read_from_8(page + FIL_PAGE_LSN);
+ if (callback.get_zip_size() == 0) {
+ buf_flush_init_for_writing(
+ page, 0, lsn);
+ } else {
+ buf_flush_update_zip_checksum(
+ page, callback.get_zip_size(), lsn);
+ }
+
+ if (!os_file_write(
+ iter.filepath, iter.file, page,
+ 0, iter.page_size)) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "os_file_write() failed");
+
+ return(DB_IO_ERROR);
+ }
+
+ mem_free(crypt_io_buffer);
+ iter.crypt_io_buffer = NULL;
+ fil_space_destroy_crypt_data(&iter.crypt_data);
+ }
}
if (err == DB_SUCCESS) {
@@ -6700,6 +6906,16 @@ fil_space_name(
}
/*******************************************************************//**
+Return space flags */
+ulint
+fil_space_flags(
+/*===========*/
+	fil_space_t*	space)	/*!< in: space */
+{
+	/* Plain accessor; the caller must keep the space object alive
+	(e.g. via pending ops or fil_system->mutex). */
+	return (space->flags);
+}
+
+/*******************************************************************//**
Return page type name */
const char*
fil_get_page_type_name(
@@ -6752,3 +6968,137 @@ fil_node_get_block_size(
{
return (node->file_block_size);
}
+
+/******************************************************************
+Get id of first tablespace or ULINT_UNDEFINED if none */
+UNIV_INTERN
+ulint
+fil_get_first_space()
+{
+	ulint out_id = ULINT_UNDEFINED;
+	fil_space_t* space;
+
+	mutex_enter(&fil_system->mutex);
+
+	/* return the id of the first space in the list that is not being
+	dropped (stop_new_ops unset); ULINT_UNDEFINED if none exists */
+	space = UT_LIST_GET_FIRST(fil_system->space_list);
+	if (space != NULL) {
+		do
+		{
+			if (!space->stop_new_ops) {
+				out_id = space->id;
+				break;
+			}
+			space = UT_LIST_GET_NEXT(space_list, space);
+		} while (space != NULL);
+	}
+
+	mutex_exit(&fil_system->mutex);
+
+	return out_id;
+}
+
+/******************************************************************
+Get id of next tablespace or ULINT_UNDEFINED if none */
+UNIV_INTERN
+ulint
+fil_get_next_space(ulint id)
+{
+	bool found;
+	fil_space_t* space;
+	ulint out_id = ULINT_UNDEFINED;
+
+	mutex_enter(&fil_system->mutex);
+
+	space = fil_space_get_by_id(id);
+	if (space == NULL) {
+		/* we didn't find it...scan the whole list for a space with
+		space->id > id. Start AT the first element: the previous
+		code advanced past it before checking, skipping it, and
+		dereferenced NULL via UT_LIST_GET_NEXT when the list was
+		empty. */
+		found = false;
+		space = UT_LIST_GET_FIRST(fil_system->space_list);
+	} else {
+		/* we found it, take next available space */
+		found = true;
+		space = UT_LIST_GET_NEXT(space_list, space);
+	}
+
+	while (space != NULL) {
+
+		/* in the not-found case, skip spaces at or below id */
+		if (found || space->id > id) {
+			if (!space->stop_new_ops) {
+				/* not being dropped: report this id */
+				out_id = space->id;
+				break;
+			}
+		}
+
+		space = UT_LIST_GET_NEXT(space_list, space);
+	}
+
+	mutex_exit(&fil_system->mutex);
+
+	return out_id;
+}
+
+/******************************************************************
+Get crypt data for a tablespace */
+UNIV_INTERN
+fil_space_crypt_t*
+fil_space_get_crypt_data(
+/*==================*/
+	ulint id)	/*!< in: space id */
+{
+	fil_space_t* space;
+	fil_space_crypt_t* crypt_data = NULL;
+
+	ut_ad(fil_system);
+
+	mutex_enter(&fil_system->mutex);
+
+	/* NULL is returned both when the space does not exist and when
+	it exists but has no crypt data attached */
+	space = fil_space_get_by_id(id);
+	if (space != NULL) {
+		crypt_data = space->crypt_data;
+	}
+
+	mutex_exit(&fil_system->mutex);
+
+	return(crypt_data);
+}
+
+/******************************************************************
+Get crypt data for a tablespace */
+UNIV_INTERN
+void
+fil_space_set_crypt_data(
+/*==================*/
+	ulint id, 	/*!< in: space id */
+	fil_space_crypt_t* crypt_data) 	/*!< in: crypt data */
+{
+	fil_space_t* space;
+	fil_space_crypt_t* old_crypt_data = NULL;
+
+	ut_ad(fil_system);
+
+	mutex_enter(&fil_system->mutex);
+
+	/* Attach crypt_data to the space, taking ownership of it. Any
+	previously attached crypt data is replaced and destroyed below. */
+	space = fil_space_get_by_id(id);
+	if (space != NULL) {
+
+		if (space->crypt_data != NULL) {
+			/* replacing data that must describe the same
+			encryption setup; assert equivalence */
+			ut_a(!fil_space_crypt_compare(crypt_data,
+						      space->crypt_data));
+			old_crypt_data = space->crypt_data;
+		}
+
+		space->crypt_data = crypt_data;
+	} else {
+		/* there is a small risk that tablespace has been deleted */
+		old_crypt_data = crypt_data;
+	}
+
+	mutex_exit(&fil_system->mutex);
+
+	if (old_crypt_data != NULL) {
+		/* first assign space->crypt_data
+		 * then destroy old_crypt_data when no new references to
+		 * it can be created.
+		 */
+		fil_space_destroy_crypt_data(&old_crypt_data);
+	}
+}
diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc
index c1d476126c6..fa25d8875ae 100644
--- a/storage/xtradb/fil/fil0pagecompress.cc
+++ b/storage/xtradb/fil/fil0pagecompress.cc
@@ -269,14 +269,24 @@ fil_compress_page(
int level = 0;
ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE;
ulint write_size=0;
- ulint comp_method = innodb_compression_algorithm; /* Cache to avoid
- change during
- function execution */
+ /* Cache to avoid change during function execution */
+ ulint comp_method = innodb_compression_algorithm;
+ ulint orig_page_type;
+
ut_ad(buf);
ut_ad(out_buf);
ut_ad(len);
ut_ad(out_len);
+ /* read original page type */
+ orig_page_type = mach_read_from_2(buf + FIL_PAGE_TYPE);
+
+ /* Let's not compress file space header or
+ extent descriptor */
+ if ((orig_page_type == FIL_PAGE_TYPE_FSP_HDR) || (orig_page_type == FIL_PAGE_TYPE_XDES) ) {
+ *out_len = len;
+ return (buf);
+ }
level = compression_level;
ut_ad(fil_space_is_page_compressed(space_id));
@@ -419,7 +429,7 @@ fil_compress_page(
/* Set up the correct page type */
mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED);
/* Set up the flush lsn to be compression algorithm */
- mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, comp_method);
+ mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, comp_method);
+	/* Set up the actual payload length */
mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size);
@@ -428,7 +438,7 @@ fil_compress_page(
ut_ad(fil_page_is_compressed(out_buf));
ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC);
ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size);
- ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == (ulint)comp_method);
+ ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) == (ulint)comp_method);
/* Verify that page can be decompressed */
{
@@ -470,7 +480,6 @@ fil_compress_page(
space_id, fil_space_name(space), len, write_size);
#endif /* UNIV_PAGECOMPRESS_DEBUG */
-
srv_stats.page_compression_saved.add((len - write_size));
srv_stats.pages_page_compressed.inc();
@@ -552,7 +561,7 @@ fil_decompress_page(
}
/* Get compression algorithm */
- compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN);
+ compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
/* Get the actual size of compressed page */
actual_size = mach_read_from_2(buf+FIL_PAGE_DATA);
@@ -722,5 +731,3 @@ fil_decompress_page(
ut_free(in_buf);
}
}
-
-
diff --git a/storage/xtradb/fil/fil0pageencryption.cc b/storage/xtradb/fil/fil0pageencryption.cc
new file mode 100644
index 00000000000..49c42615e19
--- /dev/null
+++ b/storage/xtradb/fil/fil0pageencryption.cc
@@ -0,0 +1,628 @@
+/*****************************************************************************
+
+Copyright (C) 2014 eperi GmbH. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*****************************************************************************/
+
+/*****************************************************************
+ @file fil/fil0pageencryption.cc
+ Implementation for page encryption file spaces.
+
+ Created 08/25/2014 Ludger Göckel eperi-GmbH
+ Modified 11/26/2014 Jan Lindström MariaDB Corporation
+ ***********************************************************************/
+
+#include "fil0fil.h"
+#include "fil0pageencryption.h"
+#include "fsp0pageencryption.h"
+#include "my_dbug.h"
+#include "page0zip.h"
+
+#include "buf0checksum.h"
+#include <my_global.h>
+#include <my_aes.h>
+#include <math.h>
+
/*
 * Lookup table for a CRC-8 with polynomial x^8 + x^2 + x + 1 (0x07),
 * init 0, no reflection, no final XOR. Used only to build the 1-byte
 * decryption-verification checksum stored in the page header.
 * derived from libFLAC, which is gpl v2
 */
byte crc_table[] = {
	0x00,0x07,0x0E,0x09,0x1C,0x1B,0x12,0x15,0x38,0x3F,0x36,0x31,0x24,0x23,0x2A,0x2D,0x70,0x77,0x7E,0x79,
	0x6C,0x6B,0x62,0x65,0x48,0x4F,0x46,0x41,0x54,0x53,0x5A,0x5D,0xE0,0xE7,0xEE,0xE9,0xFC,0xFB,0xF2,0xF5,
	0xD8,0xDF,0xD6,0xD1,0xC4,0xC3,0xCA,0xCD,0x90,0x97,0x9E,0x99,0x8C,0x8B,0x82,0x85,0xA8,0xAF,0xA6,0xA1,
	0xB4,0xB3,0xBA,0xBD,0xC7,0xC0,0xC9,0xCE,0xDB,0xDC,0xD5,0xD2,0xFF,0xF8,0xF1,0xF6,0xE3,0xE4,0xED,0xEA,
	0xB7,0xB0,0xB9,0xBE,0xAB,0xAC,0xA5,0xA2,0x8F,0x88,0x81,0x86,0x93,0x94,0x9D,0x9A,0x27,0x20,0x29,0x2E,
	0x3B,0x3C,0x35,0x32,0x1F,0x18,0x11,0x16,0x03,0x04,0x0D,0x0A,0x57,0x50,0x59,0x5E,0x4B,0x4C,0x45,0x42,
	0x6F,0x68,0x61,0x66,0x73,0x74,0x7D,0x7A,0x89,0x8E,0x87,0x80,0x95,0x92,0x9B,0x9C,0xB1,0xB6,0xBF,0xB8,
	0xAD,0xAA,0xA3,0xA4,0xF9,0xFE,0xF7,0xF0,0xE5,0xE2,0xEB,0xEC,0xC1,0xC6,0xCF,0xC8,0xDD,0xDA,0xD3,0xD4,
	0x69,0x6E,0x67,0x60,0x75,0x72,0x7B,0x7C,0x51,0x56,0x5F,0x58,0x4D,0x4A,0x43,0x44,0x19,0x1E,0x17,0x10,
	0x05,0x02,0x0B,0x0C,0x21,0x26,0x2F,0x28,0x3D,0x3A,0x33,0x34,0x4E,0x49,0x40,0x47,0x52,0x55,0x5C,0x5B,
	0x76,0x71,0x78,0x7F,0x6A,0x6D,0x64,0x63,0x3E,0x39,0x30,0x37,0x22,0x25,0x2C,0x2B,0x06,0x01,0x08,0x0F,
	0x1A,0x1D,0x14,0x13,0xAE,0xA9,0xA0,0xA7,0xB2,0xB5,0xBC,0xBB,0x96,0x91,0x98,0x9F,0x8A,0x8D,0x84,0x83,
	0xDE,0xD9,0xD0,0xD7,0xC2,0xC5,0xCC,0xCB,0xE6,0xE1,0xE8,0xEF,0xFA,0xFD,0xF4,0xF3

};
+
+/****************************************************************//**
+Calculate checksum for encrypted pages
+@return checksum */
+static
+byte
+fil_page_encryption_calc_checksum(
+/*==============================*/
+ unsigned char* buf, /*!<in: buffer where to calculate checksum */
+ ulint len) /*!<in: buffer length */
+{
+ byte crc = 0;
+ for (ulint i = 0; i < len; i++) {
+ crc = crc_table[(crc ^ buf[i]) & 0xff];
+ }
+ return crc;
+}
+
+/****************************************************************//**
+Recalculate checksum for encrypted pages */
+static
+void
+do_check_sum(
+/*=========*/
+ ulint page_size, /*!< in: page size */
+ ulint zip_size, /*!< in: compressed page size */
+ byte* buf) /*!< in: buffer */
+{
+ ib_uint32_t checksum = 0;
+
+ if (zip_size) {
+ checksum = page_zip_calc_checksum(buf,zip_size,
+ static_cast<srv_checksum_algorithm_t>(
+ srv_checksum_algorithm));
+
+ mach_write_to_4(buf + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
+ return;
+ }
+
+ switch ((srv_checksum_algorithm_t) srv_checksum_algorithm) {
+ case SRV_CHECKSUM_ALGORITHM_CRC32:
+ case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
+ checksum = buf_calc_page_crc32(buf);
+ break;
+ case SRV_CHECKSUM_ALGORITHM_INNODB:
+ case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
+ checksum = (ib_uint32_t) buf_calc_page_new_checksum(buf);
+ break;
+ case SRV_CHECKSUM_ALGORITHM_NONE:
+ case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
+ checksum = BUF_NO_CHECKSUM_MAGIC;
+ break;
+ /* no default so the compiler will emit a warning if new enum
+ is added and not handled here */
+ }
+
+ mach_write_to_4(buf + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
+
+ /* old style checksum is omitted */
+}
+
+/****************************************************************//**
+ For page encrypted pages encrypt the page before actual write
+ operation.
+
+ Note, that FIL_PAGE_TYPE_FSP_HDR and FIL_PAGE_TYPE_XDES type pages
+ are not encrypted!
+
+ Pages are encrypted with AES/CBC/NoPadding algorithm.
+
+ "No padding" is used to ensure, that the encrypted page does not
+ exceed the page size. If "no padding" is used, the input for encryption
+ must be of size (multiple * AES blocksize). AES Blocksize is usually 16
+ (bytes).
+
+ Everything in the page is encrypted except for the 38 byte FIL header.
+ Since the length of the payload is not a multiple of the AES blocksize,
+ and to ensure that every byte of the payload is encrypted, two encryption
+ operations are done. Each time with a block of adequate size as input.
+ 1st block contains everything from beginning of payload bytes except for
+ the remainder. 2nd block is of size 64 and contains the remainder and
+ the last (64 - sizeof(remainder)) bytes of the encrypted 1st block.
+
+ Each encrypted page receives a new page type for PAGE_ENCRYPTION.
+ The original page type (2 bytes) is stored in the Checksum header of the
+ page (position FIL_PAGE_SPACE_OR_CHKSUM). Additionally the encryption
+ key identifier is stored in the Checksum Header. This uses 1 byte.
+ Checksum verification for encrypted pages is disabled. This checksum
+ should be restored after decryption.
+
+ To be able to verify decryption in a later stage, a 1-byte checksum at
+ position 4 of the FIL_PAGE_SPACE_OR_CHKSUM header is stored.
+ For page compressed table pages the log base 2 of the length of the
+ encrypted data is stored.
+
+ @return encrypted page or original page if encryption failed to be
+ written*/
UNIV_INTERN
byte*
fil_encrypt_page(
/*==============*/
	ulint space_id, /*!< in: tablespace id of the table. */
	byte* buf, /*!< in: buffer from which to write; in aio
		this must be appropriately aligned */
	byte* out_buf, /*!< out: encrypted buffer */
	ulint len, /*!< in: length of input buffer.*/
	ulint encryption_key, /*!< in: encryption key */
	ulint* out_len, /*!< out: actual length of encrypted page */
	ulint* errorCode, /*!< out: an error code. set,
		if page is intentionally not encrypted */
	byte* tmp_encryption_buf) /*!< in: temporary buffer or NULL */
{

	int err = AES_OK;
	int key = 0;
	uint32 data_size = 0;
	ulint orig_page_type = 0;
	uint32 write_size = 0;
	fil_space_t* space = NULL;
	byte* tmp_buf = NULL;
	ulint page_len = 0;
	/* offset is always 0 in this function; kept for symmetry with
	fil_decrypt_page, which shares the "len - offset - 64" layout. */
	ulint offset = 0;

	ut_ad(buf);ut_ad(out_buf);
	key = encryption_key;

	*errorCode = AES_OK;

	ut_ad(fil_space_is_page_encrypted(space_id));
	/* Space is looked up only for diagnostics (tablespace name in
	messages); the mutex is released before any crypto work. */
	fil_system_enter();
	space = fil_space_get_by_id(space_id);
	fil_system_exit();

#ifdef UNIV_DEBUG_PAGEENCRYPTION
	ulint pageno = mach_read_from_4(buf + FIL_PAGE_OFFSET);
	fprintf(stderr,
		"InnoDB: Note: Preparing for encryption for space %lu name %s len %lu, page no %lu\n",
		space_id, fil_space_name(space), len, pageno);
#endif /* UNIV_DEBUG_PAGEENCRYPTION */

	/* read original page type */
	orig_page_type = mach_read_from_2(buf + FIL_PAGE_TYPE);

	/* Do not encrypt file space header or extent descriptor pages;
	they must remain readable before any keys are available.
	Caller is told via PAGE_ENCRYPTION_WILL_NOT_ENCRYPT and gets
	the original buffer back. */
	if ((orig_page_type == FIL_PAGE_TYPE_FSP_HDR)
		|| (orig_page_type == FIL_PAGE_TYPE_XDES) ) {
		*errorCode = PAGE_ENCRYPTION_WILL_NOT_ENCRYPT;
		*out_len = len;
		return (buf);
	}

	if (FIL_PAGE_PAGE_COMPRESSED == orig_page_type) {
		/* log2(len), stored in the checksum header so the reader
		can recover the compressed length (see fil_decrypt_page).
		NOTE(review): computed via double log10 division - relies
		on the quotient not truncating below the exact power of
		two; confirm on all target platforms. */
		page_len = log10((double)len)/log10((double)2);
	}

	/* 1-byte CRC over the whole payload; stored in the header and
	verified after decryption (non page-compressed pages only). */
	byte checksum_byte = fil_page_encryption_calc_checksum(buf + FIL_PAGE_DATA, len - FIL_PAGE_DATA);

	/* data_size bytes will be encrypted at first.
	 * data_size will be the length of the cipher text since no padding is used.*/
	data_size = ((len - FIL_PAGE_DATA - FIL_PAGE_DATA_END) / MY_AES_BLOCK_SIZE) * MY_AES_BLOCK_SIZE;


	/* NOTE(review): variable-length array sized by a runtime call;
	a GNU extension in C++ - assumes GetCryptoKeySize() returns a
	small sane value. */
	unsigned char rkey[GetCryptoKeySize(encryption_key)];
	uint key_len = sizeof(rkey);

	unsigned char iv[16];
	uint iv_len = sizeof(iv);

	/* Fetch key and IV; any retrieval failure is mapped to
	PAGE_ENCRYPTION_KEY_MISSING and the page is left unencrypted. */
	if (!HasCryptoKey(encryption_key)) {
		err = PAGE_ENCRYPTION_KEY_MISSING;
	} else {
		int rc;

		rc = GetCryptoKey(encryption_key, rkey, key_len);
		if (rc != AES_OK)
		{
			err = PAGE_ENCRYPTION_KEY_MISSING;
		}

		rc = GetCryptoIV(encryption_key, iv, iv_len);
		if (rc != AES_OK)
		{
			err = PAGE_ENCRYPTION_KEY_MISSING;
		}
	}

	/* 1st encryption: data_size bytes starting from FIL_PAGE_DATA */
	if (err == AES_OK) {
		err = my_aes_encrypt_dynamic(
			(uchar*) buf + FIL_PAGE_DATA,
			data_size,
			(uchar *) out_buf + FIL_PAGE_DATA,
			&write_size,
			(const unsigned char *) rkey,
			key_len,
			(const unsigned char *) iv,
			iv_len,
			1);

		/* no padding: ciphertext length equals plaintext length */
		ut_ad(write_size == data_size);

		if (err == AES_OK) {
			/* copy remaining bytes from input buffer to output buffer.
			 * Note, that this copies the final 8 bytes of a
			 * page, which consists of the
			 * Old-style checksum and the "Low 32 bits of LSN */
			memcpy(out_buf + FIL_PAGE_DATA + data_size,
				buf + FIL_PAGE_DATA + data_size ,
				len - FIL_PAGE_DATA -data_size);

			if (tmp_encryption_buf == NULL) {
				//create temporary buffer for 2nd encryption
				tmp_buf = static_cast<byte *>(ut_malloc(64));
			} else {
				tmp_buf = tmp_encryption_buf;
			}

			/* 2nd encryption: 64 bytes from out_buf,
			result length is 64 bytes. This covers the
			plaintext remainder left by the block-aligned 1st
			pass plus the tail of the 1st pass's ciphertext,
			so every payload byte ends up encrypted. */
			err = my_aes_encrypt_dynamic((uchar*)out_buf + len -offset -64,
				64,
				(uchar*)tmp_buf,
				&write_size,
				(const unsigned char *)rkey,
				key_len,
				(const unsigned char *)iv,
				iv_len, 1);
			ut_ad(write_size == 64);

			/* copy 64 bytes from 2nd encryption to out_buf*/
			memcpy(out_buf + len - offset -64, tmp_buf, 64);
		}

	}

	/* error handling */
	if (err != AES_OK) {
		/* If an error occurred we leave the actual page as it was */

		fprintf(stderr,
			"InnoDB: Warning: Encryption failed for space %lu "
			"name %s len %lu rt %d write %lu, error: %d\n",
			space_id, fil_space_name(space), len, err, (ulint)data_size, err);
		fflush(stderr);
		srv_stats.pages_page_encryption_error.inc();
		*out_len = len;

		/* free temporary buffer (only if allocated here) */
		if (tmp_buf!=NULL && tmp_encryption_buf == NULL) {
			ut_free(tmp_buf);
		}
		*errorCode = err;

		return (buf);
	}

	/* Set up the page header. Copied from input buffer*/
	memcpy(out_buf, buf, FIL_PAGE_DATA);

	/* Set up the correct page type */
	mach_write_to_2(out_buf + FIL_PAGE_TYPE, FIL_PAGE_PAGE_ENCRYPTED);

	/* The 1st checksum field is used to store original page type, etc.
	 * checksum check for page encrypted pages is omitted.
	 */

	/* Set up the encryption key. Written to the 1st byte of
	the checksum header field. This header is currently used to store data. */
	mach_write_to_1(out_buf + FIL_PAGE_SPACE_OR_CHKSUM, key);

	/* store original page type. Written to 2nd and 3rd byte
	of the checksum header field */
	mach_write_to_2(out_buf + FIL_PAGE_SPACE_OR_CHKSUM + 1, orig_page_type);

	if (FIL_PAGE_PAGE_COMPRESSED == orig_page_type) {
		/* set byte 4 of checksum field to page length (ln(len)) */
		memset(out_buf + FIL_PAGE_SPACE_OR_CHKSUM + 3, page_len, 1);
	} else {
		/* set byte 4 of checksum field to checksum byte */
		memset(out_buf + FIL_PAGE_SPACE_OR_CHKSUM + 3, checksum_byte, 1);
	}

#ifdef UNIV_DEBUG
	/* Verify */
	ut_ad(fil_page_is_encrypted(out_buf));

#endif /* UNIV_DEBUG */

	srv_stats.pages_page_encrypted.inc();
	*out_len = len;

	/* free temporary buffer (only if allocated here) */
	if (tmp_buf!=NULL && tmp_encryption_buf == NULL) {
		ut_free(tmp_buf);
	}

	return (out_buf);
}
+
+/****************************************************************//**
+ For page encrypted pages decrypt the page after actual read
+ operation.
+
+ See fil_encrypt_page for details, how the encryption works.
+
+ If the decryption can be verified, original page should be completely restored.
+ This includes original page type, 4-byte checksum field at page start.
+ If it is not a page compressed table's page, decryption is verified against
+ a 1-byte checksum built over the plain data bytes. If this verification
+ fails, an error state is returned.
+
+ @return decrypted page */
+ulint
+fil_decrypt_page(
+/*=============*/
+ byte* page_buf, /*!< in: preallocated buffer or NULL */
+ byte* buf, /*!< in/out: buffer from which to read; in aio
+ this must be appropriately aligned */
+ ulint len, /*!< in: length buffer, which should be decrypted.*/
+ ulint* write_size, /*!< out: size of the decrypted
+ data. If no error occurred equal to len */
+ ibool* page_compressed,/*!<out: is page compressed.*/
+ byte* tmp_encryption_buf) /*!< in: temporary buffer or NULL */
+{
+ int err = AES_OK;
+ ulint page_decryption_key;
+ uint32 data_size = 0;
+ ulint orig_page_type = 0;
+ uint32 tmp_write_size = 0;
+ ulint offset = 0;
+ byte *in_buf = NULL;
+ byte *tmp_buf = NULL;
+ fil_space_t* space = NULL;
+
+ ulint page_compression_flag = 0;
+
+ ut_ad(buf);
+ ut_ad(len);
+
+ /* Before actual decrypt, make sure that page type is correct */
+ ulint current_page_type = mach_read_from_2(buf + FIL_PAGE_TYPE);
+
+ if ((current_page_type == FIL_PAGE_TYPE_FSP_HDR)
+ || (current_page_type == FIL_PAGE_TYPE_XDES)) {
+ /* assumed as unencrypted */
+ if (write_size!=NULL) {
+ *write_size = len;
+ }
+ return AES_OK;
+ }
+
+ if (current_page_type != FIL_PAGE_PAGE_ENCRYPTED) {
+
+ fprintf(stderr, "InnoDB: Corruption: We try to decrypt corrupted page\n"
+ "InnoDB: CRC %lu type %lu.\n"
+ "InnoDB: len %lu\n",
+ mach_read_from_4(buf + FIL_PAGE_SPACE_OR_CHKSUM),
+ mach_read_from_2(buf + FIL_PAGE_TYPE), len);
+
+ fflush(stderr);
+ return PAGE_ENCRYPTION_WRONG_PAGE_TYPE;
+ }
+
+ /* 1st checksum field is used to store original page type, etc.
+ * checksum check for page encrypted pages is omitted.
+ */
+
+ /* read page encryption key */
+ page_decryption_key = mach_read_from_1(buf + FIL_PAGE_SPACE_OR_CHKSUM);
+
+ /* Get the page type */
+ orig_page_type = mach_read_from_2(buf + FIL_PAGE_SPACE_OR_CHKSUM + 1);
+
+ /* read checksum byte */
+ byte stored_checksum_byte = mach_read_from_1(buf + FIL_PAGE_SPACE_OR_CHKSUM + 3);
+
+ if (FIL_PAGE_PAGE_COMPRESSED == orig_page_type) {
+ if (page_compressed != NULL) {
+ *page_compressed = 1L;
+ }
+ page_compression_flag = 1;
+ len = pow((double)2, (double)((int)stored_checksum_byte));
+ offset = 0;
+ }
+
+ data_size = ((len - FIL_PAGE_DATA - FIL_PAGE_DATA_END) / MY_AES_BLOCK_SIZE) * MY_AES_BLOCK_SIZE;
+
+
+ unsigned char rkey[GetCryptoKeySize(page_decryption_key)];
+ uint key_len = sizeof(rkey);
+
+ unsigned char iv[16];
+ uint iv_len = sizeof(iv);
+
+ if (!HasCryptoKey(page_decryption_key)) {
+ err = PAGE_ENCRYPTION_KEY_MISSING;
+ } else {
+ int rc;
+
+ rc = GetCryptoKey(page_decryption_key, rkey, key_len);
+ if (rc != AES_OK)
+ {
+ err = PAGE_ENCRYPTION_KEY_MISSING;
+ }
+
+ rc = GetCryptoIV(page_decryption_key, iv, iv_len);
+ if (rc != AES_OK)
+ {
+ err = PAGE_ENCRYPTION_KEY_MISSING;
+ }
+ }
+
+
+ if (err != AES_OK) {
+ /* surely key could not be determined. */
+ fprintf(stderr, "InnoDB: Corruption: Page is marked as encrypted\n"
+ "InnoDB: but decrypt failed with error %d, encryption key %d.\n",
+ err, (int)page_decryption_key);
+ fflush(stderr);
+
+ return err;
+ }
+
+ if (tmp_encryption_buf == NULL) {
+ tmp_buf= static_cast<byte *>(ut_malloc(64));
+ } else {
+ tmp_buf = tmp_encryption_buf;
+ }
+
+ // If no buffer was given, we need to allocate temporal buffer
+ if (page_buf == NULL) {
+#ifdef UNIV_PAGECOMPRESS_DEBUG
+ fprintf(stderr,
+ "InnoDB: Note: FIL: Encryption buffer not given, allocating...\n");
+#endif /* UNIV_PAGECOMPRESS_DEBUG */
+ in_buf = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*2));
+ } else {
+ in_buf = page_buf;
+ }
+
+ /* 1st decryption: 64 bytes */
+ /* 64 bytes from data area are copied to temporary buffer.
+ * These are the last 64 of the (encrypted) payload */
+ memcpy(tmp_buf, buf + len - offset - 64, 64);
+
+ err = my_aes_decrypt_dynamic(
+ (const uchar*) tmp_buf,
+ 64,
+ (uchar *) in_buf + len - offset - 64,
+ &tmp_write_size,
+ (const unsigned char *) rkey,
+ key_len,
+ (const unsigned char *) iv,
+ iv_len,
+ 1);
+
+ ut_ad(tmp_write_size == 64);
+
+ /* If decrypt fails it means that page is corrupted or has an unknown key */
+ if (err != AES_OK) {
+ fprintf(stderr, "InnoDB: Corruption: Page is marked as encrypted\n"
+ "InnoDB: but decrypt failed with error %d.\n"
+ "InnoDB: size %lu len %lu, key %d\n", err, (ulint)data_size,
+ len, (int)page_decryption_key);
+ fflush(stderr);
+
+ if (tmp_encryption_buf == NULL) {
+ ut_free(tmp_buf);
+ }
+
+ if (page_buf == NULL) {
+ ut_free(in_buf);
+ }
+ return err;
+ }
+
+ ut_ad(tmp_write_size == 64);
+
+ /* copy 1st part of payload from buf to in_buf */
+ /* do not override result of 1st decryption */
+ memcpy(in_buf + FIL_PAGE_DATA, buf + FIL_PAGE_DATA, len -offset -64 - FIL_PAGE_DATA);
+
+
+ /* Decrypt rest of the page */
+ err = my_aes_decrypt_dynamic((uchar*) in_buf + FIL_PAGE_DATA,
+ data_size,
+ (uchar *) buf + FIL_PAGE_DATA,
+ &tmp_write_size,
+ (const unsigned char *)&rkey,
+ key_len,
+ (const unsigned char *)&iv,
+ iv_len,
+ 1);
+
+ ut_ad(tmp_write_size = data_size);
+
+ /* copy remaining bytes from in_buf to buf.
+ */
+ ulint bytes_to_copy = len - FIL_PAGE_DATA - data_size - offset;
+ memcpy(buf + FIL_PAGE_DATA + data_size, in_buf + FIL_PAGE_DATA + data_size, bytes_to_copy);
+
+ /* apart from header data everything is now in in_buf */
+
+ if (tmp_encryption_buf == NULL) {
+ ut_free(tmp_buf);
+ }
+
+#ifdef UNIV_PAGEENCRIPTION_DEBUG
+ fprintf(stderr, "InnoDB: Note: Decryption succeeded for len %lu\n", len);
+ fflush(stderr);
+#endif
+
+ if (page_buf == NULL) {
+ ut_free(in_buf);
+ }
+
+ /* setting original page type */
+ mach_write_to_2(buf + FIL_PAGE_TYPE, orig_page_type);
+
+ ulint pageno = mach_read_from_4(buf + FIL_PAGE_OFFSET);
+ ulint flags = 0;
+ ulint zip_size = 0;
+
+ /* please note, that page with number 0 is not encrypted */
+ if (pageno == 0 ) {
+ flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + buf);
+ } else {
+ ulint space_id = mach_read_from_4(buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+ fil_system_enter();
+ space = fil_space_get_by_id(space_id);
+ flags = fil_space_flags(space);
+ fil_system_exit();
+ }
+
+ if (!(page_compression_flag)) {
+ zip_size = fsp_flags_get_zip_size(flags);
+ }
+
+ if (write_size!=NULL) {
+ *write_size = len;
+ }
+
+ if (!(page_compression_flag)) {
+ byte checksum_byte = fil_page_encryption_calc_checksum(buf + FIL_PAGE_DATA, len - FIL_PAGE_DATA);
+
+ if (checksum_byte != stored_checksum_byte) {
+ err = PAGE_ENCRYPTION_WRONG_KEY;
+ fprintf(stderr, "InnoDB: Corruption: Page is marked as encrypted\n"
+ "InnoDB: but decryption verification failed with error %d,"
+ " encryption key %d.\n",
+ err, (int)page_decryption_key);
+ fflush(stderr);
+ return err;
+ }
+
+ /* calc check sums and write to the buffer, if page is not of type PAGE_COMPRESSED.
+ * if the decryption is verified, it is assumed that the
+ * original page was restored, re-calculating the original
+ * checksums should be ok
+ */
+ do_check_sum(len, zip_size, buf);
+ } else {
+ /* page_compression uses BUF_NO_CHECKSUM_MAGIC as checksum */
+ mach_write_to_4(buf + FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC);
+ }
+
+ srv_stats.pages_page_decrypted.inc();
+
+ return err;
+}
+
+