path: root/storage/innobase
Diffstat (limited to 'storage/innobase')
-rw-r--r--  storage/innobase/CMakeLists.txt | 7
-rw-r--r--  storage/innobase/btr/btr0btr.cc | 142
-rw-r--r--  storage/innobase/btr/btr0cur.cc | 15
-rw-r--r--  storage/innobase/btr/btr0scrub.cc | 898
-rw-r--r--  storage/innobase/buf/buf0buf.cc | 280
-rw-r--r--  storage/innobase/buf/buf0checksum.cc | 8
-rw-r--r--  storage/innobase/buf/buf0dblwr.cc | 92
-rw-r--r--  storage/innobase/buf/buf0flu.cc | 30
-rw-r--r--  storage/innobase/buf/buf0rea.cc | 9
-rw-r--r--  storage/innobase/dict/dict0dict.cc | 4
-rw-r--r--  storage/innobase/dict/dict0load.cc | 101
-rw-r--r--  storage/innobase/dict/dict0stats_bg.cc | 4
-rw-r--r--  storage/innobase/fil/fil0crypt.cc | 2432
-rw-r--r--  storage/innobase/fil/fil0fil.cc | 403
-rw-r--r--  storage/innobase/fil/fil0pagecompress.cc | 25
-rw-r--r--  storage/innobase/fsp/fsp0fsp.cc | 63
-rw-r--r--  storage/innobase/handler/ha_innodb.cc | 298
-rw-r--r--  storage/innobase/handler/ha_innodb.h | 4
-rw-r--r--  storage/innobase/handler/handler0alter.cc | 7
-rw-r--r--  storage/innobase/handler/i_s.cc | 592
-rw-r--r--  storage/innobase/handler/i_s.h | 2
-rw-r--r--  storage/innobase/include/btr0btr.h | 5
-rw-r--r--  storage/innobase/include/btr0scrub.h | 166
-rw-r--r--  storage/innobase/include/buf0buf.h | 72
-rw-r--r--  storage/innobase/include/buf0buf.ic | 29
-rw-r--r--  storage/innobase/include/dict0dict.h | 21
-rw-r--r--  storage/innobase/include/dict0dict.ic | 37
-rw-r--r--  storage/innobase/include/dict0mem.h | 57
-rw-r--r--  storage/innobase/include/dict0pagecompress.ic | 26
-rw-r--r--  storage/innobase/include/fil0fil.h | 316
-rw-r--r--  storage/innobase/include/fil0pageencryption.h | 76
-rw-r--r--  storage/innobase/include/fsp0fsp.h | 73
-rw-r--r--  storage/innobase/include/fsp0pagecompress.ic | 2
-rw-r--r--  storage/innobase/include/fsp0pageencryption.h | 66
-rw-r--r--  storage/innobase/include/fsp0pageencryption.ic | 168
-rw-r--r--  storage/innobase/include/fts0fts.h | 28
-rw-r--r--  storage/innobase/include/log0crypt.h | 85
-rw-r--r--  storage/innobase/include/log0log.h | 39
-rw-r--r--  storage/innobase/include/log0recv.h | 5
-rw-r--r--  storage/innobase/include/mtr0log.ic | 2
-rw-r--r--  storage/innobase/include/mtr0mtr.h | 20
-rw-r--r--  storage/innobase/include/os0file.h | 29
-rw-r--r--  storage/innobase/include/os0file.ic | 8
-rw-r--r--  storage/innobase/include/page0page.h | 2
-rw-r--r--  storage/innobase/include/page0page.ic | 7
-rw-r--r--  storage/innobase/include/srv0mon.h | 5
-rw-r--r--  storage/innobase/include/srv0srv.h | 36
-rw-r--r--  storage/innobase/include/univ.i | 3
-rw-r--r--  storage/innobase/log/log0crypt.cc | 267
-rw-r--r--  storage/innobase/log/log0log.cc | 169
-rw-r--r--  storage/innobase/log/log0recv.cc | 44
-rw-r--r--  storage/innobase/mtr/mtr0log.cc | 19
-rw-r--r--  storage/innobase/mtr/mtr0mtr.cc | 33
-rw-r--r--  storage/innobase/os/os0file.cc | 306
-rw-r--r--  storage/innobase/page/page0page.cc | 10
-rw-r--r--  storage/innobase/row/row0import.cc | 3
-rw-r--r--  storage/innobase/row/row0mysql.cc | 38
-rw-r--r--  storage/innobase/row/row0umod.cc | 11
-rw-r--r--  storage/innobase/srv/srv0mon.cc | 24
-rw-r--r--  storage/innobase/srv/srv0srv.cc | 41
-rw-r--r--  storage/innobase/srv/srv0start.cc | 49
61 files changed, 7536 insertions, 277 deletions
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt
index 964294a962d..e3e1e70feb7 100644
--- a/storage/innobase/CMakeLists.txt
+++ b/storage/innobase/CMakeLists.txt
@@ -336,6 +336,7 @@ SET(INNOBASE_SOURCES
btr/btr0btr.cc
btr/btr0cur.cc
btr/btr0pcur.cc
+ btr/btr0scrub.cc
btr/btr0sea.cc
btr/btr0defragment.cc
buf/buf0buddy.cc
@@ -360,7 +361,8 @@ SET(INNOBASE_SOURCES
eval/eval0eval.cc
eval/eval0proc.cc
fil/fil0fil.cc
- fil/fil0pagecompress.cc
+ fil/fil0pagecompress.cc
+ fil/fil0crypt.cc
fsp/fsp0fsp.cc
fut/fut0fut.cc
fut/fut0lst.cc
@@ -385,6 +387,7 @@ SET(INNOBASE_SOURCES
lock/lock0wait.cc
log/log0log.cc
log/log0recv.cc
+ log/log0crypt.cc
mach/mach0data.cc
mem/mem0mem.cc
mem/mem0pool.cc
@@ -469,5 +472,5 @@ ENDIF()
MYSQL_ADD_PLUGIN(innobase ${INNOBASE_SOURCES} STORAGE_ENGINE
MODULE_ONLY
MODULE_OUTPUT_NAME ha_innodb
- LINK_LIBRARIES ${ZLIB_LIBRARY} ${LINKER_SCRIPT})
+ LINK_LIBRARIES ${ZLIB_LIBRARY} ${LINKER_SCRIPT} pcre pcreposix mysys_ssl)
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index ff27b470974..92539ce1524 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -1137,9 +1137,27 @@ btr_page_alloc_low(
reservation for free extents, and thus we know that a page can
be allocated: */
- return(fseg_alloc_free_page_general(
- seg_header, hint_page_no, file_direction,
- TRUE, mtr, init_mtr));
+ buf_block_t* block = fseg_alloc_free_page_general(
+ seg_header, hint_page_no, file_direction,
+ TRUE, mtr, init_mtr);
+
+#ifdef UNIV_DEBUG_SCRUBBING
+ if (block != NULL) {
+ fprintf(stderr,
+ "alloc %lu:%lu to index: %lu root: %lu\n",
+ buf_block_get_page_no(block),
+ buf_block_get_space(block),
+ index->id,
+ dict_index_get_page(index));
+ } else {
+ fprintf(stderr,
+ "failed alloc index: %lu root: %lu\n",
+ index->id,
+ dict_index_get_page(index));
+ }
+#endif /* UNIV_DEBUG_SCRUBBING */
+
+ return block;
}
/**************************************************************//**
@@ -1287,6 +1305,7 @@ btr_page_free_low(
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: block to be freed, x-latched */
ulint level, /*!< in: page level */
+ bool blob, /*!< in: blob page */
mtr_t* mtr) /*!< in: mtr */
{
fseg_header_t* seg_header;
@@ -1299,6 +1318,76 @@ btr_page_free_low(
buf_block_modify_clock_inc(block);
btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block));
+ if (blob) {
+ ut_a(level == 0);
+ }
+
+ bool scrub = srv_immediate_scrub_data_uncompressed;
+ /* scrub page */
+ if (scrub && blob) {
+ /* blob page: scrub entire page */
+ // TODO(jonaso): scrub only what is actually needed
+ page_t* page = buf_block_get_frame(block);
+ memset(page + PAGE_HEADER, 0,
+ UNIV_PAGE_SIZE - PAGE_HEADER);
+#ifdef UNIV_DEBUG_SCRUBBING
+ fprintf(stderr,
+ "btr_page_free_low: scrub blob page %lu/%lu\n",
+ buf_block_get_space(block),
+ buf_block_get_page_no(block));
+#endif /* UNIV_DEBUG_SCRUBBING */
+ } else if (scrub) {
+ /* scrub records on page */
+
+ /* TODO(jonaso): in theory we could clear the full page,
+ * but since the page still remains in the buffer pool and
+ * gets flushed etc., lots of routines validate its
+ * consistency. In order to remain structurally consistent,
+ * we clear each record on its own
+ *
+ * NOTE: The TODO below mentions removing page from buffer pool
+ * and removing redo entries, once that is done, clearing full
+ * pages should be possible
+ */
+ uint cnt = 0;
+ uint bytes = 0;
+ page_t* page = buf_block_get_frame(block);
+ mem_heap_t* heap = NULL;
+ ulint* offsets = NULL;
+ rec_t* rec = page_rec_get_next(page_get_infimum_rec(page));
+ while (!page_rec_is_supremum(rec)) {
+ offsets = rec_get_offsets(rec, index,
+ offsets, ULINT_UNDEFINED,
+ &heap);
+ uint size = rec_offs_data_size(offsets);
+ memset(rec, 0, size);
+ rec = page_rec_get_next(rec);
+ cnt++;
+ bytes += size;
+ }
+#ifdef UNIV_DEBUG_SCRUBBING
+ fprintf(stderr,
+ "btr_page_free_low: scrub %lu/%lu - "
+ "%u records %u bytes\n",
+ buf_block_get_space(block),
+ buf_block_get_page_no(block),
+ cnt, bytes);
+#endif /* UNIV_DEBUG_SCRUBBING */
+ if (heap) {
+ mem_heap_free(heap);
+ }
+ }
+
+#ifdef UNIV_DEBUG_SCRUBBING
+ if (scrub == false) {
+ fprintf(stderr,
+ "btr_page_free_low %lu/%lu blob: %u\n",
+ buf_block_get_space(block),
+ buf_block_get_page_no(block),
+ blob);
+ }
+#endif /* UNIV_DEBUG_SCRUBBING */
+
if (dict_index_is_ibuf(index)) {
btr_page_free_for_ibuf(index, block, mtr);
@@ -1314,6 +1403,14 @@ btr_page_free_low(
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
}
+ if (scrub) {
+ /**
+ * Reset page type so that scrub thread won't try to scrub it
+ */
+ mlog_write_ulint(buf_block_get_frame(block) + FIL_PAGE_TYPE,
+ FIL_PAGE_TYPE_ALLOCATED, MLOG_2BYTES, mtr);
+ }
+
fseg_free_page(seg_header,
buf_block_get_space(block),
buf_block_get_page_no(block), mtr);
@@ -1343,7 +1440,7 @@ btr_page_free(
ulint level = btr_page_get_level(page, mtr);
ut_ad(fil_page_get_type(block->frame) == FIL_PAGE_INDEX);
- btr_page_free_low(index, block, level, mtr);
+ btr_page_free_low(index, block, level, false, mtr);
}
/**************************************************************//**
@@ -2284,9 +2381,14 @@ btr_root_raise_and_insert(
ibuf_reset_free_bits(new_block);
}
- /* Reposition the cursor to the child node */
- page_cur_search(new_block, index, tuple,
- PAGE_CUR_LE, page_cursor);
+ if (tuple != NULL) {
+ /* Reposition the cursor to the child node */
+ page_cur_search(new_block, index, tuple,
+ PAGE_CUR_LE, page_cursor);
+ } else {
+ /* Set cursor to first record on child node */
+ page_cur_set_before_first(new_block, page_cursor);
+ }
/* Split the child and insert tuple */
return(btr_page_split_and_insert(flags, cursor, offsets, heap,
@@ -2962,6 +3064,9 @@ function must always succeed, we cannot reverse it: therefore enough
free disk space (2 pages) must be guaranteed to be available before
this function is called.
+NOTE: jonaso added support for calling function with tuple == NULL
+which causes it to only split a page.
+
@return inserted record */
UNIV_INTERN
rec_t*
@@ -3039,7 +3144,7 @@ func_start:
half-page */
insert_left = FALSE;
- if (n_iterations > 0) {
+ if (tuple != NULL && n_iterations > 0) {
direction = FSP_UP;
hint_page_no = page_no + 1;
split_rec = btr_page_get_split_rec(cursor, tuple, n_ext);
@@ -3100,7 +3205,12 @@ func_start:
*offsets = rec_get_offsets(split_rec, cursor->index, *offsets,
n_uniq, heap);
- insert_left = cmp_dtuple_rec(tuple, split_rec, *offsets) < 0;
+ if (tuple != NULL) {
+ insert_left = cmp_dtuple_rec(
+ tuple, split_rec, *offsets) < 0;
+ } else {
+ insert_left = 1;
+ }
if (!insert_left && new_page_zip && n_iterations > 0) {
/* If a compressed page has already been split,
@@ -3134,8 +3244,10 @@ insert_empty:
on the appropriate half-page, we may release the tree x-latch.
We can then move the records after releasing the tree latch,
thus reducing the tree latch contention. */
-
- if (split_rec) {
+ if (tuple == NULL) {
+ insert_will_fit = 1;
+ }
+ else if (split_rec) {
insert_will_fit = !new_page_zip
&& btr_page_insert_fits(cursor, split_rec,
offsets, tuple, n_ext, heap);
@@ -3256,6 +3368,11 @@ insert_empty:
/* 6. The split and the tree modification is now completed. Decide the
page where the tuple should be inserted */
+ if (tuple == NULL) {
+ rec = NULL;
+ goto func_exit;
+ }
+
if (insert_left) {
insert_block = left_block;
} else {
@@ -3343,6 +3460,9 @@ func_exit:
ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index));
+ if (tuple == NULL) {
+ ut_ad(rec == NULL);
+ }
ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets));
return(rec);
}
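
The btr_page_free_low() hunks above scrub data at free time in one of two ways: a BLOB
page is wiped wholesale past the page header, while an index page has each record body
cleared individually so that the page stays structurally valid for the consistency
checks that run while it is still in the buffer pool. A minimal standalone sketch of the
two strategies follows; the page size, header offset and record layout are illustrative
assumptions, not the real InnoDB constants.

    // Sketch only: kPageSize/kPageHeader and RecRef stand in for
    // UNIV_PAGE_SIZE, PAGE_HEADER and the rec_get_offsets() machinery.
    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    static const size_t kPageSize   = 16384; // assumed default page size
    static const size_t kPageHeader = 38;    // assumed start of page payload

    struct RecRef { size_t offset; size_t size; };  // one record's payload

    // BLOB page: wipe everything after the file-page header in one pass.
    void scrub_blob_page(uint8_t* page) {
            std::memset(page + kPageHeader, 0, kPageSize - kPageHeader);
    }

    // Index page: wipe only the record payloads, keeping the page
    // directory and headers intact so validation routines still pass.
    void scrub_index_page(uint8_t* page, const std::vector<RecRef>& recs) {
            for (const RecRef& r : recs) {
                    std::memset(page + r.offset, 0, r.size);
            }
    }
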
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index e68179a5c19..3992eda6e1c 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -4738,11 +4738,11 @@ alloc_another:
change when B-tree nodes are split or
merged. */
mlog_write_ulint(page
- + FIL_PAGE_FILE_FLUSH_LSN,
+ + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
space_id,
MLOG_4BYTES, &mtr);
mlog_write_ulint(page
- + FIL_PAGE_FILE_FLUSH_LSN + 4,
+ + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4,
rec_page_no,
MLOG_4BYTES, &mtr);
@@ -4750,9 +4750,10 @@ alloc_another:
memset(page + page_zip_get_size(page_zip)
- c_stream.avail_out,
0, c_stream.avail_out);
- mlog_log_string(page + FIL_PAGE_FILE_FLUSH_LSN,
+ mlog_log_string(page
+ + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
page_zip_get_size(page_zip)
- - FIL_PAGE_FILE_FLUSH_LSN,
+ - FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
&mtr);
/* Copy the page to compressed storage,
because it will be flushed to disk
@@ -4925,7 +4926,7 @@ func_exit:
ut_ad(btr_blob_op_is_update(op));
for (i = 0; i < n_freed_pages; i++) {
- btr_page_free_low(index, freed_pages[i], 0, alloc_mtr);
+ btr_page_free_low(index, freed_pages[i], 0, true, alloc_mtr);
}
DBUG_EXECUTE_IF("btr_store_big_rec_extern",
@@ -5163,7 +5164,7 @@ btr_free_externally_stored_field(
}
next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);
- btr_page_free_low(index, ext_block, 0, &mtr);
+ btr_page_free_low(index, ext_block, 0, true, &mtr);
if (page_zip != NULL) {
mach_write_to_4(field_ref + BTR_EXTERN_PAGE_NO,
@@ -5194,7 +5195,7 @@ btr_free_externally_stored_field(
because we did not store it on the page (we save the
space overhead from an index page header. */
- btr_page_free_low(index, ext_block, 0, &mtr);
+ btr_page_free_low(index, ext_block, 0, true, &mtr);
mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO,
next_page_no,
diff --git a/storage/innobase/btr/btr0scrub.cc b/storage/innobase/btr/btr0scrub.cc
new file mode 100644
index 00000000000..d53b478e429
--- /dev/null
+++ b/storage/innobase/btr/btr0scrub.cc
@@ -0,0 +1,898 @@
+// Copyright (c) 2014, Google Inc.
+
+/**************************************************//**
+@file btr/btr0scrub.cc
+Scrubbing of btree pages
+
+*******************************************************/
+
+#include "btr0btr.h"
+#include "btr0cur.h"
+#include "btr0scrub.h"
+#include "ibuf0ibuf.h"
+#include "fsp0fsp.h"
+#include "dict0dict.h"
+#include "mtr0mtr.h"
+
+/* used when trying to acquire dict-lock */
+UNIV_INTERN bool fil_crypt_is_closing(ulint space);
+
+/**
+* scrub data at delete time (e.g. purge thread)
+*/
+my_bool srv_immediate_scrub_data_uncompressed = false;
+
+/**
+* background scrub uncompressed data
+*
+* if srv_immediate_scrub_data_uncompressed is enabled
+* this is only needed to handle "old" data
+*/
+my_bool srv_background_scrub_data_uncompressed = false;
+
+/**
+* background scrub of compressed data
+*
+* reorganize compressed page for scrubbing
+* (only way to scrub compressed data)
+*/
+my_bool srv_background_scrub_data_compressed = false;
+
+/* check spaces once per hour */
+UNIV_INTERN uint srv_background_scrub_data_check_interval = (60 * 60);
+
+/* default to scrub spaces that haven't been scrubbed in a week */
+UNIV_INTERN uint srv_background_scrub_data_interval = (7 * 24 * 60 * 60);
+
+/**
+* statistics for scrubbing by background threads
+*/
+static btr_scrub_stat_t scrub_stat;
+static ib_mutex_t scrub_stat_mutex;
+#ifdef UNIV_PFS_MUTEX
+UNIV_INTERN mysql_pfs_key_t scrub_stat_mutex_key;
+#endif
+
+#ifdef UNIV_DEBUG
+/**
+* srv_scrub_force_testing
+*
+* - force scrubbing using background threads even for uncompressed tables
+* - force pessimistic scrubbing (page split) even if not needed
+* (see test_pessimistic_scrub_pct)
+*/
+my_bool srv_scrub_force_testing = true;
+
+/**
+* Force pessimistic scrubbing in 50% of the cases (UNIV_DEBUG only)
+*/
+static int test_pessimistic_scrub_pct = 50;
+
+#endif
+static uint scrub_compression_level = page_zip_level;
+
+/**************************************************************//**
+Log a scrubbing failure */
+static
+void
+log_scrub_failure(
+/*===============*/
+ btr_scrub_t* scrub_data, /*!< in: data to store statistics on */
+ buf_block_t* block, /*!< in: block */
+ dberr_t err) /*!< in: error */
+{
+ const char* reason = "unknown";
+ switch(err) {
+ case DB_UNDERFLOW:
+ reason = "too few records on page";
+ scrub_data->scrub_stat.page_split_failures_underflow++;
+ break;
+ case DB_INDEX_CORRUPT:
+ reason = "unable to find index!";
+ scrub_data->scrub_stat.page_split_failures_missing_index++;
+ break;
+ case DB_OUT_OF_FILE_SPACE:
+ reason = "out of filespace";
+ scrub_data->scrub_stat.page_split_failures_out_of_filespace++;
+ break;
+ default:
+ ut_ad(0);
+ reason = "unknown";
+ scrub_data->scrub_stat.page_split_failures_unknown++;
+ }
+ fprintf(stderr,
+ "InnoDB: Warning: Failed to scrub page %lu in space %lu : %s\n",
+ buf_block_get_page_no(block),
+ buf_block_get_space(block),
+ reason);
+}
+
+/****************************************************************
+Lock dict mutexes */
+static
+bool
+btr_scrub_lock_dict_func(ulint space, bool lock_to_close_table,
+ const char * file, uint line)
+{
+ uint start = time(0);
+ uint last = start;
+
+ while (mutex_enter_nowait_func(&(dict_sys->mutex), file, line)) {
+ /* if we lock to close a table, we wait forever
+ * if we don't lock to close a table, we check if space
+ * is closing, and if so give up instead
+ */
+ if (lock_to_close_table == false) {
+ if (fil_crypt_is_closing(space)) {
+ return false;
+ }
+ }
+ os_thread_sleep(250000);
+
+ uint now = time(0);
+ if (now >= last + 30) {
+ fprintf(stderr,
+ "WARNING: %s:%u waited %u seconds for"
+ " dict_sys lock, space: %lu"
+ " lock_to_close_table: %u\n",
+ file, line, now - start, space,
+ lock_to_close_table);
+
+ last = now;
+ }
+ }
+
+ ut_ad(mutex_own(&dict_sys->mutex));
+ return true;
+}
+
+#define btr_scrub_lock_dict(space, lock_to_close_table) \
+ btr_scrub_lock_dict_func(space, lock_to_close_table, __FILE__, __LINE__)
+
+/****************************************************************
+Unlock dict mutexes */
+static
+void
+btr_scrub_unlock_dict()
+{
+ dict_mutex_exit_for_mysql();
+}
+
+/****************************************************************
+Release reference to table
+*/
+static
+void
+btr_scrub_table_close(
+/*==================*/
+ dict_table_t* table) /*!< in: table */
+{
+ bool dict_locked = true;
+ bool try_drop = false;
+ table->stats_bg_flag &= ~BG_SCRUB_IN_PROGRESS;
+ dict_table_close(table, dict_locked, try_drop);
+}
+
+/****************************************************************
+Release reference to table
+*/
+static
+void
+btr_scrub_table_close_for_thread(
+ btr_scrub_t *scrub_data)
+{
+ if (scrub_data->current_table == NULL)
+ return;
+
+ bool lock_for_close = true;
+ btr_scrub_lock_dict(scrub_data->space, lock_for_close);
+
+ /* perform the actual closing */
+ btr_scrub_table_close(scrub_data->current_table);
+
+ btr_scrub_unlock_dict();
+
+ scrub_data->current_table = NULL;
+ scrub_data->current_index = NULL;
+}
+
+/**************************************************************//**
+Check if scrubbing is turned ON or OFF */
+static
+bool
+check_scrub_setting(
+/*=====================*/
+ btr_scrub_t* scrub_data) /*!< in: scrub data */
+{
+ if (scrub_data->compressed)
+ return srv_background_scrub_data_compressed;
+ else
+ return srv_background_scrub_data_uncompressed;
+}
+
+#define IBUF_INDEX_ID (DICT_IBUF_ID_MIN + IBUF_SPACE_ID)
+
+/**************************************************************//**
+Check if a page needs scrubbing */
+UNIV_INTERN
+int
+btr_page_needs_scrubbing(
+/*=====================*/
+ btr_scrub_t* scrub_data, /*!< in: scrub data */
+ buf_block_t* block, /*!< in: block to check, latched */
+ btr_scrub_page_allocation_status_t allocated) /*!< in: is block known
+ to be allocated */
+{
+ /**
+ * Check if scrubbing has been turned OFF.
+ *
+ * at the start of a space we check whether scrubbing is ON or OFF;
+ * here we only check if scrubbing has been turned OFF.
+ *
+ * Motivation is that it's only valuable to have a full table (space)
+ * scrubbed.
+ */
+ if (!check_scrub_setting(scrub_data)) {
+ bool before_value = scrub_data->scrubbing;
+ scrub_data->scrubbing = false;
+
+ if (before_value == true) {
+ /* we toggle scrubbing from on to off */
+ return BTR_SCRUB_TURNED_OFF;
+ }
+ }
+
+ if (scrub_data->scrubbing == false) {
+ return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
+ }
+
+ page_t* page = buf_block_get_frame(block);
+ uint type = fil_page_get_type(page);
+
+ if (allocated == BTR_SCRUB_PAGE_ALLOCATED) {
+ if (type != FIL_PAGE_INDEX) {
+ /* this function is called from fil-crypt-threads.
+ * these threads iterate all pages of all tablespaces
+ * and don't know about fil_page_type.
+ * But scrubbing is only needed for index-pages. */
+
+ /**
+ * NOTE: scrubbing is also needed for UNDO pages,
+ * but they are scrubbed at purge-time, since they are
+ * uncompressed
+ */
+
+ /* if encountering page type not needing scrubbing
+ release reference to table object */
+ return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
+ }
+
+ if (page_has_garbage(page) == false) {
+ /* no garbage (from deleted/shrunken records) */
+ return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
+ }
+
+ } else if (allocated == BTR_SCRUB_PAGE_FREE ||
+ allocated == BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN) {
+
+ if (! (type == FIL_PAGE_INDEX ||
+ type == FIL_PAGE_TYPE_BLOB ||
+ type == FIL_PAGE_TYPE_ZBLOB ||
+ type == FIL_PAGE_TYPE_ZBLOB2)) {
+
+ /**
+ * If this is a dropped page, we also need to scrub
+ * BLOB pages
+ */
+
+ /* if encountering page type not needing scrubbing
+ release reference to table object */
+ return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
+ }
+ }
+
+ if (btr_page_get_index_id(page) == IBUF_INDEX_ID) {
+ /* skip ibuf */
+ return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
+ }
+
+ return BTR_SCRUB_PAGE;
+}
+
+/****************************************************************
+Handle a skipped page
+*/
+UNIV_INTERN
+void
+btr_scrub_skip_page(
+/*==================*/
+ btr_scrub_t* scrub_data, /*!< in: data with scrub state */
+ int needs_scrubbing) /*!< in: return code from
+ btr_page_needs_scrubbing */
+{
+ switch(needs_scrubbing) {
+ case BTR_SCRUB_SKIP_PAGE:
+ /* nothing to do */
+ return;
+ case BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE:
+ btr_scrub_table_close_for_thread(scrub_data);
+ return;
+ case BTR_SCRUB_TURNED_OFF:
+ case BTR_SCRUB_SKIP_PAGE_AND_COMPLETE_SPACE:
+ btr_scrub_complete_space(scrub_data);
+ return;
+ }
+
+ /* unknown value. should not happen */
+ ut_a(0);
+}
+
+/****************************************************************
+Try to scrub a page using btr_page_reorganize_low
+return DB_SUCCESS on success or DB_OVERFLOW on failure */
+static
+dberr_t
+btr_optimistic_scrub(
+/*==================*/
+ btr_scrub_t* scrub_data, /*!< in: data with scrub state */
+ buf_block_t* block, /*!< in: block to scrub */
+ dict_index_t* index, /*!< in: index */
+ mtr_t* mtr) /*!< in: mtr */
+{
+#ifdef UNIV_DEBUG
+ if (srv_scrub_force_testing &&
+ page_get_n_recs(buf_block_get_frame(block)) > 2 &&
+ (rand() % 100) < test_pessimistic_scrub_pct) {
+
+ fprintf(stderr,
+ "scrub: simulate btr_page_reorganize failed %lu:%lu "
+ " table: %llu:%s index: %llu:%s get_n_recs(): %lu\n",
+ buf_block_get_space(block),
+ buf_block_get_page_no(block),
+ (ulonglong)scrub_data->current_table->id,
+ scrub_data->current_table->name,
+ (ulonglong)scrub_data->current_index->id,
+ scrub_data->current_index->name,
+ page_get_n_recs(buf_block_get_frame(block)));
+ return DB_OVERFLOW;
+ }
+#endif
+
+ page_cur_t cur;
+ page_cur_set_before_first(block, &cur);
+ bool recovery = false;
+ if (!btr_page_reorganize_low(recovery, scrub_compression_level,
+ &cur, index, mtr)) {
+ return DB_OVERFLOW;
+ }
+
+ /* We play safe and reset the free bits */
+ if (!dict_index_is_clust(index) &&
+ page_is_leaf(buf_block_get_frame(block))) {
+
+ ibuf_reset_free_bits(block);
+ }
+
+ scrub_data->scrub_stat.page_reorganizations++;
+ return DB_SUCCESS;
+}
+
+/****************************************************************
+Try to scrub a page by splitting it
+return DB_SUCCESS on success
+DB_UNDERFLOW if page has too few records
+DB_OUT_OF_FILE_SPACE if we can't find space for split */
+static
+dberr_t
+btr_pessimistic_scrub(
+/*==================*/
+ btr_scrub_t* scrub_data, /*!< in: data with scrub state */
+ buf_block_t* block, /*!< in: block to scrub */
+ dict_index_t* index, /*!< in: index */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ page_t* page = buf_block_get_frame(block);
+ if (page_get_n_recs(page) < 2) {
+ /**
+ * There is no way we can split a page with < 2 records
+ */
+ log_scrub_failure(scrub_data, block, DB_UNDERFLOW);
+ return DB_UNDERFLOW;
+ }
+
+ /**
+ * Splitting page needs new space, allocate it here
+ * so that splitting won't fail due to this */
+ ulint n_extents = 3;
+ ulint n_reserved = 0;
+ if (!fsp_reserve_free_extents(&n_reserved, index->space,
+ n_extents, FSP_NORMAL, mtr)) {
+ log_scrub_failure(scrub_data, block,
+ DB_OUT_OF_FILE_SPACE);
+ return DB_OUT_OF_FILE_SPACE;
+ }
+
+ /* read block variables */
+ ulint space = buf_block_get_space(block);
+ ulint page_no = buf_block_get_page_no(block);
+ ulint zip_size = buf_block_get_zip_size(block);
+ ulint left_page_no = btr_page_get_prev(page, mtr);
+ ulint right_page_no = btr_page_get_next(page, mtr);
+
+ /**
+ * When splitting page, we need X-latches on left/right brothers
+ * see e.g btr_cur_latch_leaves
+ */
+
+ if (left_page_no != FIL_NULL) {
+ /**
+ * pages need to be locked left-to-right, release block
+ * and re-lock. We still have x-lock on index
+ * so this should be safe
+ */
+ mtr_release_buf_page_at_savepoint(mtr, scrub_data->savepoint,
+ block);
+
+ buf_block_t* get_block = btr_block_get(
+ space, zip_size, left_page_no,
+ RW_X_LATCH, index, mtr);
+ get_block->check_index_page_at_flush = TRUE;
+
+ /**
+ * Refetch block and re-initialize page
+ */
+ block = btr_block_get(
+ space, zip_size, page_no,
+ RW_X_LATCH, index, mtr);
+
+ page = buf_block_get_frame(block);
+
+ /**
+ * structure should be unchanged
+ */
+ ut_a(left_page_no == btr_page_get_prev(page, mtr));
+ ut_a(right_page_no == btr_page_get_next(page, mtr));
+ }
+
+ if (right_page_no != FIL_NULL) {
+ buf_block_t* get_block = btr_block_get(
+ space, zip_size, right_page_no,
+ RW_X_LATCH, index, mtr);
+ get_block->check_index_page_at_flush = TRUE;
+ }
+
+ /* arguments to btr_page_split_and_insert */
+ mem_heap_t* heap = NULL;
+ dtuple_t* entry = NULL;
+ ulint* offsets = NULL;
+ ulint n_ext = 0;
+ ulint flags = BTR_MODIFY_TREE;
+
+ /**
+ * position a cursor on first record on page
+ */
+ rec_t* rec = page_rec_get_next(page_get_infimum_rec(page));
+ btr_cur_t cursor;
+ btr_cur_position(index, rec, block, &cursor);
+
+ /**
+ * call split page with NULL as argument for entry to insert
+ */
+ if (dict_index_get_page(index) == buf_block_get_page_no(block)) {
+ /* The page is the root page
+ * NOTE: ibuf_reset_free_bits is called inside
+ * btr_root_raise_and_insert */
+ rec = btr_root_raise_and_insert(
+ flags, &cursor, &offsets, &heap, entry, n_ext, mtr);
+ } else {
+ /* We play safe and reset the free bits
+ * NOTE: need to call this prior to btr_page_split_and_insert */
+ if (!dict_index_is_clust(index) &&
+ page_is_leaf(buf_block_get_frame(block))) {
+
+ ibuf_reset_free_bits(block);
+ }
+
+ rec = btr_page_split_and_insert(
+ flags, &cursor, &offsets, &heap, entry, n_ext, mtr);
+ }
+
+ if (heap) {
+ mem_heap_free(heap);
+ }
+
+ if (n_reserved > 0) {
+ fil_space_release_free_extents(index->space, n_reserved);
+ }
+
+ scrub_data->scrub_stat.page_splits++;
+ return DB_SUCCESS;
+}
+
+/****************************************************************
+Locate index by id for a table
+return index or NULL */
+static
+dict_index_t*
+find_index(
+/*========*/
+ dict_table_t* table, /*!< in: table */
+ index_id_t index_id) /*!< in: index id */
+{
+ if (table != NULL) {
+ dict_index_t* index = dict_table_get_first_index(table);
+ while (index != NULL) {
+ if (index->id == index_id)
+ return index;
+ index = dict_table_get_next_index(index);
+ }
+ }
+
+ return NULL;
+}
+
+/****************************************************************
+Check if table should be scrubbed
+*/
+static
+bool
+btr_scrub_table_needs_scrubbing(
+/*============================*/
+ dict_table_t* table) /*!< in: table */
+{
+ if (table == NULL)
+ return false;
+
+ if (table->stats_bg_flag & BG_STAT_SHOULD_QUIT) {
+ return false;
+ }
+
+ if (table->to_be_dropped) {
+ return false;
+ }
+
+ if (table->corrupted) {
+ return false;
+ }
+
+ return true;
+}
+
+/****************************************************************
+Check if index should be scrubbed
+*/
+static
+bool
+btr_scrub_index_needs_scrubbing(
+/*============================*/
+ dict_index_t* index) /*!< in: index */
+{
+ if (index == NULL)
+ return false;
+
+ if (dict_index_is_ibuf(index)) {
+ return false;
+ }
+
+ if (dict_index_is_online_ddl(index)) {
+ return false;
+ }
+
+ return true;
+}
+
+/****************************************************************
+Get table and index and store them in scrub_data
+*/
+static
+void
+btr_scrub_get_table_and_index(
+/*=========================*/
+ btr_scrub_t* scrub_data, /*!< in/out: scrub data */
+ index_id_t index_id) /*!< in: index id */
+{
+ /* first check if it's an index to current table */
+ scrub_data->current_index = find_index(scrub_data->current_table,
+ index_id);
+
+ if (scrub_data->current_index != NULL) {
+ /* yes it was */
+ return;
+ }
+
+ if (!btr_scrub_lock_dict(scrub_data->space, false)) {
+ btr_scrub_complete_space(scrub_data);
+ return;
+ }
+
+ /* close current table (if any) */
+ if (scrub_data->current_table != NULL) {
+ btr_scrub_table_close(scrub_data->current_table);
+ scrub_data->current_table = NULL;
+ }
+
+ /* argument to dict_table_open_on_index_id */
+ bool dict_locked = true;
+
+ /* open table based on index_id */
+ dict_table_t* table = dict_table_open_on_index_id(
+ index_id,
+ dict_locked);
+
+ if (table != NULL) {
+ /* mark table as being scrubbed */
+ table->stats_bg_flag |= BG_SCRUB_IN_PROGRESS;
+
+ if (!btr_scrub_table_needs_scrubbing(table)) {
+ btr_scrub_table_close(table);
+ btr_scrub_unlock_dict();
+ return;
+ }
+ }
+
+ btr_scrub_unlock_dict();
+ scrub_data->current_table = table;
+ scrub_data->current_index = find_index(table, index_id);
+}
+
+/****************************************************************
+Handle free page */
+UNIV_INTERN
+int
+btr_scrub_free_page(
+/*====================*/
+ btr_scrub_t* scrub_data, /*!< in/out: scrub data */
+ buf_block_t* block, /*!< in: block to scrub */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ // TODO(jonaso): scrub only what is actually needed
+
+ {
+ /* note: perform both the memset and setting of FIL_PAGE_TYPE
+ * wo/ logging, so that if we crash before the page is flushed
+ * it will be found by the scrubbing thread again
+ */
+ memset(buf_block_get_frame(block) + PAGE_HEADER, 0,
+ UNIV_PAGE_SIZE - PAGE_HEADER);
+
+ mach_write_to_2(buf_block_get_frame(block) + FIL_PAGE_TYPE,
+ FIL_PAGE_TYPE_ALLOCATED);
+ }
+
+ ulint compact = 1;
+ page_create(block, mtr, compact);
+
+ mtr_commit(mtr);
+
+ /* page doesn't need further processing => SKIP
+ * and close table/index so that we don't keep references too long */
+ return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
+}
+
+/****************************************************************
+Recheck if a page needs scrubbing, and if it does load appropriate
+table and index */
+UNIV_INTERN
+int
+btr_scrub_recheck_page(
+/*====================*/
+ btr_scrub_t* scrub_data, /*!< in/out: scrub data */
+ buf_block_t* block, /*!< in: block */
+ btr_scrub_page_allocation_status_t allocated, /*!< in: is block
+ allocated or free */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ /* recheck if page needs scrubbing (knowing allocation status) */
+ int needs_scrubbing = btr_page_needs_scrubbing(
+ scrub_data, block, allocated);
+
+ if (needs_scrubbing != BTR_SCRUB_PAGE) {
+ mtr_commit(mtr);
+ return needs_scrubbing;
+ }
+
+ if (allocated == BTR_SCRUB_PAGE_FREE) {
+ /** we don't need to load table/index for free pages
+ * so scrub directly here */
+ /* mtr is committed inside btr_scrub_page_free */
+ return btr_scrub_free_page(scrub_data,
+ block,
+ mtr);
+ }
+
+ page_t* page = buf_block_get_frame(block);
+ index_id_t index_id = btr_page_get_index_id(page);
+
+ if (scrub_data->current_index == NULL ||
+ scrub_data->current_index->id != index_id) {
+
+ /**
+ * commit mtr (i.e release locks on block)
+ * and try to get table&index potentially loading it
+ * from disk
+ */
+ mtr_commit(mtr);
+ btr_scrub_get_table_and_index(scrub_data, index_id);
+ } else {
+ /* we already have correct index
+ * commit mtr so that we can lock index before fetching page
+ */
+ mtr_commit(mtr);
+ }
+
+ /* check if table is about to be dropped */
+ if (!btr_scrub_table_needs_scrubbing(scrub_data->current_table)) {
+ return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
+ }
+
+ /* check if index is scrubbable */
+ if (!btr_scrub_index_needs_scrubbing(scrub_data->current_index)) {
+ return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
+ }
+
+ mtr_start(mtr);
+ mtr_x_lock(dict_index_get_lock(scrub_data->current_index), mtr);
+ /** set savepoint for X-latch of block */
+ scrub_data->savepoint = mtr_set_savepoint(mtr);
+ return BTR_SCRUB_PAGE;
+}
+
+/****************************************************************
+Perform actual scrubbing of page */
+UNIV_INTERN
+int
+btr_scrub_page(
+/*============*/
+ btr_scrub_t* scrub_data, /*!< in/out: scrub data */
+ buf_block_t* block, /*!< in: block */
+ btr_scrub_page_allocation_status_t allocated, /*!< in: is block
+ allocated or free */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ /* recheck if page needs scrubbing (knowing allocation status) */
+ int needs_scrubbing = btr_page_needs_scrubbing(
+ scrub_data, block, allocated);
+ if (needs_scrubbing != BTR_SCRUB_PAGE) {
+ mtr_commit(mtr);
+ return needs_scrubbing;
+ }
+
+ if (allocated == BTR_SCRUB_PAGE_FREE) {
+ /* mtr is committed inside btr_scrub_page_free */
+ return btr_scrub_free_page(scrub_data,
+ block,
+ mtr);
+ }
+
+ /* check that table/index still match now that they are loaded */
+
+ if (scrub_data->current_table->space != scrub_data->space) {
+ /* this is truncate table */
+ mtr_commit(mtr);
+ return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
+ }
+
+ if (scrub_data->current_index->space != scrub_data->space) {
+ /* this is truncate table */
+ mtr_commit(mtr);
+ return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
+ }
+
+ if (scrub_data->current_index->page == FIL_NULL) {
+ /* this is truncate table */
+ mtr_commit(mtr);
+ return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
+ }
+
+ if (btr_page_get_index_id(buf_block_get_frame(block)) !=
+ scrub_data->current_index->id) {
+ /* page has been reallocated to new index */
+ mtr_commit(mtr);
+ return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
+ }
+
+ /* check if I can scrub (reorganize) page wo/ overflow */
+ if (btr_optimistic_scrub(scrub_data,
+ block,
+ scrub_data->current_index,
+ mtr) != DB_SUCCESS) {
+
+ /**
+ * Can't reorganize page...need to split it
+ */
+ btr_pessimistic_scrub(scrub_data,
+ block,
+ scrub_data->current_index,
+ mtr);
+ }
+ mtr_commit(mtr);
+
+ return BTR_SCRUB_SKIP_PAGE; // no further action needed
+}
+
+/**************************************************************//**
+Start iterating a space */
+UNIV_INTERN
+bool
+btr_scrub_start_space(
+/*===================*/
+ ulint space, /*!< in: space */
+ btr_scrub_t* scrub_data) /*!< in/out: scrub data */
+{
+ scrub_data->space = space;
+ scrub_data->current_table = NULL;
+ scrub_data->current_index = NULL;
+
+ scrub_data->compressed = fil_space_get_zip_size(space) > 0;
+ scrub_data->scrubbing = check_scrub_setting(scrub_data);
+ return scrub_data->scrubbing;
+}
+
+/***********************************************************************
+Update global statistics with thread statistics */
+static
+void
+btr_scrub_update_total_stat(btr_scrub_t *scrub_data)
+{
+ mutex_enter(&scrub_stat_mutex);
+ scrub_stat.page_reorganizations +=
+ scrub_data->scrub_stat.page_reorganizations;
+ scrub_stat.page_splits +=
+ scrub_data->scrub_stat.page_splits;
+ scrub_stat.page_split_failures_underflow +=
+ scrub_data->scrub_stat.page_split_failures_underflow;
+ scrub_stat.page_split_failures_out_of_filespace +=
+ scrub_data->scrub_stat.page_split_failures_out_of_filespace;
+ scrub_stat.page_split_failures_missing_index +=
+ scrub_data->scrub_stat.page_split_failures_missing_index;
+ scrub_stat.page_split_failures_unknown +=
+ scrub_data->scrub_stat.page_split_failures_unknown;
+ mutex_exit(&scrub_stat_mutex);
+
+ // clear stat
+ memset(&scrub_data->scrub_stat, 0, sizeof(scrub_data->scrub_stat));
+}
+
+/**************************************************************//**
+Complete iterating a space */
+UNIV_INTERN
+bool
+btr_scrub_complete_space(
+/*=====================*/
+ btr_scrub_t* scrub_data) /*!< in/out: scrub data */
+{
+ btr_scrub_table_close_for_thread(scrub_data);
+ btr_scrub_update_total_stat(scrub_data);
+ return scrub_data->scrubbing;
+}
+
+/*********************************************************************
+Return scrub statistics */
+void
+btr_scrub_total_stat(btr_scrub_stat_t *stat)
+{
+ mutex_enter(&scrub_stat_mutex);
+ *stat = scrub_stat;
+ mutex_exit(&scrub_stat_mutex);
+}
+
+/*********************************************************************
+Init global variables */
+UNIV_INTERN
+void
+btr_scrub_init()
+{
+ mutex_create(scrub_stat_mutex_key,
+ &scrub_stat_mutex, SYNC_NO_ORDER_CHECK);
+
+ memset(&scrub_stat, 0, sizeof(scrub_stat));
+}
+
+/*********************************************************************
+Cleanup globals */
+UNIV_INTERN
+void
+btr_scrub_cleanup()
+{
+ mutex_free(&scrub_stat_mutex);
+}
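
btr_scrub_lock_dict_func() above takes the dictionary mutex with a non-blocking retry
loop: it sleeps 250 ms between attempts, gives up early when the tablespace is being
closed (unless it is locking in order to close a table itself), and logs a warning every
30 seconds of waiting. A hedged standalone sketch of that pattern, using std::timed_mutex
as a stand-in for dict_sys->mutex:

    #include <chrono>
    #include <cstdio>
    #include <mutex>
    #include <thread>

    // Returns true once the mutex is held, false if the caller should give
    // up because the space is closing. Mirrors btr_scrub_lock_dict_func().
    bool lock_with_backoff(std::timed_mutex& dict_mutex,
                           bool lock_to_close_table,
                           bool (*space_is_closing)()) {
            using clock = std::chrono::steady_clock;
            const auto start = clock::now();
            auto last_warning = start;

            while (!dict_mutex.try_lock()) {
                    if (!lock_to_close_table && space_is_closing()) {
                            return false;  // let the caller retry later
                    }
                    std::this_thread::sleep_for(std::chrono::milliseconds(250));

                    const auto now = clock::now();
                    if (now - last_warning >= std::chrono::seconds(30)) {
                            const auto waited = std::chrono::duration_cast<
                                    std::chrono::seconds>(now - start).count();
                            std::fprintf(stderr,
                                    "WARNING: waited %lld seconds for dict lock\n",
                                    static_cast<long long>(waited));
                            last_warning = now;
                    }
            }
            return true;   // mutex acquired
    }
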
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index f4ad7875bea..8b9f5a49e7d 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -54,7 +54,9 @@ Created 11/5/1995 Heikki Tuuri
#include "page0zip.h"
#include "srv0mon.h"
#include "buf0checksum.h"
-
+#include "fil0pageencryption.h"
+#include "fil0pagecompress.h"
+#include "ut0byte.h"
#include <new>
/*
@@ -502,12 +504,13 @@ buf_page_is_corrupted(
ulint zip_size) /*!< in: size of compressed page;
0 for uncompressed pages */
{
+ ulint page_encrypted = fil_page_is_compressed_encrypted(read_buf) || fil_page_is_encrypted(read_buf);
ulint checksum_field1;
ulint checksum_field2;
ibool crc32_inited = FALSE;
ib_uint32_t crc32 = ULINT32_UNDEFINED;
- if (!zip_size
+ if (!page_encrypted && !zip_size
&& memcmp(read_buf + FIL_PAGE_LSN + 4,
read_buf + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
@@ -560,6 +563,9 @@ buf_page_is_corrupted(
if (zip_size) {
return(!page_zip_verify_checksum(read_buf, zip_size));
}
+ if (page_encrypted) {
+ return (FALSE);
+ }
checksum_field1 = mach_read_from_4(
read_buf + FIL_PAGE_SPACE_OR_CHKSUM);
@@ -995,6 +1001,11 @@ buf_block_init(
block->page.state = BUF_BLOCK_NOT_USED;
block->page.buf_fix_count = 0;
block->page.io_fix = BUF_IO_NONE;
+ block->page.crypt_buf = NULL;
+ block->page.crypt_buf_free = NULL;
+ block->page.comp_buf = NULL;
+ block->page.comp_buf_free = NULL;
+ block->page.key_version = 0;
block->modify_clock = 0;
@@ -3374,11 +3385,13 @@ page is not in the buffer pool it is not loaded and NULL is returned.
Suitable for using when holding the lock_sys_t::mutex.
@return pointer to a page or NULL */
UNIV_INTERN
-const buf_block_t*
+buf_block_t*
buf_page_try_get_func(
/*==================*/
ulint space_id,/*!< in: tablespace id */
ulint page_no,/*!< in: page number */
+ ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
+ bool possibly_freed,
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mini-transaction */
@@ -3416,8 +3429,12 @@ buf_page_try_get_func(
buf_block_buf_fix_inc(block, file, line);
mutex_exit(&block->mutex);
- fix_type = MTR_MEMO_PAGE_S_FIX;
- success = rw_lock_s_lock_nowait(&block->lock, file, line);
+ if (rw_latch == RW_S_LATCH) {
+ fix_type = MTR_MEMO_PAGE_S_FIX;
+ success = rw_lock_s_lock_nowait(&block->lock, file, line);
+ } else {
+ success = false;
+ }
if (!success) {
/* Let us try to get an X-latch. If the current thread
@@ -3442,9 +3459,11 @@ buf_page_try_get_func(
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- mutex_enter(&block->mutex);
- ut_a(!block->page.file_page_was_freed);
- mutex_exit(&block->mutex);
+ if (!possibly_freed) {
+ mutex_enter(&block->mutex);
+ ut_a(!block->page.file_page_was_freed);
+ mutex_exit(&block->mutex);
+ }
#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
@@ -3474,6 +3493,12 @@ buf_page_init_low(
bpage->newest_modification = 0;
bpage->oldest_modification = 0;
bpage->write_size = 0;
+ bpage->crypt_buf = NULL;
+ bpage->crypt_buf_free = NULL;
+ bpage->comp_buf = NULL;
+ bpage->comp_buf_free = NULL;
+ bpage->key_version = 0;
+
HASH_INVALIDATE(bpage, hash);
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
bpage->file_page_was_freed = FALSE;
@@ -3987,7 +4012,7 @@ buf_page_create(
Then InnoDB could in a crash recovery print a big, false, corruption
warning if the stamp contains an lsn bigger than the ib_logfile lsn. */
- memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
+ memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
@@ -4187,6 +4212,16 @@ buf_page_io_complete(
ulint read_space_id;
byte* frame;
+ if (!buf_page_decrypt_after_read(bpage)) {
+ /* encryption error! */
+ if (buf_page_get_zip_size(bpage)) {
+ frame = bpage->zip.data;
+ } else {
+ frame = ((buf_block_t*) bpage)->frame;
+ }
+ goto corrupt;
+ }
+
if (buf_page_get_zip_size(bpage)) {
frame = bpage->zip.data;
buf_pool->n_pend_unzip++;
@@ -4327,6 +4362,9 @@ corrupt:
bpage->offset, buf_page_get_zip_size(bpage),
TRUE);
}
+ } else {
+ /* io_type == BUF_IO_WRITE */
+ buf_page_encrypt_after_write(bpage);
}
buf_pool_mutex_enter(buf_pool);
@@ -5561,3 +5599,227 @@ buf_page_init_for_backup_restore(
}
}
#endif /* !UNIV_HOTBACKUP */
+
+/********************************************************************//**
+Encrypts a buffer page right before it's flushed to disk
+*/
+byte*
+buf_page_encrypt_before_write(
+/*==========================*/
+ buf_page_t* bpage, /*!< in/out: buffer page to be flushed */
+ const byte* src_frame) /*!< in: src frame */
+{
+ if (srv_encrypt_tables == FALSE) {
+ /* Encryption is disabled */
+ return const_cast<byte*>(src_frame);
+ }
+
+ if (bpage->offset == 0) {
+ /* Page 0 of a tablespace is not encrypted */
+ ut_ad(bpage->key_version == 0);
+ return const_cast<byte*>(src_frame);
+ }
+
+ if (fil_space_check_encryption_write(bpage->space) == false) {
+ /* An unencrypted table */
+ bpage->key_version = 0;
+ return const_cast<byte*>(src_frame);
+ }
+
+ if (bpage->space == TRX_SYS_SPACE && bpage->offset == TRX_SYS_PAGE_NO) {
+ /* don't encrypt page as it contains address to dblwr buffer */
+ bpage->key_version = 0;
+ return const_cast<byte*>(src_frame);
+ }
+
+ ulint zip_size = buf_page_get_zip_size(bpage);
+ ulint page_size = (zip_size) ? zip_size : UNIV_PAGE_SIZE;
+
+ /**
+ * TODO(jonaso): figure out more clever malloc strategy
+ *
+ * This implementation does a malloc/free per iop for encrypted
+ * tablespaces. Alternative strategies that have been considered are
+ *
+ * 1) use buf_block_alloc (i.e alloc from buffer pool)
+ * this does not work as buf_block_alloc will then be called
+ * when needing to flush a page, which might be triggered
+ * due to shortage of memory in buffer pool
+ * 2) allocate a buffer per fil_node_t
+ * this would break abstraction layers and has therefore not been
+ * considered a lot.
+ */
+
+ if (bpage->crypt_buf_free == NULL) {
+ bpage->crypt_buf_free = (byte*)malloc(page_size*2);
+ // TODO: Is 4k alignment enough?
+ bpage->crypt_buf = (byte *)ut_align(bpage->crypt_buf_free, page_size);
+ }
+
+ byte *dst_frame = bpage->crypt_buf;
+
+ if (!fil_space_is_page_compressed(bpage->space)) {
+ // encrypt page content
+ fil_space_encrypt(bpage->space, bpage->offset,
+ bpage->newest_modification,
+ src_frame, zip_size, dst_frame, 0);
+
+ unsigned key_version =
+ mach_read_from_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+ ut_ad(key_version == 0 || key_version >= bpage->key_version);
+ bpage->key_version = key_version;
+
+ // return dst_frame which will be written
+ return dst_frame;
+ } else {
+ // We do compression and encryption later on os0file.cc
+ dst_frame = (byte *)src_frame;
+ }
+
+ // return dst_frame which will be written
+ return dst_frame;
+}
+
+/********************************************************************//**
+Release memory after encrypted page has been written to disk
+*/
+ibool
+buf_page_encrypt_after_write(
+/*=========================*/
+ buf_page_t* bpage) /*!< in/out: buffer page flushed */
+{
+ if (bpage->crypt_buf_free != NULL) {
+ free(bpage->crypt_buf_free);
+ bpage->crypt_buf_free = NULL;
+ bpage->crypt_buf = NULL;
+ }
+
+ if (bpage->comp_buf_free != NULL) {
+ free(bpage->comp_buf_free);
+ bpage->comp_buf_free = NULL;
+ bpage->comp_buf = NULL;
+ }
+
+ return (TRUE);
+}
+
+/********************************************************************//**
+Allocates memory to read in an encrypted page
+*/
+byte*
+buf_page_decrypt_before_read(
+/*=========================*/
+ buf_page_t* bpage, /*!< in/out: buffer page to be read */
+ ulint zip_size) /*!< in: compressed page size, or 0 */
+{
+ ulint size = (zip_size) ? zip_size : UNIV_PAGE_SIZE;
+
+ /*
+ Here we only need to allocate space for non-header pages
+ in the case of file space encryption. Table encryption is handled
+ later.
+ */
+ if (!srv_encrypt_tables || bpage->offset == 0 ||
+ fil_space_check_encryption_read(bpage->space) == false)
+ return zip_size ? bpage->zip.data : ((buf_block_t*) bpage)->frame;
+
+ if (bpage->crypt_buf_free == NULL)
+ {
+ // allocate buffer to read data into
+ bpage->crypt_buf_free = (byte*)malloc(size*2);
+ // TODO: Is 4K alignment enough?
+ bpage->crypt_buf = (byte*)ut_align(bpage->crypt_buf_free, size);
+ }
+ return bpage->crypt_buf;
+}
+
+/********************************************************************//**
+Decrypt page after it has been read from disk
+*/
+ibool
+buf_page_decrypt_after_read(
+/*========================*/
+ buf_page_t* bpage) /*!< in/out: buffer page read from disk */
+{
+ ut_ad(bpage->key_version == 0);
+ ulint zip_size = buf_page_get_zip_size(bpage);
+ ulint size = (zip_size) ? zip_size : UNIV_PAGE_SIZE;
+
+ byte* dst_frame = (zip_size) ? bpage->zip.data :
+ ((buf_block_t*) bpage)->frame;
+
+ if (bpage->offset == 0) {
+ /* File header pages are not encrypted */
+ ut_a(bpage->crypt_buf == NULL);
+ return (TRUE);
+ }
+
+
+ const byte* src_frame = bpage->crypt_buf != NULL ?
+ bpage->crypt_buf : dst_frame;
+
+ unsigned key_version =
+ mach_read_from_4(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+
+ bool page_compressed_encrypted = fil_page_is_compressed_encrypted(dst_frame);
+
+ if (key_version == 0) {
+ /* the page we read is unencrypted */
+ if (dst_frame != src_frame) {
+ /* but we had allocated a crypt_buf */
+ // TODO: Can this be avoided ?
+ memcpy(dst_frame, src_frame, size);
+ }
+ } else {
+ /* the page we read is encrypted */
+ if (dst_frame == src_frame) {
+ /* but we had NOT allocated a crypt buf
+ * malloc a buffer, copy page to it
+ * and then decrypt from that into real page*/
+ bpage->crypt_buf_free = (byte *)malloc(UNIV_PAGE_SIZE*2);
+ // TODO: is 4k alignment enough?
+ src_frame = bpage->crypt_buf = (byte*)ut_align(bpage->crypt_buf_free, UNIV_PAGE_SIZE);
+ memcpy(bpage->crypt_buf, dst_frame, size);
+ }
+
+ /* decrypt from src_frame to dst_frame */
+ fil_space_decrypt(bpage->space,
+ src_frame, size, dst_frame);
+
+ /* decompress from dst_frame to comp_buf and then copy to
+ buffer pool */
+ if (page_compressed_encrypted) {
+ if (bpage->comp_buf_free == NULL) {
+ bpage->comp_buf_free = (byte *)malloc(UNIV_PAGE_SIZE*2);
+ // TODO: is 4k alignment enough?
+ bpage->comp_buf = (byte*)ut_align(bpage->comp_buf_free, UNIV_PAGE_SIZE);
+ }
+
+ fil_decompress_page(bpage->comp_buf, dst_frame, size, NULL);
+ }
+ }
+ bpage->key_version = key_version;
+
+ if (bpage->crypt_buf_free != NULL) {
+ // free temp page
+ free(bpage->crypt_buf_free);
+ bpage->crypt_buf = NULL;
+ bpage->crypt_buf_free = NULL;
+ }
+ return (TRUE);
+}
+
+/********************************************************************//**
+Release memory allocated for decryption
+*/
+void
+buf_page_decrypt_cleanup(
+/*=====================*/
+ buf_page_t* bpage) /*!< in/out: buffer page */
+{
+ if (bpage->crypt_buf != NULL) {
+ free(bpage->crypt_buf_free);
+ bpage->crypt_buf = NULL;
+ bpage->crypt_buf_free = NULL;
+ }
+}
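
Several of the buf0buf.cc additions above allocate the per-page crypt_buf/comp_buf by
over-allocating twice the page size with plain malloc() and then rounding the pointer up
with ut_align(), so the I/O buffer starts on a page-size boundary while the original
pointer is kept for free(). A small sketch of that idiom; align_up() mimics ut_align(),
and whether 4K alignment would already suffice is left open in the code itself.

    #include <cstdint>
    #include <cstdlib>

    // Round a pointer up to the next multiple of 'alignment' (a power of two).
    static inline uint8_t* align_up(void* p, std::size_t alignment) {
            const uintptr_t a = alignment;
            const uintptr_t v = reinterpret_cast<uintptr_t>(p);
            return reinterpret_cast<uint8_t*>((v + a - 1) & ~(a - 1));
    }

    struct CryptBuf {
            uint8_t* base;     // what malloc() returned (crypt_buf_free)
            uint8_t* aligned;  // aligned start handed to the I/O layer (crypt_buf)
    };

    CryptBuf alloc_crypt_buf(std::size_t page_size) {
            CryptBuf b;
            b.base    = static_cast<uint8_t*>(std::malloc(page_size * 2));
            b.aligned = align_up(b.base, page_size);
            return b;
    }

    void free_crypt_buf(CryptBuf& b) {
            std::free(b.base);          // always free the original pointer
            b.base = b.aligned = nullptr;
    }
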
diff --git a/storage/innobase/buf/buf0checksum.cc b/storage/innobase/buf/buf0checksum.cc
index 4ba65d6f2d0..aa02cda2937 100644
--- a/storage/innobase/buf/buf0checksum.cc
+++ b/storage/innobase/buf/buf0checksum.cc
@@ -64,7 +64,8 @@ buf_calc_page_crc32(
there we store the old formula checksum. */
checksum = ut_crc32(page + FIL_PAGE_OFFSET,
- FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
+ FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
+ - FIL_PAGE_OFFSET)
^ ut_crc32(page + FIL_PAGE_DATA,
UNIV_PAGE_SIZE - FIL_PAGE_DATA
- FIL_PAGE_END_LSN_OLD_CHKSUM);
@@ -94,7 +95,8 @@ buf_calc_page_new_checksum(
there we store the old formula checksum. */
checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
- FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
+ FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
+ - FIL_PAGE_OFFSET)
+ ut_fold_binary(page + FIL_PAGE_DATA,
UNIV_PAGE_SIZE - FIL_PAGE_DATA
- FIL_PAGE_END_LSN_OLD_CHKSUM);
@@ -119,7 +121,7 @@ buf_calc_page_old_checksum(
{
ulint checksum;
- checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
+ checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
checksum = checksum & 0xFFFFFFFFUL;
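
The renamed constant changes nothing about what the checksum covers: both
buf_calc_page_crc32() and buf_calc_page_new_checksum() hash two ranges that stop just
before the flush-LSN/key-version slot and resume at the start of the page payload, so
that field can carry a key version on encrypted pages without affecting these checksums.
A sketch of the covered ranges, with the usual InnoDB header offsets written out as
assumptions and a toy fold standing in for ut_crc32()/ut_fold_binary():

    #include <cstddef>
    #include <cstdint>

    enum {
            PAGE_SIZE            = 16384, // UNIV_PAGE_SIZE (assumed default)
            PAGE_OFFSET          = 4,     // FIL_PAGE_OFFSET
            FLUSH_LSN_OR_KEY_VER = 26,    // FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
            PAGE_DATA            = 38,    // FIL_PAGE_DATA
            END_LSN_OLD_CHKSUM   = 8      // FIL_PAGE_END_LSN_OLD_CHKSUM
    };

    static uint32_t fold(const uint8_t* p, size_t len) {
            uint32_t h = 0;               // toy stand-in for ut_crc32()
            for (size_t i = 0; i < len; i++) h = h * 131 + p[i];
            return h;
    }

    uint32_t page_checksum(const uint8_t* page) {
            // range 1: page number up to (not including) the
            // flush-LSN / key-version field
            uint32_t c1 = fold(page + PAGE_OFFSET,
                               FLUSH_LSN_OR_KEY_VER - PAGE_OFFSET);
            // range 2: page payload, excluding the old-checksum trailer
            uint32_t c2 = fold(page + PAGE_DATA,
                               PAGE_SIZE - PAGE_DATA - END_LSN_OLD_CHKSUM);
            return c1 ^ c2;
    }
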
diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc
index c903f5fbffa..17e8143943c 100644
--- a/storage/innobase/buf/buf0dblwr.cc
+++ b/storage/innobase/buf/buf0dblwr.cc
@@ -50,6 +50,8 @@ UNIV_INTERN buf_dblwr_t* buf_dblwr = NULL;
/** Set to TRUE when the doublewrite buffer is being created */
UNIV_INTERN ibool buf_dblwr_being_created = FALSE;
+#define TRX_SYS_DOUBLEWRITE_BLOCKS 2
+
/****************************************************************//**
Determines if a page number is located inside the doublewrite buffer.
@return TRUE if the location is inside the two blocks of the
@@ -136,7 +138,7 @@ buf_dblwr_init(
/* There are two blocks of same size in the doublewrite
buffer. */
- buf_size = 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
+ buf_size = TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
/* There must be atleast one buffer for single page writes
and one buffer for batch writes. */
@@ -216,7 +218,7 @@ start_again:
"Doublewrite buffer not found: creating new");
if (buf_pool_get_curr_size()
- < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
+ < ((TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
+ FSP_EXTENT_SIZE / 2 + 100)
* UNIV_PAGE_SIZE)) {
@@ -252,7 +254,7 @@ start_again:
fseg_header = doublewrite + TRX_SYS_DOUBLEWRITE_FSEG;
prev_page_no = 0;
- for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
+ for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
+ FSP_EXTENT_SIZE / 2; i++) {
new_block = fseg_alloc_free_page(
fseg_header, prev_page_no + 1, FSP_UP, &mtr);
@@ -375,7 +377,7 @@ buf_dblwr_init_or_load_pages(
/* We do the file i/o past the buffer pool */
- unaligned_read_buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
+ unaligned_read_buf = static_cast<byte*>(ut_malloc(3 * UNIV_PAGE_SIZE));
read_buf = static_cast<byte*>(
ut_align(unaligned_read_buf, UNIV_PAGE_SIZE));
@@ -387,6 +389,14 @@ buf_dblwr_init_or_load_pages(
doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
+ if (mach_read_from_4(read_buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) != 0) {
+ fil_space_decrypt((ulint)TRX_SYS_SPACE,
+ read_buf,
+ UNIV_PAGE_SIZE, /* page size */
+ read_buf + UNIV_PAGE_SIZE);
+ doublewrite = read_buf + UNIV_PAGE_SIZE + TRX_SYS_DOUBLEWRITE;
+ }
+
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
== TRX_SYS_DOUBLEWRITE_MAGIC_N) {
/* The doublewrite buffer has been created */
@@ -429,7 +439,7 @@ buf_dblwr_init_or_load_pages(
page = buf;
- for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
+ for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * TRX_SYS_DOUBLEWRITE_BLOCKS; i++) {
ulint source_page_no;
@@ -514,11 +524,11 @@ buf_dblwr_process()
fil_io(OS_FILE_READ, true, space_id, zip_size,
page_no, 0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
- read_buf, NULL, 0);
+ read_buf, NULL, 0, 0);
- /* Check if the page is corrupt */
-
- if (buf_page_is_corrupted(true, read_buf, zip_size)) {
+ if (fil_space_verify_crypt_checksum(read_buf, zip_size)) {
+ /* page is encrypted and checksum is OK */
+ } else if (buf_page_is_corrupted(true, read_buf, zip_size)) {
fprintf(stderr,
"InnoDB: Warning: database page"
@@ -529,8 +539,11 @@ buf_dblwr_process()
" the doublewrite buffer.\n",
(ulong) space_id, (ulong) page_no);
- if (buf_page_is_corrupted(true,
- page, zip_size)) {
+ if (fil_space_verify_crypt_checksum(page, zip_size)) {
+ /* the doublewrite buffer page is encrypted and OK */
+ } else if (buf_page_is_corrupted(true,
+ page,
+ zip_size)) {
fprintf(stderr,
"InnoDB: Dump of the page:\n");
buf_page_print(
@@ -566,7 +579,7 @@ buf_dblwr_process()
fil_io(OS_FILE_WRITE, true, space_id,
zip_size, page_no, 0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
- page, NULL, 0);
+ page, NULL, 0, 0);
ib_logf(IB_LOG_LEVEL_INFO,
"Recovered the page from"
@@ -586,14 +599,34 @@ buf_dblwr_process()
zip_size, page_no, 0,
zip_size ? zip_size
: UNIV_PAGE_SIZE,
- page, NULL, 0);
+ page, NULL, 0, 0);
}
}
}
}
fil_flush_file_spaces(FIL_TABLESPACE);
- ut_free(unaligned_read_buf);
+
+ {
+ fprintf(stderr,
+ "Clear dblwr buffer after completing "
+ "processing of it...\n");
+
+ size_t bytes = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
+ byte *unaligned_buf = static_cast<byte*>(
+ ut_malloc(bytes + UNIV_PAGE_SIZE - 1));
+
+ byte *buf = static_cast<byte*>(
+ ut_align(unaligned_buf, UNIV_PAGE_SIZE));
+ memset(buf, 0, bytes);
+
+ fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
+ buf_dblwr->block1, 0, bytes, buf, NULL, NULL, 0);
+ fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
+ buf_dblwr->block2, 0, bytes, buf, NULL, NULL, 0);
+
+ ut_free(unaligned_buf);
+ }
}
/****************************************************************//**
@@ -665,7 +698,7 @@ buf_dblwr_update(
break;
case BUF_FLUSH_SINGLE_PAGE:
{
- const ulint size = 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
+ const ulint size = TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
ulint i;
mutex_enter(&buf_dblwr->mutex);
for (i = srv_doublewrite_batch_size; i < size; ++i) {
@@ -792,13 +825,15 @@ buf_dblwr_write_block_to_datafile(
? OS_FILE_WRITE
: OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER;
+ void * frame = buf_page_get_frame(bpage);
+
if (bpage->zip.data) {
fil_io(flags, sync, buf_page_get_space(bpage),
buf_page_get_zip_size(bpage),
buf_page_get_page_no(bpage), 0,
buf_page_get_zip_size(bpage),
- (void*) bpage->zip.data,
- (void*) bpage, 0);
+ frame,
+ (void*) bpage, 0, bpage->newest_modification);
return;
}
@@ -810,7 +845,7 @@ buf_dblwr_write_block_to_datafile(
fil_io(flags, sync, buf_block_get_space(block), 0,
buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
- (void*) block->frame, (void*) block, (ulint *)&bpage->write_size);
+ frame, (void*) block, (ulint *)&bpage->write_size, bpage->newest_modification );
}
/********************************************************************//**
@@ -904,7 +939,7 @@ try_again:
fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
buf_dblwr->block1, 0, len,
- (void*) write_buf, NULL, 0);
+ (void*) write_buf, NULL, 0, 0);
if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
/* No unwritten pages in the second block. */
@@ -920,7 +955,7 @@ try_again:
fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
buf_dblwr->block2, 0, len,
- (void*) write_buf, NULL, 0);
+ (void*) write_buf, NULL, 0, 0);
flush:
/* increment the doublewrite flushed pages counter */
@@ -1001,13 +1036,14 @@ try_again:
}
zip_size = buf_page_get_zip_size(bpage);
+ void * frame = buf_page_get_frame(bpage);
if (zip_size) {
UNIV_MEM_ASSERT_RW(bpage->zip.data, zip_size);
/* Copy the compressed page and clear the rest. */
memcpy(buf_dblwr->write_buf
+ UNIV_PAGE_SIZE * buf_dblwr->first_free,
- bpage->zip.data, zip_size);
+ frame, zip_size);
memset(buf_dblwr->write_buf
+ UNIV_PAGE_SIZE * buf_dblwr->first_free
+ zip_size, 0, UNIV_PAGE_SIZE - zip_size);
@@ -1018,7 +1054,7 @@ try_again:
memcpy(buf_dblwr->write_buf
+ UNIV_PAGE_SIZE * buf_dblwr->first_free,
- ((buf_block_t*) bpage)->frame, UNIV_PAGE_SIZE);
+ frame, UNIV_PAGE_SIZE);
}
buf_dblwr->buf_block_arr[buf_dblwr->first_free] = bpage;
@@ -1069,7 +1105,7 @@ buf_dblwr_write_single_page(
/* total number of slots available for single page flushes
starts from srv_doublewrite_batch_size to the end of the
buffer. */
- size = 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
+ size = TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
ut_a(size > srv_doublewrite_batch_size);
n_slots = size - srv_doublewrite_batch_size;
@@ -1140,23 +1176,25 @@ retry:
bytes in the doublewrite page with zeros. */
zip_size = buf_page_get_zip_size(bpage);
+ void * frame = buf_page_get_frame(bpage);
+
if (zip_size) {
memcpy(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i,
- bpage->zip.data, zip_size);
+ frame, zip_size);
memset(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i
+ zip_size, 0, UNIV_PAGE_SIZE - zip_size);
fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
offset, 0, UNIV_PAGE_SIZE,
(void*) (buf_dblwr->write_buf
- + UNIV_PAGE_SIZE * i), NULL, 0);
+ + UNIV_PAGE_SIZE * i), NULL, 0, bpage->newest_modification);
} else {
/* It is a regular page. Write it directly to the
doublewrite buffer */
fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
offset, 0, UNIV_PAGE_SIZE,
- (void*) ((buf_block_t*) bpage)->frame,
- NULL, 0);
+ frame,
+ NULL, 0, bpage->newest_modification);
}
/* Now flush the doublewrite buffer data to disk */
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 20500d10b3f..9c11ae2b43e 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -715,7 +715,7 @@ buf_flush_update_zip_checksum(
srv_checksum_algorithm)));
mach_write_to_8(page + FIL_PAGE_LSN, lsn);
- memset(page + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
+ memset(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
}
@@ -894,7 +894,7 @@ buf_flush_write_block_low(
mach_write_to_8(frame + FIL_PAGE_LSN,
bpage->newest_modification);
- memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
+ memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
break;
case BUF_BLOCK_FILE_PAGE:
frame = bpage->zip.data;
@@ -909,12 +909,20 @@ buf_flush_write_block_low(
break;
}
+ frame = buf_page_encrypt_before_write(bpage, frame);
+
if (!srv_use_doublewrite_buf || !buf_dblwr) {
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
- sync, buf_page_get_space(bpage), zip_size,
- buf_page_get_page_no(bpage), 0,
+ sync,
+ buf_page_get_space(bpage),
+ zip_size,
+ buf_page_get_page_no(bpage),
+ 0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
- frame, bpage, &bpage->write_size);
+ frame,
+ bpage,
+ &bpage->write_size,
+ bpage->newest_modification);
} else {
/* InnoDB uses doublewrite buffer and doublewrite buffer
@@ -926,10 +934,16 @@ buf_flush_write_block_low(
if (awrites == ATOMIC_WRITES_ON) {
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
- FALSE, buf_page_get_space(bpage), zip_size,
- buf_page_get_page_no(bpage), 0,
+ FALSE,
+ buf_page_get_space(bpage),
+ zip_size,
+ buf_page_get_page_no(bpage),
+ 0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
- frame, bpage, &bpage->write_size);
+ frame,
+ bpage,
+ &bpage->write_size,
+ bpage->newest_modification);
} else if (flush_type == BUF_FLUSH_SINGLE_PAGE) {
buf_dblwr_write_single_page(bpage, sync);
} else {
diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc
index 9e81d010d0f..19d18dcd870 100644
--- a/storage/innobase/buf/buf0rea.cc
+++ b/storage/innobase/buf/buf0rea.cc
@@ -176,6 +176,8 @@ buf_read_page_low(
ut_ad(buf_page_in_file(bpage));
+ byte* frame = buf_page_decrypt_before_read(bpage, zip_size);
+
if (sync) {
thd_wait_begin(NULL, THD_WAIT_DISKIO);
}
@@ -184,15 +186,15 @@ buf_read_page_low(
*err = fil_io(OS_FILE_READ | wake_later
| ignore_nonexistent_pages,
sync, space, zip_size, offset, 0, zip_size,
- bpage->zip.data, bpage, &bpage->write_size);
+ frame, bpage, &bpage->write_size, 0);
} else {
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
*err = fil_io(OS_FILE_READ | wake_later
| ignore_nonexistent_pages,
sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
- ((buf_block_t*) bpage)->frame, bpage,
- &bpage->write_size);
+ frame, bpage,
+ &bpage->write_size, 0);
}
if (sync) {
@@ -200,6 +202,7 @@ buf_read_page_low(
}
if (*err != DB_SUCCESS) {
+ buf_page_decrypt_cleanup(bpage);
if (ignore_nonexistent_pages || *err == DB_TABLESPACE_DELETED) {
buf_read_page_handle_error(bpage);
return(0);
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
index 25d4bb7c906..31a493855ad 100644
--- a/storage/innobase/dict/dict0dict.cc
+++ b/storage/innobase/dict/dict0dict.cc
@@ -321,10 +321,10 @@ dict_get_db_name_len(
Reserves the dictionary system mutex for MySQL. */
UNIV_INTERN
void
-dict_mutex_enter_for_mysql(void)
+dict_mutex_enter_for_mysql_func(const char * file, ulint line)
/*============================*/
{
- mutex_enter(&(dict_sys->mutex));
+ mutex_enter_func(&(dict_sys->mutex), file, line);
}
/********************************************************************//**
diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc
index 16e64da6619..149811dab60 100644
--- a/storage/innobase/dict/dict0load.cc
+++ b/storage/innobase/dict/dict0load.cc
@@ -1153,6 +1153,12 @@ loop:
space_id, name);
}
+ /* We need to read page 0 to get the (optional) IV
+ regardless of whether encryption is turned on or not,
+ since even if it is off we should be able to decrypt a
+ potentially already encrypted table */
+ bool read_page_0 = true;
+
/* We set the 2nd param (fix_dict = true)
here because we already have an x-lock on
dict_operation_lock and dict_sys->mutex. Besides,
@@ -1160,7 +1166,7 @@ loop:
If the filepath is not known, it will need to
be discovered. */
dberr_t err = fil_open_single_table_tablespace(
- false, srv_read_only_mode ? false : true,
+ read_page_0, srv_read_only_mode ? false : true,
space_id, dict_tf_to_fsp_flags(flags),
name, filepath);
@@ -2640,6 +2646,99 @@ check_rec:
return(table);
}
+/***********************************************************************//**
+Loads a table id based on the index id.
+@return true if found */
+static
+bool
+dict_load_table_id_on_index_id(
+/*==================*/
+ index_id_t index_id, /*!< in: index id */
+ table_id_t* table_id) /*!< out: table id */
+{
+ /* check hard coded indexes */
+ switch(index_id) {
+ case DICT_TABLES_ID:
+ case DICT_COLUMNS_ID:
+ case DICT_INDEXES_ID:
+ case DICT_FIELDS_ID:
+ *table_id = index_id;
+ return true;
+ case DICT_TABLE_IDS_ID:
+ /* The following is a secondary index on SYS_TABLES */
+ *table_id = DICT_TABLES_ID;
+ return true;
+ }
+
+ bool found = false;
+ mtr_t mtr;
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ /* NOTE that the operation of this function is protected by
+ the dictionary mutex, and therefore no deadlocks can occur
+ with other dictionary operations. */
+
+ mtr_start(&mtr);
+
+ btr_pcur_t pcur;
+ const rec_t* rec = dict_startscan_system(&pcur, &mtr, SYS_INDEXES);
+
+ while (rec) {
+ ulint len;
+ const byte* field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_INDEXES__ID, &len);
+ ut_ad(len == 8);
+
+ /* Check if the index id is the one searched for */
+ if (index_id == mach_read_from_8(field)) {
+ found = true;
+ /* Now we get the table id */
+ const byte* field = rec_get_nth_field_old(
+ rec,
+ DICT_FLD__SYS_INDEXES__TABLE_ID,
+ &len);
+ *table_id = mach_read_from_8(field);
+ break;
+ }
+ mtr_commit(&mtr);
+ mtr_start(&mtr);
+ rec = dict_getnext_system(&pcur, &mtr);
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
+ return(found);
+}
+
+UNIV_INTERN
+dict_table_t*
+dict_table_open_on_index_id(
+/*==================*/
+ index_id_t index_id, /*!< in: index id */
+ bool dict_locked) /*!< in: dict locked */
+{
+ if (!dict_locked) {
+ mutex_enter(&dict_sys->mutex);
+ }
+
+ ut_ad(mutex_own(&dict_sys->mutex));
+ table_id_t table_id;
+ dict_table_t * table = NULL;
+ if (dict_load_table_id_on_index_id(index_id, &table_id)) {
+ bool local_dict_locked = true;
+ table = dict_table_open_on_id(table_id,
+ local_dict_locked,
+ DICT_TABLE_OP_LOAD_TABLESPACE);
+ }
+
+ if (!dict_locked) {
+ mutex_exit(&dict_sys->mutex);
+ }
+ return table;
+}
+
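+/* A minimal usage sketch for the helper above (a hypothetical caller; in
+this patch the function is meant for code, such as the btr_scrub code, that
+only knows the index id found on a page):
+
+ dict_table_t* table = dict_table_open_on_index_id(index_id, false);
+ if (table != NULL) {
+ ... use the table ...
+ dict_table_close(table, FALSE, FALSE);
+ }
+*/
+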
/********************************************************************//**
This function is called when the database is booted. Loads system table
index definitions except for the clustered index which is added to the
diff --git a/storage/innobase/dict/dict0stats_bg.cc b/storage/innobase/dict/dict0stats_bg.cc
index 0089f9897ae..076ceb79613 100644
--- a/storage/innobase/dict/dict0stats_bg.cc
+++ b/storage/innobase/dict/dict0stats_bg.cc
@@ -427,7 +427,7 @@ dict_stats_process_entry_from_recalc_pool()
return;
}
- table->stats_bg_flag = BG_STAT_IN_PROGRESS;
+ table->stats_bg_flag |= BG_STAT_IN_PROGRESS;
mutex_exit(&dict_sys->mutex);
@@ -454,7 +454,7 @@ dict_stats_process_entry_from_recalc_pool()
mutex_enter(&dict_sys->mutex);
- table->stats_bg_flag = BG_STAT_NONE;
+ table->stats_bg_flag &= ~BG_STAT_IN_PROGRESS;
dict_table_close(table, TRUE, FALSE);
diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc
new file mode 100644
index 00000000000..8ff2c490d35
--- /dev/null
+++ b/storage/innobase/fil/fil0crypt.cc
@@ -0,0 +1,2432 @@
+#include "fil0fil.h"
+#include "srv0srv.h"
+#include "srv0start.h"
+#include "mach0data.h"
+#include "log0recv.h"
+#include "mtr0mtr.h"
+#include "mtr0log.h"
+#include "page0zip.h"
+#include "ut0ut.h"
+#include "btr0scrub.h"
+#include "fsp0fsp.h"
+#include "fil0pagecompress.h"
+#include "fil0pageencryption.h"
+
+#include <my_crypt.h>
+#include <my_crypt_key_management.h>
+
+#include <my_aes.h>
+#include <math.h>
+
+
+/** Mutex for keys */
+UNIV_INTERN ib_mutex_t fil_crypt_key_mutex;
+
+#ifdef UNIV_PFS_MUTEX
+UNIV_INTERN mysql_pfs_key_t fil_crypt_key_mutex_key;
+#endif
+
+/** Is encryption enabled/disabled */
+UNIV_INTERN my_bool srv_encrypt_tables = FALSE;
+
+/** No of key rotation threads requested */
+UNIV_INTERN uint srv_n_fil_crypt_threads = 0;
+
+/** No of key rotation threads started */
+static uint srv_n_fil_crypt_threads_started = 0;
+
+/** At this age or older a space/page will be rotated */
+UNIV_INTERN uint srv_fil_crypt_rotate_key_age = 1;
+
+/** Event to signal FROM the key rotation threads. */
+UNIV_INTERN os_event_t fil_crypt_event;
+
+/** Event to signal TO the key rotation threads. */
+UNIV_INTERN os_event_t fil_crypt_threads_event;
+
+/** Event for waking up threads throttle */
+UNIV_INTERN os_event_t fil_crypt_throttle_sleep_event;
+
+/** Mutex for key rotation threads */
+UNIV_INTERN ib_mutex_t fil_crypt_threads_mutex;
+
+#ifdef UNIV_PFS_MUTEX
+UNIV_INTERN mysql_pfs_key_t fil_crypt_threads_mutex_key;
+#endif
+
+/** Variable ensuring only 1 thread at time does initial conversion */
+static bool fil_crypt_start_converting = false;
+
+/** Variables for throttling */
+UNIV_INTERN uint srv_n_fil_crypt_iops = 100; // 10ms per iop
+static uint srv_alloc_time = 3; // allocate iops for 3s at a time
+static uint n_fil_crypt_iops_allocated = 0;
+
+/** Variables for scrubbing */
+extern uint srv_background_scrub_data_interval;
+extern uint srv_background_scrub_data_check_interval;
+
+#define DEBUG_KEYROTATION_THROTTLING 0
+
+/** Statistics variables */
+static fil_crypt_stat_t crypt_stat;
+static ib_mutex_t crypt_stat_mutex;
+
+#ifdef UNIV_PFS_MUTEX
+UNIV_INTERN mysql_pfs_key_t fil_crypt_stat_mutex_key;
+#endif
+
+/**
+ * key for crypt data mutex
+*/
+#ifdef UNIV_PFS_MUTEX
+UNIV_INTERN mysql_pfs_key_t fil_crypt_data_mutex_key;
+#endif
+
+/**
+* Magic pattern at the start of the crypt data on page 0
+*/
+#define MAGIC_SZ 6
+
+static const unsigned char CRYPT_MAGIC[MAGIC_SZ] = {
+ 's', 0xE, 0xC, 'R', 'E', 't' };
+
+static const unsigned char EMPTY_PATTERN[MAGIC_SZ] = {
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
+
+/**
+ * CRYPT_SCHEME_UNENCRYPTED
+ *
+ * Used as an intermediate state when converting a space from unencrypted
+ * to encrypted
+ */
+#define CRYPT_SCHEME_UNENCRYPTED 0
+
+/**
+ * CRYPT_SCHEME_1
+ *
+ * L = AES_ECB(KEY, IV)
+ * CRYPT(PAGE) = AES_CTR(KEY=L, IV=C, PAGE)
+ */
+#define CRYPT_SCHEME_1 1
+#define CRYPT_SCHEME_1_IV_LEN 16
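+
+/*
+ * A minimal sketch of CRYPT_SCHEME_1 applied to one page, assuming the CTR
+ * algorithm is in use (the real call sequence lives in fil_crypt_get_key()
+ * and fil_space_encrypt() below):
+ *
+ * byte L[MY_AES_BLOCK_SIZE]; // L = AES_ECB(KEY, IV)
+ * byte C[MY_AES_BLOCK_SIZE]; // per-page counter block
+ * mach_write_to_4(C + 0, space_id); // tablespace id
+ * mach_write_to_4(C + 4, page_no); // page number within the space
+ * mach_write_to_8(C + 8, lsn); // newest modification lsn of the page
+ * // ciphertext = AES_CTR(key = L, iv = C, page body)
+ *
+ * Binding C to (space_id, page_no, lsn) keeps the key stream unique for
+ * every page write without storing a per-page nonce.
+ */
+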
+// cached L given key_version
+struct key_struct
+{
+ uint key_version;
+ byte key[CRYPT_SCHEME_1_IV_LEN];
+};
+
+struct fil_space_rotate_state_t
+{
+ time_t start_time; // time when rotation started
+ ulint active_threads; // active threads in space
+ ulint next_offset; // next "free" offset
+ ulint max_offset; // max offset needing to be rotated
+ uint min_key_version_found; // min key version found but not rotated
+ lsn_t end_lsn; // max lsn created when rotating this space
+ bool starting; // initial write of IV
+ bool flushing; // space is being flushed at end of rotate
+ struct {
+ bool is_active; // is scrubbing active in this space
+ time_t last_scrub_completed; // when was last scrub completed
+ } scrubbing;
+};
+
+struct fil_space_crypt_struct
+{
+ ulint type; // CRYPT_SCHEME
+ uint keyserver_requests; // no of key requests to key server
+ uint key_count; // No of initialized key-structs
+ key_struct keys[3]; // cached L = AES_ECB(KEY, IV)
+ uint min_key_version; // min key version for this space
+ ulint page0_offset; // byte offset on page 0 for crypt data
+
+ ib_mutex_t mutex; // mutex protecting following variables
+ bool closing; // is tablespace being closed
+ fil_space_rotate_state_t rotate_state;
+
+ uint iv_length; // length of IV
+ byte iv[1]; // IV-data
+};
+
+/*********************************************************************
+Init space crypt */
+UNIV_INTERN
+void
+fil_space_crypt_init()
+{
+ mutex_create(fil_crypt_key_mutex_key,
+ &fil_crypt_key_mutex, SYNC_NO_ORDER_CHECK);
+
+ fil_crypt_throttle_sleep_event = os_event_create();
+
+ mutex_create(fil_crypt_stat_mutex_key,
+ &crypt_stat_mutex, SYNC_NO_ORDER_CHECK);
+ memset(&crypt_stat, 0, sizeof(crypt_stat));
+}
+
+/*********************************************************************
+Cleanup space crypt */
+UNIV_INTERN
+void
+fil_space_crypt_cleanup()
+{
+ os_event_free(fil_crypt_throttle_sleep_event);
+}
+
+/******************************************************************
+Get key bytes for a space/key-version */
+static
+void
+fil_crypt_get_key(byte *dst, uint* key_length,
+ fil_space_crypt_t* crypt_data, uint version, bool page_encrypted)
+{
+ unsigned char keybuf[MY_AES_MAX_KEY_LENGTH];
+ unsigned char iv[CRYPT_SCHEME_1_IV_LEN];
+ ulint iv_len = sizeof(iv);
+
+ if (!page_encrypted) {
+ mutex_enter(&crypt_data->mutex);
+
+ // Check if we already have key
+ for (uint i = 0; i < crypt_data->key_count; i++) {
+ if (crypt_data->keys[i].key_version == version) {
+ memcpy(dst, crypt_data->keys[i].key,
+ sizeof(crypt_data->keys[i].key));
+ mutex_exit(&crypt_data->mutex);
+ return;
+ }
+ }
+ // Not found!
+ crypt_data->keyserver_requests++;
+
+ // Rotate keys to make room for a new one in slot 0
+ for (uint i = array_elements(crypt_data->keys) - 1; i > 0; i--) {
+ crypt_data->keys[i] = crypt_data->keys[i - 1];
+ }
+ }
+ else
+ {
+ // load iv
+
+ int rc = GetCryptoIV(version, (unsigned char*)iv, iv_len);
+
+ if (rc != CRYPT_KEY_OK) {
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ "IV %d can not be found. Reason=%d", version, rc);
+ ut_error;
+ }
+ }
+
+ if (HasCryptoKey(version)) {
+ *key_length = GetCryptoKeySize(version);
+
+ int rc = GetCryptoKey(version, (unsigned char*)keybuf, *key_length);
+
+ if (rc != CRYPT_KEY_OK) {
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ "Key %d can not be found. Reason=%d", version, rc);
+ ut_error;
+ }
+ } else {
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ "Key %d not found", version);
+ ut_error;
+ }
+
+
+ // do ctr key initialization
+ if (current_aes_dynamic_method == MY_AES_ALGORITHM_CTR)
+ {
+ // Now compute L by encrypting IV using this key
+ const unsigned char* src = page_encrypted ? iv : crypt_data->iv;
+ const int srclen = page_encrypted ? iv_len : crypt_data->iv_length;
+ unsigned char* buf = page_encrypted ? keybuf : crypt_data->keys[0].key;
+ uint32 buflen = page_encrypted ? *key_length : sizeof(crypt_data->keys[0].key);
+
+ // call ecb explicit
+ my_aes_encrypt_dynamic_type func = get_aes_encrypt_func(MY_AES_ALGORITHM_ECB);
+ int rc = (*func)(src, srclen,
+ buf, &buflen,
+ (unsigned char*)keybuf, *key_length,
+ NULL, 0,
+ 1);
+
+ if (rc != AES_OK) {
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ "Unable to encrypt key-block "
+ " src: %p srclen: %d buf: %p buflen: %d."
+ " return-code: %d. Can't continue!\n",
+ src, srclen, buf, buflen, rc);
+ ut_error;
+ }
+
+ if (!page_encrypted) {
+ crypt_data->keys[0].key_version = version;
+ crypt_data->key_count++;
+
+ if (crypt_data->key_count > array_elements(crypt_data->keys)) {
+ crypt_data->key_count = array_elements(crypt_data->keys);
+ }
+ }
+
+ // set the key size to the aes block size because this encrypted data is the key
+ *key_length = MY_AES_BLOCK_SIZE;
+ memcpy(dst, buf, buflen);
+ }
+ else
+ {
+ // otherwise keybuf contains the right key
+ memcpy(dst, keybuf, *key_length);
+ }
+
+ if (!page_encrypted) {
+ mutex_exit(&crypt_data->mutex);
+ }
+}
+
+/******************************************************************
+Get key bytes for a space/latest(key-version) */
+static inline
+void
+fil_crypt_get_latest_key(byte *dst, uint* key_length,
+ fil_space_crypt_t* crypt_data, uint *version)
+{
+ if (srv_encrypt_tables) {
+ // used for key rotation - get the next key id from the key provider
+ int rc = GetLatestCryptoKeyVersion();
+
+ // if no new key was created use the last one
+ if (rc >= 0)
+ {
+ *version = rc;
+ }
+
+ return fil_crypt_get_key(dst, key_length, crypt_data, *version, false);
+ }
+ return fil_crypt_get_key(dst, key_length, NULL, *version, true);
+}
+
+/******************************************************************
+Create a fil_space_crypt_t object */
+UNIV_INTERN
+fil_space_crypt_t*
+fil_space_create_crypt_data()
+{
+ const uint iv_length = CRYPT_SCHEME_1_IV_LEN;
+ const uint sz = sizeof(fil_space_crypt_t) + iv_length;
+ fil_space_crypt_t* crypt_data =
+ static_cast<fil_space_crypt_t*>(malloc(sz));
+ memset(crypt_data, 0, sz);
+
+ if (srv_encrypt_tables == FALSE) {
+ crypt_data->type = CRYPT_SCHEME_UNENCRYPTED;
+ crypt_data->min_key_version = 0;
+ } else {
+ crypt_data->type = CRYPT_SCHEME_1;
+ crypt_data->min_key_version = GetLatestCryptoKeyVersion();
+ }
+
+ mutex_create(fil_crypt_data_mutex_key,
+ &crypt_data->mutex, SYNC_NO_ORDER_CHECK);
+ crypt_data->iv_length = iv_length;
+ my_random_bytes(crypt_data->iv, iv_length);
+ return crypt_data;
+}
+
+/******************************************************************
+Compare two crypt objects */
+UNIV_INTERN
+int
+fil_space_crypt_compare(const fil_space_crypt_t* crypt_data1,
+ const fil_space_crypt_t* crypt_data2)
+{
+ ut_a(crypt_data1->type == CRYPT_SCHEME_UNENCRYPTED ||
+ crypt_data1->type == CRYPT_SCHEME_1);
+ ut_a(crypt_data2->type == CRYPT_SCHEME_UNENCRYPTED ||
+ crypt_data2->type == CRYPT_SCHEME_1);
+
+ ut_a(crypt_data1->iv_length == CRYPT_SCHEME_1_IV_LEN);
+ ut_a(crypt_data2->iv_length == CRYPT_SCHEME_1_IV_LEN);
+
+ /* no support for changing iv (yet?) */
+ ut_a(memcmp(crypt_data1->iv, crypt_data2->iv,
+ crypt_data1->iv_length) == 0);
+
+ return 0;
+}
+
+/******************************************************************
+Read crypt data from a page (0) */
+UNIV_INTERN
+fil_space_crypt_t*
+fil_space_read_crypt_data(ulint space, const byte* page, ulint offset)
+{
+ if (memcmp(page + offset, EMPTY_PATTERN, MAGIC_SZ) == 0) {
+ /* crypt is not stored */
+ return NULL;
+ }
+
+ if (memcmp(page + offset, CRYPT_MAGIC, MAGIC_SZ) != 0) {
+ fprintf(stderr,
+ "Warning: found potentially bogus bytes on "
+ "page 0 offset %lu for space %lu : "
+ "[ %.2x %.2x %.2x %.2x %.2x %.2x ]. "
+ "Assuming space is not encrypted!\n",
+ offset, space,
+ page[offset + 0],
+ page[offset + 1],
+ page[offset + 2],
+ page[offset + 3],
+ page[offset + 4],
+ page[offset + 5]);
+ return NULL;
+ }
+
+ ulint type = mach_read_from_1(page + offset + MAGIC_SZ + 0);
+
+ if (! (type == CRYPT_SCHEME_UNENCRYPTED ||
+ type == CRYPT_SCHEME_1)) {
+ fprintf(stderr,
+ "Found non sensible crypt scheme: %lu for space %lu "
+ " offset: %lu bytes: "
+ "[ %.2x %.2x %.2x %.2x %.2x %.2x ]\n",
+ type, space, offset,
+ page[offset + 0 + MAGIC_SZ],
+ page[offset + 1 + MAGIC_SZ],
+ page[offset + 2 + MAGIC_SZ],
+ page[offset + 3 + MAGIC_SZ],
+ page[offset + 4 + MAGIC_SZ],
+ page[offset + 5 + MAGIC_SZ]);
+ ut_error;
+ }
+
+ ulint iv_length = mach_read_from_1(page + offset + MAGIC_SZ + 1);
+ if (! (iv_length == CRYPT_SCHEME_1_IV_LEN)) {
+ fprintf(stderr,
+ "Found non sensible iv length: %lu for space %lu "
+ " offset: %lu type: %lu bytes: "
+ "[ %.2x %.2x %.2x %.2x %.2x %.2x ]\n",
+ iv_length, space, offset, type,
+ page[offset + 0 + MAGIC_SZ],
+ page[offset + 1 + MAGIC_SZ],
+ page[offset + 2 + MAGIC_SZ],
+ page[offset + 3 + MAGIC_SZ],
+ page[offset + 4 + MAGIC_SZ],
+ page[offset + 5 + MAGIC_SZ]);
+ ut_error;
+ }
+
+ uint min_key_version = mach_read_from_4
+ (page + offset + MAGIC_SZ + 2 + iv_length);
+
+ const uint sz = sizeof(fil_space_crypt_t) + iv_length;
+ fil_space_crypt_t* crypt_data = static_cast<fil_space_crypt_t*>(
+ malloc(sz));
+ memset(crypt_data, 0, sz);
+
+ crypt_data->type = type;
+ crypt_data->min_key_version = min_key_version;
+ crypt_data->page0_offset = offset;
+ mutex_create(fil_crypt_data_mutex_key,
+ &crypt_data->mutex, SYNC_NO_ORDER_CHECK);
+ crypt_data->iv_length = iv_length;
+ memcpy(crypt_data->iv, page + offset + MAGIC_SZ + 2, iv_length);
+
+ return crypt_data;
+}
+
+/******************************************************************
+Free a crypt data object */
+UNIV_INTERN
+void
+fil_space_destroy_crypt_data(fil_space_crypt_t **crypt_data)
+{
+ if (crypt_data != NULL && (*crypt_data) != NULL) {
+ /* lock (and unlock) mutex to make sure no one has it locked
+ * currently */
+ mutex_enter(& (*crypt_data)->mutex);
+ mutex_exit(& (*crypt_data)->mutex);
+ mutex_free(& (*crypt_data)->mutex);
+ free(*crypt_data);
+ (*crypt_data) = NULL;
+ }
+}
+
+/******************************************************************
+Write crypt data to a page (0) */
+static
+void
+fil_space_write_crypt_data_low(fil_space_crypt_t *crypt_data,
+ ulint type,
+ byte* page, ulint offset,
+ ulint maxsize, mtr_t* mtr)
+{
+ ut_a(offset > 0 && offset < UNIV_PAGE_SIZE);
+ ulint space_id = mach_read_from_4(
+ page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+ const uint len = crypt_data->iv_length;
+ const uint min_key_version = crypt_data->min_key_version;
+ crypt_data->page0_offset = offset;
+ ut_a(2 + len + 4 + MAGIC_SZ < maxsize);
+
+ /*
+ redo log this as bytewise updates to page 0
+ followed by an MLOG_FILE_WRITE_CRYPT_DATA
+ (that will during recovery update fil_space_t)
+ */
+ mlog_write_string(page + offset, CRYPT_MAGIC, MAGIC_SZ, mtr);
+ mlog_write_ulint(page + offset + MAGIC_SZ + 0, type, MLOG_1BYTE, mtr);
+ mlog_write_ulint(page + offset + MAGIC_SZ + 1, len, MLOG_1BYTE, mtr);
+ mlog_write_string(page + offset + MAGIC_SZ + 2, crypt_data->iv, len,
+ mtr);
+ mlog_write_ulint(page + offset + MAGIC_SZ + 2 + len, min_key_version,
+ MLOG_4BYTES, mtr);
+
+ byte* log_ptr = mlog_open(mtr, 11 + 12 + len);
+ if (log_ptr != NULL) {
+ log_ptr = mlog_write_initial_log_record_fast(
+ page,
+ MLOG_FILE_WRITE_CRYPT_DATA,
+ log_ptr, mtr);
+ mach_write_to_4(log_ptr, space_id);
+ log_ptr += 4;
+ mach_write_to_2(log_ptr, offset);
+ log_ptr += 2;
+ mach_write_to_1(log_ptr, type);
+ log_ptr += 1;
+ mach_write_to_1(log_ptr, len);
+ log_ptr += 1;
+ mach_write_to_4(log_ptr, min_key_version);
+ log_ptr += 4;
+ mlog_close(mtr, log_ptr);
+
+ mlog_catenate_string(mtr, crypt_data->iv, len);
+ }
+}
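+
+/*
+ * For reference, the page 0 byte layout written above (and parsed back by
+ * fil_space_read_crypt_data()) is, assuming CRYPT_SCHEME_1_IV_LEN == 16:
+ *
+ * offset + 0 : CRYPT_MAGIC (MAGIC_SZ = 6 bytes)
+ * offset + 6 : type (1 byte, CRYPT_SCHEME_*)
+ * offset + 7 : iv length (1 byte)
+ * offset + 8 : iv (16 bytes)
+ * offset + 24 : min_key_version (4 bytes)
+ */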
+
+/******************************************************************
+Write crypt data to a page (0) */
+UNIV_INTERN
+void
+fil_space_write_crypt_data(ulint space, byte* page, ulint offset,
+ ulint maxsize, mtr_t* mtr)
+{
+ fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space);
+ if (crypt_data == NULL) {
+ return;
+ }
+
+ fil_space_write_crypt_data_low(crypt_data, crypt_data->type,
+ page, offset, maxsize, mtr);
+}
+
+/******************************************************************
+Parse a MLOG_FILE_WRITE_CRYPT_DATA log entry */
+UNIV_INTERN
+byte*
+fil_parse_write_crypt_data(byte* ptr, byte* end_ptr,
+ buf_block_t* block)
+{
+ /* check that redo log entry is complete */
+ uint entry_size =
+ 4 + // size of space_id
+ 2 + // size of offset
+ 1 + // size of type
+ 1 + // size of iv-len
+ 4; // size of min_key_version
+ if (end_ptr - ptr < entry_size)
+ return NULL;
+
+ ulint space_id = mach_read_from_4(ptr);
+ ptr += 4;
+ uint offset = mach_read_from_2(ptr);
+ ptr += 2;
+ uint type = mach_read_from_1(ptr);
+ ptr += 1;
+ uint len = mach_read_from_1(ptr);
+ ptr += 1;
+
+ ut_a(type == CRYPT_SCHEME_UNENCRYPTED ||
+ type == CRYPT_SCHEME_1); // only supported
+ ut_a(len == CRYPT_SCHEME_1_IV_LEN); // only supported
+ uint min_key_version = mach_read_from_4(ptr);
+ ptr += 4;
+
+ if (end_ptr - ptr < len)
+ return NULL;
+
+ fil_space_crypt_t* crypt_data = fil_space_create_crypt_data();
+ crypt_data->page0_offset = offset;
+ crypt_data->min_key_version = min_key_version;
+ memcpy(crypt_data->iv, ptr, len);
+ ptr += len;
+
+ /* update fil_space memory cache with crypt_data */
+ fil_space_set_crypt_data(space_id, crypt_data);
+
+ return ptr;
+}
+
+/******************************************************************
+Clear crypt data from a page (0) */
+UNIV_INTERN
+void
+fil_space_clear_crypt_data(byte* page, ulint offset)
+{
+ //TODO(jonaso): pass crypt-data and read len from there
+ ulint len = CRYPT_SCHEME_1_IV_LEN;
+ ulint size =
+ sizeof(CRYPT_MAGIC) +
+ 1 + // type
+ 1 + // len
+ len + // iv
+ 4; // min key version
+ memset(page + offset, 0, size);
+}
+
+/*********************************************************************
+Check if page shall be encrypted before write */
+UNIV_INTERN
+bool
+fil_space_check_encryption_write(
+/*==============================*/
+ ulint space) /*!< in: tablespace id */
+{
+ if (srv_encrypt_tables == FALSE)
+ return false;
+
+ fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space);
+ if (crypt_data == NULL)
+ return false;
+
+ if (crypt_data->type == CRYPT_SCHEME_UNENCRYPTED)
+ return false;
+
+ return true;
+}
+
+/******************************************************************
+Encrypt a page */
+UNIV_INTERN
+void
+fil_space_encrypt(ulint space, ulint offset, lsn_t lsn,
+ const byte* src_frame, ulint zip_size, byte* dst_frame, ulint encryption_key)
+{
+ fil_space_crypt_t* crypt_data;
+ ulint page_size = (zip_size) ? zip_size : UNIV_PAGE_SIZE;
+
+ // get key (L)
+ uint key_version;
+ byte key[MY_AES_MAX_KEY_LENGTH];
+ uint key_length;
+
+ if (srv_encrypt_tables) {
+ crypt_data = fil_space_get_crypt_data(space);
+ if (crypt_data == NULL) {
+ //TODO: Is this really needed ?
+ memcpy(dst_frame, src_frame, page_size);
+ return;
+ }
+ fil_crypt_get_latest_key(key, &key_length, crypt_data, &key_version);
+ } else {
+ key_version = encryption_key;
+ fil_crypt_get_latest_key(key, &key_length, NULL, (uint*)&key_version);
+ }
+
+
+ /* Load the iv or counter (depending on the encryption algorithm used) */
+ unsigned char iv[MY_AES_BLOCK_SIZE];
+
+ if (current_aes_dynamic_method == MY_AES_ALGORITHM_CTR)
+ {
+ // create counter block (C)
+ mach_write_to_4(iv + 0, space);
+ ulint space_offset = mach_read_from_4(
+ src_frame + FIL_PAGE_OFFSET);
+ mach_write_to_4(iv + 4, space_offset);
+ mach_write_to_8(iv + 8, lsn);
+ }
+ else
+ {
+ // take the iv from the key provider
+
+ int load_iv_rc = GetCryptoIV(key_version, (uchar *) iv, sizeof(iv));
+
+ // if the iv can not be loaded the whole page can not be encrypted
+ if (load_iv_rc != CRYPT_KEY_OK)
+ {
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ "Unable to decrypt data-block. "
+ " Can not load iv for key %d"
+ " return-code: %d. Can't continue!\n",
+ key_version, load_iv_rc);
+
+ ut_error;
+ }
+ }
+
+
+ ibool page_compressed = (mach_read_from_2(src_frame+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED);
+ ibool page_encrypted = fil_space_is_page_encrypted(space);
+
+ ulint compression_alg = mach_read_from_8(src_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+
+ ulint orig_page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE);
+ if (orig_page_type==FIL_PAGE_TYPE_FSP_HDR
+ || orig_page_type==FIL_PAGE_TYPE_XDES
+ || orig_page_type== FIL_PAGE_PAGE_ENCRYPTED
+ || orig_page_type== FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) {
+ memcpy(dst_frame, src_frame, page_size);
+ return;
+ }
+
+ // copy page header
+ memcpy(dst_frame, src_frame, FIL_PAGE_DATA);
+
+
+ if (page_encrypted && !page_compressed) {
+ // key id
+ mach_write_to_2(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
+ key_version);
+ // original page type
+ mach_write_to_2(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 2,
+ orig_page_type);
+ // new page type
+ mach_write_to_2(dst_frame+FIL_PAGE_TYPE, FIL_PAGE_PAGE_ENCRYPTED);
+ } else {
+ // store key version
+ mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
+ key_version);
+ }
+
+ // encrypt page data
+ ulint unencrypted_bytes = FIL_PAGE_DATA + FIL_PAGE_DATA_END;
+ ulint srclen = page_size - unencrypted_bytes;
+ const byte* src = src_frame + FIL_PAGE_DATA;
+ byte* dst = dst_frame + FIL_PAGE_DATA;
+ uint32 dstlen;
+
+ if (page_compressed) {
+ srclen = page_size - FIL_PAGE_DATA;
+ }
+
+
+ int rc = (* my_aes_encrypt_dynamic)(src, srclen,
+ dst, &dstlen,
+ (unsigned char*)key, key_length,
+ (unsigned char*)iv, sizeof(iv),
+ 1);
+
+ if (! ((rc == AES_OK) && ((ulint) dstlen == srclen))) {
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ "Unable to encrypt data-block "
+ " src: %p srclen: %ld buf: %p buflen: %d."
+ " return-code: %d. Can't continue!\n",
+ src, (long)srclen,
+ dst, dstlen, rc);
+ ut_error;
+ }
+
+ if (!page_compressed) {
+ // copy page trailer
+ memcpy(dst_frame + page_size - FIL_PAGE_DATA_END,
+ src_frame + page_size - FIL_PAGE_DATA_END,
+ FIL_PAGE_DATA_END);
+
+ /* handle post encryption checksum */
+ ib_uint32_t checksum = 0;
+ srv_checksum_algorithm_t algorithm =
+ static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm);
+
+ if (zip_size == 0) {
+ switch (algorithm) {
+ case SRV_CHECKSUM_ALGORITHM_CRC32:
+ case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
+ checksum = buf_calc_page_crc32(dst_frame);
+ break;
+ case SRV_CHECKSUM_ALGORITHM_INNODB:
+ case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
+ checksum = (ib_uint32_t) buf_calc_page_new_checksum(
+ dst_frame);
+ break;
+ case SRV_CHECKSUM_ALGORITHM_NONE:
+ case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
+ checksum = BUF_NO_CHECKSUM_MAGIC;
+ break;
+ /* no default so the compiler will emit a warning
+ * if new enum is added and not handled here */
+ }
+ } else {
+ checksum = page_zip_calc_checksum(dst_frame, zip_size,
+ algorithm);
+ }
+
+ // store the post-encryption checksum after the key-version
+ mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4,
+ checksum);
+ } else {
+ /* Page compressed and encrypted tables have different
+ FIL_HEADER */
+ ulint page_len = log10((double)page_size)/log10((double)2);
+ /* Set up the correct page type */
+ mach_write_to_2(dst_frame+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED);
+ /* Store the original page type */
+ mach_write_to_2(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+4, orig_page_type);
+ /* Store the page size as a power of two */
+ mach_write_to_1(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+6, page_len);
+ /* Set up the compression method */
+ mach_write_to_1(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+7, compression_alg);
+ }
+
+}
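+
+/*
+ * Summary of how the 8 bytes at FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION are
+ * reused by the routine above (and read back by fil_space_decrypt()):
+ *
+ * key rotation (srv_encrypt_tables): key_version (4) | post-encryption checksum (4)
+ * FIL_PAGE_PAGE_ENCRYPTED: key_version (2) | original page type (2) | checksum (4)
+ * page_compressed pages: key_version (4) | original page type (2) | log2(page size) (1) | compression alg (1)
+ */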
+
+/*********************************************************************
+Check if extra buffer shall be allocated for decrypting after read */
+UNIV_INTERN
+bool
+fil_space_check_encryption_read(
+/*==============================*/
+ ulint space) /*!< in: tablespace id */
+{
+ fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space);
+ if (crypt_data == NULL)
+ return false;
+
+ if (crypt_data->type == CRYPT_SCHEME_UNENCRYPTED)
+ return false;
+
+ return true;
+}
+
+/******************************************************************
+Decrypt a page */
+UNIV_INTERN
+bool
+fil_space_decrypt(fil_space_crypt_t* crypt_data,
+ const byte* src_frame, ulint page_size, byte* dst_frame)
+{
+ ulint page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE);
+ // key version
+ uint key_version;
+ bool page_encrypted = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
+ || page_type == FIL_PAGE_PAGE_ENCRYPTED);
+
+ bool page_compressed = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
+ || page_type == FIL_PAGE_PAGE_COMPRESSED);
+
+ ulint orig_page_type=0;
+ if (page_type == FIL_PAGE_PAGE_ENCRYPTED) {
+ key_version = mach_read_from_2(
+ src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+ orig_page_type = mach_read_from_2(
+ src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 2);
+ } else {
+ key_version = mach_read_from_4(
+ src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+ }
+
+ if (key_version == 0 && !page_encrypted) {
+ //TODO: is this really needed ?
+ memcpy(dst_frame, src_frame, page_size);
+ return false; /* page not decrypted */
+ }
+
+ // read space & offset & lsn
+ ulint space = mach_read_from_4(
+ src_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+ ulint offset = mach_read_from_4(
+ src_frame + FIL_PAGE_OFFSET);
+ ib_uint64_t lsn = mach_read_from_8(src_frame + FIL_PAGE_LSN);
+
+ // copy page header
+ memcpy(dst_frame, src_frame, FIL_PAGE_DATA);
+
+ if (page_type == FIL_PAGE_PAGE_ENCRYPTED) {
+ // orig page type
+ mach_write_to_2(dst_frame+FIL_PAGE_TYPE, orig_page_type);
+ }
+
+
+ // get key
+ byte key[MY_AES_MAX_KEY_LENGTH];
+ uint key_length;
+ fil_crypt_get_key(key, &key_length, crypt_data, key_version, page_encrypted);
+
+ // get the iv
+ unsigned char iv[MY_AES_BLOCK_SIZE];
+
+ if (current_aes_dynamic_method == MY_AES_ALGORITHM_CTR)
+ {
+ // create counter block
+
+ mach_write_to_4(iv + 0, space);
+ mach_write_to_4(iv + 4, offset);
+ mach_write_to_8(iv + 8, lsn);
+ }
+ else
+ {
+ // take the iv from the key provider
+
+ int load_iv_rc = GetCryptoIV(key_version, (uchar *) iv, sizeof(iv));
+
+ // if the iv can not be loaded the whole page can not be decrypted
+ if (load_iv_rc != CRYPT_KEY_OK)
+ {
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ "Unable to decrypt data-block. "
+ " Can not load iv for key %d"
+ " return-code: %d. Can't continue!\n",
+ key_version, load_iv_rc);
+
+ return AES_KEY_CREATION_FAILED;
+ }
+ }
+
+ const byte* src = src_frame + FIL_PAGE_DATA;
+ byte* dst = dst_frame + FIL_PAGE_DATA;
+ uint32 dstlen;
+ ulint srclen = page_size - (FIL_PAGE_DATA + FIL_PAGE_DATA_END);
+
+ ulint compressed_len;
+ ulint compression_method;
+
+ if (page_compressed) {
+ orig_page_type = mach_read_from_2(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+4);
+ compressed_len = mach_read_from_1(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+6);
+ compression_method = mach_read_from_1(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+7);
+ }
+
+ if (page_encrypted && !page_compressed) {
+ orig_page_type = mach_read_from_2(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION+2);
+ }
+
+ if (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) {
+ srclen = pow((double)2, (double)((int)compressed_len)) - FIL_PAGE_DATA;
+ }
+
+ int rc = (* my_aes_decrypt_dynamic)(src, srclen,
+ dst, &dstlen,
+ (unsigned char*)key, key_length,
+ (unsigned char*)iv, sizeof(iv),
+ 1);
+
+ if (! ((rc == AES_OK) && ((ulint) dstlen == srclen))) {
+ ib_logf(IB_LOG_LEVEL_FATAL,
+ "Unable to decrypt data-block "
+ " src: %p srclen: %ld buf: %p buflen: %d."
+ " return-code: %d. Can't continue!\n",
+ src, (long)srclen,
+ dst, dstlen, rc);
+ ut_error;
+ }
+
+ if (page_type != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) {
+ // copy page trailer
+ memcpy(dst_frame + page_size - FIL_PAGE_DATA_END,
+ src_frame + page_size - FIL_PAGE_DATA_END,
+ FIL_PAGE_DATA_END);
+
+ // clear key-version & crypt-checksum from dst
+ memset(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
+ } else {
+ /* For page compressed tables we set up the FIL_HEADER again */
+ /* setting original page type */
+ mach_write_to_2(dst_frame + FIL_PAGE_TYPE, orig_page_type);
+ /* page_compression uses BUF_NO_CHECKSUM_MAGIC as checksum */
+ mach_write_to_4(dst_frame + FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC);
+ /* Set up the flush lsn to be compression algorithm */
+ mach_write_to_8(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, compression_method);
+ }
+
+ return true; /* page was decrypted */
+}
+
+/******************************************************************
+Decrypt a page */
+UNIV_INTERN
+void
+fil_space_decrypt(ulint space,
+ const byte* src_frame, ulint page_size, byte* dst_frame)
+{
+ fil_space_decrypt(fil_space_get_crypt_data(space),
+ src_frame, page_size, dst_frame);
+}
+
+/*********************************************************************
+Verify checksum for a page (iff it's encrypted)
+NOTE: currently this function can only be run in single threaded mode
+as it modifies srv_checksum_algorithm (temporarily)
+@return true if page is encrypted AND OK, false otherwise */
+bool
+fil_space_verify_crypt_checksum(const byte* src_frame, ulint zip_size)
+{
+ // key version
+ uint key_version = mach_read_from_4(
+ src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+
+ if (key_version == 0) {
+ return false; // unencrypted page
+ }
+
+ /* "trick" the normal checksum routines by storing the post-encryption
+ * checksum into the normal checksum field allowing for reuse of
+ * the normal routines */
+
+ // post encryption checksum
+ ib_uint32_t stored_post_encryption = mach_read_from_4(
+ src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4);
+
+ // save pre encryption checksum for restore in end of this function
+ ib_uint32_t stored_pre_encryption = mach_read_from_4(
+ src_frame + FIL_PAGE_SPACE_OR_CHKSUM);
+
+ ib_uint32_t checksum_field2 = mach_read_from_4(
+ src_frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM);
+
+ /** prepare frame for usage of normal checksum routines */
+ mach_write_to_4(const_cast<byte*>(src_frame) + FIL_PAGE_SPACE_OR_CHKSUM,
+ stored_post_encryption);
+
+ /* NOTE: this function is (currently) only run when restoring
+ * dblwr-buffer, server is single threaded so it's safe to modify
+ * srv_checksum_algorithm */
+ srv_checksum_algorithm_t save_checksum_algorithm =
+ (srv_checksum_algorithm_t)srv_checksum_algorithm;
+ if (zip_size == 0 &&
+ (save_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB ||
+ save_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_INNODB)) {
+ /* handle ALGORITHM_INNODB specially:
+ * force plain ALGORITHM_INNODB and store BUF_NO_CHECKSUM_MAGIC
+ * as checksum_field2 (which is sort of pointless anyway...)
+ */
+ srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_INNODB;
+ mach_write_to_4(const_cast<byte*>(src_frame) +
+ UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+ BUF_NO_CHECKSUM_MAGIC);
+ }
+
+ /* verify checksums */
+ ibool corrupted = buf_page_is_corrupted(false, src_frame, zip_size);
+
+ /** restore frame & algorithm */
+ srv_checksum_algorithm = save_checksum_algorithm;
+
+ mach_write_to_4(const_cast<byte*>(src_frame) +
+ FIL_PAGE_SPACE_OR_CHKSUM,
+ stored_pre_encryption);
+
+ mach_write_to_4(const_cast<byte*>(src_frame) +
+ UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
+ checksum_field2);
+
+ if (!corrupted) {
+ return true; // page was encrypted and checksum matched
+ } else {
+ return false; // page was encrypted but checksum didn't match
+ }
+}
+
+/***********************************************************************/
+
+/** A copy of global key state */
+struct key_state_t {
+ key_state_t() : key_version(0),
+ rotate_key_age(srv_fil_crypt_rotate_key_age) {}
+ bool operator==(const key_state_t& other) const {
+ return key_version == other.key_version &&
+ rotate_key_age == other.rotate_key_age;
+ }
+ uint key_version;
+ uint rotate_key_age;
+};
+
+/***********************************************************************
+Copy global key state */
+static void
+fil_crypt_get_key_state(
+ key_state_t *new_state)
+{
+ if (srv_encrypt_tables == TRUE) {
+ new_state->key_version = GetLatestCryptoKeyVersion();
+ new_state->rotate_key_age = srv_fil_crypt_rotate_key_age;
+ ut_a(new_state->key_version > 0);
+ } else {
+ new_state->key_version = 0;
+ new_state->rotate_key_age = 0;
+ }
+}
+
+/***********************************************************************
+Check if a key needs rotation given a key_state */
+static bool
+fil_crypt_needs_rotation(uint key_version, const key_state_t *key_state)
+{
+ // TODO(jonaso): Add support for rotating encrypted => unencrypted
+
+ if (key_version == 0 && key_state->key_version != 0) {
+ /* this is rotation unencrypted => encrypted
+ * ignore rotate_key_age */
+ return true;
+ }
+
+ if (key_state->key_version == 0 && key_version != 0) {
+ /* this is rotation encrypted => unencrypted */
+ return true;
+ }
+
+ /* this is rotation encrypted => encrypted,
+ * only reencrypt if key is sufficiently old */
+ if (key_version + key_state->rotate_key_age < key_state->key_version)
+ return true;
+
+ return false;
+}
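+
+/*
+ * A small worked example of the rule above, assuming
+ * srv_fil_crypt_rotate_key_age == 100: a page carrying key_version 5 is
+ * rotated once the latest key version exceeds 105 (5 + 100 < 106), while
+ * key_version == 0 (page still unencrypted) or key_state->key_version == 0
+ * (encryption switched off) triggers rotation regardless of key age.
+ */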
+
+/***********************************************************************
+Check if a space is closing (i.e just before drop) */
+UNIV_INTERN bool
+fil_crypt_is_closing(ulint space)
+{
+ bool closing;
+ fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
+ mutex_enter(&crypt_data->mutex);
+ closing = crypt_data->closing;
+ mutex_exit(&crypt_data->mutex);
+ return closing;
+}
+
+/***********************************************************************
+Start encrypting a space
+@return true if a pending op (fil_inc_pending_ops/fil_decr_pending_ops) is held
+*/
+static bool
+fil_crypt_start_encrypting_space(ulint space, bool *recheck) {
+
+ /* we have a pending op when entering function */
+ bool pending_op = true;
+
+ mutex_enter(&fil_crypt_threads_mutex);
+ fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
+ if (crypt_data != NULL || fil_crypt_start_converting) {
+ /* someone beat us to it */
+ if (fil_crypt_start_converting)
+ *recheck = true;
+
+ mutex_exit(&fil_crypt_threads_mutex);
+ return pending_op;
+ }
+
+ /* NOTE: we need to write and flush page 0 before publishing
+ * the crypt data. This so that after restart there is no
+ * risk of finding encrypted pages without having
+ * crypt data in page 0 */
+
+ /* 1 - create crypt data */
+ crypt_data = fil_space_create_crypt_data();
+ if (crypt_data == NULL) {
+ mutex_exit(&fil_crypt_threads_mutex);
+ return pending_op;
+ }
+
+ crypt_data->type = CRYPT_SCHEME_UNENCRYPTED;
+ crypt_data->min_key_version = 0; // all pages are unencrypted
+ crypt_data->rotate_state.start_time = time(0);
+ crypt_data->rotate_state.starting = true;
+ crypt_data->rotate_state.active_threads = 1;
+
+ mutex_enter(&crypt_data->mutex);
+ fil_space_set_crypt_data(space, crypt_data);
+ mutex_exit(&crypt_data->mutex);
+
+ fil_crypt_start_converting = true;
+ mutex_exit(&fil_crypt_threads_mutex);
+
+ do
+ {
+ if (fil_crypt_is_closing(space) ||
+ fil_tablespace_is_being_deleted(space))
+ break;
+
+ mtr_t mtr;
+ mtr_start(&mtr);
+
+ /* 2 - get page 0 */
+ ulint offset = 0;
+ ulint zip_size = fil_space_get_zip_size(space);
+ buf_block_t* block = buf_page_get_gen(space, zip_size, offset,
+ RW_X_LATCH,
+ NULL,
+ BUF_GET,
+ __FILE__, __LINE__,
+ &mtr);
+
+ if (fil_crypt_is_closing(space) ||
+ fil_tablespace_is_being_deleted(space)) {
+ mtr_commit(&mtr);
+ break;
+ }
+
+ /* 3 - compute location to store crypt data */
+ byte* frame = buf_block_get_frame(block);
+ ulint maxsize;
+ crypt_data->page0_offset =
+ fsp_header_get_crypt_offset(zip_size, &maxsize);
+
+ /* 4 - write crypt data to page 0 */
+ fil_space_write_crypt_data_low(crypt_data,
+ CRYPT_SCHEME_1,
+ frame,
+ crypt_data->page0_offset,
+ maxsize, &mtr);
+
+ mtr_commit(&mtr);
+
+ if (fil_crypt_is_closing(space) ||
+ fil_tablespace_is_being_deleted(space)) {
+ break;
+ }
+
+ /* record lsn of update */
+ lsn_t end_lsn = mtr.end_lsn;
+
+ /* 5 - sync tablespace before publishing crypt data */
+
+ /* release "lock" while syncing */
+ fil_decr_pending_ops(space);
+ pending_op = false;
+
+ bool success = false;
+ ulint n_pages = 0;
+ ulint sum_pages = 0;
+ do {
+ success = buf_flush_list(ULINT_MAX, end_lsn, &n_pages);
+ buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+ sum_pages += n_pages;
+ } while (!success &&
+ !fil_crypt_is_closing(space) &&
+ !fil_tablespace_is_being_deleted(space));
+
+ /* try to reacquire pending op */
+ if (fil_inc_pending_ops(space, true))
+ break;
+
+ /* pending op reacquired! */
+ pending_op = true;
+
+ if (fil_crypt_is_closing(space) ||
+ fil_tablespace_is_being_deleted(space)) {
+ break;
+ }
+
+ /* 6 - publish crypt data */
+ mutex_enter(&fil_crypt_threads_mutex);
+ mutex_enter(&crypt_data->mutex);
+ crypt_data->type = CRYPT_SCHEME_1;
+ ut_a(crypt_data->rotate_state.active_threads == 1);
+ crypt_data->rotate_state.active_threads = 0;
+ crypt_data->rotate_state.starting = false;
+
+ fil_crypt_start_converting = false;
+ mutex_exit(&crypt_data->mutex);
+ mutex_exit(&fil_crypt_threads_mutex);
+
+ return pending_op;
+ } while (0);
+
+ mutex_enter(&crypt_data->mutex);
+ ut_a(crypt_data->rotate_state.active_threads == 1);
+ crypt_data->rotate_state.active_threads = 0;
+ mutex_exit(&crypt_data->mutex);
+
+ mutex_enter(&fil_crypt_threads_mutex);
+ fil_crypt_start_converting = false;
+ mutex_exit(&fil_crypt_threads_mutex);
+
+ return pending_op;
+}
+
+/***********************************************************************
+Check if space needs rotation given a key_state */
+static bool
+fil_crypt_space_needs_rotation(uint space, const key_state_t *key_state,
+ bool *recheck)
+{
+ if (fil_space_get_type(space) != FIL_TABLESPACE)
+ return false;
+
+ if (fil_inc_pending_ops(space, true)) {
+ /* tablespace being dropped */
+ return false;
+ }
+
+ /* keep track of if we have pending op */
+ bool pending_op = true;
+
+ fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
+ if (crypt_data == NULL) {
+ /**
+ * space has no crypt data
+ * start encrypting it...
+ */
+ pending_op = fil_crypt_start_encrypting_space(space, recheck);
+ crypt_data = fil_space_get_crypt_data(space);
+ if (crypt_data == NULL) {
+ if (pending_op) {
+ fil_decr_pending_ops(space);
+ }
+ return false;
+ }
+ }
+
+ mutex_enter(&crypt_data->mutex);
+ do {
+ /* prevent threads from starting to rotate space */
+ if (crypt_data->rotate_state.starting) {
+ /* recheck this space later */
+ *recheck = true;
+ break;
+ }
+
+ /* skip space if it is being closed */
+ if (crypt_data->closing)
+ break;
+
+ if (crypt_data->rotate_state.flushing)
+ break;
+
+ bool need_key_rotation = fil_crypt_needs_rotation(
+ crypt_data->min_key_version, key_state);
+
+ time_t diff = time(0) - crypt_data->rotate_state.scrubbing.
+ last_scrub_completed;
+ bool need_scrubbing =
+ diff >= srv_background_scrub_data_interval;
+
+ if (need_key_rotation == false && need_scrubbing == false)
+ break;
+
+ mutex_exit(&crypt_data->mutex);
+ /* NOTE! fil_decr_pending_ops is performed outside */
+ return true;
+ } while (0);
+
+ mutex_exit(&crypt_data->mutex);
+ if (pending_op) {
+ fil_decr_pending_ops(space);
+ }
+ return false;
+}
+
+/** State of a rotation thread */
+struct rotate_thread_t {
+ explicit rotate_thread_t(uint no) {
+ memset(this, 0, sizeof(* this));
+ thread_no = no;
+ first = true;
+ estimated_max_iops = 20;
+ }
+
+ uint thread_no;
+ bool first; /*!< is position before first space */
+ ulint space; /*!< current space */
+ ulint offset; /*!< current offset */
+ ulint batch; /*!< #pages to rotate */
+ uint min_key_version_found;/*!< min key version found but not rotated */
+ lsn_t end_lsn; /*!< max lsn when rotating this space */
+
+ uint estimated_max_iops; /*!< estimation of max iops */
+ uint allocated_iops; /*!< allocated iops */
+ uint cnt_waited; /*!< #times waited during this slot */
+ uint sum_waited_us; /*!< wait time during this slot */
+
+ fil_crypt_stat_t crypt_stat; // statistics
+
+ btr_scrub_t scrub_data; /* thread local data used by btr_scrub-functions
+ * when iterating pages of tablespace */
+
+ /* check if this thread should shutdown */
+ bool should_shutdown() const {
+ return ! (srv_shutdown_state == SRV_SHUTDOWN_NONE &&
+ thread_no < srv_n_fil_crypt_threads);
+ }
+};
+
+/***********************************************************************
+Update global statistics with thread statistics */
+static void
+fil_crypt_update_total_stat(rotate_thread_t *state)
+{
+ mutex_enter(&crypt_stat_mutex);
+ crypt_stat.pages_read_from_cache +=
+ state->crypt_stat.pages_read_from_cache;
+ crypt_stat.pages_read_from_disk +=
+ state->crypt_stat.pages_read_from_disk;
+ crypt_stat.pages_modified += state->crypt_stat.pages_modified;
+ crypt_stat.pages_flushed += state->crypt_stat.pages_flushed;
+ // remove old estimate
+ crypt_stat.estimated_iops -= state->crypt_stat.estimated_iops;
+ // add new estimate
+ crypt_stat.estimated_iops += state->estimated_max_iops;
+ mutex_exit(&crypt_stat_mutex);
+
+ // make new estimate "current" estimate
+ memset(&state->crypt_stat, 0, sizeof(state->crypt_stat));
+ // record our old (current) estimate
+ state->crypt_stat.estimated_iops = state->estimated_max_iops;
+}
+
+/***********************************************************************
+Allocate iops to thread from global setting,
+used before starting to rotate a space */
+static bool
+fil_crypt_alloc_iops(rotate_thread_t *state)
+{
+ ut_ad(state->allocated_iops == 0);
+
+ uint max_iops = state->estimated_max_iops;
+ mutex_enter(&fil_crypt_threads_mutex);
+ if (n_fil_crypt_iops_allocated >= srv_n_fil_crypt_iops) {
+ /* this can happen when the user decreases srv_n_fil_crypt_iops */
+ mutex_exit(&fil_crypt_threads_mutex);
+ return false;
+ }
+
+ uint alloc = srv_n_fil_crypt_iops - n_fil_crypt_iops_allocated;
+ if (alloc > max_iops)
+ alloc = max_iops;
+
+ n_fil_crypt_iops_allocated += alloc;
+ mutex_exit(&fil_crypt_threads_mutex);
+
+ state->allocated_iops = alloc;
+
+ return alloc > 0;
+}
+
+/***********************************************************************
+Reallocate iops to thread,
+used when inside a space */
+static void
+fil_crypt_realloc_iops(rotate_thread_t *state)
+{
+ ut_a(state->allocated_iops > 0);
+
+ if (10 * state->cnt_waited > state->batch) {
+ /* if we waited more than 10% re-estimate max_iops */
+ uint avg_wait_time_us =
+ state->sum_waited_us / state->cnt_waited;
+
+#if DEBUG_KEYROTATION_THROTTLING
+ fprintf(stderr,
+ "thr_no: %u - update estimated_max_iops from %u to %u\n",
+ state->thread_no,
+ state->estimated_max_iops,
+ 1000000 / avg_wait_time_us);
+#endif
+ if (avg_wait_time_us == 0)
+ avg_wait_time_us = 1; // prevent division by zero
+
+ state->estimated_max_iops = 1000000 / avg_wait_time_us;
+ state->cnt_waited = 0;
+ state->sum_waited_us = 0;
+ } else {
+#if DEBUG_KEYROTATION_THROTTLING
+ fprintf(stderr,
+ "thr_no: %u only waited %lu%% skip re-estimate\n",
+ state->thread_no,
+ (100 * state->cnt_waited) / state->batch);
+#endif
+ }
+
+ if (state->estimated_max_iops <= state->allocated_iops) {
+ /* return extra iops */
+ uint extra = state->allocated_iops - state->estimated_max_iops;
+
+ if (extra > 0) {
+ mutex_enter(&fil_crypt_threads_mutex);
+ if (n_fil_crypt_iops_allocated < extra) {
+ /* unknown bug!
+ * crash in debug
+ * keep n_fil_crypt_iops_allocated unchanged
+ * in release */
+ ut_ad(0);
+ extra = 0;
+ }
+ n_fil_crypt_iops_allocated -= extra;
+ state->allocated_iops -= extra;
+
+ if (state->allocated_iops == 0) {
+ /* no matter how slow the io system seems to be,
+ * never decrease allocated_iops to 0... */
+ state->allocated_iops ++;
+ n_fil_crypt_iops_allocated ++;
+ }
+ mutex_exit(&fil_crypt_threads_mutex);
+ os_event_set(fil_crypt_threads_event);
+ }
+ } else {
+ /* see if there are more to get */
+ mutex_enter(&fil_crypt_threads_mutex);
+ if (n_fil_crypt_iops_allocated < srv_n_fil_crypt_iops) {
+ /* there are extra iops free */
+ uint extra = srv_n_fil_crypt_iops -
+ n_fil_crypt_iops_allocated;
+ if (state->allocated_iops + extra >
+ state->estimated_max_iops) {
+ /* but don't alloc more than our max */
+ extra = state->estimated_max_iops -
+ state->allocated_iops;
+ }
+ n_fil_crypt_iops_allocated += extra;
+ state->allocated_iops += extra;
+#if DEBUG_KEYROTATION_THROTTLING
+ fprintf(stderr,
+ "thr_no: %u increased iops from %u to %u\n",
+ state->thread_no,
+ state->allocated_iops - extra,
+ state->allocated_iops);
+#endif
+ }
+ mutex_exit(&fil_crypt_threads_mutex);
+ }
+
+ fil_crypt_update_total_stat(state);
+}
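+
+/*
+ * Worked example of the re-estimation above: if a thread waited 50 times
+ * for a total of 500000 us during its batch, avg_wait_time_us is 10000 and
+ * estimated_max_iops becomes 1000000 / 10000 = 100; the surplus or deficit
+ * against allocated_iops is then returned to, or taken from, the global
+ * n_fil_crypt_iops_allocated pool.
+ */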
+
+/***********************************************************************
+Return allocated iops to global */
+static void
+fil_crypt_return_iops(rotate_thread_t *state)
+{
+ if (state->allocated_iops > 0) {
+ uint iops = state->allocated_iops;
+ mutex_enter(&fil_crypt_threads_mutex);
+ if (n_fil_crypt_iops_allocated < iops) {
+ /* unknown bug!
+ * crash in debug
+ * keep n_fil_crypt_iops_allocated unchanged
+ * in release */
+ ut_ad(0);
+ iops = 0;
+ }
+ n_fil_crypt_iops_allocated -= iops;
+ mutex_exit(&fil_crypt_threads_mutex);
+ state->allocated_iops = 0;
+ os_event_set(fil_crypt_threads_event);
+ }
+
+ fil_crypt_update_total_stat(state);
+}
+
+/***********************************************************************
+Search for a space needing rotation */
+bool
+fil_crypt_find_space_to_rotate(
+ const key_state_t *key_state,
+ rotate_thread_t *state,
+ bool *recheck)
+{
+ /* we need iops to start rotating */
+ while (!state->should_shutdown() && !fil_crypt_alloc_iops(state)) {
+ os_event_reset(fil_crypt_threads_event);
+ os_event_wait_time(fil_crypt_threads_event, 1000000);
+ }
+
+ if (state->should_shutdown())
+ return false;
+
+ if (state->first) {
+ state->first = false;
+ state->space = fil_get_first_space();
+ } else {
+ state->space = fil_get_next_space(state->space);
+ }
+
+ while (!state->should_shutdown() && state->space != ULINT_UNDEFINED) {
+
+ ulint space = state->space;
+ if (fil_crypt_space_needs_rotation(space, key_state, recheck)) {
+ /* init state->min_key_version_found before
+ * starting on a space */
+ state->min_key_version_found = key_state->key_version;
+ return true;
+ }
+
+ state->space = fil_get_next_space(space);
+ }
+
+ /* if we didn't find any space return iops */
+ fil_crypt_return_iops(state);
+
+ return false;
+
+}
+
+/***********************************************************************
+Start rotating a space */
+static
+void
+fil_crypt_start_rotate_space(
+ const key_state_t *key_state,
+ rotate_thread_t *state)
+{
+ ulint space = state->space;
+ fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
+ mutex_enter(&crypt_data->mutex);
+ if (crypt_data->rotate_state.active_threads == 0) {
+ /* only first thread needs to init */
+ crypt_data->rotate_state.next_offset = 1; // skip page 0
+ /* no need to rotate beyond current max
+ * if space extends, it will be encrypted with newer version */
+ crypt_data->rotate_state.max_offset = fil_space_get_size(space);
+
+ crypt_data->rotate_state.end_lsn = 0;
+ crypt_data->rotate_state.min_key_version_found =
+ key_state->key_version;
+
+ crypt_data->rotate_state.start_time = time(0);
+ }
+
+ /* count active threads in space */
+ crypt_data->rotate_state.active_threads++;
+
+ /* Initialize thread local state */
+ state->end_lsn = crypt_data->rotate_state.end_lsn;
+ state->min_key_version_found =
+ crypt_data->rotate_state.min_key_version_found;
+
+ /* inform scrubbing */
+ crypt_data->rotate_state.scrubbing.is_active =
+ btr_scrub_start_space(space, &state->scrub_data);
+
+ mutex_exit(&crypt_data->mutex);
+}
+
+/***********************************************************************
+Search for batch of pages needing rotation */
+static
+bool
+fil_crypt_find_page_to_rotate(
+ const key_state_t *key_state,
+ rotate_thread_t *state)
+{
+ ulint batch = srv_alloc_time * state->allocated_iops;
+ ulint space = state->space;
+ fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
+ mutex_enter(&crypt_data->mutex);
+ if (crypt_data->closing == false &&
+ crypt_data->rotate_state.next_offset <
+ crypt_data->rotate_state.max_offset) {
+
+ state->offset = crypt_data->rotate_state.next_offset;
+ ulint remaining = crypt_data->rotate_state.max_offset -
+ crypt_data->rotate_state.next_offset;
+
+ if (batch <= remaining)
+ state->batch = batch;
+ else
+ state->batch = remaining;
+
+ crypt_data->rotate_state.next_offset += batch;
+ mutex_exit(&crypt_data->mutex);
+ return true;
+ }
+
+ mutex_exit(&crypt_data->mutex);
+ return false;
+}
+
+/***********************************************************************
+Check if a page is uninitialized (doesn't need to be rotated) */
+static bool
+fil_crypt_is_page_uninitialized(const byte* frame, uint zip_size)
+{
+ if (zip_size) {
+ ulint stored_checksum = mach_read_from_4(
+ frame + FIL_PAGE_SPACE_OR_CHKSUM);
+ /* empty pages aren't encrypted */
+ if (stored_checksum == 0) {
+ return true;
+ }
+ } else {
+ ulint size = UNIV_PAGE_SIZE;
+ ulint checksum_field1 = mach_read_from_4(
+ frame + FIL_PAGE_SPACE_OR_CHKSUM);
+ ulint checksum_field2 = mach_read_from_4(
+ frame + size - FIL_PAGE_END_LSN_OLD_CHKSUM);
+ /* empty pages are not encrypted */
+ if (checksum_field1 == 0 && checksum_field2 == 0
+ && mach_read_from_4(frame + FIL_PAGE_LSN) == 0) {
+ return true;
+ }
+ }
+ return false;
+}
+
+#define fil_crypt_get_page_throttle(state,space,zip_size,offset,mtr,sleeptime_ms) \
+ fil_crypt_get_page_throttle_func(state, space, zip_size, offset, mtr, \
+ sleeptime_ms, __FILE__, __LINE__)
+
+/***********************************************************************
+Get a page and compute sleep time */
+static
+buf_block_t*
+fil_crypt_get_page_throttle_func(rotate_thread_t *state,
+ ulint space, uint zip_size, ulint offset,
+ mtr_t *mtr,
+ ulint *sleeptime_ms,
+ const char *file,
+ ulint line)
+{
+ buf_block_t* block = buf_page_try_get_func(space, offset, RW_X_LATCH,
+ true,
+ file, line, mtr);
+ if (block != NULL) {
+ /* page was in buffer pool */
+ state->crypt_stat.pages_read_from_cache++;
+ return block;
+ }
+
+ state->crypt_stat.pages_read_from_disk++;
+
+ ullint start = ut_time_us(NULL);
+ block = buf_page_get_gen(space, zip_size, offset,
+ RW_X_LATCH,
+ NULL, BUF_GET_POSSIBLY_FREED,
+ file, line, mtr);
+ ullint end = ut_time_us(NULL);
+
+ if (end < start) {
+ end = start; // safety...
+ }
+
+ state->cnt_waited++;
+ state->sum_waited_us += (end - start);
+
+ /* average page load */
+ ulint add_sleeptime_ms = 0;
+ ulint avg_wait_time_us = state->sum_waited_us / state->cnt_waited;
+ ulint alloc_wait_us = 1000000 / state->allocated_iops;
+ if (avg_wait_time_us < alloc_wait_us) {
+		/* we are reading faster than our allocated iops allow */
+ add_sleeptime_ms = (alloc_wait_us - avg_wait_time_us) / 1000;
+ } else {
+ /* if page load time is longer than we want, skip sleeping */
+ }
+
+ *sleeptime_ms += add_sleeptime_ms;
+ return block;
+}
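
    The throttling rule can be read in isolation: a thread with allocated_iops
    is entitled to one page read every 1,000,000 / allocated_iops microseconds,
    and whenever its measured average read time is shorter than that, the
    difference is converted into extra sleep. A minimal standalone sketch
    (plain types, hypothetical name):

    #include <cstdint>

    // Extra milliseconds to sleep after a disk read, given the running totals.
    uint64_t extra_sleep_ms(uint64_t sum_waited_us, uint64_t cnt_waited,
                            uint64_t allocated_iops)
    {
        if (cnt_waited == 0 || allocated_iops == 0) {
            return 0;                         // nothing measured / unthrottled
        }
        const uint64_t avg_wait_us   = sum_waited_us / cnt_waited;
        const uint64_t alloc_wait_us = 1000000 / allocated_iops;
        // Reading faster than the IOPS budget allows: sleep off the difference.
        return avg_wait_us < alloc_wait_us
            ? (alloc_wait_us - avg_wait_us) / 1000
            : 0;
    }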
+
+
+/***********************************************************************
+Get block and allocation status
+
+note: InnoDB locks fil_space_latch and then the block when allocating a page,
+but locks the block and then fil_space_latch when freeing a page.
+*/
+static
+buf_block_t*
+btr_scrub_get_block_and_allocation_status(
+ rotate_thread_t *state,
+ ulint space,
+ ulint zip_size,
+ ulint offset,
+ mtr_t *mtr,
+ btr_scrub_page_allocation_status_t *allocation_status,
+ ulint *sleeptime_ms)
+{
+ mtr_t local_mtr;
+ buf_block_t *block = NULL;
+ mtr_start(&local_mtr);
+ *allocation_status = fsp_page_is_free(space, offset, &local_mtr) ?
+ BTR_SCRUB_PAGE_FREE :
+ BTR_SCRUB_PAGE_ALLOCATED;
+
+ if (*allocation_status == BTR_SCRUB_PAGE_FREE) {
+		/* this is the easy case: we lock fil_space_latch first and
+		then the block */
+ block = fil_crypt_get_page_throttle(state,
+ space, zip_size,
+ offset, mtr,
+ sleeptime_ms);
+ mtr_commit(&local_mtr);
+ } else {
+ /* page is allocated according to xdes */
+
+ /* release fil_space_latch *before* fetching block */
+ mtr_commit(&local_mtr);
+
+		/* NOTE: once we have locked dict_index_get_lock(),
+		* it's safe to release fil_space_latch and then fetch the block,
+		* as dict_index_get_lock() is needed for tree modifications
+		* such as freeing a page
+		*/
+
+ block = fil_crypt_get_page_throttle(state,
+ space, zip_size,
+ offset, mtr,
+ sleeptime_ms);
+ }
+
+ return block;
+}
+
+
+/***********************************************************************
+Rotate one page */
+static
+void
+fil_crypt_rotate_page(
+ const key_state_t *key_state,
+ rotate_thread_t *state)
+{
+ ulint space = state->space;
+ ulint offset = state->offset;
+ const uint zip_size = fil_space_get_zip_size(space);
+ ulint sleeptime_ms = 0;
+
+ /* check if tablespace is closing before reading page */
+ if (fil_crypt_is_closing(space))
+ return;
+
+ if (space == TRX_SYS_SPACE && offset == TRX_SYS_PAGE_NO) {
+ /* don't encrypt this as it contains address to dblwr buffer */
+ return;
+ }
+
+ mtr_t mtr;
+ mtr_start(&mtr);
+ buf_block_t* block = fil_crypt_get_page_throttle(state,
+ space, zip_size,
+ offset, &mtr,
+ &sleeptime_ms);
+
+ bool modified = false;
+ int needs_scrubbing = BTR_SCRUB_SKIP_PAGE;
+ lsn_t block_lsn = block->page.newest_modification;
+ uint kv = block->page.key_version;
+
+ /* check if tablespace is closing after reading page */
+ if (!fil_crypt_is_closing(space)) {
+ byte* frame = buf_block_get_frame(block);
+ fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
+
+ if (kv == 0 &&
+ fil_crypt_is_page_uninitialized(frame, zip_size)) {
+ ;
+ } else if (fil_crypt_needs_rotation(kv, key_state)) {
+
+			/* the page can be "fresh", i.e. never written
+			* (kv == 0), or it must have a key version at least
+			* as big as the space minimum key version */
+ ut_a(kv == 0 || kv >= crypt_data->min_key_version);
+
+ modified = true;
+
+ /* force rotation by dummy updating page */
+ mlog_write_ulint(frame +
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
+ space, MLOG_4BYTES, &mtr);
+
+ /* update block */
+ block->page.key_version = key_state->key_version;
+
+ /* statistics */
+ state->crypt_stat.pages_modified++;
+ } else {
+ ut_a(kv >= crypt_data->min_key_version ||
+ (kv == 0 && key_state->key_version == 0));
+
+ if (kv < state->min_key_version_found) {
+ state->min_key_version_found = kv;
+ }
+ }
+
+ needs_scrubbing = btr_page_needs_scrubbing(
+ &state->scrub_data, block,
+ BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN);
+ }
+
+ mtr_commit(&mtr);
+ lsn_t end_lsn = mtr.end_lsn;
+
+ if (needs_scrubbing == BTR_SCRUB_PAGE) {
+ mtr_start(&mtr);
+ /*
+ * refetch page and allocation status
+ */
+ btr_scrub_page_allocation_status_t allocated;
+ block = btr_scrub_get_block_and_allocation_status(
+ state, space, zip_size, offset, &mtr,
+ &allocated,
+ &sleeptime_ms);
+
+ /* get required table/index and index-locks */
+ needs_scrubbing = btr_scrub_recheck_page(
+ &state->scrub_data, block, allocated, &mtr);
+
+ if (needs_scrubbing == BTR_SCRUB_PAGE) {
+ /* we need to refetch it once more now that we have
+ * index locked */
+ block = btr_scrub_get_block_and_allocation_status(
+ state, space, zip_size, offset, &mtr,
+ &allocated,
+ &sleeptime_ms);
+
+ needs_scrubbing = btr_scrub_page(&state->scrub_data,
+ block, allocated,
+ &mtr);
+ }
+
+		/* NOTE: the mtr is committed inside btr_scrub_recheck_page()
+		* and/or btr_scrub_page(). This makes sure that
+		* locks & pages are latched in the correct order;
+		* in some circumstances the mtr is restarted.
+		* (mtr_commit() + mtr_start())
+		*/
+ }
+
+ if (needs_scrubbing != BTR_SCRUB_PAGE) {
+		/* if the page didn't need scrubbing, it might still need
+		cleanups. do those outside of any mtr to prevent deadlocks.
+
+		what kinds of cleanups are needed is encoded inside
+		needs_scrubbing, but this is opaque to this function
+		(except for the value BTR_SCRUB_PAGE) */
+ btr_scrub_skip_page(&state->scrub_data, needs_scrubbing);
+ }
+
+ if (needs_scrubbing == BTR_SCRUB_TURNED_OFF) {
+ /* if we just detected that scrubbing was turned off
+ * update global state to reflect this */
+ fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
+ mutex_enter(&crypt_data->mutex);
+ crypt_data->rotate_state.scrubbing.is_active = false;
+ mutex_exit(&crypt_data->mutex);
+ }
+
+ if (modified) {
+ /* if we modified page, we take lsn from mtr */
+ ut_a(end_lsn > state->end_lsn);
+ ut_a(end_lsn > block_lsn);
+ state->end_lsn = end_lsn;
+ } else {
+ /* if we did not modify page, check for max lsn */
+ if (block_lsn > state->end_lsn) {
+ state->end_lsn = block_lsn;
+ }
+ }
+
+ if (sleeptime_ms) {
+ os_event_reset(fil_crypt_throttle_sleep_event);
+ os_event_wait_time(fil_crypt_throttle_sleep_event,
+ 1000 * sleeptime_ms);
+ }
+}
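
    Condensed to its decision, each page visited above falls into one of three
    buckets; the sketch below captures just that branch structure with
    hypothetical names (key version 0 meaning "never encrypted"), leaving out
    the buffer-pool and mtr plumbing.

    #include <algorithm>
    #include <cstdint>

    enum class page_action { skip, rewrite, record_version };

    // - never-written page (version 0 and all-zero): nothing to do
    // - version older than the rotation threshold: dummy-write so the next
    //   flush re-encrypts it with the current key
    // - otherwise: just remember the oldest version seen in this tablespace
    page_action decide(uint32_t key_version, bool uninitialized,
                       bool needs_rotation, uint32_t* min_version_found)
    {
        if (key_version == 0 && uninitialized) {
            return page_action::skip;
        }
        if (needs_rotation) {
            return page_action::rewrite;
        }
        *min_version_found = std::min(*min_version_found, key_version);
        return page_action::record_version;
    }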
+
+/***********************************************************************
+Rotate a batch of pages */
+static
+void
+fil_crypt_rotate_pages(
+ const key_state_t *key_state,
+ rotate_thread_t *state)
+{
+ ulint space = state->space;
+ ulint end = state->offset + state->batch;
+ for (; state->offset < end; state->offset++) {
+
+		/* we can't rotate pages in the dblwr buffer, as
+		* it's not possible to read them due to lots of asserts
+		* in the buffer pool.
+		*
+		* However, since these are only (short-lived) copies of
+		* real pages, they will be rewritten anyway when the
+		* real page is updated
+		*/
+ if (space == TRX_SYS_SPACE &&
+ buf_dblwr_page_inside(state->offset)) {
+ continue;
+ }
+
+ fil_crypt_rotate_page(key_state, state);
+ }
+}
+
+/***********************************************************************
+Flush rotated pages and then update page 0 */
+static
+void
+fil_crypt_flush_space(rotate_thread_t *state, ulint space)
+{
+ fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
+
+	/* flush tablespace pages so that there are no pages left with the old key */
+ lsn_t end_lsn = crypt_data->rotate_state.end_lsn;
+ if (end_lsn > 0 && !fil_crypt_is_closing(space)) {
+ bool success = false;
+ ulint n_pages = 0;
+ ulint sum_pages = 0;
+ ullint start = ut_time_us(NULL);
+ do {
+ success = buf_flush_list(ULINT_MAX, end_lsn, &n_pages);
+ buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+ sum_pages += n_pages;
+ } while (!success && !fil_crypt_is_closing(space));
+ ullint end = ut_time_us(NULL);
+ if (sum_pages && end > start) {
+ state->cnt_waited += sum_pages;
+ state->sum_waited_us += (end - start);
+
+ /* statistics */
+ state->crypt_stat.pages_flushed += sum_pages;
+ }
+ }
+
+ if (crypt_data->min_key_version == 0) {
+ crypt_data->type = CRYPT_SCHEME_UNENCRYPTED;
+ }
+
+ /* update page 0 */
+ if (!fil_crypt_is_closing(space)) {
+ mtr_t mtr;
+ mtr_start(&mtr);
+ ulint offset = 0; // page 0
+ const uint zip_size = fil_space_get_zip_size(space);
+ buf_block_t* block = buf_page_get_gen(space, zip_size, offset,
+ RW_X_LATCH, NULL, BUF_GET,
+ __FILE__, __LINE__, &mtr);
+ byte* frame = buf_block_get_frame(block);
+ fil_space_write_crypt_data(space, frame,
+ crypt_data->page0_offset,
+ ULINT_MAX, &mtr);
+ mtr_commit(&mtr);
+ }
+}
+
+/***********************************************************************
+Complete rotating a space */
+static
+void
+fil_crypt_complete_rotate_space(
+ const key_state_t *key_state,
+ rotate_thread_t *state)
+{
+ ulint space = state->space;
+ fil_space_crypt_t *crypt_data = fil_space_get_crypt_data(space);
+ mutex_enter(&crypt_data->mutex);
+
+ /**
+ * Update crypt data state with state from thread
+ */
+ if (state->min_key_version_found <
+ crypt_data->rotate_state.min_key_version_found) {
+ crypt_data->rotate_state.min_key_version_found =
+ state->min_key_version_found;
+ }
+
+ if (state->end_lsn > crypt_data->rotate_state.end_lsn) {
+ crypt_data->rotate_state.end_lsn = state->end_lsn;
+ }
+
+ ut_a(crypt_data->rotate_state.active_threads > 0);
+ crypt_data->rotate_state.active_threads--;
+ bool last = crypt_data->rotate_state.active_threads == 0;
+
+	/**
+	* check if the space is fully done;
+	* when threads shut down, it can happen that we "complete"
+	* the iteration before we have scanned the full space.
+	*/
+ bool done = crypt_data->rotate_state.next_offset >=
+ crypt_data->rotate_state.max_offset;
+
+	/**
+	* we should flush the space if we're the last thread AND
+	* the iteration is done
+	*/
+ bool should_flush = last && done;
+
+ if (should_flush) {
+ /* we're the last active thread */
+ crypt_data->rotate_state.flushing = true;
+ crypt_data->min_key_version =
+ crypt_data->rotate_state.min_key_version_found;
+ }
+
+ /* inform scrubbing */
+ crypt_data->rotate_state.scrubbing.is_active = false;
+ mutex_exit(&crypt_data->mutex);
+
+	/* all threads must call btr_scrub_complete_space without the mutex held */
+ if (btr_scrub_complete_space(&state->scrub_data) == true) {
+ if (should_flush) {
+ /* only last thread updates last_scrub_completed */
+ mutex_enter(&crypt_data->mutex);
+ crypt_data->rotate_state.scrubbing.
+ last_scrub_completed = time(0);
+ mutex_exit(&crypt_data->mutex);
+ }
+ }
+
+ if (should_flush) {
+ fil_crypt_flush_space(state, space);
+
+ mutex_enter(&crypt_data->mutex);
+ crypt_data->rotate_state.flushing = false;
+ mutex_exit(&crypt_data->mutex);
+ }
+}
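
    The completion protocol boils down to: fold the thread-local minima/maxima
    into the shared state, and let only the thread that both leaves last and
    sees the scan finished do the flush. A compressed sketch (plain types,
    hypothetical names, mutex assumed held by the caller):

    #include <algorithm>
    #include <cstdint>

    struct rotate_shared {
        uint32_t min_key_version_found;
        uint64_t end_lsn;
        uint32_t active_threads;
        uint64_t next_offset;
        uint64_t max_offset;
    };

    // Returns true when this thread should flush and update page 0.
    bool complete_one_thread(rotate_shared& shared,
                             uint32_t thread_min_version,
                             uint64_t thread_end_lsn)
    {
        shared.min_key_version_found =
            std::min(shared.min_key_version_found, thread_min_version);
        shared.end_lsn = std::max(shared.end_lsn, thread_end_lsn);

        --shared.active_threads;
        const bool last = shared.active_threads == 0;
        const bool done = shared.next_offset >= shared.max_offset;
        return last && done;    // "should_flush" in the code above
    }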
+
+/*********************************************************************//**
+A thread which monitors global key state and rotates tablespaces accordingly
+@return a dummy parameter */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(fil_crypt_thread)(
+/*===============================*/
+ void* arg __attribute__((unused))) /*!< in: a dummy parameter required
+ * by os_thread_create */
+{
+ UT_NOT_USED(arg);
+
+ mutex_enter(&fil_crypt_threads_mutex);
+ uint thread_no = srv_n_fil_crypt_threads_started;
+ srv_n_fil_crypt_threads_started++;
+ mutex_exit(&fil_crypt_threads_mutex);
+ os_event_set(fil_crypt_event); /* signal that we started */
+
+ /* state of this thread */
+ rotate_thread_t thr(thread_no);
+
+ /* if we find a space that is starting, skip over it and recheck it later */
+ bool recheck = false;
+
+ key_state_t key_state;
+ fil_crypt_get_key_state(&key_state);
+
+	/* make sure that the thread always checks all tablespaces when starting.
+	*
+	* by decreasing key_version, the loop that waits for a change in key state
+	* exits immediately, causing the thread to check all spaces on startup */
+ key_state.key_version--;
+
+ while (!thr.should_shutdown()) {
+
+ key_state_t new_state;
+ fil_crypt_get_key_state(&new_state);
+
+ time_t wait_start = time(0);
+ while (!thr.should_shutdown() && key_state == new_state) {
+
+			/* wait for key state changes,
+			* i.e. either a new key version or
+			* a new rotate_key_age */
+ os_event_reset(fil_crypt_threads_event);
+ os_event_wait_time(fil_crypt_threads_event, 1000000);
+ fil_crypt_get_key_state(&new_state);
+
+ if (recheck) {
+				/* check recheck here, after the sleep, so
+				* that we don't busy-loop while another thread is
+				* starting a space */
+ break;
+ }
+
+ time_t waited = time(0) - wait_start;
+ if (waited >= srv_background_scrub_data_check_interval)
+ break;
+ }
+
+ recheck = false;
+ thr.first = true; // restart from first tablespace
+ key_state = new_state; // save for next loop
+
+ /* iterate all spaces searching for those needing rotation */
+ while (!thr.should_shutdown() &&
+ fil_crypt_find_space_to_rotate(&new_state, &thr, &recheck)) {
+
+ /* we found a space to rotate */
+ fil_crypt_start_rotate_space(&new_state, &thr);
+
+			/* decrement the pending ops counter that was incremented
+			* in fil_crypt_space_needs_rotation
+			* (called from fil_crypt_find_space_to_rotate);
+			* this makes sure that the tablespace isn't dropped
+			* just after we decided to start processing it. */
+ fil_decr_pending_ops(thr.space);
+
+			/* iterate all pages (cooperatively with other threads) */
+ while (!thr.should_shutdown() &&
+ fil_crypt_find_page_to_rotate(&new_state, &thr)) {
+
+				/* rotate a batch of pages */
+ fil_crypt_rotate_pages(&new_state, &thr);
+
+ /* realloc iops */
+ fil_crypt_realloc_iops(&thr);
+ }
+
+ /* complete rotation */
+ fil_crypt_complete_rotate_space(&new_state, &thr);
+
+ /* refresh key state */
+ fil_crypt_get_key_state(&new_state);
+
+ /* return iops */
+ fil_crypt_return_iops(&thr);
+ }
+ }
+
+ /* return iops if shutting down */
+ fil_crypt_return_iops(&thr);
+
+ mutex_enter(&fil_crypt_threads_mutex);
+ srv_n_fil_crypt_threads_started--;
+ mutex_exit(&fil_crypt_threads_mutex);
+ os_event_set(fil_crypt_event); /* signal that we stopped */
+
+ /* We count the number of threads in os_thread_exit(). A created
+ thread should always use that to exit and not use return() to exit. */
+
+ os_thread_exit(NULL);
+
+ OS_THREAD_DUMMY_RETURN;
+}
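
    Stripped of the InnoDB event primitives, the outer loop is a "sleep until
    the key state changes, a recheck is pending, shutdown is requested, or the
    scrub check interval expires" pattern. A standalone sketch with standard
    C++ (hypothetical names, a single condition variable standing in for
    fil_crypt_threads_event):

    #include <chrono>
    #include <condition_variable>
    #include <mutex>

    struct key_state {
        unsigned key_version;
        unsigned rotate_key_age;
        bool operator==(const key_state& o) const {
            return key_version == o.key_version
                && rotate_key_age == o.rotate_key_age;
        }
    };

    // Blocks until something changed or the check interval elapsed.
    // All referenced state is assumed to be updated under 'm'.
    void wait_for_work(std::mutex& m, std::condition_variable& cv,
                       const key_state& current, const key_state& seen,
                       const bool& recheck, const bool& shutdown,
                       std::chrono::seconds check_interval)
    {
        std::unique_lock<std::mutex> lock(m);
        cv.wait_for(lock, check_interval, [&] {
            return shutdown || recheck || !(current == seen);
        });
    }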
+
+/*********************************************************************
+Adjust thread count for key rotation */
+UNIV_INTERN
+void
+fil_crypt_set_thread_cnt(uint new_cnt) {
+ if (new_cnt > srv_n_fil_crypt_threads) {
+ uint add = new_cnt - srv_n_fil_crypt_threads;
+ srv_n_fil_crypt_threads = new_cnt;
+ for (uint i = 0; i < add; i++) {
+ os_thread_create(fil_crypt_thread, NULL, NULL);
+ }
+ } else if (new_cnt < srv_n_fil_crypt_threads) {
+ srv_n_fil_crypt_threads = new_cnt;
+ os_event_set(fil_crypt_threads_event);
+ }
+
+ while(srv_n_fil_crypt_threads_started != srv_n_fil_crypt_threads) {
+ os_event_reset(fil_crypt_event);
+ os_event_wait_time(fil_crypt_event, 1000000);
+ }
+}
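
    Note how the count is changed: raising it spawns only the difference,
    lowering it merely publishes the new target and wakes everybody, and each
    thread decides for itself whether its slot is still below the target; the
    caller then waits until the started counter matches. A rough standalone
    sketch of that pattern in standard C++ (hypothetical names; the real code
    uses InnoDB events and should_shutdown() instead):

    #include <atomic>
    #include <chrono>
    #include <condition_variable>
    #include <mutex>
    #include <thread>

    std::atomic<unsigned> n_target{0};    // like srv_n_fil_crypt_threads
    std::atomic<unsigned> n_started{0};   // like srv_n_fil_crypt_threads_started
    std::mutex m;
    std::condition_variable cv;           // like fil_crypt_threads_event

    void worker()
    {
        const unsigned my_no = n_started++;        // claim a slot number
        for (;;) {
            // ... one round of rotation work would go here ...
            std::unique_lock<std::mutex> lock(m);
            if (my_no >= n_target.load()) {        // my slot was removed
                break;
            }
            cv.wait_for(lock, std::chrono::seconds(1));
        }
        --n_started;
    }

    void set_thread_cnt(unsigned new_cnt)
    {
        const unsigned old_cnt = n_target.exchange(new_cnt);
        for (unsigned i = old_cnt; i < new_cnt; ++i) {
            std::thread(worker).detach();          // grow: spawn the difference
        }
        cv.notify_all();                           // shrink: surplus slots exit
        while (n_started.load() != new_cnt) {      // wait for the count to settle
            std::this_thread::sleep_for(std::chrono::milliseconds(10));
        }
    }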
+
+/*********************************************************************
+Adjust max key age */
+UNIV_INTERN
+void
+fil_crypt_set_rotate_key_age(uint val)
+{
+ srv_fil_crypt_rotate_key_age = val;
+ os_event_set(fil_crypt_threads_event);
+}
+
+/*********************************************************************
+Adjust rotation iops */
+UNIV_INTERN
+void
+fil_crypt_set_rotation_iops(uint val)
+{
+ srv_n_fil_crypt_iops = val;
+ os_event_set(fil_crypt_threads_event);
+}
+
+/*********************************************************************
+Init threads for key rotation */
+UNIV_INTERN
+void
+fil_crypt_threads_init()
+{
+ fil_crypt_event = os_event_create();
+ fil_crypt_threads_event = os_event_create();
+ mutex_create(fil_crypt_threads_mutex_key,
+ &fil_crypt_threads_mutex, SYNC_NO_ORDER_CHECK);
+
+ uint cnt = srv_n_fil_crypt_threads;
+ srv_n_fil_crypt_threads = 0;
+ fil_crypt_set_thread_cnt(cnt);
+}
+
+/*********************************************************************
+End threads for key rotation */
+UNIV_INTERN
+void
+fil_crypt_threads_end()
+{
+ /* stop threads */
+ fil_crypt_set_thread_cnt(0);
+}
+
+/*********************************************************************
+Clean up key rotation threads resources */
+UNIV_INTERN
+void
+fil_crypt_threads_cleanup() {
+ os_event_free(fil_crypt_event);
+ os_event_free(fil_crypt_threads_event);
+}
+
+/*********************************************************************
+Mark a space as closing */
+UNIV_INTERN
+void
+fil_space_crypt_mark_space_closing(
+ ulint space)
+{
+ mutex_enter(&fil_crypt_threads_mutex);
+ fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space);
+ if (crypt_data == NULL) {
+ mutex_exit(&fil_crypt_threads_mutex);
+ return;
+ }
+
+ mutex_enter(&crypt_data->mutex);
+ mutex_exit(&fil_crypt_threads_mutex);
+ crypt_data->closing = true;
+ mutex_exit(&crypt_data->mutex);
+}
+
+/*********************************************************************
+Wait for crypt threads to stop accessing space */
+UNIV_INTERN
+void
+fil_space_crypt_close_tablespace(
+ ulint space)
+{
+ mutex_enter(&fil_crypt_threads_mutex);
+ fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space);
+ if (crypt_data == NULL) {
+ mutex_exit(&fil_crypt_threads_mutex);
+ return;
+ }
+
+ uint start = time(0);
+ uint last = start;
+ mutex_enter(&crypt_data->mutex);
+ mutex_exit(&fil_crypt_threads_mutex);
+ crypt_data->closing = true;
+ uint cnt = crypt_data->rotate_state.active_threads;
+ bool flushing = crypt_data->rotate_state.flushing;
+ while (cnt > 0 || flushing) {
+ mutex_exit(&crypt_data->mutex);
+ /* release dict mutex so that scrub threads can release their
+ * table references */
+ dict_mutex_exit_for_mysql();
+ /* wakeup throttle (all) sleepers */
+ os_event_set(fil_crypt_throttle_sleep_event);
+ os_thread_sleep(20000);
+ dict_mutex_enter_for_mysql();
+ mutex_enter(&crypt_data->mutex);
+ cnt = crypt_data->rotate_state.active_threads;
+ flushing = crypt_data->rotate_state.flushing;
+
+ uint now = time(0);
+ if (now >= last + 30) {
+ fprintf(stderr,
+ "WARNING: "
+ "waited %u seconds to drop space: %lu\n",
+ now - start, space);
+ last = now;
+ }
+ }
+ mutex_exit(&crypt_data->mutex);
+}
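
    The wait above is a plain poll-with-progress-warning loop (20 ms polls, a
    warning every 30 seconds). The same shape, detached from the crypt-mutex
    and dict-mutex juggling, looks roughly like this in standard C++
    (hypothetical predicate):

    #include <chrono>
    #include <cstdio>
    #include <functional>
    #include <thread>

    void wait_until_idle(const std::function<bool()>& still_busy,
                         unsigned long space_id)
    {
        using clock = std::chrono::steady_clock;
        const auto start = clock::now();
        auto last_warned = start;

        while (still_busy()) {
            std::this_thread::sleep_for(std::chrono::milliseconds(20));
            const auto now = clock::now();
            if (now - last_warned >= std::chrono::seconds(30)) {
                const auto waited =
                    std::chrono::duration_cast<std::chrono::seconds>(now - start);
                std::fprintf(stderr,
                             "WARNING: waited %lld seconds to drop space: %lu\n",
                             static_cast<long long>(waited.count()), space_id);
                last_warned = now;
            }
        }
    }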
+
+/*********************************************************************
+Get crypt status for a space (used by information_schema)
+return 0 if crypt data present */
+int
+fil_space_crypt_get_status(
+/*==================*/
+ ulint id, /*!< in: space id */
+ struct fil_space_crypt_status_t* status) /*!< out: status */
+{
+ fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(id);
+
+ if (crypt_data != NULL) {
+ status->space = id;
+ status->scheme = crypt_data->type;
+ mutex_enter(&crypt_data->mutex);
+ status->keyserver_requests = crypt_data->keyserver_requests;
+ status->min_key_version = crypt_data->min_key_version;
+ if (crypt_data->rotate_state.active_threads > 0 ||
+ crypt_data->rotate_state.flushing) {
+ status->rotating = true;
+ status->flushing =
+ crypt_data->rotate_state.flushing;
+ status->rotate_next_page_number =
+ crypt_data->rotate_state.next_offset;
+ status->rotate_max_page_number =
+ crypt_data->rotate_state.max_offset;
+ } else {
+ status->rotating = false;
+ }
+ mutex_exit(&crypt_data->mutex);
+ } else {
+ memset(status, 0, sizeof(*status));
+ }
+
+ if (srv_encrypt_tables == TRUE) {
+ status->current_key_version = GetLatestCryptoKeyVersion();
+ } else {
+ status->current_key_version = 0;
+ }
+ return crypt_data == NULL ? 1 : 0;
+}
+
+/*********************************************************************
+Return crypt statistics */
+void
+fil_crypt_total_stat(fil_crypt_stat_t *stat)
+{
+ mutex_enter(&crypt_stat_mutex);
+ *stat = crypt_stat;
+ mutex_exit(&crypt_stat_mutex);
+}
+
+/*********************************************************************
+Get scrub status for a space (used by information_schema)
+return 0 if data found */
+int
+fil_space_get_scrub_status(
+/*==================*/
+ ulint id, /*!< in: space id */
+ struct fil_space_scrub_status_t* status) /*!< out: status */
+{
+ fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(id);
+ memset(status, 0, sizeof(*status));
+ if (crypt_data != NULL) {
+ status->space = id;
+ status->compressed = fil_space_get_zip_size(id) > 0;
+ mutex_enter(&crypt_data->mutex);
+ status->last_scrub_completed =
+ crypt_data->rotate_state.scrubbing.last_scrub_completed;
+ if (crypt_data->rotate_state.active_threads > 0 &&
+ crypt_data->rotate_state.scrubbing.is_active) {
+ status->scrubbing = true;
+ status->current_scrub_started =
+ crypt_data->rotate_state.start_time;
+ status->current_scrub_active_threads =
+ crypt_data->rotate_state.active_threads;
+ status->current_scrub_page_number =
+ crypt_data->rotate_state.next_offset;
+ status->current_scrub_max_page_number =
+ crypt_data->rotate_state.max_offset;
+ } else {
+ status->scrubbing = false;
+ }
+ mutex_exit(&crypt_data->mutex);
+ } else {
+ memset(status, 0, sizeof(*status));
+ }
+
+ return crypt_data == NULL ? 1 : 0;
+}
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index 7e62fb46b6f..577effa295b 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -27,6 +27,8 @@ Created 10/25/1995 Heikki Tuuri
#include "fil0fil.h"
#include "fil0pagecompress.h"
#include "fsp0pagecompress.h"
+#include "fil0pageencryption.h"
+#include "fsp0pageencryption.h"
#include <debug_sync.h>
#include <my_dbug.h>
@@ -282,7 +284,7 @@ fil_read(
actual page size does not decrease. */
{
return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset,
- byte_offset, len, buf, message, write_size));
+ byte_offset, len, buf, message, write_size, 0));
}
/********************************************************************//**
@@ -309,16 +311,17 @@ fil_write(
this must be appropriately aligned */
void* message, /*!< in: message for aio handler if non-sync
aio used, else ignored */
- ulint* write_size) /*!< in/out: Actual write size initialized
+ ulint* write_size, /*!< in/out: Actual write size initialized
after fist successfull trim
operation for this page and if
initialized we do not trim again if
actual page size does not decrease. */
+ lsn_t lsn) /* lsn of the newest modification */
{
ut_ad(!srv_read_only_mode);
return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset,
- byte_offset, len, buf, message, write_size));
+ byte_offset, len, buf, message, write_size, lsn));
}
/*******************************************************************//**
@@ -645,8 +648,23 @@ fil_node_open_file(
success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE,
space->flags);
+ if (fil_page_encryption_status(page)) {
+		/* if the page is (still) encrypted, write an error and return.
+		 * Otherwise the server would crash if decryption is not possible.
+		 * This may be the case if the key file could not be
+		 * opened on server startup.
+		 */
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "InnoDB: can not decrypt page, because "
+ "keys could not be read.\n"
+ );
+ return false;
+
+ }
+
space_id = fsp_header_get_space_id(page);
flags = fsp_header_get_flags(page);
+
page_size = fsp_flags_get_page_size(flags);
atomic_writes = fsp_flags_get_atomic_writes(flags);
@@ -1125,7 +1143,8 @@ fil_space_create(
const char* name, /*!< in: space name */
ulint id, /*!< in: space id */
ulint flags, /*!< in: tablespace flags */
- ulint purpose)/*!< in: FIL_TABLESPACE, or FIL_LOG if log */
+ ulint purpose,/*!< in: FIL_TABLESPACE, or FIL_LOG if log */
+ fil_space_crypt_t* crypt_data) /*!< in: crypt data */
{
fil_space_t* space;
@@ -1133,6 +1152,21 @@ fil_space_create(
ut_a(fil_system);
+ if (fsp_flags_is_page_encrypted(flags)) {
+ if (!HasCryptoKey(fsp_flags_get_page_encryption_key(flags))) {
+			/* by returning here we avoid a server crash
+			 * when someone tries to access an encrypted table
+			 * and the encryption key is not available.
+			 * The table is then treated as non-existent.
+			 */
+ ib_logf(IB_LOG_LEVEL_WARN,
+				"Tablespace '%s' cannot be opened because "
+				"the encryption key cannot be found (space id: %lu, key %lu)\n"
+ , name, (ulong) id, fsp_flags_get_page_encryption_key(flags));
+ return (FALSE);
+ }
+ }
+
/* Look for a matching tablespace and if found free it. */
do {
mutex_enter(&fil_system->mutex);
@@ -1219,6 +1253,8 @@ fil_space_create(
UT_LIST_ADD_LAST(space_list, fil_system->space_list, space);
+ space->crypt_data = crypt_data;
+
mutex_exit(&fil_system->mutex);
return(TRUE);
@@ -1353,6 +1389,8 @@ fil_space_free(
rw_lock_free(&(space->latch));
+ fil_space_destroy_crypt_data(&(space->crypt_data));
+
mem_free(space->name);
mem_free(space);
@@ -1586,6 +1624,8 @@ fil_init(
UT_LIST_INIT(fil_system->LRU);
fil_system->max_n_open = max_n_open;
+
+ fil_space_crypt_init();
}
/*******************************************************************//**
@@ -1787,10 +1827,11 @@ fil_write_lsn_and_arch_no_to_file(
err = fil_read(TRUE, space, 0, sum_of_sizes, 0,
UNIV_PAGE_SIZE, buf, NULL, 0);
if (err == DB_SUCCESS) {
- mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
+ mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
+ lsn);
err = fil_write(TRUE, space, 0, sum_of_sizes, 0,
- UNIV_PAGE_SIZE, buf, NULL, 0);
+ UNIV_PAGE_SIZE, buf, NULL, 0, lsn);
}
mem_free(buf1);
@@ -1869,6 +1910,7 @@ fil_check_first_page(
{
ulint space_id;
ulint flags;
+ ulint page_is_encrypted;
if (srv_force_recovery >= SRV_FORCE_IGNORE_CORRUPT) {
return(NULL);
@@ -1876,12 +1918,23 @@ fil_check_first_page(
space_id = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page);
flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page);
-
- if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) {
- fprintf(stderr, "InnoDB: Error: Current page size %lu != page size on page %lu\n",
- UNIV_PAGE_SIZE, fsp_flags_get_page_size(flags));
-
- return("innodb-page-size mismatch");
+ /* Note: the 1st page is usually not encrypted. If the Key Provider
+ or the encryption key is not available, the
+ check for reading the first page should intentionally fail
+ with "can not decrypt" message. */
+ page_is_encrypted = fil_page_encryption_status(page);
+ if ((page_is_encrypted == PAGE_ENCRYPTION_KEY_MISSING) && page_is_encrypted) {
+ page_is_encrypted = 1;
+ } else {
+ page_is_encrypted = 0;
+ if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) {
+ fprintf(stderr,
+ "InnoDB: Error: Current page size %lu != "
+ " page size on page %lu\n",
+ UNIV_PAGE_SIZE, fsp_flags_get_page_size(flags));
+
+ return("innodb-page-size mismatch");
+ }
}
if (!space_id && !flags) {
@@ -1897,9 +1950,17 @@ fil_check_first_page(
}
}
- if (buf_page_is_corrupted(
+ if (!page_is_encrypted && buf_page_is_corrupted(
false, page, fsp_flags_get_zip_size(flags))) {
return("checksum mismatch");
+ } else {
+ if (page_is_encrypted) {
+ /* this error message is interpreted by the calling method, which is
+ * executed if the server starts in recovery mode.
+ */
+ return(MSG_CANNOT_DECRYPT);
+
+ }
}
if (page_get_space_id(page) == space_id
@@ -1935,8 +1996,9 @@ fil_read_first_page(
lsn values in data files */
lsn_t* max_flushed_lsn, /*!< out: max of flushed
lsn values in data files */
- ulint orig_space_id) /*!< in: original file space
+ ulint orig_space_id, /*!< in: original file space
id */
+ fil_space_crypt_t** crypt_data) /*< out: crypt data */
{
byte* buf;
byte* page;
@@ -1974,7 +2036,16 @@ fil_read_first_page(
check_msg = fil_check_first_page(page);
}
- flushed_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN);
+ flushed_lsn = mach_read_from_8(page +
+ FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+
+ if (crypt_data) {
+ ulint space = fsp_header_get_space_id(page);
+ ulint offset =
+ fsp_header_get_crypt_offset(
+ fsp_flags_get_zip_size(*flags), NULL);
+ *crypt_data = fil_space_read_crypt_data(space, page, offset);
+ }
ut_free(buf);
@@ -2459,6 +2530,9 @@ fil_check_pending_operations(
*space = 0;
+ /* Wait for crypt threads to stop accessing space */
+ fil_space_crypt_close_tablespace(id);
+
mutex_enter(&fil_system->mutex);
fil_space_t* sp = fil_space_get_by_id(id);
if (sp) {
@@ -3438,7 +3512,8 @@ fil_create_new_single_table_tablespace(
}
}
- success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE);
+ success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE,
+ fil_space_create_crypt_data());
if (!success || !fil_node_create(path, size, space_id, FALSE)) {
err = DB_ERROR;
goto error_exit_1;
@@ -3566,6 +3641,7 @@ fil_open_single_table_tablespace(
ulint tablespaces_found = 0;
ulint valid_tablespaces_found = 0;
ulint atomic_writes = 0;
+ fil_space_crypt_t* crypt_data = NULL;
#ifdef UNIV_SYNC_DEBUG
ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
@@ -3667,7 +3743,7 @@ fil_open_single_table_tablespace(
#ifdef UNIV_LOG_ARCHIVE
&space_arch_log_no, &space_arch_log_no,
#endif /* UNIV_LOG_ARCHIVE */
- &def.lsn, &def.lsn, id);
+ &def.lsn, &def.lsn, id, &def.crypt_data);
def.valid = !def.check_msg;
/* Validate this single-table-tablespace with SYS_TABLES,
@@ -3692,7 +3768,7 @@ fil_open_single_table_tablespace(
#ifdef UNIV_LOG_ARCHIVE
&remote.arch_log_no, &remote.arch_log_no,
#endif /* UNIV_LOG_ARCHIVE */
- &remote.lsn, &remote.lsn, id);
+ &remote.lsn, &remote.lsn, id, &remote.crypt_data);
remote.valid = !remote.check_msg;
/* Validate this single-table-tablespace with SYS_TABLES,
@@ -3718,7 +3794,7 @@ fil_open_single_table_tablespace(
#ifdef UNIV_LOG_ARCHIVE
&dict.arch_log_no, &dict.arch_log_no,
#endif /* UNIV_LOG_ARCHIVE */
- &dict.lsn, &dict.lsn, id);
+ &dict.lsn, &dict.lsn, id, &dict.crypt_data);
dict.valid = !dict.check_msg;
/* Validate this single-table-tablespace with SYS_TABLES,
@@ -3871,9 +3947,17 @@ fil_open_single_table_tablespace(
}
skip_validate:
+ if (remote.success)
+ crypt_data = remote.crypt_data;
+ else if (dict.success)
+ crypt_data = dict.crypt_data;
+ else if (def.success)
+ crypt_data = def.crypt_data;
+
if (err != DB_SUCCESS) {
; // Don't load the tablespace into the cache
- } else if (!fil_space_create(tablename, id, flags, FIL_TABLESPACE)) {
+ } else if (!fil_space_create(tablename, id, flags, FIL_TABLESPACE,
+ crypt_data)) {
err = DB_ERROR;
} else {
/* We do not measure the size of the file, that is why
@@ -3893,15 +3977,25 @@ cleanup_and_exit:
if (remote.filepath) {
mem_free(remote.filepath);
}
+ if (remote.crypt_data && remote.crypt_data != crypt_data) {
+ fil_space_destroy_crypt_data(&remote.crypt_data);
+ }
if (dict.success) {
os_file_close(dict.file);
}
if (dict.filepath) {
mem_free(dict.filepath);
}
+ if (dict.crypt_data && dict.crypt_data != crypt_data) {
+ fil_space_destroy_crypt_data(&dict.crypt_data);
+ }
if (def.success) {
os_file_close(def.file);
}
+ if (def.crypt_data && def.crypt_data != crypt_data) {
+ fil_space_destroy_crypt_data(&def.crypt_data);
+ }
+
mem_free(def.filepath);
return(err);
@@ -4118,16 +4212,25 @@ fil_validate_single_table_tablespace(
check_first_page:
fsp->success = TRUE;
+ fsp->encryption_error = 0;
if (const char* check_msg = fil_read_first_page(
fsp->file, FALSE, &fsp->flags, &fsp->id,
#ifdef UNIV_LOG_ARCHIVE
&fsp->arch_log_no, &fsp->arch_log_no,
#endif /* UNIV_LOG_ARCHIVE */
- &fsp->lsn, &fsp->lsn, ULINT_UNDEFINED)) {
+ &fsp->lsn, &fsp->lsn, ULINT_UNDEFINED, &fsp->crypt_data)) {
ib_logf(IB_LOG_LEVEL_ERROR,
"%s in tablespace %s (table %s)",
check_msg, fsp->filepath, tablename);
fsp->success = FALSE;
+ if (strncmp(check_msg, MSG_CANNOT_DECRYPT, strlen(check_msg))==0) {
+			/* by returning here we avoid a server crash when the
+			* server is started in recovery mode and cannot decrypt
+			* tables because the key file cannot be read.
+			*/
+ fsp->encryption_error = 1;
+ return;
+ }
}
if (!fsp->success) {
@@ -4281,6 +4384,14 @@ fil_load_single_table_tablespace(
}
if (!def.success && !remote.success) {
+
+ if (def.encryption_error || remote.encryption_error) {
+ fprintf(stderr,
+ "InnoDB: Error: could not open single-table"
+ " tablespace file %s. Encryption error!\n", def.filepath);
+ return;
+ }
+
/* The following call prints an error message */
os_file_get_last_error(true);
fprintf(stderr,
@@ -4464,7 +4575,8 @@ will_not_choose:
mutex_exit(&fil_system->mutex);
#endif /* UNIV_HOTBACKUP */
ibool file_space_create_success = fil_space_create(
- tablename, fsp->id, fsp->flags, FIL_TABLESPACE);
+ tablename, fsp->id, fsp->flags, FIL_TABLESPACE,
+ fsp->crypt_data);
if (!file_space_create_success) {
if (srv_force_recovery > 0) {
@@ -5099,7 +5211,7 @@ retry:
success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
node->name, node->handle, buf,
offset, page_size * n_pages,
- node, NULL, 0, FALSE, 0);
+ node, NULL, 0, FALSE, 0, 0, 0, 0);
#endif /* UNIV_HOTBACKUP */
if (success) {
os_has_said_disk_full = FALSE;
@@ -5475,11 +5587,12 @@ fil_io(
appropriately aligned */
void* message, /*!< in: message for aio handler if non-sync
aio used, else ignored */
- ulint* write_size) /*!< in/out: Actual write size initialized
+ ulint* write_size, /*!< in/out: Actual write size initialized
after fist successfull trim
operation for this page and if
initialized we do not trim again if
actual page size does not decrease. */
+ lsn_t lsn) /* lsn of the newest modification */
{
ulint mode;
fil_space_t* space;
@@ -5491,6 +5604,8 @@ fil_io(
ibool ignore_nonexistent_pages;
ibool page_compressed = FALSE;
ulint page_compression_level = 0;
+ ibool page_encrypted;
+ ulint page_encryption_key;
is_log = type & OS_FILE_LOG;
type = type & ~OS_FILE_LOG;
@@ -5676,6 +5791,8 @@ fil_io(
page_compressed = fsp_flags_is_page_compressed(space->flags);
page_compression_level = fsp_flags_get_page_compression_level(space->flags);
+ page_encrypted = fsp_flags_is_page_encrypted(space->flags);
+ page_encryption_key = fsp_flags_get_page_encryption_key(space->flags);
#ifdef UNIV_HOTBACKUP
/* In mysqlbackup do normal i/o, not aio */
@@ -5688,9 +5805,23 @@ fil_io(
}
#else
/* Queue the aio request */
- ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
- offset, len, node, message, write_size,
- page_compressed, page_compression_level);
+ ret = os_aio(
+ type,
+ mode | wake_later,
+ node->name,
+ node->handle,
+ buf,
+ offset,
+ len,
+ node,
+ message,
+ write_size,
+ page_compressed,
+ page_compression_level,
+ page_encrypted,
+ page_encryption_key,
+ lsn);
+
#endif /* UNIV_HOTBACKUP */
@@ -6118,6 +6249,8 @@ void
fil_close(void)
/*===========*/
{
+ fil_space_crypt_cleanup();
+
#ifndef UNIV_HOTBACKUP
/* The mutex should already have been freed. */
ut_ad(fil_system->mutex.magic_n == 0);
@@ -6167,6 +6300,8 @@ struct fil_iterator_t {
ulint n_io_buffers; /*!< Number of pages to use
for IO */
byte* io_buffer; /*!< Buffer to use for IO */
+ fil_space_crypt_t *crypt_data; /*!< Crypt data (if encrypted) */
+ byte* crypt_io_buffer; /*!< IO buffer when encrypted */
};
/********************************************************************//**
@@ -6229,7 +6364,12 @@ fil_iterate(
ut_ad(n_bytes > 0);
ut_ad(!(n_bytes % iter.page_size));
- if (!os_file_read(iter.file, io_buffer, offset,
+ byte* readptr = io_buffer;
+ if (iter.crypt_data != NULL) {
+ readptr = iter.crypt_io_buffer;
+ }
+
+ if (!os_file_read(iter.file, readptr, offset,
(ulint) n_bytes,
fil_space_is_page_compressed(space_id))) {
@@ -6244,6 +6384,18 @@ fil_iterate(
for (ulint i = 0; i < n_pages_read; ++i) {
+ if (iter.crypt_data != NULL) {
+ bool decrypted = fil_space_decrypt(
+ iter.crypt_data,
+ readptr + i * iter.page_size, // src
+ iter.page_size,
+ io_buffer + i * iter.page_size); // dst
+ if (decrypted) {
+ /* write back unencrypted page */
+ updated = true;
+ }
+ }
+
buf_block_set_file_page(block, space_id, page_no++);
dberr_t err;
@@ -6386,6 +6538,13 @@ fil_tablespace_iterate(
iter.n_io_buffers = n_io_buffers;
iter.page_size = callback.get_page_size();
+ ulint crypt_data_offset = fsp_header_get_crypt_offset(
+ callback.get_zip_size(), 0);
+
+ /* read (optional) crypt data */
+ iter.crypt_data = fil_space_read_crypt_data(
+ 0, page, crypt_data_offset);
+
/* Compressed pages can't be optimised for block IO for now.
We do the IMPORT page by page. */
@@ -6394,6 +6553,14 @@ fil_tablespace_iterate(
ut_a(iter.page_size == callback.get_zip_size());
}
+ /** If tablespace is encrypted, it needs extra buffers */
+ if (iter.crypt_data != NULL) {
+		/* decrease io buffers so that memory
+		* consumption doesn't double;
+		* note: the +1 is to avoid n_io_buffers going down to 0 */
+ iter.n_io_buffers = (iter.n_io_buffers + 1) / 2;
+ }
+
/** Add an extra page for compressed page scratch area. */
void* io_buffer = mem_alloc(
@@ -6402,9 +6569,45 @@ fil_tablespace_iterate(
iter.io_buffer = static_cast<byte*>(
ut_align(io_buffer, UNIV_PAGE_SIZE));
+ void* crypt_io_buffer = NULL;
+ if (iter.crypt_data != NULL) {
+ crypt_io_buffer = mem_alloc(
+ iter.n_io_buffers * UNIV_PAGE_SIZE);
+ iter.crypt_io_buffer = static_cast<byte*>(
+ crypt_io_buffer);
+ }
+
err = fil_iterate(iter, &block, callback);
mem_free(io_buffer);
+
+ if (iter.crypt_data != NULL) {
+ /* clear crypt data from page 0 and write it back */
+ os_file_read(file, page, 0, UNIV_PAGE_SIZE, 0);
+ fil_space_clear_crypt_data(page, crypt_data_offset);
+ lsn_t lsn = mach_read_from_8(page + FIL_PAGE_LSN);
+ if (callback.get_zip_size() == 0) {
+ buf_flush_init_for_writing(
+ page, 0, lsn);
+ } else {
+ buf_flush_update_zip_checksum(
+ page, callback.get_zip_size(), lsn);
+ }
+
+ if (!os_file_write(
+ iter.filepath, iter.file, page,
+ 0, iter.page_size)) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "os_file_write() failed");
+
+ return(DB_IO_ERROR);
+ }
+
+ mem_free(crypt_io_buffer);
+ iter.crypt_io_buffer = NULL;
+ fil_space_destroy_crypt_data(&iter.crypt_data);
+ }
}
if (err == DB_SUCCESS) {
@@ -6569,6 +6772,16 @@ fil_space_name(
}
/*******************************************************************//**
+Return space flags */
+ulint
+fil_space_flags(
+/*===========*/
+ fil_space_t* space) /*!< in: space */
+{
+ return (space->flags);
+}
+
+/*******************************************************************//**
Return page type name */
const char*
fil_get_page_type_name(
@@ -6621,3 +6834,137 @@ fil_node_get_block_size(
{
return (node->file_block_size);
}
+
+/******************************************************************
+Get id of first tablespace or ULINT_UNDEFINED if none */
+UNIV_INTERN
+ulint
+fil_get_first_space()
+{
+ ulint out_id = ULINT_UNDEFINED;
+ fil_space_t* space;
+
+ mutex_enter(&fil_system->mutex);
+
+ space = UT_LIST_GET_FIRST(fil_system->space_list);
+ if (space != NULL) {
+ do
+ {
+ if (!space->stop_new_ops) {
+ out_id = space->id;
+ break;
+ }
+ space = UT_LIST_GET_NEXT(space_list, space);
+ } while (space != NULL);
+ }
+
+ mutex_exit(&fil_system->mutex);
+
+ return out_id;
+}
+
+/******************************************************************
+Get id of next tablespace or ULINT_UNDEFINED if none */
+UNIV_INTERN
+ulint
+fil_get_next_space(ulint id)
+{
+ bool found;
+ fil_space_t* space;
+ ulint out_id = ULINT_UNDEFINED;
+
+ mutex_enter(&fil_system->mutex);
+
+ space = fil_space_get_by_id(id);
+ if (space == NULL) {
+ /* we didn't find it...search for space with space->id > id */
+ found = false;
+ space = UT_LIST_GET_FIRST(fil_system->space_list);
+ } else {
+ /* we found it, take next available space */
+ found = true;
+ }
+
+ while ((space = UT_LIST_GET_NEXT(space_list, space)) != NULL) {
+
+ if (!found && space->id <= id)
+ continue;
+
+ if (!space->stop_new_ops) {
+ /* inc reference to prevent drop */
+ out_id = space->id;
+ break;
+ }
+ }
+
+ mutex_exit(&fil_system->mutex);
+
+ return out_id;
+}
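
    Together with fil_get_first_space() this forms a simple cursor over the
    tablespace list; a caller scanning every space would loop roughly as below
    (sketch only; fil_space_get_crypt_data() is the accessor added further
    down in this patch, and error handling is omitted):

	/* Sketch: visit every tablespace currently known to fil_system. */
	for (ulint space_id = fil_get_first_space();
	     space_id != ULINT_UNDEFINED;
	     space_id = fil_get_next_space(space_id)) {

		fil_space_crypt_t* crypt_data = fil_space_get_crypt_data(space_id);

		if (crypt_data == NULL) {
			continue;	/* nothing encryption-related to do here */
		}

		/* ... inspect or rotate this space ... */
	}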
+
+/******************************************************************
+Get crypt data for a tablespace */
+UNIV_INTERN
+fil_space_crypt_t*
+fil_space_get_crypt_data(
+/*==================*/
+ ulint id) /*!< in: space id */
+{
+ fil_space_t* space;
+ fil_space_crypt_t* crypt_data = NULL;
+
+ ut_ad(fil_system);
+
+ mutex_enter(&fil_system->mutex);
+
+ space = fil_space_get_by_id(id);
+ if (space != NULL) {
+ crypt_data = space->crypt_data;
+ }
+
+ mutex_exit(&fil_system->mutex);
+
+ return(crypt_data);
+}
+
+/******************************************************************
+Get crypt data for a tablespace */
+UNIV_INTERN
+void
+fil_space_set_crypt_data(
+/*==================*/
+ ulint id, /*!< in: space id */
+ fil_space_crypt_t* crypt_data) /*!< in: crypt data */
+{
+ fil_space_t* space;
+ fil_space_crypt_t* old_crypt_data = NULL;
+
+ ut_ad(fil_system);
+
+ mutex_enter(&fil_system->mutex);
+
+ space = fil_space_get_by_id(id);
+ if (space != NULL) {
+
+ if (space->crypt_data != NULL) {
+ ut_a(!fil_space_crypt_compare(crypt_data,
+ space->crypt_data));
+ old_crypt_data = space->crypt_data;
+ }
+
+ space->crypt_data = crypt_data;
+ } else {
+ /* there is a small risk that tablespace has been deleted */
+ old_crypt_data = crypt_data;
+ }
+
+ mutex_exit(&fil_system->mutex);
+
+ if (old_crypt_data != NULL) {
+		/* first assign space->crypt_data,
+		* then destroy old_crypt_data once no new references to
+		* it can be created.
+		*/
+ fil_space_destroy_crypt_data(&old_crypt_data);
+ }
+}
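
    The swap-then-free order here is a generic pattern: replace the pointer
    while holding the list mutex, but only destroy the old object after the
    mutex is released, so no concurrent reader can pick up a pointer that is
    about to disappear. A generic standalone sketch (standard C++, hypothetical
    types):

    #include <mutex>

    struct crypt_info { /* ... payload ... */ };

    static crypt_info* current_info = nullptr;
    static std::mutex  list_mutex;

    void set_crypt_info(crypt_info* new_info)
    {
        crypt_info* old_info = nullptr;
        {
            std::lock_guard<std::mutex> guard(list_mutex);
            old_info     = current_info;   // unhook under the lock ...
            current_info = new_info;
        }
        delete old_info;                   // ... destroy outside the lock
    }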
diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc
index 77b9da8b060..29b9580f8e2 100644
--- a/storage/innobase/fil/fil0pagecompress.cc
+++ b/storage/innobase/fil/fil0pagecompress.cc
@@ -269,15 +269,26 @@ fil_compress_page(
int level = 0;
ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE;
ulint write_size=0;
- ulint comp_method = innodb_compression_algorithm; /* Cache to avoid
- change during
- function execution */
+ /* Cache to avoid change during function execution */
+ ulint comp_method = innodb_compression_algorithm;
+ ulint orig_page_type;
ut_ad(buf);
ut_ad(out_buf);
ut_ad(len);
ut_ad(out_len);
+ /* read original page type */
+ orig_page_type = mach_read_from_2(buf + FIL_PAGE_TYPE);
+
+ /* Let's not compress file space header or
+ extent descriptor */
+ if ((orig_page_type == FIL_PAGE_TYPE_FSP_HDR)
+ || (orig_page_type == FIL_PAGE_TYPE_XDES) ) {
+ *out_len = len;
+ return (buf);
+ }
+
level = compression_level;
ut_ad(fil_space_is_page_compressed(space_id));
@@ -422,7 +433,7 @@ fil_compress_page(
/* Set up the correct page type */
mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED);
/* Set up the flush lsn to be compression algorithm */
- mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, comp_method);
+ mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, comp_method);
/* Set up the actual payload lenght */
mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size);
@@ -431,7 +442,7 @@ fil_compress_page(
ut_ad(fil_page_is_compressed(out_buf));
ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC);
ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size);
- ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == (ulint)comp_method);
+ ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) == (ulint)comp_method);
/* Verify that page can be decompressed */
{
@@ -555,7 +566,7 @@ fil_decompress_page(
}
/* Get compression algorithm */
- compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN);
+ compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
/* Get the actual size of compressed page */
actual_size = mach_read_from_2(buf+FIL_PAGE_DATA);
@@ -726,5 +737,3 @@ fil_decompress_page(
ut_free(in_buf);
}
}
-
-
diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc
index d1bb22ed7a9..ee1f2fd9510 100644
--- a/storage/innobase/fsp/fsp0fsp.cc
+++ b/storage/innobase/fsp/fsp0fsp.cc
@@ -766,7 +766,12 @@ fsp_header_init(
} else {
fsp_fill_free_list(TRUE, space, header, mtr);
}
+
+ ulint maxsize = 0;
+ ulint offset = fsp_header_get_crypt_offset(zip_size, &maxsize);
+ fil_space_write_crypt_data(space, page, offset, maxsize, mtr);
}
+
#endif /* !UNIV_HOTBACKUP */
/**********************************************************************//**
@@ -4121,3 +4126,61 @@ fsp_print(
fprintf(stderr, "NUMBER of file segments: %lu\n", (ulong) n_segs);
}
#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************************//**
+Compute offset after xdes where crypt data can be stored
+@return offset */
+ulint
+fsp_header_get_crypt_offset(
+/*========================*/
+ ulint zip_size, /*!< in: zip_size */
+ ulint* max_size) /*!< out: free space available for crypt data */
+{
+ ulint pageno = 0;
+ /* compute first page_no that will have xdes stored on page != 0*/
+ for (ulint i = 0;
+ (pageno = xdes_calc_descriptor_page(zip_size, i)) == 0; )
+ i++;
+
+	/* use the pageno prior to this, i.e. the last page whose
+	descriptor is stored on page 0 */
+ ut_ad(pageno > 0);
+ pageno--;
+
+ ulint iv_offset = XDES_ARR_OFFSET +
+ XDES_SIZE * (1 + xdes_calc_descriptor_index(zip_size, pageno));
+
+ if (max_size != NULL) {
+ /* return how much free space there is available on page */
+ *max_size = (zip_size ? zip_size : UNIV_PAGE_SIZE) -
+ (FSP_HEADER_OFFSET + iv_offset + FIL_PAGE_DATA_END);
+ }
+
+ return FSP_HEADER_OFFSET + iv_offset;
+}
+
+/**********************************************************************//**
+Checks if a single page is free.
+@return true if free */
+UNIV_INTERN
+bool
+fsp_page_is_free_func(
+/*==============*/
+ ulint space, /*!< in: space id */
+ ulint page_no, /*!< in: page offset */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ const char *file,
+ ulint line)
+{
+ ulint flags;
+
+ ut_ad(mtr);
+
+ mtr_x_lock_func(fil_space_get_latch(space, &flags), file, line, mtr);
+ ulint zip_size = fsp_flags_get_zip_size(flags);
+
+ xdes_t* descr = xdes_get_descriptor(space, zip_size, page_no, mtr);
+ ut_a(descr);
+
+ return xdes_mtr_get_bit(
+ descr, XDES_FREE_BIT, page_no % FSP_EXTENT_SIZE, mtr);
+}
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index f719594fa98..df5867ce43b 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -235,6 +235,20 @@ static char* internal_innobase_data_file_path = NULL;
static char* innodb_version_str = (char*) INNODB_VERSION_STR;
+extern my_bool srv_encrypt_tables;
+extern uint srv_n_fil_crypt_threads;
+extern uint srv_fil_crypt_rotate_key_age;
+extern uint srv_n_fil_crypt_iops;
+
+extern my_bool srv_immediate_scrub_data_uncompressed;
+extern my_bool srv_background_scrub_data_uncompressed;
+extern my_bool srv_background_scrub_data_compressed;
+extern uint srv_background_scrub_data_interval;
+extern uint srv_background_scrub_data_check_interval;
+#ifdef UNIV_DEBUG
+extern my_bool srv_scrub_force_testing;
+#endif
+
/** Possible values for system variable "innodb_stats_method". The values
are defined the same as its corresponding MyISAM system variable
"myisam_stats_method"(see "myisam_stats_method_names"), for better usability */
@@ -551,6 +565,12 @@ ha_create_table_option innodb_table_option_list[]=
HA_TOPTION_NUMBER("PAGE_COMPRESSION_LEVEL", page_compression_level, ULINT_UNDEFINED, 0, 9, 1),
/* With this option user can enable atomic writes feature for this table */
HA_TOPTION_ENUM("ATOMIC_WRITES", atomic_writes, "DEFAULT,ON,OFF", 0),
+ /* With this option the user can enable page encryption for the table */
+ HA_TOPTION_BOOL("PAGE_ENCRYPTION", page_encryption, 0),
+
+ /* With this option the user defines the key identifier using for the encryption */
+ HA_TOPTION_NUMBER("PAGE_ENCRYPTION_KEY", page_encryption_key, ULINT_UNDEFINED, 1, 255, 1),
+
HA_TOPTION_END
};
@@ -792,6 +812,14 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_page_compressed_trim_op_saved, SHOW_LONGLONG},
{"num_pages_page_decompressed",
(char*) &export_vars.innodb_pages_page_decompressed, SHOW_LONGLONG},
+ {"num_pages_page_compression_error",
+ (char*) &export_vars.innodb_pages_page_compression_error, SHOW_LONGLONG},
+ {"num_pages_page_encrypted",
+ (char*) &export_vars.innodb_pages_page_encrypted, SHOW_LONGLONG},
+ {"num_pages_page_decrypted",
+ (char*) &export_vars.innodb_pages_page_decrypted, SHOW_LONGLONG},
+ {"num_pages_page_encryption_error",
+ (char*) &export_vars.innodb_pages_page_encryption_error, SHOW_LONGLONG},
{"have_lz4",
(char*) &innodb_have_lz4, SHOW_BOOL},
{"have_lzo",
@@ -824,6 +852,42 @@ static SHOW_VAR innodb_status_variables[]= {
{"secondary_index_triggered_cluster_reads_avoided",
(char*) &export_vars.innodb_sec_rec_cluster_reads_avoided, SHOW_LONG},
+ /* Encryption */
+ {"encryption_rotation_pages_read_from_cache",
+ (char*) &export_vars.innodb_encryption_rotation_pages_read_from_cache,
+ SHOW_LONG},
+ {"encryption_rotation_pages_read_from_disk",
+ (char*) &export_vars.innodb_encryption_rotation_pages_read_from_disk,
+ SHOW_LONG},
+ {"encryption_rotation_pages_modified",
+ (char*) &export_vars.innodb_encryption_rotation_pages_modified,
+ SHOW_LONG},
+ {"encryption_rotation_pages_flushed",
+ (char*) &export_vars.innodb_encryption_rotation_pages_flushed,
+ SHOW_LONG},
+ {"encryption_rotation_estimated_iops",
+ (char*) &export_vars.innodb_encryption_rotation_estimated_iops,
+ SHOW_LONG},
+
+	/* scrubbing */
+ {"scrub_background_page_reorganizations",
+ (char*) &export_vars.innodb_scrub_page_reorganizations,
+ SHOW_LONG},
+ {"scrub_background_page_splits",
+ (char*) &export_vars.innodb_scrub_page_splits,
+ SHOW_LONG},
+ {"scrub_background_page_split_failures_underflow",
+ (char*) &export_vars.innodb_scrub_page_split_failures_underflow,
+ SHOW_LONG},
+ {"scrub_background_page_split_failures_out_of_filespace",
+ (char*) &export_vars.innodb_scrub_page_split_failures_out_of_filespace,
+ SHOW_LONG},
+ {"scrub_background_page_split_failures_missing_index",
+ (char*) &export_vars.innodb_scrub_page_split_failures_missing_index,
+ SHOW_LONG},
+ {"scrub_background_page_split_failures_unknown",
+ (char*) &export_vars.innodb_scrub_page_split_failures_unknown,
+ SHOW_LONG},
{NullS, NullS, SHOW_LONG}
};
@@ -10957,6 +11021,8 @@ innobase_table_flags(
modified by another thread while the table is being created. */
const ulint default_compression_level = page_zip_level;
+ const ulint default_encryption_key = 1;
+
*flags = 0;
*flags2 = 0;
@@ -11158,7 +11224,10 @@ index_bad:
options->page_compressed,
(ulint)options->page_compression_level == ULINT_UNDEFINED ?
default_compression_level : options->page_compression_level,
- options->atomic_writes);
+ options->atomic_writes,
+ options->page_encryption,
+ (ulint)options->page_encryption_key == ULINT_UNDEFINED ?
+ default_encryption_key : options->page_encryption_key);
if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
*flags2 |= DICT_TF2_TEMPORARY;
@@ -11196,6 +11265,24 @@ ha_innobase::check_table_options(
ha_table_option_struct *options= table->s->option_struct;
atomic_writes_t awrites = (atomic_writes_t)options->atomic_writes;
+ if (options->page_encryption) {
+ if (srv_encrypt_tables) {
+ push_warning(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: PAGE_ENCRYPTION not available if innodb_encrypt_tables=ON");
+ return "INNODB_ENCRYPT_TABLES";
+ }
+ if (!use_tablespace) {
+ push_warning(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: PAGE_ENCRYPTION requires"
+ " innodb_file_per_table.");
+ return "PAGE_ENCRYPTION";
+ }
+ }
+
/* Check page compression requirements */
if (options->page_compressed) {
@@ -11268,6 +11355,33 @@ ha_innobase::check_table_options(
}
}
+ if ((ulint)options->page_encryption_key != ULINT_UNDEFINED) {
+ if (options->page_encryption == false) {
+ /* ignore this to allow alter table without changing page_encryption_key ...*/
+ }
+
+ if (options->page_encryption_key < 1 || options->page_encryption_key > 255) {
+ push_warning_printf(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: invalid PAGE_ENCRYPTION_KEY = %lu."
+ " Valid values are [1..255]",
+ options->page_encryption_key);
+ return "PAGE_ENCRYPTION_KEY";
+ }
+
+ if (!HasCryptoKey(options->page_encryption_key)) {
+ push_warning_printf(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ HA_WRONG_CREATE_OPTION,
+ "InnoDB: PAGE_ENCRYPTION_KEY encryption key %lu not available",
+ options->page_encryption_key
+ );
+ return "PAGE_ENCRYPTION_KEY";
+
+ }
+ }
+
/* Check atomic writes requirements */
if (awrites == ATOMIC_WRITES_ON ||
(awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) {
@@ -17636,6 +17750,57 @@ innodb_status_output_update(
os_event_set(srv_monitor_event);
}
+/******************************************************************
+Update the system variable innodb_encryption_threads */
+static
+void
+innodb_encryption_threads_update(
+/*=========================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to
+ system variable */
+ void* var_ptr,/*!< out: where the
+ formal string goes */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ fil_crypt_set_thread_cnt(*static_cast<const uint*>(save));
+}
+
+/******************************************************************
+Update the system variable innodb_encryption_rotate_key_age */
+static
+void
+innodb_encryption_rotate_key_age_update(
+/*=========================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to
+ system variable */
+ void* var_ptr,/*!< out: where the
+ formal string goes */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ fil_crypt_set_rotate_key_age(*static_cast<const uint*>(save));
+}
+
+/******************************************************************
+Update the system variable innodb_encryption_rotation_iops */
+static
+void
+innodb_encryption_rotation_iops_update(
+/*=========================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to
+ system variable */
+ void* var_ptr,/*!< out: where the
+ formal string goes */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ fil_crypt_set_rotation_iops(*static_cast<const uint*>(save));
+}
+
static SHOW_VAR innodb_status_variables_export[]= {
{"Innodb", (char*) &show_innodb_vars, SHOW_FUNC},
{NullS, NullS, SHOW_LONG}
@@ -18928,6 +19093,108 @@ static MYSQL_SYSVAR_ULONG(fatal_semaphore_wait_threshold, srv_fatal_semaphore_wa
UINT_MAX32, /* Maximum setting */
0);
+static MYSQL_SYSVAR_BOOL(encrypt_tables, srv_encrypt_tables, 0,
+ "Encrypt all tables in the storage engine",
+ 0, 0, 0);
+
+static MYSQL_SYSVAR_UINT(encryption_threads, srv_n_fil_crypt_threads,
+ PLUGIN_VAR_RQCMDARG,
+  "Number of threads performing background key rotation and "
+ "scrubbing",
+ NULL,
+ innodb_encryption_threads_update,
+ srv_n_fil_crypt_threads, 0, UINT_MAX32, 0);
+
+static MYSQL_SYSVAR_UINT(encryption_rotate_key_age,
+ srv_fil_crypt_rotate_key_age,
+ PLUGIN_VAR_RQCMDARG,
+ "Rotate any page having a key older than this",
+ NULL,
+ innodb_encryption_rotate_key_age_update,
+ srv_fil_crypt_rotate_key_age, 0, UINT_MAX32, 0);
+
+static MYSQL_SYSVAR_UINT(encryption_rotation_iops, srv_n_fil_crypt_iops,
+ PLUGIN_VAR_RQCMDARG,
+ "Use this many iops for background key rotation",
+ NULL,
+ innodb_encryption_rotation_iops_update,
+ srv_n_fil_crypt_iops, 0, UINT_MAX32, 0);
+
+static MYSQL_SYSVAR_BOOL(scrub_log, srv_scrub_log,
+ PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+ "Enable redo log scrubbing",
+ 0, 0, 0);
+
+/*
+ If innodb_scrub_log is on, logs will be scrubbed in less than
+ (((innodb_log_file_size * innodb_log_files_in_group) / 512 ) /
+ ((1000 * 86400) / innodb_scrub_log_interval))
+ days.
+  In the above formula, the first line calculates the number of log blocks to scrub,
+  and the second line calculates the number of log blocks scrubbed in one day.
+*/
+static MYSQL_SYSVAR_ULONGLONG(scrub_log_interval, innodb_scrub_log_interval,
+ PLUGIN_VAR_OPCMDARG,
+ "Innodb redo log scrubbing interval in ms",
+ NULL, NULL,
+ 2000, /* default */
+ 10, /* min */
+ ULONGLONG_MAX, 0);/* max */
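
    As a worked example of the bound in the comment above (numbers chosen
    purely for illustration): with innodb_log_file_size = 48 MiB,
    innodb_log_files_in_group = 2 and the default innodb_scrub_log_interval =
    2000 ms, there are (48 * 2^20 * 2) / 512 = 196608 log blocks to scrub and
    (1000 * 86400) / 2000 = 43200 blocks are scrubbed per day, so a full pass
    over the redo log takes at most 196608 / 43200 ≈ 4.6 days.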
+
+static MYSQL_SYSVAR_BOOL(encrypt_log, srv_encrypt_log,
+ PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+ "Enable redo log encryption/decryption.",
+ NULL, NULL, FALSE);
+
+static MYSQL_SYSVAR_BOOL(immediate_scrub_data_uncompressed,
+ srv_immediate_scrub_data_uncompressed,
+ 0,
+ "Enable scrubbing of data",
+ NULL, NULL, FALSE);
+
+static MYSQL_SYSVAR_BOOL(background_scrub_data_uncompressed,
+ srv_background_scrub_data_uncompressed,
+ 0,
+ "Enable scrubbing of uncompressed data by "
+ "background threads (same as encryption_threads)",
+ NULL, NULL, FALSE);
+
+static MYSQL_SYSVAR_BOOL(background_scrub_data_compressed,
+ srv_background_scrub_data_compressed,
+ 0,
+ "Enable scrubbing of compressed data by "
+ "background threads (same as encryption_threads)",
+ NULL, NULL, FALSE);
+
+static MYSQL_SYSVAR_UINT(background_scrub_data_check_interval,
+ srv_background_scrub_data_check_interval,
+ 0,
+  "Check if spaces need scrubbing every "
+ "innodb_background_scrub_data_check_interval "
+ "seconds",
+ NULL, NULL,
+ srv_background_scrub_data_check_interval,
+ 1,
+ UINT_MAX32, 0);
+
+static MYSQL_SYSVAR_UINT(background_scrub_data_interval,
+ srv_background_scrub_data_interval,
+ 0,
+  "Scrub spaces that were last scrubbed longer than "
+  "innodb_background_scrub_data_interval seconds ago",
+ NULL, NULL,
+ srv_background_scrub_data_interval,
+ 1,
+ UINT_MAX32, 0);
+
+#ifdef UNIV_DEBUG
+static MYSQL_SYSVAR_BOOL(scrub_force_testing,
+ srv_scrub_force_testing,
+ 0,
+ "Perform extra scrubbing to increase test exposure",
+ NULL, NULL, FALSE);
+#endif /* UNIV_DEBUG */
+
static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(additional_mem_pool_size),
MYSQL_SYSVAR(api_trx_level),
@@ -19100,12 +19367,30 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
#endif /* UNIV_DEBUG */
MYSQL_SYSVAR(simulate_comp_failures),
MYSQL_SYSVAR(force_primary_key),
+ MYSQL_SYSVAR(fatal_semaphore_wait_threshold),
+ /* Table page compression feature */
MYSQL_SYSVAR(use_trim),
MYSQL_SYSVAR(compression_algorithm),
MYSQL_SYSVAR(mtflush_threads),
MYSQL_SYSVAR(use_mtflush),
-
- MYSQL_SYSVAR(fatal_semaphore_wait_threshold),
+ /* Encryption feature */
+ MYSQL_SYSVAR(encrypt_tables),
+ MYSQL_SYSVAR(encryption_threads),
+ MYSQL_SYSVAR(encryption_rotate_key_age),
+ MYSQL_SYSVAR(encryption_rotation_iops),
+ MYSQL_SYSVAR(scrub_log),
+ MYSQL_SYSVAR(scrub_log_interval),
+ MYSQL_SYSVAR(encrypt_log),
+
+ /* Scrubbing feature */
+ MYSQL_SYSVAR(immediate_scrub_data_uncompressed),
+ MYSQL_SYSVAR(background_scrub_data_uncompressed),
+ MYSQL_SYSVAR(background_scrub_data_compressed),
+ MYSQL_SYSVAR(background_scrub_data_interval),
+ MYSQL_SYSVAR(background_scrub_data_check_interval),
+#ifdef UNIV_DEBUG
+ MYSQL_SYSVAR(scrub_force_testing),
+#endif
NULL
};
@@ -19115,7 +19400,7 @@ maria_declare_plugin(innobase)
&innobase_storage_engine,
innobase_hton_name,
plugin_author,
- "Supports transactions, row-level locking, and foreign keys",
+ "Supports transactions, row-level locking, foreign keys and encryption for tables",
PLUGIN_LICENSE_GPL,
innobase_init, /* Plugin Init */
NULL, /* Plugin Deinit */
@@ -19152,8 +19437,9 @@ i_s_innodb_sys_fields,
i_s_innodb_sys_foreign,
i_s_innodb_sys_foreign_cols,
i_s_innodb_sys_tablespaces,
-i_s_innodb_sys_datafiles
-
+i_s_innodb_sys_datafiles,
+i_s_innodb_tablespaces_encryption,
+i_s_innodb_tablespaces_scrubbing
maria_declare_plugin_end;
/** @brief Initialize the default value of innodb_commit_concurrency.
diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h
index 6da31c8ecc6..7807c7ca7e6 100644
--- a/storage/innobase/handler/ha_innodb.h
+++ b/storage/innobase/handler/ha_innodb.h
@@ -57,7 +57,7 @@ typedef struct st_innobase_share {
/** Prebuilt structures in an InnoDB table handle used within MySQL */
struct row_prebuilt_t;
-/** Engine specific table options are definined using this struct */
+/** Engine specific table options are defined using this struct */
struct ha_table_option_struct
{
bool page_compressed; /*!< Table is using page compression
@@ -70,6 +70,8 @@ struct ha_table_option_struct
srv_use_atomic_writes=1.
Atomic writes are not used if
value OFF.*/
+ bool page_encryption; /*!< Flag for an encrypted table */
+ int page_encryption_key; /*!< ID of the encryption key */
};
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index f426c86c7c3..7cc16197f57 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -277,6 +277,13 @@ ha_innobase::check_if_supported_inplace_alter(
ER_ALTER_OPERATION_NOT_SUPPORTED_REASON);
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
}
+
+ if (new_options->page_encryption != old_options->page_encryption ||
+ new_options->page_encryption_key != old_options->page_encryption_key) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
}
if (ha_alter_info->handler_flags
diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
index f6b3dbd2d5d..f0202e232b7 100644
--- a/storage/innobase/handler/i_s.cc
+++ b/storage/innobase/handler/i_s.cc
@@ -4465,10 +4465,14 @@ i_s_innodb_stats_fill(
info->pages_written_rate));
if (info->n_page_get_delta) {
- OK(fields[IDX_BUF_STATS_HIT_RATE]->store(
- static_cast<double>(
- 1000 - (1000 * info->page_read_delta
- / info->n_page_get_delta))));
+ if (info->page_read_delta <= info->n_page_get_delta) {
+ OK(fields[IDX_BUF_STATS_HIT_RATE]->store(
+ static_cast<double>(
+ 1000 - (1000 * info->page_read_delta
+ / info->n_page_get_delta))));
+ } else {
+ OK(fields[IDX_BUF_STATS_HIT_RATE]->store(0));
+ }
OK(fields[IDX_BUF_STATS_MADE_YOUNG_PCT]->store(
static_cast<double>(
@@ -8070,3 +8074,583 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_datafiles =
STRUCT_FLD(version_info, INNODB_VERSION_STR),
STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
};
+
+/** TABLESPACES_ENCRYPTION ********************************************/
+/* Fields of the table INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION */
+static ST_FIELD_INFO innodb_tablespaces_encryption_fields_info[] =
+{
+#define TABLESPACES_ENCRYPTION_SPACE 0
+ {STRUCT_FLD(field_name, "SPACE"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define TABLESPACES_ENCRYPTION_NAME 1
+ {STRUCT_FLD(field_name, "NAME"),
+ STRUCT_FLD(field_length, MAX_FULL_NAME_LEN + 1),
+ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define TABLESPACES_ENCRYPTION_ENCRYPTION_SCHEME 2
+ {STRUCT_FLD(field_name, "ENCRYPTION_SCHEME"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define TABLESPACES_ENCRYPTION_KEYSERVER_REQUESTS 3
+ {STRUCT_FLD(field_name, "KEYSERVER_REQUESTS"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define TABLESPACES_ENCRYPTION_MIN_KEY_VERSION 4
+ {STRUCT_FLD(field_name, "MIN_KEY_VERSION"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define TABLESPACES_ENCRYPTION_CURRENT_KEY_VERSION 5
+ {STRUCT_FLD(field_name, "CURRENT_KEY_VERSION"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER 6
+ {STRUCT_FLD(field_name, "KEY_ROTATION_PAGE_NUMBER"),
+ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER 7
+ {STRUCT_FLD(field_name, "KEY_ROTATION_MAX_PAGE_NUMBER"),
+ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+ END_OF_ST_FIELD_INFO
+};
+
+/**********************************************************************//**
+Function to fill INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION
+with information collected by scanning the SYS_TABLESPACES table and then
+querying fil_space_crypt_get_status() for each space
+@return 0 on success */
+static
+int
+i_s_dict_fill_tablespaces_encryption(
+/*==========================*/
+ THD* thd, /*!< in: thread */
+ ulint space, /*!< in: space ID */
+ const char* name, /*!< in: tablespace name */
+ TABLE* table_to_fill) /*!< in/out: fill this table */
+{
+ Field** fields;
+ struct fil_space_crypt_status_t status;
+
+ DBUG_ENTER("i_s_dict_fill_tablespaces_encryption");
+
+ fields = table_to_fill->field;
+
+ fil_space_crypt_get_status(space, &status);
+ OK(fields[TABLESPACES_ENCRYPTION_SPACE]->store(space));
+
+ OK(field_store_string(fields[TABLESPACES_ENCRYPTION_NAME],
+ name));
+
+ OK(fields[TABLESPACES_ENCRYPTION_ENCRYPTION_SCHEME]->store(
+ status.scheme));
+ OK(fields[TABLESPACES_ENCRYPTION_KEYSERVER_REQUESTS]->store(
+ status.keyserver_requests));
+ OK(fields[TABLESPACES_ENCRYPTION_MIN_KEY_VERSION]->store(
+ status.min_key_version));
+ OK(fields[TABLESPACES_ENCRYPTION_CURRENT_KEY_VERSION]->store(
+ status.current_key_version));
+ if (status.rotating) {
+ fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER]->set_notnull();
+ OK(fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER]->store(
+ status.rotate_next_page_number));
+ fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER]->set_notnull();
+ OK(fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER]->store(
+ status.rotate_max_page_number));
+ } else {
+ fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER]
+ ->set_null();
+ fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER]
+ ->set_null();
+ }
+ OK(schema_table_store_record(thd, table_to_fill));
+
+ DBUG_RETURN(0);
+}
+/*******************************************************************//**
+Function to populate INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION table.
+Loop through each record in SYS_TABLESPACES, extract the column
+information and fill the INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION table.
+@return 0 on success */
+static
+int
+i_s_tablespaces_encryption_fill_table(
+/*===========================*/
+ THD* thd, /*!< in: thread */
+ TABLE_LIST* tables, /*!< in/out: tables to fill */
+ Item* ) /*!< in: condition (not used) */
+{
+ btr_pcur_t pcur;
+ const rec_t* rec;
+ mem_heap_t* heap;
+ mtr_t mtr;
+ bool found_space_0 = false;
+
+ DBUG_ENTER("i_s_tablespaces_encryption_fill_table");
+
+ /* deny access to user without SUPER_ACL privilege */
+ if (check_global_access(thd, SUPER_ACL)) {
+ DBUG_RETURN(0);
+ }
+
+ heap = mem_heap_create(1000);
+ mutex_enter(&dict_sys->mutex);
+ mtr_start(&mtr);
+
+ rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES);
+
+ while (rec) {
+ const char* err_msg;
+ ulint space;
+ const char* name;
+ ulint flags;
+
+ /* Extract necessary information from a SYS_TABLESPACES row */
+ err_msg = dict_process_sys_tablespaces(
+ heap, rec, &space, &name, &flags);
+
+ mtr_commit(&mtr);
+ mutex_exit(&dict_sys->mutex);
+
+ if (space == 0) {
+ found_space_0 = true;
+ }
+
+ if (!err_msg) {
+ i_s_dict_fill_tablespaces_encryption(
+ thd, space, name, tables->table);
+ } else {
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_CANT_FIND_SYSTEM_REC, "%s",
+ err_msg);
+ }
+
+ mem_heap_empty(heap);
+
+ /* Get the next record */
+ mutex_enter(&dict_sys->mutex);
+ mtr_start(&mtr);
+ rec = dict_getnext_system(&pcur, &mtr);
+ }
+
+ mtr_commit(&mtr);
+ mutex_exit(&dict_sys->mutex);
+ mem_heap_free(heap);
+
+ if (found_space_0 == false) {
+ /* for some unknown reason, space 0 does not show up
+ * in the iteration above; add it manually */
+ ulint space = 0;
+ const char* name = NULL;
+ i_s_dict_fill_tablespaces_encryption(
+ thd, space, name, tables->table);
+ }
+
+ DBUG_RETURN(0);
+}
+/*******************************************************************//**
+Bind the dynamic table INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION
+@return 0 on success */
+static
+int
+innodb_tablespaces_encryption_init(
+/*========================*/
+ void* p) /*!< in/out: table schema object */
+{
+ ST_SCHEMA_TABLE* schema;
+
+ DBUG_ENTER("innodb_tablespaces_encryption_init");
+
+ schema = (ST_SCHEMA_TABLE*) p;
+
+ schema->fields_info = innodb_tablespaces_encryption_fields_info;
+ schema->fill_table = i_s_tablespaces_encryption_fill_table;
+
+ DBUG_RETURN(0);
+}
+
+UNIV_INTERN struct st_maria_plugin i_s_innodb_tablespaces_encryption =
+{
+ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
+ /* int */
+ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+
+ /* pointer to type-specific plugin descriptor */
+ /* void* */
+ STRUCT_FLD(info, &i_s_info),
+
+ /* plugin name */
+ /* const char* */
+ STRUCT_FLD(name, "INNODB_TABLESPACES_ENCRYPTION"),
+
+ /* plugin author (for SHOW PLUGINS) */
+ /* const char* */
+ STRUCT_FLD(author, "Google Inc"),
+
+ /* general descriptive text (for SHOW PLUGINS) */
+ /* const char* */
+ STRUCT_FLD(descr, "InnoDB TABLESPACES_ENCRYPTION"),
+
+ /* the plugin license (PLUGIN_LICENSE_XXX) */
+ /* int */
+ STRUCT_FLD(license, PLUGIN_LICENSE_BSD),
+
+ /* the function to invoke when plugin is loaded */
+ /* int (*)(void*); */
+ STRUCT_FLD(init, innodb_tablespaces_encryption_init),
+
+ /* the function to invoke when plugin is unloaded */
+ /* int (*)(void*); */
+ STRUCT_FLD(deinit, i_s_common_deinit),
+
+ /* plugin version (for SHOW PLUGINS) */
+ /* unsigned int */
+ STRUCT_FLD(version, INNODB_VERSION_SHORT),
+
+ /* struct st_mysql_show_var* */
+ STRUCT_FLD(status_vars, NULL),
+
+ /* struct st_mysql_sys_var** */
+ STRUCT_FLD(system_vars, NULL),
+
+ /* Maria extension */
+ STRUCT_FLD(version_info, INNODB_VERSION_STR),
+ STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE)
+};
+
+/** TABLESPACES_SCRUBBING ********************************************/
+/* Fields of the table INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING */
+static ST_FIELD_INFO innodb_tablespaces_scrubbing_fields_info[] =
+{
+#define TABLESPACES_SCRUBBING_SPACE 0
+ {STRUCT_FLD(field_name, "SPACE"),
+ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define TABLESPACES_SCRUBBING_NAME 1
+ {STRUCT_FLD(field_name, "NAME"),
+ STRUCT_FLD(field_length, MAX_FULL_NAME_LEN + 1),
+ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define TABLESPACES_SCRUBBING_COMPRESSED 2
+ {STRUCT_FLD(field_name, "COMPRESSED"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED 3
+ {STRUCT_FLD(field_name, "LAST_SCRUB_COMPLETED"),
+ STRUCT_FLD(field_length, 0),
+ STRUCT_FLD(field_type, MYSQL_TYPE_DATETIME),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define TABLESPACES_SCRUBBING_CURRENT_SCRUB_STARTED 4
+ {STRUCT_FLD(field_name, "CURRENT_SCRUB_STARTED"),
+ STRUCT_FLD(field_length, 0),
+ STRUCT_FLD(field_type, MYSQL_TYPE_DATETIME),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define TABLESPACES_SCRUBBING_CURRENT_SCRUB_ACTIVE_THREADS 5
+ {STRUCT_FLD(field_name, "CURRENT_SCRUB_ACTIVE_THREADS"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define TABLESPACES_SCRUBBING_CURRENT_SCRUB_PAGE_NUMBER 6
+ {STRUCT_FLD(field_name, "CURRENT_SCRUB_PAGE_NUMBER"),
+ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define TABLESPACES_SCRUBBING_CURRENT_SCRUB_MAX_PAGE_NUMBER 7
+ {STRUCT_FLD(field_name, "CURRENT_SCRUB_MAX_PAGE_NUMBER"),
+ STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+ END_OF_ST_FIELD_INFO
+};
+
+/**********************************************************************//**
+Function to fill INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING
+with information collected by scanning the SYS_TABLESPACES table and then
+querying fil_space_get_scrub_status() for each space
+@return 0 on success */
+static
+int
+i_s_dict_fill_tablespaces_scrubbing(
+/*==========================*/
+ THD* thd, /*!< in: thread */
+ ulint space, /*!< in: space ID */
+ const char* name, /*!< in: tablespace name */
+ TABLE* table_to_fill) /*!< in/out: fill this table */
+{
+ Field** fields;
+ struct fil_space_scrub_status_t status;
+
+ DBUG_ENTER("i_s_dict_fill_tablespaces_scrubbing");
+
+ fields = table_to_fill->field;
+
+ fil_space_get_scrub_status(space, &status);
+ OK(fields[TABLESPACES_SCRUBBING_SPACE]->store(space));
+
+ OK(field_store_string(fields[TABLESPACES_SCRUBBING_NAME],
+ name));
+
+ OK(fields[TABLESPACES_SCRUBBING_COMPRESSED]->store(
+ status.compressed ? 1 : 0));
+
+ if (status.last_scrub_completed == 0) {
+ fields[TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED]->set_null();
+ } else {
+ fields[TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED]
+ ->set_notnull();
+ OK(field_store_time_t(
+ fields[TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED],
+ status.last_scrub_completed));
+ }
+
+ int field_numbers[] = {
+ TABLESPACES_SCRUBBING_CURRENT_SCRUB_STARTED,
+ TABLESPACES_SCRUBBING_CURRENT_SCRUB_ACTIVE_THREADS,
+ TABLESPACES_SCRUBBING_CURRENT_SCRUB_PAGE_NUMBER,
+ TABLESPACES_SCRUBBING_CURRENT_SCRUB_MAX_PAGE_NUMBER };
+ if (status.scrubbing) {
+ for (uint i = 0; i < array_elements(field_numbers); i++) {
+ fields[field_numbers[i]]->set_notnull();
+ }
+
+ OK(field_store_time_t(
+ fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_STARTED],
+ status.current_scrub_started));
+ OK(fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_ACTIVE_THREADS]
+ ->store(status.current_scrub_active_threads));
+ OK(fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_PAGE_NUMBER]
+ ->store(status.current_scrub_page_number));
+ OK(fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_MAX_PAGE_NUMBER]
+ ->store(status.current_scrub_max_page_number));
+ } else {
+ for (uint i = 0; i < array_elements(field_numbers); i++) {
+ fields[field_numbers[i]]->set_null();
+ }
+ }
+ OK(schema_table_store_record(thd, table_to_fill));
+
+ DBUG_RETURN(0);
+}
+/*******************************************************************//**
+Function to populate INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING table.
+Loop through each record in SYS_TABLESPACES, extract the column
+information and fill the INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING table.
+@return 0 on success */
+static
+int
+i_s_tablespaces_scrubbing_fill_table(
+/*===========================*/
+ THD* thd, /*!< in: thread */
+ TABLE_LIST* tables, /*!< in/out: tables to fill */
+ Item* ) /*!< in: condition (not used) */
+{
+ btr_pcur_t pcur;
+ const rec_t* rec;
+ mem_heap_t* heap;
+ mtr_t mtr;
+ bool found_space_0 = false;
+
+ DBUG_ENTER("i_s_tablespaces_scrubbing_fill_table");
+
+ /* deny access to user without SUPER_ACL privilege */
+ if (check_global_access(thd, SUPER_ACL)) {
+ DBUG_RETURN(0);
+ }
+
+ heap = mem_heap_create(1000);
+ mutex_enter(&dict_sys->mutex);
+ mtr_start(&mtr);
+
+ rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES);
+
+ while (rec) {
+ const char* err_msg;
+ ulint space;
+ const char* name;
+ ulint flags;
+
+ /* Extract necessary information from a SYS_TABLESPACES row */
+ err_msg = dict_process_sys_tablespaces(
+ heap, rec, &space, &name, &flags);
+
+ mtr_commit(&mtr);
+ mutex_exit(&dict_sys->mutex);
+
+ if (space == 0) {
+ found_space_0 = true;
+ }
+
+ if (!err_msg) {
+ i_s_dict_fill_tablespaces_scrubbing(
+ thd, space, name, tables->table);
+ } else {
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_CANT_FIND_SYSTEM_REC, "%s",
+ err_msg);
+ }
+
+ mem_heap_empty(heap);
+
+ /* Get the next record */
+ mutex_enter(&dict_sys->mutex);
+ mtr_start(&mtr);
+ rec = dict_getnext_system(&pcur, &mtr);
+ }
+
+ mtr_commit(&mtr);
+ mutex_exit(&dict_sys->mutex);
+ mem_heap_free(heap);
+
+ if (found_space_0 == false) {
+ /* for some unknown reason, space 0 does not show up
+ * in the iteration above; add it manually */
+ ulint space = 0;
+ const char* name = NULL;
+ i_s_dict_fill_tablespaces_scrubbing(
+ thd, space, name, tables->table);
+ }
+
+ DBUG_RETURN(0);
+}
+/*******************************************************************//**
+Bind the dynamic table INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING
+@return 0 on success */
+static
+int
+innodb_tablespaces_scrubbing_init(
+/*========================*/
+ void* p) /*!< in/out: table schema object */
+{
+ ST_SCHEMA_TABLE* schema;
+
+ DBUG_ENTER("innodb_tablespaces_scrubbing_init");
+
+ schema = (ST_SCHEMA_TABLE*) p;
+
+ schema->fields_info = innodb_tablespaces_scrubbing_fields_info;
+ schema->fill_table = i_s_tablespaces_scrubbing_fill_table;
+
+ DBUG_RETURN(0);
+}
+
+UNIV_INTERN struct st_maria_plugin i_s_innodb_tablespaces_scrubbing =
+{
+ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
+ /* int */
+ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+
+ /* pointer to type-specific plugin descriptor */
+ /* void* */
+ STRUCT_FLD(info, &i_s_info),
+
+ /* plugin name */
+ /* const char* */
+ STRUCT_FLD(name, "INNODB_TABLESPACES_SCRUBBING"),
+
+ /* plugin author (for SHOW PLUGINS) */
+ /* const char* */
+ STRUCT_FLD(author, "Google Inc"),
+
+ /* general descriptive text (for SHOW PLUGINS) */
+ /* const char* */
+ STRUCT_FLD(descr, "InnoDB TABLESPACES_SCRUBBING"),
+
+ /* the plugin license (PLUGIN_LICENSE_XXX) */
+ /* int */
+ STRUCT_FLD(license, PLUGIN_LICENSE_BSD),
+
+ /* the function to invoke when plugin is loaded */
+ /* int (*)(void*); */
+ STRUCT_FLD(init, innodb_tablespaces_scrubbing_init),
+
+ /* the function to invoke when plugin is unloaded */
+ /* int (*)(void*); */
+ STRUCT_FLD(deinit, i_s_common_deinit),
+
+ /* plugin version (for SHOW PLUGINS) */
+ /* unsigned int */
+ STRUCT_FLD(version, INNODB_VERSION_SHORT),
+
+ /* struct st_mysql_show_var* */
+ STRUCT_FLD(status_vars, NULL),
+
+ /* struct st_mysql_sys_var** */
+ STRUCT_FLD(system_vars, NULL),
+
+ /* Maria extension */
+ STRUCT_FLD(version_info, INNODB_VERSION_STR),
+ STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE)
+};
diff --git a/storage/innobase/handler/i_s.h b/storage/innobase/handler/i_s.h
index a2b324cb314..4b248b0673a 100644
--- a/storage/innobase/handler/i_s.h
+++ b/storage/innobase/handler/i_s.h
@@ -56,5 +56,7 @@ extern struct st_maria_plugin i_s_innodb_sys_foreign;
extern struct st_maria_plugin i_s_innodb_sys_foreign_cols;
extern struct st_maria_plugin i_s_innodb_sys_tablespaces;
extern struct st_maria_plugin i_s_innodb_sys_datafiles;
+extern struct st_maria_plugin i_s_innodb_tablespaces_encryption;
+extern struct st_maria_plugin i_s_innodb_tablespaces_scrubbing;
#endif /* i_s_h */
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index b6f8a685ae9..68ba7bd4f3f 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -731,6 +731,7 @@ btr_page_free_low(
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: block to be freed, x-latched */
ulint level, /*!< in: page level */
+ bool blob, /*!< in: blob page */
mtr_t* mtr) /*!< in: mtr */
__attribute__((nonnull));
/*************************************************************//**
@@ -867,4 +868,8 @@ btr_lift_page_up(
#include "btr0btr.ic"
#endif
+/****************************************************************
+Global variable controlling whether scrubbing should be performed */
+extern my_bool srv_immediate_scrub_data_uncompressed;
+
#endif
diff --git a/storage/innobase/include/btr0scrub.h b/storage/innobase/include/btr0scrub.h
new file mode 100644
index 00000000000..608266c206d
--- /dev/null
+++ b/storage/innobase/include/btr0scrub.h
@@ -0,0 +1,166 @@
+// Copyright 2014 Google
+
+#ifndef btr0scrub_h
+#define btr0scrub_h
+
+#include "univ.i"
+
+#include "dict0dict.h"
+#include "data0data.h"
+#include "page0cur.h"
+#include "mtr0mtr.h"
+#include "btr0types.h"
+
+/**
+ * enum describing page allocation status
+ */
+enum btr_scrub_page_allocation_status_t {
+ BTR_SCRUB_PAGE_FREE,
+ BTR_SCRUB_PAGE_ALLOCATED,
+ BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN
+};
+
+/**
+* constants returned by btr_page_needs_scrubbing & btr_scrub_recheck_page
+*/
+#define BTR_SCRUB_PAGE 1 /* page should be scrubbed */
+#define BTR_SCRUB_SKIP_PAGE 2 /* no scrub & no action */
+#define BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE 3 /* no scrub & close table */
+#define BTR_SCRUB_SKIP_PAGE_AND_COMPLETE_SPACE 4 /* no scrub & complete space */
+#define BTR_SCRUB_TURNED_OFF 5 /* we detected that scrubbing
+ was disabled by global
+ variable */
+
+/**************************************************************//**
+struct for keeping scrub statistics. */
+struct btr_scrub_stat_t {
+ /* page reorganizations */
+ ulint page_reorganizations;
+ /* page splits */
+ ulint page_splits;
+ /* scrub failures */
+ ulint page_split_failures_underflow;
+ ulint page_split_failures_out_of_filespace;
+ ulint page_split_failures_missing_index;
+ ulint page_split_failures_unknown;
+};
+
+/**************************************************************//**
+struct for thread local scrub state. */
+struct btr_scrub_t {
+
+ /* current space */
+ ulint space;
+
+ /* is scrubbing enabled for this space */
+ bool scrubbing;
+
+ /* is current space compressed */
+ bool compressed;
+
+ dict_table_t* current_table;
+ dict_index_t* current_index;
+ /* savepoint for X_LATCH of block */
+ ulint savepoint;
+
+ /* statistic counters */
+ btr_scrub_stat_t scrub_stat;
+};
+
+/*********************************************************************
+Init scrub global variables */
+UNIV_INTERN
+void
+btr_scrub_init();
+
+/*********************************************************************
+Cleanup scrub globals */
+UNIV_INTERN
+void
+btr_scrub_cleanup();
+
+/***********************************************************************
+Return scrub statistics */
+UNIV_INTERN
+void
+btr_scrub_total_stat(
+/*==================*/
+ btr_scrub_stat_t *stat); /*!< out: stats to update */
+
+/**************************************************************//**
+Check if a page needs scrubbing
+* @return BTR_SCRUB_PAGE if page should be scrubbed
+* else btr_scrub_skip_page should be called
+* with this return value (and without any latches held)
+*/
+UNIV_INTERN
+int
+btr_page_needs_scrubbing(
+/*=====================*/
+ btr_scrub_t* scrub_data, /*!< in: scrub data */
+ buf_block_t* block, /*!< in: block to check, latched */
+ btr_scrub_page_allocation_status_t allocated); /*!< in: is block
+ allocated, free or
+ unknown */
+
+/****************************************************************
+Recheck if a page needs scrubbing and, if it does, load the appropriate
+table and index
+* @return BTR_SCRUB_PAGE if page should be scrubbed
+* else btr_scrub_skip_page should be called
+* with this return value (and without any latches held)
+*/
+UNIV_INTERN
+int
+btr_scrub_recheck_page(
+/*====================*/
+ btr_scrub_t* scrub_data, /*!< in/out: scrub data */
+ buf_block_t* block, /*!< in: block */
+ btr_scrub_page_allocation_status_t allocated, /*!< in: is block
+ allocated or free */
+ mtr_t* mtr); /*!< in: mtr */
+
+/****************************************************************
+Perform actual scrubbing of page */
+UNIV_INTERN
+int
+btr_scrub_page(
+/*============*/
+ btr_scrub_t* scrub_data, /*!< in/out: scrub data */
+ buf_block_t* block, /*!< in: block */
+ btr_scrub_page_allocation_status_t allocated, /*!< in: is block
+ allocated or free */
+ mtr_t* mtr); /*!< in: mtr */
+
+/****************************************************************
+Perform cleanup needed for a page not needing scrubbing */
+UNIV_INTERN
+void
+btr_scrub_skip_page(
+/*============*/
+ btr_scrub_t* scrub_data, /*!< in/out: scrub data */
+ int needs_scrubbing); /*!< in: return value from
+ btr_page_needs_scrubbing or
+ btr_scrub_recheck_page which encodes what kind
+ of cleanup is needed */
+
+/****************************************************************
+Start iterating a space
+* @return true if scrubbing is turned on */
+UNIV_INTERN
+bool
+btr_scrub_start_space(
+/*===================*/
+ ulint space, /*!< in: space */
+ btr_scrub_t* scrub_data); /*!< in/out: scrub data */
+
+/****************************************************************
+Complete iterating a space
+* @return true if space was scrubbed */
+UNIV_INTERN
+bool
+btr_scrub_complete_space(
+/*=====================*/
+ btr_scrub_t* scrub_data); /*!< in/out: scrub data */
+
+#endif
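
A minimal sketch of the calling protocol that the comments in this header describe; the real caller is the background key-rotation/scrubbing code in fil0crypt.cc, and block, allocated and mtr stand for state that the surrounding page iteration already holds.

    /* Sketch only -- illustrates the intended call order, not the real
       fil0crypt.cc loop. 'block', 'allocated' and 'mtr' are assumed to be
       provided by the surrounding iteration over the space's pages. */
    btr_scrub_t scrub_data;

    if (btr_scrub_start_space(space_id, &scrub_data)) {

            int ret = btr_page_needs_scrubbing(&scrub_data, block, allocated);

            if (ret == BTR_SCRUB_PAGE) {
                    ret = btr_scrub_recheck_page(&scrub_data, block, allocated, &mtr);
            }

            if (ret == BTR_SCRUB_PAGE) {
                    btr_scrub_page(&scrub_data, block, allocated, &mtr);
            } else {
                    /* latches must be released before this call */
                    btr_scrub_skip_page(&scrub_data, ret);
            }

            btr_scrub_complete_space(&scrub_data);
    }
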
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 5dd5102f65b..2863ab01ff9 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -375,11 +375,13 @@ Given a tablespace id and page number tries to get that page. If the
page is not in the buffer pool it is not loaded and NULL is returned.
Suitable for using when holding the lock_sys_t::mutex. */
UNIV_INTERN
-const buf_block_t*
+buf_block_t*
buf_page_try_get_func(
/*==================*/
ulint space_id,/*!< in: tablespace id */
ulint page_no,/*!< in: page number */
+ ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
+ bool possibly_freed, /*!< in: don't mind if page is freed */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mini-transaction */
@@ -391,7 +393,8 @@ not loaded. Suitable for using when holding the lock_sys_t::mutex.
@param mtr in: mini-transaction
@return the page if in buffer pool, NULL if not */
#define buf_page_try_get(space_id, page_no, mtr) \
- buf_page_try_get_func(space_id, page_no, __FILE__, __LINE__, mtr);
+ buf_page_try_get_func(space_id, page_no, RW_S_LATCH, false, \
+ __FILE__, __LINE__, mtr);
/********************************************************************//**
Get read access to a compressed page (usually of type
@@ -1434,6 +1437,53 @@ buf_flush_update_zip_checksum(
#endif /* !UNIV_HOTBACKUP */
+/********************************************************************//**
+The hook that is called just before a page is written to disk.
+The function encrypts the content of the page and returns a pointer
+to a frame that will be written instead of the real frame. */
+byte*
+buf_page_encrypt_before_write(
+/*==========================*/
+ buf_page_t* page, /*!< in/out: buffer page to be flushed */
+ const byte* frame);
+
+/**********************************************************************
+The hook that is called after a page has been written to disk.
+The function releases any resources needed for encryption that were allocated
+in buf_page_encrypt_before_write */
+ibool
+buf_page_encrypt_after_write(
+/*=========================*/
+ buf_page_t* page); /*!< in/out: buffer page that was flushed */
+
+/********************************************************************//**
+The hook that is called just before a page is read from disk.
+The function allocates memory that is used to temporarily store disk content
+before getting decrypted */
+byte*
+buf_page_decrypt_before_read(
+/*=========================*/
+ buf_page_t* page, /*!< in/out: buffer page read from disk */
+ ulint zip_size); /*!< in: compressed page size, or 0 */
+
+/********************************************************************//**
+The hook that is called just after a page is read from disk.
+The function decrypts the disk content into buf_page_t and releases the
+temporary buffer that was allocated in buf_page_decrypt_before_read */
+ibool
+buf_page_decrypt_after_read(
+/*========================*/
+ buf_page_t* page); /*!< in/out: buffer page read from disk */
+
+/********************************************************************//**
+Release memory allocated for page decryption.
+Only used in scenarios where the read fails, e.g. due to the tablespace being dropped */
+void
+buf_page_decrypt_cleanup(
+/*=====================*/
+ buf_page_t* page); /*!< in/out: buffer page read from disk */
+
+
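
A minimal sketch of how the five hooks declared above are meant to pair up around a page I/O; the real call sites are in buf0flu.cc, buf0rea.cc and fil0fil.cc, and submit_write()/submit_read() below are placeholders for the actual fil_io() calls, not functions added by this patch.

    /* Sketch only: intended ordering of the hooks declared above. */
    static void flush_page_sketch(buf_page_t* bpage, const byte* frame)
    {
            /* may return bpage->crypt_buf instead of the real frame */
            byte* out = buf_page_encrypt_before_write(bpage, frame);

            submit_write(out);                   /* placeholder for fil_io() */

            buf_page_encrypt_after_write(bpage); /* free per-page crypt resources */
    }

    static void read_page_sketch(buf_page_t* bpage, ulint zip_size)
    {
            byte* in = buf_page_decrypt_before_read(bpage, zip_size);

            if (submit_read(in)) {               /* placeholder for fil_io() */
                    buf_page_decrypt_after_read(bpage); /* decrypt + free temp buffer */
            } else {
                    buf_page_decrypt_cleanup(bpage);    /* e.g. tablespace was dropped */
            }
    }
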
/** The common buffer control block structure
for compressed and uncompressed frames */
@@ -1499,11 +1549,25 @@ struct buf_page_t{
zip.data == NULL means an active
buf_pool->watch */
- ulint write_size; /* Write size is set when this
+ ulint write_size; /* Write size is set when this
page is first time written and then
if written again we check is TRIM
operation needed. */
-#ifndef UNIV_HOTBACKUP
+
+ unsigned key_version; /*!< key version for this block */
+ byte* crypt_buf; /*!< for encryption, the data needs to be
+ copied to a separate buffer before it is
+ encrypted and written, because a page can be
+ read while it is being flushed */
+ byte* crypt_buf_free; /*!< for encryption, allocated buffer
+ that is then aligned */
+ byte* comp_buf; /*!< for compression we need a
+ temporary buffer because a page
+ can be read while it is being flushed */
+ byte* comp_buf_free; /*!< for compression, allocated
+ buffer that is then aligned */
+
+#ifndef UNIV_HOTBACKUP
buf_page_t* hash; /*!< node used in chaining to
buf_pool->page_hash or
buf_pool->zip_hash */
diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
index 6e419674f98..51d77272ef9 100644
--- a/storage/innobase/include/buf0buf.ic
+++ b/storage/innobase/include/buf0buf.ic
@@ -1453,4 +1453,33 @@ buf_get_nth_chunk_block(
*chunk_size = chunk->size;
return(chunk->blocks);
}
+
+/********************************************************************//**
+Get crypt buffer. */
+UNIV_INLINE
+byte*
+buf_page_get_crypt_buffer(
+/*=========================*/
+ const buf_page_t* bpage) /*!< in: buffer pool page */
+{
+ return bpage->crypt_buf;
+}
+
+/********************************************************************//**
+Get buf frame. */
+UNIV_INLINE
+void *
+buf_page_get_frame(
+/*=========================*/
+ const buf_page_t* bpage) /*!< in: buffer pool page */
+{
+ if (bpage->crypt_buf) {
+ return buf_page_get_crypt_buffer(bpage);
+ } else if (bpage->zip.data) {
+ return bpage->zip.data;
+ } else {
+ return ((buf_block_t*) bpage)->frame;
+ }
+}
+
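
A hypothetical caller of the helper above (not taken from this patch), showing the intent: I/O code can ask for whichever copy of the page is currently authoritative without checking the encryption and compression state itself.

    /* hypothetical caller, for illustration only */
    const byte* frame = static_cast<const byte*>(buf_page_get_frame(bpage));
    ulint       type  = mach_read_from_2(frame + FIL_PAGE_TYPE);
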
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index 99af4b78e1c..326b9e7c986 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -139,6 +139,17 @@ dict_table_open_on_id(
ibool dict_locked, /*!< in: TRUE=data dictionary locked */
dict_table_op_t table_op) /*!< in: operation to perform */
__attribute__((warn_unused_result));
+
+/**********************************************************************//**
+Returns a table object based on table id.
+@return table, NULL if does not exist */
+UNIV_INTERN
+dict_table_t*
+dict_table_open_on_index_id(
+/*==================*/
+ table_id_t table_id, /*!< in: table id */
+ bool dict_locked) /*!< in: TRUE=data dictionary locked */
+ __attribute__((warn_unused_result));
/********************************************************************//**
Decrements the count of open handles to a table. */
UNIV_INTERN
@@ -918,8 +929,10 @@ dict_tf_set(
pages */
ulint page_compression_level, /*!< in: table page compression
level */
- ulint atomic_writes) /*!< in: table atomic
+ ulint atomic_writes, /*!< in: table atomic
writes option value*/
+ bool page_encrypted,/*!< in: table uses page encryption */
+ ulint page_encryption_key) /*!< in: page encryption key */
__attribute__((nonnull));
/********************************************************************//**
Convert a 32 bit integer table flags to the 32 bit integer that is
@@ -1446,8 +1459,12 @@ dict_index_calc_min_rec_len(
Reserves the dictionary system mutex for MySQL. */
UNIV_INTERN
void
-dict_mutex_enter_for_mysql(void);
+dict_mutex_enter_for_mysql_func(const char * file, ulint line);
/*============================*/
+
+#define dict_mutex_enter_for_mysql() \
+ dict_mutex_enter_for_mysql_func(__FILE__, __LINE__)
+
/********************************************************************//**
Releases the dictionary system mutex for MySQL. */
UNIV_INTERN
diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic
index 43bd42ae025..7c51faf844e 100644
--- a/storage/innobase/include/dict0dict.ic
+++ b/storage/innobase/include/dict0dict.ic
@@ -543,6 +543,9 @@ dict_tf_is_valid(
ulint data_dir = DICT_TF_HAS_DATA_DIR(flags);
ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(flags);
+ ulint page_encryption = DICT_TF_GET_PAGE_ENCRYPTION(flags);
+ ulint page_encryption_key = DICT_TF_GET_PAGE_ENCRYPTION_KEY(flags);
+
/* Make sure there are no bits that we do not know about. */
if (unused != 0) {
fprintf(stderr,
@@ -552,10 +555,12 @@ dict_tf_is_valid(
"InnoDB: compact %ld atomic_blobs %ld\n"
"InnoDB: unused %ld data_dir %ld zip_ssize %ld\n"
"InnoDB: page_compression %ld page_compression_level %ld\n"
- "InnoDB: atomic_writes %ld\n",
+ "InnoDB: atomic_writes %ld\n"
+ "InnoDB: page_encryption %ld page_encryption_key %ld\n",
unused,
compact, atomic_blobs, unused, data_dir, zip_ssize,
- page_compression, page_compression_level, atomic_writes
+ page_compression, page_compression_level, atomic_writes,
+ page_encryption, page_encryption_key
);
return(false);
@@ -852,7 +857,9 @@ dict_tf_set(
pages */
ulint page_compression_level, /*!< in: table page compression
level */
- ulint atomic_writes) /*!< in: table atomic writes setup */
+ ulint atomic_writes, /*!< in: table atomic writes setup */
+ bool page_encrypted, /*!< in: table uses page encryption */
+ ulint page_encryption_key /*!< in: page encryption key */)
{
atomic_writes_t awrites = (atomic_writes_t)atomic_writes;
@@ -893,6 +900,11 @@ dict_tf_set(
if (use_data_dir) {
*flags |= (1 << DICT_TF_POS_DATA_DIR);
}
+
+ if (page_encrypted) {
+ *flags |= (1 << DICT_TF_POS_PAGE_ENCRYPTION)
+ | (page_encryption_key << DICT_TF_POS_PAGE_ENCRYPTION_KEY);
+ }
}
/********************************************************************//**
@@ -915,6 +927,10 @@ dict_tf_to_fsp_flags(
ulint fsp_flags;
ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags);
ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags);
+
+ ulint page_encryption = DICT_TF_GET_PAGE_ENCRYPTION(table_flags);
+ ulint page_encryption_key = DICT_TF_GET_PAGE_ENCRYPTION_KEY(table_flags);
+
ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags);
DBUG_EXECUTE_IF("dict_tf_to_fsp_flags_failure",
@@ -942,6 +958,14 @@ dict_tf_to_fsp_flags(
if page compression is used for this table. */
fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(fsp_flags, page_compression_level);
+ /* In addition, tablespace flags also contain whether page
+ encryption is used for this table. */
+ fsp_flags |= FSP_FLAGS_SET_PAGE_ENCRYPTION(fsp_flags, page_encryption);
+
+ /* In addition, tablespace flags also contain the page encryption key if
+ page encryption is used for this table. */
+ fsp_flags |= FSP_FLAGS_SET_PAGE_ENCRYPTION_KEY(fsp_flags, page_encryption_key);
+
/* In addition, tablespace flags also contain flag if atomic writes
is used for this table */
fsp_flags |= FSP_FLAGS_SET_ATOMIC_WRITES(fsp_flags, atomic_writes);
@@ -983,6 +1007,9 @@ dict_sys_tables_type_to_tf(
| DICT_TF_MASK_PAGE_COMPRESSION
| DICT_TF_MASK_PAGE_COMPRESSION_LEVEL
| DICT_TF_MASK_ATOMIC_WRITES
+ | DICT_TF_MASK_PAGE_ENCRYPTION
+ | DICT_TF_MASK_PAGE_ENCRYPTION_KEY
+
);
return(flags);
@@ -1018,7 +1045,9 @@ dict_tf_to_sys_tables_type(
| DICT_TF_MASK_DATA_DIR
| DICT_TF_MASK_PAGE_COMPRESSION
| DICT_TF_MASK_PAGE_COMPRESSION_LEVEL
- | DICT_TF_MASK_ATOMIC_WRITES);
+ | DICT_TF_MASK_ATOMIC_WRITES
+ | DICT_TF_MASK_PAGE_ENCRYPTION
+ | DICT_TF_MASK_PAGE_ENCRYPTION_KEY);
return(type);
}
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index 1d59bc09f6d..3e0ca662bc5 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -132,6 +132,12 @@ Width of the page compression flag
#define DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL 4
/**
+Width of the page encryption flag and of the page encryption key
+*/
+#define DICT_TF_WIDTH_PAGE_ENCRYPTION 1
+#define DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY 8
+
+/**
Width of atomic writes flag
DEFAULT=0, ON = 1, OFF = 2
*/
@@ -144,7 +150,9 @@ DEFAULT=0, ON = 1, OFF = 2
+ DICT_TF_WIDTH_DATA_DIR \
+ DICT_TF_WIDTH_PAGE_COMPRESSION \
+ DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL \
- + DICT_TF_WIDTH_ATOMIC_WRITES)
+ + DICT_TF_WIDTH_ATOMIC_WRITES \
+ + DICT_TF_WIDTH_PAGE_ENCRYPTION \
+ + DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY)
/** A mask of all the known/used bits in table flags */
#define DICT_TF_BIT_MASK (~(~0 << DICT_TF_BITS))
@@ -167,12 +175,16 @@ DEFAULT=0, ON = 1, OFF = 2
#define DICT_TF_POS_PAGE_COMPRESSION_LEVEL (DICT_TF_POS_PAGE_COMPRESSION \
+ DICT_TF_WIDTH_PAGE_COMPRESSION)
/** Zero relative shift position of the ATOMIC_WRITES field */
-#define DICT_TF_POS_ATOMIC_WRITES (DICT_TF_POS_PAGE_COMPRESSION_LEVEL \
+#define DICT_TF_POS_ATOMIC_WRITES (DICT_TF_POS_PAGE_COMPRESSION_LEVEL \
+ DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)
-
-/** Zero relative shift position of the start of the UNUSED bits */
-#define DICT_TF_POS_UNUSED (DICT_TF_POS_ATOMIC_WRITES \
- + DICT_TF_WIDTH_ATOMIC_WRITES)
+/** Zero relative shift position of the PAGE_ENCRYPTION field */
+#define DICT_TF_POS_PAGE_ENCRYPTION (DICT_TF_POS_ATOMIC_WRITES \
+ + DICT_TF_WIDTH_ATOMIC_WRITES)
+/** Zero relative shift position of the PAGE_ENCRYPTION_KEY field */
+#define DICT_TF_POS_PAGE_ENCRYPTION_KEY (DICT_TF_POS_PAGE_ENCRYPTION \
+ + DICT_TF_WIDTH_PAGE_ENCRYPTION)
+#define DICT_TF_POS_UNUSED (DICT_TF_POS_PAGE_ENCRYPTION_KEY \
+ + DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY)
/** Bit mask of the COMPACT field */
#define DICT_TF_MASK_COMPACT \
@@ -202,6 +214,14 @@ DEFAULT=0, ON = 1, OFF = 2
#define DICT_TF_MASK_ATOMIC_WRITES \
((~(~0 << DICT_TF_WIDTH_ATOMIC_WRITES)) \
<< DICT_TF_POS_ATOMIC_WRITES)
+/** Bit mask of the PAGE_ENCRYPTION field */
+#define DICT_TF_MASK_PAGE_ENCRYPTION \
+ ((~(~0 << DICT_TF_WIDTH_PAGE_ENCRYPTION)) \
+ << DICT_TF_POS_PAGE_ENCRYPTION)
+/** Bit mask of the PAGE_ENCRYPTION_KEY field */
+#define DICT_TF_MASK_PAGE_ENCRYPTION_KEY \
+ ((~(~0 << DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY)) \
+ << DICT_TF_POS_PAGE_ENCRYPTION_KEY)
/** Return the value of the COMPACT field */
#define DICT_TF_GET_COMPACT(flags) \
@@ -231,6 +251,14 @@ DEFAULT=0, ON = 1, OFF = 2
#define DICT_TF_GET_ATOMIC_WRITES(flags) \
((flags & DICT_TF_MASK_ATOMIC_WRITES) \
>> DICT_TF_POS_ATOMIC_WRITES)
+/** Return the contents of the PAGE_ENCRYPTION field */
+#define DICT_TF_GET_PAGE_ENCRYPTION(flags) \
+ ((flags & DICT_TF_MASK_PAGE_ENCRYPTION) \
+ >> DICT_TF_POS_PAGE_ENCRYPTION)
+/** Return the contents of the PAGE_ENCRYPTION_KEY field */
+#define DICT_TF_GET_PAGE_ENCRYPTION_KEY(flags) \
+ ((flags & DICT_TF_MASK_PAGE_ENCRYPTION_KEY) \
+ >> DICT_TF_POS_PAGE_ENCRYPTION_KEY)
/** Return the contents of the UNUSED bits */
#define DICT_TF_GET_UNUSED(flags) \
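
An illustrative round trip of the two new table-flag fields, mirroring what dict_tf_set() and the GET macros above do; the key id value 3 is just an example.

    /* illustration only; key id 3 is an arbitrary example value */
    ulint flags  = 0;
    ulint key_id = 3;

    flags |= (1 << DICT_TF_POS_PAGE_ENCRYPTION)
          |  (key_id << DICT_TF_POS_PAGE_ENCRYPTION_KEY);

    ut_ad(DICT_TF_GET_PAGE_ENCRYPTION(flags) == 1);
    ut_ad(DICT_TF_GET_PAGE_ENCRYPTION_KEY(flags) == key_id);
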
@@ -1190,20 +1218,29 @@ struct dict_table_t{
calculation; this counter is not protected by
any latch, because this is only used for
heuristics */
-#define BG_STAT_NONE 0
-#define BG_STAT_IN_PROGRESS (1 << 0)
+
+#define BG_STAT_IN_PROGRESS ((byte)(1 << 0))
/*!< BG_STAT_IN_PROGRESS is set in
stats_bg_flag when the background
stats code is working on this table. The DROP
TABLE code waits for this to be cleared
before proceeding. */
-#define BG_STAT_SHOULD_QUIT (1 << 1)
+#define BG_STAT_SHOULD_QUIT ((byte)(1 << 1))
/*!< BG_STAT_SHOULD_QUIT is set in
stats_bg_flag when DROP TABLE starts
waiting on BG_STAT_IN_PROGRESS to be cleared,
the background stats thread will detect this
and will eventually quit sooner */
- byte stats_bg_flag;
+#define BG_SCRUB_IN_PROGRESS ((byte)(1 << 2))
+ /*!< BG_SCRUB_IN_PROGRESS is set in
+ stats_bg_flag when the background
+ scrub code is working on this table. The DROP
+ TABLE code waits for this to be cleared
+ before proceeding. */
+
+#define BG_IN_PROGRESS (BG_STAT_IN_PROGRESS | BG_SCRUB_IN_PROGRESS)
+
+ byte stats_bg_flag;
/*!< see BG_STAT_* above.
Writes are covered by dict_sys->mutex.
Dirty reads are possible. */
diff --git a/storage/innobase/include/dict0pagecompress.ic b/storage/innobase/include/dict0pagecompress.ic
index 811976434a8..a71b2b34b07 100644
--- a/storage/innobase/include/dict0pagecompress.ic
+++ b/storage/innobase/include/dict0pagecompress.ic
@@ -42,6 +42,8 @@ dict_tf_verify_flags(
ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags);
ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags);
ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags);
+ ulint page_encryption = DICT_TF_GET_PAGE_ENCRYPTION(table_flags);
+ ulint page_encryption_key = DICT_TF_GET_PAGE_ENCRYPTION_KEY(table_flags);
ulint post_antelope = FSP_FLAGS_GET_POST_ANTELOPE(fsp_flags);
ulint zip_ssize = FSP_FLAGS_GET_ZIP_SSIZE(fsp_flags);
ulint fsp_atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(fsp_flags);
@@ -50,6 +52,9 @@ dict_tf_verify_flags(
ulint fsp_page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(fsp_flags);
ulint fsp_page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(fsp_flags);
ulint fsp_atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(fsp_flags);
+ ulint fsp_page_encryption = FSP_FLAGS_GET_PAGE_ENCRYPTION(fsp_flags);
+ ulint fsp_page_encryption_key = FSP_FLAGS_GET_PAGE_ENCRYPTION_KEY(fsp_flags);
+
DBUG_EXECUTE_IF("dict_tf_verify_flags_failure",
return(ULINT_UNDEFINED););
@@ -107,6 +112,27 @@ dict_tf_verify_flags(
return (FALSE);
}
+ if (page_encryption != fsp_page_encryption) {
+ fprintf(stderr,
+ "InnoDB: Error: table flags has page_encryption %ld"
+ " in the data dictionary\n"
+ "InnoDB: but the flags in file has page_encryption %ld\n",
+ page_encryption, fsp_page_encryption);
+
+ return (FALSE);
+ }
+
+ if (page_encryption_key != fsp_page_encryption_key) {
+ fprintf(stderr,
+ "InnoDB: Error: table flags has page_encryption_key %ld"
+ " in the data dictionary\n"
+ "InnoDB: but the flags in file has page_encryption_key %ld\n",
+ page_encryption_key, fsp_page_encryption_key);
+
+ return (FALSE);
+ }
+
+
return(TRUE);
}
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index c91e1777a13..1ed8cbf3293 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -26,7 +26,7 @@ Created 10/25/1995 Heikki Tuuri
#ifndef fil0fil_h
#define fil0fil_h
-
+#define MSG_CANNOT_DECRYPT "can not decrypt"
#include "univ.i"
#ifndef UNIV_INNOCHECKSUM
@@ -127,16 +127,20 @@ extern fil_addr_t fil_addr_null;
MySQL/InnoDB 5.1.7 or later, the
contents of this field is valid
for all uncompressed pages. */
-#define FIL_PAGE_FILE_FLUSH_LSN 26 /*!< this is only defined for the
- first page in a system tablespace
- data file (ibdata*, not *.ibd):
- the file has been flushed to disk
- at least up to this lsn */
+#define FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION 26 /*!< for the first page
+ in a system tablespace data file
+ (ibdata*, not *.ibd): the file has
+ been flushed to disk at least up
+ to this lsn
+ for other pages: a 32-bit key version
+ used to encrypt the page + 32-bit checksum
+ or 64 bits of zero if no encryption
+ */
/** If page type is FIL_PAGE_COMPRESSED then the 8 bytes starting at
FIL_PAGE_FILE_FLUSH_LSN are broken down as follows: */
/** Control information version format (u8) */
-static const ulint FIL_PAGE_VERSION = FIL_PAGE_FILE_FLUSH_LSN;
+static const ulint FIL_PAGE_VERSION = FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION;
/** Compression algorithm (u8) */
static const ulint FIL_PAGE_ALGORITHM_V1 = FIL_PAGE_VERSION + 1;
@@ -169,7 +173,10 @@ static const ulint FIL_PAGE_COMPRESS_SIZE_V1 = FIL_PAGE_ORIGINAL_SIZE_V1 + 2;
/* @} */
/** File page types (values of FIL_PAGE_TYPE) @{ */
+#define FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED 35631 /* page compressed +
+ encrypted page */
#define FIL_PAGE_PAGE_COMPRESSED 34354 /*!< page compressed page */
+#define FIL_PAGE_PAGE_ENCRYPTED 34355 /*!< Page encrypted page */
#define FIL_PAGE_INDEX 17855 /*!< B-tree node */
#define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */
#define FIL_PAGE_INODE 3 /*!< Index node */
@@ -207,6 +214,9 @@ extern ulint fil_n_pending_tablespace_flushes;
/** Number of files currently open */
extern ulint fil_n_file_opened;
+/* structure containing encryption specification */
+typedef struct fil_space_crypt_struct fil_space_crypt_t;
+
struct fsp_open_info {
ibool success; /*!< Has the tablespace been opened? */
const char* check_msg; /*!< fil_check_first_page() message */
@@ -216,9 +226,11 @@ struct fsp_open_info {
lsn_t lsn; /*!< Flushed LSN from header page */
ulint id; /*!< Space ID */
ulint flags; /*!< Tablespace flags */
+ ulint encryption_error; /*!< if an encryption error occurs */
#ifdef UNIV_LOG_ARCHIVE
ulint arch_log_no; /*!< latest archived log file number */
#endif /* UNIV_LOG_ARCHIVE */
+ fil_space_crypt_t* crypt_data; /*!< crypt data */
};
struct fil_space_t;
@@ -333,6 +345,7 @@ struct fil_space_t {
unflushed_spaces */
UT_LIST_NODE_T(fil_space_t) space_list;
/*!< list of all spaces */
+ fil_space_crypt_t* crypt_data;
ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
};
@@ -468,7 +481,9 @@ fil_space_create(
ulint id, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size, or
0 for uncompressed tablespaces */
- ulint purpose);/*!< in: FIL_TABLESPACE, or FIL_LOG if log */
+ ulint purpose, /*!< in: FIL_TABLESPACE, or FIL_LOG if log */
+ fil_space_crypt_t* crypt_data); /*!< in: crypt data */
+
/*******************************************************************//**
Assigns a new space id for a new single-table tablespace. This works simply by
incrementing the global counter. If 4 billion id's is not enough, we may need
@@ -609,8 +624,10 @@ fil_read_first_page(
lsn values in data files */
lsn_t* max_flushed_lsn, /*!< out: max of flushed
lsn values in data files */
- ulint orig_space_id) /*!< in: file space id or
+ ulint orig_space_id, /*!< in: file space id or
ULINT_UNDEFINED */
+ fil_space_crypt_t** crypt_data) /*!< out: crypt data */
+
__attribute__((warn_unused_result));
/*******************************************************************//**
Increments the count of pending operation, if space is not being deleted.
@@ -976,11 +993,12 @@ fil_io(
appropriately aligned */
void* message, /*!< in: message for aio handler if non-sync
aio used, else ignored */
- ulint* write_size) /*!< in/out: Actual write size initialized
+ ulint* write_size, /*!< in/out: Actual write size initialized
after fist successfull trim
operation for this page and if
initialized we do not trim again if
actual page size does not decrease. */
+ lsn_t lsn) /* lsn of the newest modification */
__attribute__((nonnull(8)));
/**********************************************************************//**
Waits for an aio operation to complete. This function is used to write the
@@ -1231,6 +1249,13 @@ fil_user_tablespace_restore_page(
ulint page_no); /* in: page_no to obtain from double
write buffer */
+/*******************************************************************//**
+Return space flags */
+ulint
+fil_space_flags(
+/*===========*/
+ fil_space_t* space); /*!< in: space */
+
#endif /* !UNIV_INNOCHECKSUM */
/****************************************************************//**
@@ -1257,6 +1282,277 @@ char*
fil_space_name(
/*===========*/
fil_space_t* space); /*!< in: space */
+
+/******************************************************************
+Get id of first tablespace or ULINT_UNDEFINED if none */
+UNIV_INTERN
+ulint
+fil_get_first_space();
+
+/******************************************************************
+Get id of next tablespace or ULINT_UNDEFINED if none */
+UNIV_INTERN
+ulint
+fil_get_next_space(
+ ulint id); /*!< in: space id */
+
+/*********************************************************************
+Init global resources needed for tablespace encryption/decryption */
+void
+fil_space_crypt_init();
+
+/*********************************************************************
+Cleanup global resources needed for tablespace encryption/decryption */
+void
+fil_space_crypt_cleanup();
+
+/*********************************************************************
+Create crypt data, i.e data that is used for a single tablespace */
+fil_space_crypt_t *
+fil_space_create_crypt_data();
+
+/*********************************************************************
+Destroy crypt data */
+UNIV_INTERN
+void
+fil_space_destroy_crypt_data(
+/*=========================*/
+ fil_space_crypt_t **crypt_data); /*!< in/out: crypt data */
+
+/*********************************************************************
+Get crypt data for a space*/
+fil_space_crypt_t *
+fil_space_get_crypt_data(
+/*======================*/
+ ulint space); /*!< in: tablespace id */
+
+/*********************************************************************
+Set crypt data for a space*/
+void
+fil_space_set_crypt_data(
+/*======================*/
+ ulint space, /*!< in: tablespace id */
+ fil_space_crypt_t* crypt_data); /*!< in: crypt data */
+
+/*********************************************************************
+Compare crypt data*/
+int
+fil_space_crypt_compare(
+/*======================*/
+ const fil_space_crypt_t* crypt_data1, /*!< in: crypt data */
+ const fil_space_crypt_t* crypt_data2); /*!< in: crypt data */
+
+/*********************************************************************
+Read crypt data from buffer page */
+fil_space_crypt_t *
+fil_space_read_crypt_data(
+/*======================*/
+ ulint space, /*!< in: tablespace id */
+ const byte* page, /*!< in: buffer page */
+ ulint offset); /*!< in: offset where crypt data is stored */
+
+/*********************************************************************
+Write crypt data to buffer page */
+void
+fil_space_write_crypt_data(
+/*=======================*/
+ ulint space, /*!< in: tablespace id */
+ byte* page, /*!< in: buffer page */
+ ulint offset, /*!< in: offset where to store data */
+ ulint maxsize, /*!< in: max space available to store crypt data in */
+ mtr_t * mtr); /*!< in: mini-transaction */
+
+/*********************************************************************
+Clear crypt data from page 0 (used for import tablespace) */
+void
+fil_space_clear_crypt_data(
+/*======================*/
+ byte* page, /*!< in: buffer page */
+ ulint offset); /*!< in: offset where crypt data is stored */
+
+/*********************************************************************
+Parse crypt data log record */
+byte*
+fil_parse_write_crypt_data(
+/*=======================*/
+ byte* ptr, /*!< in: start of log record */
+ byte* end_ptr, /*!< in: end of log record */
+ buf_block_t*); /*!< in: buffer page to apply record to */
+
+/*********************************************************************
+Check if extra buffer shall be allocated for decrypting after read */
+UNIV_INTERN
+bool
+fil_space_check_encryption_read(
+/*==============================*/
+ ulint space); /*!< in: tablespace id */
+
+/*********************************************************************
+Check if page shall be encrypted before write */
+UNIV_INTERN
+bool
+fil_space_check_encryption_write(
+/*==============================*/
+ ulint space); /*!< in: tablespace id */
+
+/*********************************************************************
+Encrypt buffer page */
+void
+fil_space_encrypt(
+/*===============*/
+ ulint space, /*!< in: tablespace id */
+ ulint offset, /*!< in: page no */
+ lsn_t lsn, /*!< in: page lsn */
+ const byte* src_frame,/*!< in: page frame */
+ ulint size, /*!< in: size of data to encrypt */
+ byte* dst_frame, /*!< in: where to encrypt to */
+ ulint page_encryption_key); /*!< in: page encryption key id if page
+ encrypted */
+
+/*********************************************************************
+Decrypt buffer page */
+void
+fil_space_decrypt(
+/*===============*/
+ ulint space, /*!< in: tablespace id */
+ const byte* src_frame,/*!< in: page frame */
+ ulint page_size, /*!< in: size of data to encrypt */
+ byte* dst_frame); /*!< in: where to decrypt to */
+
+
+/*********************************************************************
+Decrypt buffer page
+@return true if page was encrypted */
+bool
+fil_space_decrypt(
+/*===============*/
+ fil_space_crypt_t* crypt_data, /*!< in: crypt data */
+ const byte* src_frame,/*!< in: page frame */
+ ulint page_size, /*!< in: page size */
+ byte* dst_frame); /*!< in: where to decrypt to */
+
+/*********************************************************************
+fil_space_verify_crypt_checksum
+NOTE: currently this function can only be run in single threaded mode
+as it modifies srv_checksum_algorithm (temporarily)
+@return true if page is encrypted AND OK, false otherwise */
+bool
+fil_space_verify_crypt_checksum(
+/*===============*/
+ const byte* src_frame,/*!< in: page frame */
+	ulint		zip_size);	/*!< in: compressed page size, or 0 */
+
+/*********************************************************************
+Init threads for key rotation */
+void
+fil_crypt_threads_init();
+
+/*********************************************************************
+Set thread count (e.g. start or stop threads) used for key rotation */
+void
+fil_crypt_set_thread_cnt(
+/*=====================*/
+ uint new_cnt); /*!< in: requested #threads */
+
+/*********************************************************************
+End threads for key rotation */
+void
+fil_crypt_threads_end();
+
+/*********************************************************************
+Cleanup resources for threads for key rotation */
+void
+fil_crypt_threads_cleanup();
+
+/*********************************************************************
+Set rotate key age */
+void
+fil_crypt_set_rotate_key_age(
+/*=====================*/
+ uint rotate_age); /*!< in: requested rotate age */
+
+/*********************************************************************
+Set rotation threads iops */
+void
+fil_crypt_set_rotation_iops(
+/*=====================*/
+ uint iops); /*!< in: requested iops */
+
+/*********************************************************************
+Mark a space as closing */
+UNIV_INTERN
+void
+fil_space_crypt_mark_space_closing(
+/*===============*/
+ ulint space); /*!< in: tablespace id */
+
+/*********************************************************************
+Wait for crypt threads to stop accessing space */
+UNIV_INTERN
+void
+fil_space_crypt_close_tablespace(
+/*===============*/
+ ulint space); /*!< in: tablespace id */
+
+/** Struct for retrieving info about encryption */
+struct fil_space_crypt_status_t {
+ ulint space; /*!< tablespace id */
+ ulint scheme; /*!< encryption scheme */
+ uint min_key_version; /*!< min key version */
+ uint current_key_version;/*!< current key version */
+ uint keyserver_requests;/*!< no of key requests to key server */
+ bool rotating; /*!< is key rotation ongoing */
+ bool flushing; /*!< is flush at end of rotation ongoing */
+ ulint rotate_next_page_number; /*!< next page if key rotating */
+ ulint rotate_max_page_number; /*!< max page if key rotating */
+};
+
+/*********************************************************************
+Get crypt status for a space
+@return 0 if crypt data found */
+int
+fil_space_crypt_get_status(
+/*==================*/
+ ulint id, /*!< in: space id */
+ struct fil_space_crypt_status_t * status); /*!< out: status */
+
+/** Struct for retrieving statistics about encryption key rotation */
+struct fil_crypt_stat_t {
+ ulint pages_read_from_cache;
+ ulint pages_read_from_disk;
+ ulint pages_modified;
+ ulint pages_flushed;
+ ulint estimated_iops;
+};
+
+/*********************************************************************
+Get crypt rotation statistics */
+void
+fil_crypt_total_stat(
+/*==================*/
+ fil_crypt_stat_t* stat); /*!< out: crypt stat */
+
+/** Struct for retrieving info about scrubbing */
+struct fil_space_scrub_status_t {
+ ulint space; /*!< tablespace id */
+ bool compressed; /*!< is space compressed */
+ time_t last_scrub_completed; /*!< when was last scrub completed */
+ bool scrubbing; /*!< is scrubbing ongoing */
+ time_t current_scrub_started; /*!< when started current scrubbing */
+ ulint current_scrub_active_threads; /*!< current scrub active threads */
+ ulint current_scrub_page_number; /*!< current scrub page no */
+ ulint current_scrub_max_page_number; /*!< current scrub max page no */
+};
+
+/*********************************************************************
+Get scrub status for a space
+@return 0 if no scrub info found */
+int
+fil_space_get_scrub_status(
+/*==================*/
+ ulint id, /*!< in: space id */
+ struct fil_space_scrub_status_t * status); /*!< out: status */
+
#endif
/*******************************************************************//**
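
The declarations above make up the per-tablespace encryption API. As a rough
orientation (not part of the patch), a write path would first ask
fil_space_check_encryption_write() and encrypt into a scratch frame, while a
read path decrypts after the physical read; the two wrapper functions below,
their names, and the scratch-buffer handling are assumptions added purely for
illustration.

#include "fil0fil.h"

/* Hypothetical write-path helper: encrypt a page frame into a caller-provided
scratch buffer before it is handed to the I/O layer, or pass the plaintext
frame through when the tablespace is not encrypted. */
static const byte*
frame_for_write(ulint space, ulint page_no, lsn_t lsn,
		const byte* src_frame, ulint size,
		byte* scratch, ulint key_id)
{
	if (!fil_space_check_encryption_write(space)) {
		return(src_frame);
	}

	fil_space_encrypt(space, page_no, lsn, src_frame, size,
			  scratch, key_id);
	return(scratch);
}

/* Hypothetical read-path helper: decrypt into the caller's frame after the
physical read has completed. */
static void
frame_after_read(ulint space, const byte* io_frame, ulint page_size,
		 byte* dst_frame)
{
	if (fil_space_check_encryption_read(space)) {
		fil_space_decrypt(space, io_frame, page_size, dst_frame);
	}
}
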
diff --git a/storage/innobase/include/fil0pageencryption.h b/storage/innobase/include/fil0pageencryption.h
new file mode 100644
index 00000000000..9769f8c1912
--- /dev/null
+++ b/storage/innobase/include/fil0pageencryption.h
@@ -0,0 +1,76 @@
+/*****************************************************************************
+
+Copyright (C) 2014 eperi GmbH. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*****************************************************************************/
+
+#ifndef fil0pageencryption_h
+#define fil0pageencryption_h
+
+#define PAGE_ENCRYPTION_WRONG_KEY 1
+#define PAGE_ENCRYPTION_WRONG_PAGE_TYPE 2
+#define PAGE_ENCRYPTION_ERROR 3
+#define PAGE_ENCRYPTION_KEY_MISSING 4
+#define PAGE_ENCRYPTION_OK 0
+#define PAGE_ENCRYPTION_WILL_NOT_ENCRYPT 5
+
+#include "fsp0fsp.h"
+#include "fsp0pageencryption.h"
+
+/******************************************************************//**
+@file include/fil0pageencryption.h
+Helper functions for encrypting/decrypting page data in a tablespace.
+
+Created 08/25/2014
+***********************************************************************/
+
+
+/*******************************************************************//**
+Returns the page encryption flag of the space, or false if the space
+is not encrypted. The tablespace must be cached in the memory cache.
+@return true if page encrypted, false if not or space not found */
+ibool
+fil_space_is_page_encrypted(
+/*=========================*/
+ ulint id); /*!< in: space id */
+
+/*******************************************************************//**
+Find out whether the page is page encrypted
+@return true if page is page encrypted, false if not */
+UNIV_INLINE
+ibool
+fil_page_is_encrypted(
+/*==================*/
+ const byte *buf); /*!< in: page */
+
+/*******************************************************************//**
+Find out whether the page is page compressed and then encrypted
+@return true if page is page compressed+encrypted, false if not */
+UNIV_INLINE
+ibool
+fil_page_is_compressed_encrypted(
+/*=============================*/
+ const byte *buf); /*!< in: page */
+
+/*******************************************************************//**
+Find out whether the page can be decrypted
+@return 0 if the page can be decrypted, else a PAGE_ENCRYPTION_* status code. */
+UNIV_INLINE
+ulint
+fil_page_encryption_status(
+/*===================*/
+ const byte *buf); /*!< in: page */
+
+#endif // fil0pageencryption_h
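
A minimal sketch of how the status codes defined in this header might be
consumed by a caller that probes the first page of an .ibd file before using
it; the wrapper function is hypothetical, only the PAGE_ENCRYPTION_* codes and
fil_page_encryption_status() come from the patch.

#include "fil0pageencryption.h"

/* Hypothetical pre-open check: probe a page and decide whether the server
can work with the tablespace. */
static bool
tablespace_page_usable(const byte* page)
{
	ulint	status = fil_page_encryption_status(page);

	if (status == PAGE_ENCRYPTION_OK) {
		return(true);	/* not encrypted, or the key is available */
	}

	if (status == PAGE_ENCRYPTION_KEY_MISSING) {
		/* a key provider exists, but the key id stored in the
		tablespace flags cannot be fetched from it */
		return(false);
	}

	/* PAGE_ENCRYPTION_ERROR and any remaining codes */
	return(false);
}
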
diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h
index 87f1f5a636d..96e638fcdd9 100644
--- a/storage/innobase/include/fsp0fsp.h
+++ b/storage/innobase/include/fsp0fsp.h
@@ -57,6 +57,11 @@ is found in a remote location, not the default data directory. */
/** Number of flag bits used to indicate the page compression and compression level */
#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION 1
#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL 4
+
+/** Number of flag bits used to indicate the page encryption and encryption key */
+#define FSP_FLAGS_WIDTH_PAGE_ENCRYPTION 1
+#define FSP_FLAGS_WIDTH_PAGE_ENCRYPTION_KEY 8
+
/** Number of flag bits used to indicate atomic writes for this tablespace */
#define FSP_FLAGS_WIDTH_ATOMIC_WRITES 2
@@ -68,7 +73,9 @@ is found in a remote location, not the default data directory. */
+ FSP_FLAGS_WIDTH_DATA_DIR \
+ FSP_FLAGS_WIDTH_PAGE_COMPRESSION \
+ FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL \
- + FSP_FLAGS_WIDTH_ATOMIC_WRITES)
+ + FSP_FLAGS_WIDTH_ATOMIC_WRITES \
+ + FSP_FLAGS_WIDTH_PAGE_ENCRYPTION \
+ + FSP_FLAGS_WIDTH_PAGE_ENCRYPTION_KEY)
/** A mask of all the known/used bits in tablespace flags */
#define FSP_FLAGS_MASK (~(~0 << FSP_FLAGS_WIDTH))
@@ -92,9 +99,15 @@ dictionary */
/** Zero relative shift position of the ATOMIC_WRITES field */
#define FSP_FLAGS_POS_ATOMIC_WRITES (FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL \
+ FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL)
- /** Zero relative shift position of the PAGE_SSIZE field */
-#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_ATOMIC_WRITES \
- + FSP_FLAGS_WIDTH_ATOMIC_WRITES)
+/** Zero relative shift position of the PAGE_ENCRYPTION field */
+#define FSP_FLAGS_POS_PAGE_ENCRYPTION (FSP_FLAGS_POS_ATOMIC_WRITES \
+ + FSP_FLAGS_WIDTH_ATOMIC_WRITES)
+/** Zero relative shift position of the PAGE_ENCRYPTION_KEY field */
+#define FSP_FLAGS_POS_PAGE_ENCRYPTION_KEY (FSP_FLAGS_POS_PAGE_ENCRYPTION \
+ + FSP_FLAGS_WIDTH_PAGE_ENCRYPTION)
+/** Zero relative shift position of the PAGE_SSIZE field */
+#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_PAGE_ENCRYPTION_KEY \
+ + FSP_FLAGS_WIDTH_PAGE_ENCRYPTION_KEY)
/** Zero relative shift position of the start of the UNUSED bits */
#define FSP_FLAGS_POS_DATA_DIR (FSP_FLAGS_POS_PAGE_SSIZE \
+ FSP_FLAGS_WIDTH_PAGE_SSIZE)
@@ -130,11 +143,18 @@ dictionary */
#define FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL \
((~(~0 << FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL)) \
<< FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL)
+/** Bit mask of the PAGE_ENCRYPTION field */
+#define FSP_FLAGS_MASK_PAGE_ENCRYPTION \
+ ((~(~0 << FSP_FLAGS_WIDTH_PAGE_ENCRYPTION)) \
+ << FSP_FLAGS_POS_PAGE_ENCRYPTION)
+/** Bit mask of the PAGE_ENCRYPTION_KEY field */
+#define FSP_FLAGS_MASK_PAGE_ENCRYPTION_KEY \
+ ((~(~0 << FSP_FLAGS_WIDTH_PAGE_ENCRYPTION_KEY)) \
+ << FSP_FLAGS_POS_PAGE_ENCRYPTION_KEY)
/** Bit mask of the ATOMIC_WRITES field */
#define FSP_FLAGS_MASK_ATOMIC_WRITES \
((~(~0 << FSP_FLAGS_WIDTH_ATOMIC_WRITES)) \
<< FSP_FLAGS_POS_ATOMIC_WRITES)
-
/** Return the value of the POST_ANTELOPE field */
#define FSP_FLAGS_GET_POST_ANTELOPE(flags) \
((flags & FSP_FLAGS_MASK_POST_ANTELOPE) \
@@ -171,6 +191,14 @@ dictionary */
#define FSP_FLAGS_GET_ATOMIC_WRITES(flags) \
((flags & FSP_FLAGS_MASK_ATOMIC_WRITES) \
>> FSP_FLAGS_POS_ATOMIC_WRITES)
+/** Return the value of the PAGE_ENCRYPTION field */
+#define FSP_FLAGS_GET_PAGE_ENCRYPTION(flags) \
+ ((flags & FSP_FLAGS_MASK_PAGE_ENCRYPTION) \
+ >> FSP_FLAGS_POS_PAGE_ENCRYPTION)
+/** Return the value of the PAGE_ENCRYPTION_KEY field */
+#define FSP_FLAGS_GET_PAGE_ENCRYPTION_KEY(flags) \
+ ((flags & FSP_FLAGS_MASK_PAGE_ENCRYPTION_KEY) \
+ >> FSP_FLAGS_POS_PAGE_ENCRYPTION_KEY)
/** Set a PAGE_SSIZE into the correct bits in a given
tablespace flags. */
@@ -186,6 +214,14 @@ tablespace flags. */
tablespace flags. */
#define FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(flags, level) \
(flags | (level << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL))
+
+/** Set a PAGE_ENCRYPTION into the correct bits in a given tablespace flags. */
+#define FSP_FLAGS_SET_PAGE_ENCRYPTION(flags, encryption) \
+ (flags | (encryption << FSP_FLAGS_POS_PAGE_ENCRYPTION))
+/** Set a PAGE_ENCRYPTION_KEY into the correct bits in a given tablespace flags. */
+#define FSP_FLAGS_SET_PAGE_ENCRYPTION_KEY(flags, encryption_key) \
+ (flags | (encryption_key << FSP_FLAGS_POS_PAGE_ENCRYPTION_KEY))
+
/** Set a ATOMIC_WRITES into the correct bits in a given
tablespace flags. */
#define FSP_FLAGS_SET_ATOMIC_WRITES(flags, atomics) \
@@ -800,6 +836,33 @@ fsp_flags_get_page_size(
/*====================*/
ulint flags); /*!< in: tablespace flags */
+/*********************************************************************/
+/* @return offset into fsp header where crypt data is stored */
+UNIV_INTERN
+ulint
+fsp_header_get_crypt_offset(
+/*========================*/
+ ulint zip_size, /*!< in: zip_size */
+ ulint* max_size); /*!< out: free space after offset */
+
+#define fsp_page_is_free(space,page,mtr) \
+ fsp_page_is_free_func(space,page,mtr, __FILE__, __LINE__)
+
+#ifndef UNIV_INNOCHECKSUM
+/**********************************************************************//**
+Checks if a single page is free.
+@return true if free */
+UNIV_INTERN
+bool
+fsp_page_is_free_func(
+/*==============*/
+ ulint space, /*!< in: space id */
+ ulint page, /*!< in: page offset */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ const char *file,
+ ulint line);
+#endif
+
#ifndef UNIV_NONINL
#include "fsp0fsp.ic"
#endif
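
For orientation, the new PAGE_ENCRYPTION fields occupy one bit and eight bits
respectively, placed between ATOMIC_WRITES and PAGE_SSIZE. The round trip
through the SET/GET macros can be checked with a small self-test; this is
illustrative only, and in real code the flags would come from
dict_tf_to_fsp_flags() or from the FSP header rather than a local variable.

#include "fsp0fsp.h"

/* Pack and unpack the new flag fields; key_id is an assumed key identifier. */
static void
fsp_page_encryption_flags_example(void)
{
	ulint	flags = 0;
	ulint	key_id = 3;

	flags = FSP_FLAGS_SET_PAGE_ENCRYPTION(flags, 1UL);
	flags = FSP_FLAGS_SET_PAGE_ENCRYPTION_KEY(flags, key_id);

	ut_a(FSP_FLAGS_GET_PAGE_ENCRYPTION(flags) == 1);
	ut_a(FSP_FLAGS_GET_PAGE_ENCRYPTION_KEY(flags) == key_id);
}
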
diff --git a/storage/innobase/include/fsp0pagecompress.ic b/storage/innobase/include/fsp0pagecompress.ic
index 3e59106b05d..4d4ee1c376a 100644
--- a/storage/innobase/include/fsp0pagecompress.ic
+++ b/storage/innobase/include/fsp0pagecompress.ic
@@ -193,5 +193,5 @@ fil_page_is_lzo_compressed(
byte *buf) /*!< in: page */
{
return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED &&
- mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN) == PAGE_LZO_ALGORITHM);
+ mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) == PAGE_LZO_ALGORITHM);
}
diff --git a/storage/innobase/include/fsp0pageencryption.h b/storage/innobase/include/fsp0pageencryption.h
new file mode 100644
index 00000000000..52365c8e93c
--- /dev/null
+++ b/storage/innobase/include/fsp0pageencryption.h
@@ -0,0 +1,66 @@
+/*****************************************************************************
+
+ Copyright (C) 2014 eperi GmbH. All Rights Reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+/******************************************************************/
+
+/******************************************************************//**
+@file include/fsp0pageencryption.h
+Helper functions for extracting/storing page encryption information to file space.
+
+Created 08/28/2014
+***********************************************************************/
+
+#ifndef FSP0PAGEENCRYPTION_H_
+#define FSP0PAGEENCRYPTION_H_
+
+
+#define FIL_PAGE_ENCRYPTION_AES_128 16 /*!< Encryption algorithm AES-128. */
+#define FIL_PAGE_ENCRYPTION_AES_196 24 /*!< Encryption algorithm AES-196. */
+#define FIL_PAGE_ENCRYPTION_AES_256 32 /*!< Encryption algorithm AES-256. */
+
+#define FIL_PAGE_ENCRYPTED_SIZE 2 /*!< Number of bytes used to store
+ actual payload data size on encrypted pages. */
+
+/********************************************************************//**
+Determine if the tablespace is page encrypted from dict_table_t::flags.
+@return TRUE if page encrypted, FALSE if not page encrypted */
+UNIV_INLINE
+ibool
+fsp_flags_is_page_encrypted(
+/*=========================*/
+ ulint flags); /*!< in: tablespace flags */
+
+
+/********************************************************************//**
+Extract the page encryption key from tablespace flags.
+A tablespace has only one physical page encryption key
+whether that page is encrypted or not.
+@return page encryption key of the file-per-table tablespace,
+or zero if the table is not encrypted. */
+UNIV_INLINE
+ulint
+fsp_flags_get_page_encryption_key(
+/*=================================*/
+ ulint flags); /*!< in: tablespace flags */
+
+
+#ifndef UNIV_NONINL
+#include "fsp0pageencryption.ic"
+#endif
+
+
+#endif /* FSP0PAGEENCRYPTION_H_ */
diff --git a/storage/innobase/include/fsp0pageencryption.ic b/storage/innobase/include/fsp0pageencryption.ic
new file mode 100644
index 00000000000..42c980b0430
--- /dev/null
+++ b/storage/innobase/include/fsp0pageencryption.ic
@@ -0,0 +1,168 @@
+/*****************************************************************************
+
+ Copyright (C) 2014 eperi GmbH. All Rights Reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+/******************************************************************//**
+@file include/fsp0pageencryption.ic
+Implementation of helper functions for encrypting/decrypting page data
+in the file space.
+
+Created 08/28/2014
+***********************************************************************/
+
+#include "fsp0fsp.h"
+#include "fil0pageencryption.h"
+#include <my_crypt_key_management.h>
+
+
+/********************************************************************//**
+Determine if the tablespace is page encrypted from dict_table_t::flags.
+@return TRUE if page encrypted, FALSE if not page encrypted */
+UNIV_INLINE
+ibool
+fsp_flags_is_page_encrypted(
+/*=========================*/
+ ulint flags) /*!< in: tablespace flags */
+{
+ return(FSP_FLAGS_GET_PAGE_ENCRYPTION(flags));
+}
+
+/********************************************************************//**
+Extract the page encryption key from tablespace flags.
+A tablespace has only one physical page encryption key
+whether that page is encrypted or not.
+@return page encryption key of the file-per-table tablespace,
+or zero if the table is not encrypted. */
+UNIV_INLINE
+ulint
+fsp_flags_get_page_encryption_key(
+/*=================================*/
+ ulint flags) /*!< in: tablespace flags */
+{
+ return(FSP_FLAGS_GET_PAGE_ENCRYPTION_KEY(flags));
+}
+
+
+/*******************************************************************//**
+Returns the page encryption flag of the space, or false if the space
+is not encrypted. The tablespace must be cached in the memory cache.
+@return true if page encrypted, false if not or space not found */
+UNIV_INLINE
+ibool
+fil_space_is_page_encrypted(
+/*=========================*/
+ ulint id) /*!< in: space id */
+{
+ ulint flags;
+
+ flags = fil_space_get_flags(id);
+
+ if (flags && flags != ULINT_UNDEFINED) {
+
+ return(fsp_flags_is_page_encrypted(flags));
+ }
+
+ return(flags);
+}
+
+/*******************************************************************//**
+Returns the page encryption key of the space, or 0 if the space
+is not encrypted. The tablespace must be cached in the memory cache.
+@return page encryption key, ULINT_UNDEFINED if space not found */
+UNIV_INLINE
+ulint
+fil_space_get_page_encryption_key(
+/*=================================*/
+ ulint id) /*!< in: space id */
+{
+ ulint flags;
+
+ flags = fil_space_get_flags(id);
+
+ if (flags && flags != ULINT_UNDEFINED) {
+
+ return(fsp_flags_get_page_encryption_key(flags));
+ }
+
+ return(flags);
+}
+
+/*******************************************************************//**
+Find out whether the page is page encrypted
+@return true if page is page encrypted, false if not */
+UNIV_INLINE
+ibool
+fil_page_is_encrypted(
+/*==================*/
+ const byte *buf) /*!< in: page */
+{
+ return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_ENCRYPTED);
+}
+
+/*******************************************************************//**
+Find out whether the page is first page compressed and then encrypted
+@return true if page is page compressed+encrypted, false if not */
+UNIV_INLINE
+ibool
+fil_page_is_compressed_encrypted(
+/*=============================*/
+ const byte *buf) /*!< in: page */
+{
+ return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED);
+}
+
+/*******************************************************************//**
+Find out whether the page can be decrypted.
+This function is meant to run after the decryption routine: a page that was
+decrypted successfully no longer carries the FIL_PAGE_PAGE_ENCRYPTED type.
+For the first page of a tablespace it is assumed that the page can be
+decrypted if the key id stored in the tablespace flags (part of that page)
+can be obtained from the key provider.
+The case of a changed key is currently not detected.
+@return PAGE_ENCRYPTION_KEY_MISSING if a key provider is available but the
+	 key is not,
+	 PAGE_ENCRYPTION_ERROR if another error occurred,
+	 0 if decryption should be possible
+*/
+UNIV_INLINE
+ulint
+fil_page_encryption_status(
+/*=====================*/
+ const byte *buf) /*!< in: page */
+{
+ ulint page_type = mach_read_from_2(buf+FIL_PAGE_TYPE);
+
+ if (page_type == FIL_PAGE_TYPE_FSP_HDR) {
+ ulint flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + buf);
+		if (fsp_flags_is_page_encrypted(flags)
+		    && !HasCryptoKey(fsp_flags_get_page_encryption_key(flags))) {
+			/* accessing the table would surely fail, because
+			the key or the key provider is not available */
+			return PAGE_ENCRYPTION_KEY_MISSING;
+		}
+ }
+
+	if (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) {
+ ulint key = mach_read_from_4(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
+ if (!HasCryptoKey(key)) {
+ return PAGE_ENCRYPTION_KEY_MISSING;
+ }
+ return PAGE_ENCRYPTION_ERROR;
+ }
+ return 0;
+}
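
For page_compressed+encrypted pages the key id is taken from the
FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION field of the page header, as the last
branch of fil_page_encryption_status() above shows. A standalone sketch of
that lookup (the helper itself is not part of the patch):

/* Report whether the key provider can deliver the key recorded in a
FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED page. */
static bool
compressed_encrypted_page_key_available(const byte* page)
{
	if (mach_read_from_2(page + FIL_PAGE_TYPE)
	    != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) {
		return(true);	/* nothing to decrypt */
	}

	ulint	key = mach_read_from_4(
		page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);

	return(HasCryptoKey(key) ? true : false);
}
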
diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h
index a2996ecacc8..f00d754ac66 100644
--- a/storage/innobase/include/fts0fts.h
+++ b/storage/innobase/include/fts0fts.h
@@ -715,6 +715,34 @@ fts_drop_index_tables(
dict_index_t* index) /*!< in: Index to drop */
__attribute__((nonnull, warn_unused_result));
+/******************************************************************
+Wait for background threads to stop using FTS index
+*/
+UNIV_INTERN
+void
+fts_wait_bg_to_stop_using_index(
+/*======================*/
+ trx_t* trx, /*!< in: transaction */
+ dict_index_t* index, /*!< in: FTS Index */
+	bool		drop_table);	/*!< in: in addition to stopping
+					use of the index, also prevent
+					threads from starting to use it;
+					used by drop table */
+
+/******************************************************************
+Wait for background threads to stop using any FTS index of the table
+*/
+UNIV_INTERN
+void
+fts_wait_bg_to_stop_using_table(
+/*======================*/
+ trx_t* trx, /*!< in: transaction */
+ dict_table_t* table, /*!< in: table to stop threads */
+	bool		drop_table);	/*!< in: in addition to stopping
+					use of the table, also prevent
+					threads from starting to use it;
+					used by drop table */
+
/******************************************************************//**
Remove the table from the OPTIMIZER's list. We do wait for
acknowledgement from the consumer of the message. */
diff --git a/storage/innobase/include/log0crypt.h b/storage/innobase/include/log0crypt.h
new file mode 100644
index 00000000000..188e82397a2
--- /dev/null
+++ b/storage/innobase/include/log0crypt.h
@@ -0,0 +1,85 @@
+/**************************************************//**
+@file include/log0crypt.h
+Innodb log encrypt/decrypt
+
+Created 11/25/2013 Minli Zhu
+*******************************************************/
+#ifndef log0crypt_h
+#define log0crypt_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#include "ut0lst.h"
+#include "ut0rnd.h"
+#include "my_aes.h"
+#include "my_crypt_key_management.h" // for key version and key
+
+#define PURPOSE_BYTE_LEN	(MY_AES_BLOCK_SIZE - 1)
+#define PURPOSE_BYTE_OFFSET 0
+#define UNENCRYPTED_KEY_VER 0
+
+/* If true, enable redo log encryption. */
+extern my_bool srv_encrypt_log;
+/* Plain text used by AES_ECB to generate redo log crypt key. */
+extern byte redo_log_crypt_msg[MY_AES_BLOCK_SIZE];
+/* IV to concatenate with counter used by AES_CTR for redo log crypto. */
+extern byte aes_ctr_nonce[MY_AES_BLOCK_SIZE];
+
+/*********************************************************************//**
+Generate a 128-bit random message used to generate redo log crypto key.
+Init AES-CTR iv/nonce with random number.
+It is called only on a clean startup (i.e., when redo logs do not exist). */
+UNIV_INTERN
+void
+log_init_crypt_msg_and_nonce(void);
+/*===============================*/
+/*********************************************************************//**
+Init log_sys redo log crypto key. */
+UNIV_INTERN
+void
+log_init_crypt_key(
+/*===============*/
+ const byte* crypt_msg, /*< in: crypt msg */
+ const uint crypt_ver, /*< in: mysqld key version */
+ byte* crypt_key); /*< out: crypt struct with key and iv */
+/*********************************************************************//**
+Encrypt log blocks. */
+UNIV_INTERN
+Crypt_result
+log_blocks_encrypt(
+/*===============*/
+ const byte* blocks, /*!< in: blocks before encryption */
+ const ulint size, /*!< in: size of blocks, must be multiple of a log block */
+ byte* dst_blocks); /*!< out: blocks after encryption */
+
+/*********************************************************************//**
+Decrypt log blocks. */
+UNIV_INTERN
+Crypt_result
+log_blocks_decrypt(
+/*===============*/
+ const byte* blocks, /*!< in: blocks before decryption */
+ const ulint size, /*!< in: size of blocks, must be multiple of a log block */
+ byte* dst_blocks); /*!< out: blocks after decryption */
+
+/*********************************************************************//**
+Set next checkpoint's key version to latest one, and generate current
+key. Key version 0 means no encryption. */
+UNIV_INTERN
+void
+log_crypt_set_ver_and_key(
+/*======================*/
+ uint& key_ver, /*!< out: latest key version */
+ byte* crypt_key); /*!< out: crypto key */
+
+/*********************************************************************//**
+Writes the crypto (version, msg and iv) info, which has been used for
+log blocks with lsn <= this checkpoint's lsn, to a log header's
+checkpoint buf. */
+UNIV_INTERN
+void
+log_crypt_write_checkpoint_buf(
+/*===========================*/
+ byte* buf); /*!< in/out: checkpoint buffer */
+
+#endif // log0crypt.h
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index ad9710b1870..79667097724 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -40,9 +40,8 @@ Created 12/9/1995 Heikki Tuuri
#include "sync0sync.h"
#include "sync0rw.h"
#endif /* !UNIV_HOTBACKUP */
+#include "log0crypt.h"
-/* Type used for all log sequence number storage and arithmetics */
-typedef ib_uint64_t lsn_t;
#define LSN_MAX IB_UINT64_MAX
#define LSN_PF UINT64PF
@@ -677,8 +676,20 @@ extern log_t* log_sys;
is valid */
#endif
#define LOG_CHECKPOINT_OFFSET_HIGH32 (16 + LOG_CHECKPOINT_ARRAY_END)
-#define LOG_CHECKPOINT_SIZE (20 + LOG_CHECKPOINT_ARRAY_END)
-
+#define LOG_CRYPT_VER (20 + LOG_CHECKPOINT_ARRAY_END)
+ /*!< 32-bit key version. Corresponding
+ key has been used for log records with
+					lsn <= the checkpoint's lsn */
+#define LOG_CRYPT_MSG (24 + LOG_CHECKPOINT_ARRAY_END)
+ /*!< a 128-bit value used to
+					derive the crypto key for the redo log.
+ It is generated via the concatenation
+ of 1 purpose byte T (0x02) and a
+ 15-byte random number.*/
+#define LOG_CRYPT_IV (40 + LOG_CHECKPOINT_ARRAY_END)
+ /*!< a 128-bit random number used as
+ AES-CTR iv/nonce for redo log */
+#define LOG_CHECKPOINT_SIZE (56 + LOG_CHECKPOINT_ARRAY_END)
/* Offsets of a log file header */
#define LOG_GROUP_ID 0 /* log group number */
@@ -783,6 +794,10 @@ struct log_t{
lsn_t lsn; /*!< log sequence number */
ulint buf_free; /*!< first free offset within the log
buffer */
+ uint redo_log_crypt_ver;
+ /*!< 32-bit crypto ver */
+ byte redo_log_crypt_key[MY_AES_BLOCK_SIZE];
+ /*!< crypto key to encrypt redo log */
#ifndef UNIV_HOTBACKUP
ib_mutex_t mutex; /*!< mutex protecting the log */
@@ -1006,6 +1021,22 @@ struct log_t{
/* @} */
#endif /* UNIV_LOG_ARCHIVE */
+extern os_event_t log_scrub_event;
+/* log scrubbing interval in ms */
+extern ulonglong innodb_scrub_log_interval;
+
+/*****************************************************************//**
+This is the main thread for log scrubbing. It waits for an event and,
+when woken up, fills the current log block with dummy records and
+sleeps again.
+@return this function does not return, it calls os_thread_exit() */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(log_scrub_thread)(
+/*===============================*/
+ void* arg); /*!< in: a dummy parameter
+ required by os_thread_create */
+
#ifndef UNIV_NONINL
#include "log0log.ic"
#endif
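
The three LOG_CRYPT_* offsets above extend the checkpoint block by 36 bytes
(4 + 16 + 16), which is why LOG_CHECKPOINT_SIZE moves from 20 to 56 past
LOG_CHECKPOINT_ARRAY_END. A sketch of reading those fields back from a
checkpoint buffer, mirroring what recovery does; the helper itself is not part
of the patch.

/* Assumes "buf" points at a checkpoint block read from the log file header. */
static void
read_log_crypt_checkpoint_fields(
	const byte*	buf,
	uint*		key_ver,
	byte*		crypt_msg,	/* MY_AES_BLOCK_SIZE bytes */
	byte*		crypt_iv)	/* MY_AES_BLOCK_SIZE bytes */
{
	*key_ver = (uint) mach_read_from_4(buf + LOG_CRYPT_VER);
	memcpy(crypt_msg, buf + LOG_CRYPT_MSG, MY_AES_BLOCK_SIZE);
	memcpy(crypt_iv, buf + LOG_CRYPT_IV, MY_AES_BLOCK_SIZE);
}
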
diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
index 8ede49d4ecc..f8785faafdf 100644
--- a/storage/innobase/include/log0recv.h
+++ b/storage/innobase/include/log0recv.h
@@ -434,6 +434,11 @@ struct recv_sys_t{
scan find a corrupt log block, or a corrupt
log record, or there is a log parsing
buffer overflow */
+ uint recv_log_crypt_ver;
+ /*!< mysqld key version to generate redo
+ log crypt key for recovery */
+ byte recv_log_crypt_key[MY_AES_BLOCK_SIZE];
+ /*!< crypto key to decrypt redo log for recovery */
#ifdef UNIV_LOG_ARCHIVE
log_group_t* archive_group;
/*!< in archive recovery: the log group whose
diff --git a/storage/innobase/include/mtr0log.ic b/storage/innobase/include/mtr0log.ic
index 3ed4876eeab..6457e02d455 100644
--- a/storage/innobase/include/mtr0log.ic
+++ b/storage/innobase/include/mtr0log.ic
@@ -191,7 +191,7 @@ mlog_write_initial_log_record_fast(
ulint offset;
ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX));
- ut_ad(type <= MLOG_BIGGEST_TYPE);
+ ut_ad(type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(type));
ut_ad(ptr && log_ptr);
page = (const byte*) ut_align_down(ptr, UNIV_PAGE_SIZE);
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index b91dbd0353c..eae981f2fbb 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -189,6 +189,14 @@ For 1 - 8 bytes, the flag value must give the length also! @{ */
page */
#define MLOG_BIGGEST_TYPE ((byte)53) /*!< biggest value (used in
assertions) */
+
+#define MLOG_FILE_WRITE_CRYPT_DATA ((byte)100) /*!< log record for
+ writing/updating crypt data of
+ a tablespace */
+
+#define EXTRA_CHECK_MLOG_NUMBER(x) \
+ ((x) == MLOG_FILE_WRITE_CRYPT_DATA)
+
/* @} */
/** @name Flags for MLOG_FILE operations
@@ -251,6 +259,18 @@ mtr_release_s_latch_at_savepoint(
#else /* !UNIV_HOTBACKUP */
# define mtr_release_s_latch_at_savepoint(mtr,savepoint,lock) ((void) 0)
#endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************//**
+Releases a buf_page stored in an mtr memo after a
+savepoint. */
+UNIV_INTERN
+void
+mtr_release_buf_page_at_savepoint(
+/*=============================*/
+ mtr_t* mtr, /*!< in: mtr */
+ ulint savepoint, /*!< in: savepoint */
+ buf_block_t* block); /*!< in: block to release */
+
/***************************************************************//**
Gets the logging mode of a mini-transaction.
@return logging mode: MTR_LOG_NONE, ... */
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index 8f8aef4f45c..e2d0cf26682 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -313,10 +313,14 @@ The wrapper functions have the prefix of "innodb_". */
# define os_aio(type, mode, name, file, buf, offset, \
n, message1, message2, write_size, \
- page_compression, page_compression_level) \
+ page_compression, page_compression_level, \
+ page_encryption, page_encryption_key, lsn) \
pfs_os_aio_func(type, mode, name, file, buf, offset, \
n, message1, message2, write_size, \
- page_compression, page_compression_level, __FILE__, __LINE__)
+ page_compression, page_compression_level, \
+ page_encryption, page_encryption_key, \
+ lsn, __FILE__, __LINE__)
+
# define os_file_read(file, buf, offset, n, compressed) \
pfs_os_file_read_func(file, buf, offset, n, compressed, __FILE__, __LINE__)
@@ -357,9 +361,13 @@ to original un-instrumented file I/O APIs */
# define os_file_close(file) os_file_close_func(file)
-# define os_aio(type, mode, name, file, buf, offset, n, message1, message2, write_size, page_compression, page_compression_level) \
+# define os_aio(type, mode, name, file, buf, offset, n, message1, \
+ message2, write_size, page_compression, page_compression_level, \
+ page_encryption, page_encryption_key, lsn) \
os_aio_func(type, mode, name, file, buf, offset, n, \
- message1, message2, write_size, page_compression, page_compression_level)
+ message1, message2, write_size, \
+ page_compression, page_compression_level, \
+ page_encryption, page_encryption_key, lsn)
# define os_file_read(file, buf, offset, n, compressed) \
os_file_read_func(file, buf, offset, n, compressed)
@@ -777,6 +785,11 @@ pfs_os_aio_func(
on this file space */
ulint page_compression_level, /*!< page compression
level to be used */
+ ibool page_encryption, /*!< in: is page encryption used
+ on this file space */
+ ulint page_encryption_key, /*!< page encryption
+ key to be used */
+ lsn_t lsn, /* lsn of the newest modification */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
/*******************************************************************//**
@@ -1153,9 +1166,13 @@ os_aio_func(
actual page size does not decrease. */
ibool page_compression, /*!< in: is page compression used
on this file space */
- ulint page_compression_level); /*!< page compression
+ ulint page_compression_level, /*!< page compression
level to be used */
-
+ ibool page_encryption, /*!< in: is page encryption used
+ on this file space */
+ ulint page_encryption_key, /*!< page encryption key
+ to be used */
+ lsn_t lsn); /* lsn of the newest modification */
/************************************************************************//**
Wakes up all async i/o threads so that they know to exit themselves in
shutdown. */
diff --git a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic
index 8e1cea585e6..bbd3826d50b 100644
--- a/storage/innobase/include/os0file.ic
+++ b/storage/innobase/include/os0file.ic
@@ -224,6 +224,11 @@ pfs_os_aio_func(
on this file space */
ulint page_compression_level, /*!< page compression
level to be used */
+ ibool page_encryption, /*!< in: is page encryption used
+ on this file space */
+ ulint page_encryption_key, /*!< page encryption
+ key to be used */
+ lsn_t lsn, /* lsn of the newest modification */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
@@ -240,7 +245,8 @@ pfs_os_aio_func(
result = os_aio_func(type, mode, name, file, buf, offset,
n, message1, message2, write_size,
- page_compression, page_compression_level);
+ page_compression, page_compression_level,
+ page_encryption, page_encryption_key, lsn);
register_pfs_file_io_end(locker, n);
diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h
index cb6633bb941..2b47aef8790 100644
--- a/storage/innobase/include/page0page.h
+++ b/storage/innobase/include/page0page.h
@@ -162,6 +162,8 @@ directory. */
#define PAGE_DIR_SLOT_MAX_N_OWNED 8
#define PAGE_DIR_SLOT_MIN_N_OWNED 4
+extern my_bool srv_immediate_scrub_data_uncompressed;
+
/************************************************************//**
Gets the start of a page.
@return start of the page */
diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic
index 99e17001c0a..cde3cad33f0 100644
--- a/storage/innobase/include/page0page.ic
+++ b/storage/innobase/include/page0page.ic
@@ -1169,6 +1169,13 @@ page_mem_free(
ut_ad(rec_offs_validate(rec, index, offsets));
free = page_header_get_ptr(page, PAGE_FREE);
+ bool scrub = srv_immediate_scrub_data_uncompressed;
+ if (scrub) {
+ /* scrub record */
+ uint size = rec_offs_data_size(offsets);
+ memset(rec, 0, size);
+ }
+
page_rec_set_next(rec, free);
page_header_set_ptr(page, page_zip, PAGE_FREE, rec);
diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h
index 0a47d514e1b..d1d902ef57d 100644
--- a/storage/innobase/include/srv0mon.h
+++ b/storage/innobase/include/srv0mon.h
@@ -327,6 +327,11 @@ enum monitor_id_t {
MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED,
MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR,
+ /* New monitor variables for page encryption */
+ MONITOR_OVLD_PAGES_PAGE_ENCRYPTED,
+ MONITOR_OVLD_PAGES_PAGE_DECRYPTED,
+ MONITOR_OVLD_PAGES_PAGE_ENCRYPTION_ERROR,
+
/* Index related counters */
MONITOR_MODULE_INDEX,
MONITOR_INDEX_SPLIT,
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 83c478582b2..be0112959c7 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -133,6 +133,12 @@ struct srv_stats_t {
ulint_ctr_64_t pages_page_decompressed;
/* Number of page compression errors */
ulint_ctr_64_t pages_page_compression_error;
+ /* Number of pages encrypted with page encryption */
+ ulint_ctr_64_t pages_page_encrypted;
+ /* Number of pages decrypted with page encryption */
+ ulint_ctr_64_t pages_page_decrypted;
+ /* Number of page encryption errors */
+ ulint_ctr_64_t pages_page_encryption_error;
/** Number of data read in total (in bytes) */
ulint_ctr_1_t data_read;
@@ -471,6 +477,11 @@ extern ibool srv_buf_dump_thread_active;
/* TRUE during the lifetime of the stats thread */
extern ibool srv_dict_stats_thread_active;
+/* TRUE if log scrubbing is enabled */
+extern my_bool srv_scrub_log;
+/* TRUE during the lifetime of the log scrub thread */
+extern ibool srv_log_scrub_thread_active;
+
extern ulong srv_n_spin_wait_rounds;
extern ulong srv_n_free_tickets_to_enter;
extern ulong srv_thread_sleep_delay;
@@ -534,6 +545,9 @@ extern my_bool srv_print_all_deadlocks;
extern my_bool srv_cmp_per_index_enabled;
+/* is encryption enabled */
+extern my_bool srv_encrypt_tables;
+
/** Status variables to be passed to MySQL */
extern struct export_var_t export_vars;
@@ -1000,9 +1014,29 @@ struct export_var_t{
compression */
ib_int64_t innodb_pages_page_compression_error;/*!< Number of page
compression errors */
+ ib_int64_t innodb_pages_page_encrypted;/*!< Number of pages
+ encrypted by page encryption */
+ ib_int64_t innodb_pages_page_decrypted;/*!< Number of pages
+ decrypted by page encryption */
+ ib_int64_t innodb_pages_page_encryption_error;/*!< Number of page
+ encryption errors */
ulint innodb_sec_rec_cluster_reads; /*!< srv_sec_rec_cluster_reads */
- ulint innodb_sec_rec_cluster_reads_avoided; /*!< srv_sec_rec_cluster_reads_avoided */
+ ulint innodb_sec_rec_cluster_reads_avoided;
+ /*!< srv_sec_rec_cluster_reads_avoided */
+
+ ulint innodb_encryption_rotation_pages_read_from_cache;
+ ulint innodb_encryption_rotation_pages_read_from_disk;
+ ulint innodb_encryption_rotation_pages_modified;
+ ulint innodb_encryption_rotation_pages_flushed;
+ ulint innodb_encryption_rotation_estimated_iops;
+
+ ulint innodb_scrub_page_reorganizations;
+ ulint innodb_scrub_page_splits;
+ ulint innodb_scrub_page_split_failures_underflow;
+ ulint innodb_scrub_page_split_failures_out_of_filespace;
+ ulint innodb_scrub_page_split_failures_missing_index;
+ ulint innodb_scrub_page_split_failures_unknown;
};
/** Thread slot in the thread table. */
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index 86f03c7917f..b2f486d059e 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -478,6 +478,9 @@ typedef uint32_t ib_uint32_t;
# define IB_ID_FMT UINT64PF
+/* Type used for all log sequence number storage and arithmetics */
+typedef ib_uint64_t lsn_t;
+
#ifdef _WIN64
typedef unsigned __int64 ulint;
typedef __int64 lint;
diff --git a/storage/innobase/log/log0crypt.cc b/storage/innobase/log/log0crypt.cc
new file mode 100644
index 00000000000..0647fd04e84
--- /dev/null
+++ b/storage/innobase/log/log0crypt.cc
@@ -0,0 +1,267 @@
+/**************************************************//**
+@file log0crypt.cc
+Innodb log encrypt/decrypt
+
+Created 11/25/2013 Minli Zhu
+*******************************************************/
+#include "m_string.h"
+#include "log0crypt.h"
+#include <my_crypt.h>
+
+#include "log0log.h"
+#include "srv0start.h" // for srv_start_lsn
+#include "log0recv.h" // for recv_sys
+
+/* If true, enable redo log encryption. */
+UNIV_INTERN my_bool srv_encrypt_log = FALSE;
+/*
+ Sub system type for InnoDB redo log crypto.
+ Set and used to validate crypto msg.
+*/
+static const byte redo_log_purpose_byte = 0x02;
+/* Plain text used by AES_ECB to generate redo log crypt key. */
+byte redo_log_crypt_msg[MY_AES_BLOCK_SIZE] = {0};
+/* IV to concatenate with counter used by AES_CTR for redo log
+ * encryption/decryption. */
+byte aes_ctr_nonce[MY_AES_BLOCK_SIZE] = {0};
+
+/*********************************************************************//**
+Generate a 128-bit value used to generate crypt key for redo log.
+It is generated via the concatenation of 1 purpose byte (0x02) and 15-byte
+random number.
+Init AES-CTR iv/nonce with random number.
+It is called when:
+- redo logs do not exist at startup, or
+- on the transition from unencrypted redo logs.
+Note:
+We should not use flags and conditions such as:
+ (srv_encrypt_log &&
+ debug_use_static_keys &&
+ GetLatestCryptoKeyVersion() == UNENCRYPTED_KEY_VER)
+because those flags have not been read and set yet at the time the redo
+logs are reset.
+*/
+UNIV_INTERN
+void
+log_init_crypt_msg_and_nonce(void)
+/*==============================*/
+{
+ mach_write_to_1(redo_log_crypt_msg, redo_log_purpose_byte);
+ if (my_random_bytes(redo_log_crypt_msg + 1, PURPOSE_BYTE_LEN) != AES_OK)
+ {
+ fprintf(stderr,
+ "\nInnodb redo log crypto: generate "
+ "%u-byte random number as crypto msg failed.\n",
+ PURPOSE_BYTE_LEN);
+ abort();
+ }
+
+ if (my_random_bytes(aes_ctr_nonce, MY_AES_BLOCK_SIZE) != AES_OK)
+ {
+ fprintf(stderr,
+ "\nInnodb redo log crypto: generate "
+ "%u-byte random number as AES_CTR nonce failed.\n",
+ MY_AES_BLOCK_SIZE);
+ abort();
+ }
+}
+
+/*********************************************************************//**
+Generate crypt key from crypt msg. */
+UNIV_INTERN
+void
+log_init_crypt_key(
+/*===============*/
+ const byte* crypt_msg, /*< in: crypt msg */
+ const uint crypt_ver, /*< in: key version */
+ byte* key) /*< out: crypt key*/
+{
+ if (crypt_ver == UNENCRYPTED_KEY_VER)
+ {
+ fprintf(stderr, "\nInnodb redo log crypto: unencrypted key ver.\n\n");
+ memset(key, 0, MY_AES_BLOCK_SIZE);
+ return;
+ }
+
+ if (crypt_msg[PURPOSE_BYTE_OFFSET] != redo_log_purpose_byte)
+ {
+ fprintf(stderr,
+ "\nInnodb redo log crypto: msg type mismatched. "
+ "Expected: %x; Actual: %x\n",
+ redo_log_purpose_byte, crypt_msg[PURPOSE_BYTE_OFFSET]);
+ abort();
+ }
+
+ byte mysqld_key[MY_AES_BLOCK_SIZE] = {0};
+ if (GetCryptoKey(crypt_ver, mysqld_key, MY_AES_BLOCK_SIZE))
+ {
+ fprintf(stderr,
+ "\nInnodb redo log crypto: getting mysqld crypto key "
+ "from key version failed.\n");
+ abort();
+ }
+
+ uint32 dst_len;
+ my_aes_encrypt_dynamic_type func= get_aes_encrypt_func(MY_AES_ALGORITHM_ECB);
+ int rc= (*func)(crypt_msg, MY_AES_BLOCK_SIZE, //src, srclen
+ key, &dst_len, //dst, &dstlen
+ (unsigned char*)&mysqld_key, sizeof(mysqld_key),
+ NULL, 0,
+ 1);
+
+ if (rc != AES_OK || dst_len != MY_AES_BLOCK_SIZE)
+ {
+ fprintf(stderr,
+ "\nInnodb redo log crypto: getting redo log crypto key "
+ "failed.\n");
+ abort();
+ }
+}
+
+/*********************************************************************//**
+Get a log block's start lsn.
+@return a log block's start lsn */
+static inline
+lsn_t
+log_block_get_start_lsn(
+/*====================*/
+ lsn_t lsn, /*!< in: checkpoint lsn */
+ ulint log_block_no) /*!< in: log block number */
+{
+ lsn_t start_lsn =
+ (lsn & (lsn_t)0xffffffff00000000ULL) |
+ (((log_block_no - 1) & (lsn_t)0x3fffffff) << 9);
+ return start_lsn;
+}
+
+/*********************************************************************//**
+Call AES CTR to encrypt/decrypt log blocks. */
+static
+Crypt_result
+log_blocks_crypt(
+/*=============*/
+ const byte* block, /*!< in: blocks before encrypt/decrypt*/
+ const ulint size, /*!< in: size of block, must be multiple of a log block*/
+ byte* dst_block, /*!< out: blocks after encrypt/decrypt */
+ const bool is_encrypt) /*!< in: encrypt or decrypt*/
+{
+ byte *log_block = (byte*)block;
+ Crypt_result rc = AES_OK;
+ uint32 src_len, dst_len;
+ byte aes_ctr_counter[MY_AES_BLOCK_SIZE];
+	ulint log_block_no;
+	lsn_t log_block_start_lsn;
+	byte *key;
+	lsn_t lsn;
+ if (is_encrypt)
+ {
+ ut_a(log_sys && log_sys->redo_log_crypt_ver != UNENCRYPTED_KEY_VER);
+ key = (byte *)(log_sys->redo_log_crypt_key);
+ lsn = log_sys->lsn;
+
+ } else {
+ ut_a(recv_sys && recv_sys->recv_log_crypt_ver != UNENCRYPTED_KEY_VER);
+ key = (byte *)(recv_sys->recv_log_crypt_key);
+ lsn = srv_start_lsn;
+ }
+ ut_a(size % OS_FILE_LOG_BLOCK_SIZE == 0);
+ src_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE;
+ for (ulint i = 0; i < size ; i += OS_FILE_LOG_BLOCK_SIZE)
+ {
+ log_block_no = log_block_get_hdr_no(log_block);
+ log_block_start_lsn = log_block_get_start_lsn(lsn, log_block_no);
+
+ // Assume log block header is not encrypted
+ memcpy(dst_block, log_block, LOG_BLOCK_HDR_SIZE);
+
+ // aes_ctr_counter = nonce(3-byte) + start lsn to a log block
+ // (8-byte) + lbn (4-byte) + abn
+ // (1-byte, only 5 bits are used). "+" means concatenate.
+ bzero(aes_ctr_counter, MY_AES_BLOCK_SIZE);
+ memcpy(aes_ctr_counter, &aes_ctr_nonce, 3);
+ mach_write_to_8(aes_ctr_counter + 3, log_block_start_lsn);
+ mach_write_to_4(aes_ctr_counter + 11, log_block_no);
+ bzero(aes_ctr_counter + 15, 1);
+
+		rc = (* my_aes_encrypt_dynamic)(log_block + LOG_BLOCK_HDR_SIZE, src_len,
+ dst_block + LOG_BLOCK_HDR_SIZE, &dst_len,
+ (unsigned char*)key, 16,
+ aes_ctr_counter, MY_AES_BLOCK_SIZE,
+ 1);
+
+ ut_a(rc == AES_OK);
+ ut_a(dst_len == src_len);
+ log_block += OS_FILE_LOG_BLOCK_SIZE;
+ dst_block += OS_FILE_LOG_BLOCK_SIZE;
+ }
+
+ return rc;
+}
+
+/*********************************************************************//**
+Encrypt log blocks. */
+UNIV_INTERN
+Crypt_result
+log_blocks_encrypt(
+/*===============*/
+ const byte* block, /*!< in: blocks before encryption */
+ const ulint size, /*!< in: size of blocks, must be multiple of a log block */
+ byte* dst_block) /*!< out: blocks after encryption */
+{
+ return log_blocks_crypt(block, size, dst_block, true);
+}
+
+/*********************************************************************//**
+Decrypt log blocks. */
+UNIV_INTERN
+Crypt_result
+log_blocks_decrypt(
+/*===============*/
+ const byte* block, /*!< in: blocks before decryption */
+ const ulint size, /*!< in: size of blocks, must be multiple of a log block */
+ byte* dst_block) /*!< out: blocks after decryption */
+{
+ return log_blocks_crypt(block, size, dst_block, false);
+}
+
+/*********************************************************************//**
+Set next checkpoint's key version to latest one, and generate current
+key. Key version 0 means no encryption. */
+UNIV_INTERN
+void
+log_crypt_set_ver_and_key(
+/*======================*/
+ uint& key_ver, /*!< out: latest key version */
+ byte* crypt_key) /*!< out: crypto key */
+{
+ if (!srv_encrypt_log ||
+ (key_ver = GetLatestCryptoKeyVersion()) == UNENCRYPTED_KEY_VER)
+ {
+ key_ver = UNENCRYPTED_KEY_VER;
+ memset(crypt_key, 0, MY_AES_BLOCK_SIZE);
+ return;
+ }
+ log_init_crypt_key(redo_log_crypt_msg, key_ver, crypt_key);
+}
+
+/*********************************************************************//**
+Writes the crypto (version, msg and iv) info, which has been used for
+log blocks with lsn <= this checkpoint's lsn, to a log header's
+checkpoint buf. */
+UNIV_INTERN
+void
+log_crypt_write_checkpoint_buf(
+/*===========================*/
+ byte* buf) /*!< in/out: checkpoint buffer */
+{
+ ut_a(log_sys);
+ mach_write_to_4(buf + LOG_CRYPT_VER, log_sys->redo_log_crypt_ver);
+ if (!srv_encrypt_log ||
+ log_sys->redo_log_crypt_ver == UNENCRYPTED_KEY_VER) {
+ memset(buf + LOG_CRYPT_MSG, 0, MY_AES_BLOCK_SIZE);
+ memset(buf + LOG_CRYPT_IV, 0, MY_AES_BLOCK_SIZE);
+ return;
+ }
+ ut_a(redo_log_crypt_msg[PURPOSE_BYTE_OFFSET] == redo_log_purpose_byte);
+ memcpy(buf + LOG_CRYPT_MSG, redo_log_crypt_msg, MY_AES_BLOCK_SIZE);
+ memcpy(buf + LOG_CRYPT_IV, aes_ctr_nonce, MY_AES_BLOCK_SIZE);
+}
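
The 16-byte AES-CTR counter built inside log_blocks_crypt() is worth spelling
out: 3 bytes of the nonce, the 8-byte start LSN of the log block, the 4-byte
block number, and one trailing zero byte. A standalone sketch of that layout
(illustrative only, not called anywhere in the patch):

/* Build the per-block AES-CTR counter from nonce, start LSN and block no. */
static void
make_log_block_ctr(
	byte*		ctr,		/* out: MY_AES_BLOCK_SIZE bytes */
	const byte*	nonce,		/* in: at least 3 bytes */
	lsn_t		block_start_lsn,
	ulint		block_no)
{
	memset(ctr, 0, MY_AES_BLOCK_SIZE);
	memcpy(ctr, nonce, 3);
	mach_write_to_8(ctr + 3, block_start_lsn);
	mach_write_to_4(ctr + 11, block_no);
	/* byte 15 stays zero */
}
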
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index d0f17a43cf3..ba05987dfbe 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -81,6 +81,10 @@ reduce the size of the log.
/* Global log system variable */
UNIV_INTERN log_t* log_sys = NULL;
+/* Next log block number to do dummy record filling if no log records written
+for a while */
+static ulint next_lbn_to_pad = 0;
+
#ifdef UNIV_PFS_RWLOCK
UNIV_INTERN mysql_pfs_key_t checkpoint_lock_key;
# ifdef UNIV_LOG_ARCHIVE
@@ -532,10 +536,9 @@ function_exit:
return(lsn);
}
-#ifdef UNIV_LOG_ARCHIVE
/******************************************************//**
Pads the current log block full with dummy log records. Used in producing
-consistent archived log files. */
+consistent archived log files and scrubbing redo log. */
static
void
log_pad_current_log_block(void)
@@ -564,7 +567,6 @@ log_pad_current_log_block(void)
ut_a(lsn % OS_FILE_LOG_BLOCK_SIZE == LOG_BLOCK_HDR_SIZE);
}
-#endif /* UNIV_LOG_ARCHIVE */
/******************************************************//**
Calculates the data capacity of a log group, when the log file headers are not
@@ -900,6 +902,7 @@ log_init(void)
/*----------------------------*/
log_sys->next_checkpoint_no = 0;
+ log_sys->redo_log_crypt_ver = UNENCRYPTED_KEY_VER;
log_sys->last_checkpoint_lsn = log_sys->lsn;
log_sys->n_pending_checkpoint_writes = 0;
@@ -945,7 +948,7 @@ log_init(void)
log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
- log_sys->lsn = LOG_START_LSN + LOG_BLOCK_HDR_SIZE;
+ log_sys->lsn = LOG_START_LSN + LOG_BLOCK_HDR_SIZE; // TODO(minliz): ensure various LOG_START_LSN?
MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
log_sys->lsn - log_sys->last_checkpoint_lsn);
@@ -1273,7 +1276,7 @@ log_group_file_header_flush(
(ulint) (dest_offset / UNIV_PAGE_SIZE),
(ulint) (dest_offset % UNIV_PAGE_SIZE),
OS_FILE_LOG_BLOCK_SIZE,
- buf, group, 0);
+ buf, group, 0, 0);
srv_stats.os_log_pending_writes.dec();
}
@@ -1293,6 +1296,36 @@ log_block_store_checksum(
}
/******************************************************//**
+Encrypt one or more log blocks before they are flushed to disk
+@return true if encryption succeeds. */
+static
+bool
+log_group_encrypt_before_write(
+/*===========================*/
+ const log_group_t* group, /*!< in: log group to be flushed */
+ byte* block, /*!< in/out: pointer to a log block */
+ const ulint size) /*!< in: size of log blocks */
+
+{
+ Crypt_result result = AES_OK;
+
+ ut_ad(size % OS_FILE_LOG_BLOCK_SIZE == 0);
+ byte* dst_frame = (byte*)malloc(size);
+
+ //encrypt log blocks content
+ result = log_blocks_encrypt(block, size, dst_frame);
+
+ if (result == AES_OK)
+ {
+ ut_ad(block[0] == dst_frame[0]);
+ memcpy(block, dst_frame, size);
+ }
+ free(dst_frame);
+
+ return (result == AES_OK);
+}
+
+/******************************************************//**
Writes a buffer to a log file group. */
UNIV_INTERN
void
@@ -1398,10 +1431,19 @@ loop:
ut_a(next_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
+ if (srv_encrypt_log &&
+ log_sys->redo_log_crypt_ver != UNENCRYPTED_KEY_VER &&
+ !log_group_encrypt_before_write(group, buf, write_len))
+ {
+ fprintf(stderr,
+ "\nInnodb redo log encryption failed.\n");
+ abort();
+ }
+
fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0,
(ulint) (next_offset / UNIV_PAGE_SIZE),
(ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf,
- group, 0);
+ group, 0, 0);
srv_stats.os_log_pending_writes.dec();
@@ -1884,6 +1926,8 @@ log_group_checkpoint(
mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
+ log_crypt_write_checkpoint_buf(buf);
+
lsn_offset = log_group_calc_lsn_offset(log_sys->next_checkpoint_lsn,
group);
mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_LOW32,
@@ -1967,7 +2011,7 @@ log_group_checkpoint(
write_offset / UNIV_PAGE_SIZE,
write_offset % UNIV_PAGE_SIZE,
OS_FILE_LOG_BLOCK_SIZE,
- buf, ((byte*) group + 1), 0);
+ buf, ((byte*) group + 1), 0, 0);
ut_ad(((ulint) group & 0x1UL) == 0);
}
@@ -2008,6 +2052,8 @@ log_reset_first_header_and_checkpoint(
mach_write_to_8(buf + LOG_CHECKPOINT_NO, 0);
mach_write_to_8(buf + LOG_CHECKPOINT_LSN, lsn);
+ log_crypt_write_checkpoint_buf(buf);
+
mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_LOW32,
LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE);
mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_HIGH32, 0);
@@ -2047,7 +2093,7 @@ log_group_read_checkpoint_info(
fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->space_id, 0,
field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE,
- OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL, 0);
+ OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL, 0, 0);
}
/******************************************************//**
@@ -2146,7 +2192,6 @@ log_checkpoint(
}
log_sys->next_checkpoint_lsn = oldest_lsn;
-
#ifdef UNIV_DEBUG
if (log_debug_writes) {
fprintf(stderr, "Making checkpoint no "
@@ -2158,6 +2203,10 @@ log_checkpoint(
log_groups_write_checkpoint_info();
+ /* generate key version and key used to encrypt next log block */
+ log_crypt_set_ver_and_key(log_sys->redo_log_crypt_ver,
+ log_sys->redo_log_crypt_key);
+
MONITOR_INC(MONITOR_NUM_CHECKPOINT);
mutex_exit(&(log_sys->mutex));
@@ -2291,6 +2340,33 @@ loop:
}
/******************************************************//**
+Decrypt a specified log segment after it has been read from a log file to a buffer.
+@return true if decryption succeeds. */
+static
+bool
+log_group_decrypt_after_read(
+/*==========================*/
+ const log_group_t* group, /*!< in: log group to be read from */
+ byte* frame, /*!< in/out: log segment */
+ const ulint size) /*!< in: log segment size */
+{
+ Crypt_result result;
+ ut_ad(size % OS_FILE_LOG_BLOCK_SIZE == 0);
+ byte* dst_frame = (byte*)malloc(size);
+
+ // decrypt log blocks content
+ result = log_blocks_decrypt(frame, size, dst_frame);
+
+ if (result == AES_OK)
+ {
+ memcpy(frame, dst_frame, size);
+ }
+ free(dst_frame);
+
+ return (result == AES_OK);
+}
+
+/******************************************************//**
Reads a specified log segment to a buffer. */
UNIV_INTERN
void
@@ -2341,7 +2417,14 @@ loop:
fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 0,
(ulint) (source_offset / UNIV_PAGE_SIZE),
(ulint) (source_offset % UNIV_PAGE_SIZE),
- len, buf, NULL, 0);
+ len, buf, NULL, 0, 0);
+
+ if (recv_sys->recv_log_crypt_ver != UNENCRYPTED_KEY_VER &&
+ !log_group_decrypt_after_read(group, buf, len))
+ {
+ fprintf(stderr, "Innodb redo log decryption failed.\n");
+ abort();
+ }
start_lsn += len;
buf += len;
@@ -2566,6 +2649,14 @@ loop:
MONITOR_INC(MONITOR_LOG_IO);
+ if (srv_encrypt_log &&
+ log_sys->redo_log_crypt_ver != UNENCRYPTED_KEY_VER &&
+ !log_group_encrypt_before_write(group, buf, len))
+ {
+ fprintf(stderr, "Innodb redo log encryption failed.\n");
+ abort();
+ }
+
fil_io(OS_FILE_WRITE | OS_FILE_LOG, false, group->archive_space_id,
(ulint) (next_offset / UNIV_PAGE_SIZE),
(ulint) (next_offset % UNIV_PAGE_SIZE),
@@ -3738,4 +3829,62 @@ log_mem_free(void)
log_sys = NULL;
}
}
+
+/** Event to wake up the log scrub thread */
+UNIV_INTERN os_event_t log_scrub_event = NULL;
+
+UNIV_INTERN ibool srv_log_scrub_thread_active = FALSE;
+
+/*****************************************************************//*
+If no log record has been written for a while, fill current log
+block with dummy records. */
+static
+void
+log_scrub()
+/*=========*/
+{
+ ulint cur_lbn = log_block_convert_lsn_to_no(log_sys->lsn);
+ if (next_lbn_to_pad == cur_lbn)
+ {
+ log_pad_current_log_block();
+ }
+ next_lbn_to_pad = log_block_convert_lsn_to_no(log_sys->lsn);
+}
+
+/* log scrubbing interval in ms. */
+UNIV_INTERN ulonglong innodb_scrub_log_interval;
+
+/*****************************************************************//**
+This is the main thread for log scrubbing. It waits for an event and,
+when woken up, fills the current log block with dummy records and
+sleeps again.
+@return this function does not return, it calls os_thread_exit() */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(log_scrub_thread)(
+/*===============================*/
+ void* arg __attribute__((unused))) /*!< in: a dummy parameter
+ required by os_thread_create */
+{
+ ut_ad(!srv_read_only_mode);
+
+ srv_log_scrub_thread_active = TRUE;
+
+ while(srv_shutdown_state == SRV_SHUTDOWN_NONE)
+ {
+ os_event_wait_time(log_scrub_event, innodb_scrub_log_interval * 1000);
+
+ log_scrub();
+
+ os_event_reset(log_scrub_event);
+ }
+
+ srv_log_scrub_thread_active = FALSE;
+
+ /* We count the number of threads in os_thread_exit(). A created
+ thread should always use that to exit and not use return() to exit. */
+ os_thread_exit(NULL);
+
+ OS_THREAD_DUMMY_RETURN;
+}
#endif /* !UNIV_HOTBACKUP */
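
The log scrub thread added above follows a common timed-wakeup worker pattern: wait on an event with a timeout, do a small unit of work, reset the event, and exit once shutdown is signalled. A hedged, standalone C++11 sketch of that pattern (the names below are illustrative, not InnoDB APIs):

    #include <atomic>
    #include <chrono>
    #include <condition_variable>
    #include <mutex>

    class scrub_worker {
    public:
        /* wake the worker early, e.g. from a shutdown path */
        void wake() { cv_.notify_one(); }

        void stop() { stop_.store(true); wake(); }

        /* main loop: wait for the interval (or a wake-up), then scrub once */
        void run(std::chrono::milliseconds interval) {
            std::unique_lock<std::mutex> lock(mutex_);
            while (!stop_.load()) {
                cv_.wait_for(lock, interval);
                scrub_once();   /* e.g. pad the current log block if idle */
            }
        }

    private:
        void scrub_once() { /* placeholder for the actual padding work */ }

        std::mutex              mutex_;
        std::condition_variable cv_;
        std::atomic<bool>       stop_{false};
    };
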
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index 48a204ff327..fbed6137cd7 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -694,8 +694,9 @@ recv_synchronize_groups(
recovered_lsn */
log_group_set_fields(group, recovered_lsn);
- }
+ ut_a(log_sys);
+ }
/* Copy the checkpoint info to the groups; remember that we have
incremented checkpoint_no by one, and the info will not be written
over the max checkpoint info, thus making the preservation of max
@@ -1144,7 +1145,9 @@ recv_parse_or_apply_log_rec_body(
+ 0 /*FLST_PREV*/
|| offs == PAGE_BTR_IBUF_FREE_LIST_NODE
+ PAGE_HEADER + FIL_ADDR_PAGE
- + FIL_ADDR_SIZE /*FLST_NEXT*/);
+ + FIL_ADDR_SIZE /*FLST_NEXT*/
+ || offs ==
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
break;
}
}
@@ -1371,6 +1374,9 @@ recv_parse_or_apply_log_rec_body(
ptr, end_ptr, page, page_zip, index);
}
break;
+ case MLOG_FILE_WRITE_CRYPT_DATA:
+ ptr = fil_parse_write_crypt_data(ptr, end_ptr, block);
+ break;
default:
ptr = NULL;
recv_sys->found_corrupt_log = TRUE;
@@ -3021,6 +3027,7 @@ recv_recovery_from_checkpoint_start_func(
ulint max_cp_field;
lsn_t checkpoint_lsn;
ib_uint64_t checkpoint_no;
+ uint recv_crypt_ver;
lsn_t group_scanned_lsn = 0;
lsn_t contiguous_lsn;
#ifdef UNIV_LOG_ARCHIVE
@@ -3080,13 +3087,21 @@ recv_recovery_from_checkpoint_start_func(
#ifdef UNIV_LOG_ARCHIVE
archived_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
#endif /* UNIV_LOG_ARCHIVE */
+ recv_crypt_ver = mach_read_from_4(buf + LOG_CRYPT_VER);
+ if (recv_crypt_ver == UNENCRYPTED_KEY_VER)
+ {
+ log_init_crypt_msg_and_nonce();
+ } else {
+ ut_memcpy(redo_log_crypt_msg, buf + LOG_CRYPT_MSG, MY_AES_BLOCK_SIZE);
+ ut_memcpy(aes_ctr_nonce, buf + LOG_CRYPT_IV, MY_AES_BLOCK_SIZE);
+ }
/* Read the first log file header to print a note if this is
a recovery from a restored InnoDB Hot Backup */
fil_io(OS_FILE_READ | OS_FILE_LOG, true, max_cp_group->space_id, 0,
0, 0, LOG_FILE_HDR_SIZE,
- log_hdr_buf, max_cp_group, 0);
+ log_hdr_buf, max_cp_group, 0, 0);
if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
(byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
@@ -3117,7 +3132,7 @@ recv_recovery_from_checkpoint_start_func(
fil_io(OS_FILE_WRITE | OS_FILE_LOG, true,
max_cp_group->space_id, 0,
0, 0, OS_FILE_LOG_BLOCK_SIZE,
- log_hdr_buf, max_cp_group, 0);
+ log_hdr_buf, max_cp_group, 0, 0);
}
#ifdef UNIV_LOG_ARCHIVE
@@ -3141,7 +3156,10 @@ recv_recovery_from_checkpoint_start_func(
recv_sys->scanned_lsn = checkpoint_lsn;
recv_sys->scanned_checkpoint_no = 0;
recv_sys->recovered_lsn = checkpoint_lsn;
-
+ recv_sys->recv_log_crypt_ver = recv_crypt_ver;
+ log_init_crypt_key(redo_log_crypt_msg,
+ recv_sys->recv_log_crypt_ver,
+ recv_sys->recv_log_crypt_key);
srv_start_lsn = checkpoint_lsn;
}
@@ -3224,7 +3242,6 @@ recv_recovery_from_checkpoint_start_func(
group = UT_LIST_GET_NEXT(log_groups, group);
}
-
/* Done with startup scan. Clear the flag. */
recv_log_scan_is_startup_type = FALSE;
if (TYPE_CHECKPOINT) {
@@ -3312,6 +3329,8 @@ recv_recovery_from_checkpoint_start_func(
log_sys->next_checkpoint_lsn = checkpoint_lsn;
log_sys->next_checkpoint_no = checkpoint_no + 1;
+ log_crypt_set_ver_and_key(log_sys->redo_log_crypt_ver,
+ log_sys->redo_log_crypt_key);
#ifdef UNIV_LOG_ARCHIVE
log_sys->archived_lsn = archived_lsn;
@@ -3342,6 +3361,8 @@ recv_recovery_from_checkpoint_start_func(
log_sys->lsn - log_sys->last_checkpoint_lsn);
log_sys->next_checkpoint_no = checkpoint_no + 1;
+ log_crypt_set_ver_and_key(log_sys->redo_log_crypt_ver,
+ log_sys->redo_log_crypt_key);
#ifdef UNIV_LOG_ARCHIVE
if (archived_lsn == LSN_MAX) {
@@ -3543,6 +3564,16 @@ recv_reset_logs(
log_sys->next_checkpoint_no = 0;
log_sys->last_checkpoint_lsn = 0;
+ /* redo_log_crypt_ver will be set by log_checkpoint() to the
+ latest key version. */
+ log_sys->redo_log_crypt_ver = UNENCRYPTED_KEY_VER;
+ /*
+	  Note: the flags (srv_encrypt_log and debug_use_static_keys)
+	  have not been read and set yet, so do not guard this with a
+	  condition such as:
+	  if (srv_encrypt_log && debug_use_static_keys)
+ */
+ log_init_crypt_msg_and_nonce();
#ifdef UNIV_LOG_ARCHIVE
log_sys->archived_lsn = log_sys->lsn;
@@ -4019,4 +4050,3 @@ byte* recv_dblwr_t::find_page(ulint space_id, ulint page_no)
return(result);
}
-
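
The recovery changes above read a key version plus a crypt message and nonce out of the checkpoint buffer, and fall back to freshly initialized crypt state when the version says the log is unencrypted. A hedged sketch of that kind of header parse; the offsets and sizes are assumptions standing in for the LOG_CRYPT_VER, LOG_CRYPT_MSG and LOG_CRYPT_IV constants defined elsewhere in this patch:

    #include <cstdint>
    #include <cstring>

    /* Assumed layout, for illustration only. */
    static const size_t   CRYPT_VER_OFF = 0;   /* 4-byte big-endian version */
    static const size_t   CRYPT_MSG_OFF = 4;   /* 16-byte crypt message      */
    static const size_t   CRYPT_IV_OFF  = 20;  /* 16-byte nonce / IV         */
    static const uint32_t UNENCRYPTED   = 0;

    struct crypt_header {
        uint32_t      version;
        unsigned char msg[16];
        unsigned char iv[16];
    };

    static crypt_header parse_crypt_header(const unsigned char* buf)
    {
        crypt_header h;
        /* big-endian 4-byte read, mirroring mach_read_from_4() */
        h.version = (uint32_t(buf[CRYPT_VER_OFF])     << 24)
                  | (uint32_t(buf[CRYPT_VER_OFF + 1]) << 16)
                  | (uint32_t(buf[CRYPT_VER_OFF + 2]) << 8)
                  |  uint32_t(buf[CRYPT_VER_OFF + 3]);
        if (h.version != UNENCRYPTED) {
            memcpy(h.msg, buf + CRYPT_MSG_OFF, sizeof h.msg);
            memcpy(h.iv,  buf + CRYPT_IV_OFF,  sizeof h.iv);
        } else {
            memset(h.msg, 0, sizeof h.msg);
            memset(h.iv,  0, sizeof h.iv);
        }
        return h;
    }
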
diff --git a/storage/innobase/mtr/mtr0log.cc b/storage/innobase/mtr/mtr0log.cc
index 5335cb4c9ef..82df1df63d4 100644
--- a/storage/innobase/mtr/mtr0log.cc
+++ b/storage/innobase/mtr/mtr0log.cc
@@ -75,7 +75,7 @@ mlog_write_initial_log_record(
{
byte* log_ptr;
- ut_ad(type <= MLOG_BIGGEST_TYPE);
+ ut_ad(type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(type));
ut_ad(type > MLOG_8BYTES);
log_ptr = mlog_open(mtr, 11);
@@ -111,7 +111,7 @@ mlog_parse_initial_log_record(
}
*type = (byte)((ulint)*ptr & ~MLOG_SINGLE_REC_FLAG);
- ut_ad(*type <= MLOG_BIGGEST_TYPE);
+ ut_ad(*type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(*type));
ptr++;
@@ -150,8 +150,6 @@ mlog_parse_nbytes(
ib_uint64_t dval;
ut_a(type <= MLOG_8BYTES);
- ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX);
-
if (end_ptr < ptr + 2) {
return(NULL);
@@ -160,6 +158,11 @@ mlog_parse_nbytes(
offset = mach_read_from_2(ptr);
ptr += 2;
+ ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX ||
+ /* scrubbing changes page type from FIL_PAGE_INDEX to
+ * FIL_PAGE_TYPE_ALLOCATED (rest of this assertion is below) */
+ (type == MLOG_2BYTES && offset == FIL_PAGE_TYPE));
+
if (offset >= UNIV_PAGE_SIZE) {
recv_sys->found_corrupt_log = TRUE;
@@ -219,6 +222,14 @@ mlog_parse_nbytes(
}
mach_write_to_2(page + offset, val);
}
+ ut_a(!page || !page_zip ||
+ fil_page_get_type(page) != FIL_PAGE_INDEX ||
+ /* scrubbing changes page type from FIL_PAGE_INDEX to
+ * FIL_PAGE_TYPE_ALLOCATED */
+ (type == MLOG_2BYTES &&
+ offset == FIL_PAGE_TYPE &&
+ val == FIL_PAGE_TYPE_ALLOCATED));
+
break;
case MLOG_4BYTES:
if (page) {
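
The relaxed assertions in mlog_parse_nbytes() above admit exactly one extra redo pattern on what used to be an index page: a 2-byte write of FIL_PAGE_TYPE_ALLOCATED at offset FIL_PAGE_TYPE, which is what scrubbing produces when it resets the page type. A hedged helper expressing that condition; the numeric values are assumed to match the usual InnoDB definitions and are spelled out only so the sketch is self-contained:

    /* Assumed to match mtr0mtr.h / fil0fil.h: MLOG_2BYTES == 2,
       FIL_PAGE_TYPE == 24, FIL_PAGE_TYPE_ALLOCATED == 0. */
    static const unsigned      SKETCH_MLOG_2BYTES             = 2;
    static const unsigned long SKETCH_FIL_PAGE_TYPE           = 24;
    static const unsigned long SKETCH_FIL_PAGE_TYPE_ALLOCATED = 0;

    /* true when a redo record is the page-type reset written by scrubbing */
    static inline bool
    is_scrub_page_type_reset(unsigned type, unsigned long offset, unsigned long val)
    {
        return type == SKETCH_MLOG_2BYTES
            && offset == SKETCH_FIL_PAGE_TYPE
            && val == SKETCH_FIL_PAGE_TYPE_ALLOCATED;
    }
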
diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
index 869586bcd90..400aa9bff57 100644
--- a/storage/innobase/mtr/mtr0mtr.cc
+++ b/storage/innobase/mtr/mtr0mtr.cc
@@ -437,3 +437,36 @@ mtr_print(
}
# endif /* !UNIV_HOTBACKUP */
#endif /* UNIV_DEBUG */
+
+/**********************************************************//**
+Releases a buf_page stored in an mtr memo after a
+savepoint. */
+UNIV_INTERN
+void
+mtr_release_buf_page_at_savepoint(
+/*=============================*/
+ mtr_t* mtr, /*!< in: mtr */
+ ulint savepoint, /*!< in: savepoint */
+ buf_block_t* block) /*!< in: block to release */
+{
+ mtr_memo_slot_t* slot;
+ dyn_array_t* memo;
+
+ ut_ad(mtr);
+ ut_ad(mtr->magic_n == MTR_MAGIC_N);
+ ut_ad(mtr->state == MTR_ACTIVE);
+
+ memo = &(mtr->memo);
+
+ ut_ad(dyn_array_get_data_size(memo) > savepoint);
+
+ slot = (mtr_memo_slot_t*) dyn_array_get_element(memo, savepoint);
+
+ ut_ad(slot->object == block);
+ ut_ad(slot->type == MTR_MEMO_PAGE_S_FIX ||
+ slot->type == MTR_MEMO_PAGE_X_FIX ||
+ slot->type == MTR_MEMO_BUF_FIX);
+
+ buf_page_release((buf_block_t*) slot->object, slot->type);
+ slot->object = NULL;
+}
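
A hedged usage sketch of the new mtr_release_buf_page_at_savepoint(): record a savepoint before latching a block, then release just that block while the mini-transaction stays open. The space_id/zip_size/page_no variables are placeholders, and the surrounding calls are the usual mtr/buf API rather than code from this patch:

    	mtr_t	mtr;
    	mtr_start(&mtr);

    	/* remember where this block's latch will be stored in the memo */
    	ulint		savepoint = mtr_set_savepoint(&mtr);
    	buf_block_t*	block = buf_page_get(space_id, zip_size, page_no,
    					     RW_X_LATCH, &mtr);

    	/* ... inspect the page and decide it is not needed ... */

    	/* release only this block; latches taken after the savepoint,
    	   and the mtr itself, are unaffected until mtr_commit() */
    	mtr_release_buf_page_at_savepoint(&mtr, savepoint, block);

    	mtr_commit(&mtr);
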
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index 4db5f183892..f41ddaf2b30 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -43,7 +43,9 @@ Created 10/21/1995 Heikki Tuuri
#include "srv0srv.h"
#include "srv0start.h"
#include "fil0fil.h"
+#include "fsp0fsp.h"
#include "fil0pagecompress.h"
+#include "fil0pageencryption.h"
#include "buf0buf.h"
#include "srv0mon.h"
#include "srv0srv.h"
@@ -223,9 +225,17 @@ struct os_aio_slot_t{
freed after the write
has been completed */
+ byte* page_encryption_page; /*!< Memory allocated for
+ page encrypted page and
+ freed after the write
+ has been completed */
+
ibool page_compression;
ulint page_compression_level;
+ ibool page_encryption;
+ ulint page_encryption_key;
+
ulint* write_size; /*!< Actual write size initialized
after fist successfull trim
operation for this page and if
@@ -236,9 +246,17 @@ struct os_aio_slot_t{
page compressed pages, do not
free this */
- ibool page_compress_success;
- /*!< TRUE if page compression was
- successfull, false if not */
+ byte* page_buf2; /*!< Actual page buffer for
+ page encrypted pages, do not
+ free this */
+	byte*		tmp_encryption_buf; /*!< temporary buffer used by page encryption */
+
+ ibool page_compression_success;
+ ibool page_encryption_success;
+				/*!< TRUE if page compression or
+				encryption was successful, FALSE if not */
+
+ lsn_t lsn; /* lsn of the newest modification */
ulint file_block_size;/*!< file block size */
@@ -398,6 +416,19 @@ os_slot_alloc_lzo_mem(
os_aio_slot_t* slot); /*!< in: slot structure */
#endif
+/**********************************************************************//**
+Allocate memory for a temporary buffer used for page encryption. This
+buffer is freed later. */
+UNIV_INTERN
+void
+os_slot_alloc_page_buf2(
+ os_aio_slot_t* slot); /*!< in: slot structure */
+/**********************************************************************//**
+Allocate memory for a temporary buffer used for page encryption. */
+UNIV_INTERN
+void
+os_slot_alloc_tmp_encryption_buf(
+ os_aio_slot_t* slot); /*!< in: slot structure */
/****************************************************************//**
Does error handling when a file operation fails.
@return TRUE if we should retry the operation */
@@ -2923,14 +2954,6 @@ try_again:
os_mutex_exit(os_file_count_mutex);
if (ret && len == n) {
- /* Note that InnoDB writes files that are not formated
- as file spaces and they do not have FIL_PAGE_TYPE
- field, thus we must use here information is the actual
- file space compressed. */
- if (fil_page_is_compressed((byte *)buf)) {
- fil_decompress_page(NULL, (byte *)buf, len, NULL);
- }
-
return(TRUE);
}
#else /* __WIN__ */
@@ -2943,14 +2966,6 @@ try_again:
ret = os_file_pread(file, buf, n, offset);
if ((ulint) ret == n) {
- /* Note that InnoDB writes files that are not formated
- as file spaces and they do not have FIL_PAGE_TYPE
- field, thus we must use here information is the actual
- file space compressed. */
- if (fil_page_is_compressed((byte *)buf)) {
- fil_decompress_page(NULL, (byte *)buf, n, NULL);
- }
-
return(TRUE);
}
@@ -3066,15 +3081,6 @@ try_again:
os_mutex_exit(os_file_count_mutex);
if (ret && len == n) {
-
- /* Note that InnoDB writes files that are not formated
- as file spaces and they do not have FIL_PAGE_TYPE
- field, thus we must use here information is the actual
- file space compressed. */
- if (fil_page_is_compressed((byte *)buf)) {
- fil_decompress_page(NULL, (byte *)buf, n, NULL);
- }
-
return(TRUE);
}
#else /* __WIN__ */
@@ -3087,14 +3093,6 @@ try_again:
ret = os_file_pread(file, buf, n, offset);
if ((ulint) ret == n) {
- /* Note that InnoDB writes files that are not formated
- as file spaces and they do not have FIL_PAGE_TYPE
- field, thus we must use here information is the actual
- file space compressed. */
- if (fil_page_is_compressed((byte *)buf)) {
- fil_decompress_page(NULL, (byte *)buf, n, NULL);
- }
-
return(TRUE);
}
#endif /* __WIN__ */
@@ -4180,6 +4178,7 @@ os_aio_array_free(
for (i = 0; i < array->n_slots; i++) {
os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i);
+
if (slot->page_compression_page) {
ut_free(slot->page_compression_page);
slot->page_compression_page = NULL;
@@ -4189,8 +4188,19 @@ os_aio_array_free(
ut_free(slot->lzo_mem);
slot->lzo_mem = NULL;
}
+
+ if (slot->page_encryption_page) {
+ ut_free(slot->page_encryption_page);
+ slot->page_encryption_page = NULL;
+ }
+
+ if (slot->tmp_encryption_buf) {
+ ut_free(slot->tmp_encryption_buf);
+ slot->tmp_encryption_buf = NULL;
+ }
}
+
ut_free(array->slots);
ut_free(array);
@@ -4532,8 +4542,13 @@ os_aio_array_reserve_slot(
actual page size does not decrease. */
ibool page_compression, /*!< in: is page compression used
on this file space */
- ulint page_compression_level) /*!< page compression
- level to be used */
+ ulint page_compression_level, /*!< page compression
+ level to be used */
+ ibool page_encryption, /*!< in: is page encryption used
+ on this file space */
+ ulint page_encryption_key, /*!< page encryption key
+ to be used */
+ lsn_t lsn) /* lsn of the newest modification */
{
os_aio_slot_t* slot = NULL;
#ifdef WIN_ASYNC_IO
@@ -4622,11 +4637,15 @@ found:
slot->type = type;
slot->buf = static_cast<byte*>(buf);
slot->offset = offset;
+ slot->lsn = lsn;
slot->io_already_done = FALSE;
- slot->page_compress_success = FALSE;
+ slot->page_compression_success = FALSE;
+ slot->page_encryption_success = FALSE;
slot->write_size = write_size;
slot->page_compression_level = page_compression_level;
slot->page_compression = page_compression;
+ slot->page_encryption_key = page_encryption_key;
+ slot->page_encryption = page_encryption;
if (message1) {
slot->file_block_size = fil_node_get_block_size(message1);
@@ -4652,7 +4671,8 @@ found:
#endif
/* Call page compression */
- tmp = fil_compress_page(fil_node_get_space_id(slot->message1),
+ tmp = fil_compress_page(
+ fil_node_get_space_id(slot->message1),
(byte *)buf,
slot->page_buf,
len,
@@ -4667,9 +4687,9 @@ found:
len = real_len;
buf = slot->page_buf;
slot->len = real_len;
- slot->page_compress_success = TRUE;
+ slot->page_compression_success = TRUE;
} else {
- slot->page_compress_success = FALSE;
+ slot->page_compression_success = FALSE;
}
/* Take array mutex back, not sure if this is really needed
@@ -4678,6 +4698,35 @@ found:
}
+
+	/* If the space uses page encryption and this is a write operation,
+	then we encrypt the page */
+	if (message1 && type == OS_FILE_WRITE && page_encryption) {
+ /* Release the array mutex while encrypting */
+ os_mutex_exit(array->mutex);
+
+ // We allocate memory for page encrypted buffer if and only
+ // if it is not yet allocated.
+ os_slot_alloc_page_buf2(slot);
+
+ fil_space_encrypt(
+ fil_node_get_space_id(slot->message1),
+ slot->offset,
+ slot->lsn,
+ (byte *)buf,
+ slot->len,
+ slot->page_buf2,
+ slot->page_encryption_key);
+
+ slot->page_encryption_success = TRUE;
+ buf = slot->page_buf2;
+
+ /* Take array mutex back */
+ os_mutex_enter(array->mutex);
+ }
#ifdef WIN_ASYNC_IO
control = &slot->control;
@@ -4963,12 +5012,18 @@ os_aio_func(
actual page size does not decrease. */
ibool page_compression, /*!< in: is page compression used
on this file space */
- ulint page_compression_level) /*!< page compression
+ ulint page_compression_level, /*!< page compression
level to be used */
+ ibool page_encryption, /*!< in: is page encryption used
+ on this file space */
+ ulint page_encryption_key, /*!< page encryption key
+ to be used */
+ lsn_t lsn) /* lsn of the newest modification */
{
os_aio_array_t* array;
os_aio_slot_t* slot;
#ifdef WIN_ASYNC_IO
+ void* buffer = NULL;
ibool retval;
BOOL ret = TRUE;
DWORD len = (DWORD) n;
@@ -4987,6 +5042,7 @@ os_aio_func(
ut_ad((n & 0xFFFFFFFFUL) == n);
#endif
+
wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER);
@@ -5077,7 +5133,9 @@ try_again:
}
slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
- name, buf, offset, n, write_size, page_compression, page_compression_level);
+ name, buf, offset, n, write_size,
+ page_compression, page_compression_level,
+ page_encryption, page_encryption_key, lsn);
if (type == OS_FILE_READ) {
if (srv_use_native_aio) {
@@ -5104,7 +5162,18 @@ try_again:
if (srv_use_native_aio) {
os_n_file_writes++;
#ifdef WIN_ASYNC_IO
- ret = WriteFile(file, buf, (DWORD) n, &len,
+ if (page_encryption && slot->page_encryption_success) {
+ buffer = slot->page_buf2;
+ n = slot->len;
+ } else {
+ if (page_compression && slot->page_compression_success) {
+ buffer = slot->page_buf;
+ n = slot->len;
+ } else {
+ buffer = buf;
+ }
+ }
+ ret = WriteFile(file, buffer, (DWORD) n, &len,
&(slot->control));
#elif defined(LINUX_NATIVE_AIO)
@@ -5307,21 +5376,29 @@ os_aio_windows_handle(
ut_a((slot->len & 0xFFFFFFFFUL) == slot->len);
- switch (slot->type) {
- case OS_FILE_WRITE:
- if (slot->message1 &&
- slot->page_compression &&
- slot->page_compress_success &&
- slot->page_buf) {
- ret = WriteFile(slot->file, slot->page_buf,
- (DWORD) slot->len, &len,
- &(slot->control));
+ switch (slot->type) {
+ case OS_FILE_WRITE:
+ if (slot->message1
+ && slot->page_encryption
+ && slot->page_encryption_success) {
+ ret_val = os_file_write(slot->name,
+ slot->file,
+ slot->page_buf2,
+ slot->offset,
+ slot->len);
} else {
- ret = WriteFile(slot->file, slot->buf,
- (DWORD) slot->len, &len,
- &(slot->control));
- }
-
+ if (slot->message1
+ && slot->page_compression
+ && slot->page_compression_success) {
+ ret = WriteFile(slot->file, slot->page_buf,
+ (DWORD) slot->len, &len,
+ &(slot->control));
+ } else {
+ ret = WriteFile(slot->file, slot->buf,
+ (DWORD) slot->len, &len,
+ &(slot->control));
+ }
+ }
break;
case OS_FILE_READ:
ret = ReadFile(slot->file, slot->buf,
@@ -5353,20 +5430,41 @@ os_aio_windows_handle(
}
if (slot->type == OS_FILE_READ) {
- if(fil_page_is_compressed(slot->buf)) {
+ if (fil_page_is_compressed_encrypted(slot->buf) ||
+ fil_page_is_encrypted(slot->buf)) {
+ ut_ad(slot->message1 != NULL);
+ os_slot_alloc_page_buf2(slot);
+ os_slot_alloc_tmp_encryption_buf(slot);
+
+ // Decrypt the data
+ fil_space_decrypt(
+ fil_node_get_space_id(slot->message1),
+ slot->buf,
+ slot->len,
+ slot->page_buf2);
+ // Copy decrypted buffer back to buf
+ memcpy(slot->buf, slot->page_buf2, slot->len);
+ }
+ if (fil_page_is_compressed(slot->buf)) {
+ /* We allocate memory for page compressed buffer if
+ and only if it is not yet allocated. */
os_slot_alloc_page_buf(slot);
-
#ifdef HAVE_LZO
if (fil_page_is_lzo_compressed(slot->buf)) {
os_slot_alloc_lzo_mem(slot);
}
#endif
-
- fil_decompress_page(slot->page_buf, slot->buf, slot->len, slot->write_size);
+ fil_decompress_page(
+ slot->page_buf,
+ slot->buf,
+ slot->len,
+ slot->write_size);
}
} else {
/* OS_FILE_WRITE */
- if (slot->page_compress_success && fil_page_is_compressed(slot->page_buf)) {
+ if (slot->page_compression_success &&
+ (fil_page_is_compressed(slot->page_buf) ||
+ fil_page_is_compressed_encrypted(slot->buf))) {
if (srv_use_trim && os_fallocate_failed == FALSE) {
// Deallocate unused blocks from file system
os_file_trim(slot);
@@ -5464,9 +5562,27 @@ retry:
ut_a(slot->pos < end_pos);
if (slot->type == OS_FILE_READ) {
- /* If the table is page compressed and this is read,
- we decompress before we annouce the read is
- complete. For writes, we free the compressed page. */
+		/* If the page is page encrypted, we decrypt it */
+ if (fil_page_is_compressed_encrypted(slot->buf) ||
+ fil_page_is_encrypted(slot->buf)) {
+ os_slot_alloc_page_buf2(slot);
+ os_slot_alloc_tmp_encryption_buf(slot);
+ ut_ad(slot->message1 != NULL);
+
+ // Decrypt the data
+ fil_space_decrypt(
+ fil_node_get_space_id(slot->message1),
+ slot->buf,
+ slot->len,
+ slot->page_buf2);
+ // Copy decrypted buffer back to buf
+ memcpy(slot->buf, slot->page_buf2, slot->len);
+ }
+
+ /* If the table is page compressed and this
+ is read, we decompress before we announce
+ the read is complete. For writes, we free
+ the compressed page. */
if (fil_page_is_compressed(slot->buf)) {
// We allocate memory for page compressed buffer if and only
// if it is not yet allocated.
@@ -5481,9 +5597,9 @@ retry:
}
} else {
/* OS_FILE_WRITE */
- if (slot->page_compress_success &&
- fil_page_is_compressed(slot->page_buf)) {
- ut_ad(slot->page_compression_page);
+ if (slot->page_compression_success &&
+ (fil_page_is_compressed(slot->page_buf) ||
+ fil_page_is_compressed_encrypted(slot->buf))) {
if (srv_use_trim && os_fallocate_failed == FALSE) {
// Deallocate unused blocks from file system
os_file_trim(slot);
@@ -6509,6 +6625,29 @@ os_file_trim(
#endif /* !UNIV_HOTBACKUP */
/**********************************************************************//**
+Allocate memory for a temporary buffer used for page encryption. This
+buffer is freed later. */
+UNIV_INTERN
+void
+os_slot_alloc_page_buf2(
+/*===================*/
+ os_aio_slot_t* slot) /*!< in: slot structure */
+{
+ ut_a(slot != NULL);
+
+ if(slot->page_buf2 == NULL) {
+ byte* cbuf2;
+ byte* cbuf;
+
+ cbuf2 = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*2));
+ cbuf = static_cast<byte *>(ut_align(cbuf2, UNIV_PAGE_SIZE));
+ slot->page_encryption_page = static_cast<byte *>(cbuf2);
+ slot->page_buf2 = static_cast<byte *>(cbuf);
+ memset(slot->page_encryption_page, 0, UNIV_PAGE_SIZE*2);
+ }
+}
+
+/**********************************************************************//**
Allocate memory for temporal buffer used for page compression. This
buffer is freed later. */
UNIV_INTERN
@@ -6517,18 +6656,17 @@ os_slot_alloc_page_buf(
/*===================*/
os_aio_slot_t* slot) /*!< in: slot structure */
{
- byte* cbuf2;
- byte* cbuf;
-
ut_a(slot != NULL);
- if (slot->page_compression_page == NULL) {
+ if (slot->page_buf == NULL) {
+ byte* cbuf2;
+ byte* cbuf;
/* We allocate extra to avoid memory overwrite on compression */
cbuf2 = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*2));
cbuf = static_cast<byte *>(ut_align(cbuf2, UNIV_PAGE_SIZE));
slot->page_compression_page = static_cast<byte *>(cbuf2);
slot->page_buf = static_cast<byte *>(cbuf);
- memset(slot->page_compression_page, 0, UNIV_PAGE_SIZE*2);
ut_a(slot->page_buf != NULL);
+ memset(slot->page_compression_page, 0, UNIV_PAGE_SIZE*2);
}
}
@@ -6545,12 +6683,28 @@ os_slot_alloc_lzo_mem(
ut_a(slot != NULL);
if(slot->lzo_mem == NULL) {
slot->lzo_mem = static_cast<byte *>(ut_malloc(LZO1X_1_15_MEM_COMPRESS));
- memset(slot->lzo_mem, 0, LZO1X_1_15_MEM_COMPRESS);
ut_a(slot->lzo_mem != NULL);
+ memset(slot->lzo_mem, 0, LZO1X_1_15_MEM_COMPRESS);
}
}
#endif
+/**********************************************************************//**
+Allocate memory for a temporary buffer used for page encryption. */
+UNIV_INTERN
+void
+os_slot_alloc_tmp_encryption_buf(
+/*=============================*/
+ os_aio_slot_t* slot) /*!< in: slot structure */
+{
+ ut_a(slot != NULL);
+ if (slot->tmp_encryption_buf == NULL) {
+ slot->tmp_encryption_buf = static_cast<byte *>(ut_malloc(64));
+ memset(slot->tmp_encryption_buf, 0, 64);
+ }
+}
+
+
/***********************************************************************//**
Try to get number of bytes per sector from file system.
@return file block size */
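
Both AIO completion paths above (the Windows handler and the simulated/native handler) apply the same post-read order: decrypt first, because the on-disk image is the encrypted form, and only then undo page compression on the restored frame. A hedged standalone sketch of that ordering, with hypothetical helpers standing in for the fil_page_is_* probes, fil_space_decrypt() and fil_decompress_page():

    #include <cstddef>

    /* Hypothetical stand-ins for the InnoDB calls used above. */
    bool page_is_encrypted(const unsigned char* page);
    bool page_is_compressed(const unsigned char* page);
    void page_decrypt(unsigned char* page, size_t len);
    void page_decompress(unsigned char* page, size_t len);

    static void post_read_fixup(unsigned char* page, size_t len)
    {
    	/* 1. Decrypt first: this restores the original, possibly
    	   page-compressed, frame from its on-disk encrypted form. */
    	if (page_is_encrypted(page)) {
    		page_decrypt(page, len);
    	}

    	/* 2. Then undo page compression, if the restored frame uses it. */
    	if (page_is_compressed(page)) {
    		page_decompress(page, len);
    	}
    }
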
diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc
index bd5fb36af8f..4aff88818bb 100644
--- a/storage/innobase/page/page0page.cc
+++ b/storage/innobase/page/page0page.cc
@@ -1087,7 +1087,9 @@ delete_all:
last_rec = page_rec_get_prev(page_get_supremum_rec(page));
- if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED)) {
+ bool scrub = srv_immediate_scrub_data_uncompressed;
+ if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED) ||
+ scrub) {
rec_t* rec2 = rec;
/* Calculate the sum of sizes and the number of records */
size = 0;
@@ -1104,6 +1106,12 @@ delete_all:
size += s;
n_recs++;
+ if (scrub) {
+ /* scrub record */
+ uint recsize = rec_offs_data_size(offsets);
+ memset(rec2, 0, recsize);
+ }
+
rec2 = page_rec_get_next(rec2);
} while (!page_rec_is_supremum(rec2));
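
The scrub branch added above walks the record list that is about to be freed and zeroes each record's data portion, so deleted row contents do not survive on the page. A hedged, generic sketch of that wipe using plain structs rather than InnoDB's record/offsets machinery:

    #include <cstddef>
    #include <cstring>

    struct rec_node {
    	unsigned char*	data;		/* start of the record payload */
    	size_t		data_len;	/* what rec_offs_data_size() reports */
    	rec_node*	next;		/* next record on the page */
    };

    /* zero the payloads of every record up to (but excluding) the supremum */
    static void scrub_record_chain(rec_node* rec, const rec_node* supremum)
    {
    	for (; rec != supremum; rec = rec->next) {
    		memset(rec->data, 0, rec->data_len);
    	}
    }
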
diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc
index c513320afc1..d5f766ef51b 100644
--- a/storage/innobase/row/row0import.cc
+++ b/storage/innobase/row/row0import.cc
@@ -1990,7 +1990,8 @@ PageConverter::update_header(
}
mach_write_to_8(
- get_frame(block) + FIL_PAGE_FILE_FLUSH_LSN, m_current_lsn);
+ get_frame(block) + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
+ m_current_lsn);
/* Write space_id to the tablespace header, page 0. */
mach_write_to_4(
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index 68941b11c05..43446112bca 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -3245,6 +3245,41 @@ run_again:
return(err);
}
+static
+void
+fil_wait_crypt_bg_threads(
+ dict_table_t* table)
+{
+ uint start = time(0);
+ uint last = start;
+
+ if (table->space != 0) {
+ fil_space_crypt_mark_space_closing(table->space);
+ }
+
+ while (table->n_ref_count > 0) {
+ dict_mutex_exit_for_mysql();
+ os_thread_sleep(20000);
+ dict_mutex_enter_for_mysql();
+ uint now = time(0);
+ if (now >= last + 30) {
+ fprintf(stderr,
+ "WARNING: waited %u seconds "
+ "for ref-count on table: %s space: %u\n",
+ now - start, table->name, table->space);
+ last = now;
+ }
+
+ if (now >= start + 300) {
+ fprintf(stderr,
+ "WARNING: after %u seconds, gave up waiting "
+ "for ref-count on table: %s space: %u\n",
+ now - start, table->name, table->space);
+ break;
+ }
+ }
+}
+
/*********************************************************************//**
Truncates a table for MySQL.
@return error code or DB_SUCCESS */
@@ -4055,6 +4090,9 @@ row_drop_table_for_mysql(
shouldn't have to. There should never be record locks on a table
that is going to be dropped. */
+ /* Wait on background threads to stop using table */
+ fil_wait_crypt_bg_threads(table);
+
if (table->n_ref_count == 0) {
lock_remove_all_on_table(table, TRUE);
ut_a(table->n_rec_locks == 0);
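
fil_wait_crypt_bg_threads() above is a bounded poll: drop the dict mutex, sleep briefly, re-acquire it, warn every 30 seconds, and give up after 300 seconds even if the reference count never reaches zero. A hedged standalone sketch of that wait-with-escalating-warnings pattern (generic C++, no InnoDB locking):

    #include <chrono>
    #include <cstdio>
    #include <functional>
    #include <thread>

    /* Returns true once ref_count() reaches zero, false on timeout.
       warn_every / give_up_after mirror the 30 s / 300 s limits above. */
    static bool wait_for_zero_refs(std::function<unsigned()> ref_count,
                                   std::chrono::seconds warn_every    = std::chrono::seconds(30),
                                   std::chrono::seconds give_up_after = std::chrono::seconds(300))
    {
        using clock = std::chrono::steady_clock;
        const clock::time_point start = clock::now();
        clock::time_point last_warn = start;

        while (ref_count() > 0) {
            std::this_thread::sleep_for(std::chrono::milliseconds(20));
            const clock::time_point now = clock::now();
            if (now - last_warn >= warn_every) {
                std::fprintf(stderr, "still waiting for ref-count to drop\n");
                last_warn = now;
            }
            if (now - start >= give_up_after) {
                return false;   /* give up, as the patch does after 300 s */
            }
        }
        return true;
    }
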
diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc
index 8580aa45145..7649add4b33 100644
--- a/storage/innobase/row/row0umod.cc
+++ b/storage/innobase/row/row0umod.cc
@@ -364,8 +364,15 @@ row_undo_mod_clust(
}
}
- ut_ad(rec_get_trx_id(btr_pcur_get_rec(pcur), index)
- == node->new_trx_id);
+ /**
+	 * When scrubbing, records get cleared, so the transaction id is
+	 * not present afterwards. This is safe: since the record is on the
+	 * free list, it can be reallocated at any time after this mtr
+	 * commits, which happens just below.
+ */
+ ut_ad(srv_immediate_scrub_data_uncompressed ||
+ rec_get_trx_id(btr_pcur_get_rec(pcur), index) == node->new_trx_id);
btr_pcur_commit_specify_mtr(pcur, &mtr);
diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc
index 24cf403c0af..cb7c1ae6367 100644
--- a/storage/innobase/srv/srv0mon.cc
+++ b/storage/innobase/srv/srv0mon.cc
@@ -985,6 +985,21 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR},
+ {"compress_pages_page_encrypted", "compression",
+ "Number of pages encrypted by page encryption",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_ENCRYPTED},
+
+ {"compress_pages_page_decrypted", "compression",
+ "Number of pages decrypted by page encryption",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_DECRYPTED},
+
+ {"compress_pages_page_encryption_error", "compression",
+	"Number of page encryption errors",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_ENCRYPTION_ERROR},
+
/* ========== Counters for Index ========== */
{"module_index", "index", "Index Manager",
MONITOR_MODULE,
@@ -1998,6 +2013,15 @@ srv_mon_process_existing_counter(
case MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR:
value = srv_stats.pages_page_compression_error;
break;
+ case MONITOR_OVLD_PAGES_PAGE_ENCRYPTED:
+ value = srv_stats.pages_page_encrypted;
+ break;
+ case MONITOR_OVLD_PAGES_PAGE_DECRYPTED:
+ value = srv_stats.pages_page_decrypted;
+ break;
+ case MONITOR_OVLD_PAGES_PAGE_ENCRYPTION_ERROR:
+ value = srv_stats.pages_page_encryption_error;
+ break;
default:
ut_error;
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index bcbce3cd53c..7c796efe58f 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -73,7 +73,9 @@ Created 10/8/1995 Heikki Tuuri
#include "mysql/plugin.h"
#include "mysql/service_thd_wait.h"
+#include "fil0fil.h"
#include "fil0pagecompress.h"
+#include "btr0scrub.h"
#ifdef WITH_WSREP
extern int wsrep_debug;
@@ -93,6 +95,9 @@ UNIV_INTERN ibool srv_buf_dump_thread_active = FALSE;
UNIV_INTERN ibool srv_dict_stats_thread_active = FALSE;
+UNIV_INTERN ibool srv_log_scrub_active = FALSE;
+UNIV_INTERN my_bool srv_scrub_log = FALSE;
+
UNIV_INTERN const char* srv_main_thread_op_info = "";
/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
@@ -1426,10 +1431,14 @@ srv_export_innodb_status(void)
ulint LRU_len;
ulint free_len;
ulint flush_list_len;
+ fil_crypt_stat_t crypt_stat;
+ btr_scrub_stat_t scrub_stat;
buf_get_total_stat(&stat);
buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
buf_get_total_list_size_in_bytes(&buf_pools_list_size);
+ fil_crypt_total_stat(&crypt_stat);
+ btr_scrub_total_stat(&scrub_stat);
mutex_enter(&srv_innodb_monitor_mutex);
@@ -1584,6 +1593,10 @@ srv_export_innodb_status(void)
export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op;
export_vars.innodb_page_compressed_trim_op_saved = srv_stats.page_compressed_trim_op_saved;
export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed;
+ export_vars.innodb_pages_page_compression_error = srv_stats.pages_page_compression_error;
+ export_vars.innodb_pages_page_decrypted = srv_stats.pages_page_decrypted;
+ export_vars.innodb_pages_page_encrypted = srv_stats.pages_page_encrypted;
+ export_vars.innodb_pages_page_encryption_error = srv_stats.pages_page_encryption_error;
export_vars.innodb_defragment_compression_failures =
btr_defragment_compression_failures;
@@ -1627,6 +1640,30 @@ srv_export_innodb_status(void)
export_vars.innodb_sec_rec_cluster_reads_avoided =
srv_stats.n_sec_rec_cluster_reads_avoided;
+ export_vars.innodb_encryption_rotation_pages_read_from_cache =
+ crypt_stat.pages_read_from_cache;
+ export_vars.innodb_encryption_rotation_pages_read_from_disk =
+ crypt_stat.pages_read_from_disk;
+ export_vars.innodb_encryption_rotation_pages_modified =
+ crypt_stat.pages_modified;
+ export_vars.innodb_encryption_rotation_pages_flushed =
+ crypt_stat.pages_flushed;
+ export_vars.innodb_encryption_rotation_estimated_iops =
+ crypt_stat.estimated_iops;
+
+ export_vars.innodb_scrub_page_reorganizations =
+ scrub_stat.page_reorganizations;
+ export_vars.innodb_scrub_page_splits =
+ scrub_stat.page_splits;
+ export_vars.innodb_scrub_page_split_failures_underflow =
+ scrub_stat.page_split_failures_underflow;
+ export_vars.innodb_scrub_page_split_failures_out_of_filespace =
+ scrub_stat.page_split_failures_out_of_filespace;
+ export_vars.innodb_scrub_page_split_failures_missing_index =
+ scrub_stat.page_split_failures_missing_index;
+ export_vars.innodb_scrub_page_split_failures_unknown =
+ scrub_stat.page_split_failures_unknown;
+
mutex_exit(&srv_innodb_monitor_mutex);
}
@@ -2010,6 +2047,8 @@ srv_any_background_threads_are_active(void)
thread_active = "buf_dump_thread";
} else if (srv_dict_stats_thread_active) {
thread_active = "dict_stats_thread";
+ } else if (srv_scrub_log && srv_log_scrub_thread_active) {
+ thread_active = "log_scrub_thread";
}
os_event_set(srv_error_event);
@@ -2017,6 +2056,8 @@ srv_any_background_threads_are_active(void)
os_event_set(srv_buf_dump_event);
os_event_set(lock_sys->timeout_event);
os_event_set(dict_stats_event);
+ if (srv_scrub_log)
+ os_event_set(log_scrub_event);
return(thread_active);
}
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index 3d0d62c335b..3822a9abf2d 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -99,6 +99,7 @@ Created 2/16/1996 Heikki Tuuri
# include "os0sync.h"
# include "zlib.h"
# include "ut0crc32.h"
+# include "btr0scrub.h"
/** Log sequence number immediately after startup */
UNIV_INTERN lsn_t srv_start_lsn;
@@ -664,7 +665,8 @@ create_log_files(
fil_space_create(
logfilename, SRV_LOG_SPACE_FIRST_ID,
fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
- FIL_LOG);
+ FIL_LOG,
+ NULL /* no encryption yet */);
ut_a(fil_validate());
logfile0 = fil_node_create(
@@ -802,6 +804,7 @@ open_or_create_data_files(
ulint space;
ulint rounded_size_pages;
char name[10000];
+ fil_space_crypt_t* crypt_data;
if (srv_n_data_files >= 1000) {
@@ -1021,7 +1024,7 @@ check_first_page:
min_arch_log_no, max_arch_log_no,
#endif /* UNIV_LOG_ARCHIVE */
min_flushed_lsn, max_flushed_lsn,
- ULINT_UNDEFINED);
+ ULINT_UNDEFINED, &crypt_data);
if (check_msg) {
@@ -1115,6 +1118,8 @@ check_first_page:
}
*sum_of_new_sizes += srv_data_file_sizes[i];
+
+ crypt_data = fil_space_create_crypt_data();
}
ret = os_file_close(files[i]);
@@ -1122,7 +1127,9 @@ check_first_page:
if (i == 0) {
flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE);
- fil_space_create(name, 0, flags, FIL_TABLESPACE);
+ fil_space_create(name, 0, flags, FIL_TABLESPACE,
+ crypt_data);
+ crypt_data = NULL;
}
ut_a(fil_validate());
@@ -1268,7 +1275,8 @@ srv_undo_tablespace_open(
/* Set the compressed page size to 0 (non-compressed) */
flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE);
- fil_space_create(name, space, flags, FIL_TABLESPACE);
+ fil_space_create(name, space, flags, FIL_TABLESPACE,
+ NULL /* no encryption */);
ut_a(fil_validate());
@@ -2257,7 +2265,8 @@ innobase_start_or_create_for_mysql(void)
fil_space_create(logfilename,
SRV_LOG_SPACE_FIRST_ID,
fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
- FIL_LOG);
+ FIL_LOG,
+ NULL /* no encryption yet */);
ut_a(fil_validate());
@@ -2313,6 +2322,11 @@ files_checked:
dict_stats_thread_init();
}
+ if (!srv_read_only_mode && srv_scrub_log) {
+ /* TODO(minliz): have/use log_scrub_thread_init() instead? */
+ log_scrub_event = os_event_create();
+ }
+
trx_sys_file_format_init();
trx_sys_create();
@@ -2917,6 +2931,16 @@ files_checked:
/* Create the thread that will optimize the FTS sub-system. */
fts_optimize_init();
+
+ /* Init data for datafile scrub threads */
+ btr_scrub_init();
+
+ /* Create thread(s) that handles key rotation */
+ fil_crypt_threads_init();
+
+ /* Create the log scrub thread */
+ if (srv_scrub_log)
+ os_thread_create(log_scrub_thread, NULL, NULL);
}
/* Initialize online defragmentation. */
@@ -2982,6 +3006,9 @@ innobase_shutdown_for_mysql(void)
fts_optimize_start_shutdown();
fts_optimize_end();
+
+ /* Shutdown key rotation threads */
+ fil_crypt_threads_end();
}
/* 1. Flush the buffer pool to disk, write the current lsn to
@@ -3090,6 +3117,18 @@ innobase_shutdown_for_mysql(void)
if (!srv_read_only_mode) {
dict_stats_thread_deinit();
+ if (srv_scrub_log) {
+ /* TODO(minliz): have/use log_scrub_thread_deinit() instead? */
+ os_event_free(log_scrub_event);
+ log_scrub_event = NULL;
+ }
+ }
+
+ if (!srv_read_only_mode) {
+ fil_crypt_threads_cleanup();
+
+ /* Cleanup data for datafile scrubbing */
+ btr_scrub_cleanup();
}
#ifdef __WIN__