summaryrefslogtreecommitdiff
path: root/storage/innobase/os/os0file.cc
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/os/os0file.cc')
-rw-r--r--storage/innobase/os/os0file.cc1238
1 files changed, 1083 insertions, 155 deletions
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index 0d63b6c091c..047eec7949c 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -125,6 +125,30 @@ static const ulint IO_LOG_SEGMENT = 1;
/** Number of retries for partial I/O's */
static const ulint NUM_RETRIES_ON_PARTIAL_IO = 10;
+/** Blocks for doing IO, used in the transparent compression
+and encryption code. A block is claimed by atomically setting m_in_use
+to 1 (see os_alloc_block()) and released by resetting it to 0 (see
+os_free_block()). */
+struct Block {
+ /** Default constructor: value-initializes m_ptr and m_in_use, so a
+ freshly constructed block has no buffer attached and is not in use. */
+ Block() : m_ptr(), m_in_use() { }
+
+ byte* m_ptr; /*!< buffer that the IO code reads from / writes to */
+
+ byte pad[CACHE_LINE_SIZE - sizeof(ulint)]; /*!< padding between m_ptr and m_in_use; presumably keeps the flag off the cache line of m_ptr - TODO confirm */
+ lock_word_t m_in_use; /*!< non-zero while the block is claimed */
+};
+
+/** For storing the allocated blocks */
+typedef std::vector<Block> Blocks;
+
+/** Block collection */
+static Blocks* block_cache;
+
+/** Number of blocks to allocate for sync read/writes */
+static const size_t MAX_BLOCKS = 128;
+
+/** Block buffer size */
+#define BUFFER_BLOCK_SIZE ((ulint)(UNIV_PAGE_SIZE * 1.3))
+
/* This specifies the file permissions InnoDB uses when it creates files in
Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to
my_umask */
@@ -259,7 +283,8 @@ struct Slot {
to the caller of os_aio_simulated_handle */
bool io_already_done;
- ulint file_block_size;/*!< file block size */
+ /** file block size */
+ ulint file_block_size;
/** The file node for which the IO is requested. */
fil_node_t* m1;
@@ -307,6 +332,9 @@ struct Slot {
/** Length of the block before it was compressed */
uint32 original_len;
+ /** Buffer block for compressed pages or encrypted pages */
+ Block* buf_block;
+
/** Unaligned buffer for compressed pages */
byte* compressed_ptr;
@@ -360,7 +388,7 @@ public:
os_offset_t offset,
ulint len,
ulint* write_size)
- __attribute__((warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/** @return number of reserved slots */
ulint pending_io_count() const;
@@ -369,7 +397,7 @@ public:
@param[in] index Index of the slot in the array
@return pointer to slot */
const Slot* at(ulint i) const
- __attribute__((warn_unused_result))
+ MY_ATTRIBUTE((warn_unused_result))
{
ut_a(i < m_slots.size());
@@ -378,7 +406,7 @@ public:
/** Non const version */
Slot* at(ulint i)
- __attribute__((warn_unused_result))
+ MY_ATTRIBUTE((warn_unused_result))
{
ut_a(i < m_slots.size());
@@ -399,14 +427,14 @@ public:
/** @return the number of slots per segment */
ulint slots_per_segment() const
- __attribute__((warn_unused_result))
+ MY_ATTRIBUTE((warn_unused_result))
{
return(m_slots.size() / m_n_segments);
}
/** @return accessor for n_segments */
ulint get_n_segments() const
- __attribute__((warn_unused_result))
+ MY_ATTRIBUTE((warn_unused_result))
{
return(m_n_segments);
}
@@ -414,7 +442,7 @@ public:
#ifdef UNIV_DEBUG
/** @return true if the thread owns the mutex */
bool is_mutex_owned() const
- __attribute__((warn_unused_result))
+ MY_ATTRIBUTE((warn_unused_result))
{
return(mutex_own(&m_mutex));
}
@@ -441,13 +469,13 @@ public:
@param[in,out] slot an already reserved slot
@return true on success. */
bool linux_dispatch(Slot* slot)
- __attribute__((warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/** Accessor for an AIO event
@param[in] index Index into the array
@return the event at the index */
io_event* io_events(ulint index)
- __attribute__((warn_unused_result))
+ MY_ATTRIBUTE((warn_unused_result))
{
ut_a(index < m_events.size());
@@ -458,7 +486,7 @@ public:
@param[in] segment Segment for which to get the context
@return the AIO context for the segment */
io_context* io_ctx(ulint segment)
- __attribute__((warn_unused_result))
+ MY_ATTRIBUTE((warn_unused_result))
{
ut_ad(segment < get_n_segments());
@@ -470,7 +498,7 @@ public:
@param[out] io_ctx io_ctx to initialize.
@return true on success. */
static bool linux_create_io_ctx(ulint max_events, io_context_t* io_ctx)
- __attribute__((warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/** Checks if the system supports native linux aio. On some kernel
versions where native aio is supported it won't work on tmpfs. In such
@@ -478,7 +506,7 @@ public:
and native aio.
@return true if supported, false otherwise. */
static bool is_linux_native_aio_supported()
- __attribute__((warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
#endif /* LINUX_NATIVE_AIO */
#ifdef WIN_ASYNC_IO
@@ -520,7 +548,7 @@ public:
/** The non asynchronous IO array.
@return the synchronous AIO array instance. */
static AIO* sync_array()
- __attribute__((warn_unused_result))
+ MY_ATTRIBUTE((warn_unused_result))
{
return(s_sync);
}
@@ -530,7 +558,7 @@ public:
@param[in] segment The local segment.
@return the handles for the segment. */
HANDLE* handles(ulint segment)
- __attribute__((warn_unused_result))
+ MY_ATTRIBUTE((warn_unused_result))
{
ut_ad(segment < m_handles->size() / slots_per_segment());
@@ -539,7 +567,7 @@ public:
/** @return true if no slots are reserved */
bool is_empty() const
- __attribute__((warn_unused_result))
+ MY_ATTRIBUTE((warn_unused_result))
{
ut_ad(is_mutex_owned());
return(m_n_reserved == 0);
@@ -555,7 +583,7 @@ public:
latch_id_t id,
ulint n_slots,
ulint segments)
- __attribute__((warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/** Initializes the asynchronous io system. Creates one array each
for ibuf and log I/O. Also creates one array each for read and write
@@ -574,7 +602,7 @@ public:
ulint n_readers,
ulint n_writers,
ulint n_slots_sync)
- __attribute__((warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/** Free the AIO arrays */
static void shutdown();
@@ -591,7 +619,7 @@ public:
static ulint get_array_and_local_segment(
AIO** array,
ulint segment)
- __attribute__((warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/** Select the IO slot array
@param[in] type Type of IO, READ or WRITE
@@ -602,7 +630,7 @@ public:
IORequest& type,
bool read_only,
ulint mode)
- __attribute__((warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/** Calculates segment number for a slot.
@param[in] array AIO wait array
@@ -612,7 +640,7 @@ public:
static ulint get_segment_no_from_slot(
const AIO* array,
const Slot* slot)
- __attribute__((warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/** Wakes up a simulated AIO I/O-handler thread if it has something
to do.
@@ -624,7 +652,7 @@ public:
@param[in] aio The AIO instance to check
@return true if the AIO instance is for reading. */
static bool is_read(const AIO* aio)
- __attribute__((warn_unused_result))
+ MY_ATTRIBUTE((warn_unused_result))
{
return(s_reads == aio);
}
@@ -648,7 +676,7 @@ private:
/** Initialise the slots
@return DB_SUCCESS or error code */
dberr_t init_slots()
- __attribute__((warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
/** Wakes up a simulated AIO I/O-handler thread if it has something
to do for a local segment in the AIO array.
@@ -671,7 +699,7 @@ private:
/** Initialise the Linux native AIO data structures
@return DB_SUCCESS or error code */
dberr_t init_linux_native_aio()
- __attribute__((warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
#endif /* LINUX_NATIVE_AIO */
private:
@@ -898,6 +926,77 @@ os_aio_windows_handler(
IORequest* type);
#endif /* WIN_ASYNC_IO */
+#ifdef MYSQL_COMPRESSION_ENCRYPTION
+/** Allocate a block for sync IO. First tries to claim a free block from
+block_cache, starting at a position derived from my_timer_cycles() and
+yielding the CPU after every failed attempt. After MAX_BLOCKS * 3 failed
+attempts a temporary block is allocated from the heap instead.
+@return pointer to a block whose m_in_use is set; the caller must release
+it with os_free_block() */
+static
+Block*
+os_alloc_block()
+{
+ size_t pos;
+ Blocks& blocks = *block_cache;
+ size_t i = static_cast<size_t>(my_timer_cycles());
+ const size_t size = blocks.size();
+ ulint retry = 0;
+ Block* block;
+
+ DBUG_EXECUTE_IF("os_block_cache_busy", retry = MAX_BLOCKS * 3;);
+
+ for (;;) {
+
+ /* After MAX_BLOCKS * 3 failed attempts to claim a cached
+ block, allocate a new temporary block. */
+ if (retry == MAX_BLOCKS * 3) {
+ byte* ptr;
+
+ /* The Block header and its buffer share one
+ allocation, so os_free_block() can release both
+ with a single ut_free(). */
+ ptr = static_cast<byte*>(
+ ut_malloc_nokey(sizeof(*block)
+ + BUFFER_BLOCK_SIZE));
+
+ /* Constructing a Block at a NULL address would be
+ undefined behaviour; fail with an assertion instead,
+ as os_aio_init() does for the cached blocks. */
+ ut_a(ptr != NULL);
+
+ block = new (ptr) Block();
+ block->m_ptr = static_cast<byte*>(
+ ptr + sizeof(*block));
+ block->m_in_use = 1;
+
+ break;
+ }
+
+ pos = i++ % size;
+
+ /* TAS returning 0 means the flag was clear before this
+ call, i.e. this thread is the one that claimed the block. */
+ if (TAS(&blocks[pos].m_in_use, 1) == 0) {
+ block = &blocks[pos];
+ break;
+ }
+
+ os_thread_yield();
+
+ ++retry;
+ }
+
+ ut_a(block->m_in_use != 0);
+
+ return(block);
+}
+
+/** Free a page after sync IO. Clears the in-use flag of the block; a block
+whose address lies outside the block_cache vector is additionally returned
+to the heap with ut_free().
+@param[in,own] block The block to free/release; its m_in_use must be 1
+ (checked in debug builds only) */
+static
+void
+os_free_block(Block* block)
+{
+ ut_ad(block->m_in_use == 1);
+
+ TAS(&block->m_in_use, 0);
+
+ /* When this block is not in the block cache, and it's
+ a temporary block, we need to free it directly. The address is
+ compared against the first and last elements of the cache;
+ std::less/std::greater are used rather than the built-in operators,
+ presumably because the latter are unspecified for pointers into
+ unrelated allocations - TODO confirm. */
+ if (std::less<Block*>()(block, &block_cache->front())
+ || std::greater<Block*>()(block, &block_cache->back())) {
+ ut_free(block);
+ }
+}
+#endif /* MYSQL_COMPRESSION_ENCRYPTION */
+
/** Generic AIO Handler methods. Currently handles IO post processing. */
class AIOHandler {
public:
@@ -920,6 +1019,18 @@ public:
}
private:
+ /** Check whether the page was encrypted.
+ @param[in] slot The slot that contains the IO request
+ @return true if it was an encrypted page; always false when the
+ code is built without MYSQL_ENCRYPTION */
+ static bool is_encrypted_page(const Slot* slot)
+ {
+#ifdef MYSQL_ENCRYPTION
+ return(Encryption::is_encrypted_page(slot->buf));
+#else
+ return (false);
+#endif
+ }
+
/** Check whether the page was compressed.
@param[in] slot The slot that contains the IO request
@return true if it was a compressed page */
@@ -994,14 +1105,6 @@ public:
m_offset(offset)
{
ut_ad(m_n > 0);
-
- /* If off_t is > 4 bytes in size, then we assume we can pass a
- 64-bit address */
- off_t offs = static_cast<off_t>(m_offset);
-
- if (sizeof(off_t) <= 4 && m_offset != (os_offset_t) offs) {
- ib::error() << "file write at offset > 4 GB.";
- }
}
/** Destructor */
@@ -1092,7 +1195,7 @@ os_file_original_page_size(const byte* buf)
dberr_t
AIOHandler::check_read(Slot* slot, ulint n_bytes)
{
- dberr_t err;
+ dberr_t err=DB_SUCCESS;
ut_ad(slot->type.is_read());
ut_ad(slot->original_len > slot->len);
@@ -1119,10 +1222,29 @@ AIOHandler::check_read(Slot* slot, ulint n_bytes)
err = DB_FAIL;
}
+ } else if (is_encrypted_page(slot)) {
+ ut_a(slot->offset > 0);
+
+ slot->len = slot->original_len;
+#ifdef _WIN32
+ slot->n_bytes = static_cast<DWORD>(n_bytes);
+#else
+ slot->n_bytes = static_cast<ulint>(n_bytes);
+#endif /* _WIN32 */
+
+ err = io_complete(slot);
+ ut_a(err == DB_SUCCESS);
+
} else {
err = DB_FAIL;
}
+#ifdef MYSQL_COMPRESSION_ENCRYPTION
+ if (slot->buf_block != NULL) {
+ os_free_block(slot->buf_block);
+ slot->buf_block = NULL;
+ }
+#endif
return(err);
}
@@ -1131,7 +1253,7 @@ AIOHandler::check_read(Slot* slot, ulint n_bytes)
dberr_t
AIOHandler::post_io_processing(Slot* slot)
{
- dberr_t err;
+ dberr_t err=DB_SUCCESS;
ut_ad(slot->is_reserved);
@@ -1145,7 +1267,10 @@ AIOHandler::post_io_processing(Slot* slot)
&& slot->type.is_compressed()
&& slot->len == static_cast<ulint>(slot->n_bytes))) {
- if (!slot->type.is_log() && is_compressed_page(slot)) {
+#ifdef MYSQL_COMPRESSION
+ if (!slot->type.is_log()
+ && (is_compressed_page(slot)
+ || is_encrypted_page(slot))) {
ut_a(slot->offset > 0);
@@ -1170,6 +1295,11 @@ AIOHandler::post_io_processing(Slot* slot)
err = DB_SUCCESS;
}
+ if (slot->buf_block != NULL) {
+ os_free_block(slot->buf_block);
+ slot->buf_block = NULL;
+ }
+#endif /* MYSQL_COMPRESSION */
} else if ((ulint) slot->n_bytes == (ulint) slot->len) {
/* It *must* be a partial read. */
@@ -1220,6 +1350,7 @@ AIO::pending_io_count() const
return(reserved);
}
+#ifdef MYSQL_COMPRESSION
/** Compress a data page
@param[in] block_size File system block size
@param[in] src Source contents to compress
@@ -1241,7 +1372,7 @@ os_file_compress_page(
ulint compression_level = page_zip_level;
ulint page_type = mach_read_from_2(src + FIL_PAGE_TYPE);
- /* Must be divisible by the file system block size. */
+ /* The page size must be a multiple of the OS punch hole size. */
ut_ad(!(src_len % block_size));
/* Shouldn't compress an already compressed page. */
@@ -1255,7 +1386,6 @@ os_file_compress_page(
if (page_type == FIL_PAGE_RTREE
|| block_size == ULINT_UNDEFINED
|| compression.m_type == Compression::NONE
- || block_size >= src_len
|| src_len < block_size * 2) {
*dst_len = src_len;
@@ -1367,6 +1497,7 @@ os_file_compress_page(
return(dst);
}
+#endif /* MYSQL_COMPRESSION */
#ifdef UNIV_DEBUG
# ifndef UNIV_HOTBACKUP
@@ -1547,12 +1678,17 @@ AIO::release_with_mutex(Slot* slot)
}
/** Creates a temporary file. This function is like tmpfile(3), but
-@return temporary file handle, or NULL on error */
+the temporary file is created in the given parameter path. If the path
+is NULL then it will create the file in the MySQL server configuration
+parameter (--tmpdir).
+@param[in] path location for creating temporary file
+@return temporary file handle, or NULL on error */
FILE*
-os_file_create_tmpfile()
+os_file_create_tmpfile(
+ const char* path)
{
FILE* file = NULL;
- int fd = innobase_mysql_tmpfile();
+ int fd = innobase_mysql_tmpfile(path);
if (fd >= 0) {
file = fdopen(fd, "w+b");
@@ -1613,6 +1749,7 @@ os_file_io_complete(
ulint offset,
ulint len)
{
+#ifdef MYSQL_ENCRYPTION_COMPRESSION
/* We never compress/decompress the first page */
ut_a(offset > 0);
ut_ad(type.validate());
@@ -1622,11 +1759,19 @@ os_file_io_complete(
return(DB_SUCCESS);
} else if (type.is_read()) {
+ dberr_t ret = DB_SUCCESS;
+ Encryption encryption(type.encryption_algorithm());
ut_ad(!type.is_log());
- return(os_file_decompress_page(
- type.is_dblwr_recover(), buf, scratch, len));
+ ret = encryption.decrypt(type, buf, src_len, scratch, len);
+ if (ret == DB_SUCCESS) {
+ return(os_file_decompress_page(
+ type.is_dblwr_recover(),
+ buf, scratch, len));
+ } else {
+ return(ret);
+ }
} else if (type.punch_hole()) {
@@ -1660,6 +1805,7 @@ os_file_io_complete(
}
ut_ad(!type.is_log());
+#endif /* MYSQL_ENCRYPTION_COMPRESSION */
return(DB_SUCCESS);
}
@@ -1961,6 +2107,7 @@ os_file_create_subdirs_if_needed(
return(success ? DB_SUCCESS : DB_ERROR);
}
+#ifdef MYSQL_COMPRESSION
/** Allocate the buffer for IO on a transparently compressed table.
@param[in] type IO flags
@param[out] buf buffer to read or write
@@ -1969,7 +2116,7 @@ os_file_create_subdirs_if_needed(
@return pointer to allocated page, compressed data is written to the offset
that is aligned on UNIV_SECTOR_SIZE of Block.m_ptr */
static
-byte*
+Block*
os_file_compress_page(
IORequest& type,
void*& buf,
@@ -1981,13 +2128,13 @@ os_file_compress_page(
ulint n_alloc = *n * 2;
- ut_a(n_alloc < UNIV_PAGE_SIZE_MAX * 2);
+ ut_a(n_alloc <= UNIV_PAGE_SIZE_MAX * 2);
#ifdef HAVE_LZ4
ut_a(type.compression_algorithm().m_type != Compression::LZ4
|| static_cast<ulint>(LZ4_COMPRESSBOUND(*n)) < n_alloc);
#endif
- byte* ptr = reinterpret_cast<byte*>(ut_malloc_nokey(n_alloc));
+ Block* ptr = reinterpret_cast<Block*>(ut_malloc_nokey(n_alloc));
if (ptr == NULL) {
return(NULL);
@@ -2009,7 +2156,7 @@ os_file_compress_page(
byte* compressed_page;
compressed_page = static_cast<byte*>(
- ut_align(ptr, UNIV_SECTOR_SIZE));
+ ut_align(block->m_ptr, UNIV_SECTOR_SIZE));
byte* buf_ptr;
@@ -2037,8 +2184,54 @@ os_file_compress_page(
}
}
- return(ptr);
+ return(block);
}
+#endif /* MYSQL_COMPRESSION */
+
+#ifdef MYSQL_ENCRYPTION
+/** Encrypt the content of a page before it is written to disk. A block is
+taken from os_alloc_block() and Encryption::encrypt() is given the
+UNIV_SECTOR_SIZE-aligned address that ut_align() derives from its buffer
+as the destination.
+@param[in] type IO flags
+@param[in,out] buf in: buffer holding the page to write; out: set to
+ the pointer returned by Encryption::encrypt() when that
+ differs from the input, otherwise unchanged
+@param[in,out] n in: number of bytes to write; out: length reported
+ by Encryption::encrypt() when buf was redirected,
+ otherwise unchanged
+@return the block backing the encrypted page; it is returned even when
+buf was left unchanged, and the caller releases it with os_free_block() */
+static
+Block*
+os_file_encrypt_page(
+ const IORequest& type,
+ void*& buf,
+ ulint* n)
+{
+
+ byte* encrypted_page;
+ ulint encrypted_len = *n;
+ byte* buf_ptr;
+ Encryption encryption(type.encryption_algorithm());
+
+ ut_ad(!type.is_log());
+ ut_ad(type.is_write());
+ ut_ad(type.is_encrypted());
+
+ Block* block = os_alloc_block();
+
+ encrypted_page = static_cast<byte*>(
+ ut_align(block->m_ptr, UNIV_SECTOR_SIZE));
+
+ buf_ptr = encryption.encrypt(type,
+ reinterpret_cast<byte*>(buf), *n,
+ encrypted_page, &encrypted_len);
+
+ bool encrypted = buf_ptr != buf; /* a different pointer means the data was encrypted */
+
+ if (encrypted) {
+
+ buf = buf_ptr;
+ *n = encrypted_len;
+ }
+
+ return(block);
+}
+#endif /* MYSQL_ENCRYPTION */
#ifndef _WIN32
@@ -2272,7 +2465,7 @@ LinuxAIOHandler::check_state(Slot* slot)
ut_ad(slot->io_already_done);
- dberr_t err;
+ dberr_t err = DB_SUCCESS;
if (slot->ret == 0) {
@@ -2469,7 +2662,7 @@ LinuxAIOHandler::collect()
dberr_t
LinuxAIOHandler::poll(fil_node_t** m1, void** m2, IORequest* request)
{
- dberr_t err;
+ dberr_t err = DB_SUCCESS;
Slot* slot;
/* Loop until we have found a completed request. */
@@ -2722,7 +2915,7 @@ AIO::is_linux_native_aio_supported()
} else if (!srv_read_only_mode) {
/* Now check if tmpdir supports native aio ops. */
- fd = innobase_mysql_tmpfile();
+ fd = innobase_mysql_tmpfile(NULL);
if (fd < 0) {
ib::warn()
@@ -2733,7 +2926,7 @@ AIO::is_linux_native_aio_supported()
}
} else {
- os_normalize_path_for_win(srv_log_group_home_dir);
+ os_normalize_path(srv_log_group_home_dir);
ulint dirnamelen = strlen(srv_log_group_home_dir);
@@ -3522,11 +3715,11 @@ os_file_create_func(
} while (retry);
/* We disable OS caching (O_DIRECT) only on data files */
- if (!srv_read_only_mode
- && *success
- && type != OS_LOG_FILE
- && (srv_unix_file_flush_method == SRV_UNIX_O_DIRECT
- || srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC)) {
+ if (!read_only
+ && *success
+ && (type != OS_LOG_FILE && type != OS_DATA_TEMP_FILE)
+ && (srv_unix_file_flush_method == SRV_UNIX_O_DIRECT
+ || srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC)) {
os_file_set_nocache(file, name, mode_str);
}
@@ -5300,6 +5493,7 @@ AIO::simulated_put_read_threads_to_sleep()
#endif /* !_WIN32*/
+#ifdef MYSQL_COMPRESSION
/** Validate the type, offset and number of bytes to read
@param[in] type IO flags
@param[in] offset Offset from start of the file
@@ -5321,6 +5515,7 @@ os_file_check_args(const IORequest& type, os_offset_t offset, ulint n)
ib::error() << "file write at offset > 4 GB.";
}
}
+#endif /* MYSQL_COMPRESSION */
/** Does a synchronous read or write depending upon the type specified
In case of partial reads/writes the function tries
@@ -5332,7 +5527,7 @@ NUM_RETRIES_ON_PARTIAL_IO times to read/write the complete data.
@param[in] n number of bytes to read, starting from offset
@param[out] err DB_SUCCESS or error code
@return number of bytes read/written, -1 if error */
-static __attribute__((warn_unused_result))
+static MY_ATTRIBUTE((warn_unused_result))
ssize_t
os_file_io(
const IORequest&in_type,
@@ -5342,26 +5537,45 @@ os_file_io(
os_offset_t offset,
dberr_t* err)
{
- byte* ptr;
ulint original_n = n;
IORequest type = in_type;
- byte* compressed_page;
+ byte* compressed_page=NULL;
ssize_t bytes_returned = 0;
+#ifdef MYSQL_COMPRESSION
+ Block* block=NULL;
if (type.is_compressed()) {
/* We don't compress the first page of any file. */
ut_ad(offset > 0);
- ptr = os_file_compress_page(type, buf, &n);
+ block = os_file_compress_page(type, buf, &n);
compressed_page = static_cast<byte*>(
- ut_align(ptr, UNIV_SECTOR_SIZE));
+ ut_align(block->m_ptr, UNIV_SECTOR_SIZE));
} else {
- ptr = NULL;
+ block = NULL;
compressed_page = NULL;
}
+#endif /* MYSQL_COMPRESSION */
+
+#ifdef MYSQL_ENCRYPTION
+ /* We encrypt after compressing, because if we encrypted first,
+ the encrypted data would cause the compression to fail or to
+ achieve a low compression ratio. */
+ if (type.is_encrypted() && type.is_write()) {
+ /* We don't encrypt the first page of any file. */
+ Block* compressed_block = block;
+ ut_ad(offset > 0);
+
+ block = os_file_encrypt_page(type, buf, &n);
+
+ if (compressed_block != NULL) {
+ os_free_block(compressed_block);
+ }
+ }
+#endif /* MYSQL_ENCRYPTION */
SyncFileIO sync_file_io(file, buf, n, offset);
@@ -5387,14 +5601,15 @@ os_file_io(
compressed_page, original_n,
static_cast<ulint>(offset), n);
- if (ptr != NULL) {
- ut_free(ptr);
- }
-
} else {
*err = DB_SUCCESS;
}
+#ifdef MYSQL_COMPRESSION
+ if (block != NULL) {
+ os_free_block(block);
+ }
+#endif
return(original_n);
}
@@ -5422,9 +5637,11 @@ os_file_io(
sync_file_io.advance(n_bytes);
}
- if (ptr != NULL) {
- ut_free(ptr);
+#ifdef MYSQL_COMPRESSION
+ if (block != NULL) {
+ os_free_block(block);
}
+#endif
*err = DB_IO_ERROR;
@@ -5446,7 +5663,7 @@ os_file_io(
@param[in] offset file offset from the start where to read
@param[out] err DB_SUCCESS or error code
@return number of bytes written, -1 if error */
-static __attribute__((warn_unused_result))
+static MY_ATTRIBUTE((warn_unused_result))
ssize_t
os_file_pwrite(
IORequest& type,
@@ -5478,7 +5695,7 @@ os_file_pwrite(
@param[in] offset file offset from the start where to read
@param[in] n number of bytes to read, starting from offset
@return DB_SUCCESS if request was successful, false if fail */
-static __attribute__((warn_unused_result))
+static MY_ATTRIBUTE((warn_unused_result))
dberr_t
os_file_write_page(
IORequest& type,
@@ -5490,8 +5707,9 @@ os_file_write_page(
{
dberr_t err;
- os_file_check_args(type, offset, n);
-
+ ut_ad(type.validate());
+ ut_ad(n > 0);
+
ssize_t n_bytes = os_file_pwrite(type, file, buf, n, offset, &err);
if ((ulint) n_bytes != n && !os_has_said_disk_full) {
@@ -5530,7 +5748,7 @@ os_file_write_page(
@param[in] n number of bytes to read, starting from offset
@param[out] err DB_SUCCESS or error code
@return number of bytes read, -1 if error */
-static __attribute__((warn_unused_result))
+static MY_ATTRIBUTE((warn_unused_result))
ssize_t
os_file_pread(
IORequest& type,
@@ -5563,7 +5781,7 @@ os_file_pread(
@param[out] o number of bytes actually read
@param[in] exit_on_err if true then exit on error
@return DB_SUCCESS or error code */
-static __attribute__((warn_unused_result))
+static MY_ATTRIBUTE((warn_unused_result))
dberr_t
os_file_read_page(
IORequest& type,
@@ -5578,7 +5796,8 @@ os_file_read_page(
os_bytes_read_since_printout += n;
- os_file_check_args(type, offset, n);
+ ut_ad(type.validate());
+ ut_ad(n > 0);
for (;;) {
ssize_t n_bytes;
@@ -5595,6 +5814,7 @@ os_file_read_page(
} else if ((ulint) n_bytes == n) {
+#ifdef MYSQL_COMPRESSION
/** The read will succeed but decompress can fail
for various reasons. */
@@ -5607,11 +5827,14 @@ os_file_read_page(
} else {
return(err);
}
+#else
+ return(DB_SUCCESS);
+#endif /* MYSQL_COMPRESSION */
}
- ib::error()
- << "Tried to read " << n << " bytes at offset "
- << offset << " was only able to read" << n_bytes;
+ ib::error() << "Tried to read " << n
+ << " bytes at offset " << offset
+ << ", but was only able to read " << n_bytes;
if (exit_on_err) {
@@ -5664,7 +5887,7 @@ and the error type, if should_exit is true then on_error_silent is ignored.
@param[in] on_error_silent if true then don't print any message to the log
iff it is an unknown non-fatal error
@return true if we should retry the operation */
-static __attribute__((warn_unused_result))
+static MY_ATTRIBUTE((warn_unused_result))
bool
os_file_handle_error_cond_exit(
const char* name,
@@ -5785,9 +6008,9 @@ os_file_handle_error_no_exit(
message */
void
os_file_set_nocache(
- os_file_t fd __attribute__((unused)),
- const char* file_name __attribute__((unused)),
- const char* operation_name __attribute__((unused)))
+ os_file_t fd MY_ATTRIBUTE((unused)),
+ const char* file_name MY_ATTRIBUTE((unused)),
+ const char* operation_name MY_ATTRIBUTE((unused)))
{
/* some versions of Solaris may not have DIRECTIO_ON */
#if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON)
@@ -6064,6 +6287,13 @@ os_file_punch_hole(
os_offset_t off,
os_offset_t len)
{
+ /* In this debugging mode, we act as if punch hole is supported,
+ and then skip any calls to actually punch a hole here.
+ In this way, Transparent Page Compression is still being tested. */
+ DBUG_EXECUTE_IF("ignore_punch_hole",
+ return(DB_SUCCESS);
+ );
+
#ifdef _WIN32
return(os_file_punch_hole_win32(fh, off, len));
#else
@@ -6085,6 +6315,13 @@ Note: On Windows we use the name and on Unices we use the file handle.
bool
os_is_sparse_file_supported(const char* path, os_file_t fh)
{
+ /* In this debugging mode, we act as if punch hole is supported,
+ then we skip any calls to actually punch a hole. In this way,
+ Transparent Page Compression is still being tested. */
+ DBUG_EXECUTE_IF("ignore_punch_hole",
+ return(true);
+ );
+
#ifdef _WIN32
return(os_is_sparse_file_supported_win32(path));
#else
@@ -6575,6 +6812,26 @@ os_aio_init(
}
#endif /* _WIN32 */
+ ut_a(block_cache == NULL);
+
+ block_cache = UT_NEW_NOKEY(Blocks(MAX_BLOCKS));
+
+ for (Blocks::iterator it = block_cache->begin();
+ it != block_cache->end();
+ ++it) {
+
+ ut_a(it->m_in_use == 0);
+ ut_a(it->m_ptr == NULL);
+
+ /* Allocate BUFFER_BLOCK_SIZE bytes (1.3 times the page
+ size), since compression could generate more bytes than the
+ original data. */
+ it->m_ptr = static_cast<byte*>(
+ ut_malloc_nokey(BUFFER_BLOCK_SIZE));
+
+ ut_a(it->m_ptr != NULL);
+ }
+
return(AIO::start(limit, n_readers, n_writers, n_slots_sync));
}
@@ -6591,6 +6848,18 @@ os_aio_free()
ut_free(os_aio_segment_wait_events);
os_aio_segment_wait_events = 0;
os_aio_n_segments = 0;
+
+ for (Blocks::iterator it = block_cache->begin();
+ it != block_cache->end();
+ ++it) {
+
+ ut_a(it->m_in_use == 0);
+ ut_free(it->m_ptr);
+ }
+
+ UT_DELETE(block_cache);
+
+ block_cache = NULL;
}
/** Wakes up all async i/o threads so that they know to exit themselves in
@@ -6796,70 +7065,78 @@ AIO::reserve_slot(
slot->is_log = type.is_log();
slot->original_len = static_cast<uint32>(len);
slot->io_already_done = false;
+ slot->buf_block = NULL;
slot->buf = static_cast<byte*>(buf);
+#ifdef MYSQL_COMPRESSION
if (srv_use_native_aio
&& offset > 0
&& type.is_write()
&& type.is_compressed()) {
+ ulint compressed_len = len;
ut_ad(!type.is_log());
release();
- ulint compressed_len = len;
-
- ulint old_compressed_len;
-
- old_compressed_len = mach_read_from_2(
- slot->buf + FIL_PAGE_COMPRESS_SIZE_V1);
+ void* src_buf = slot->buf;
- if (old_compressed_len > 0) {
- old_compressed_len = ut_calc_align(
- old_compressed_len + FIL_PAGE_DATA,
- slot->type.block_size());
- }
-
- byte* ptr;
-
- ptr = os_file_compress_page(
- slot->type.compression_algorithm(),
- slot->type.block_size(),
- slot->buf,
- slot->len,
- slot->compressed_page,
+ slot->buf_block = os_file_compress_page(
+ type,
+ src_buf,
&compressed_len);
- if (ptr != buf) {
- /* Set new compressed size to uncompressed page. */
- memcpy(slot->buf + FIL_PAGE_COMPRESS_SIZE_V1,
- slot->compressed_page
- + FIL_PAGE_COMPRESS_SIZE_V1, 2);
+ slot->buf = static_cast<byte*>(src_buf);
+ slot->ptr = slot->buf;
#ifdef _WIN32
- slot->len = static_cast<DWORD>(compressed_len);
+ slot->len = static_cast<DWORD>(compressed_len);
#else
- slot->len = static_cast<ulint>(compressed_len);
+ slot->len = static_cast<ulint>(compressed_len);
#endif /* _WIN32 */
- slot->buf = slot->compressed_page;
- slot->ptr = slot->buf;
+ slot->skip_punch_hole = type.punch_hole();
- if (old_compressed_len > 0
- && compressed_len >= old_compressed_len) {
+ acquire();
+ }
+#endif /* MYSQL_COMPRESSION */
- ut_ad(old_compressed_len <= UNIV_PAGE_SIZE);
+#ifdef MYSQL_ENCRYPTION
+ /* We encrypt after compressing, because if we encrypted first,
+ the encrypted data would cause the compression to fail or to
+ achieve a low compression ratio. */
+ if (srv_use_native_aio
+ && offset > 0
+ && type.is_write()
+ && type.is_encrypted()) {
+ ulint encrypted_len = slot->len;
+ Block* encrypted_block;
- slot->skip_punch_hole = true;
+ ut_ad(!type.is_log());
- } else {
- slot->skip_punch_hole = false;
- }
+ release();
- } else {
- slot->skip_punch_hole = false;
+ void* src_buf = slot->buf;
+ encrypted_block = os_file_encrypt_page(
+ type,
+ src_buf,
+ &encrypted_len);
+
+ if (slot->buf_block != NULL) {
+ os_free_block(slot->buf_block);
}
+ slot->buf_block = encrypted_block;
+ slot->buf = static_cast<byte*>(src_buf);
+ slot->ptr = slot->buf;
+
+#ifdef _WIN32
+ slot->len = static_cast<DWORD>(encrypted_len);
+#else
+ slot->len = static_cast<ulint>(encrypted_len);
+#endif /* _WIN32 */
+
acquire();
- }
+ }
+#endif /* MYSQL_ENCRYPTION */
#ifdef WIN_ASYNC_IO
{
@@ -7501,7 +7778,7 @@ public:
all data, and perform the I/O
@return the length of the buffer */
ulint allocate_buffer()
- __attribute__((warn_unused_result))
+ MY_ATTRIBUTE((warn_unused_result))
{
ulint len;
Slot* slot = first_slot();
@@ -7591,7 +7868,7 @@ public:
/** @return the first slot in the consecutive array */
Slot* first_slot()
- __attribute__((warn_unused_result))
+ MY_ATTRIBUTE((warn_unused_result))
{
ut_a(m_n_elems > 0);
@@ -7605,7 +7882,7 @@ public:
ulint check_pending(
ulint global_segment,
os_event_t event)
- __attribute__((warn_unused_result));
+ MY_ATTRIBUTE((warn_unused_result));
private:
/** Do the file read
@@ -8095,7 +8372,7 @@ os_aio_print(FILE* file)
for (ulint i = 0; i < srv_n_file_io_threads; ++i) {
fprintf(file, "I/O thread %lu state: %s (%s)",
- (ulong) i,
+ (ulint) i,
srv_io_thread_op_info[i],
srv_io_thread_function[i]);
@@ -8119,17 +8396,17 @@ os_aio_print(FILE* file)
fprintf(file,
"Pending flushes (fsync) log: %lu; buffer pool: %lu\n"
"%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n",
- (ulong) fil_n_pending_log_flushes,
- (ulong) fil_n_pending_tablespace_flushes,
- (ulong) os_n_file_reads,
- (ulong) os_n_file_writes,
- (ulong) os_n_fsyncs);
+ (ulint) fil_n_pending_log_flushes,
+ (ulint) fil_n_pending_tablespace_flushes,
+ (ulint) os_n_file_reads,
+ (ulint) os_n_file_writes,
+ (ulint) os_n_fsyncs);
if (os_n_pending_writes != 0 || os_n_pending_reads != 0) {
fprintf(file,
"%lu pending preads, %lu pending pwrites\n",
(ulint) os_n_pending_reads,
- (ulong) os_n_pending_writes);
+ (ulint) os_n_pending_writes);
}
if (os_n_file_reads == os_n_file_reads_old) {
@@ -8144,7 +8421,7 @@ os_aio_print(FILE* file)
" %.2f writes/s, %.2f fsyncs/s\n",
(os_n_file_reads - os_n_file_reads_old)
/ time_elapsed,
- (ulong) avg_bytes_read,
+ (ulint) avg_bytes_read,
(os_n_file_writes - os_n_file_writes_old)
/ time_elapsed,
(os_n_fsyncs - os_n_fsyncs_old)
@@ -8276,9 +8553,38 @@ os_file_set_umask(ulint umask)
#endif
#include <zlib.h>
+#ifndef UNIV_INNOCHECKSUM
+#include <my_aes.h>
+#include <my_rnd.h>
+#include <mysqld.h>
+#include <mysql/service_mysql_keyring.h>
+#endif
+typedef byte Block;
+
+#ifdef MYSQL_COMPRESSION
+/** Allocate a page for sync IO. In this variant Block is a typedef for
+byte, so the block is simply a malloc()ed buffer of UNIV_PAGE_SIZE_MAX * 2
+bytes.
+NOTE(review): the malloc() result is not checked here, so callers can
+receive NULL on allocation failure.
+@return pointer to page */
+static
+Block*
+os_alloc_block()
+{
+ return(reinterpret_cast<byte*>(malloc(UNIV_PAGE_SIZE_MAX * 2)));
+}
+
+/** Free a page after sync IO. Passes the buffer to ut_free().
+NOTE(review): the matching os_alloc_block() uses malloc(), so this
+presumes ut_free() maps to free() in this build - confirm.
+@param[in,own] block The block to free/release */
+static
+void
+os_free_block(Block* block)
+{
+ ut_free(block);
+}
+#endif
#endif /* !UNIV_INNOCHECKSUM */
+#ifdef MYSQL_COMPRESSION
+
/**
@param[in] type The compression type
@return the string representation */
@@ -8388,32 +8694,24 @@ Compression::deserialize(
return(DB_CORRUPTION);
}
- // FIXME: We should use TLS for this and reduce the malloc/free
- bool allocated;
+ Block* block;
/* The caller doesn't know what to expect */
if (dst == NULL) {
- /* Add a safety margin of an additional 50% */
- ulint n_bytes = header.m_original_size
- + (header.m_original_size / 2);
+ block = os_alloc_block();
-#ifndef UNIV_INNOCHECKSUM
- dst = reinterpret_cast<byte*>(ut_malloc_nokey(n_bytes));
+#ifdef UNIV_INNOCHECKSUM
+ dst = block;
#else
- dst = reinterpret_cast<byte*>(malloc(n_bytes));
-#endif /* !UNIV_INNOCHECKSUM */
-
- if (dst == NULL) {
-
- return(DB_OUT_OF_MEMORY);
- }
+ dst = block->m_ptr;
+#endif /* UNIV_INNOCHECKSUM */
- allocated = true;
} else {
- allocated = false;
+ block = NULL;
}
+ int ret;
Compression compression;
ulint len = header.m_original_size;
@@ -8427,8 +8725,8 @@ Compression::deserialize(
if (uncompress(dst, &zlen, ptr, header.m_compressed_size)
!= Z_OK) {
- if (allocated) {
- ut_free(dst);
+ if (block != NULL) {
+ os_free_block(block);
}
return(DB_IO_DECOMPRESS_FAIL);
@@ -8467,8 +8765,8 @@ Compression::deserialize(
if (ret < 0) {
- if (allocated) {
- ut_free(dst);
+ if (block != NULL) {
+ os_free_block(block);
}
return(DB_IO_DECOMPRESS_FAIL);
@@ -8487,8 +8785,8 @@ Compression::deserialize(
Compression::to_string(compression.m_type));
#endif /* !UNIV_INNOCHECKSUM */
- if (allocated) {
- ut_free(dst);
+ if (block != NULL) {
+ os_free_block(block);
}
return(DB_UNSUPPORTED);
@@ -8503,9 +8801,10 @@ Compression::deserialize(
src + (header.m_original_size + FIL_PAGE_DATA)
- FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4) == 0);
- if (allocated) {
- ut_free(dst);
+ if (block != NULL) {
+ os_free_block(block);
}
+
return(DB_SUCCESS);
}
@@ -8526,6 +8825,635 @@ os_file_decompress_page(
{
return(Compression::deserialize(dblwr_recover, src, dst, dst_len));
}
+#endif /* MYSQL_COMPRESSION */
+
+#ifdef MYSQL_ENCRYPTION
+
+/** Map an encryption type to the short tag used to represent it.
+@param[in]	type		The encryption type
+@return "N" for NONE, "Y" for AES, "<UNKNOWN>" for anything else */
+const char*
+Encryption::to_string(Type type)
+{
+	if (type == NONE) {
+		return("N");
+	}
+
+	if (type == AES) {
+		return("Y");
+	}
+
+	/* Not one of the known encryption types. */
+	ut_ad(0);
+
+	return("<UNKNOWN>");
+}
+
+/** Fill a key or iv buffer with random bytes.
+@param[in,out]	value	buffer that receives ENCRYPTION_KEY_LEN random
+			bytes; must not be NULL */
+void
+Encryption::random_value(byte* value)
+{
+	ut_ad(value != NULL);
+
+	my_rand_buffer(value, ENCRYPTION_KEY_LEN);
+}
+
+/** Create new master key for key rotation.
+The key is named "<prefix>-<server uuid>-<master_key_id + 1>". It is
+generated through the keyring API and then fetched back; master_key_id
+is advanced only when the fetch succeeds.
+Does nothing when built with UNIV_INNOCHECKSUM.
+@param[in,out]	master_key	receives the new master key, or NULL when
+				the key could not be fetched */
+void
+Encryption::create_master_key(byte** master_key)
+{
+#ifndef UNIV_INNOCHECKSUM
+	char*	key_type = NULL;
+	size_t	key_len;
+	char	key_name[ENCRYPTION_MASTER_KEY_NAME_MAX_LEN];
+	int	ret;
+
+	/* If uuid does not match with current server uuid,
+	set uuid as current server uuid. */
+	if (strcmp(uuid, server_uuid) != 0) {
+		memcpy(uuid, server_uuid, ENCRYPTION_SERVER_UUID_LEN);
+	}
+	memset(key_name, 0, ENCRYPTION_MASTER_KEY_NAME_MAX_LEN);
+
+	/* Build the name of the new master key. master_key_id is a
+	ulint, so it is formatted with ULINTPF rather than "%lu". */
+	ut_snprintf(key_name, ENCRYPTION_MASTER_KEY_NAME_MAX_LEN,
+		    "%s-%s-" ULINTPF, ENCRYPTION_MASTER_KEY_PRIFIX,
+		    uuid, master_key_id + 1);
+
+	/* We call key ring API to generate master key here. The result
+	is superseded by the fetch below, which decides success. */
+	ret = my_key_generate(key_name, "AES",
+			      NULL, ENCRYPTION_KEY_LEN);
+
+	/* We call key ring API to get master key here. */
+	ret = my_key_fetch(key_name, &key_type, NULL,
+			   reinterpret_cast<void**>(master_key),
+			   &key_len);
+
+	if (ret || *master_key == NULL) {
+		ib::error() << "Encryption can't find master key, please check"
+			" the keyring plugin is loaded.";
+		*master_key = NULL;
+	} else {
+		master_key_id++;
+	}
+
+	if (key_type) {
+		my_free(key_type);
+	}
+#endif
+}
+
+/** Get master key by key id.
+The key is looked up in the keyring under
+"<prefix>-<srv_uuid>-<master_key_id>", or under
+"<prefix>-<server_id>-<master_key_id>" when srv_uuid is NULL.
+Does nothing when built with UNIV_INNOCHECKSUM.
+@param[in]	master_key_id	master key id
+@param[in]	srv_uuid	uuid of server instance, or NULL to use the
+				server id naming scheme
+@param[in,out]	master_key	receives the master key; set to NULL when
+				the keyring fetch reports an error */
+void
+Encryption::get_master_key(ulint master_key_id,
+			   char* srv_uuid,
+			   byte** master_key)
+{
+#ifndef UNIV_INNOCHECKSUM
+	char*	key_type = NULL;
+	size_t	key_len;
+	char	key_name[ENCRYPTION_MASTER_KEY_NAME_MAX_LEN];
+	int	ret;
+
+	memset(key_name, 0, ENCRYPTION_MASTER_KEY_NAME_MAX_LEN);
+
+	/* master_key_id is a ulint, so it is formatted with ULINTPF
+	rather than "%lu". */
+	if (srv_uuid != NULL) {
+		ut_snprintf(key_name, ENCRYPTION_MASTER_KEY_NAME_MAX_LEN,
+			    "%s-%s-" ULINTPF, ENCRYPTION_MASTER_KEY_PRIFIX,
+			    srv_uuid, master_key_id);
+	} else {
+		/* For compatibility with 5.7.11, we need to get master
+		key with server id. key_name was already cleared above. */
+		ut_snprintf(key_name, ENCRYPTION_MASTER_KEY_NAME_MAX_LEN,
+			    "%s-%lu-" ULINTPF, ENCRYPTION_MASTER_KEY_PRIFIX,
+			    server_id, master_key_id);
+	}
+
+	/* We call key ring API to get master key here. */
+	ret = my_key_fetch(key_name, &key_type, NULL,
+			   reinterpret_cast<void**>(master_key), &key_len);
+
+	if (key_type) {
+		my_free(key_type);
+	}
+
+	if (ret) {
+		*master_key = NULL;
+		ib::error() << "Encryption can't find master key, please check"
+			" the keyring plugin is loaded.";
+	}
+
+#ifdef UNIV_ENCRYPT_DEBUG
+	if (!ret && *master_key) {
+		fprintf(stderr, "Fetched master key:" ULINTPF " ",
+			master_key_id);
+		ut_print_buf(stderr, *master_key, key_len);
+		fprintf(stderr, "\n");
+	}
+#endif /* UNIV_ENCRYPT_DEBUG */
+
+#endif
+}
+
+/** Current master key id. The value 0 is treated by get_master_key()
+as "no master key has been generated yet". */
+ulint Encryption::master_key_id = 0;
+
+/** Current uuid of server instance, used when building master key
+names. Sized with one extra byte beyond ENCRYPTION_SERVER_UUID_LEN. */
+char Encryption::uuid[ENCRYPTION_SERVER_UUID_LEN + 1] = {0};
+
+/** Get current master key and master key id.
+When Encryption::master_key_id is 0 the first master key is generated in
+the keyring and fetched back. Otherwise the current key is fetched by its
+uuid based name, falling back to the server id based name used by 5.7.11.
+Does nothing when built with UNIV_INNOCHECKSUM.
+@param[in,out]	master_key_id	receives the current master key id
+@param[in,out]	master_key	receives the master key; set to NULL when
+				the keyring fetch reports an error
+@param[in,out]	version		encryption information version:
+				ENCRYPTION_VERSION_2, or ENCRYPTION_VERSION_1
+				when the server id fallback was attempted */
+void
+Encryption::get_master_key(ulint* master_key_id,
+			   byte** master_key,
+			   Encryption::Version* version)
+{
+#ifndef UNIV_INNOCHECKSUM
+	char*	key_type = NULL;
+	size_t	key_len;
+	char	key_name[ENCRYPTION_MASTER_KEY_NAME_MAX_LEN];
+	int	ret;
+
+	/* Clear the whole name buffer; its size is
+	ENCRYPTION_MASTER_KEY_NAME_MAX_LEN, not ENCRYPTION_KEY_LEN. */
+	memset(key_name, 0, ENCRYPTION_MASTER_KEY_NAME_MAX_LEN);
+	*version = Encryption::ENCRYPTION_VERSION_2;
+
+	if (Encryption::master_key_id == 0) {
+		/* If master_key_id is 0, means there's no encrypted
+		tablespace, we need to generate the first master key,
+		and store it to key ring. */
+		memset(uuid, 0, ENCRYPTION_SERVER_UUID_LEN + 1);
+		memcpy(uuid, server_uuid, ENCRYPTION_SERVER_UUID_LEN);
+
+		/* Build the key name from the server uuid. */
+		ut_snprintf(key_name, ENCRYPTION_MASTER_KEY_NAME_MAX_LEN,
+			    "%s-%s-1", ENCRYPTION_MASTER_KEY_PRIFIX,
+			    uuid);
+
+		/* We call key ring API to generate master key here. The
+		result is superseded by the fetch below. */
+		ret = my_key_generate(key_name, "AES",
+				      NULL, ENCRYPTION_KEY_LEN);
+
+		/* We call key ring API to get master key here. */
+		ret = my_key_fetch(key_name, &key_type, NULL,
+				   reinterpret_cast<void**>(master_key),
+				   &key_len);
+
+		if (!ret && *master_key != NULL) {
+			Encryption::master_key_id++;
+			*master_key_id = Encryption::master_key_id;
+		}
+#ifdef UNIV_ENCRYPT_DEBUG
+		if (!ret && *master_key) {
+			fprintf(stderr, "Generated new master key:");
+			ut_print_buf(stderr, *master_key, key_len);
+			fprintf(stderr, "\n");
+		}
+#endif
+	} else {
+		*master_key_id = Encryption::master_key_id;
+
+		/* The key id is a ulint, so it is formatted with ULINTPF
+		rather than "%lu". */
+		ut_snprintf(key_name, ENCRYPTION_MASTER_KEY_NAME_MAX_LEN,
+			    "%s-%s-" ULINTPF, ENCRYPTION_MASTER_KEY_PRIFIX,
+			    uuid, *master_key_id);
+
+		/* We call key ring API to get master key here. */
+		ret = my_key_fetch(key_name, &key_type, NULL,
+				   reinterpret_cast<void**>(master_key),
+				   &key_len);
+
+		/* For compatibility with 5.7.11, we need to try to get
+		master key with server id when getting the master key with
+		server uuid fails. */
+		if (ret || *master_key == NULL) {
+			if (key_type) {
+				my_free(key_type);
+			}
+
+			memset(key_name, 0,
+			       ENCRYPTION_MASTER_KEY_NAME_MAX_LEN);
+			ut_snprintf(key_name,
+				    ENCRYPTION_MASTER_KEY_NAME_MAX_LEN,
+				    "%s-%lu-" ULINTPF,
+				    ENCRYPTION_MASTER_KEY_PRIFIX,
+				    server_id, *master_key_id);
+
+			ret = my_key_fetch(key_name, &key_type, NULL,
+					   reinterpret_cast<void**>(master_key),
+					   &key_len);
+			*version = Encryption::ENCRYPTION_VERSION_1;
+		}
+#ifdef UNIV_ENCRYPT_DEBUG
+		if (!ret && *master_key) {
+			fprintf(stderr, "Fetched master key:" ULINTPF " ",
+				*master_key_id);
+			ut_print_buf(stderr, *master_key, key_len);
+			fprintf(stderr, "\n");
+		}
+#endif
+	}
+
+	if (ret) {
+		*master_key = NULL;
+		ib::error() << "Encryption can't find master key, please check"
+			" the keyring plugin is loaded.";
+	}
+
+	if (key_type) {
+		my_free(key_type);
+	}
+#endif
+}
+
+/** Tell whether the page type stored in a page header marks the page
+as encrypted.
+@param[in]	page		page to inspect
+@return true if the type is FIL_PAGE_ENCRYPTED,
+FIL_PAGE_COMPRESSED_AND_ENCRYPTED or FIL_PAGE_ENCRYPTED_RTREE */
+bool
+Encryption::is_encrypted_page(const byte* page)
+{
+	const ulint	type_in_header = mach_read_from_2(page + FIL_PAGE_TYPE);
+
+	if (type_in_header == FIL_PAGE_ENCRYPTED) {
+		return(true);
+	}
+
+	if (type_in_header == FIL_PAGE_COMPRESSED_AND_ENCRYPTED) {
+		return(true);
+	}
+
+	return(type_in_header == FIL_PAGE_ENCRYPTED_RTREE);
+}
+
+/** Encrypt the page data contents. Page type can't be
+FIL_PAGE_ENCRYPTED, FIL_PAGE_COMPRESSED_AND_ENCRYPTED,
+FIL_PAGE_ENCRYPTED_RTREE.
+Only the bytes after the FIL_PAGE_DATA header are encrypted; the header
+is copied as is, apart from the page type fields that record the
+encryption.
+@param[in]	type		IORequest (not used by this function)
+@param[in,out]	src		page data which need to encrypt
+@param[in]	src_len		Size of the source in bytes
+@param[in,out]	dst		destination area
+@param[in,out]	dst_len		Size of the destination in bytes
+@return dst holding the encrypted page, or src when the AES call fails;
+in both cases *dst_len is set to src_len */
+byte*
+Encryption::encrypt(
+	const IORequest&	type,
+	byte*			src,
+	ulint			src_len,
+	byte*			dst,
+	ulint*			dst_len)
+{
+	ulint		len = 0;
+	ulint		page_type = mach_read_from_2(src + FIL_PAGE_TYPE);
+	ulint		data_len;
+	ulint		main_len;
+	ulint		remain_len;
+	byte		remain_buf[MY_AES_BLOCK_SIZE * 2];
+
+#ifdef UNIV_ENCRYPT_DEBUG
+	ulint space_id =
+		mach_read_from_4(src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+	ulint page_no = mach_read_from_4(src + FIL_PAGE_OFFSET);
+
+	/* ulint values are printed with ULINTPF rather than "%lu". */
+	fprintf(stderr, "Encrypting page:" ULINTPF "." ULINTPF
+		" len:" ULINTPF "\n",
+		space_id, page_no, src_len);
+#endif
+
+	/* Shouldn't encrypt an already encrypted page. */
+	ut_ad(page_type != FIL_PAGE_ENCRYPTED
+	      && page_type != FIL_PAGE_COMPRESSED_AND_ENCRYPTED
+	      && page_type != FIL_PAGE_ENCRYPTED_RTREE);
+
+	ut_ad(m_type != Encryption::NONE);
+
+	/* This is the data size which needs to be encrypted: main_len is
+	the part that is a whole number of AES blocks, remain_len the
+	bytes left over after it. */
+	data_len = src_len - FIL_PAGE_DATA;
+	main_len = (data_len / MY_AES_BLOCK_SIZE) * MY_AES_BLOCK_SIZE;
+	remain_len = data_len - main_len;
+
+	/* Only encrypt the data + trailer, leave the header alone */
+
+	switch (m_type) {
+	case Encryption::NONE:
+		ut_error;
+
+	case Encryption::AES: {
+		lint			elen;
+
+		ut_ad(m_klen == ENCRYPTION_KEY_LEN);
+
+		elen = my_aes_encrypt(
+			src + FIL_PAGE_DATA,
+			static_cast<uint32>(main_len),
+			dst + FIL_PAGE_DATA,
+			reinterpret_cast<unsigned char*>(m_key),
+			static_cast<uint32>(m_klen),
+			my_aes_256_cbc,
+			reinterpret_cast<unsigned char*>(m_iv),
+			false);
+
+		if (elen == MY_AES_BAD_DATA) {
+			ulint	page_no =mach_read_from_4(
+				src + FIL_PAGE_OFFSET);
+			ulint	space_id = mach_read_from_4(
+				src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+			*dst_len = src_len;
+#ifndef UNIV_INNOCHECKSUM
+			ib::warn()
+				<< " Can't encrypt data of page,"
+				<< " page no:" << page_no
+				<< " space id:" << space_id;
+#else
+			fprintf(stderr, " Can't encrypt data of page,"
+				" page no:" ULINTPF
+				" space id:" ULINTPF,
+				page_no, space_id);
+#endif /* !UNIV_INNOCHECKSUM */
+			return(src);
+		}
+
+		len = static_cast<ulint>(elen);
+		ut_ad(len == main_len);
+
+		/* Copy the remaining bytes and the page trailer, still
+		unencrypted, behind the encrypted main part. */
+		memcpy(dst + FIL_PAGE_DATA + len,
+		       src + FIL_PAGE_DATA + len,
+		       src_len - FIL_PAGE_DATA - len);
+
+		/* Encrypt the remaining bytes. They do not fill an AES
+		block, so the last two blocks' worth of the data area in
+		dst are encrypted together through remain_buf. */
+		if (remain_len != 0) {
+			remain_len = MY_AES_BLOCK_SIZE * 2;
+
+			elen = my_aes_encrypt(
+				dst + FIL_PAGE_DATA + data_len - remain_len,
+				static_cast<uint32>(remain_len),
+				remain_buf,
+				reinterpret_cast<unsigned char*>(m_key),
+				static_cast<uint32>(m_klen),
+				my_aes_256_cbc,
+				reinterpret_cast<unsigned char*>(m_iv),
+				false);
+
+			if (elen == MY_AES_BAD_DATA) {
+				ulint	page_no =mach_read_from_4(
+					src + FIL_PAGE_OFFSET);
+				ulint	space_id = mach_read_from_4(
+					src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+#ifndef UNIV_INNOCHECKSUM
+				ib::warn()
+					<< " Can't encrypt data of page,"
+					<< " page no:" << page_no
+					<< " space id:" << space_id;
+#else
+				fprintf(stderr, " Can't encrypt data of page,"
+					" page no:" ULINTPF
+					" space id:" ULINTPF,
+					page_no, space_id);
+#endif /* !UNIV_INNOCHECKSUM */
+				*dst_len = src_len;
+				return(src);
+			}
+
+			memcpy(dst + FIL_PAGE_DATA + data_len - remain_len,
+			       remain_buf, remain_len);
+		}
+
+
+		break;
+	}
+
+	default:
+		ut_error;
+	}
+
+	/* Copy the header as is. */
+	memmove(dst, src, FIL_PAGE_DATA);
+	ut_ad(memcmp(src, dst, FIL_PAGE_DATA) == 0);
+
+	/* Add encryption control information. Required for decrypting. */
+	if (page_type == FIL_PAGE_COMPRESSED) {
+		/* If the page is compressed, we don't need to save the
+		original type, since it is done in compression already. */
+		mach_write_to_2(dst + FIL_PAGE_TYPE,
+				FIL_PAGE_COMPRESSED_AND_ENCRYPTED);
+		ut_ad(memcmp(src+FIL_PAGE_TYPE+2,
+			     dst+FIL_PAGE_TYPE+2,
+			     FIL_PAGE_DATA-FIL_PAGE_TYPE-2) == 0);
+	} else if (page_type == FIL_PAGE_RTREE) {
+		/* An R-tree page gets its own encrypted page type, so
+		the original type is not stored separately. */
+		mach_write_to_2(dst + FIL_PAGE_TYPE, FIL_PAGE_ENCRYPTED_RTREE);
+	} else{
+		/* Any other page: mark it encrypted and keep the original
+		type so that decryption can restore it. */
+		mach_write_to_2(dst + FIL_PAGE_TYPE, FIL_PAGE_ENCRYPTED);
+		mach_write_to_2(dst + FIL_PAGE_ORIGINAL_TYPE_V1, page_type);
+	}
+
+#ifdef UNIV_ENCRYPT_DEBUG
+#ifndef UNIV_INNOCHECKSUM
+	fprintf(stderr, "Encrypted page:" ULINTPF "." ULINTPF "\n",
+		space_id, page_no);
+#endif
+#endif
+	*dst_len = src_len;
+
+
+	return(dst);
+}
+
+/** Decrypt the page data contents. Page type must be FIL_PAGE_ENCRYPTED,
+FIL_PAGE_COMPRESSED_AND_ENCRYPTED or FIL_PAGE_ENCRYPTED_RTREE,
+if not then the source contents are left unchanged and DB_SUCCESS is returned.
+The page is decrypted in place in src and its page type is restored.
+@param[in]	type		IORequest
+@param[in,out]	src		Data read from disk, decrypted data will be
+				copied to this page
+@param[in]	src_len		source data length
+@param[in,out]	dst		Scratch area to use for decryption, or NULL
+				to have one allocated for the duration of
+				the call
+@param[in]	dst_len		Size of the scratch area in bytes (not
+				checked by this function)
+@return DB_SUCCESS or error code */
+dberr_t
+Encryption::decrypt(
+	const IORequest&	type,
+	byte*			src,
+	ulint			src_len,
+	byte*			dst,
+	ulint			dst_len)
+{
+	ulint		data_len;
+	ulint		main_len;
+	ulint		remain_len;
+	ulint		original_type;
+	ulint		page_type;
+	byte		remain_buf[MY_AES_BLOCK_SIZE * 2];
+	Block*		block;
+
+	/* Do nothing if it's not an encrypted page. */
+	if (!is_encrypted_page(src)) {
+		return(DB_SUCCESS);
+	}
+
+	/* For compressed page, we need to get the compressed size
+	for decryption */
+	page_type = mach_read_from_2(src + FIL_PAGE_TYPE);
+	if (page_type == FIL_PAGE_COMPRESSED_AND_ENCRYPTED) {
+		src_len = static_cast<uint16_t>(
+			mach_read_from_2(src + FIL_PAGE_COMPRESS_SIZE_V1))
+			+ FIL_PAGE_DATA;
+#ifndef UNIV_INNOCHECKSUM
+		src_len = ut_calc_align(src_len, type.block_size());
+#endif
+	}
+#ifdef UNIV_ENCRYPT_DEBUG
+	ulint space_id =
+		mach_read_from_4(src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+	ulint page_no = mach_read_from_4(src + FIL_PAGE_OFFSET);
+
+	/* ulint values are printed with ULINTPF rather than "%lu". */
+	fprintf(stderr, "Decrypting page:" ULINTPF "." ULINTPF
+		" len:" ULINTPF "\n",
+		space_id, page_no, src_len);
+#endif
+
+	original_type = static_cast<uint16_t>(
+		mach_read_from_2(src + FIL_PAGE_ORIGINAL_TYPE_V1));
+
+	byte*	ptr = src + FIL_PAGE_DATA;
+
+	/* The caller doesn't know what to expect */
+	if (dst == NULL) {
+
+		block = os_alloc_block();
+
+		/* Without this check a failed allocation would leave dst
+		NULL and the copies below would write through it. */
+		if (block == NULL) {
+
+			return(DB_OUT_OF_MEMORY);
+		}
+#ifdef UNIV_INNOCHECKSUM
+		dst = block;
+#else
+		dst = block->m_ptr;
+#endif /* UNIV_INNOCHECKSUM */
+
+	} else {
+		block = NULL;
+	}
+
+	/* main_len is the part of the data that is a whole number of AES
+	blocks, remain_len the bytes left over after it. */
+	data_len = src_len - FIL_PAGE_DATA;
+	main_len = (data_len / MY_AES_BLOCK_SIZE) * MY_AES_BLOCK_SIZE;
+	remain_len = data_len - main_len;
+
+	switch(m_type) {
+	case Encryption::AES: {
+		lint			elen;
+
+		/* First decrypt the last 2 blocks of the data, since
+		the data is not block aligned. */
+		if (remain_len != 0) {
+			ut_ad(m_klen == ENCRYPTION_KEY_LEN);
+
+			remain_len = MY_AES_BLOCK_SIZE * 2;
+
+			/* Copy the last 2 blocks. */
+			memcpy(remain_buf,
+			       ptr + data_len - remain_len,
+			       remain_len);
+
+			elen = my_aes_decrypt(
+				remain_buf,
+				static_cast<uint32>(remain_len),
+				dst + data_len - remain_len,
+				reinterpret_cast<unsigned char*>(m_key),
+				static_cast<uint32>(m_klen),
+				my_aes_256_cbc,
+				reinterpret_cast<unsigned char*>(m_iv),
+				false);
+			if (elen == MY_AES_BAD_DATA) {
+				if (block != NULL) {
+					os_free_block(block);
+				}
+
+				return(DB_IO_DECRYPT_FAIL);
+			}
+
+			/* Copy the other data bytes to temp area. */
+			memcpy(dst, ptr, data_len - remain_len);
+		} else {
+			ut_ad(data_len == main_len);
+
+			/* Copy the data bytes to temp area. */
+			memcpy(dst, ptr, data_len);
+		}
+
+		/* Then decrypt the main data from the temp area back
+		into the page. */
+		elen = my_aes_decrypt(
+				dst,
+				static_cast<uint32>(main_len),
+				ptr,
+				reinterpret_cast<unsigned char*>(m_key),
+				static_cast<uint32>(m_klen),
+				my_aes_256_cbc,
+				reinterpret_cast<unsigned char*>(m_iv),
+				false);
+		if (elen == MY_AES_BAD_DATA) {
+
+			if (block != NULL) {
+				os_free_block(block);
+			}
+
+			return(DB_IO_DECRYPT_FAIL);
+		}
+
+		ut_ad(static_cast<ulint>(elen) == main_len);
+
+		/* Copy the remaining bytes, which follow the main data in
+		the temp area, back into the page. */
+		memcpy(ptr + main_len, dst + main_len, data_len - main_len);
+
+		break;
+	}
+
+	default:
+#if !defined(UNIV_INNOCHECKSUM)
+		ib::error()
+			<< "Encryption algorithm support missing: "
+			<< Encryption::to_string(m_type);
+#else
+		fprintf(stderr, "Encryption algorithm support missing: %s\n",
+			Encryption::to_string(m_type));
+#endif /* !UNIV_INNOCHECKSUM */
+
+		if (block != NULL) {
+			os_free_block(block);
+		}
+
+		return(DB_UNSUPPORTED);
+	}
+
+	/* Restore the original page type. If it's a compressed and
+	encrypted page, just reset it as compressed page type, since
+	we will do uncompress later. */
+
+	if (page_type == FIL_PAGE_ENCRYPTED) {
+		mach_write_to_2(src + FIL_PAGE_TYPE, original_type);
+		mach_write_to_2(src + FIL_PAGE_ORIGINAL_TYPE_V1, 0);
+	} else if (page_type == FIL_PAGE_ENCRYPTED_RTREE) {
+		mach_write_to_2(src + FIL_PAGE_TYPE, FIL_PAGE_RTREE);
+	} else {
+		ut_ad(page_type == FIL_PAGE_COMPRESSED_AND_ENCRYPTED);
+		mach_write_to_2(src + FIL_PAGE_TYPE, FIL_PAGE_COMPRESSED);
+	}
+
+	if (block != NULL) {
+		os_free_block(block);
+	}
+
+#ifdef UNIV_ENCRYPT_DEBUG
+	fprintf(stderr, "Decrypted page:" ULINTPF "." ULINTPF "\n",
+		space_id, page_no);
+#endif
+
+	DBUG_EXECUTE_IF("ib_crash_during_decrypt_page", DBUG_SUICIDE(););
+
+	return(DB_SUCCESS);
+}
+#endif /* MYSQL_ENCRYPTION */
/** Normalizes a directory path for the current OS:
On Windows, we convert '/' to '\', else we convert '\' to '/'.