diff options
Diffstat (limited to 'innobase/trx/trx0sys.c')
-rw-r--r-- | innobase/trx/trx0sys.c | 319 |
1 files changed, 318 insertions, 1 deletions
diff --git a/innobase/trx/trx0sys.c b/innobase/trx/trx0sys.c index 99ec5b50237..b056975d28a 100644 --- a/innobase/trx/trx0sys.c +++ b/innobase/trx/trx0sys.c @@ -19,9 +19,326 @@ Created 3/26/1996 Heikki Tuuri #include "trx0undo.h" #include "srv0srv.h" #include "trx0purge.h" +#include "log0log.h" /* The transaction system */ -trx_sys_t* trx_sys = NULL; +trx_sys_t* trx_sys = NULL; +trx_doublewrite_t* trx_doublewrite = NULL; + +/******************************************************************** +Creates or initialializes the doublewrite buffer at a database start. */ +static +void +trx_doublewrite_init( +/*=================*/ + byte* doublewrite) /* in: pointer to the doublewrite buf + header on trx sys page */ +{ + trx_doublewrite = mem_alloc(sizeof(trx_doublewrite_t)); + + mutex_create(&(trx_doublewrite->mutex)); + mutex_set_level(&(trx_doublewrite->mutex), SYNC_DOUBLEWRITE); + + trx_doublewrite->first_free = 0; + + trx_doublewrite->block1 = mach_read_from_4( + doublewrite + + TRX_SYS_DOUBLEWRITE_BLOCK1); + trx_doublewrite->block2 = mach_read_from_4( + doublewrite + + TRX_SYS_DOUBLEWRITE_BLOCK2); + trx_doublewrite->write_buf_unaligned = + ut_malloc( + (1 + 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) + * UNIV_PAGE_SIZE); + + trx_doublewrite->write_buf = ut_align( + trx_doublewrite->write_buf_unaligned, + UNIV_PAGE_SIZE); + trx_doublewrite->buf_block_arr = mem_alloc( + 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + * sizeof(void*)); +} + +/******************************************************************** +Creates the doublewrite buffer at a database start. The header of the +doublewrite buffer is placed on the trx system header page. */ + +void +trx_sys_create_doublewrite_buf(void) +/*================================*/ +{ + page_t* page; + page_t* page2; + page_t* new_page; + byte* doublewrite; + byte* fseg_header; + ulint page_no; + ulint prev_page_no; + ulint i; + mtr_t mtr; + + if (trx_doublewrite) { + /* Already inited */ + + return; + } + +start_again: + mtr_start(&mtr); + + page = buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr); + buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK); + + doublewrite = page + TRX_SYS_DOUBLEWRITE; + + if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC) + == TRX_SYS_DOUBLEWRITE_MAGIC_N) { + + /* The doublewrite buffer has already been created: + just read in some numbers */ + + trx_doublewrite_init(doublewrite); + + mtr_commit(&mtr); + } else { + fprintf(stderr, + "InnoDB: Doublewrite buffer not found: creating new\n"); + + if (buf_pool_get_curr_size() < + (2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + + FSP_EXTENT_SIZE / 2 + 100) + * UNIV_PAGE_SIZE) { + fprintf(stderr, + "InnoDB: Cannot create doublewrite buffer: you must\n" + "InnoDB: increase your buffer pool size.\n" + "InnoDB: Cannot continue operation.\n"); + + exit(1); + } + + page2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, + TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG, &mtr); + + /* fseg_create acquires a second latch on the page, + therefore we must declare it: */ + + buf_page_dbg_add_level(page2, SYNC_NO_ORDER_CHECK); + + if (page2 == NULL) { + fprintf(stderr, + "InnoDB: Cannot create doublewrite buffer: you must\n" + "InnoDB: increase your tablespace size.\n" + "InnoDB: Cannot continue operation.\n"); + + /* We exit without committing the mtr to prevent + its modifications to the database getting to disk */ + + exit(1); + } + + fseg_header = page + TRX_SYS_DOUBLEWRITE + + TRX_SYS_DOUBLEWRITE_FSEG; + prev_page_no = 0; + + for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + + FSP_EXTENT_SIZE / 2; i++) { + page_no = fseg_alloc_free_page(fseg_header, + prev_page_no + 1, + FSP_UP, &mtr); + if (page_no == FIL_NULL) { + fprintf(stderr, + "InnoDB: Cannot create doublewrite buffer: you must\n" + "InnoDB: increase your tablespace size.\n" + "InnoDB: Cannot continue operation.\n"); + + exit(1); + } + + /* We read the allocated pages to the buffer pool; + when they are written to disk in a flush, the space + id and page number fields are also written to the + pages. When we at database startup read pages + from the doublewrite buffer, we know that if the + space id and page number in them are the same as + the page position in the tablespace, then the page + has not been written to in doublewrite. */ + + new_page = buf_page_get(TRX_SYS_SPACE, page_no, + RW_X_LATCH, &mtr); + buf_page_dbg_add_level(new_page, SYNC_NO_ORDER_CHECK); + + /* Make a dummy change to the page to ensure it will + be written to disk in a flush */ + + mlog_write_ulint(new_page + FIL_PAGE_DATA, + TRX_SYS_DOUBLEWRITE_MAGIC_N, + MLOG_4BYTES, &mtr); + + if (i == FSP_EXTENT_SIZE / 2) { + mlog_write_ulint(doublewrite + + TRX_SYS_DOUBLEWRITE_BLOCK1, + page_no, MLOG_4BYTES, &mtr); + mlog_write_ulint(doublewrite + + TRX_SYS_DOUBLEWRITE_REPEAT + + TRX_SYS_DOUBLEWRITE_BLOCK1, + page_no, MLOG_4BYTES, &mtr); + } else if (i == FSP_EXTENT_SIZE / 2 + + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { + mlog_write_ulint(doublewrite + + TRX_SYS_DOUBLEWRITE_BLOCK2, + page_no, MLOG_4BYTES, &mtr); + mlog_write_ulint(doublewrite + + TRX_SYS_DOUBLEWRITE_REPEAT + + TRX_SYS_DOUBLEWRITE_BLOCK2, + page_no, MLOG_4BYTES, &mtr); + } else if (i > FSP_EXTENT_SIZE / 2) { + ut_a(page_no == prev_page_no + 1); + } + + prev_page_no = page_no; + } + + mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC, + TRX_SYS_DOUBLEWRITE_MAGIC_N, MLOG_4BYTES, &mtr); + mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC + + TRX_SYS_DOUBLEWRITE_REPEAT, + TRX_SYS_DOUBLEWRITE_MAGIC_N, MLOG_4BYTES, &mtr); + mtr_commit(&mtr); + + /* Flush the modified pages to disk and make a checkpoint */ + log_make_checkpoint_at(ut_dulint_max, TRUE); + + fprintf(stderr, "InnoDB: Doublewrite buffer created\n"); + + goto start_again; + } +} + +/******************************************************************** +At a database startup uses a possible doublewrite buffer to restore +half-written pages in the data files. */ + +void +trx_sys_doublewrite_restore_corrupt_pages(void) +/*===========================================*/ +{ + byte* buf; + byte* read_buf; + byte* unaligned_read_buf; + ulint block1; + ulint block2; + byte* page; + byte* doublewrite; + ulint space_id; + ulint page_no; + ulint i; + + /* We do the file i/o past the buffer pool */ + + unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE); + read_buf = ut_align(unaligned_read_buf, UNIV_PAGE_SIZE); + + /* Read the trx sys header to check if we are using the + doublewrite buffer */ + + fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, TRX_SYS_PAGE_NO, 0, + UNIV_PAGE_SIZE, read_buf, NULL); + + doublewrite = read_buf + TRX_SYS_DOUBLEWRITE; + + if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC) + == TRX_SYS_DOUBLEWRITE_MAGIC_N) { + /* The doublewrite buffer has been created */ + + trx_doublewrite_init(doublewrite); + + block1 = trx_doublewrite->block1; + block2 = trx_doublewrite->block2; + + buf = trx_doublewrite->write_buf; + } else { + goto leave_func; + } + + /* Read the pages from the doublewrite buffer to memory */ + + fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, block1, 0, + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, + buf, NULL); + fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, block2, 0, + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, + buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, + NULL); + /* Check if any of these pages is half-written in data files, in the + intended position */ + + page = buf; + + for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) { + + space_id = mach_read_from_4(page + FIL_PAGE_SPACE); + page_no = mach_read_from_4(page + FIL_PAGE_OFFSET); + + if (!fil_check_adress_in_tablespace(space_id, page_no)) { + fprintf(stderr, + "InnoDB: Warning: an inconsistent page in the doublewrite buffer\n" + "InnoDB: space id %lu page number %lu, %lu'th page in dblwr buf.\n", + space_id, page_no, i); + + } else if (space_id == TRX_SYS_SPACE + && ( (page_no >= block1 + && page_no + < block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) + || (page_no >= block2 + && page_no + < block2 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE))) { + + /* It is an unwritten doublewrite buffer page: + do nothing */ + + } else { + /* Read in the actual page from the data files */ + + fil_io(OS_FILE_READ, TRUE, space_id, page_no, 0, + UNIV_PAGE_SIZE, read_buf, NULL); + /* Check if the page is corrupt */ + + if (buf_page_is_corrupted(read_buf)) { + + fprintf(stderr, + "InnoDB: Warning: database page corruption or a failed\n" + "InnoDB: file read of page %lu.\n", page_no); + fprintf(stderr, + "InnoDB: Trying to recover it from the doublewrite buffer.\n"); + + if (buf_page_is_corrupted(page)) { + fprintf(stderr, + "InnoDB: Also the page in the doublewrite buffer is corrupt.\n" + "InnoDB: Cannot continue operation.\n"); + exit(1); + } + + /* Write the good page from the + doublewrite buffer to the intended + position */ + + fil_io(OS_FILE_WRITE, TRUE, space_id, + page_no, 0, + UNIV_PAGE_SIZE, page, NULL); + fprintf(stderr, + "InnoDB: Recovered the page from the doublewrite buffer.\n"); + } + } + + page += UNIV_PAGE_SIZE; + } + + fil_flush_file_spaces(FIL_TABLESPACE); + +leave_func: + ut_free(unaligned_read_buf); +} /******************************************************************** Checks that trx is in the trx list. */ |