diff options
author | Vasil Dimov <vasil.dimov@oracle.com> | 2010-04-12 18:20:41 +0300 |
---|---|---|
committer | Vasil Dimov <vasil.dimov@oracle.com> | 2010-04-12 18:20:41 +0300 |
commit | c877ff39bceb4df96acf3e54f7c98a2bed12b8ee (patch) | |
tree | 04211a3e5734b73e9f94cff511a4a74ff87075f0 /storage/innobase/row/row0merge.c | |
parent | fe0828b3b8193e086abe740572c9b0cb2b7da671 (diff) | |
parent | 410e23a6af8b597cdda0890d6ed9008355edee7a (diff) | |
download | mariadb-git-c877ff39bceb4df96acf3e54f7c98a2bed12b8ee.tar.gz |
Import branches/innodb+ from SVN on top of storage/innobase.
Diffstat (limited to 'storage/innobase/row/row0merge.c')
-rw-r--r-- | storage/innobase/row/row0merge.c | 203 |
1 files changed, 142 insertions, 61 deletions
diff --git a/storage/innobase/row/row0merge.c b/storage/innobase/row/row0merge.c index 232211e5ce7..908d142c98f 100644 --- a/storage/innobase/row/row0merge.c +++ b/storage/innobase/row/row0merge.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -57,6 +57,11 @@ Completed by Sunny Bains and Marko Makela #include "ut0sort.h" #include "handler0alter.h" +/* Ignore posix_fadvise() on those platforms where it does not exist */ +#if defined __WIN__ +# define posix_fadvise(fd, offset, len, advice) /* nothing */ +#endif /* __WIN__ */ + #ifdef UNIV_DEBUG /** Set these in order ot enable debug printout. */ /* @{ */ @@ -424,14 +429,13 @@ row_merge_dup_report( row_merge_dup_t* dup, /*!< in/out: for reporting duplicates */ const dfield_t* entry) /*!< in: duplicate index entry */ { - mrec_buf_t buf; + mrec_buf_t* buf; const dtuple_t* tuple; dtuple_t tuple_store; const rec_t* rec; const dict_index_t* index = dup->index; ulint n_fields= dict_index_get_n_fields(index); - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; + mem_heap_t* heap; ulint* offsets; ulint n_ext; @@ -441,22 +445,22 @@ row_merge_dup_report( return; } - rec_offs_init(offsets_); - /* Convert the tuple to a record and then to MySQL format. */ + heap = mem_heap_create((1 + REC_OFFS_HEADER_SIZE + n_fields) + * sizeof *offsets + + sizeof *buf); + + buf = mem_heap_alloc(heap, sizeof *buf); tuple = dtuple_from_fields(&tuple_store, entry, n_fields); n_ext = dict_index_is_clust(index) ? dtuple_get_n_ext(tuple) : 0; - rec = rec_convert_dtuple_to_rec(buf, index, tuple, n_ext); - offsets = rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, - &heap); + rec = rec_convert_dtuple_to_rec(*buf, index, tuple, n_ext); + offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap); innobase_rec_to_mysql(dup->table, rec, index, offsets); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } + mem_heap_free(heap); } /*************************************************************//** @@ -627,22 +631,26 @@ row_merge_buf_write( } /******************************************************//** -Create a memory heap and allocate space for row_merge_rec_offsets(). +Create a memory heap and allocate space for row_merge_rec_offsets() +and mrec_buf_t[3]. @return memory heap */ static mem_heap_t* row_merge_heap_create( /*==================*/ const dict_index_t* index, /*!< in: record descriptor */ + mrec_buf_t** buf, /*!< out: 3 buffers */ ulint** offsets1, /*!< out: offsets */ ulint** offsets2) /*!< out: offsets */ { ulint i = 1 + REC_OFFS_HEADER_SIZE + dict_index_get_n_fields(index); - mem_heap_t* heap = mem_heap_create(2 * i * sizeof *offsets1); + mem_heap_t* heap = mem_heap_create(2 * i * sizeof **offsets1 + + 3 * sizeof **buf); - *offsets1 = mem_heap_alloc(heap, i * sizeof *offsets1); - *offsets2 = mem_heap_alloc(heap, i * sizeof *offsets2); + *buf = mem_heap_alloc(heap, 3 * sizeof **buf); + *offsets1 = mem_heap_alloc(heap, i * sizeof **offsets1); + *offsets2 = mem_heap_alloc(heap, i * sizeof **offsets2); (*offsets1)[0] = (*offsets2)[0] = i; (*offsets1)[1] = (*offsets2)[1] = dict_index_get_n_fields(index); @@ -704,6 +712,11 @@ row_merge_read( (ulint) (ofs & 0xFFFFFFFF), (ulint) (ofs >> 32), sizeof *buf); +#ifdef POSIX_FADV_DONTNEED + /* Each block is read exactly once. Free up the file cache. */ + posix_fadvise(fd, ofs, sizeof *buf, POSIX_FADV_DONTNEED); +#endif /* POSIX_FADV_DONTNEED */ + if (UNIV_UNLIKELY(!success)) { ut_print_timestamp(stderr); fprintf(stderr, @@ -734,6 +747,12 @@ row_merge_write( } #endif /* UNIV_DEBUG */ +#ifdef POSIX_FADV_DONTNEED + /* The block will be needed on the next merge pass, + but it can be evicted from the file cache meanwhile. */ + posix_fadvise(fd, ofs, sizeof *buf, POSIX_FADV_DONTNEED); +#endif /* POSIX_FADV_DONTNEED */ + return(UNIV_LIKELY(os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf, (ulint) (ofs & 0xFFFFFFFF), (ulint) (ofs >> 32), @@ -1394,7 +1413,8 @@ row_merge_blocks( { mem_heap_t* heap; /*!< memory heap for offsets0, offsets1 */ - mrec_buf_t buf[3]; /*!< buffer for handling split mrec in block[] */ + mrec_buf_t* buf; /*!< buffer for handling + split mrec in block[] */ const byte* b0; /*!< pointer to block[0] */ const byte* b1; /*!< pointer to block[1] */ byte* b2; /*!< pointer to block[2] */ @@ -1414,7 +1434,7 @@ row_merge_blocks( } #endif /* UNIV_DEBUG */ - heap = row_merge_heap_create(index, &offsets0, &offsets1); + heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1); /* Write a record and read the next record. Split the output file in two halves, which can be merged on the following pass. */ @@ -1500,7 +1520,7 @@ row_merge_blocks_copy( { mem_heap_t* heap; /*!< memory heap for offsets0, offsets1 */ - mrec_buf_t buf[3]; /*!< buffer for handling + mrec_buf_t* buf; /*!< buffer for handling split mrec in block[] */ const byte* b0; /*!< pointer to block[0] */ byte* b2; /*!< pointer to block[2] */ @@ -1518,7 +1538,7 @@ row_merge_blocks_copy( } #endif /* UNIV_DEBUG */ - heap = row_merge_heap_create(index, &offsets0, &offsets1); + heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1); /* Write a record and read the next record. Split the output file in two halves, which can be merged on the following pass. */ @@ -1589,6 +1609,14 @@ row_merge( of.offset = 0; of.n_rec = 0; +#ifdef POSIX_FADV_SEQUENTIAL + /* The input file will be read sequentially, starting from the + beginning and the middle. In Linux, the POSIX_FADV_SEQUENTIAL + affects the entire file. Each block will be read exactly once. */ + posix_fadvise(file->fd, 0, 0, + POSIX_FADV_SEQUENTIAL | POSIX_FADV_NOREUSE); +#endif /* POSIX_FADV_SEQUENTIAL */ + /* Merge blocks to the output file. */ ohalf = 0; foffs0 = 0; @@ -1760,7 +1788,6 @@ row_merge_insert_index_tuples( int fd, /*!< in: file descriptor */ row_merge_block_t* block) /*!< in/out: file buffer */ { - mrec_buf_t buf; const byte* b; que_thr_t* thr; ins_node_t* node; @@ -1779,7 +1806,7 @@ row_merge_insert_index_tuples( trx->op_info = "inserting index entries"; - graph_heap = mem_heap_create(500); + graph_heap = mem_heap_create(500 + sizeof(mrec_buf_t)); node = ins_node_create(INS_DIRECT, table, graph_heap); thr = pars_complete_graph_for_exec(node, trx, graph_heap); @@ -1801,12 +1828,14 @@ row_merge_insert_index_tuples( if (!row_merge_read(fd, foffs, block)) { error = DB_CORRUPTION; } else { + mrec_buf_t* buf = mem_heap_alloc(graph_heap, sizeof *buf); + for (;;) { const mrec_t* mrec; dtuple_t* dtuple; ulint n_ext; - b = row_merge_read_rec(block, &buf, b, index, + b = row_merge_read_rec(block, buf, b, index, fd, &foffs, &mrec, offsets); if (UNIV_UNLIKELY(!b)) { /* End of list, or I/O error */ @@ -1977,14 +2006,12 @@ row_merge_drop_index( /* Drop the field definitions of the index. */ "DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n" /* Drop the index definition and the B-tree. */ - "DELETE FROM SYS_INDEXES WHERE ID = :indexid\n" - " AND TABLE_ID = :tableid;\n" + "DELETE FROM SYS_INDEXES WHERE ID = :indexid;\n" "END;\n"; ut_ad(index && table && trx); pars_info_add_dulint_literal(info, "indexid", index->id); - pars_info_add_dulint_literal(info, "tableid", table->id); trx_start_if_not_started(trx); trx->op_info = "dropping index"; @@ -2033,47 +2060,79 @@ row_merge_drop_temp_indexes(void) /*=============================*/ { trx_t* trx; - ulint err; - - /* We use the private SQL parser of Innobase to generate the - query graphs needed in deleting the dictionary data from system - tables in Innobase. Deleting a row from SYS_INDEXES table also - frees the file segments of the B-tree associated with the index. */ - static const char drop_temp_indexes[] = - "PROCEDURE DROP_TEMP_INDEXES_PROC () IS\n" - "indexid CHAR;\n" - "DECLARE CURSOR c IS SELECT ID FROM SYS_INDEXES\n" - "WHERE SUBSTR(NAME,0,1)='" TEMP_INDEX_PREFIX_STR "';\n" - "BEGIN\n" - "\tOPEN c;\n" - "\tWHILE 1=1 LOOP\n" - "\t\tFETCH c INTO indexid;\n" - "\t\tIF (SQL % NOTFOUND) THEN\n" - "\t\t\tEXIT;\n" - "\t\tEND IF;\n" - "\t\tDELETE FROM SYS_FIELDS WHERE INDEX_ID = indexid;\n" - "\t\tDELETE FROM SYS_INDEXES WHERE ID = indexid;\n" - "\tEND LOOP;\n" - "\tCLOSE c;\n" - "\tCOMMIT WORK;\n" - "END;\n"; + btr_pcur_t pcur; + mtr_t mtr; + /* Load the table definitions that contain partially defined + indexes, so that the data dictionary information can be checked + when accessing the tablename.ibd files. */ trx = trx_allocate_for_background(); trx->op_info = "dropping partially created indexes"; row_mysql_lock_data_dictionary(trx); - /* Incomplete transactions may be holding some locks on the - data dictionary tables. However, they should never have been - able to lock the records corresponding to the partially - created indexes that we are attempting to delete, because the - table was locked when the indexes were being created. We will - drop the partially created indexes before the rollback of - incomplete transactions is initiated. Thus, this should not - interfere with the incomplete transactions. */ - trx->isolation_level = TRX_ISO_READ_UNCOMMITTED; - err = que_eval_sql(NULL, drop_temp_indexes, FALSE, trx); - ut_a(err == DB_SUCCESS); + mtr_start(&mtr); + + btr_pcur_open_at_index_side( + TRUE, + dict_table_get_first_index(dict_sys->sys_indexes), + BTR_SEARCH_LEAF, &pcur, TRUE, &mtr); + + for (;;) { + const rec_t* rec; + const byte* field; + ulint len; + dulint table_id; + dict_table_t* table; + + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + + if (!btr_pcur_is_on_user_rec(&pcur)) { + break; + } + + rec = btr_pcur_get_rec(&pcur); + field = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_NAME_FIELD, + &len); + if (len == UNIV_SQL_NULL || len == 0 + || mach_read_from_1(field) != (ulint) TEMP_INDEX_PREFIX) { + continue; + } + + /* This is a temporary index. */ + + field = rec_get_nth_field_old(rec, 0/*TABLE_ID*/, &len); + if (len != 8) { + /* Corrupted TABLE_ID */ + continue; + } + + table_id = mach_read_from_8(field); + btr_pcur_store_position(&pcur, &mtr); + btr_pcur_commit_specify_mtr(&pcur, &mtr); + + table = dict_load_table_on_id(table_id); + + if (table) { + dict_index_t* index; + + for (index = dict_table_get_first_index(table); + index; index = dict_table_get_next_index(index)) { + + if (*index->name == TEMP_INDEX_PREFIX) { + row_merge_drop_index(index, table, trx); + trx_commit_for_mysql(trx); + } + } + } + + mtr_start(&mtr); + btr_pcur_restore_position(BTR_SEARCH_LEAF, + &pcur, &mtr); + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); row_mysql_unlock_data_dictionary(trx); trx_free_for_background(trx); } @@ -2086,9 +2145,22 @@ row_merge_file_create( /*==================*/ merge_file_t* merge_file) /*!< out: merge file structure */ { +#ifdef UNIV_PFS_IO + /* This temp file open does not go through normal + file APIs, add instrumentation to register with + performance schema */ + struct PSI_file_locker* locker = NULL; + register_pfs_file_open_begin(locker, innodb_file_temp_key, + PSI_FILE_OPEN, + "Innodb Merge Temp File", + __FILE__, __LINE__); +#endif merge_file->fd = innobase_mysql_tmpfile(); merge_file->offset = 0; merge_file->n_rec = 0; +#ifdef UNIV_PFS_IO + register_pfs_file_open_end(locker, merge_file->fd); +#endif } /*********************************************************************//** @@ -2099,10 +2171,19 @@ row_merge_file_destroy( /*===================*/ merge_file_t* merge_file) /*!< out: merge file structure */ { +#ifdef UNIV_PFS_IO + struct PSI_file_locker* locker = NULL; + register_pfs_file_io_begin(locker, merge_file->fd, 0, PSI_FILE_CLOSE, + __FILE__, __LINE__); +#endif if (merge_file->fd != -1) { close(merge_file->fd); merge_file->fd = -1; } + +#ifdef UNIV_PFS_IO + register_pfs_file_io_end(locker, 0); +#endif } /*********************************************************************//** |