summaryrefslogtreecommitdiff
path: root/storage
diff options
context:
space:
mode:
Diffstat (limited to 'storage')
-rw-r--r--storage/maria/Makefile.am5
-rw-r--r--storage/maria/ha_maria.cc26
-rw-r--r--storage/maria/lockman.c12
-rw-r--r--storage/maria/ma_blockrec.c229
-rw-r--r--storage/maria/ma_blockrec.h1
-rw-r--r--storage/maria/ma_check.c106
-rw-r--r--storage/maria/ma_checkpoint.c1
-rw-r--r--storage/maria/ma_close.c3
-rw-r--r--storage/maria/ma_control_file.c29
-rw-r--r--storage/maria/ma_create.c19
-rw-r--r--storage/maria/ma_delete.c393
-rw-r--r--storage/maria/ma_ft_update.c9
-rw-r--r--storage/maria/ma_key_recover.c599
-rw-r--r--storage/maria/ma_key_recover.h64
-rw-r--r--storage/maria/ma_key_redo.c417
-rw-r--r--storage/maria/ma_locking.c35
-rw-r--r--storage/maria/ma_loghandler.c98
-rw-r--r--storage/maria/ma_loghandler.h23
-rw-r--r--storage/maria/ma_open.c49
-rw-r--r--storage/maria/ma_packrec.c2
-rw-r--r--storage/maria/ma_page.c282
-rwxr-xr-xstorage/maria/ma_pagecache.c213
-rw-r--r--storage/maria/ma_pagecache.h7
-rw-r--r--storage/maria/ma_panic.c2
-rw-r--r--storage/maria/ma_range.c4
-rw-r--r--storage/maria/ma_recovery.c358
-rw-r--r--storage/maria/ma_rt_index.c225
-rw-r--r--storage/maria/ma_rt_key.c3
-rw-r--r--storage/maria/ma_rt_split.c9
-rw-r--r--storage/maria/ma_search.c179
-rw-r--r--storage/maria/ma_sort.c5
-rw-r--r--storage/maria/ma_statrec.c2
-rw-r--r--storage/maria/ma_test1.c14
-rw-r--r--storage/maria/ma_test2.c12
-rwxr-xr-xstorage/maria/ma_test_all.sh2
-rw-r--r--storage/maria/ma_write.c1384
-rw-r--r--storage/maria/maria_chk.c4
-rw-r--r--storage/maria/maria_def.h105
-rw-r--r--storage/maria/unittest/ma_pagecache_single.c2
-rw-r--r--storage/myisam/mi_check.c24
-rw-r--r--storage/myisam/mi_checksum.c27
-rw-r--r--storage/myisam/mi_create.c4
-rw-r--r--storage/myisam/mi_open.c17
-rw-r--r--storage/myisam/mi_test2.c4
-rw-r--r--storage/myisam/myisamdef.h6
45 files changed, 4032 insertions, 982 deletions
diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am
index 795784f31aa..090c3d783d9 100644
--- a/storage/maria/Makefile.am
+++ b/storage/maria/Makefile.am
@@ -62,7 +62,7 @@ noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h \
ma_control_file.h ha_maria.h ma_blockrec.h \
ma_loghandler.h ma_loghandler_lsn.h ma_pagecache.h \
ma_checkpoint.h ma_recovery.h ma_commit.h \
- trnman_public.h ma_check_standalone.h
+ trnman_public.h ma_check_standalone.h ma_key_recover.h
ma_test1_DEPENDENCIES= $(LIBRARIES)
ma_test1_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \
$(top_builddir)/storage/myisam/libmyisam.a \
@@ -103,7 +103,8 @@ ma_sp_test_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \
$(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@
libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \
ma_rnext.c ma_rnext_same.c \
- ma_search.c ma_page.c ma_key.c ma_locking.c \
+ ma_search.c ma_page.c ma_key_recover.c ma_key.c \
+ ma_locking.c \
ma_rrnd.c ma_scan.c ma_cache.c \
ma_statrec.c ma_packrec.c ma_dynrec.c \
ma_blockrec.c ma_bitmap.c \
diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc
index 1d36c05ee4c..f43e23da439 100644
--- a/storage/maria/ha_maria.cc
+++ b/storage/maria/ha_maria.cc
@@ -2131,7 +2131,7 @@ int ha_maria::create(const char *name, register TABLE *table_arg,
HA_CREATE_INFO *ha_create_info)
{
int error;
- uint create_flags= 0, records, i;
+ uint create_flags= 0, record_count, i;
char buff[FN_REFLEN];
MARIA_KEYDEF *keydef;
MARIA_COLUMNDEF *recinfo;
@@ -2159,7 +2159,7 @@ int ha_maria::create(const char *name, register TABLE *table_arg,
ER_ILLEGAL_HA_CREATE_OPTION,
"Row format set to PAGE because of TRANSACTIONAL=1 option");
- if ((error= table2maria(table_arg, &keydef, &recinfo, &records)))
+ if ((error= table2maria(table_arg, &keydef, &recinfo, &record_count)))
DBUG_RETURN(error); /* purecov: inspected */
bzero((char*) &create_info, sizeof(create_info));
create_info.max_rows= share->max_rows;
@@ -2204,7 +2204,7 @@ int ha_maria::create(const char *name, register TABLE *table_arg,
maria_create(fn_format(buff, name, "", "",
MY_UNPACK_FILENAME | MY_APPEND_EXT),
row_type, share->keys, keydef,
- records, recinfo,
+ record_count, recinfo,
0, (MARIA_UNIQUEDEF *) 0,
&create_info, create_flags);
@@ -2322,22 +2322,22 @@ uint ha_maria::checksum() const
}
-bool ha_maria::check_if_incompatible_data(HA_CREATE_INFO *info,
+bool ha_maria::check_if_incompatible_data(HA_CREATE_INFO *create_info,
uint table_changes)
{
uint options= table->s->db_options_in_use;
- if (info->auto_increment_value != stats.auto_increment_value ||
- info->data_file_name != data_file_name ||
- info->index_file_name != index_file_name ||
- maria_row_type(info) != data_file_type ||
+ if (create_info->auto_increment_value != stats.auto_increment_value ||
+ create_info->data_file_name != data_file_name ||
+ create_info->index_file_name != index_file_name ||
+ maria_row_type(create_info) != data_file_type ||
table_changes == IS_EQUAL_NO ||
table_changes & IS_EQUAL_PACK_LENGTH) // Not implemented yet
return COMPATIBLE_DATA_NO;
if ((options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM |
HA_OPTION_DELAY_KEY_WRITE)) !=
- (info->table_options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM |
+ (create_info->table_options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM |
HA_OPTION_DELAY_KEY_WRITE)))
return COMPATIBLE_DATA_NO;
return COMPATIBLE_DATA_YES;
@@ -2413,7 +2413,7 @@ static int ha_maria_init(void *p)
maria_recover() ||
ma_checkpoint_init(checkpoint_interval);
maria_multi_threaded= TRUE;
- return res;
+ return res ? HA_ERR_INITIALIZATION : 0;
}
@@ -2519,9 +2519,9 @@ static void update_checkpoint_interval(MYSQL_THD thd,
}
static SHOW_VAR status_variables[]= {
- {"Maria_pagecache_blocks_not_flushed", (char*) &maria_pagecache_var.global_blocks_changed, SHOW_LONG},
- {"Maria_pagecache_blocks_unused", (char*) &maria_pagecache_var.blocks_unused, SHOW_LONG},
- {"Maria_pagecache_blocks_used", (char*) &maria_pagecache_var.blocks_used, SHOW_LONG},
+ {"Maria_pagecache_blocks_not_flushed", (char*) &maria_pagecache_var.global_blocks_changed, SHOW_LONG_NOFLUSH},
+ {"Maria_pagecache_blocks_unused", (char*) &maria_pagecache_var.blocks_unused, SHOW_LONG_NOFLUSH},
+ {"Maria_pagecache_blocks_used", (char*) &maria_pagecache_var.blocks_used, SHOW_LONG_NOFLUSH},
{"Maria_pagecache_read_requests", (char*) &maria_pagecache_var.global_cache_r_requests, SHOW_LONGLONG},
{"Maria_pagecache_reads", (char*) &maria_pagecache_var.global_cache_read, SHOW_LONGLONG},
{"Maria_pagecache_write_requests", (char*) &maria_pagecache_var.global_cache_w_requests, SHOW_LONGLONG},
diff --git a/storage/maria/lockman.c b/storage/maria/lockman.c
index 8316d70bb29..c9b753fb492 100644
--- a/storage/maria/lockman.c
+++ b/storage/maria/lockman.c
@@ -247,7 +247,7 @@ static int lockfind(LOCK * volatile *head, LOCK *node,
{
uint32 hashnr, cur_hashnr;
uint64 resource, cur_resource;
- intptr link;
+ intptr cur_link;
my_bool cur_active, compatible, upgrading, prev_active;
enum lock_type lock, prev_lock, cur_lock;
uint16 loid, cur_loid;
@@ -276,10 +276,10 @@ retry:
if (!cursor->curr)
break;
do {
- link= cursor->curr->link;
- cursor->next= PTR(link);
+ cur_link= cursor->curr->link;
+ cursor->next= PTR(cur_link);
_lf_pin(pins, 0, cursor->next);
- } while(link != cursor->curr->link && LF_BACKOFF);
+ } while (cur_link != cursor->curr->link && LF_BACKOFF);
cur_hashnr= cursor->curr->hashnr;
cur_resource= cursor->curr->resource;
cur_lock= cursor->curr->lock;
@@ -290,7 +290,7 @@ retry:
(void)LF_BACKOFF;
goto retry;
}
- if (!DELETED(link))
+ if (!DELETED(cur_link))
{
if (cur_hashnr > hashnr ||
(cur_hashnr == hashnr && cur_resource >= resource))
@@ -429,7 +429,7 @@ static int lockinsert(LOCK * volatile *head, LOCK *node, LF_PINS *pins,
if (res & LOCK_UPGRADE)
cursor.upgrade_from->flags|= IGNORE_ME;
/*
- QQ: is this OK ? if a reader has already read upgrade_from,
+ QQ: is this OK ? if a reader has already read upgrade_from,
it may find it conflicting with node :(
- see the last test from test_lockman_simple()
*/
diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c
index c418d1e71b2..af7bf4c1f5b 100644
--- a/storage/maria/ma_blockrec.c
+++ b/storage/maria/ma_blockrec.c
@@ -265,8 +265,9 @@
#include "maria_def.h"
#include "ma_blockrec.h"
-#include <lf.h>
#include "trnman.h"
+#include "ma_key_recover.h"
+#include <lf.h>
/*
Struct for having a cursor over a set of extent.
@@ -314,12 +315,7 @@ typedef struct st_maria_extent_cursor
trn->undo_lsn under log mutex, and needs to know the type of UNDO being
undone now to modify state.records under log mutex.
*/
-struct st_msg_to_write_hook_for_clr_end
-{
- LSN previous_undo_lsn;
- enum translog_record_type undone_record_type;
- ha_checksum checksum_delta;
-};
+
/** S:share,D:checksum_delta,E:expression,P:pointer_into_record,L:length */
#define store_checksum_in_rec(S,D,E,P,L) do \
{ \
@@ -498,12 +494,6 @@ my_bool _ma_init_block_record(MARIA_HA *info)
sizeof(MARIA_BITMAP_BLOCK),
ELEMENTS_RESERVED_FOR_MAIN_PART, 16))
goto err;
- /* The following should be big enough for all purposes */
- if (my_init_dynamic_array(&info->pinned_pages,
- sizeof(MARIA_PINNED_PAGE),
- max(info->s->base.blobs*2 + 4,
- MARIA_MAX_TREE_LEVELS*2), 16))
- goto err;
row->base_length= new_row->base_length= info->s->base_length;
/*
@@ -527,7 +517,6 @@ void _ma_end_block_record(MARIA_HA *info)
DBUG_ENTER("_ma_end_block_record");
my_free((uchar*) info->cur_row.empty_bits, MYF(MY_ALLOW_ZERO_PTR));
delete_dynamic(&info->bitmap_blocks);
- delete_dynamic(&info->pinned_pages);
my_free((uchar*) info->cur_row.extents, MYF(MY_ALLOW_ZERO_PTR));
/*
The data file is closed, when needed, in ma_once_end_block_record().
@@ -783,50 +772,6 @@ void copy_not_changed_fields(MARIA_HA *info, MY_BITMAP *changed_fields,
}
}
-
-/*
- Unpin all pinned pages
-
- SYNOPSIS
- _ma_unpin_all_pages()
- info Maria handler
- undo_lsn LSN for undo pages. LSN_IMPOSSIBLE if we shouldn't write undo
- (error)
-
- NOTE
- We unpin pages in the reverse order as they where pinned; This may not
- be strictly necessary but may simplify things in the future.
-
- RETURN
- 0 ok
- 1 error (fatal disk error)
-
-*/
-
-void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn)
-{
- MARIA_PINNED_PAGE *page_link= ((MARIA_PINNED_PAGE*)
- dynamic_array_ptr(&info->pinned_pages, 0));
- MARIA_PINNED_PAGE *pinned_page= page_link + info->pinned_pages.elements;
- DBUG_ENTER("_ma_unpin_all_pages");
- DBUG_PRINT("info", ("undo_lsn: %lu", (ulong) undo_lsn));
-
- /* True if not disk error */
- DBUG_ASSERT((undo_lsn != LSN_IMPOSSIBLE) || !info->s->now_transactional);
-
- if (!info->s->now_transactional)
- undo_lsn= LSN_IMPOSSIBLE; /* don't try to set a LSN on pages */
-
- while (pinned_page-- != page_link)
- pagecache_unlock_by_link(info->s->pagecache, pinned_page->link,
- pinned_page->unlock, PAGECACHE_UNPIN,
- info->trn->rec_lsn, undo_lsn);
-
- info->pinned_pages.elements= 0;
- DBUG_VOID_RETURN;
-}
-
-
#ifdef NOT_YET_NEEDED
/* Calculate empty space on a page */
@@ -843,23 +788,6 @@ static uint empty_space_on_page(uchar *buff, uint block_size)
}
#endif
-/**
- When we have finished the write/update/delete of a row, we have cleanups to
- do. For now it is signalling to Checkpoint that all dirtied pages have
- their rec_lsn set and page LSN set (_ma_unpin_all_pages() has been called),
- and that bitmap pages are correct (_ma_bitmap_release_unused() has been
- called).
-*/
-#define _ma_finalize_row(info) \
- do { info->trn->rec_lsn= LSN_IMPOSSIBLE; } while(0)
-/** unpinning is often the last operation before finalizing: */
-#define _ma_unpin_all_pages_and_finalize_row(info,undo_lsn) do \
- { \
- _ma_unpin_all_pages(info, undo_lsn); \
- _ma_finalize_row(info); \
- } while(0)
-
-
/*
Find free position in directory
@@ -1379,6 +1307,7 @@ static my_bool get_head_or_tail_page(MARIA_HA *info,
lock, &page_link.link)))
DBUG_RETURN(1);
page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
push_dynamic(&info->pinned_pages, (void*) &page_link);
DBUG_ASSERT((res->buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == page_type);
@@ -1517,6 +1446,7 @@ static my_bool write_tail(MARIA_HA *info,
PAGECACHE_WRITE_DELAY, &page_link.link)))
{
page_link.unlock= PAGECACHE_LOCK_READ_UNLOCK;
+ page_link.changed= 1;
if (block_is_read)
{
/* Change the lock used when we read the page */
@@ -1879,7 +1809,6 @@ static my_bool free_full_page_range(MARIA_HA *info, ulonglong page, uint count)
TRANSLOG_INTERNAL_PARTS + 1, log_array,
log_data, NULL))
res= 1;
-
}
pthread_mutex_lock(&info->s->bitmap.bitmap_lock);
if (_ma_reset_full_page_bits(info, &info->s->bitmap, page,
@@ -2343,13 +2272,12 @@ static my_bool write_block_record(MARIA_HA *info,
head_block+1, bitmap_blocks->count - 1);
if (head_tail_block)
{
- ulong data_length= (tmp_data - info->rec_buff);
- uint length;
+ ulong block_length= (tmp_data - info->rec_buff);
uchar *extent_data;
- length= (uint) (data_length % FULL_PAGE_SIZE(block_size));
+ length= (uint) (block_length % FULL_PAGE_SIZE(block_size));
if (write_tail(info, head_tail_block,
- info->rec_buff + data_length - length,
+ info->rec_buff + block_length - length,
length))
goto disk_err;
tmp_data-= length; /* Remove the tail */
@@ -2393,7 +2321,7 @@ static my_bool write_block_record(MARIA_HA *info,
{
uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE];
LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
- size_t data_length= (size_t) (data - row_pos->data);
+ size_t block_length= (size_t) (data - row_pos->data);
/* Log REDO changes of head page */
page_store(log_data + FILEID_STORE_SIZE, head_block->page);
@@ -2402,9 +2330,9 @@ static my_bool write_block_record(MARIA_HA *info,
log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char*) row_pos->data;
- log_array[TRANSLOG_INTERNAL_PARTS + 1].length= data_length;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= block_length;
if (translog_write_record(&lsn, LOGREC_REDO_INSERT_ROW_HEAD, info->trn,
- info, sizeof(log_data) + data_length,
+ info, sizeof(log_data) + block_length,
TRANSLOG_INTERNAL_PARTS + 2, log_array,
log_data, NULL))
goto disk_err;
@@ -2426,6 +2354,7 @@ static my_bool write_block_record(MARIA_HA *info,
PAGECACHE_WRITE_DELAY, &page_link.link))
goto disk_err;
page_link.unlock= PAGECACHE_LOCK_READ_UNLOCK;
+ page_link.changed= 1;
if (head_block_is_read)
{
/* Head page is always the first pinned page */
@@ -2477,12 +2406,12 @@ static my_bool write_block_record(MARIA_HA *info,
if (tmp_data_used)
{
/* Full head page */
- size_t data_length= (ulong) (tmp_data - info->rec_buff);
+ size_t block_length= (ulong) (tmp_data - info->rec_buff);
log_pos= store_page_range(log_pos, head_block+1, block_size,
- data_length, &extents);
+ block_length, &extents);
log_array_pos->str= (char*) info->rec_buff;
- log_array_pos->length= data_length;
- log_entry_length+= data_length;
+ log_array_pos->length= block_length;
+ log_entry_length+= block_length;
log_array_pos++;
sub_extents++;
}
@@ -2545,7 +2474,7 @@ static my_bool write_block_record(MARIA_HA *info,
}
/* Write UNDO or CLR record */
- lsn= 0;
+ lsn= LSN_IMPOSSIBLE;
if (share->now_transactional)
{
LEX_STRING *log_array= info->log_row_parts;
@@ -2609,6 +2538,8 @@ static my_bool write_block_record(MARIA_HA *info,
if (!old_record)
{
+ /* Store undo_lsn in case we are aborting the insert */
+ row->orig_undo_lsn= info->trn->undo_lsn;
/* Write UNDO log record for the INSERT */
if (translog_write_record(&lsn, LOGREC_UNDO_ROW_INSERT,
info->trn, info,
@@ -2711,7 +2642,7 @@ disk_err:
Unpin all pinned pages to not cause problems for disk cache. This is
safe to call even if we already called _ma_unpin_all_pages() above.
*/
- _ma_unpin_all_pages_and_finalize_row(info, 0);
+ _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
DBUG_RETURN(1);
}
@@ -2863,32 +2794,13 @@ my_bool _ma_write_abort_block_record(MARIA_HA *info)
if (share->now_transactional)
{
- TRANSLOG_HEADER_BUFFER rec;
LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE +
CLR_TYPE_STORE_SIZE + HA_CHECKSUM_STORE_SIZE];
- int len;
struct st_msg_to_write_hook_for_clr_end msg;
- /*
- We do need the code above (delete_head_or_tail() etc) for
- non-transactional tables.
- For transactional tables we could skip this code above and just execute
- the UNDO_INSERT, but we try to have one code path.
- Write CLR record, because we are somehow undoing UNDO_ROW_INSERT.
- When we have logging for keys: as maria_write() first writes the row
- then the keys, and if failure, deletes the keys then the rows,
- info->trn->undo_lsn below will properly point to the UNDO of the
- UNDO_ROW_INSERT for this row.
- */
- if ((len= translog_read_record_header(info->trn->undo_lsn, &rec)) ==
- RECHEADER_READ_ERROR)
- {
- res= 1;
- goto end;
- }
- DBUG_ASSERT(rec.type == LOGREC_UNDO_ROW_INSERT);
- memcpy(log_data, rec.header, LSN_STORE_SIZE); /* previous UNDO LSN */
- msg.previous_undo_lsn= lsn_korr(rec.header);
+
+ lsn_store(log_data, info->cur_row.orig_undo_lsn);
+ msg.previous_undo_lsn= info->cur_row.orig_undo_lsn;
msg.undone_record_type= LOGREC_UNDO_ROW_INSERT;
clr_type_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE,
LOGREC_UNDO_ROW_INSERT);
@@ -2907,7 +2819,6 @@ my_bool _ma_write_abort_block_record(MARIA_HA *info)
log_data + LSN_STORE_SIZE, &msg))
res= 1;
}
-end:
_ma_unpin_all_pages_and_finalize_row(info, lsn);
DBUG_RETURN(res);
}
@@ -2964,6 +2875,7 @@ static my_bool _ma_update_block_record2(MARIA_HA *info,
PAGECACHE_LOCK_WRITE, &page_link.link)))
DBUG_RETURN(1);
page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
push_dynamic(&info->pinned_pages, (void*) &page_link);
org_empty_size= uint2korr(buff + EMPTY_SPACE_OFFSET);
@@ -3018,7 +2930,7 @@ static my_bool _ma_update_block_record2(MARIA_HA *info,
the head page
*/
head_length= uint2korr(dir + 2);
- if (buff[PAGE_TYPE_OFFSET] & PAGE_CAN_BE_COMPACTED && org_empty_size &&
+ if ((buff[PAGE_TYPE_OFFSET] & PAGE_CAN_BE_COMPACTED) && org_empty_size &&
(head_length < new_row->head_length ||
(new_row->total_length <= head_length &&
org_empty_size + head_length >= new_row->total_length)))
@@ -3047,14 +2959,13 @@ static my_bool _ma_update_block_record2(MARIA_HA *info,
DBUG_RETURN(res);
err:
- _ma_unpin_all_pages_and_finalize_row(info, 0);
+ _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
DBUG_RETURN(1);
}
/* Wrapper for _ma_update_block_record2() used by ma_update() */
-
my_bool _ma_update_block_record(MARIA_HA *info, MARIA_RECORD_POS record_pos,
const uchar *orig_rec, const uchar *new_rec)
{
@@ -3142,7 +3053,7 @@ static int delete_dir_entry(uchar *buff, uint block_size, uint record_number,
buff[PAGE_TYPE_OFFSET]= UNALLOCATED_PAGE;
#ifdef IDENTICAL_PAGES_AFTER_RECOVERY
{
- uchar *dir= dir_entry_pos(buff, block_size, record_number);
+ dir= dir_entry_pos(buff, block_size, record_number);
bzero(dir, (record_number+1) * DIR_ENTRY_SIZE);
}
#endif
@@ -3219,6 +3130,7 @@ static my_bool delete_head_or_tail(MARIA_HA *info,
PAGECACHE_LOCK_WRITE, &page_link.link)))
DBUG_RETURN(1);
page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
push_dynamic(&info->pinned_pages, (void*) &page_link);
if (from_update)
@@ -3291,6 +3203,7 @@ static my_bool delete_head_or_tail(MARIA_HA *info,
}
/* The page is pinned with a read lock */
page_link.unlock= lock_at_unpin;
+ page_link.changed= 1;
set_dynamic(&info->pinned_pages, (void*) &page_link,
info->pinned_pages.elements-1);
@@ -3402,7 +3315,7 @@ my_bool _ma_delete_block_record(MARIA_HA *info, const uchar *record)
DBUG_RETURN(0);
err:
- _ma_unpin_all_pages_and_finalize_row(info, 0);
+ _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
DBUG_RETURN(1);
}
@@ -3579,6 +3492,7 @@ static uchar *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent,
{
/* Read during redo */
page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
push_dynamic(&info->pinned_pages, (void*) &page_link);
}
@@ -3890,8 +3804,9 @@ int _ma_read_block_record2(MARIA_HA *info, uchar *record,
}
case FIELD_BLOB:
{
- uint size_length= column->length - portable_sizeof_char_ptr;
- ulong blob_length= _ma_calc_blob_length(size_length, field_length_data);
+ uint column_size_length= column->length - portable_sizeof_char_ptr;
+ ulong blob_length= _ma_calc_blob_length(column_size_length,
+ field_length_data);
if (!found_blob)
{
@@ -3920,10 +3835,10 @@ int _ma_read_block_record2(MARIA_HA *info, uchar *record,
blob_buffer= info->rec_buff;
}
- memcpy(field_pos, field_length_data, size_length);
- memcpy_fixed(field_pos + size_length, (uchar *) & blob_buffer,
+ memcpy(field_pos, field_length_data, column_size_length);
+ memcpy_fixed(field_pos + column_size_length, (uchar *) &blob_buffer,
sizeof(char*));
- field_length_data+= size_length;
+ field_length_data+= column_size_length;
/*
After we have read one extent, then each blob is in it's own extent
@@ -5061,16 +4976,21 @@ my_bool write_hook_for_clr_end(enum translog_record_type type
(struct st_msg_to_write_hook_for_clr_end *)hook_arg;
DBUG_ASSERT(trn->trid != 0);
trn->undo_lsn= msg->previous_undo_lsn;
- share->state.state.checksum+= msg->checksum_delta;
switch (msg->undone_record_type) {
case LOGREC_UNDO_ROW_DELETE:
share->state.state.records++;
+ share->state.state.checksum+= msg->checksum_delta;
break;
case LOGREC_UNDO_ROW_INSERT:
share->state.state.records--;
+ share->state.state.checksum+= msg->checksum_delta;
break;
case LOGREC_UNDO_ROW_UPDATE:
+ share->state.state.checksum+= msg->checksum_delta;
+ break;
+ case LOGREC_UNDO_KEY_INSERT:
+ case LOGREC_UNDO_KEY_DELETE:
break;
default:
DBUG_ASSERT(0);
@@ -5180,7 +5100,7 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn,
pagecache_unlock_by_link(share->pagecache, page_link.link,
PAGECACHE_LOCK_WRITE_UNLOCK,
PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
- LSN_IMPOSSIBLE);
+ LSN_IMPOSSIBLE, 0);
DBUG_RETURN(my_errno);
}
/* Create new page */
@@ -5193,7 +5113,7 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn,
pagecache_unlock_by_link(share->pagecache, page_link.link,
PAGECACHE_LOCK_WRITE_UNLOCK,
PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
- LSN_IMPOSSIBLE);
+ LSN_IMPOSSIBLE, 0);
/* Fix bitmap, just in case */
empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET);
if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space))
@@ -5299,7 +5219,7 @@ err:
pagecache_unlock_by_link(share->pagecache, page_link.link,
PAGECACHE_LOCK_WRITE_UNLOCK,
PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
- LSN_IMPOSSIBLE);
+ LSN_IMPOSSIBLE, 0);
DBUG_RETURN(HA_ERR_WRONG_IN_RECORD);
}
@@ -5351,7 +5271,7 @@ uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn,
pagecache_unlock_by_link(share->pagecache, page_link.link,
PAGECACHE_LOCK_WRITE_UNLOCK,
PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
- LSN_IMPOSSIBLE);
+ LSN_IMPOSSIBLE, 0);
DBUG_RETURN(my_errno);
}
@@ -5365,7 +5285,7 @@ uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn,
pagecache_unlock_by_link(share->pagecache, page_link.link,
PAGECACHE_LOCK_WRITE_UNLOCK,
PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
- LSN_IMPOSSIBLE);
+ LSN_IMPOSSIBLE, 0);
if ((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == page_type)
{
@@ -5401,7 +5321,7 @@ err:
pagecache_unlock_by_link(share->pagecache, page_link.link,
PAGECACHE_LOCK_WRITE_UNLOCK,
PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
- LSN_IMPOSSIBLE);
+ LSN_IMPOSSIBLE, 0);
DBUG_RETURN(HA_ERR_WRONG_IN_RECORD);
}
@@ -5426,7 +5346,7 @@ uint _ma_apply_redo_free_blocks(MARIA_HA *info,
{
MARIA_SHARE *share= info->s;
uint ranges;
- DBUG_ENTER("_ma_apply_redo_purge_blocks");
+ DBUG_ENTER("_ma_apply_redo_free_blocks");
ranges= pagerange_korr(header);
header+= PAGERANGE_STORE_SIZE;
@@ -5493,7 +5413,7 @@ uint _ma_apply_redo_free_head_or_tail(MARIA_HA *info, LSN lsn,
pagecache_unlock_by_link(share->pagecache, page_link.link,
PAGECACHE_LOCK_WRITE_UNLOCK,
PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
- LSN_IMPOSSIBLE);
+ LSN_IMPOSSIBLE, 0);
DBUG_RETURN(1);
}
if (lsn_korr(buff) >= lsn)
@@ -5502,7 +5422,7 @@ uint _ma_apply_redo_free_head_or_tail(MARIA_HA *info, LSN lsn,
pagecache_unlock_by_link(share->pagecache, page_link.link,
PAGECACHE_LOCK_WRITE_UNLOCK,
PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
- LSN_IMPOSSIBLE);
+ LSN_IMPOSSIBLE, 0);
}
else
{
@@ -5618,7 +5538,7 @@ uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info,
pagecache_unlock_by_link(share->pagecache, page_link.link,
PAGECACHE_LOCK_WRITE_UNLOCK,
PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
- LSN_IMPOSSIBLE);
+ LSN_IMPOSSIBLE, 0);
DBUG_RETURN(my_errno);
}
/* Physical file was too short; Create new page */
@@ -5634,7 +5554,7 @@ uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info,
pagecache_unlock_by_link(share->pagecache, page_link.link,
PAGECACHE_LOCK_WRITE_UNLOCK,
PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
- LSN_IMPOSSIBLE);
+ LSN_IMPOSSIBLE, 0);
continue;
}
}
@@ -5685,15 +5605,12 @@ my_bool _ma_apply_undo_row_insert(MARIA_HA *info, LSN undo_lsn,
{
ulonglong page;
uint rownr;
- LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
- uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE +
- CLR_TYPE_STORE_SIZE + HA_CHECKSUM_STORE_SIZE],
- *buff;
+ uchar *buff;
my_bool res= 1;
MARIA_PINNED_PAGE page_link;
- LSN lsn;
MARIA_SHARE *share= info->s;
- struct st_msg_to_write_hook_for_clr_end msg;
+ ha_checksum checksum;
+ LSN lsn;
DBUG_ENTER("_ma_apply_undo_row_insert");
page= page_korr(header);
@@ -5710,6 +5627,7 @@ my_bool _ma_apply_undo_row_insert(MARIA_HA *info, LSN undo_lsn,
DBUG_RETURN(1);
page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
push_dynamic(&info->pinned_pages, (void*) &page_link);
if (read_row_extent_info(info, buff, rownr))
@@ -5722,26 +5640,11 @@ my_bool _ma_apply_undo_row_insert(MARIA_HA *info, LSN undo_lsn,
if (info->cur_row.extents && free_full_pages(info, &info->cur_row))
goto err;
- /* undo_lsn must be first for compression to work */
- lsn_store(log_data, undo_lsn);
- clr_type_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE,
- LOGREC_UNDO_ROW_INSERT);
- log_array[TRANSLOG_INTERNAL_PARTS + 0].length=
- sizeof(log_data) - HA_CHECKSUM_STORE_SIZE;
- msg.undone_record_type= LOGREC_UNDO_ROW_INSERT;
- msg.previous_undo_lsn= undo_lsn;
- store_checksum_in_rec(share, msg.checksum_delta,
- - ha_checksum_korr(header),
- log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE +
- CLR_TYPE_STORE_SIZE,
- log_array[TRANSLOG_INTERNAL_PARTS + 0].length);
- log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
-
- if (translog_write_record(&lsn, LOGREC_CLR_END,
- info->trn, info, log_array[TRANSLOG_INTERNAL_PARTS
- + 0].length,
- TRANSLOG_INTERNAL_PARTS + 1, log_array,
- log_data + LSN_STORE_SIZE, &msg))
+ checksum= 0;
+ if (share->calc_checksum)
+ checksum= -ha_checksum_korr(header);
+ if (_ma_write_clr(info, undo_lsn, LOGREC_UNDO_ROW_INSERT,
+ share->calc_checksum != 0, checksum, &lsn))
goto err;
res= 0;
@@ -5754,7 +5657,8 @@ err:
/* Execute undo of a row delete (insert the row back somewhere) */
my_bool _ma_apply_undo_row_delete(MARIA_HA *info, LSN undo_lsn,
- const uchar *header, size_t length)
+ const uchar *header,
+ size_t header_length __attribute__((unused)))
{
uchar *record;
const uchar *null_bits, *field_length_data;
@@ -5853,6 +5757,8 @@ my_bool _ma_apply_undo_row_delete(MARIA_HA *info, LSN undo_lsn,
header+= column->length;
break;
case FIELD_SKIP_ENDSPACE: /* CHAR */
+ {
+ uint length;
if (column->length <= 255)
length= (uint) *field_length_data++;
else
@@ -5868,6 +5774,7 @@ my_bool _ma_apply_undo_row_delete(MARIA_HA *info, LSN undo_lsn,
' ');
header+= length;
break;
+ }
case FIELD_VARCHAR:
{
uint length;
diff --git a/storage/maria/ma_blockrec.h b/storage/maria/ma_blockrec.h
index cd49a65a0c8..7f3bf11d96c 100644
--- a/storage/maria/ma_blockrec.h
+++ b/storage/maria/ma_blockrec.h
@@ -13,7 +13,6 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-
/*
Storage of records in block
*/
diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c
index b3090b98deb..0f970e77a0f 100644
--- a/storage/maria/ma_check.c
+++ b/storage/maria/ma_check.c
@@ -92,7 +92,7 @@ static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info,
static void copy_data_file_state(MARIA_STATE_INFO *to,
MARIA_STATE_INFO *from);
static int write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info);
-
+static void report_keypage_fault(HA_CHECK *param, my_off_t position);
void maria_chk_init(HA_CHECK *param)
{
@@ -218,7 +218,7 @@ int maria_chk_del(HA_CHECK *param, register MARIA_HA *info, uint test_flag)
else
{
param->record_checksum+=(ha_checksum) next_link;
- next_link= _ma_rec_pos(info->s, buff+1);
+ next_link= _ma_rec_pos(info, buff+1);
empty+=info->s->base.pack_reclength;
}
}
@@ -259,7 +259,7 @@ wrong:
/* Check delete links in index file */
-static int check_k_link(HA_CHECK *param, register MARIA_HA *info,
+static int check_k_link(HA_CHECK *param, register MARIA_HA *info,
my_off_t next_link)
{
uint block_size= info->s->block_size;
@@ -287,7 +287,7 @@ static int check_k_link(HA_CHECK *param, register MARIA_HA *info,
DBUG_RETURN(1);
/* purecov: end */
}
-
+
/* Key blocks must be aligned at block_size */
if (next_link & (block_size -1))
{
@@ -443,8 +443,8 @@ int maria_chk_key(HA_CHECK *param, register MARIA_HA *info)
if (!(param->testflag & T_SILENT)) puts("- check index reference");
all_keydata=all_totaldata=key_totlength=0;
- old_record_checksum=0;
init_checksum=param->record_checksum;
+ old_record_checksum=0;
if (share->data_file_type == STATIC_RECORD)
old_record_checksum= (calc_checksum(info->state->records +
info->state->del-1) *
@@ -474,14 +474,17 @@ int maria_chk_key(HA_CHECK *param, register MARIA_HA *info)
printf ("- check data record references index: %d\n",key+1);
if (keyinfo->flag & HA_FULLTEXT)
full_text_keys++;
- if (share->state.key_root[key] == HA_OFFSET_ERROR &&
- (info->state->records == 0 || keyinfo->flag & HA_FULLTEXT))
+ if (share->state.key_root[key] == HA_OFFSET_ERROR)
+ {
+ if (info->state->records != 0 && !(keyinfo->flag & HA_FULLTEXT))
+ _ma_check_print_error(param, "Key tree %u is empty", key + 1);
goto do_stat;
- if (!_ma_fetch_keypage(info,keyinfo,share->state.key_root[key],
- DFLT_INIT_HITS,info->buff,0))
+ }
+ if (!_ma_fetch_keypage(info, keyinfo, share->state.key_root[key],
+ PAGECACHE_LOCK_LEFT_UNLOCKED, DFLT_INIT_HITS,
+ info->buff, 0, 0))
{
- _ma_check_print_error(param,"Can't read indexpage from filepos: %s",
- llstr(share->state.key_root[key],buff));
+ report_keypage_fault(param, share->state.key_root[key]);
if (!(param->testflag & T_INFO))
DBUG_RETURN(-1);
result= -1;
@@ -513,7 +516,9 @@ int maria_chk_key(HA_CHECK *param, register MARIA_HA *info)
else if (old_record_checksum != param->record_checksum)
{
if (key)
- _ma_check_print_error(param,"Key %u doesn't point at same records that key 1",
+ _ma_check_print_error(param,
+ "Key %u doesn't point at same records as "
+ "key 1",
key+1);
else
_ma_check_print_error(param,"Key 1 doesn't point at all records");
@@ -600,6 +605,7 @@ do_stat:
} /* maria_chk_key */
+
static int chk_index_down(HA_CHECK *param, MARIA_HA *info,
MARIA_KEYDEF *keyinfo,
my_off_t page, uchar *buff, ha_rows *keys,
@@ -638,10 +644,10 @@ static int chk_index_down(HA_CHECK *param, MARIA_HA *info,
/* purecov: end */
}
- if (!_ma_fetch_keypage(info,keyinfo,page, DFLT_INIT_HITS,buff,0))
+ if (!_ma_fetch_keypage(info, keyinfo, page, PAGECACHE_LOCK_LEFT_UNLOCKED,
+ DFLT_INIT_HITS, buff, 0, 0))
{
- _ma_check_print_error(param,"Can't read key from filepos: %s",
- llstr(page,llbuff));
+ report_keypage_fault(param, page);
goto err;
}
param->key_file_blocks+=keyinfo->block_length;
@@ -1068,7 +1074,7 @@ static int check_static_record(HA_CHECK *param, MARIA_HA *info, int extend,
my_off_t start_recpos, pos;
char llbuff[22];
- pos= 0;
+ pos= 0;
while (pos < info->state->data_file_length)
{
if (*_ma_killed_ptr(param))
@@ -1114,7 +1120,7 @@ static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend,
LINT_INIT(start_recpos);
LINT_INIT(to);
- pos= 0;
+ pos= 0;
while (pos < info->state->data_file_length)
{
my_bool got_error= 0;
@@ -1217,7 +1223,7 @@ static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend,
if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
block_info.rec_len +
info->s->base.extra_rec_buff_size))
-
+
{
_ma_check_print_error(param,
"Not enough memory (%lu) for blob at %s",
@@ -1302,7 +1308,7 @@ static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend,
}
param->glob_crc+= checksum;
}
-
+
if (! got_error)
{
if (check_keys_in_record(param, info, extend, start_recpos, record))
@@ -1881,7 +1887,7 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info,int extend)
bzero((char*) param->tmp_key_crc,
info->s->base.keys * sizeof(param->tmp_key_crc[0]));
-
+
switch (info->s->data_file_type) {
case BLOCK_RECORD:
error= check_block_record(param, info, extend, record);
@@ -1989,7 +1995,7 @@ int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info,int extend)
llstr(param->records,llbuff),
(long)((param->used - param->link_used)/param->records),
(info->s->base.blobs ? 0.0 :
- (ulonglong2double((ulonglong) info->s->base.reclength *
+ (ulonglong2double((ulonglong) info->s->base.reclength *
param->records)-
my_off_t2double(param->used))/
ulonglong2double((ulonglong) info->s->base.reclength *
@@ -2520,8 +2526,9 @@ int maria_movepoint(register MARIA_HA *info, uchar *record,
nod_flag=_ma_test_if_nod(info, info->buff);
_ma_dpointer(info,info->int_keypos-nod_flag-
info->s->rec_reflength,newpos);
- if (_ma_write_keypage(info,keyinfo,info->last_keypage,
- DFLT_INIT_HITS,info->buff))
+ if (_ma_write_keypage(info, keyinfo, info->last_keypage,
+ PAGECACHE_LOCK_LEFT_UNLOCKED, DFLT_INIT_HITS,
+ info->buff))
DBUG_RETURN(-1);
}
else
@@ -2694,7 +2701,6 @@ static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
uchar *buff,*keypos,*endpos;
uchar key[HA_MAX_POSSIBLE_KEY_BUFF];
my_off_t new_page_pos,next_page;
- char llbuff[22];
DBUG_ENTER("sort_one_index");
/* cannot walk over R-tree indices */
@@ -2707,10 +2713,10 @@ static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
_ma_check_print_error(param,"Not enough memory for key block");
DBUG_RETURN(-1);
}
- if (!_ma_fetch_keypage(info,keyinfo,pagepos,DFLT_INIT_HITS,buff,0))
+ if (!_ma_fetch_keypage(info, keyinfo, pagepos,PAGECACHE_LOCK_LEFT_UNLOCKED,
+ DFLT_INIT_HITS, buff, 0, 0))
{
- _ma_check_print_error(param,"Can't read key block from filepos: %s",
- llstr(pagepos,llbuff));
+ report_keypage_fault(param, pagepos);
goto err;
}
if ((nod_flag=_ma_test_if_nod(info, buff)) || keyinfo->flag & HA_FULLTEXT)
@@ -3938,7 +3944,6 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
{
if (param->testflag & T_VERBOSE)
{
- char llbuff[22];
record_pos_to_txt(info, info->cur_row.lastpos, llbuff);
_ma_check_print_info(param,
"Found record with wrong checksum at %s",
@@ -4179,7 +4184,7 @@ static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
&sort_param->rec_buff_size,
block_info.rec_len +
info->s->base.extra_rec_buff_size))
-
+
{
if (param->max_record_length >= block_info.rec_len)
{
@@ -4441,7 +4446,7 @@ int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param)
from=sort_info->buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER);
}
/* We can use info->checksum here as only one thread calls this */
- info->cur_row.checksum= (*info->s->calc_check_checksum)(info,
+ info->cur_row.checksum= (*info->s->calc_check_checksum)(info,
sort_param->
record);
reclength= _ma_rec_pack(info,from,sort_param->record);
@@ -4735,6 +4740,7 @@ static int sort_insert_key(MARIA_SORT_PARAM *sort_param,
MARIA_KEYDEF *keyinfo=sort_param->keyinfo;
MARIA_SORT_INFO *sort_info= sort_param->sort_info;
HA_CHECK *param=sort_info->param;
+ MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
DBUG_ENTER("sort_insert_key");
anc_buff= key_block->buff;
@@ -4753,6 +4759,7 @@ static int sort_insert_key(MARIA_SORT_PARAM *sort_param,
}
a_length= info->s->keypage_header + nod_flag;
key_block->end_pos= anc_buff + info->s->keypage_header;
+ bzero(anc_buff, info->s->keypage_header);
_ma_store_keynr(info, anc_buff, (uint) (sort_param->keyinfo -
info->s->keyinfo));
lastkey=0; /* No previous key in block */
@@ -4783,13 +4790,16 @@ static int sort_insert_key(MARIA_SORT_PARAM *sort_param,
bzero(anc_buff+key_block->last_length,
keyinfo->block_length- key_block->last_length);
key_file_length=info->state->key_file_length;
- if ((filepos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR)
+ if ((filepos= _ma_new(info, DFLT_INIT_HITS, &page_link)) == HA_OFFSET_ERROR)
DBUG_RETURN(1);
/* If we read the page from the key cache, we have to write it back to it */
- if (key_file_length == info->state->key_file_length)
+ if (page_link->changed)
{
- if (_ma_write_keypage(info, keyinfo, filepos, DFLT_INIT_HITS, anc_buff))
+ pop_dynamic(&info->pinned_pages);
+ if (_ma_write_keypage(info, keyinfo, filepos,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ DFLT_INIT_HITS, anc_buff))
DBUG_RETURN(1);
}
else if (my_pwrite(info->s->kfile.file, anc_buff,
@@ -4882,6 +4892,7 @@ int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param)
myf myf_rw=sort_info->param->myf_rw;
MARIA_HA *info=sort_info->info;
MARIA_KEYDEF *keyinfo=sort_param->keyinfo;
+ MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
DBUG_ENTER("_ma_flush_pending_blocks");
filepos= HA_OFFSET_ERROR; /* if empty file */
@@ -4894,13 +4905,16 @@ int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param)
_ma_kpointer(info,key_block->end_pos,filepos);
key_file_length=info->state->key_file_length;
bzero(key_block->buff+length, keyinfo->block_length-length);
- if ((filepos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR)
+ if ((filepos= _ma_new(info, DFLT_INIT_HITS, &page_link)) ==
+ HA_OFFSET_ERROR)
DBUG_RETURN(1);
/* If we read the page from the key cache, we have to write it back */
- if (key_file_length == info->state->key_file_length)
+ if (page_link->changed)
{
+ pop_dynamic(&info->pinned_pages);
if (_ma_write_keypage(info, keyinfo, filepos,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
DFLT_INIT_HITS, key_block->buff))
DBUG_RETURN(1);
}
@@ -5583,7 +5597,7 @@ static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info,
length= uint2korr(info->scan.dir + 2);
end_of_data= data + length;
info->scan.dir-= DIR_ENTRY_SIZE; /* Point to previous row */
-
+
if (end_of_data > info->scan.dir_end ||
offset < PAGE_HEADER_SIZE || length < info->s->base.min_block_length)
{
@@ -5619,7 +5633,7 @@ read_next_page:
PAGECACHE_READ_UNKNOWN_PAGE,
PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
DBUG_RETURN(my_errno);
-
+
page_type= (info->scan.page_buff[PAGE_TYPE_OFFSET] &
PAGE_TYPE_MASK);
if (page_type == HEAD_PAGE)
@@ -5724,3 +5738,21 @@ static int write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info)
}
return 0;
}
+
+
+/* Report why reading a key page failed: HA_ERR_CRASHED is reported as bad page contents, anything else with my_errno */
+
+static void report_keypage_fault(HA_CHECK *param, my_off_t position)
+{
+ char buff[22]; /* Same size as the llbuff[22] buffers given to llstr() elsewhere in this file; 11 was too small for large file positions */
+
+ if (my_errno == HA_ERR_CRASHED)
+ _ma_check_print_error(param,
+ "Wrong base information on indexpage at filepos: %s",
+ llstr(position, buff));
+ else
+ _ma_check_print_error(param,
+ "Can't read indexpage from filepos: %s, "
+ "error: %d",
+ llstr(position,buff), my_errno);
+}
diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c
index 76f1723b053..ee1f3182278 100644
--- a/storage/maria/ma_checkpoint.c
+++ b/storage/maria/ma_checkpoint.c
@@ -32,7 +32,6 @@
#include "maria_def.h"
#include "ma_pagecache.h"
-#include "trnman.h"
#include "ma_blockrec.h"
#include "ma_checkpoint.h"
#include "ma_loghandler_lsn.h"
diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c
index 9b654803945..27700ff4ff3 100644
--- a/storage/maria/ma_close.c
+++ b/storage/maria/ma_close.c
@@ -148,7 +148,8 @@ int maria_close(register MARIA_HA *info)
error = my_errno;
}
- my_free((uchar*) info,MYF(0));
+ delete_dynamic(&info->pinned_pages);
+ my_free(info, MYF(0));
if (error)
DBUG_RETURN(my_errno= error);
diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c
index 9ebe21ac15a..122fa9f38ee 100644
--- a/storage/maria/ma_control_file.c
+++ b/storage/maria/ma_control_file.c
@@ -92,6 +92,7 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open()
{
char buffer[CONTROL_FILE_SIZE];
char name[FN_REFLEN];
+ const char *errmsg;
MY_STAT stat_buff;
my_bool create_file;
int open_flags= O_BINARY | /*O_DIRECT |*/ O_RDWR;
@@ -121,7 +122,8 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open()
if (maria_in_recovery)
DBUG_RETURN(CONTROL_FILE_MISSING);
if ((control_file_fd= my_create(name, 0,
- open_flags, MYF(MY_SYNC_DIR))) < 0)
+ open_flags,
+ MYF(MY_SYNC_DIR | MY_WME))) < 0)
DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR);
/* Create unique uuid for the control file */
@@ -153,10 +155,16 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open()
/* Otherwise, file exists */
if ((control_file_fd= my_open(name, open_flags, MYF(MY_WME))) < 0)
+ {
+ errmsg= "Can't open file";
goto err;
+ }
- if (my_stat(name, &stat_buff, MYF(MY_WME)) == NULL)
+ if (my_stat(name, &stat_buff, MYF(0)) == NULL)
+ {
+ errmsg= "Can't read status";
goto err;
+ }
if ((uint)stat_buff.st_size < CONTROL_FILE_SIZE)
{
@@ -176,6 +184,7 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open()
MySQL's error log at startup.
*/
error= CONTROL_FILE_TOO_SMALL;
+ errmsg= "File size too small";
goto err;
}
@@ -183,17 +192,21 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open()
{
/* TODO: store "too big file" message */
error= CONTROL_FILE_TOO_BIG;
+ errmsg= "File size bigger than expected";
goto err;
}
- if (my_read(control_file_fd, buffer, CONTROL_FILE_SIZE,
- MYF(MY_FNABP | MY_WME)))
+ if (my_read(control_file_fd, buffer, CONTROL_FILE_SIZE, MYF(MY_FNABP)))
+ {
+ errmsg= "Can't read file";
goto err;
+ }
if (memcmp(buffer + CONTROL_FILE_MAGIC_STRING_OFFSET,
CONTROL_FILE_MAGIC_STRING, CONTROL_FILE_MAGIC_STRING_SIZE))
{
/* TODO: store message "bad magic string" somewhere */
error= CONTROL_FILE_BAD_MAGIC_STRING;
+ errmsg= "Missing valid id at start of file";
goto err;
}
memcpy(maria_uuid, buffer + CONTROL_FILE_UUID_OFFSET,
@@ -203,15 +216,19 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open()
CONTROL_FILE_SIZE - CONTROL_FILE_LSN_OFFSET) !=
uint4korr(buffer + CONTROL_FILE_CHECKSUM_OFFSET))
{
- /* TODO: store message "checksum mismatch" somewhere */
error= CONTROL_FILE_BAD_CHECKSUM;
+ errmsg= "Checksum mismatch";
goto err;
}
last_checkpoint_lsn= lsn_korr(buffer + CONTROL_FILE_LSN_OFFSET);
last_logno= uint4korr(buffer + CONTROL_FILE_FILENO_OFFSET);
DBUG_RETURN(0);
+
err:
+ my_printf_error(HA_ERR_INITIALIZATION,
+ "Error when trying to use maria control file '%s': %s", 0,
+ name, errmsg);
ma_control_file_end();
DBUG_RETURN(error);
}
@@ -247,7 +264,7 @@ err:
*/
int ma_control_file_write_and_force(const LSN checkpoint_lsn, uint32 logno,
- uint objs_to_write)
+ uint objs_to_write)
{
char buffer[CONTROL_FILE_SIZE];
my_bool update_checkpoint_lsn= FALSE, update_logno= FALSE;
diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c
index cdb28e11ae3..95e1fa1fdcb 100644
--- a/storage/maria/ma_create.c
+++ b/storage/maria/ma_create.c
@@ -188,6 +188,8 @@ int maria_create(const char *name, enum data_file_type datafile_type,
max_field_lengths++;
packed++;
column->fill_length= 1;
+ options|= HA_OPTION_NULL_FIELDS; /* Use ma_checksum() */
+
/* We must test for 257 as length includes pack-length */
if (test(column->length >= 257))
{
@@ -270,16 +272,17 @@ int maria_create(const char *name, enum data_file_type datafile_type,
{
options|= HA_OPTION_TMP_TABLE;
tmp_table= TRUE;
- create_mode|= O_EXCL | O_NOFOLLOW;
+ create_mode|= O_NOFOLLOW;
/* "CREATE TEMPORARY" tables are not crash-safe (dropped at restart) */
ci->transactional= FALSE;
+ flags&= ~HA_CREATE_PAGE_CHECKSUM;
}
share.base.null_bytes= ci->null_bytes;
share.base.original_null_bytes= ci->null_bytes;
share.base.born_transactional= ci->transactional;
share.base.max_field_lengths= max_field_lengths;
share.base.field_offsets= 0; /* for future */
-
+
if (pack_reclength != INT_MAX32)
pack_reclength+= max_field_lengths + long_varchar_count;
@@ -654,9 +657,9 @@ int maria_create(const char *name, enum data_file_type datafile_type,
share.state.dellink = HA_OFFSET_ERROR;
share.state.first_bitmap_with_space= 0;
+#ifdef EXTERNAL_LOCKING
share.state.process= (ulong) getpid();
- share.state.unique= (ulong) 0;
- share.state.update_count=(ulong) 0;
+#endif
share.state.version= (ulong) time((time_t*) 0);
share.state.sortkey= (ushort) ~0;
share.state.auto_increment=ci->auto_increment;
@@ -957,7 +960,7 @@ int maria_create(const char *name, enum data_file_type datafile_type,
char empty_string[]= "";
LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 4];
uint total_rec_length= 0;
- uint i;
+ uint k;
LSN lsn;
log_array[TRANSLOG_INTERNAL_PARTS + 1].length= 1 + 2 + 2 +
kfile_size_before_extension;
@@ -988,9 +991,9 @@ int maria_create(const char *name, enum data_file_type datafile_type,
(ci->index_file_name ? ci->index_file_name : empty_string);
log_array[TRANSLOG_INTERNAL_PARTS + 3].length=
strlen(log_array[TRANSLOG_INTERNAL_PARTS + 3].str) + 1;
- for (i= TRANSLOG_INTERNAL_PARTS;
- i < (sizeof(log_array)/sizeof(log_array[0])); i++)
- total_rec_length+= log_array[i].length;
+ for (k= TRANSLOG_INTERNAL_PARTS;
+ k < (sizeof(log_array)/sizeof(log_array[0])); k++)
+ total_rec_length+= log_array[k].length;
/**
For this record to be of any use for Recovery, we need the upper
MySQL layer to be crash-safe, which it is not now (that would require
diff --git a/storage/maria/ma_delete.c b/storage/maria/ma_delete.c
index 543be5142d2..fb13118ab75 100644
--- a/storage/maria/ma_delete.c
+++ b/storage/maria/ma_delete.c
@@ -17,19 +17,26 @@
#include "ma_fulltext.h"
#include "ma_rt_index.h"
+#include "trnman.h"
+#include "ma_key_recover.h"
static int d_search(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uint comp_flag,
- uchar *key,uint key_length,my_off_t page,uchar *anc_buff);
-static int del(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uchar *key,uchar *anc_buff,
- my_off_t leaf_page,uchar *leaf_buff,uchar *keypos,
- my_off_t next_block,uchar *ret_key);
+ uchar *key, uint key_length,
+ my_off_t page, uchar *anc_buff,
+ MARIA_PINNED_PAGE *anc_page_link);
+static int del(MARIA_HA *info,MARIA_KEYDEF *keyinfo, uchar *key,
+ uchar *anc_buff, my_off_t leaf_page, uchar *leaf_buff,
+ MARIA_PINNED_PAGE *leaf_page_link, uchar *keypos,
+ my_off_t next_block, uchar *ret_key);
static int underflow(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uchar *anc_buff,
- my_off_t leaf_page,uchar *leaf_buff,uchar *keypos);
+ my_off_t leaf_page,uchar *leaf_buff,
+ MARIA_PINNED_PAGE *leaf_page_link, uchar *keypos);
static uint remove_key(MARIA_KEYDEF *keyinfo,uint nod_flag,uchar *keypos,
uchar *lastkey,uchar *page_end,
- my_off_t *next_block);
-static int _ma_ck_real_delete(register MARIA_HA *info,MARIA_KEYDEF *keyinfo,
- uchar *key, uint key_length, my_off_t *root);
+ my_off_t *next_block, MARIA_KEY_PARAM *s_temp);
+static my_bool _ma_log_delete(MARIA_HA *info, my_off_t page, uchar *buff,
+ uchar *key_pos, uint move_length,
+ uint change_length);
int maria_delete(MARIA_HA *info,const uchar *record)
@@ -108,7 +115,7 @@ int maria_delete(MARIA_HA *info,const uchar *record)
info->update= HA_STATE_CHANGED+HA_STATE_DELETED+HA_STATE_ROW_CHANGED;
info->state->records-= !share->now_transactional;
share->state.changed|= STATE_NOT_OPTIMIZED_ROWS;
-
+
mi_sizestore(lastpos, info->cur_row.lastpos);
VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE));
allow_break(); /* Allow SIGHUP & SIGINT */
@@ -142,23 +149,75 @@ err:
} /* maria_delete */
- /* Remove a key from the btree index */
+/* Remove a key from the btree index */
int _ma_ck_delete(register MARIA_HA *info, uint keynr, uchar *key,
uint key_length)
{
- return _ma_ck_real_delete(info, info->s->keyinfo+keynr, key, key_length,
- &info->s->state.key_root[keynr]);
+ int res;
+ LSN lsn= LSN_IMPOSSIBLE;
+ my_off_t new_root= info->s->state.key_root[keynr];
+ DBUG_ENTER("_ma_ck_delete");
+
+ res= _ma_ck_real_delete(info, info->s->keyinfo+keynr, key, key_length,
+ &new_root);
+
+ if (!res && info->s->now_transactional)
+ {
+ uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE +
+ KEY_NR_STORE_SIZE + PAGE_STORE_SIZE], *log_pos;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
+ struct st_msg_to_write_hook_for_undo_key msg;
+ enum translog_record_type log_type= LOGREC_UNDO_KEY_DELETE;
+
+ lsn_store(log_data, info->trn->undo_lsn);
+ key_nr_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE, keynr);
+ log_pos= log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE;
+
+ if (new_root != info->s->state.key_root[keynr])
+ {
+ my_off_t page;
+ page= ((new_root == HA_OFFSET_ERROR) ? IMPOSSIBLE_PAGE_NO :
+ new_root / info->s->block_size);
+ page_store(log_pos, page);
+ log_pos+= PAGE_STORE_SIZE;
+ log_type= LOGREC_UNDO_KEY_DELETE_WITH_ROOT;
+ }
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - log_data);
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char*) key;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= key_length;
+
+ msg.root= &info->s->state.key_root[keynr];
+ msg.value= new_root;
+
+ if (translog_write_record(&lsn, log_type,
+ info->trn, info,
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length +
+ key_length,
+ TRANSLOG_INTERNAL_PARTS + 2, log_array,
+ log_data + LSN_STORE_SIZE, &msg))
+ res= -1;
+ }
+ else
+ {
+ info->s->state.key_root[keynr]= new_root;
+ _ma_fast_unlock_key_del(info);
+ }
+ _ma_unpin_all_pages_and_finalize_row(info, lsn);
+ DBUG_RETURN(res);
} /* _ma_ck_delete */
-static int _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
- uchar *key, uint key_length, my_off_t *root)
+int _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *key, uint key_length, my_off_t *root)
{
int error;
uint nod_flag;
my_off_t old_root;
uchar *root_buff;
+ MARIA_PINNED_PAGE *page_link;
DBUG_ENTER("_ma_ck_real_delete");
if ((old_root=*root) == HA_OFFSET_ERROR)
@@ -167,13 +226,15 @@ static int _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
DBUG_RETURN(my_errno=HA_ERR_CRASHED);
}
if (!(root_buff= (uchar*) my_alloca((uint) keyinfo->block_length+
- HA_MAX_KEY_BUFF*2)))
+ HA_MAX_KEY_BUFF*2)))
{
DBUG_PRINT("error",("Couldn't allocate memory"));
DBUG_RETURN(my_errno=ENOMEM);
}
DBUG_PRINT("info",("root_page: %ld", (long) old_root));
- if (!_ma_fetch_keypage(info,keyinfo,old_root,DFLT_INIT_HITS,root_buff,0))
+ if (!_ma_fetch_keypage(info, keyinfo, old_root,
+ PAGECACHE_LOCK_WRITE, DFLT_INIT_HITS, root_buff, 0,
+ &page_link))
{
error= -1;
goto err;
@@ -181,7 +242,7 @@ static int _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
if ((error=d_search(info,keyinfo,
(keyinfo->flag & HA_FULLTEXT ?
SEARCH_FIND | SEARCH_UPDATE : SEARCH_SAME),
- key,key_length,old_root,root_buff)) >0)
+ key, key_length, old_root, root_buff, page_link)) >0)
{
if (error == 2)
{
@@ -192,6 +253,7 @@ static int _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
{
uint used_length;
_ma_get_used_and_nod(info, root_buff, used_length, nod_flag);
+ page_link->changed= 1;
if (used_length <= nod_flag + info->s->keypage_header + 1)
{
error=0;
@@ -200,12 +262,13 @@ static int _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
nod_flag);
else
*root=HA_OFFSET_ERROR;
- if (_ma_dispose(info,keyinfo,old_root,DFLT_INIT_HITS))
+ if (_ma_dispose(info, old_root, 0))
error= -1;
}
else
- error= _ma_write_keypage(info,keyinfo,old_root,
- DFLT_INIT_HITS,root_buff);
+ error= _ma_write_keypage(info,keyinfo, old_root,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, root_buff);
}
}
err:
@@ -218,15 +281,19 @@ err:
/*
@brief Remove key below key root
+ @param key Key to delete. Will contain new key if block was enlarged
+
@return
- @retval 1 If there are less buffers; In this case anc_buff is not saved
- @retval 2 If there are more buffers
- @retval -1 On errors
+ @retval 0 ok (anc_page is not changed)
+ @retval 1 If data on page is too small; In this case anc_buff is not saved
+ @retval 2 If data on page is too big
+ @retval -1 On errors
*/
static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
uint comp_flag, uchar *key, uint key_length,
- my_off_t page, uchar *anc_buff)
+ my_off_t anc_page, uchar *anc_buff,
+ MARIA_PINNED_PAGE *anc_page_link)
{
int flag,ret_value,save_flag;
uint length,nod_flag,search_key_length;
@@ -234,6 +301,8 @@ static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
uchar *leaf_buff,*keypos;
my_off_t leaf_page,next_block;
uchar lastkey[HA_MAX_KEY_BUFF];
+ MARIA_PINNED_PAGE *leaf_page_link;
+ MARIA_KEY_PARAM s_temp;
DBUG_ENTER("d_search");
DBUG_DUMP("page",anc_buff,_ma_get_page_used(info, anc_buff));
@@ -279,7 +348,8 @@ static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
my_off_t root;
uchar *kpos=keypos;
- if (!(tmp_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&kpos,lastkey)))
+ if (!(tmp_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&kpos,
+ lastkey)))
{
maria_print_error(info->s, HA_ERR_CRASHED);
my_errno= HA_ERR_CRASHED;
@@ -289,24 +359,29 @@ static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
if (subkeys == -1)
{
/* the last entry in sub-tree */
- if (_ma_dispose(info, keyinfo, root,DFLT_INIT_HITS))
+ if (_ma_dispose(info, root, 1))
DBUG_RETURN(-1);
/* fall through to normal delete */
}
else
{
keyinfo=&info->s->ft2_keyinfo;
- kpos-=keyinfo->keylength+nod_flag; /* we'll modify key entry 'in vivo' */
+ /* we'll modify key entry 'in vivo' */
+ kpos-=keyinfo->keylength+nod_flag;
get_key_full_length_rdonly(off, key);
key+=off;
ret_value= _ma_ck_real_delete(info, &info->s->ft2_keyinfo,
- key, HA_FT_WLEN, &root);
+ key, HA_FT_WLEN, &root);
_ma_dpointer(info, kpos+HA_FT_WLEN, root);
subkeys++;
ft_intXstore(kpos, subkeys);
if (!ret_value)
- ret_value= _ma_write_keypage(info,keyinfo,page,
- DFLT_INIT_HITS,anc_buff);
+ {
+ anc_page_link->changed= 1;
+ ret_value= _ma_write_keypage(info, keyinfo, anc_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, anc_buff);
+ }
DBUG_PRINT("exit",("Return: %d",ret_value));
DBUG_RETURN(ret_value);
}
@@ -320,12 +395,13 @@ static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
if (!(leaf_buff= (uchar*) my_alloca((uint) keyinfo->block_length+
HA_MAX_KEY_BUFF*2)))
{
- DBUG_PRINT("error",("Couldn't allocate memory"));
+ DBUG_PRINT("error", ("Couldn't allocate memory"));
my_errno=ENOMEM;
- DBUG_PRINT("exit",("Return: %d",-1));
DBUG_RETURN(-1);
}
- if (!_ma_fetch_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff,0))
+ if (!_ma_fetch_keypage(info,keyinfo,leaf_page,
+ PAGECACHE_LOCK_WRITE, DFLT_INIT_HITS, leaf_buff,
+ 0, &leaf_page_link))
goto err;
}
@@ -339,47 +415,64 @@ static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
goto err;
}
save_flag=0;
- ret_value=d_search(info,keyinfo,comp_flag,key,key_length,
- leaf_page,leaf_buff);
+ ret_value=d_search(info, keyinfo, comp_flag, key, key_length,
+ leaf_page, leaf_buff, leaf_page_link);
}
else
{ /* Found key */
uint tmp;
length= _ma_get_page_used(info, anc_buff);
if (!(tmp= remove_key(keyinfo,nod_flag,keypos,lastkey,anc_buff+length,
- &next_block)))
+ &next_block, &s_temp)))
goto err;
+ anc_page_link->changed= 1;
length-= tmp;
-
_ma_store_page_used(info, anc_buff, length, nod_flag);
+
+ /*
+ Log initial changes on pages
+ If there is an underflow, there will be more changes logged to the
+ page
+ */
+ if (info->s->now_transactional &&
+ _ma_log_delete(info, anc_page, anc_buff, s_temp.key_pos,
+ s_temp.move_length, s_temp.changed_length))
+ DBUG_RETURN(-1);
+
if (!nod_flag)
{ /* On leaf page */
- if (_ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,anc_buff))
+ if (test(length <= (info->quick_mode ?
+ MARIA_MIN_KEYBLOCK_LENGTH :
+ (uint) keyinfo->underflow_block_length)))
{
- DBUG_PRINT("exit",("Return: %d",-1));
- DBUG_RETURN(-1);
+ /* Page will be written by caller if we return 1 */
+ DBUG_RETURN(1);
}
- /* Page will be update later if we return 1 */
- DBUG_RETURN(test(length <= (info->quick_mode ? MARIA_MIN_KEYBLOCK_LENGTH :
- (uint) keyinfo->underflow_block_length)));
+ if (_ma_write_keypage(info, keyinfo, anc_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, DFLT_INIT_HITS,
+ anc_buff))
+ DBUG_RETURN(-1);
+ DBUG_RETURN(0);
}
- save_flag=1;
- ret_value=del(info,keyinfo,key,anc_buff,leaf_page,leaf_buff,keypos,
- next_block,lastkey);
+ save_flag=1; /* Mark that anc_buff is changed */
+ ret_value= del(info, keyinfo, key, anc_buff, leaf_page, leaf_buff,
+ leaf_page_link, keypos, next_block, lastkey);
}
if (ret_value >0)
{
save_flag=1;
if (ret_value == 1)
- ret_value= underflow(info,keyinfo,anc_buff,leaf_page,leaf_buff,keypos);
+ ret_value= underflow(info, keyinfo, anc_buff, leaf_page, leaf_buff,
+ leaf_page_link, keypos);
else
{ /* This happens only with packed keys */
DBUG_PRINT("test",("Enlarging of key when deleting"));
if (!_ma_get_last_key(info,keyinfo,anc_buff,lastkey,keypos,&length))
goto err;
- ret_value= _ma_insert(info,keyinfo,key,anc_buff,keypos,lastkey,
- (uchar*) 0,(uchar*) 0,(my_off_t) 0,(my_bool) 0);
+ ret_value= _ma_insert(info, keyinfo, key, anc_buff, keypos, anc_page,
+ lastkey, (my_off_t) 0, (uchar*) 0,
+ (MARIA_PINNED_PAGE*) 0, (uchar*) 0, (my_bool) 0);
}
}
if (ret_value == 0 && _ma_get_page_used(info, anc_buff) >
@@ -389,7 +482,12 @@ static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
ret_value= _ma_split_page(info,keyinfo,key,anc_buff,lastkey,0) | 2;
}
if (save_flag && ret_value != 1)
- ret_value|= _ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,anc_buff);
+ {
+ anc_page_link->changed= 1;
+ ret_value|= _ma_write_keypage(info, keyinfo, anc_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, anc_buff);
+ }
else
{
DBUG_DUMP("page", anc_buff, _ma_get_page_used(info, anc_buff));
@@ -405,11 +503,11 @@ err:
} /* d_search */
- /* Remove a key that has a page-reference */
+/* Remove a key that has a page-reference */
static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
uchar *key, uchar *anc_buff, my_off_t leaf_page,
- uchar *leaf_buff,
+ uchar *leaf_buff, MARIA_PINNED_PAGE *leaf_page_link,
uchar *keypos, /* Pos to where deleted key was */
my_off_t next_block,
uchar *ret_key) /* key before keypos in anc_buff */
@@ -420,6 +518,7 @@ static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
uchar keybuff[HA_MAX_KEY_BUFF],*endpos,*next_buff,*key_start, *prev_key;
MARIA_SHARE *share=info->s;
MARIA_KEY_PARAM s_temp;
+ MARIA_PINNED_PAGE *next_page_link;
DBUG_ENTER("del");
DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx", (long) leaf_page,
(ulong) keypos));
@@ -438,20 +537,21 @@ static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
if (!(next_buff= (uchar*) my_alloca((uint) keyinfo->block_length+
HA_MAX_KEY_BUFF*2)))
DBUG_RETURN(-1);
- if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,next_buff,0))
+ if (!_ma_fetch_keypage(info, keyinfo, next_page, PAGECACHE_LOCK_WRITE,
+ DFLT_INIT_HITS, next_buff, 0, &next_page_link))
ret_value= -1;
else
{
DBUG_DUMP("next_page", next_buff, _ma_get_page_used(info, next_buff));
- if ((ret_value=del(info,keyinfo,key,anc_buff,next_page,next_buff,
- keypos,next_block,ret_key)) >0)
+ if ((ret_value= del(info,keyinfo,key,anc_buff, next_page, next_buff,
+ next_page_link, keypos, next_block, ret_key)) >0)
{
/* Get new length after key was deleted */
endpos=leaf_buff+_ma_get_page_used(info, leaf_buff);
if (ret_value == 1)
{
- ret_value=underflow(info,keyinfo,leaf_buff,next_page,
- next_buff,endpos);
+ ret_value= underflow(info, keyinfo, leaf_buff, next_page,
+ next_buff, next_page_link, endpos);
if (ret_value == 0 &&
_ma_get_page_used(info, leaf_buff) >
(uint) (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE))
@@ -466,11 +566,15 @@ static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
if (!_ma_get_last_key(info,keyinfo,leaf_buff,keybuff,endpos,
&tmp))
goto err;
- ret_value= _ma_insert(info,keyinfo,key,leaf_buff,endpos,keybuff,
- (uchar*) 0,(uchar*) 0,(my_off_t) 0,0);
+ ret_value= _ma_insert(info, keyinfo, key, leaf_buff, endpos,
+ leaf_page, keybuff, (my_off_t) 0, (uchar*) 0,
+ (MARIA_PINNED_PAGE *) 0, (uchar*) 0, 0);
}
}
- if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff))
+ leaf_page_link->changed= 1;
+ if (_ma_write_keypage(info, keyinfo, leaf_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, leaf_buff))
goto err;
}
my_afree(next_buff);
@@ -478,8 +582,11 @@ static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
}
/* Remove last key from leaf page */
+ leaf_page_link->changed= 1;
_ma_store_page_used(info, leaf_buff, key_start-leaf_buff, nod_flag);
- if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff))
+ if (_ma_write_keypage(info, keyinfo, leaf_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, DFLT_INIT_HITS,
+ leaf_buff))
goto err;
/* Place last key in ancestor page on deleted key position */
@@ -521,6 +628,7 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
uchar *anc_buff,
my_off_t leaf_page,/* Ancestor page and underflow page */
uchar *leaf_buff,
+ MARIA_PINNED_PAGE *leaf_page_link,
uchar *keypos) /* Position to pos after key */
{
int t_length;
@@ -532,6 +640,7 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
uchar *after_key;
MARIA_KEY_PARAM s_temp;
MARIA_SHARE *share=info->s;
+ MARIA_PINNED_PAGE *next_page_link;
DBUG_ENTER("underflow");
DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx",(long) leaf_page,
(ulong) keypos));
@@ -569,7 +678,8 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
goto err;
}
next_page= _ma_kpos(key_reflength,next_keypos);
- if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff,0))
+ if (!_ma_fetch_keypage(info,keyinfo, next_page, PAGECACHE_LOCK_WRITE,
+ DFLT_INIT_HITS, buff, 0, &next_page_link))
goto err;
buff_length= _ma_get_page_used(info, buff);
DBUG_DUMP("next",buff,buff_length);
@@ -599,7 +709,7 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
/* remove key from anc_buff */
if (!(s_length=remove_key(keyinfo,key_reflength,keypos,anc_key,
- anc_buff+anc_length,(my_off_t *) 0)))
+ anc_buff+anc_length,(my_off_t *) 0, &s_temp)))
goto err;
anc_length-=s_length;
@@ -608,7 +718,8 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
if (buff_length <= (uint) (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE))
{ /* Keys in one page */
memcpy(leaf_buff,buff,(size_t) buff_length);
- if (_ma_dispose(info,keyinfo,next_page,DFLT_INIT_HITS))
+ next_page_link->changed= 1;
+ if (_ma_dispose(info, next_page, 0))
goto err;
}
else
@@ -660,10 +771,16 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
(*keyinfo->store_key)(keyinfo,buff+p_length,&s_temp);
_ma_store_page_used(info, buff, length + t_length + p_length, nod_flag);
- if (_ma_write_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff))
+ next_page_link->changed= 1;
+ if (_ma_write_keypage(info, keyinfo, next_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, DFLT_INIT_HITS,
+ buff))
goto err;
}
- if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff))
+ leaf_page_link->changed= 1;
+ if (_ma_write_keypage(info, keyinfo, leaf_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, DFLT_INIT_HITS,
+ leaf_buff))
goto err;
DBUG_RETURN(anc_length <= ((info->quick_mode ? MARIA_MIN_BLOCK_LENGTH :
(uint) keyinfo->underflow_block_length)));
@@ -675,8 +792,9 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
if (!keypos)
goto err;
next_page= _ma_kpos(key_reflength,keypos);
- if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff,0))
- goto err;
+ if (!_ma_fetch_keypage(info, keyinfo, next_page, PAGECACHE_LOCK_WRITE,
+ DFLT_INIT_HITS, buff, 0, &next_page_link))
+ goto err;
buff_length= _ma_get_page_used(info, buff);
endpos=buff+buff_length;
DBUG_DUMP("prev",buff,buff_length);
@@ -711,7 +829,7 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
/* remove key from anc_buff */
if (!(s_length= remove_key(keyinfo,key_reflength,keypos,anc_key,
- anc_buff+anc_length,(my_off_t *) 0)))
+ anc_buff+anc_length,(my_off_t *) 0, &s_temp)))
goto err;
anc_length-=s_length;
@@ -719,7 +837,8 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
if (buff_length <= (uint) (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE))
{ /* Keys in one page */
- if (_ma_dispose(info,keyinfo,leaf_page,DFLT_INIT_HITS))
+ leaf_page_link->changed= 1;
+ if (_ma_dispose(info, leaf_page, 0))
goto err;
}
else
@@ -768,11 +887,16 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
(*keyinfo->store_key)(keyinfo,leaf_buff+p_length,&s_temp);
_ma_store_page_used(info, leaf_buff, length + t_length + p_length,
nod_flag);
- if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff))
+ leaf_page_link->changed= 1;
+ if (_ma_write_keypage(info, keyinfo, leaf_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, DFLT_INIT_HITS,
+ leaf_buff))
goto err;
_ma_store_page_used(info, buff, (uint) (endpos - buff),nod_flag);
}
- if (_ma_write_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff))
+ next_page_link->changed= 1;
+ if (_ma_write_keypage(info, keyinfo, next_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, DFLT_INIT_HITS, buff))
goto err;
DBUG_RETURN(anc_length <= (uint)
(keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE)/2);
@@ -782,25 +906,43 @@ err:
} /* underflow */
- /*
- remove a key from packed buffert
- The current code doesn't handle the case that the next key may be
- packed better against the previous key if there is a case difference
- returns how many chars was removed or 0 on error
- */
+/*
+ @brief Remove a key from page
+
+ @fn remove_key()
+ keyinfo Key handle
+ keypos Where on page key starts
+ lastkey Unpacked version of key to be removed
+ page_end Pointer to end of page
+ next_block If <> 0 and node-page, this is set to address of
+ next page
+ s_temp Information about what changes were done on the page:
+ s_temp.key_pos Start of key
+ s_temp.move_length Number of bytes removed at keypos
+ s_temp.changed_length Number of bytes changed at keypos
+
+ @todo
+ The current code doesn't handle the case that the next key may be
+ packed better against the previous key if there is a case difference
+
+ @return
+ @retval 0 error
+ @retval # How many chars were removed
+*/
static uint remove_key(MARIA_KEYDEF *keyinfo, uint nod_flag,
- uchar *keypos, /* Where key starts */
- uchar *lastkey, /* key to be removed */
- uchar *page_end, /* End of page */
- my_off_t *next_block) /* ptr to next block */
+ uchar *keypos, uchar *lastkey,
+ uchar *page_end, my_off_t *next_block,
+ MARIA_KEY_PARAM *s_temp)
{
int s_length;
uchar *start;
DBUG_ENTER("remove_key");
- DBUG_PRINT("enter",("keypos: 0x%lx page_end: 0x%lx",(long) keypos, (long) page_end));
+ DBUG_PRINT("enter", ("keypos: 0x%lx page_end: 0x%lx",
+ (long) keypos, (long) page_end));
- start=keypos;
+ start= s_temp->key_pos= keypos;
+ s_temp->changed_length= 0;
if (!(keyinfo->flag &
(HA_PACK_KEY | HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY |
HA_BINARY_PACK_KEY)))
@@ -822,18 +964,21 @@ static uint remove_key(MARIA_KEYDEF *keyinfo, uint nod_flag,
{
if (keyinfo->flag & HA_BINARY_PACK_KEY)
{
- uchar *old_key=start;
+ uchar *old_key= start;
uint next_length,prev_length,prev_pack_length;
+
+ /* keypos points here on start of next key */
get_key_length(next_length,keypos);
get_key_pack_length(prev_length,prev_pack_length,old_key);
if (next_length > prev_length)
{
+ uint diff= (next_length-prev_length);
/* We have to copy data from the current key to the next key */
- bmove_upp(keypos, (lastkey+next_length),
- (next_length-prev_length));
- keypos-=(next_length-prev_length)+prev_pack_length;
- store_key_length(keypos,prev_length);
+ keypos-= diff + prev_pack_length;
+ store_key_length(keypos, prev_length);
+ bmove(keypos + prev_pack_length, lastkey + prev_length, diff);
s_length=(int) (keypos-start);
+ s_temp->changed_length= diff + prev_pack_length;
}
}
else
@@ -874,13 +1019,15 @@ static uint remove_key(MARIA_KEYDEF *keyinfo, uint nod_flag,
get_key_length(rest_length,keypos);
if (next_length >= prev_length)
- { /* Key after is based on deleted key */
- uint pack_length,tmp;
- bmove_upp(keypos, (lastkey+next_length),
- tmp=(next_length-prev_length));
- rest_length+=tmp;
+ {
+ /* Next key is based on deleted key */
+ uint pack_length;
+ uint diff= (next_length-prev_length);
+
+ bmove(keypos - diff, lastkey + prev_length, diff);
+ rest_length+= diff;
pack_length= prev_length ? get_pack_length(rest_length): 0;
- keypos-=tmp+pack_length+prev_pack_length;
+ keypos-= diff + pack_length + prev_pack_length;
s_length=(int) (keypos-start);
if (prev_length) /* Pack against prev key */
{
@@ -903,6 +1050,7 @@ static uint remove_key(MARIA_KEYDEF *keyinfo, uint nod_flag,
else
*keypos= rest_length;
}
+ s_temp->changed_length= diff + pack_length + prev_pack_length;
}
}
}
@@ -910,5 +1058,60 @@ static uint remove_key(MARIA_KEYDEF *keyinfo, uint nod_flag,
}
end:
bmove(start, start+s_length, (uint) (page_end-start-s_length));
+ s_temp->move_length= s_length;
DBUG_RETURN((uint) s_length);
} /* remove_key */
+
+
+/****************************************************************************
+ Logging of redos
+****************************************************************************/
+
+/*
+ @brief Write a LOGREC_REDO_INDEX record for a key page where some bytes
+ were deleted and, optionally, some bytes were changed
+
+ @param info Maria handler (table must be now_transactional)
+ @param page Position of the key page in the index file; stored in the
+ log as a page number (page / block_size)
+ @param buff Start of the key page buffer
+ @param key_pos Position in buff where the change was done
+ @param move_length Number of bytes removed at key_pos (must be > 0)
+ @param change_length Number of bytes changed at key_pos; 0 if none
+
+ @return
+ @retval 0 ok
+ @retval 1 translog_write_record() failed
+*/
+
+static my_bool _ma_log_delete(MARIA_HA *info, my_off_t page, uchar *buff,
+ uchar *key_pos, uint move_length,
+ uint change_length)
+{
+ LSN lsn;
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 9], *log_pos; /* 9= 3 bytes for each of the 3 possible operations */
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
+ uint translog_parts;
+ uint offset= (uint) (key_pos - buff);
+ DBUG_ENTER("_ma_log_delete");
+ DBUG_ASSERT(info->s->now_transactional && move_length);
+
+ /* Store the page number of the changed key page */
+ page/= info->s->block_size;
+ page_store(log_data + FILEID_STORE_SIZE, page);
+ log_pos= log_data+ FILEID_STORE_SIZE + PAGE_STORE_SIZE;
+ log_pos[0]= KEY_OP_OFFSET; /* Position on page where the change starts */
+ int2store(log_pos+1, offset);
+ log_pos[3]= KEY_OP_SHIFT; /* Negative shift: move_length bytes were removed */
+ int2store(log_pos+4, -(int) move_length);
+ log_pos+= 6;
+ translog_parts= 1; /* So far only log_data is logged */
+ if (change_length)
+ {
+ log_pos[0]= KEY_OP_CHANGE;
+ int2store(log_pos+1, change_length);
+ log_pos+= 3;
+ translog_parts= 2; /* Changed bytes are logged as a second part */
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= buff + offset;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= change_length;
+ }
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - log_data);
+
+ if (translog_write_record(&lsn, LOGREC_REDO_INDEX,
+ info->trn, info,
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length +
+ change_length, /* Total length of both parts */
+ TRANSLOG_INTERNAL_PARTS + translog_parts,
+ log_array, log_data, NULL))
+ DBUG_RETURN(1);
+ DBUG_RETURN(0);
+}
diff --git a/storage/maria/ma_ft_update.c b/storage/maria/ma_ft_update.c
index ce173993b6d..b793ccd1295 100644
--- a/storage/maria/ma_ft_update.c
+++ b/storage/maria/ma_ft_update.c
@@ -309,6 +309,7 @@ uint _ma_ft_convert_to_ft2(MARIA_HA *info, uint keynr, uchar *key)
MARIA_KEYDEF *keyinfo=&info->s->ft2_keyinfo;
uchar *key_ptr= (uchar*) dynamic_array_ptr(da, 0), *end;
uint length, key_length;
+ MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
DBUG_ENTER("_ma_ft_convert_to_ft2");
/* we'll generate one pageful at once, and insert the rest one-by-one */
@@ -323,16 +324,18 @@ uint _ma_ft_convert_to_ft2(MARIA_HA *info, uint keynr, uchar *key)
/*
nothing to do here.
_ma_ck_delete() will populate info->ft1_to_ft2 with deleted keys
- */
+ */
}
/* creating pageful of keys */
+ bzero(info->buff, info->s->keypage_header);
_ma_store_keynr(info, info->buff, keynr);
_ma_store_page_used(info, info->buff, length + info->s->keypage_header, 0);
memcpy(info->buff + info->s->keypage_header, key_ptr, length);
info->keyread_buff_used= info->page_changed=1; /* info->buff is used */
- if ((root= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR ||
- _ma_write_keypage(info,keyinfo,root,DFLT_INIT_HITS,info->buff))
+ if ((root= _ma_new(info, DFLT_INIT_HITS, &page_link)) == HA_OFFSET_ERROR ||
+ _ma_write_keypage(info, keyinfo, root, page_link->write_lock,
+ DFLT_INIT_HITS, info->buff))
DBUG_RETURN(-1);
/* inserting the rest of key values */
diff --git a/storage/maria/ma_key_recover.c b/storage/maria/ma_key_recover.c
new file mode 100644
index 00000000000..b2655e53260
--- /dev/null
+++ b/storage/maria/ma_key_recover.c
@@ -0,0 +1,599 @@
+/* Copyright (C) 2007 Michael Widenius
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Redo of index */
+
+#include "maria_def.h"
+#include "ma_blockrec.h"
+#include "trnman.h"
+#include "ma_key_recover.h"
+
+/****************************************************************************
+ Some helper functions used both by key page logging and block page logging
+****************************************************************************/
+
+/*
+ @brief Unpin all pinned pages
+
+ @fn _ma_unpin_all_pages()
+ @param info Maria handler
+ @param undo_lsn LSN for undo pages. LSN_IMPOSSIBLE if we shouldn't write
+ undo (like on duplicate key errors)
+
+ @note
+ We unpin pages in the reverse order as they were pinned; This may not
+ be strictly necessary but may simplify things in the future.
+
+ @note
+ If the table is not now_transactional, undo_lsn is ignored and
+ LSN_IMPOSSIBLE is used instead.
+
+ @return Nothing (the function is declared void)
+*/
+
+void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn)
+{
+ MARIA_PINNED_PAGE *page_link= ((MARIA_PINNED_PAGE*)
+ dynamic_array_ptr(&info->pinned_pages, 0)); /* First pinned page */
+ MARIA_PINNED_PAGE *pinned_page= page_link + info->pinned_pages.elements; /* One past the last pinned page */
+ DBUG_ENTER("_ma_unpin_all_pages");
+ DBUG_PRINT("info", ("undo_lsn: %lu", (ulong) undo_lsn));
+
+ if (!info->s->now_transactional)
+ undo_lsn= LSN_IMPOSSIBLE; /* don't try to set a LSN on pages */
+
+ while (pinned_page-- != page_link) /* Walk from last pinned to first */
+ {
+ DBUG_ASSERT(!pinned_page->changed ||
+ undo_lsn != LSN_IMPOSSIBLE || !info->s->now_transactional); /* A changed page in a transactional table needs an LSN */
+ pagecache_unlock_by_link(info->s->pagecache, pinned_page->link,
+ pinned_page->unlock, PAGECACHE_UNPIN,
+ info->trn->rec_lsn, undo_lsn,
+ pinned_page->changed);
+ }
+
+ info->pinned_pages.elements= 0; /* No pages are pinned anymore */
+ DBUG_VOID_RETURN;
+}
+
+/**
+ @brief Write a LOGREC_CLR_END record for an undone operation
+
+ @param info Maria handler
+ @param undo_lsn LSN stored first in the record; also given to the
+ write hook as previous_undo_lsn
+ @param undo_type Type of the undone record; stored in the record and
+ given to the write hook as undone_record_type
+ @param store_checksum If set, 'checksum' is stored in the record and the
+ logged length includes HA_CHECKSUM_STORE_SIZE
+ @param checksum Value to store (only used if store_checksum is set)
+ @param res_lsn Passed on to translog_write_record() as its LSN
+ argument
+
+ @return Result of translog_write_record()
+*/
+my_bool _ma_write_clr(MARIA_HA *info, LSN undo_lsn,
+ enum translog_record_type undo_type,
+ my_bool store_checksum, ha_checksum checksum,
+ LSN *res_lsn)
+{
+ uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE +
+ PAGE_STORE_SIZE + DIRPOS_STORE_SIZE +
+ HA_CHECKSUM_STORE_SIZE];
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
+ struct st_msg_to_write_hook_for_clr_end msg;
+ my_bool res;
+ DBUG_ENTER("_ma_write_clr");
+
+ /* undo_lsn must be first for compression to work */
+ lsn_store(log_data, undo_lsn);
+ clr_type_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE,
+ undo_type);
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length=
+ sizeof(log_data) - HA_CHECKSUM_STORE_SIZE; /* Default: log without the checksum part */
+ msg.undone_record_type= undo_type;
+ msg.previous_undo_lsn= undo_lsn;
+
+ if (store_checksum)
+ {
+ ha_checksum_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE +
+ CLR_TYPE_STORE_SIZE, checksum);
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); /* Log the whole buffer */
+ }
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+
+ res= translog_write_record(res_lsn, LOGREC_CLR_END,
+ info->trn, info, log_array[TRANSLOG_INTERNAL_PARTS
+ + 0].length,
+ TRANSLOG_INTERNAL_PARTS + 1, log_array,
+ log_data + LSN_STORE_SIZE, &msg);
+ DBUG_RETURN(res);
+}
+
+
+/****************************************************************************
+ Redo of key pages
+****************************************************************************/
+
+/**
+ @brief Apply LOGREC_REDO_INDEX_NEW_PAGE
+
+ @param info Maria handler
+ @param lsn LSN of the log record; written to the start of the page
+ @param header Header (without FILEID): page number, free page number,
+ key number and a root-page flag byte, followed by the
+ data to copy to the page after the LSN
+ @param length Length of header, including the fixed part above
+
+ @return Operation status
+ @retval 0 OK (also when the page already has an LSN >= lsn)
+ @retval # my_errno from a failed page cache read or write
+*/
+
+uint _ma_apply_redo_index_new_page(MARIA_HA *info, LSN lsn,
+ const uchar *header, uint length)
+{
+ ulonglong root_page= page_korr(header); /* Number of the page to create */
+ ulonglong free_page= page_korr(header + PAGE_STORE_SIZE);
+ uint key_nr= key_nr_korr(header + PAGE_STORE_SIZE * 2);
+ my_bool page_type_flag= header[PAGE_STORE_SIZE * 2 + KEY_NR_STORE_SIZE]; /* != 0 if page is a root page */
+ enum pagecache_page_lock unlock_method;
+ enum pagecache_page_pin unpin_method;
+ MARIA_PINNED_PAGE page_link;
+ my_off_t file_size;
+ uchar *buff;
+ uint result;
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_apply_redo_index_new_page");
+
+ /* Mark the index as changed and no longer optimized/sorted */
+
+ share->state.changed|= (STATE_CHANGED | STATE_NOT_OPTIMIZED_KEYS |
+ STATE_NOT_SORTED_PAGES);
+
+ header+= PAGE_STORE_SIZE * 2 + KEY_NR_STORE_SIZE + 1; /* Skip fixed part; header now points at the page data */
+ length-= PAGE_STORE_SIZE * 2 + KEY_NR_STORE_SIZE + 1;
+
+ /* free_page is 0 if we shouldn't set key_del */
+ if (free_page)
+ {
+ if (free_page != IMPOSSIBLE_PAGE_NO)
+ share->state.key_del= (my_off_t) free_page * share->block_size;
+ else
+ share->state.key_del= HA_OFFSET_ERROR;
+ }
+ file_size= (my_off_t) (root_page + 1) * share->block_size; /* File length needed to hold this page */
+
+ /* If root page */
+ if (page_type_flag)
+ share->state.key_root[key_nr]= file_size - share->block_size;
+
+ if (file_size > info->state->key_file_length)
+ {
+ info->state->key_file_length= file_size; /* Page is beyond recorded file length: extend it */
+ buff= info->keyread_buff; /* Build the page here; no page is read, locked or pinned */
+ info->keyread_buff_used= 1;
+ unlock_method= PAGECACHE_LOCK_LEFT_UNLOCKED;
+ unpin_method= PAGECACHE_PIN_LEFT_UNPINNED;
+ }
+ else
+ {
+ if (!(buff= pagecache_read(share->pagecache, &share->kfile,
+ root_page, 0, 0,
+ PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
+ &page_link.link)))
+ {
+ result= (uint) my_errno;
+ goto err;
+ }
+ if (lsn_korr(buff) >= lsn)
+ {
+ /* Already applied */
+ result= 0;
+ goto err;
+ }
+ unlock_method= PAGECACHE_LOCK_WRITE_UNLOCK;
+ unpin_method= PAGECACHE_UNPIN;
+ }
+
+ /* Write modified page */
+ lsn_store(buff, lsn);
+ memcpy(buff + LSN_STORE_SIZE, header, length);
+ bzero(buff + LSN_STORE_SIZE + length,
+ share->block_size - LSN_STORE_SIZE - KEYPAGE_CHECKSUM_SIZE - length); /* Clear rest of page up to the checksum area */
+ bfill(buff + share->block_size - KEYPAGE_CHECKSUM_SIZE,
+ KEYPAGE_CHECKSUM_SIZE, (uchar) 255); /* Fill the checksum area with 0xff bytes */
+ if (pagecache_write(share->pagecache,
+ &share->kfile, root_page, 0,
+ buff, PAGECACHE_PLAIN_PAGE,
+ unlock_method, unpin_method,
+ PAGECACHE_WRITE_DELAY, 0))
+ DBUG_RETURN(my_errno);
+ DBUG_RETURN(0);
+
+err: /* Only reached from the branch that called pagecache_read().
+ NOTE(review): this includes the case where pagecache_read() failed;
+ confirm that page_link.link is valid to unlock in that case */
+ pagecache_unlock_by_link(share->pagecache, page_link.link,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
+ LSN_IMPOSSIBLE, 0);
+ DBUG_RETURN(result);
+}
+
+
+/**
+ @brief Apply LOGREC_REDO_INDEX_FREE_PAGE
+
+ @param info Maria handler
+ @param lsn LSN of the log record; written to the start of the page
+ @param header Header (without FILEID): number of the page to free
+ followed by the free page number used to set key_del
+
+ @note
+ share->state.key_del is updated before the LSN check, that is also
+ when the page itself turns out to be already up to date.
+
+ @return Operation status
+ @retval 0 OK (also when the page already has an LSN >= lsn)
+ @retval # my_errno from a failed page cache read or write
+*/
+
+uint _ma_apply_redo_index_free_page(MARIA_HA *info,
+ LSN lsn,
+ const uchar *header)
+{
+ ulonglong page= page_korr(header);
+ ulonglong free_page= page_korr(header + PAGE_STORE_SIZE);
+ my_off_t old_link;
+ MARIA_PINNED_PAGE page_link;
+ MARIA_SHARE *share= info->s;
+ uchar *buff;
+ int result;
+ DBUG_ENTER("_ma_apply_redo_index_free_page");
+
+ share->state.changed|= (STATE_CHANGED | STATE_NOT_OPTIMIZED_KEYS |
+ STATE_NOT_SORTED_PAGES);
+
+ old_link= share->state.key_del; /* Previous key_del; stored on the freed page below */
+ share->state.key_del= ((free_page != IMPOSSIBLE_PAGE_NO) ?
+ (my_off_t) free_page * share->block_size :
+ HA_OFFSET_ERROR);
+ if (!(buff= pagecache_read(share->pagecache, &info->s->kfile,
+ page, 0, 0,
+ PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
+ &page_link.link)))
+ {
+ result= (uint) my_errno;
+ goto err;
+ }
+ if (lsn_korr(buff) >= lsn)
+ {
+ /* Already applied */
+ result= 0;
+ goto err;
+ }
+ /* Write modified page */
+ lsn_store(buff, lsn);
+ bzero(buff + LSN_STORE_SIZE, share->keypage_header - LSN_STORE_SIZE); /* Clear the key page header after the LSN */
+ _ma_store_keynr(info, buff, (uchar) MARIA_DELETE_KEY_NR); /* Mark the page as deleted */
+ mi_sizestore(buff + share->keypage_header, old_link); /* Link to the previous key_del value */
+ share->state.changed|= STATE_NOT_SORTED_PAGES; /* Same flag is already set at function start */
+
+ if (pagecache_write(share->pagecache,
+ &info->s->kfile, page, 0,
+ buff, PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN,
+ PAGECACHE_WRITE_DELAY, 0))
+ DBUG_RETURN(my_errno);
+ DBUG_RETURN(0);
+
+err: /* NOTE(review): also reached when pagecache_read() failed; confirm
+ that page_link.link is valid to unlock in that case */
+ pagecache_unlock_by_link(share->pagecache, page_link.link,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
+ LSN_IMPOSSIBLE, 0);
+ DBUG_RETURN(result);
+}
+
+
+/**
+ @brief Apply LOGREC_REDO_INDEX
+
+ @fn _ma_apply_redo_index()
+ @param info Maria handler
+ @param lsn LSN of the log record; written to the start of the page
+ @param header Header (without FILEID): page number followed by a
+ list of operations (see below)
+ @param head_length Length of header, including the page number
+
+ @notes
+ Data for this part is a set of logical instructions of how to
+ construct the key page.
+
+ Information of the layout of the components for REDO_INDEX:
+
+ Name Parameters (in byte) Information
+ KEY_OP_OFFSET 2 Set position for next operations
+ KEY_OP_SHIFT 2 (signed int) How much to shift down or up
+ KEY_OP_CHANGE 2 length, data Data to replace at 'pos'
+ KEY_OP_ADD_PREFIX 2 move-length How much data should be moved up
+ 2 change-length Data to be replaced at page start
+ KEY_OP_DEL_PREFIX 2 length Bytes to be deleted at page start
+ KEY_OP_ADD_SUFFIX 2 length, data Add data to end of page
+ KEY_OP_DEL_SUFFIX 2 length Reduce page length with this
+
+ @return Operation status
+ @retval 0 OK (also when the page already has an LSN >= lsn)
+ @retval 1 Page could not be read or an unknown operation was found
+ @retval # my_errno if the page could not be written
+*/
+
+uint _ma_apply_redo_index(MARIA_HA *info,
+ LSN lsn, const uchar *header, uint head_length)
+{
+ MARIA_SHARE *share= info->s;
+ ulonglong page= page_korr(header);
+ MARIA_PINNED_PAGE page_link;
+ uchar *buff;
+ const uchar *header_end= header + head_length; /* Computed before the page number is skipped */
+ uint page_offset= 0; /* 0 means that no KEY_OP_OFFSET has been seen */
+ uint nod_flag, page_length, keypage_header;
+ int result;
+ uint org_page_length;
+ DBUG_ENTER("_ma_apply_redo_index");
+
+ /* Skip page number; header now points at the first operation */
+ header+= PAGE_STORE_SIZE;
+
+ if (!(buff= pagecache_read(share->pagecache, &info->s->kfile,
+ page, 0, 0,
+ PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
+ &page_link.link)))
+ {
+ result= 1;
+ goto err;
+ }
+ if (lsn_korr(buff) >= lsn)
+ {
+ /* Already applied */
+ result= 0;
+ goto err;
+ }
+
+ _ma_get_used_and_nod(info, buff, page_length, nod_flag);
+ keypage_header= share->keypage_header;
+ org_page_length= page_length; /* Remembered to be able to clear freed space at end */
+
+ /* Apply modifications to page */
+ do
+ {
+ switch ((enum en_key_op) (*header++)) {
+ case KEY_OP_OFFSET:
+ page_offset= uint2korr(header);
+ header+= 2;
+ DBUG_ASSERT(page_offset >= keypage_header && page_offset <= page_length);
+ break;
+ case KEY_OP_SHIFT:
+ {
+ int length= sint2korr(header);
+ header+= 2;
+ DBUG_ASSERT(page_offset != 0 && page_offset < page_length &&
+ page_length + length < share->block_size);
+
+ if (length < 0) /* Remove -length bytes at page_offset */
+ bmove(buff + page_offset, buff + page_offset - length,
+ page_length - page_offset + length);
+ else /* Make room for length bytes at page_offset */
+ bmove_upp(buff + page_length + length, buff + page_length,
+ page_length - page_offset);
+ page_length+= length;
+ break;
+ }
+ case KEY_OP_CHANGE:
+ {
+ uint length= uint2korr(header);
+ DBUG_ASSERT(page_offset != 0 && page_offset + length <= page_length);
+
+ memcpy(buff + page_offset, header + 2 , length); /* Data follows the 2 byte length */
+ header+= 2 + length;
+ break;
+ }
+ case KEY_OP_ADD_PREFIX:
+ {
+ uint insert_length= uint2korr(header);
+ uint change_length= uint2korr(header+2);
+ DBUG_ASSERT(insert_length <= change_length &&
+ page_length + change_length <= share->block_size);
+
+ bmove_upp(buff + page_length + insert_length, buff + page_length,
+ page_length - keypage_header); /* Move all keys up by insert_length */
+ memcpy(buff + keypage_header, header + 4 , change_length);
+ header+= 4 + change_length;
+ page_length+= insert_length;
+ break;
+ }
+ case KEY_OP_DEL_PREFIX:
+ {
+ uint length= uint2korr(header);
+ header+= 2;
+ DBUG_ASSERT(length <= page_length - keypage_header);
+
+ bmove(buff + keypage_header, buff + keypage_header +
+ length, page_length - keypage_header - length);
+ page_length-= length;
+ break;
+ }
+ case KEY_OP_ADD_SUFFIX:
+ {
+ uint insert_length= uint2korr(header);
+ DBUG_ASSERT(page_length + insert_length <= share->block_size);
+ memcpy(buff + page_length, header+2, insert_length);
+
+ page_length+= insert_length;
+ header+= 2 + insert_length;
+ break;
+ }
+ case KEY_OP_DEL_SUFFIX:
+ {
+ uint del_length= uint2korr(header);
+ header+= 2;
+ DBUG_ASSERT(page_length - del_length >= keypage_header);
+ page_length-= del_length; /* Data is not moved; only the used length shrinks */
+ break;
+ }
+ case KEY_OP_NONE:
+ default:
+ DBUG_ASSERT(0);
+ result= 1;
+ goto err;
+ }
+ } while (header < header_end);
+ DBUG_ASSERT(header == header_end);
+
+ /* Write modified page */
+ lsn_store(buff, lsn);
+ _ma_store_page_used(info, buff, page_length, nod_flag);
+
+ /*
+ Clean old stuff up. Gives us better compression if we archive things
+ and makes things easier to debug
+ */
+ if (page_length < org_page_length)
+ bzero(buff + page_length, org_page_length-page_length);
+
+ if (pagecache_write(share->pagecache,
+ &info->s->kfile, page, 0,
+ buff, PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_WRITE_UNLOCK, PAGECACHE_UNPIN,
+ PAGECACHE_WRITE_DELAY, 0))
+ DBUG_RETURN(my_errno);
+ DBUG_RETURN(0);
+
+err: /* NOTE(review): also reached when pagecache_read() failed; confirm
+ that page_link.link is valid to unlock in that case */
+ pagecache_unlock_by_link(share->pagecache, page_link.link,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
+ LSN_IMPOSSIBLE, 0);
+ DBUG_RETURN(result);
+}
+
+
+/****************************************************************************
+ Undo of key block changes
+****************************************************************************/
+
+
+/**
+ @brief Undo of insert of key (ie, delete the inserted key)
+
+ @param info Maria handler
+ @param undo_lsn LSN given to _ma_write_clr() for the CLR record
+ @param header Key number followed by the key that was inserted
+ @param length Length of header, including the key number
+
+ @return
+ @retval 0 ok
+ @retval 1 The CLR record could not be written
+ @retval # Result of _ma_ck_real_delete() otherwise
+*/
+
+my_bool _ma_apply_undo_key_insert(MARIA_HA *info, LSN undo_lsn,
+ const uchar *header, uint length)
+{
+ LSN lsn;
+ my_bool res;
+ uint keynr;
+ uchar key[HA_MAX_KEY_BUFF];
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_apply_undo_key_insert");
+
+ share->state.changed|= (STATE_CHANGED | STATE_NOT_OPTIMIZED_KEYS |
+ STATE_NOT_SORTED_PAGES);
+ keynr= key_nr_korr(header);
+ length-= KEY_NR_STORE_SIZE; /* length is now the length of the key */
+
+ /*
+ We have to copy key as _ma_ck_real_delete() may change it.
+ Assumes length <= HA_MAX_KEY_BUFF - TODO confirm against log writer
+ */
+ memcpy(key, header+ KEY_NR_STORE_SIZE, length);
+
+ res= _ma_ck_real_delete(info, share->keyinfo+keynr, key, length,
+ &share->state.key_root[keynr]);
+
+ if (_ma_write_clr(info, undo_lsn, LOGREC_UNDO_KEY_INSERT, 1, 0, &lsn)) /* CLR is written even if the delete failed */
+ res= 1;
+
+ _ma_fast_unlock_key_del(info);
+ _ma_unpin_all_pages_and_finalize_row(info, lsn); /* NOTE(review): lsn may be unset if _ma_write_clr() failed - confirm */
+ DBUG_RETURN(res);
+}
+
+
+/**
+ @brief Undo of delete of key (ie, insert the deleted key)
+
+ @param info Maria handler
+ @param undo_lsn LSN given to _ma_write_clr() for the CLR record
+ @param header Key number followed by the key that was deleted
+ @param length Length of header, including the key number
+
+ @return
+ @retval 0 ok
+ @retval 1 The CLR record could not be written
+ @retval # Result of _ma_ck_real_write_btree() otherwise
+*/
+
+my_bool _ma_apply_undo_key_delete(MARIA_HA *info, LSN undo_lsn,
+ const uchar *header, uint length)
+{
+ LSN lsn;
+ my_bool res;
+ uint keynr;
+ uchar key[HA_MAX_KEY_BUFF];
+ MARIA_SHARE *share= info->s;
+ DBUG_ENTER("_ma_apply_undo_key_delete");
+
+ share->state.changed|= (STATE_CHANGED | STATE_NOT_OPTIMIZED_KEYS |
+ STATE_NOT_SORTED_PAGES);
+ keynr= key_nr_korr(header);
+ length-= KEY_NR_STORE_SIZE; /* length is now the length of the key */
+
+ /*
+ Copy the key to a local buffer before it is given to
+ _ma_ck_real_write_btree() (header is const).
+ Assumes length <= HA_MAX_KEY_BUFF - TODO confirm against log writer
+ */
+ memcpy(key, header+ KEY_NR_STORE_SIZE, length);
+
+ res= _ma_ck_real_write_btree(info, share->keyinfo+keynr, key, length,
+ &share->state.key_root[keynr],
+ share->keyinfo[keynr].write_comp_flag);
+
+ if (_ma_write_clr(info, undo_lsn, LOGREC_UNDO_KEY_DELETE, 1, 0, &lsn)) /* CLR is written even if the insert failed */
+ res= 1;
+
+ _ma_fast_unlock_key_del(info);
+ _ma_unpin_all_pages_and_finalize_row(info, lsn); /* NOTE(review): lsn may be unset if _ma_write_clr() failed - confirm */
+ DBUG_RETURN(res);
+}
+
+
+/****************************************************************************
+ Handle some local variables
+****************************************************************************/
+
+/*
+ @brief lock key_del for other threads usage
+
+ @fn _ma_lock_key_del()
+ @param info Maria handler
+ @param insert_at_end Set to 1 if we are doing an insert
+
+ @notes
+ To allow higher concurrency in the common case where we do inserts
+ and we don't have any linked blocks we do the following:
+ - Mark in info->used_key_del that we are not using key_del
+ - Return at once (without marking key_del as used)
+
+ This is safe as we in this case don't write current_key_del into
+ the redo log and during recover we are not updating key_del.
+
+ If info->used_key_del is already 1, nothing is done.
+
+ @return
+ @retval 0 key_del is locked by this handler (info->used_key_del == 1)
+ @retval 1 key_del was not locked; insert-with-append case
+ (info->used_key_del == 2)
+*/
+
+my_bool _ma_lock_key_del(MARIA_HA *info, my_bool insert_at_end)
+{
+ MARIA_SHARE *share= info->s;
+
+ if (info->used_key_del != 1)
+ {
+ pthread_mutex_lock(&share->intern_lock);
+ if (share->state.key_del == HA_OFFSET_ERROR && insert_at_end)
+ {
+ pthread_mutex_unlock(&share->intern_lock);
+ info->used_key_del= 2; /* insert-with-append */
+ return 1;
+ }
+#ifdef THREAD
+ while (share->used_key_del) /* Wait until no other handler has key_del locked */
+ pthread_cond_wait(&share->intern_cond, &share->intern_lock);
+#endif
+ info->used_key_del= 1;
+ share->used_key_del= 1;
+ pthread_mutex_unlock(&share->intern_lock);
+ }
+ return 0;
+}
+
+
+/*
+ @brief copy changes to key_del and unlock it
+
+ @param info Maria handler; info->used_key_del must be set
+
+ @notes
+ If key_del was locked by this handler (used_key_del == 1),
+ current_key_del is copied to state.key_del, the shared lock flag is
+ cleared and intern_cond is signalled. In all cases
+ info->used_key_del is reset to 0.
+*/
+
+void _ma_unlock_key_del(MARIA_HA *info)
+{
+ MARIA_SHARE *share= info->s;
+ DBUG_ASSERT(info->used_key_del);
+ if (info->used_key_del == 1) /* Ignore insert-with-append */
+ {
+ pthread_mutex_lock(&share->intern_lock);
+ share->used_key_del= 0;
+ info->s->state.key_del= info->s->current_key_del;
+ pthread_mutex_unlock(&share->intern_lock);
+ pthread_cond_signal(&share->intern_cond); /* Wake a waiter in _ma_lock_key_del() */
+ }
+ info->used_key_del= 0;
+}
diff --git a/storage/maria/ma_key_recover.h b/storage/maria/ma_key_recover.h
new file mode 100644
index 00000000000..9d3548ac472
--- /dev/null
+++ b/storage/maria/ma_key_recover.h
@@ -0,0 +1,64 @@
+/* Copyright (C) 2007 Michael Widenius
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/*
+ When we have finished the write/update/delete of a row, we have cleanups to
+ do. For now it is signalling to Checkpoint that all dirtied pages have
+ their rec_lsn set and page LSN set (_ma_unpin_all_pages() has been called),
+ and that bitmap pages are correct (_ma_bitmap_release_unused() has been
+ called).
+*/
+
+/* Function definitions for some redo functions */
+
+my_bool _ma_write_clr(MARIA_HA *info, LSN undo_lsn,
+ enum translog_record_type undo_type,
+ my_bool store_checksum, ha_checksum checksum,
+ LSN *res_lsn);
+void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn);
+
+uint _ma_apply_redo_index_new_page(MARIA_HA *info, LSN lsn,
+ const uchar *header, uint length);
+uint _ma_apply_redo_index_free_page(MARIA_HA *info, LSN lsn,
+ const uchar *header);
+uint _ma_apply_redo_index(MARIA_HA *info,
+ LSN lsn, const uchar *header, uint length);
+
+my_bool _ma_apply_undo_key_insert(MARIA_HA *info, LSN undo_lsn,
+ const uchar *header, uint length);
+my_bool _ma_apply_undo_key_delete(MARIA_HA *info, LSN undo_lsn,
+ const uchar *header, uint length);
+/* Reset the rec_lsn of the handler's transaction to LSN_IMPOSSIBLE */
+static inline void _ma_finalize_row(MARIA_HA *info)
+{
+ info->trn->rec_lsn= LSN_IMPOSSIBLE;
+}
+
+/*
+ Unpin all pages pinned by the handler (passing undo_lsn on to
+ _ma_unpin_all_pages()) and then finalize the row.
+ Unpinning is often the last operation before finalizing.
+*/
+
+static inline void _ma_unpin_all_pages_and_finalize_row(MARIA_HA *info,
+ LSN undo_lsn)
+{
+ _ma_unpin_all_pages(info, undo_lsn);
+ _ma_finalize_row(info);
+}
+
+extern my_bool _ma_lock_key_del(MARIA_HA *info, my_bool insert_at_end);
+extern void _ma_unlock_key_del(MARIA_HA *info);
+static inline void _ma_fast_unlock_key_del(MARIA_HA *info)
+{
+ if (info->used_key_del) /* Only call the unlock function if this handler marked key_del as used */
+ _ma_unlock_key_del(info);
+}
diff --git a/storage/maria/ma_key_redo.c b/storage/maria/ma_key_redo.c
new file mode 100644
index 00000000000..9299f23f328
--- /dev/null
+++ b/storage/maria/ma_key_redo.c
@@ -0,0 +1,417 @@
+/* Copyright (C) 2007 Michael Widenius
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Redo of index */
+
+#include "maria_def.h"
+#include "ma_blockrec.h"
+#include "trnman.h"
+
+/**
+  @brief Apply LOGREC_REDO_INDEX_NEW_PAGE
+
+  @param  info    Maria handler
+  @param  lsn     LSN of the log record; stored on the page and compared
+                  with the LSN already on the page to detect that the
+                  record has been applied before
+  @param  header  Header (without FILEID): page number, number of the new
+                  first free page (or IMPOSSIBLE_PAGE_NO), then the page
+                  content to store after the LSN
+  @param  length  Length of header
+
+  @return Operation status
+    @retval 0      OK (also when the page already had an LSN >= lsn)
+    @retval #      my_errno from the failing pagecache call
+*/
+
+uint _ma_apply_redo_index_new_page(MARIA_HA *info, LSN lsn,
+                                   const uchar *header, uint length)
+{
+  ulonglong root_page= page_korr(header);
+  ulonglong free_page= page_korr(header + PAGE_STORE_SIZE);
+  enum pagecache_page_lock unlock_method;
+  enum pagecache_page_pin unpin_method;
+  MARIA_PINNED_PAGE page_link;
+  MARIA_SHARE *share= info->s;
+  my_off_t file_size;
+  uchar *buff;
+  uint result;
+  DBUG_ENTER("_ma_apply_redo_index_new_page");
+
+  /* Set header to point at key data */
+  header+= PAGE_STORE_SIZE*2;
+  length-= PAGE_STORE_SIZE*2;
+
+  if (free_page != IMPOSSIBLE_PAGE_NO)
+    share->state.key_del= (my_off_t) free_page * share->block_size;
+  else
+    share->state.key_del= HA_OFFSET_ERROR;
+
+  file_size= (my_off_t) (root_page + 1) * share->block_size;
+  if (file_size > info->state->key_file_length)
+  {
+    /*
+      The page is beyond the current end of the index file; there is
+      nothing to read, so build the page in keyread_buff.
+    */
+    info->state->key_file_length= file_size;
+    buff= info->keyread_buff;
+    info->keyread_buff_used= 1;
+    unlock_method= PAGECACHE_LOCK_LEFT_UNLOCKED;
+    unpin_method=  PAGECACHE_PIN_LEFT_UNPINNED;
+  }
+  else
+  {
+    /* Index pages live in the key file (kfile), not in the data file */
+    if (!(buff= pagecache_read(share->pagecache, &share->kfile,
+                               root_page, 0, 0,
+                               PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
+                               &page_link.link)))
+    {
+      result= (uint) my_errno;
+      goto err;
+    }
+    if (lsn_korr(buff) >= lsn)
+    {
+      /* Already applied */
+      result= 0;
+      goto err;
+    }
+    unlock_method= PAGECACHE_LOCK_WRITE_UNLOCK;
+    unpin_method=  PAGECACHE_UNPIN;
+  }
+
+  /* Write modified page */
+  lsn_store(buff, lsn);
+  memcpy(buff + LSN_STORE_SIZE, header, length);
+#ifdef IDENTICAL_PAGES_AFTER_RECOVERY
+  bzero(buff + LSN_STORE_SIZE + length,
+        share->block_size - LSN_STORE_SIZE - length);
+#endif
+  if (pagecache_write(share->pagecache,
+                      &share->kfile, root_page, 0,
+                      buff, PAGECACHE_PLAIN_PAGE,
+                      unlock_method, unpin_method,
+                      PAGECACHE_WRITE_DELAY, 0))
+    DBUG_RETURN(my_errno);
+  DBUG_RETURN(0);
+
+err:
+  /* Only reached from the branch that locked the page with pagecache_read() */
+  pagecache_unlock_by_link(share->pagecache, page_link.link,
+                           PAGECACHE_LOCK_WRITE_UNLOCK,
+                           PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
+                           LSN_IMPOSSIBLE);
+  DBUG_RETURN(result);
+}
+
+
+/**
+  @brief Apply LOGREC_REDO_INDEX_FREE_PAGE
+
+  @param  info    Maria handler
+  @param  lsn     LSN of the log record; stored on the page and compared
+                  with the LSN already on the page to detect that the
+                  record has been applied before
+  @param  header  Header (without FILEID): number of the page to free,
+                  followed by the number of the new first free page (or
+                  IMPOSSIBLE_PAGE_NO)
+
+  @note
+    The page is rewritten as a deleted page: its key number is set to
+    MARIA_DELETE_KEY_NR and the previous value of state.key_del is stored
+    directly after the key page header.
+
+  @return Operation status
+    @retval 0      OK (also when the page already had an LSN >= lsn)
+    @retval #      my_errno from the failing pagecache call
+*/
+
+uint _ma_apply_redo_index_free_page(MARIA_HA *info,
+                                    LSN lsn,
+                                    const uchar *header)
+{
+  ulonglong page= page_korr(header);
+  ulonglong free_page= page_korr(header + PAGE_STORE_SIZE);
+  my_off_t old_link;
+  MARIA_PINNED_PAGE page_link;
+  MARIA_SHARE *share= info->s;
+  uchar *buff;
+  uint result;
+  DBUG_ENTER("_ma_apply_redo_index_free_page");
+
+  old_link= share->state.key_del;
+  share->state.key_del=  ((free_page != IMPOSSIBLE_PAGE_NO) ?
+                          (my_off_t) free_page * share->block_size :
+                          HA_OFFSET_ERROR);
+  /* Index pages live in the key file (kfile), not in the data file */
+  if (!(buff= pagecache_read(share->pagecache, &share->kfile,
+                             page, 0, 0,
+                             PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
+                             &page_link.link)))
+  {
+    result= (uint) my_errno;
+    goto err;
+  }
+  if (lsn_korr(buff) >= lsn)
+  {
+    /* Already applied */
+    result= 0;
+    goto err;
+  }
+  /* Write modified page */
+  lsn_store(buff, lsn);
+  bzero(buff + LSN_STORE_SIZE, share->keypage_header - LSN_STORE_SIZE);
+  _ma_store_keynr(info, buff, (uchar) MARIA_DELETE_KEY_NR);
+  mi_sizestore(buff + share->keypage_header, old_link);
+  share->state.changed|= STATE_NOT_SORTED_PAGES;
+
+  if (pagecache_write(share->pagecache,
+                      &share->kfile, page, 0,
+                      buff, PAGECACHE_PLAIN_PAGE,
+                      PAGECACHE_LOCK_WRITE_UNLOCK,
+                      PAGECACHE_UNPIN,
+                      PAGECACHE_WRITE_DELAY, 0))
+    DBUG_RETURN(my_errno);
+  DBUG_RETURN(0);
+
+err:
+  pagecache_unlock_by_link(share->pagecache, page_link.link,
+                           PAGECACHE_LOCK_WRITE_UNLOCK,
+                           PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
+                           LSN_IMPOSSIBLE);
+  DBUG_RETURN(result);
+}
+
+
+/**
+  @brief Apply LOGREC_REDO_INDEX
+
+  @fn ma_apply_redo_index()
+  @param  info    Maria handler
+  @param  lsn     LSN of the log record; stored on the page and compared
+                  with the LSN already on the page to detect that the
+                  record has been applied before
+  @param  header  Header (without FILEID): page number followed by a
+                  sequence of key operations
+  @param  length  Length of header
+
+  @notes
+    Data for this part is a set of logical instructions of how to
+    construct the key page.
+
+    Information of the layout of the components for REDO_INDEX:
+
+    Name              Parameters (in byte) Information
+    KEY_OP_OFFSET     2                    Set position for next operations
+    KEY_OP_SHIFT      2 (signed int)       How much to shift down or up
+    KEY_OP_CHANGE     2 length,  data      Data to replace at 'pos'
+    KEY_OP_ADD_PREFIX 2 move-length        How much data should be moved up
+                      2 change-length,     Data to be replaced at page start
+                        data
+    KEY_OP_DEL_PREFIX 2 length             Bytes to be deleted at page start
+    KEY_OP_ADD_SUFFIX 2 length, data       Add data to end of page
+    KEY_OP_DEL_SUFFIX 2 length             Reduce page length with this
+
+  @return Operation status
+    @retval 0      OK (also when the page already had an LSN >= lsn)
+    @retval #      my_errno from the failing pagecache call
+*/
+
+uint _ma_apply_redo_index(MARIA_HA *info,
+                          LSN lsn, const uchar *header, uint length)
+{
+  ulonglong root_page= page_korr(header);
+  MARIA_PINNED_PAGE page_link;
+  MARIA_SHARE *share= info->s;
+  uchar *buff;
+  const uchar *header_end= header + length;
+  uint page_offset= 0;
+  uint nod_flag, page_length, keypage_header;
+  uint result;
+  DBUG_ENTER("_ma_apply_redo_index");
+
+  /* Set header to point at key data */
+  header+= PAGE_STORE_SIZE;
+
+  /* Index pages live in the key file (kfile), not in the data file */
+  if (!(buff= pagecache_read(share->pagecache, &share->kfile,
+                             root_page, 0, 0,
+                             PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
+                             &page_link.link)))
+  {
+    result= (uint) my_errno;
+    goto err;
+  }
+  if (lsn_korr(buff) >= lsn)
+  {
+    /* Already applied */
+    result= 0;
+    goto err;
+  }
+
+  _ma_get_used_and_nod(info, buff, page_length, nod_flag);
+  keypage_header= share->keypage_header;
+
+  /* Apply modifications to page */
+  do
+  {
+    switch ((enum en_key_op) (*header++)) {
+    case KEY_OP_NONE:
+      DBUG_ASSERT(0);                           /* Impossible */
+      break;
+    case KEY_OP_OFFSET:
+      page_offset= uint2korr(header);
+      header+= 2;
+      DBUG_ASSERT(page_offset >= keypage_header && page_offset <= page_length);
+      break;
+    case KEY_OP_SHIFT:
+    {
+      int shift_length= sint2korr(header);
+      header+= 2;
+      DBUG_ASSERT(page_offset != 0 && page_offset < page_length &&
+                  page_length + shift_length < share->block_size);
+
+      if (shift_length < 0)
+        bmove(buff + page_offset, buff + page_offset - shift_length,
+              page_length - page_offset + shift_length);
+      else
+        bmove_upp(buff + page_length + shift_length, buff + page_length,
+                  page_length - page_offset);
+      page_length+= shift_length;
+      break;
+    }
+    case KEY_OP_CHANGE:
+    {
+      uint change_length= uint2korr(header);
+      DBUG_ASSERT(page_offset != 0 &&
+                  page_offset + change_length <= page_length);
+
+      memcpy(buff + page_offset, header + 2, change_length);
+      header+= 2 + change_length;
+      break;
+    }
+    case KEY_OP_ADD_PREFIX:
+    {
+      uint insert_length= uint2korr(header);
+      uint change_length= uint2korr(header+2);
+      DBUG_ASSERT(insert_length <= change_length &&
+                  page_length + change_length < share->block_size);
+
+      bmove_upp(buff + page_length + insert_length, buff + page_length,
+                page_length - keypage_header);
+      /* Data starts after the two 2-byte length fields */
+      memcpy(buff + keypage_header, header + 4, change_length);
+      header+= 4 + change_length;
+      page_length+= insert_length;
+      break;
+    }
+    case KEY_OP_DEL_PREFIX:
+    {
+      uint del_length= uint2korr(header);
+      header+= 2;
+      DBUG_ASSERT(del_length <= page_length - keypage_header);
+
+      bmove(buff + keypage_header, buff + keypage_header +
+            del_length, page_length - keypage_header - del_length);
+      page_length-= del_length;
+      break;
+    }
+    case KEY_OP_ADD_SUFFIX:
+    {
+      uint insert_length= uint2korr(header);
+      DBUG_ASSERT(page_length + insert_length < share->block_size);
+      /* Data starts after the 2-byte length field; page grows by it */
+      memcpy(buff + page_length, header + 2, insert_length);
+      page_length+= insert_length;
+      header+= 2 + insert_length;
+      break;
+    }
+    case KEY_OP_DEL_SUFFIX:
+    {
+      uint del_length= uint2korr(header);
+      header+= 2;
+      DBUG_ASSERT(page_length - del_length >= keypage_header);
+      page_length-= del_length;
+      break;
+    }
+    }
+  } while (header < header_end);
+  DBUG_ASSERT(header == header_end);
+
+  /*
+    Write modified page. The page content was already updated in place by
+    the operations above; only the LSN and the used length remain to be set.
+  */
+  lsn_store(buff, lsn);
+  _ma_store_page_used(info, buff, page_length, nod_flag);
+
+  if (pagecache_write(share->pagecache,
+                      &share->kfile, root_page, 0,
+                      buff, PAGECACHE_PLAIN_PAGE,
+                      PAGECACHE_LOCK_WRITE_UNLOCK, PAGECACHE_UNPIN,
+                      PAGECACHE_WRITE_DELAY, 0))
+    DBUG_RETURN(my_errno);
+  DBUG_RETURN(0);
+
+err:
+  pagecache_unlock_by_link(share->pagecache, page_link.link,
+                           PAGECACHE_LOCK_WRITE_UNLOCK,
+                           PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
+                           LSN_IMPOSSIBLE);
+  DBUG_RETURN(result);
+}
+
+/*
+ Unpin all pinned pages
+
+ SYNOPSIS
+ _ma_unpin_all_pages()
+ info Maria handler
+ undo_lsn LSN for undo pages. LSN_IMPOSSIBLE if we shouldn't write undo
+ (error)
+
+ NOTE
+ We unpin pages in the reverse order as they were pinned; This may not
+ be strictly necessary but may simplify things in the future.
+
+ If the table is not currently transactional (!now_transactional),
+ undo_lsn is replaced with LSN_IMPOSSIBLE so that no LSN is set on pages.
+
+ On return info->pinned_pages is empty.
+
+ RETURN
+ None
+
+*/
+
+void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn)
+{
+ MARIA_PINNED_PAGE *page_link= ((MARIA_PINNED_PAGE*)
+ dynamic_array_ptr(&info->pinned_pages, 0));
+ MARIA_PINNED_PAGE *pinned_page= page_link + info->pinned_pages.elements;
+ DBUG_ENTER("_ma_unpin_all_pages");
+ DBUG_PRINT("info", ("undo_lsn: %lu", (ulong) undo_lsn));
+
+ if (!info->s->now_transactional)
+ undo_lsn= LSN_IMPOSSIBLE; /* don't try to set a LSN on pages */
+
+ while (pinned_page-- != page_link)
+ {
+ DBUG_ASSERT(pinned_page->changed &&
+ ((undo_lsn != LSN_IMPOSSIBLE) || !info->s->now_transactional));
+ pagecache_unlock_by_link(info->s->pagecache, pinned_page->link,
+ pinned_page->unlock, PAGECACHE_UNPIN,
+ info->trn->rec_lsn, undo_lsn);
+ }
+
+ info->pinned_pages.elements= 0;
+ DBUG_VOID_RETURN;
+}
+
+
+/****************************************************************************
+ Undo of key block changes
+****************************************************************************/
+
+
+/*
+  Undo of insert of key (ie, delete the inserted key)
+
+  SYNOPSIS
+    _ma_apply_undo_key_insert()
+    info      Maria handler
+    undo_lsn  LSN of the undo record (not used by the current code)
+    header    Key number (KEY_NR_STORE_SIZE bytes) followed by the key
+    length    Length of header
+
+  NOTE
+    No CLR is written yet; pages are unpinned with LSN_IMPOSSIBLE.
+
+  RETURN
+    Result of _ma_ck_delete()
+*/
+
+my_bool _ma_apply_undo_key_insert(MARIA_HA *info, LSN undo_lsn,
+                                  const uchar *header, uint length)
+{
+  uint keynr;
+  const uchar *key;
+  my_bool res;
+  LSN lsn= LSN_IMPOSSIBLE;
+  DBUG_ENTER("_ma_apply_undo_key_insert");
+
+  /* Split the header into key number and key data */
+  keynr= key_nr_korr(header);
+  key= header + KEY_NR_STORE_SIZE;
+  length-= KEY_NR_STORE_SIZE;
+
+  res= _ma_ck_delete(info, info->s->keyinfo+keynr, key, length,
+                     &info->s->state.key_root[keynr]);
+  _ma_unpin_all_pages_and_finalize_row(info, lsn);
+  DBUG_RETURN(res);
+}
diff --git a/storage/maria/ma_locking.c b/storage/maria/ma_locking.c
index 9fc364e2af8..0b24fe9594c 100644
--- a/storage/maria/ma_locking.c
+++ b/storage/maria/ma_locking.c
@@ -67,18 +67,11 @@ int maria_lock_database(MARIA_HA *info, int lock_type)
--share->tot_locks;
if (info->lock_type == F_WRLCK && !share->w_locks)
{
- if (!share->delay_key_write &&
- flush_pagecache_blocks(share->pagecache, &share->kfile,
- FLUSH_KEEP))
- {
- error= my_errno;
- maria_print_error(info->s, HA_ERR_CRASHED);
- /* Mark that table must be checked */
- maria_mark_crashed(info);
- }
/* pages of transactional tables get flushed at Checkpoint */
- if (!share->base.born_transactional &&
- _ma_flush_table_files(info, MARIA_FLUSH_DATA,
+ if (!share->base.born_transactional && !share->temporary &&
+ _ma_flush_table_files(info,
+ share->delay_key_write ? MARIA_FLUSH_DATA :
+ MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
FLUSH_KEEP, FLUSH_KEEP))
error= my_errno;
}
@@ -110,9 +103,11 @@ int maria_lock_database(MARIA_HA *info, int lock_type)
rw_unlock(&info->s->mmap_lock);
}
#endif
+#ifdef EXTERNAL_LOCKING
share->state.process= share->last_process=share->this_process;
share->state.unique= info->last_unique= info->this_unique;
share->state.update_count= info->last_loop= ++info->this_loop;
+#endif
/* transactional tables rather flush their state at Checkpoint */
if (!share->base.born_transactional)
{
@@ -239,7 +234,7 @@ int maria_lock_database(MARIA_HA *info, int lock_type)
/*
Check for bad file descriptors if this table is part
of a merge union. Failing to capture this may cause
- a crash on windows if the table is renamed and
+ a crash on windows if the table is renamed and
later on referenced by the merge table.
*/
if( info->owned_by_merge && (info->s)->kfile.file < 0 )
@@ -438,9 +433,17 @@ int _ma_writeinfo(register MARIA_HA *info, uint operation)
if (operation)
{ /* Two threads can't be here */
olderror= my_errno; /* Remember last error */
+
+#ifdef EXTERNAL_LOCKING
+ /*
+ The following only makes sense if we want to allow two different
+ processes to access the same table at the same time
+ */
share->state.process= share->last_process= share->this_process;
share->state.unique= info->last_unique= info->this_unique;
share->state.update_count= info->last_loop= ++info->this_loop;
+#endif
+
if ((error= _ma_state_info_write_sub(share->kfile.file,
&share->state, 1)))
olderror=my_errno;
@@ -460,11 +463,14 @@ int _ma_writeinfo(register MARIA_HA *info, uint operation)
} /* _ma_writeinfo */
- /* Test if someone has changed the database */
- /* (Should be called after readinfo) */
+/*
+ Test if an external process has changed the database
+ (Should be called after readinfo)
+*/
int _ma_test_if_changed(register MARIA_HA *info)
{
+#ifdef EXTERNAL_LOCKING
MARIA_SHARE *share=info->s;
if (share->state.process != share->last_process ||
share->state.unique != info->last_unique ||
@@ -481,6 +487,7 @@ int _ma_test_if_changed(register MARIA_HA *info)
info->data_changed= 1; /* For maria_is_changed */
return 1;
}
+#endif
return (!(info->update & HA_STATE_AKTIV) ||
(info->update & (HA_STATE_WRITTEN | HA_STATE_DELETED |
HA_STATE_KEY_CHANGED)));
diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c
index 8f6a50bd93f..8e4f50ade21 100644
--- a/storage/maria/ma_loghandler.c
+++ b/storage/maria/ma_loghandler.c
@@ -15,7 +15,7 @@
#include "maria_def.h"
#include "ma_blockrec.h" /* for some constants and in-write hooks */
-#include "trnman.h" /* for access to members of TRN */
+#include "trnman.h"
/**
@file
@@ -65,7 +65,7 @@
static int translog_mutex_lock(pthread_mutex_t *M)
{
int rc;
- DBUG_PRINT("info", ("Going lock mutex 0x%lx...", (ulong)(M)));
+ DBUG_PRINT("info", ("Going lock mutex 0x%lx", (ulong)(M)));
rc= pthread_mutex_lock(M);
DBUG_PRINT("info", ("Mutex locked 0x%lx rc: %d", (ulong)(M), rc));
return (rc);
@@ -74,7 +74,7 @@ static int translog_mutex_lock(pthread_mutex_t *M)
static int translog_mutex_unlock(pthread_mutex_t *M)
{
int rc;
- DBUG_PRINT("info", ("Going unlock mutex 0x%lx...", (ulong)(M)));
+ DBUG_PRINT("info", ("Going unlock mutex 0x%lx", (ulong)(M)));
rc= pthread_mutex_unlock(M);
DBUG_PRINT("info", ("Mutex unlocked 0x%lx rc: %d", (ulong)(M), rc));
return(rc);
@@ -270,19 +270,9 @@ static void check_translog_description_table(int num)
DBUG_ASSERT(num > 0);
/* last is reserved for extending the table */
DBUG_ASSERT(num < LOGREC_NUMBER_OF_TYPES - 1);
- DBUG_PRINT("info", ("records number: OK"));
- DBUG_PRINT("info",
- ("record type: %d class: %d fixed: %u header: %u LSNs: %u "
- "name: %s",
- 0,
- log_record_type_descriptor[0].class,
- (uint)log_record_type_descriptor[0].fixed_length,
- (uint)log_record_type_descriptor[0].read_header_len,
- (uint)log_record_type_descriptor[0].compressed_LSN,
- log_record_type_descriptor[0].name));
DBUG_ASSERT(log_record_type_descriptor[0].class == LOGRECTYPE_NOT_ALLOWED);
- DBUG_PRINT("info", ("record type 0: OK"));
- for (i= 1; i <= num; i++)
+
+ for (i= 0; i <= num; i++)
{
DBUG_PRINT("info",
("record type: %d class: %d fixed: %u header: %u LSNs: %u "
@@ -294,7 +284,7 @@ static void check_translog_description_table(int num)
log_record_type_descriptor[i].name));
switch (log_record_type_descriptor[i].class) {
case LOGRECTYPE_NOT_ALLOWED:
- DBUG_ASSERT(0);
+ DBUG_ASSERT(i == 0);
break;
case LOGRECTYPE_VARIABLE_LENGTH:
DBUG_ASSERT(log_record_type_descriptor[i].fixed_length == 0);
@@ -320,13 +310,10 @@ static void check_translog_description_table(int num)
default:
DBUG_ASSERT(0);
}
- DBUG_PRINT("info", ("record type %d: OK", i));
}
- DBUG_PRINT("info", ("All filled records are OK"));
for (i= num + 1; i < LOGREC_NUMBER_OF_TYPES; i++)
{
DBUG_ASSERT(log_record_type_descriptor[i].class == LOGRECTYPE_NOT_ALLOWED);
- DBUG_PRINT("info", ("record type %d: OK", i));
}
DBUG_VOID_RETURN;
}
@@ -453,6 +440,18 @@ static LOG_DESC INIT_LOGREC_REDO_INDEX=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0,
"redo_index", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+static LOG_DESC INIT_LOGREC_REDO_INDEX_NEW_PAGE=
+{LOGRECTYPE_VARIABLE_LENGTH, 0,
+ FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2 + KEY_NR_STORE_SIZE + 1,
+ NULL, write_hook_for_redo, NULL, 0,
+ "redo_index_new_page", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
+static LOG_DESC INIT_LOGREC_REDO_INDEX_FREE_PAGE=
+{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
+ FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
+ NULL, write_hook_for_redo, NULL, 0,
+ "redo_index_free_page", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
+
static LOG_DESC INIT_LOGREC_REDO_UNDELETE_ROW=
{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0,
"redo_undelete_row", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
@@ -485,13 +484,23 @@ static LOG_DESC INIT_LOGREC_UNDO_ROW_UPDATE=
"undo_row_update", LOGREC_LAST_IN_GROUP, NULL, NULL};
static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT=
-{LOGRECTYPE_VARIABLE_LENGTH, 0, 10, NULL, write_hook_for_undo, NULL, 1,
+{LOGRECTYPE_VARIABLE_LENGTH, 0,
+ LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE,
+ NULL, write_hook_for_undo_key, NULL, 1,
"undo_key_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};
static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE=
-{LOGRECTYPE_VARIABLE_LENGTH, 0, 15, NULL, write_hook_for_undo, NULL, 1,
+{LOGRECTYPE_VARIABLE_LENGTH, 0,
+ LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE,
+ NULL, write_hook_for_undo_key, NULL, 1,
"undo_key_delete", LOGREC_LAST_IN_GROUP, NULL, NULL};
+static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE_WITH_ROOT=
+{LOGRECTYPE_VARIABLE_LENGTH, 0,
+ LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE + PAGE_STORE_SIZE,
+ NULL, write_hook_for_undo_key, NULL, 1,
+ "undo_key_delete_with_root", LOGREC_LAST_IN_GROUP, NULL, NULL};
+
static LOG_DESC INIT_LOGREC_PREPARE=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
"prepare", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
@@ -572,6 +581,10 @@ static void loghandler_init()
INIT_LOGREC_REDO_UPDATE_ROW_HEAD;
log_record_type_descriptor[LOGREC_REDO_INDEX]=
INIT_LOGREC_REDO_INDEX;
+ log_record_type_descriptor[LOGREC_REDO_INDEX_NEW_PAGE]=
+ INIT_LOGREC_REDO_INDEX_NEW_PAGE;
+ log_record_type_descriptor[LOGREC_REDO_INDEX_FREE_PAGE]=
+ INIT_LOGREC_REDO_INDEX_FREE_PAGE;
log_record_type_descriptor[LOGREC_REDO_UNDELETE_ROW]=
INIT_LOGREC_REDO_UNDELETE_ROW;
log_record_type_descriptor[LOGREC_CLR_END]=
@@ -588,6 +601,8 @@ static void loghandler_init()
INIT_LOGREC_UNDO_KEY_INSERT;
log_record_type_descriptor[LOGREC_UNDO_KEY_DELETE]=
INIT_LOGREC_UNDO_KEY_DELETE;
+ log_record_type_descriptor[LOGREC_UNDO_KEY_DELETE_WITH_ROOT]=
+ INIT_LOGREC_UNDO_KEY_DELETE_WITH_ROOT;
log_record_type_descriptor[LOGREC_PREPARE]=
INIT_LOGREC_PREPARE;
log_record_type_descriptor[LOGREC_PREPARE_WITH_UNDO_PURGE]=
@@ -2432,13 +2447,13 @@ static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer,
last_protected_sector));
for (i= 1; i <= last_protected_sector; i++)
{
- uint index= i * 2;
+ uint idx= i * 2;
uint offset= i * DISK_DRIVE_SECTOR_SIZE;
DBUG_PRINT("info", ("Sector %u: 0x%02x%02x <- 0x%02x%02x",
i, buffer[offset], buffer[offset + 1],
- table[index], table[index + 1]));
- buffer[offset]= table[index];
- buffer[offset + 1]= table[index + 1];
+ table[idx], table[idx + 1]));
+ buffer[offset]= table[idx];
+ buffer[offset + 1]= table[idx + 1];
}
}
else
@@ -2533,7 +2548,7 @@ static void translog_free_link(PAGECACHE_BLOCK_LINK *direct_link)
if (direct_link)
pagecache_unlock_by_link(log_descriptor.pagecache, direct_link,
PAGECACHE_LOCK_READ_UNLOCK, PAGECACHE_UNPIN,
- LSN_IMPOSSIBLE, LSN_IMPOSSIBLE);
+ LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, 0);
DBUG_VOID_RETURN;
}
@@ -2555,21 +2570,21 @@ static void translog_free_link(PAGECACHE_BLOCK_LINK *direct_link)
static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr,
my_bool *last_page_ok)
{
- MY_STAT stat_buff, *stat;
+ MY_STAT stat_buff, *local_stat;
char path[FN_REFLEN];
uint32 rec_offset;
uint32 file_no= LSN_FILE_NO(*addr);
DBUG_ENTER("translog_get_last_page_addr");
- if (!(stat= my_stat(translog_filename_by_fileno(file_no, path),
- &stat_buff, MYF(MY_WME))))
+ if (!(local_stat= my_stat(translog_filename_by_fileno(file_no, path),
+ &stat_buff, MYF(MY_WME))))
DBUG_RETURN(1);
- DBUG_PRINT("info", ("File size: %lu", (ulong) stat->st_size));
- if (stat->st_size > TRANSLOG_PAGE_SIZE)
+ DBUG_PRINT("info", ("File size: %lu", (ulong) local_stat->st_size));
+ if (local_stat->st_size > TRANSLOG_PAGE_SIZE)
{
- rec_offset= (((stat->st_size / TRANSLOG_PAGE_SIZE) - 1) *
+ rec_offset= (((local_stat->st_size / TRANSLOG_PAGE_SIZE) - 1) *
TRANSLOG_PAGE_SIZE);
- *last_page_ok= (stat->st_size == rec_offset + TRANSLOG_PAGE_SIZE);
+ *last_page_ok= (local_stat->st_size == rec_offset + TRANSLOG_PAGE_SIZE);
}
else
{
@@ -5769,15 +5784,15 @@ translog_variable_length_header(uchar *page, translog_size_t page_offset,
for (;;)
{
- uint i, read= grp_no;
+ uint i, read_length= grp_no;
buff->chunk0_pages++;
if (page_rest < grp_no * (7 + 1))
- read= page_rest / (7 + 1);
+ read_length= page_rest / (7 + 1);
DBUG_PRINT("info", ("Read chunk0 page#%u read: %u left: %u "
"start from: %u",
- buff->chunk0_pages, read, grp_no, curr));
- for (i= 0; i < read; i++, curr++)
+ buff->chunk0_pages, read_length, grp_no, curr));
+ for (i= 0; i < read_length; i++, curr++)
{
DBUG_ASSERT(curr < buff->groups_no);
buff->groups[curr].addr= lsn_korr(src + i * (7 + 1));
@@ -5787,22 +5802,23 @@ translog_variable_length_header(uchar *page, translog_size_t page_offset,
LSN_IN_PARTS(buff->groups[curr].addr),
(uint) buff->groups[curr].num));
}
- grp_no-= read;
+ grp_no-= read_length;
if (grp_no == 0)
{
if (scanner)
{
buff->chunk0_data_addr= scanner->page_addr;
+ /* offset increased */
buff->chunk0_data_addr+= (page_offset + header_to_skip +
- read * (7 + 1)); /* offset increased */
+ read_length * (7 + 1));
}
else
{
buff->chunk0_data_addr= buff->lsn;
/* offset increased */
- buff->chunk0_data_addr+= (header_to_skip + read * (7 + 1));
+ buff->chunk0_data_addr+= (header_to_skip + read_length * (7 + 1));
}
- buff->chunk0_data_len= chunk_len - 2 - read * (7 + 1);
+ buff->chunk0_data_len= chunk_len - 2 - read_length * (7 + 1);
DBUG_PRINT("info", ("Data address: (%lu,0x%lx) len: %u",
LSN_IN_PARTS(buff->chunk0_data_addr),
buff->chunk0_data_len));
diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h
index 58a21ee09b2..67a861691f0 100644
--- a/storage/maria/ma_loghandler.h
+++ b/storage/maria/ma_loghandler.h
@@ -50,6 +50,8 @@ struct st_maria_handler;
/* Changing one of the "SIZE" below will break backward-compatibility! */
/* Length of CRC at end of pages */
+#define ROW_EXTENT_PAGE_SIZE 5
+#define ROW_EXTENT_COUNT_SIZE 2
#define CRC_LENGTH 4
/* Size of file id in logs */
#define FILEID_STORE_SIZE 2
@@ -61,6 +63,8 @@ struct st_maria_handler;
#define CLR_TYPE_STORE_SIZE 1
/* If table has live checksum we store its changes in UNDOs */
#define HA_CHECKSUM_STORE_SIZE 4
+#define KEY_NR_STORE_SIZE 1
+#define PAGE_LENGTH_STORE_SIZE 2
/* Store methods to match the above sizes */
#define fileid_store(T,A) int2store(T,A)
@@ -68,12 +72,14 @@ struct st_maria_handler;
#define dirpos_store(T,A) ((*(uchar*) (T)) = A)
#define pagerange_store(T,A) int2store(T,A)
#define clr_type_store(T,A) ((*(uchar*) (T)) = A)
+#define key_nr_store(T, A) ((*(uchar*) (T)) = A)
#define ha_checksum_store(T,A) int4store(T,A)
#define fileid_korr(P) uint2korr(P)
#define page_korr(P) uint5korr(P)
#define dirpos_korr(P) ((P)[0])
#define pagerange_korr(P) uint2korr(P)
#define clr_type_korr(P) ((P)[0])
+#define key_nr_korr(P) ((P)[0])
#define ha_checksum_korr(P) uint4korr(P)
/*
@@ -108,6 +114,8 @@ enum translog_record_type
LOGREC_REDO_DELETE_ROW,
LOGREC_REDO_UPDATE_ROW_HEAD,
LOGREC_REDO_INDEX,
+ LOGREC_REDO_INDEX_NEW_PAGE,
+ LOGREC_REDO_INDEX_FREE_PAGE,
LOGREC_REDO_UNDELETE_ROW,
LOGREC_CLR_END,
LOGREC_PURGE_END,
@@ -116,6 +124,7 @@ enum translog_record_type
LOGREC_UNDO_ROW_UPDATE,
LOGREC_UNDO_KEY_INSERT,
LOGREC_UNDO_KEY_DELETE,
+ LOGREC_UNDO_KEY_DELETE_WITH_ROOT,
LOGREC_PREPARE,
LOGREC_PREPARE_WITH_UNDO_PURGE,
LOGREC_COMMIT,
@@ -132,6 +141,20 @@ enum translog_record_type
};
#define LOGREC_NUMBER_OF_TYPES 64 /* Maximum, can't be extended */
+/*
+ Type of operations in LOGREC_REDO_INDEX.
+ Each operation code is read from the log record as a single byte, so
+ presumably existing values must not be renumbered once logs exist
+ (TODO confirm).
+*/
+
+enum en_key_op
+{
+ KEY_OP_NONE, /* Not used */
+ KEY_OP_OFFSET, /* Set current position */
+ KEY_OP_SHIFT, /* Shift data up or down at current position */
+ KEY_OP_CHANGE, /* Change data at current position */
+ KEY_OP_ADD_PREFIX, /* Insert data at start of page */
+ KEY_OP_DEL_PREFIX, /* Delete data at start of page */
+ KEY_OP_ADD_SUFFIX, /* Insert data at end of page */
+ KEY_OP_DEL_SUFFIX, /* Delete data at end of page */
+};
+
/* Size of log file; One log file is restricted to 4G */
typedef uint32 translog_size_t;
diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c
index 50126775b03..75ca054d88c 100644
--- a/storage/maria/ma_open.c
+++ b/storage/maria/ma_open.c
@@ -19,7 +19,6 @@
#include "ma_sp_defs.h"
#include "ma_rt_index.h"
#include "ma_blockrec.h"
-#include "trnman.h"
#include <m_ctype.h>
#if defined(MSDOS) || defined(__WIN__)
@@ -153,6 +152,14 @@ static MARIA_HA *maria_clone_internal(MARIA_SHARE *share, int mode,
if ((*share->init)(&info))
goto err;
+ /* The following should be big enough for all pinning purposes */
+ if (my_init_dynamic_array(&info.pinned_pages,
+ sizeof(MARIA_PINNED_PAGE),
+ max(share->base.blobs*2 + 4,
+ MARIA_MAX_TREE_LEVELS*3), 16))
+ goto err;
+
+
pthread_mutex_lock(&share->intern_lock);
info.read_record= share->read_record;
share->reopen++;
@@ -207,7 +214,8 @@ err:
switch (errpos) {
case 6:
(*share->end)(&info);
- my_free((uchar*) m_info,MYF(0));
+ delete_dynamic(&info.pinned_pages);
+ my_free(m_info, MYF(0));
/* fall through */
case 5:
if (data_file < 0)
@@ -491,6 +499,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
{
share->keyinfo[i].share= share;
disk_pos=_ma_keydef_read(disk_pos, &share->keyinfo[i]);
+ share->keyinfo[i].key_nr= i;
disk_pos_assert(disk_pos + share->keyinfo[i].keysegs * HA_KEYSEG_SIZE,
end_pos);
if (share->keyinfo[i].key_alg == HA_KEY_ALG_RTREE)
@@ -718,7 +727,8 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
#ifdef THREAD
thr_lock_init(&share->lock);
- VOID(pthread_mutex_init(&share->intern_lock,MY_MUTEX_INIT_FAST));
+ VOID(pthread_mutex_init(&share->intern_lock, MY_MUTEX_INIT_FAST));
+ VOID(pthread_cond_init(&share->intern_cond, 0));
for (i=0; i<keys; i++)
VOID(my_rwlock_init(&share->key_root_lock[i], NULL));
VOID(my_rwlock_init(&share->mmap_lock, NULL));
@@ -851,6 +861,8 @@ void _ma_setup_functions(register MARIA_SHARE *share)
share->scan_end= maria_scan_end_dummy;/* Compat. dummy function */
share->write_record_init= _ma_write_init_default;
share->write_record_abort= _ma_write_abort_default;
+ share->keypos_to_recpos= _ma_transparent_recpos;
+ share->recpos_to_keypos= _ma_transparent_recpos;
switch (share->data_file_type) {
case COMPRESSED_RECORD:
@@ -890,13 +902,15 @@ void _ma_setup_functions(register MARIA_SHARE *share)
}
break;
case STATIC_RECORD:
- share->read_record= _ma_read_static_record;
- share->scan= _ma_read_rnd_static_record;
- share->delete_record= _ma_delete_static_record;
- share->compare_record= _ma_cmp_static_record;
- share->update_record= _ma_update_static_record;
- share->write_record= _ma_write_static_record;
- share->compare_unique= _ma_cmp_static_unique;
+ share->read_record= _ma_read_static_record;
+ share->scan= _ma_read_rnd_static_record;
+ share->delete_record= _ma_delete_static_record;
+ share->compare_record= _ma_cmp_static_record;
+ share->update_record= _ma_update_static_record;
+ share->write_record= _ma_write_static_record;
+ share->compare_unique= _ma_cmp_static_unique;
+ share->keypos_to_recpos= _ma_static_keypos_to_recpos;
+ share->recpos_to_keypos= _ma_static_recpos_to_keypos;
if (share->state.header.org_data_file_type == STATIC_RECORD &&
! (share->options & HA_OPTION_NULL_FIELDS))
share->calc_checksum= _ma_static_checksum;
@@ -920,6 +934,9 @@ void _ma_setup_functions(register MARIA_SHARE *share)
share->write_record= _ma_write_block_record;
share->compare_unique= _ma_cmp_block_unique;
share->calc_checksum= _ma_checksum;
+ share->keypos_to_recpos= _ma_transaction_keypos_to_recpos;
+ share->recpos_to_keypos= _ma_transaction_recpos_to_keypos;
+
/*
write_block_record() will calculate the checksum; Tell maria_write()
that it doesn't have to do this.
@@ -988,6 +1005,18 @@ static void setup_key_functions(register MARIA_KEYDEF *keyinfo)
keyinfo->pack_key= _ma_calc_static_key_length;
keyinfo->store_key= _ma_store_static_key;
}
+
+ /* set keyinfo->write_comp_flag */
+ if (keyinfo->flag & HA_SORT_ALLOWS_SAME)
+ keyinfo->write_comp_flag=SEARCH_BIGGER; /* Put after same key */
+ else if (keyinfo->flag & ( HA_NOSAME | HA_FULLTEXT))
+ {
+ keyinfo->write_comp_flag= SEARCH_FIND | SEARCH_UPDATE; /* No duplicates */
+ if (keyinfo->flag & HA_NULL_ARE_EQUAL)
+ keyinfo->write_comp_flag|= SEARCH_NULL_ARE_EQUAL;
+ }
+ else
+ keyinfo->write_comp_flag= SEARCH_SAME; /* Keys in rec-pos order */
return;
}
diff --git a/storage/maria/ma_packrec.c b/storage/maria/ma_packrec.c
index 446981d9fb9..08959aca079 100644
--- a/storage/maria/ma_packrec.c
+++ b/storage/maria/ma_packrec.c
@@ -773,7 +773,7 @@ int _ma_pack_rec_unpack(register MARIA_HA *info, MARIA_BIT_BUFF *bit_buff,
{
memcpy(to, from, info->s->base.null_bytes);
to+= info->s->base.null_bytes;
- from+= info->s->base.null_bytes;
+ from+= info->s->base.null_bytes;
reclength-= info->s->base.null_bytes;
}
init_bit_buffer(bit_buff, (uchar*) from, reclength);
diff --git a/storage/maria/ma_page.c b/storage/maria/ma_page.c
index a7973af3f57..3493ad544c4 100644
--- a/storage/maria/ma_page.c
+++ b/storage/maria/ma_page.c
@@ -16,27 +16,40 @@
/* Read and write key blocks */
#include "maria_def.h"
+#include "trnman.h"
+#include "ma_key_recover.h"
- /* Fetch a key-page in memory */
+/* Fetch a key-page in memory */
-uchar *_ma_fetch_keypage(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
- my_off_t page, int level,
- uchar *buff,
- int return_buffer __attribute__ ((unused)))
+uchar *_ma_fetch_keypage(register MARIA_HA *info,
+ MARIA_KEYDEF *keyinfo __attribute__ ((unused)),
+ my_off_t page, enum pagecache_page_lock lock,
+ int level, uchar *buff,
+ int return_buffer __attribute__ ((unused)),
+ MARIA_PINNED_PAGE **page_link_res)
{
uchar *tmp;
uint page_size;
+ uint block_size= info->s->block_size;
+ MARIA_PINNED_PAGE page_link;
DBUG_ENTER("_ma_fetch_keypage");
DBUG_PRINT("enter",("page: %ld", (long) page));
- DBUG_ASSERT(info->s->pagecache->block_size == keyinfo->block_length);
- /*
- TODO: replace PAGECACHE_PLAIN_PAGE with PAGECACHE_LSN_PAGE when
- LSN on the pages will be implemented
- */
tmp= pagecache_read(info->s->pagecache, &info->s->kfile,
- page / keyinfo->block_length, level, buff,
- PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_LEFT_UNLOCKED, 0);
+ page / block_size, level, buff,
+ info->s->page_type, lock, &page_link.link);
+
+ if (lock != PAGECACHE_LOCK_LEFT_UNLOCKED)
+ {
+ DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE);
+ page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 0;
+ push_dynamic(&info->pinned_pages, (void*) &page_link);
+ *page_link_res= dynamic_element(&info->pinned_pages,
+ info->pinned_pages.elements-1,
+ MARIA_PINNED_PAGE *);
+ }
+
if (tmp == info->buff)
info->keyread_buff_used=1;
else if (!tmp)
@@ -50,13 +63,13 @@ uchar *_ma_fetch_keypage(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
info->last_keypage=page;
#ifdef EXTRA_DEBUG
page_size= _ma_get_page_used(info, tmp);
- if (page_size < 4 || page_size > keyinfo->block_length ||
- _ma_get_keynr(info, tmp) > info->s->base.keys)
+ if (page_size < 4 || page_size > block_size ||
+ _ma_get_keynr(info, tmp) != keyinfo->key_nr)
{
DBUG_PRINT("error",("page %lu had wrong page length: %u keynr: %u",
(ulong) page, page_size,
_ma_get_keynr(info, tmp)));
- DBUG_DUMP("page", (char*) tmp, keyinfo->block_length);
+ DBUG_DUMP("page", (char*) tmp, page_size);
info->last_keypage = HA_OFFSET_ERROR;
maria_print_error(info->s, HA_ERR_CRASHED);
my_errno= HA_ERR_CRASHED;
@@ -67,16 +80,20 @@ uchar *_ma_fetch_keypage(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
} /* _ma_fetch_keypage */
- /* Write a key-page on disk */
+/* Write a key-page on disk */
int _ma_write_keypage(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
- my_off_t page, int level, uchar *buff)
+ my_off_t page, enum pagecache_page_lock lock,
+ int level, uchar *buff)
{
+ uint block_size= info->s->block_size;
+ MARIA_PINNED_PAGE page_link;
+ int res;
DBUG_ENTER("_ma_write_keypage");
#ifdef EXTRA_DEBUG /* Safety check */
if (page < info->s->base.keystart ||
- page+keyinfo->block_length > info->state->key_file_length ||
+ page+block_size > info->state->key_file_length ||
(page & (MARIA_MIN_KEY_BLOCK_LENGTH-1)))
{
DBUG_PRINT("error",("Trying to write inside key status region: "
@@ -92,113 +109,214 @@ int _ma_write_keypage(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
#endif
/* Verify that keynr is correct */
- DBUG_ASSERT(_ma_get_keynr(info, buff) ==
- (uint) (keyinfo - info->s->keyinfo));
+ DBUG_ASSERT(_ma_get_keynr(info, buff) == keyinfo->key_nr);
-#ifdef HAVE_purify
+#if defined(EXTRA_DEBUG) && defined(HAVE_purify)
{
- /* Clear unitialized part of page to avoid valgrind/purify warnings */
- uint length= _ma_get_page_used(info, buff);
- bzero(buff+length, keyinfo->block_length-length);
- length=keyinfo->block_length;
+ /* This is here to catch uninitialized bytes */
+ ulong crc= my_checksum(0, buff, block_size - KEYPAGE_CHECKSUM_SIZE);
+ int4store(buff + block_size - KEYPAGE_CHECKSUM_SIZE, crc);
}
#endif
- DBUG_ASSERT(info->s->pagecache->block_size == keyinfo->block_length);
+#ifdef IDENTICAL_PAGES_AFTER_RECOVERY
+ {
+ uint length= _ma_get_page_used(info, buff);
+ bzero(buff + length, block_size - length);
+ }
+#endif
+ DBUG_ASSERT(info->s->pagecache->block_size == block_size);
if (!(info->s->options & HA_OPTION_PAGE_CHECKSUM))
- bfill(buff + keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE,
+ bfill(buff + block_size - KEYPAGE_CHECKSUM_SIZE,
KEYPAGE_CHECKSUM_SIZE, (uchar) 255);
- /*
- TODO: replace PAGECACHE_PLAIN_PAGE with PAGECACHE_LSN_PAGE when
- LSN on the pages will be implemented
- */
- DBUG_RETURN(pagecache_write(info->s->pagecache,
- &info->s->kfile, page / keyinfo->block_length,
- level, buff, PAGECACHE_PLAIN_PAGE,
- PAGECACHE_LOCK_LEFT_UNLOCKED,
- PAGECACHE_PIN_LEFT_UNPINNED,
- PAGECACHE_WRITE_DELAY, 0));
+ res= pagecache_write(info->s->pagecache,
+ &info->s->kfile, page / block_size,
+ level, buff, info->s->page_type,
+ lock,
+ lock == PAGECACHE_LOCK_LEFT_WRITELOCKED ?
+ PAGECACHE_PIN_LEFT_PINNED :
+ PAGECACHE_PIN,
+ PAGECACHE_WRITE_DELAY, &page_link.link);
+
+ if (lock == PAGECACHE_LOCK_WRITE)
+ {
+ /* It was not locked before, we have to unlock it when we unpin pages */
+ page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
+ push_dynamic(&info->pinned_pages, (void*) &page_link);
+ }
+ DBUG_RETURN(res);
+
} /* maria_write_keypage */
- /* Remove page from disk */
+/**
+ @brief Put page in free list
+
+ @fn _ma_dispose()
+ @param info Maria handle
+ @param pos Address to page
+ @param page_not_read 1 if page has not yet been read
+
+ @note
+ The page at 'pos' must have been read with a write lock
+
+ @return
+ @retval 0 ok
+ @retval 1 error
-int _ma_dispose(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, my_off_t pos,
- int level)
+*/
+
+int _ma_dispose(register MARIA_HA *info, my_off_t pos, my_bool page_not_read)
{
my_off_t old_link;
uchar buff[MAX_KEYPAGE_HEADER_SIZE+8];
- uint offset;
- pgcache_page_no_t page_no;
+ ulonglong page_no;
+ MARIA_SHARE *share= info->s;
+ MARIA_PINNED_PAGE page_link;
+ uint block_size= share->block_size;
+ int result= 0;
+ enum pagecache_page_lock lock_method;
+ enum pagecache_page_pin pin_method;
DBUG_ENTER("_ma_dispose");
DBUG_PRINT("enter",("pos: %ld", (long) pos));
+ DBUG_ASSERT(pos % block_size == 0);
+
+ (void) _ma_lock_key_del(info, 0);
- old_link= info->s->state.key_del;
- info->s->state.key_del= pos;
- page_no= pos / keyinfo->block_length;
- offset= pos % keyinfo->block_length;
- bzero(buff, info->s->keypage_header);
+ old_link= share->state.key_del;
+ share->state.key_del= pos;
+ page_no= pos / block_size;
+ bzero(buff, share->keypage_header);
_ma_store_keynr(info, buff, (uchar) MARIA_DELETE_KEY_NR);
- mi_sizestore(buff + info->s->keypage_header, old_link);
- info->s->state.changed|= STATE_NOT_SORTED_PAGES;
-
- DBUG_ASSERT(info->s->pagecache->block_size == keyinfo->block_length &&
- info->s->pagecache->block_size == info->s->block_size);
- /*
- TODO: replace PAGECACHE_PLAIN_PAGE with PAGECACHE_LSN_PAGE when
- LSN on the pages will be implemented
- */
- DBUG_RETURN(pagecache_write_part(info->s->pagecache,
- &info->s->kfile, page_no, level, buff,
- PAGECACHE_PLAIN_PAGE,
- PAGECACHE_LOCK_LEFT_UNLOCKED,
- PAGECACHE_PIN_LEFT_UNPINNED,
- PAGECACHE_WRITE_DELAY, 0,
- offset, info->s->keypage_header+8, 0, 0));
+ mi_sizestore(buff + share->keypage_header, old_link);
+ share->state.changed|= STATE_NOT_SORTED_PAGES;
+
+ if (info->s->now_transactional)
+ {
+ LSN lsn;
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2];
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
+ my_off_t page;
+
+ /* Store address of deleted page */
+ page_store(log_data + FILEID_STORE_SIZE, page_no);
+
+ /* Store link to next unused page (the link that is written to page) */
+ page= (old_link == HA_OFFSET_ERROR ? IMPOSSIBLE_PAGE_NO :
+ old_link / info->s->block_size);
+ page_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE, page);
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
+
+ if (translog_write_record(&lsn, LOGREC_REDO_INDEX_FREE_PAGE,
+ info->trn, info, sizeof(log_data),
+ TRANSLOG_INTERNAL_PARTS + 1, log_array,
+ log_data, NULL))
+ result= 1;
+ }
+
+ if (page_not_read)
+ {
+ lock_method= PAGECACHE_LOCK_WRITE;
+ pin_method= PAGECACHE_PIN;
+ }
+ else
+ {
+ lock_method= PAGECACHE_LOCK_LEFT_WRITELOCKED;
+ pin_method= PAGECACHE_PIN_LEFT_PINNED;
+ }
+
+ if (pagecache_write_part(share->pagecache,
+ &share->kfile, (pgcache_page_no_t) page_no,
+ PAGECACHE_PRIORITY_LOW, buff,
+ share->page_type,
+ lock_method, pin_method,
+ PAGECACHE_WRITE_DELAY, &page_link.link,
+ 0, share->keypage_header+8, 0, 0))
+ result= 1;
+
+ if (page_not_read)
+ {
+ /* It was not locked before, we have to unlock it when we unpin pages */
+ page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ page_link.changed= 1;
+ push_dynamic(&info->pinned_pages, (void*) &page_link);
+ }
+
+ DBUG_RETURN(result);
} /* _ma_dispose */
-/* Make new page on disk */
+/**
+ @brief Get address for free page to use
+
+ @fn _ma_new()
+ @param info Maria handle
+ @param level Type of key block (caching priority for pagecache)
+ @param page_link Pointer to page in page cache if read. One can
+ check if this is used by checking if
+ page_link->changed != 0
+
+ @return
+ HA_OFFSET_ERROR File is full or page read error
+ # Page address to use
+*/
+
+my_off_t _ma_new(register MARIA_HA *info, int level,
+ MARIA_PINNED_PAGE **page_link)
-my_off_t _ma_new(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, int level)
{
my_off_t pos;
- uchar *buff;
+ MARIA_SHARE *share= info->s;
+ uint block_size= share->block_size;
DBUG_ENTER("_ma_new");
- if ((pos= info->s->state.key_del) == HA_OFFSET_ERROR)
+ if (_ma_lock_key_del(info, 1))
{
if (info->state->key_file_length >=
- info->s->base.max_key_file_length - keyinfo->block_length)
+ share->base.max_key_file_length - block_size)
{
my_errno=HA_ERR_INDEX_FILE_FULL;
DBUG_RETURN(HA_OFFSET_ERROR);
}
- pos=info->state->key_file_length;
- info->state->key_file_length+= keyinfo->block_length;
+ pos= info->state->key_file_length;
+ info->state->key_file_length+= block_size;
+ (*page_link)->changed= 0;
+ (*page_link)->write_lock= PAGECACHE_LOCK_WRITE;
}
else
{
- /* QQ: Remove this alloc (We don't have to copy the page) */
- buff= my_alloca(info->s->block_size);
- DBUG_ASSERT(info->s->pagecache->block_size == keyinfo->block_length &&
- info->s->pagecache->block_size == info->s->block_size);
+ uchar *buff;
/*
TODO: replace PAGECACHE_PLAIN_PAGE with PAGECACHE_LSN_PAGE when
LSN on the pages will be implemented
*/
- DBUG_ASSERT(info->s->pagecache->block_size == keyinfo->block_length);
- if (!pagecache_read(info->s->pagecache,
- &info->s->kfile, pos / keyinfo->block_length, level,
- buff, PAGECACHE_PLAIN_PAGE,
- PAGECACHE_LOCK_LEFT_UNLOCKED, 0))
+ pos= info->s->state.key_del; /* Protected */
+ DBUG_ASSERT(share->pagecache->block_size == block_size);
+ if (!(buff= pagecache_read(share->pagecache,
+ &share->kfile, pos / block_size, level,
+ 0, share->page_type,
+ PAGECACHE_LOCK_WRITE, &(*page_link)->link)))
pos= HA_OFFSET_ERROR;
else
- info->s->state.key_del= mi_sizekorr(buff+info->s->keypage_header);
- my_afree(buff);
+ {
+ share->current_key_del= mi_sizekorr(buff+share->keypage_header);
+ DBUG_ASSERT(share->current_key_del != info->s->state.key_del &&
+ share->current_key_del);
+ }
+
+ (*page_link)->unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ (*page_link)->write_lock= PAGECACHE_LOCK_WRITE;
+ (*page_link)->changed= 0;
+ push_dynamic(&info->pinned_pages, (void*) *page_link);
+ *page_link= dynamic_element(&info->pinned_pages,
+ info->pinned_pages.elements-1,
+ MARIA_PINNED_PAGE *);
}
- info->s->state.changed|= STATE_NOT_SORTED_PAGES;
+ share->state.changed|= STATE_NOT_SORTED_PAGES;
DBUG_PRINT("exit",("Pos: %ld",(long) pos));
DBUG_RETURN(pos);
} /* _ma_new */
diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c
index 842acd2f0e8..4e468c380db 100755
--- a/storage/maria/ma_pagecache.c
+++ b/storage/maria/ma_pagecache.c
@@ -156,6 +156,7 @@ struct st_pagecache_hash_link
#define PCBLOCK_REASSIGNED 8 /* block does not accept requests for old page */
#define PCBLOCK_IN_FLUSH 16 /* block is in flush operation */
#define PCBLOCK_CHANGED 32 /* block buffer contains a dirty page */
+#define PCBLOCK_DIRECT_W 64 /* possible direct write to the block */
/* page status, returned by find_block */
#define PAGE_READ 0
@@ -463,7 +464,7 @@ error:
DBUG_RETURN(1);
}
#endif /* NOT_USED */
-#endif /* !DBUG_OFF */
+#endif /* !DBUG_OFF */
#define FLUSH_CACHE 2000 /* sort this many blocks at once */
@@ -719,6 +720,11 @@ ulong init_pagecache(PAGECACHE *pagecache, size_t use_mem,
{
for ( ; ; )
{
+ if (blocks < 8)
+ {
+ my_errno= ENOMEM;
+ goto err;
+ }
/* Set my_hash_entries to the next bigger 2 power */
if ((pagecache->hash_entries= next_power(blocks)) <
(blocks) * 5/4)
@@ -749,11 +755,6 @@ ulong init_pagecache(PAGECACHE *pagecache, size_t use_mem,
my_large_free(pagecache->block_mem, MYF(0));
pagecache->block_mem= 0;
}
- if (blocks < 8)
- {
- my_errno= ENOMEM;
- goto err;
- }
blocks= blocks / 4*3;
}
pagecache->blocks_unused= blocks;
@@ -1082,7 +1083,8 @@ void end_pagecache(PAGECACHE *pagecache, my_bool cleanup)
DBUG_PRINT("status", ("used: %lu changed: %lu w_requests: %lu "
"writes: %lu r_requests: %lu reads: %lu",
- pagecache->blocks_used, pagecache->global_blocks_changed,
+ pagecache->blocks_used,
+ pagecache->global_blocks_changed,
(ulong) pagecache->global_cache_w_requests,
(ulong) pagecache->global_cache_write,
(ulong) pagecache->global_cache_r_requests,
@@ -1132,9 +1134,9 @@ static inline void link_changed(PAGECACHE_BLOCK_LINK *block,
static void link_to_file_list(PAGECACHE *pagecache,
PAGECACHE_BLOCK_LINK *block,
- PAGECACHE_FILE *file, my_bool unlink)
+ PAGECACHE_FILE *file, my_bool unlink_flag)
{
- if (unlink)
+ if (unlink_flag)
unlink_changed(block);
link_changed(block, &pagecache->file_blocks[FILE_HASH(*file)]);
if (block->status & PCBLOCK_CHANGED)
@@ -2503,22 +2505,22 @@ static void check_and_set_lsn(PAGECACHE *pagecache,
}
-/*
- Unlock/unpin page and put LSN stamp if it need
-
- SYNOPSIS
- pagecache_unlock()
- pagecache pointer to a page cache data structure
- file handler for the file for the block of data to be read
- pageno number of the block of data in the file
- lock lock change
- pin pin page
- first_REDO_LSN_for_page do not set it if it is zero
- lsn if it is not LSN_IMPOSSIBLE (0) and it
+/**
+ @brief Unlock/unpin page and put LSN stamp if needed
+
+ @param pagecache pointer to a page cache data structure
+ @param file handler for the file for the block of data to be read
+ @param pageno number of the block of data in the file
+ @param lock lock change
+ @param pin pin page
+ @param first_REDO_LSN_for_page do not set it if it is zero
+ @param lsn if it is not LSN_IMPOSSIBLE (0) and it
is bigger then LSN on the page it will be written on
the page
+ @param was_changed should be true if the page was write locked through
+ a direct link and the page was changed
- NOTE
+ @note
Pininig uses requests registration mechanism it works following way:
| beginnig | ending |
| of func. | of func. |
@@ -2537,7 +2539,7 @@ void pagecache_unlock(PAGECACHE *pagecache,
enum pagecache_page_lock lock,
enum pagecache_page_pin pin,
LSN first_REDO_LSN_for_page,
- LSN lsn)
+ LSN lsn, my_bool was_changed)
{
PAGECACHE_BLOCK_LINK *block;
int page_st;
@@ -2578,6 +2580,26 @@ void pagecache_unlock(PAGECACHE *pagecache,
if (lsn != LSN_IMPOSSIBLE)
check_and_set_lsn(pagecache, lsn, block);
+ /* a PCBLOCK_DIRECT_W block may only be write-unlocked or left write-locked */
+ DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0 ||
+ (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
+ lock == PAGECACHE_LOCK_LEFT_WRITELOCKED));
+ /*
+ if was_changed then status should be PCBLOCK_DIRECT_W or marked
+ as dirty
+ */
+ DBUG_ASSERT(!was_changed || (block->status & PCBLOCK_DIRECT_W) ||
+ (block->status & PCBLOCK_CHANGED));
+ if ((block->status & PCBLOCK_DIRECT_W) &&
+ (lock == PAGECACHE_LOCK_WRITE_UNLOCK))
+ {
+ if (!(block->status & PCBLOCK_CHANGED) && was_changed)
+ link_to_changed_list(pagecache, block);
+ block->status&= ~PCBLOCK_DIRECT_W;
+ DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: 0x%lx",
+ (ulong) block));
+ }
+
if (make_lock_and_pin(pagecache, block, lock, pin, file))
{
DBUG_ASSERT(0); /* should not happend */
@@ -2635,6 +2657,8 @@ void pagecache_unpin(PAGECACHE *pagecache,
block= find_block(pagecache, file, pageno, 0, 0, 0, &page_st);
DBUG_ASSERT(block != 0);
DBUG_ASSERT(page_st == PAGE_READ);
+ /* we can't unpin a PCBLOCK_DIRECT_W page without unlocking it */
+ DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0);
if (lsn != LSN_IMPOSSIBLE)
check_and_set_lsn(pagecache, lsn, block);
@@ -2665,19 +2689,19 @@ void pagecache_unpin(PAGECACHE *pagecache,
}
-/*
- Unlock/unpin page and put LSN stamp if it need
+/**
+ @brief Unlock/unpin page and put LSN stamp if needed
(uses direct block/page pointer)
- SYNOPSIS
- pagecache_unlock_by_link()
- pagecache pointer to a page cache data structure
- link direct link to page (returned by read or write)
- lock lock change
- pin pin page
- first_REDO_LSN_for_page do not set it if it is LSN_IMPOSSIBLE (0)
- lsn if it is not LSN_IMPOSSIBLE and it is bigger then
+ @param pagecache pointer to a page cache data structure
+ @param link direct link to page (returned by read or write)
+ @param lock lock change
+ @param pin pin page
+ @param first_REDO_LSN_for_page do not set it if it is LSN_IMPOSSIBLE (0)
+ @param lsn if it is not LSN_IMPOSSIBLE and it is bigger then
LSN on the page it will be written on the page
+ @param was_changed should be true if the page was write locked through
+ a direct link and the page was changed
*/
void pagecache_unlock_by_link(PAGECACHE *pagecache,
@@ -2685,7 +2709,7 @@ void pagecache_unlock_by_link(PAGECACHE *pagecache,
enum pagecache_page_lock lock,
enum pagecache_page_pin pin,
LSN first_REDO_LSN_for_page,
- LSN lsn)
+ LSN lsn, my_bool was_changed)
{
DBUG_ENTER("pagecache_unlock_by_link");
DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu %s %s",
@@ -2719,24 +2743,48 @@ void pagecache_unlock_by_link(PAGECACHE *pagecache,
DBUG_ASSERT(pagecache->can_be_used);
inc_counter_for_resize_op(pagecache);
- if (first_REDO_LSN_for_page != LSN_IMPOSSIBLE)
+ if (was_changed)
{
- /*
- LOCK_READ_UNLOCK is ok here as the page may have first locked
- with WRITE lock that was temporarly converted to READ lock before
- it's unpinned
- */
- DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
- lock == PAGECACHE_LOCK_READ_UNLOCK);
- DBUG_ASSERT(pin == PAGECACHE_UNPIN);
- if (block->rec_lsn == LSN_MAX)
- block->rec_lsn= first_REDO_LSN_for_page;
- else
- DBUG_ASSERT(cmp_translog_addr(block->rec_lsn,
- first_REDO_LSN_for_page) <= 0);
+ if (first_REDO_LSN_for_page != LSN_IMPOSSIBLE)
+ {
+ /*
+ LOCK_READ_UNLOCK is ok here as the page may have first locked
+ with WRITE lock that was temporarly converted to READ lock before
+ it's unpinned
+ */
+ DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
+ lock == PAGECACHE_LOCK_READ_UNLOCK);
+ DBUG_ASSERT(pin == PAGECACHE_UNPIN);
+ if (block->rec_lsn == LSN_MAX)
+ block->rec_lsn= first_REDO_LSN_for_page;
+ else
+ DBUG_ASSERT(cmp_translog_addr(block->rec_lsn,
+ first_REDO_LSN_for_page) <= 0);
+ }
+ if (lsn != LSN_IMPOSSIBLE)
+ check_and_set_lsn(pagecache, lsn, block);
+ }
+
+ /* a PCBLOCK_DIRECT_W block may only be write-unlocked or left write-locked */
+ DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0 ||
+ (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
+ lock == PAGECACHE_LOCK_LEFT_WRITELOCKED));
+ /*
+ If was_changed then status should be PCBLOCK_DIRECT_W or marked
+ as dirty
+ */
+ DBUG_ASSERT(!was_changed || (block->status & PCBLOCK_DIRECT_W) ||
+ (block->status & PCBLOCK_CHANGED));
+ if ((block->status & PCBLOCK_DIRECT_W) &&
+ (lock == PAGECACHE_LOCK_WRITE_UNLOCK))
+ {
+ if (!(block->status & PCBLOCK_CHANGED) && was_changed)
+ link_to_changed_list(pagecache, block);
+ block->status&= ~PCBLOCK_DIRECT_W;
+ DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: 0x%lx",
+ (ulong) block));
+
}
- if (lsn != LSN_IMPOSSIBLE)
- check_and_set_lsn(pagecache, lsn, block);
if (make_lock_and_pin(pagecache, block, lock, pin, 0))
DBUG_ASSERT(0); /* should not happend */
@@ -2786,6 +2834,8 @@ void pagecache_unpin_by_link(PAGECACHE *pagecache,
unlock.
*/
DBUG_ASSERT(pagecache->can_be_used);
+ /* we can't unpin a PCBLOCK_DIRECT_W page without unlocking it */
+ DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0);
inc_counter_for_resize_op(pagecache);
@@ -2869,7 +2919,7 @@ uchar *pagecache_valid_read(PAGECACHE *pagecache,
uchar *buff,
enum pagecache_page_type type,
enum pagecache_page_lock lock,
- PAGECACHE_BLOCK_LINK **link,
+ PAGECACHE_BLOCK_LINK **page_link,
pagecache_disk_read_validator validator,
uchar* validator_data)
{
@@ -2887,9 +2937,9 @@ uchar *pagecache_valid_read(PAGECACHE *pagecache,
DBUG_ASSERT(buff != 0 || (buff == 0 && (pin == PAGECACHE_PIN ||
pin == PAGECACHE_PIN_LEFT_PINNED)));
- if (!link)
- link= &fake_link;
- *link= 0; /* Catch errors */
+ if (!page_link)
+ page_link= &fake_link;
+ *page_link= 0; /* Catch errors */
restart:
@@ -2932,6 +2982,11 @@ restart:
validator, validator_data);
DBUG_PRINT("info", ("read is done"));
}
+
+ /* PCBLOCK_DIRECT_W should have been cleared by the unlock */
+ DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0 ||
+ lock == PAGECACHE_LOCK_LEFT_WRITELOCKED);
+
if (make_lock_and_pin(pagecache, block, lock, pin, file))
{
/*
@@ -2947,11 +3002,15 @@ restart:
if (!buff)
{
buff= block->buffer;
- /* if we lock for write we must link the block to changed blocks */
+ /* possibly we will write here (resolved on unlock) */
if ((lock == PAGECACHE_LOCK_WRITE ||
lock == PAGECACHE_LOCK_LEFT_WRITELOCKED) &&
!(block->status & PCBLOCK_CHANGED))
- link_to_changed_list(pagecache, block);
+ {
+ block->status|= PCBLOCK_DIRECT_W;
+ DBUG_PRINT("info", ("Set PCBLOCK_DIRECT_W for block: 0x%lx",
+ (ulong) block));
+ }
}
else
{
@@ -2980,7 +3039,7 @@ restart:
if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN)
unreg_request(pagecache, block, 1);
else
- *link= block;
+ *page_link= block;
dec_counter_for_resize_op(pagecache);
@@ -3039,28 +3098,27 @@ my_bool pagecache_delete(PAGECACHE *pagecache,
lock == PAGECACHE_LOCK_LEFT_WRITELOCKED);
DBUG_ASSERT(pin == PAGECACHE_PIN ||
pin == PAGECACHE_PIN_LEFT_PINNED);
-
restart:
if (pagecache->can_be_used)
{
/* Key cache is used */
reg1 PAGECACHE_BLOCK_LINK *block;
- PAGECACHE_HASH_LINK **unused_start, *link;
+ PAGECACHE_HASH_LINK **unused_start, *page_link;
pagecache_pthread_mutex_lock(&pagecache->cache_lock);
if (!pagecache->can_be_used)
goto end;
inc_counter_for_resize_op(pagecache);
- link= get_present_hash_link(pagecache, file, pageno, &unused_start);
- if (!link)
+ page_link= get_present_hash_link(pagecache, file, pageno, &unused_start);
+ if (!page_link)
{
DBUG_PRINT("info", ("There is no such page in the cache"));
pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
DBUG_RETURN(0);
}
- block= link->block;
+ block= page_link->block;
/* See NOTE for pagecache_unlock about registering requests. */
if (pin == PAGECACHE_PIN)
reg_requests(pagecache, block, 1);
@@ -3076,6 +3134,9 @@ restart:
goto restart;
}
+ /* we can't delete a page that has a direct link opened for write */
+ DBUG_ASSERT((block->status & PCBLOCK_DIRECT_W) == 0);
+
if (block->status & PCBLOCK_CHANGED)
{
if (flush)
@@ -3116,8 +3177,8 @@ restart:
make_lock_and_pin(pagecache, block,
PAGECACHE_LOCK_WRITE_UNLOCK,
PAGECACHE_UNPIN, file);
- DBUG_ASSERT(link->requests > 0);
- link->requests--;
+ DBUG_ASSERT(page_link->requests > 0);
+ page_link->requests--;
/* See NOTE for pagecache_unlock about registering requests. */
free_block(pagecache, block);
@@ -3234,7 +3295,7 @@ my_bool pagecache_write_part(PAGECACHE *pagecache,
enum pagecache_page_lock lock,
enum pagecache_page_pin pin,
enum pagecache_write_mode write_mode,
- PAGECACHE_BLOCK_LINK **link,
+ PAGECACHE_BLOCK_LINK **page_link,
uint offset, uint size,
pagecache_disk_read_validator validator,
uchar* validator_data)
@@ -3257,9 +3318,9 @@ my_bool pagecache_write_part(PAGECACHE *pagecache,
DBUG_ASSERT(lock != PAGECACHE_LOCK_READ_UNLOCK);
DBUG_ASSERT(offset + size <= pagecache->block_size);
- if (!link)
- link= &fake_link;
- *link= 0;
+ if (!page_link)
+ page_link= &fake_link;
+ *page_link= 0;
restart:
@@ -3307,6 +3368,10 @@ restart:
(block->type == PAGECACHE_PLAIN_PAGE &&
type == PAGECACHE_LSN_PAGE));
block->type= type;
+ /* we write to the page so it makes no sense to keep the flag */
+ block->status&= ~PCBLOCK_DIRECT_W;
+ DBUG_PRINT("info", ("Drop PCBLOCK_DIRECT_W for block: 0x%lx",
+ (ulong) block));
if (make_lock_and_pin(pagecache, block,
write_lock_change_table[lock].new_lock,
@@ -3384,7 +3449,7 @@ restart:
if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN)
unreg_request(pagecache, block, 1);
else
- *link= block;
+ *page_link= block;
if (block->status & PCBLOCK_ERROR)
error= 1;
@@ -3629,8 +3694,10 @@ static int flush_pagecache_blocks_int(PAGECACHE *pagecache,
PAGECACHE_BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
int last_errno= 0;
DBUG_ENTER("flush_pagecache_blocks_int");
- DBUG_PRINT("enter",("file: %d blocks_used: %lu blocks_changed: %lu",
- file->file, pagecache->blocks_used, pagecache->blocks_changed));
+ DBUG_PRINT("enter",
+ ("file: %d blocks_used: %lu blocks_changed: %lu type: %d",
+ file->file, pagecache->blocks_used, pagecache->blocks_changed,
+ type));
#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
DBUG_EXECUTE("check_pagecache",
@@ -4044,7 +4111,7 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache,
wqueue_add_to_queue(&other_flusher->flush_queue, thread);
do
{
- KEYCACHE_DBUG_PRINT("pagecache_collect_çhanged_blocks_with_lsn: wait",
+ KEYCACHE_DBUG_PRINT("pagecache_collect_çhanged_blocks_with_lsn: wait",
("suspend thread %ld", thread->id));
pagecache_pthread_cond_wait(&thread->suspend,
&pagecache->cache_lock);
@@ -4311,7 +4378,7 @@ static int pagecache_pthread_cond_wait(pthread_cond_t *cond,
#endif
#endif /* defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) */
-#if defined(PAGECACHE_DEBUG)
+#if defined(PAGECACHE_DEBUG)
static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex)
{
int rc;
diff --git a/storage/maria/ma_pagecache.h b/storage/maria/ma_pagecache.h
index 64935a0fa36..59f4803c913 100644
--- a/storage/maria/ma_pagecache.h
+++ b/storage/maria/ma_pagecache.h
@@ -97,6 +97,9 @@ typedef struct st_pagecache_hash_link PAGECACHE_HASH_LINK;
typedef my_bool (*pagecache_disk_read_validator)(uchar *page, uchar *data);
#define PAGECACHE_CHANGED_BLOCKS_HASH 128 /* must be power of 2 */
+#define PAGECACHE_PRIORITY_LOW 0
+#define PAGECACHE_PRIORITY_DEFAULT 3
+#define PAGECACHE_PRIORITY_HIGH 6
/*
The page cache structure
@@ -228,13 +231,13 @@ extern void pagecache_unlock(PAGECACHE *pagecache,
enum pagecache_page_lock lock,
enum pagecache_page_pin pin,
LSN first_REDO_LSN_for_page,
- LSN lsn);
+ LSN lsn, my_bool was_changed);
extern void pagecache_unlock_by_link(PAGECACHE *pagecache,
PAGECACHE_BLOCK_LINK *block,
enum pagecache_page_lock lock,
enum pagecache_page_pin pin,
LSN first_REDO_LSN_for_page,
- LSN lsn);
+ LSN lsn, my_bool was_changed);
extern void pagecache_unpin(PAGECACHE *pagecache,
PAGECACHE_FILE *file,
pgcache_page_no_t pageno,
diff --git a/storage/maria/ma_panic.c b/storage/maria/ma_panic.c
index 0394f630343..ceabcd991aa 100644
--- a/storage/maria/ma_panic.c
+++ b/storage/maria/ma_panic.c
@@ -30,7 +30,7 @@
locked. A maria_readinfo() is done for
all single user files to get changes
in database
-
+
RETURN
0 ok
# error number in case of error
diff --git a/storage/maria/ma_range.c b/storage/maria/ma_range.c
index 8ef6dc87c0c..2fd172793ba 100644
--- a/storage/maria/ma_range.c
+++ b/storage/maria/ma_range.c
@@ -211,7 +211,9 @@ static double _ma_search_pos(register MARIA_HA *info,
if (pos == HA_OFFSET_ERROR)
DBUG_RETURN(0.5);
- if (!(buff= _ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,info->buff,1)))
+ if (!(buff= _ma_fetch_keypage(info,keyinfo, pos,
+ PAGECACHE_LOCK_LEFT_UNLOCKED, DFLT_INIT_HITS,
+ info->buff, 1, 0)))
goto err;
flag=(*keyinfo->bin_search)(info, keyinfo, buff, key, key_len, nextflag,
&keypos,info->lastkey, &after_key);
diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c
index 4d2c8dc7fdd..5c7bd4a9f65 100644
--- a/storage/maria/ma_recovery.c
+++ b/storage/maria/ma_recovery.c
@@ -23,8 +23,9 @@
#include "maria_def.h"
#include "ma_recovery.h"
#include "ma_blockrec.h"
-#include "trnman.h"
#include "ma_checkpoint.h"
+#include "trnman.h"
+#include "ma_key_recover.h"
struct st_trn_for_recovery /* used only in the REDO phase */
{
@@ -81,14 +82,23 @@ prototype_redo_exec_hook(REDO_PURGE_ROW_TAIL);
prototype_redo_exec_hook(REDO_FREE_HEAD_OR_TAIL);
prototype_redo_exec_hook(REDO_FREE_BLOCKS);
prototype_redo_exec_hook(REDO_DELETE_ALL);
+prototype_redo_exec_hook(REDO_INDEX);
+prototype_redo_exec_hook(REDO_INDEX_NEW_PAGE);
+prototype_redo_exec_hook(REDO_INDEX_FREE_PAGE);
prototype_redo_exec_hook(UNDO_ROW_INSERT);
prototype_redo_exec_hook(UNDO_ROW_DELETE);
prototype_redo_exec_hook(UNDO_ROW_UPDATE);
+prototype_redo_exec_hook(UNDO_KEY_INSERT);
+prototype_redo_exec_hook(UNDO_KEY_DELETE);
+prototype_redo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT);
prototype_redo_exec_hook(COMMIT);
prototype_redo_exec_hook(CLR_END);
prototype_undo_exec_hook(UNDO_ROW_INSERT);
prototype_undo_exec_hook(UNDO_ROW_DELETE);
prototype_undo_exec_hook(UNDO_ROW_UPDATE);
+prototype_undo_exec_hook(UNDO_KEY_INSERT);
+prototype_undo_exec_hook(UNDO_KEY_DELETE);
+prototype_undo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT);
static int run_redo_phase(LSN lsn, enum maria_apply_log_way apply);
static uint end_of_redo_phase(my_bool prepare_for_undo_phase);
@@ -879,7 +889,7 @@ prototype_redo_exec_hook(REDO_DROP_TABLE)
{
char *name;
int error= 1;
- MARIA_HA *info= NULL;
+ MARIA_HA *info;
if (skip_DDLs)
{
tprint(tracef, "we skip DDLs\n");
@@ -892,7 +902,7 @@ prototype_redo_exec_hook(REDO_DROP_TABLE)
rec->record_length)
{
tprint(tracef, "Failed to read record\n");
- goto end;
+ return 1;
}
name= log_record_buffer.str;
tprint(tracef, "Table '%s'", name);
@@ -1005,6 +1015,7 @@ static int new_table(uint16 sid, const char *name,
*/
int error= 1;
MARIA_HA *info;
+ MARIA_SHARE *share;
checkpoint_useful= TRUE;
if ((name == NULL) || (name[0] == 0))
@@ -1032,7 +1043,7 @@ static int new_table(uint16 sid, const char *name,
tprint(tracef, "Table is crashed, can't apply log records to it\n");
goto end;
}
- MARIA_SHARE *share= info->s;
+ share= info->s;
/* check that we're not already using it */
if (share->reopen != 1)
{
@@ -1320,6 +1331,75 @@ end:
}
+prototype_redo_exec_hook(REDO_INDEX)
+{
+ int error= 1;
+ MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec);
+ if (info == NULL)
+ return 0;
+ enlarge_buffer(rec);
+
+ if (log_record_buffer.str == NULL ||
+ translog_read_record(rec->lsn, 0, rec->record_length,
+ log_record_buffer.str, NULL) !=
+ rec->record_length)
+ {
+ tprint(tracef, "Failed to read record\n");
+ goto end;
+ }
+
+ if (_ma_apply_redo_index(info, current_group_end_lsn,
+ log_record_buffer.str + FILEID_STORE_SIZE,
+ rec->record_length - FILEID_STORE_SIZE))
+ goto end;
+ error= 0;
+end:
+ return error;
+}
+
+prototype_redo_exec_hook(REDO_INDEX_NEW_PAGE)
+{
+ int error= 1;
+ MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec);
+ if (info == NULL)
+ return 0;
+ enlarge_buffer(rec);
+
+ if (log_record_buffer.str == NULL ||
+ translog_read_record(rec->lsn, 0, rec->record_length,
+ log_record_buffer.str, NULL) !=
+ rec->record_length)
+ {
+ tprint(tracef, "Failed to read record\n");
+ goto end;
+ }
+
+ if (_ma_apply_redo_index_new_page(info, current_group_end_lsn,
+ log_record_buffer.str + FILEID_STORE_SIZE,
+ rec->record_length - FILEID_STORE_SIZE))
+ goto end;
+ error= 0;
+end:
+ return error;
+}
+
+
+prototype_redo_exec_hook(REDO_INDEX_FREE_PAGE)
+{
+ int error= 1;
+ MARIA_HA *info= get_MARIA_HA_from_REDO_record(rec);
+ if (info == NULL)
+ return 0;
+
+ if (_ma_apply_redo_index_free_page(info, current_group_end_lsn,
+ rec->header + FILEID_STORE_SIZE))
+ goto end;
+ error= 0;
+end:
+ return error;
+}
+
+
#define set_undo_lsn_for_active_trans(TRID, LSN) do { \
all_active_trans[TRID].undo_lsn= LSN; \
if (all_active_trans[TRID].first_undo_lsn == LSN_IMPOSSIBLE) \
@@ -1328,9 +1408,11 @@ end:
prototype_redo_exec_hook(UNDO_ROW_INSERT)
{
MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
+ MARIA_SHARE *share;
+
if (info == NULL)
return 0;
- MARIA_SHARE *share= info->s;
+ share= info->s;
set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn);
if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0)
{
@@ -1353,8 +1435,7 @@ prototype_redo_exec_hook(UNDO_ROW_INSERT)
@todo some bits below will rather be set when executing UNDOs related
to keys
*/
- info->s->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED |
- STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES;
+ info->s->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
}
tprint(tracef, " rows' count %lu\n", (ulong)info->s->state.state.records);
return 0;
@@ -1364,9 +1445,11 @@ prototype_redo_exec_hook(UNDO_ROW_INSERT)
prototype_redo_exec_hook(UNDO_ROW_DELETE)
{
MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
+ MARIA_SHARE *share;
+
if (info == NULL)
return 0;
- MARIA_SHARE *share= info->s;
+ share= info->s;
set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn);
if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0)
{
@@ -1385,8 +1468,7 @@ prototype_redo_exec_hook(UNDO_ROW_DELETE)
}
share->state.state.checksum+= ha_checksum_korr(buff);
}
- share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED |
- STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES;
+ share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
}
tprint(tracef, " rows' count %lu\n", (ulong)share->state.state.records);
return 0;
@@ -1396,9 +1478,10 @@ prototype_redo_exec_hook(UNDO_ROW_DELETE)
prototype_redo_exec_hook(UNDO_ROW_UPDATE)
{
MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
+ MARIA_SHARE *share;
if (info == NULL)
return 0;
- MARIA_SHARE *share= info->s;
+ share= info->s;
set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn);
if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0)
{
@@ -1415,8 +1498,41 @@ prototype_redo_exec_hook(UNDO_ROW_UPDATE)
}
share->state.state.checksum+= ha_checksum_korr(buff);
}
- share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED |
- STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES;
+ share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
+ }
+ return 0;
+}
+
+/**
+  REDO-phase hook for UNDO_KEY_INSERT records.
+
+  Applies nothing to the table; it only records this record's LSN as the
+  undo LSN of the transaction identified by rec->short_trid.
+
+  @return always 0
+*/
+prototype_redo_exec_hook(UNDO_KEY_INSERT)
+{
+  const LSN this_undo_lsn= rec->lsn;
+
+  set_undo_lsn_for_active_trans(rec->short_trid, this_undo_lsn);
+  return 0;
+}
+
+/**
+  REDO-phase hook for UNDO_KEY_DELETE records.
+
+  Applies nothing to the table; it only records this record's LSN as the
+  undo LSN of the transaction identified by rec->short_trid.
+
+  @return always 0
+*/
+prototype_redo_exec_hook(UNDO_KEY_DELETE)
+{
+  const LSN this_undo_lsn= rec->lsn;
+
+  set_undo_lsn_for_active_trans(rec->short_trid, this_undo_lsn);
+  return 0;
+}
+
+/**
+  REDO-phase hook for UNDO_KEY_DELETE_WITH_ROOT records.
+
+  Records this record's LSN as the transaction's undo LSN. If the record's
+  LSN is not older than share->state.is_of_horizon, it also restores the
+  key root stored in the record header (after the LSN and the file id):
+  a page number followed by a key number.
+
+  @return always 0 (also when no table handler is returned for the record)
+*/
+prototype_redo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT)
+{
+  MARIA_SHARE *share;
+  MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
+
+  if (!info)
+    return 0;
+  share= info->s;
+  set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn);
+  if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0)
+  {
+    const uchar *root_info= rec->header + LSN_STORE_SIZE + FILEID_STORE_SIZE;
+    my_off_t root_page= page_korr(root_info);
+    uint key_nr= key_nr_korr(root_info + PAGE_STORE_SIZE);
+
+    /* The log stores a page number; key_root[] holds a byte offset */
+    if (root_page == IMPOSSIBLE_PAGE_NO)
+      share->state.key_root[key_nr]= HA_OFFSET_ERROR;
+    else
+      share->state.key_root[key_nr]= root_page * share->block_size;
   }
   return 0;
 }
@@ -1464,13 +1580,18 @@ prototype_redo_exec_hook(COMMIT)
prototype_redo_exec_hook(CLR_END)
{
MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
+ MARIA_SHARE *share;
+ LSN previous_undo_lsn;
+ enum translog_record_type undone_record_type;
+ const LOG_DESC *log_desc;
+
if (info == NULL)
return 0;
- MARIA_SHARE *share= info->s;
- LSN previous_undo_lsn= lsn_korr(rec->header);
- enum translog_record_type undone_record_type=
+ share= info->s;
+ previous_undo_lsn= lsn_korr(rec->header);
+ undone_record_type=
clr_type_korr(rec->header + LSN_STORE_SIZE + FILEID_STORE_SIZE);
- const LOG_DESC *log_desc= &log_record_type_descriptor[undone_record_type];
+ log_desc= &log_record_type_descriptor[undone_record_type];
set_undo_lsn_for_active_trans(rec->short_trid, previous_undo_lsn);
tprint(tracef, " CLR_END was about %s, undo_lsn now LSN (%lu,0x%lx)\n",
@@ -1502,8 +1623,7 @@ prototype_redo_exec_hook(CLR_END)
default:
DBUG_ASSERT(0);
}
- share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED |
- STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES;
+ share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
}
tprint(tracef, " rows' count %lu\n", (ulong)share->state.state.records);
return 0;
@@ -1515,6 +1635,8 @@ prototype_undo_exec_hook(UNDO_ROW_INSERT)
my_bool error;
MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
LSN previous_undo_lsn= lsn_korr(rec->header);
+ MARIA_SHARE *share;
+ const uchar *record_ptr;
if (info == NULL)
{
@@ -1526,11 +1648,10 @@ prototype_undo_exec_hook(UNDO_ROW_INSERT)
*/
return 1;
}
- MARIA_SHARE *share= info->s;
- share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED |
- STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES;
+ share= info->s;
+ share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
- const uchar *record_ptr= rec->header;
+ record_ptr= rec->header;
if (share->calc_checksum)
{
/*
@@ -1568,14 +1689,13 @@ prototype_undo_exec_hook(UNDO_ROW_DELETE)
my_bool error;
MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
LSN previous_undo_lsn= lsn_korr(rec->header);
+ MARIA_SHARE *share;
if (info == NULL)
return 1;
- MARIA_SHARE *share= info->s;
- share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED |
- STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES;
-
+ share= info->s;
+ share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
enlarge_buffer(rec);
if (log_record_buffer.str == NULL ||
translog_read_record(rec->lsn, 0, rec->record_length,
@@ -1610,12 +1730,13 @@ prototype_undo_exec_hook(UNDO_ROW_UPDATE)
my_bool error;
MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
LSN previous_undo_lsn= lsn_korr(rec->header);
+ MARIA_SHARE *share;
if (info == NULL)
return 1;
- MARIA_SHARE *share= info->s;
- share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED |
- STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES;
+
+ share= info->s;
+ share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
enlarge_buffer(rec);
if (log_record_buffer.str == NULL ||
@@ -1640,6 +1761,142 @@ prototype_undo_exec_hook(UNDO_ROW_UPDATE)
}
+/**
+  UNDO-phase hook for UNDO_KEY_INSERT records.
+
+  Reads the whole log record into log_record_buffer and passes the part
+  that follows the stored LSN and file id to _ma_apply_undo_key_insert(),
+  with info->trn set to the transaction for the duration of the call.
+
+  @return
+    @retval 1      no table handler was found, or the record could not be
+                   read
+    @retval other  the result of _ma_apply_undo_key_insert()
+*/
+prototype_undo_exec_hook(UNDO_KEY_INSERT)
+{
+  my_bool res;
+  int record_read;
+  MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
+  LSN prev_undo_lsn= lsn_korr(rec->header);
+
+  /*
+    A missing table is abnormal here, unlike for REDOs: the transaction we
+    have to roll back used this table and, as it is not rolled back yet,
+    was supposed to hold it, so the table should still be there.
+  */
+  if (!info)
+    return 1;
+
+  info->s->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
+
+  enlarge_buffer(rec);
+  record_read= (log_record_buffer.str != NULL &&
+                translog_read_record(rec->lsn, 0, rec->record_length,
+                                     log_record_buffer.str, NULL) ==
+                rec->record_length);
+  if (!record_read)
+  {
+    tprint(tracef, "Failed to read record\n");
+    return 1;
+  }
+
+  /* The handler carries the transaction only while the undo is applied */
+  info->trn= trn;
+  res= _ma_apply_undo_key_insert(info, prev_undo_lsn,
+                                 log_record_buffer.str + LSN_STORE_SIZE +
+                                 FILEID_STORE_SIZE,
+                                 rec->record_length - LSN_STORE_SIZE -
+                                 FILEID_STORE_SIZE);
+  info->trn= 0;
+
+  /* trn->undo_lsn is updated in an inwrite_hook when writing the CLR_END */
+  tprint(tracef, " undo_lsn now LSN (%lu,0x%lx)\n",
+         LSN_IN_PARTS(prev_undo_lsn));
+  return res;
+}
+
+
+/**
+  UNDO-phase hook for UNDO_KEY_DELETE records.
+
+  Reads the whole log record into log_record_buffer and passes the part
+  that follows the stored LSN and file id to _ma_apply_undo_key_delete(),
+  with info->trn set to the transaction for the duration of the call.
+
+  @return
+    @retval 1      no table handler was found, or the record could not be
+                   read
+    @retval other  the result of _ma_apply_undo_key_delete()
+*/
+prototype_undo_exec_hook(UNDO_KEY_DELETE)
+{
+  my_bool res;
+  int record_read;
+  MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
+  LSN prev_undo_lsn= lsn_korr(rec->header);
+
+  /*
+    A missing table is abnormal here, unlike for REDOs: the transaction we
+    have to roll back used this table and, as it is not rolled back yet,
+    was supposed to hold it, so the table should still be there.
+  */
+  if (!info)
+    return 1;
+
+  info->s->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
+
+  enlarge_buffer(rec);
+  record_read= (log_record_buffer.str != NULL &&
+                translog_read_record(rec->lsn, 0, rec->record_length,
+                                     log_record_buffer.str, NULL) ==
+                rec->record_length);
+  if (!record_read)
+  {
+    tprint(tracef, "Failed to read record\n");
+    return 1;
+  }
+
+  /* The handler carries the transaction only while the undo is applied */
+  info->trn= trn;
+  res= _ma_apply_undo_key_delete(info, prev_undo_lsn,
+                                 log_record_buffer.str + LSN_STORE_SIZE +
+                                 FILEID_STORE_SIZE,
+                                 rec->record_length - LSN_STORE_SIZE -
+                                 FILEID_STORE_SIZE);
+  info->trn= 0;
+
+  /* trn->undo_lsn is updated in an inwrite_hook when writing the CLR_END */
+  tprint(tracef, " undo_lsn now LSN (%lu,0x%lx)\n",
+         LSN_IN_PARTS(prev_undo_lsn));
+  return res;
+}
+
+
+/**
+  UNDO-phase hook for UNDO_KEY_DELETE_WITH_ROOT records.
+
+  Reads the whole log record into log_record_buffer and passes the part
+  that follows the stored LSN, file id and page number to
+  _ma_apply_undo_key_delete(), with info->trn set to the transaction for
+  the duration of the call.
+
+  @return
+    @retval 1      no table handler was found, or the record could not be
+                   read
+    @retval other  the result of _ma_apply_undo_key_delete()
+*/
+prototype_undo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT)
+{
+  my_bool res;
+  int record_read;
+  MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec);
+  LSN prev_undo_lsn= lsn_korr(rec->header);
+
+  /*
+    A missing table is abnormal here, unlike for REDOs: the transaction we
+    have to roll back used this table and, as it is not rolled back yet,
+    was supposed to hold it, so the table should still be there.
+  */
+  if (!info)
+    return 1;
+
+  info->s->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
+
+  enlarge_buffer(rec);
+  record_read= (log_record_buffer.str != NULL &&
+                translog_read_record(rec->lsn, 0, rec->record_length,
+                                     log_record_buffer.str, NULL) ==
+                rec->record_length);
+  if (!record_read)
+  {
+    tprint(tracef, "Failed to read record\n");
+    return 1;
+  }
+
+  /* The handler carries the transaction only while the undo is applied */
+  info->trn= trn;
+  /* The page number stored after the file id is skipped as well */
+  res= _ma_apply_undo_key_delete(info, prev_undo_lsn,
+                                 log_record_buffer.str + LSN_STORE_SIZE +
+                                 FILEID_STORE_SIZE + PAGE_STORE_SIZE,
+                                 rec->record_length - LSN_STORE_SIZE -
+                                 FILEID_STORE_SIZE - PAGE_STORE_SIZE);
+  info->trn= 0;
+
+  /* trn->undo_lsn is updated in an inwrite_hook when writing the CLR_END */
+  tprint(tracef, " undo_lsn now LSN (%lu,0x%lx)\n",
+         LSN_IN_PARTS(prev_undo_lsn));
+  return res;
+}
+
+
+
static int run_redo_phase(LSN lsn, enum maria_apply_log_way apply)
{
TRANSLOG_HEADER_BUFFER rec;
@@ -1669,14 +1926,23 @@ static int run_redo_phase(LSN lsn, enum maria_apply_log_way apply)
install_redo_exec_hook(REDO_FREE_HEAD_OR_TAIL);
install_redo_exec_hook(REDO_FREE_BLOCKS);
install_redo_exec_hook(REDO_DELETE_ALL);
+ install_redo_exec_hook(REDO_INDEX);
+ install_redo_exec_hook(REDO_INDEX_NEW_PAGE);
+ install_redo_exec_hook(REDO_INDEX_FREE_PAGE);
install_redo_exec_hook(UNDO_ROW_INSERT);
install_redo_exec_hook(UNDO_ROW_DELETE);
install_redo_exec_hook(UNDO_ROW_UPDATE);
+ install_redo_exec_hook(UNDO_KEY_INSERT);
+ install_redo_exec_hook(UNDO_KEY_DELETE);
+ install_redo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT);
install_redo_exec_hook(COMMIT);
install_redo_exec_hook(CLR_END);
install_undo_exec_hook(UNDO_ROW_INSERT);
install_undo_exec_hook(UNDO_ROW_DELETE);
install_undo_exec_hook(UNDO_ROW_UPDATE);
+ install_undo_exec_hook(UNDO_KEY_INSERT);
+ install_undo_exec_hook(UNDO_KEY_DELETE);
+ install_undo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT);
current_group_end_lsn= LSN_IMPOSSIBLE;
@@ -1870,7 +2136,6 @@ static uint end_of_redo_phase(my_bool prepare_for_undo_phase)
LSN_IN_PARTS(gslsn), sid);
if (all_active_trans[sid].undo_lsn != LSN_IMPOSSIBLE)
{
- char llbuf[22];
llstr(long_trid, llbuf);
tprint(tracef, "Transaction long_trid %s short_trid %u unfinished\n",
llbuf, sid);
@@ -2017,13 +2282,15 @@ static MARIA_HA *get_MARIA_HA_from_REDO_record(const
print_redo_phase_progress(rec->lsn);
sid= fileid_korr(rec->header);
page= page_korr(rec->header + FILEID_STORE_SIZE);
- switch(rec->type)
- {
+ switch(rec->type) {
/* not all REDO records have a page: */
case LOGREC_REDO_INSERT_ROW_HEAD:
case LOGREC_REDO_INSERT_ROW_TAIL:
case LOGREC_REDO_PURGE_ROW_HEAD:
case LOGREC_REDO_PURGE_ROW_TAIL:
+ case LOGREC_REDO_INDEX_NEW_PAGE:
+ case LOGREC_REDO_INDEX:
+ case LOGREC_REDO_INDEX_FREE_PAGE:
llstr(page, llbuf);
tprint(tracef, " For page %s of table of short id %u", llbuf, sid);
break;
@@ -2281,7 +2548,7 @@ static int new_page(File fileid, pgcache_page_no_t pageid, LSN rec_lsn,
static int close_all_tables(void)
{
int error= 0;
- uint count;
+ uint count= 0;
LIST *list_element, *next_open;
MARIA_HA *info;
pthread_mutex_lock(&THR_LOCK_maria);
@@ -2364,6 +2631,9 @@ static void print_redo_phase_progress(TRANSLOG_ADDRESS addr)
{
static int end_logno= FILENO_IMPOSSIBLE, end_offset, percentage_printed= 0;
static ulonglong initial_remainder= -1;
+ int cur_logno, cur_offset;
+ ulonglong local_remainder;
+
if (tracef == stdout)
return;
if (recovery_message_printed == REC_MSG_NONE)
@@ -2379,16 +2649,15 @@ static void print_redo_phase_progress(TRANSLOG_ADDRESS addr)
end_logno= LSN_FILE_NO(end_addr);
end_offset= LSN_OFFSET(end_addr);
}
- int cur_logno= LSN_FILE_NO(addr);
- int cur_offset= LSN_OFFSET(addr);
- ulonglong remainder;
- remainder= (cur_logno == end_logno) ? (end_offset - cur_offset) :
+ cur_logno= LSN_FILE_NO(addr);
+ cur_offset= LSN_OFFSET(addr);
+ local_remainder= (cur_logno == end_logno) ? (end_offset - cur_offset) :
(TRANSLOG_FILE_SIZE - cur_offset +
max(end_logno - cur_logno - 1, 0) * TRANSLOG_FILE_SIZE + end_offset);
if (initial_remainder == (ulonglong)(-1))
- initial_remainder= remainder;
+ initial_remainder= local_remainder;
int percentage_done=
- (initial_remainder - remainder) * ULL(100) / initial_remainder;
+ (initial_remainder - local_remainder) * ULL(100) / initial_remainder;
if ((percentage_done - percentage_printed) >= 10)
{
percentage_printed= percentage_done;
@@ -2433,11 +2702,10 @@ effect on the state in case of crash. But we make them sync the state
as soon as they have finished. This reduces the window for a problem.
It looks like only one thread at a time updates the state in memory or
-on disk. However there is not 100% certainty when it comes to
-HA_EXTRA_(FORCE_REOPEN|PREPARE_FOR_RENAME): can they read the state
-from memory while some other thread is updating "records" in memory?
-If yes, they may write a corrupted state to disk.
-We assume that no for now: ASK_MONTY.
+on disk. We assume that the upper level (normally MySQL) has protection
+against issuing HA_EXTRA_(FORCE_REOPEN|PREPARE_FOR_RENAME) so that these
+are not issued while there are any running transactions on the given table.
+If this is not done, we may write a corrupted state to disk.
With checkpoints
================
diff --git a/storage/maria/ma_rt_index.c b/storage/maria/ma_rt_index.c
index 2530ae86a5c..eba2a519a3f 100644
--- a/storage/maria/ma_rt_index.c
+++ b/storage/maria/ma_rt_index.c
@@ -67,7 +67,8 @@ static int maria_rtree_find_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
my_errno= HA_ERR_OUT_OF_MEM;
return -1;
}
- if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0))
+ if (!_ma_fetch_keypage(info, keyinfo, page, PAGECACHE_LOCK_LEFT_UNLOCKED,
+ DFLT_INIT_HITS, page_buf, 0, 0))
goto err1;
nod_flag= _ma_test_if_nod(info, page_buf);
@@ -286,7 +287,8 @@ static int maria_rtree_get_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
if (!(page_buf= (uchar*) my_alloca((uint)keyinfo->block_length)))
return -1;
- if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0))
+ if (!_ma_fetch_keypage(info, keyinfo, page, PAGECACHE_LOCK_LEFT_UNLOCKED,
+ DFLT_INIT_HITS, page_buf, 0, 0))
goto err1;
nod_flag= _ma_test_if_nod(info, page_buf);
@@ -547,6 +549,7 @@ static int maria_rtree_insert_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
uint nod_flag;
int res;
uchar *page_buf, *k;
+ MARIA_PINNED_PAGE *page_link;
DBUG_ENTER("maria_rtree_insert_req");
if (!(page_buf= (uchar*) my_alloca((uint)keyinfo->block_length +
@@ -555,7 +558,8 @@ static int maria_rtree_insert_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
my_errno= HA_ERR_OUT_OF_MEM;
DBUG_RETURN(-1); /* purecov: inspected */
}
- if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0))
+ if (!_ma_fetch_keypage(info, keyinfo, page, PAGECACHE_LOCK_WRITE,
+ DFLT_INIT_HITS, page_buf, 0, &page_link))
goto err1;
nod_flag= _ma_test_if_nod(info, page_buf);
DBUG_PRINT("rtree", ("page: %lu level: %d ins_level: %d nod_flag: %u",
@@ -574,7 +578,10 @@ static int maria_rtree_insert_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
case 0: /* child was not split */
{
maria_rtree_combine_rect(keyinfo->seg, k, key, k, key_length);
- if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf))
+ page_link->changed= 1;
+ if (_ma_write_keypage(info, keyinfo, page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, page_buf))
goto err1;
goto ok;
}
@@ -592,7 +599,10 @@ static int maria_rtree_insert_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
goto err1;
res= maria_rtree_add_key(info, keyinfo, new_key, key_length,
page_buf, new_page);
- if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf))
+ page_link->changed= 1;
+ if (_ma_write_keypage(info, keyinfo, page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, page_buf))
goto err1;
goto ok;
}
@@ -607,7 +617,9 @@ static int maria_rtree_insert_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
{
res= maria_rtree_add_key(info, keyinfo, key, key_length, page_buf,
new_page);
- if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf))
+ page_link->changed= 1;
+ if (_ma_write_keypage(info, keyinfo, page, PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, page_buf))
goto err1;
}
@@ -637,17 +649,26 @@ static int maria_rtree_insert_level(MARIA_HA *info, uint keynr, uchar *key,
MARIA_KEYDEF *keyinfo= info->s->keyinfo + keynr;
int res;
my_off_t new_page;
+ MARIA_PINNED_PAGE *page_link;
DBUG_ENTER("maria_rtree_insert_level");
if ((old_root= info->s->state.key_root[keynr]) == HA_OFFSET_ERROR)
{
- if ((old_root= _ma_new(info, keyinfo, DFLT_INIT_HITS)) == HA_OFFSET_ERROR)
+ MARIA_PINNED_PAGE tmp_page_link;
+ page_link= &tmp_page_link;
+ if ((old_root= _ma_new(info, DFLT_INIT_HITS, &page_link)) ==
+ HA_OFFSET_ERROR)
DBUG_RETURN(-1);
info->keyread_buff_used= 1;
+ bzero(info->buff, info->s->keypage_header);
+ _ma_store_keynr(info, info->buff, keynr);
_ma_store_page_used(info, info->buff, info->s->keypage_header, 0);
+
res= maria_rtree_add_key(info, keyinfo, key, key_length, info->buff,
NULL);
- if (_ma_write_keypage(info, keyinfo, old_root, DFLT_INIT_HITS, info->buff))
+ if (_ma_write_keypage(info, keyinfo, old_root,
+ page_link->write_lock,
+ DFLT_INIT_HITS, info->buff))
DBUG_RETURN(1);
info->s->state.key_root[keynr]= old_root;
DBUG_RETURN(res);
@@ -665,6 +686,8 @@ static int maria_rtree_insert_level(MARIA_HA *info, uint keynr, uchar *key,
uchar *new_root_buf, *new_key;
my_off_t new_root;
uint nod_flag= info->s->base.key_reflength;
+ MARIA_PINNED_PAGE tmp_page_link;
+ page_link= &tmp_page_link;
DBUG_PRINT("rtree", ("root was split, grow a new root"));
if (!(new_root_buf= (uchar*) my_alloca((uint)keyinfo->block_length +
@@ -674,9 +697,11 @@ static int maria_rtree_insert_level(MARIA_HA *info, uint keynr, uchar *key,
DBUG_RETURN(-1); /* purecov: inspected */
}
+ bzero(new_root_buf, info->s->keypage_header);
+ _ma_store_keynr(info, new_root_buf, keynr);
_ma_store_page_used(info, new_root_buf, info->s->keypage_header,
nod_flag);
- if ((new_root= _ma_new(info, keyinfo, DFLT_INIT_HITS)) ==
+ if ((new_root= _ma_new(info, DFLT_INIT_HITS, &page_link)) ==
HA_OFFSET_ERROR)
goto err1;
@@ -698,7 +723,7 @@ static int maria_rtree_insert_level(MARIA_HA *info, uint keynr, uchar *key,
NULL)
== -1)
goto err1;
- if (_ma_write_keypage(info, keyinfo, new_root,
+ if (_ma_write_keypage(info, keyinfo, new_root, page_link->write_lock,
DFLT_INIT_HITS, new_root_buf))
goto err1;
info->s->state.key_root[keynr]= new_root;
@@ -790,6 +815,7 @@ static int maria_rtree_delete_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
uint nod_flag;
int res;
uchar *page_buf, *last, *k;
+ MARIA_PINNED_PAGE *page_link;
DBUG_ENTER("maria_rtree_delete_req");
if (!(page_buf= (uchar*) my_alloca((uint)keyinfo->block_length)))
@@ -797,7 +823,8 @@ static int maria_rtree_delete_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
my_errno= HA_ERR_OUT_OF_MEM;
DBUG_RETURN(-1); /* purecov: inspected */
}
- if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0))
+ if (!_ma_fetch_keypage(info, keyinfo, page, PAGECACHE_LOCK_WRITE,
+ DFLT_INIT_HITS, page_buf, 0, &page_link))
goto err1;
nod_flag= _ma_test_if_nod(info, page_buf);
DBUG_PRINT("rtree", ("page: %lu level: %d nod_flag: %u",
@@ -827,7 +854,9 @@ static int maria_rtree_delete_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
if (maria_rtree_set_key_mbr(info, keyinfo, k, key_length,
_ma_kpos(nod_flag, k)))
goto err1;
+ page_link->changed= 1;
if (_ma_write_keypage(info, keyinfo, page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
DFLT_INIT_HITS, page_buf))
goto err1;
}
@@ -852,7 +881,9 @@ static int maria_rtree_delete_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
level later to reintegrate the subtrees.
*/
maria_rtree_delete_key(info, page_buf, k, key_length, nod_flag);
+ page_link->changed= 1;
if (_ma_write_keypage(info, keyinfo, page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
DFLT_INIT_HITS, page_buf))
goto err1;
*page_size= _ma_get_page_used(info, page_buf);
@@ -867,7 +898,9 @@ static int maria_rtree_delete_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
case 2: /* vacuous case: last key in the leaf */
{
maria_rtree_delete_key(info, page_buf, k, key_length, nod_flag);
+ page_link->changed= 1;
if (_ma_write_keypage(info, keyinfo, page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
DFLT_INIT_HITS, page_buf))
goto err1;
*page_size= _ma_get_page_used(info, page_buf);
@@ -888,19 +921,23 @@ static int maria_rtree_delete_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length,
MBR_EQUAL | MBR_DATA))
{
+ page_link->changed= 1;
+
maria_rtree_delete_key(info, page_buf, k, key_length, nod_flag);
*page_size= _ma_get_page_used(info, page_buf);
if (*page_size == info->s->keypage_header)
{
/* last key in the leaf */
res= 2;
- if (_ma_dispose(info, keyinfo, page, DFLT_INIT_HITS))
+ if (_ma_dispose(info, page, 0))
goto err1;
}
else
{
res= 0;
- if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf))
+ if (_ma_write_keypage(info, keyinfo, page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, page_buf))
goto err1;
}
goto ok;
@@ -933,6 +970,7 @@ int maria_rtree_delete(MARIA_HA *info, uint keynr, uchar *key, uint key_length)
stPageList ReinsertList;
my_off_t old_root;
MARIA_KEYDEF *keyinfo= info->s->keyinfo + keynr;
+ MARIA_PINNED_PAGE *page_link, *root_page_link;
DBUG_ENTER("maria_rtree_delete");
if ((old_root= info->s->state.key_root[keynr]) == HA_OFFSET_ERROR)
@@ -948,100 +986,102 @@ int maria_rtree_delete(MARIA_HA *info, uint keynr, uchar *key, uint key_length)
ReinsertList.m_pages= 0;
switch (maria_rtree_delete_req(info, keyinfo, key, key_length, old_root,
- &page_size, &ReinsertList, 0))
- {
+ &page_size, &ReinsertList, 0)) {
case 2: /* empty */
- {
- info->s->state.key_root[keynr]= HA_OFFSET_ERROR;
- DBUG_RETURN(0);
- }
+ {
+ info->s->state.key_root[keynr]= HA_OFFSET_ERROR;
+ DBUG_RETURN(0);
+ }
case 0: /* deleted */
+ {
+ uint nod_flag;
+ ulong i;
+ for (i= 0; i < ReinsertList.n_pages; ++i)
{
- uint nod_flag;
- ulong i;
- for (i= 0; i < ReinsertList.n_pages; ++i)
- {
- uchar *page_buf, *k, *last;
+ uchar *page_buf, *k, *last;
- if (!(page_buf= (uchar*) my_alloca((uint)keyinfo->block_length)))
+ if (!(page_buf= (uchar*) my_alloca((uint)keyinfo->block_length)))
+ {
+ my_errno= HA_ERR_OUT_OF_MEM;
+ goto err1;
+ }
+ if (!_ma_fetch_keypage(info, keyinfo, ReinsertList.pages[i].offs,
+ PAGECACHE_LOCK_WRITE,
+ DFLT_INIT_HITS, page_buf, 0, &page_link))
+ goto err1;
+ nod_flag= _ma_test_if_nod(info, page_buf);
+ DBUG_PRINT("rtree", ("reinserting keys from "
+ "page: %lu level: %d nod_flag: %u",
+ (ulong) ReinsertList.pages[i].offs,
+ ReinsertList.pages[i].level, nod_flag));
+
+ k= rt_PAGE_FIRST_KEY(info, page_buf, nod_flag);
+ last= rt_PAGE_END(info, page_buf);
+ for (; k < last; k= rt_PAGE_NEXT_KEY(k, key_length, nod_flag))
+ {
+ int res;
+ if ((res=
+ maria_rtree_insert_level(info, keynr, k, key_length,
+ ReinsertList.pages[i].level)) == -1)
{
- my_errno= HA_ERR_OUT_OF_MEM;
+ my_afree(page_buf);
goto err1;
}
- if (!_ma_fetch_keypage(info, keyinfo, ReinsertList.pages[i].offs,
- DFLT_INIT_HITS, page_buf, 0))
- goto err1;
- nod_flag= _ma_test_if_nod(info, page_buf);
- DBUG_PRINT("rtree", ("reinserting keys from "
- "page: %lu level: %d nod_flag: %u",
- (ulong) ReinsertList.pages[i].offs,
- ReinsertList.pages[i].level, nod_flag));
-
- k= rt_PAGE_FIRST_KEY(info, page_buf, nod_flag);
- last= rt_PAGE_END(info, page_buf);
- for (; k < last; k= rt_PAGE_NEXT_KEY(k, key_length, nod_flag))
+ if (res)
{
- int res;
- if ((res=
- maria_rtree_insert_level(info, keynr, k, key_length,
- ReinsertList.pages[i].level)) == -1)
+ ulong j;
+ DBUG_PRINT("rtree", ("root has been split, adjust levels"));
+ for (j= i; j < ReinsertList.n_pages; j++)
{
- my_afree(page_buf);
- goto err1;
- }
- if (res)
- {
- ulong j;
- DBUG_PRINT("rtree", ("root has been split, adjust levels"));
- for (j= i; j < ReinsertList.n_pages; j++)
- {
- ReinsertList.pages[j].level++;
- DBUG_PRINT("rtree", ("keys from page: %lu now level: %d",
- (ulong) ReinsertList.pages[i].offs,
- ReinsertList.pages[i].level));
- }
+ ReinsertList.pages[j].level++;
+ DBUG_PRINT("rtree", ("keys from page: %lu now level: %d",
+ (ulong) ReinsertList.pages[i].offs,
+ ReinsertList.pages[i].level));
}
}
- my_afree(page_buf);
- if (_ma_dispose(info, keyinfo, ReinsertList.pages[i].offs,
- DFLT_INIT_HITS))
- goto err1;
}
- if (ReinsertList.pages)
- my_free((uchar*) ReinsertList.pages, MYF(0));
-
- /* check for redundant root (not leaf, 1 child) and eliminate */
- if ((old_root= info->s->state.key_root[keynr]) == HA_OFFSET_ERROR)
+ my_afree(page_buf);
+ page_link->changed= 1;
+ if (_ma_dispose(info, ReinsertList.pages[i].offs, 0))
goto err1;
- if (!_ma_fetch_keypage(info, keyinfo, old_root, DFLT_INIT_HITS,
- info->buff, 0))
- goto err1;
- nod_flag= _ma_test_if_nod(info, info->buff);
- page_size= _ma_get_page_used(info, info->buff);
- if (nod_flag && (page_size == info->s->keypage_header + key_length +
- nod_flag))
- {
- my_off_t new_root= _ma_kpos(nod_flag,
- rt_PAGE_FIRST_KEY(info, info->buff,
- nod_flag));
- if (_ma_dispose(info, keyinfo, old_root, DFLT_INIT_HITS))
- goto err1;
- info->s->state.key_root[keynr]= new_root;
- }
- info->update= HA_STATE_DELETED;
- DBUG_RETURN(0);
-
-err1:
- DBUG_RETURN(-1); /* purecov: inspected */
}
- case 1: /* not found */
+ if (ReinsertList.pages)
+ my_free((uchar*) ReinsertList.pages, MYF(0));
+
+ /* check for redundant root (not leaf, 1 child) and eliminate */
+ if ((old_root= info->s->state.key_root[keynr]) == HA_OFFSET_ERROR)
+ goto err1;
+ if (!_ma_fetch_keypage(info, keyinfo, old_root,
+ PAGECACHE_LOCK_WRITE,
+ DFLT_INIT_HITS, info->buff, 0, &root_page_link))
+ goto err1;
+ nod_flag= _ma_test_if_nod(info, info->buff);
+ page_size= _ma_get_page_used(info, info->buff);
+ if (nod_flag && (page_size == info->s->keypage_header + key_length +
+ nod_flag))
{
- my_errno= HA_ERR_KEY_NOT_FOUND;
- DBUG_RETURN(-1); /* purecov: inspected */
+ my_off_t new_root= _ma_kpos(nod_flag,
+ rt_PAGE_FIRST_KEY(info, info->buff,
+ nod_flag));
+ root_page_link->changed= 1;
+ if (_ma_dispose(info, old_root, 0))
+ goto err1;
+ info->s->state.key_root[keynr]= new_root;
}
- default:
- case -1: /* error */
- DBUG_RETURN(-1); /* purecov: inspected */
+ info->update= HA_STATE_DELETED;
+ DBUG_RETURN(0);
+
+err1:
+ DBUG_RETURN(-1); /* purecov: inspected */
+ }
+ case 1: /* not found */
+ {
+ my_errno= HA_ERR_KEY_NOT_FOUND;
+ DBUG_RETURN(-1); /* purecov: inspected */
+ }
+ default:
+ case -1: /* error */
+ DBUG_RETURN(-1); /* purecov: inspected */
}
}
@@ -1071,7 +1111,8 @@ ha_rows maria_rtree_estimate(MARIA_HA *info, uint keynr, uchar *key,
return HA_POS_ERROR;
if (!(page_buf= (uchar*) my_alloca((uint)keyinfo->block_length)))
return HA_POS_ERROR;
- if (!_ma_fetch_keypage(info, keyinfo, root, DFLT_INIT_HITS, page_buf, 0))
+ if (!_ma_fetch_keypage(info, keyinfo, root, PAGECACHE_LOCK_LEFT_UNLOCKED,
+ DFLT_INIT_HITS, page_buf, 0, 0))
goto err1;
nod_flag= _ma_test_if_nod(info, page_buf);
diff --git a/storage/maria/ma_rt_key.c b/storage/maria/ma_rt_key.c
index c71d7d7d8eb..311137850f6 100644
--- a/storage/maria/ma_rt_key.c
+++ b/storage/maria/ma_rt_key.c
@@ -100,7 +100,8 @@ int maria_rtree_set_key_mbr(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key,
{
DBUG_ENTER("maria_rtree_set_key_mbr");
if (!_ma_fetch_keypage(info, keyinfo, child_page,
- DFLT_INIT_HITS, info->buff, 0))
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ DFLT_INIT_HITS, info->buff, 0, 0))
DBUG_RETURN(-1);
DBUG_RETURN(maria_rtree_page_mbr(info, keyinfo->seg,
diff --git a/storage/maria/ma_rt_split.c b/storage/maria/ma_rt_split.c
index d3381ecf1ad..25cfb0be91a 100644
--- a/storage/maria/ma_rt_split.c
+++ b/storage/maria/ma_rt_split.c
@@ -252,7 +252,6 @@ int maria_rtree_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
uint key_length, my_off_t *new_page_offs)
{
int n1, n2; /* Number of items in groups */
-
SplitStruct *task;
SplitStruct *cur;
SplitStruct *stop;
@@ -268,6 +267,7 @@ int maria_rtree_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
info->s->base.rec_reflength);
int max_keys= ((_ma_get_page_used(info, page) - info->s->keypage_header) /
(full_length));
+ MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
DBUG_ENTER("maria_rtree_split_page");
DBUG_PRINT("rtree", ("splitting block"));
@@ -339,16 +339,19 @@ int maria_rtree_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
memcpy(to - nod_flag, cur->key - nod_flag, full_length);
}
+ bzero(new_page, info->s->keypage_header);
+ _ma_store_keynr(info, new_page, keyinfo->key_nr);
_ma_store_page_used(info, page, info->s->keypage_header + n1 * full_length,
nod_flag);
_ma_store_page_used(info, new_page, info->s->keypage_header +
n2 * full_length, nod_flag);
- if ((*new_page_offs= _ma_new(info, keyinfo, DFLT_INIT_HITS)) ==
- HA_OFFSET_ERROR)
+ if ((*new_page_offs= _ma_new(info, DFLT_INIT_HITS, &page_link)) ==
+ HA_OFFSET_ERROR)
err_code= -1;
else
err_code= _ma_write_keypage(info, keyinfo, *new_page_offs,
+ page_link->write_lock,
DFLT_INIT_HITS, new_page);
DBUG_PRINT("rtree", ("split new block: %lu", (ulong) *new_page_offs));
diff --git a/storage/maria/ma_search.c b/storage/maria/ma_search.c
index 460d8367440..8eaff88155e 100644
--- a/storage/maria/ma_search.c
+++ b/storage/maria/ma_search.c
@@ -45,12 +45,17 @@ int _ma_check_index(MARIA_HA *info, int inx)
} /* _ma_check_index */
- /*
- ** Search after row by a key
- ** Position to row is stored in info->lastpos
- ** Return: -1 if not found
- ** 1 if one should continue search on higher level
- */
+/**
+ @brief Search for a row by a key
+
+ @note
+ Position to row is stored in info->lastpos
+
+ @return
+ @retval 0 ok (key found)
+ @retval -1 Not found
+ @retval 1 If one should continue search on higher level
+*/
int _ma_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
uchar *key, uint key_len, uint nextflag, register my_off_t pos)
@@ -74,9 +79,10 @@ int _ma_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
DBUG_RETURN(1); /* Search at upper levels */
}
- if (!(buff= _ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,
- info->keyread_buff,
- test(!(nextflag & SEARCH_SAVE_BUFF)))))
+ if (!(buff= _ma_fetch_keypage(info,keyinfo, pos,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ DFLT_INIT_HITS, info->keyread_buff,
+ test(!(nextflag & SEARCH_SAVE_BUFF)), 0)))
goto err;
DBUG_DUMP("page", buff, _ma_get_page_used(info, buff));
@@ -118,9 +124,10 @@ int _ma_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
if (pos != info->last_keypage)
{
uchar *old_buff=buff;
- if (!(buff= _ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,
+ if (!(buff= _ma_fetch_keypage(info,keyinfo, pos,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,DFLT_INIT_HITS,
info->keyread_buff,
- test(!(nextflag & SEARCH_SAVE_BUFF)))))
+ test(!(nextflag & SEARCH_SAVE_BUFF)), 0)))
goto err;
keypos=buff+(keypos-old_buff);
maxpos=buff+(maxpos-old_buff);
@@ -174,8 +181,9 @@ err:
/* ret_pos point to where find or bigger key starts */
/* ARGSUSED */
-int _ma_bin_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *page,
- uchar *key, uint key_len, uint comp_flag, uchar **ret_pos,
+int _ma_bin_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ uchar *page, const uchar *key, uint key_len,
+ uint comp_flag, uchar **ret_pos,
uchar *buff __attribute__((unused)), my_bool *last_key)
{
int flag;
@@ -209,7 +217,7 @@ int _ma_bin_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *page,
(uchar*) key, key_len, comp_flag, not_used);
if (flag < 0)
start++; /* point at next, bigger key */
- *ret_pos=page+(uint) start*totlength;
+ *ret_pos= (char*) (page+(uint) start*totlength);
*last_key= end == save_end;
DBUG_PRINT("exit",("flag: %d keypos: %d",flag,start));
DBUG_RETURN(flag);
@@ -242,13 +250,14 @@ int _ma_bin_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *page,
< 0 Not found.
*/
-int _ma_seq_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *page,
- uchar *key, uint key_len, uint comp_flag, uchar **ret_pos,
+int _ma_seq_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ uchar *page, const uchar *key, uint key_len,
+ uint comp_flag, uchar **ret_pos,
uchar *buff, my_bool *last_key)
{
int flag;
uint nod_flag, length, used_length, not_used[2];
- uchar t_buff[HA_MAX_KEY_BUFF],*end;
+ uchar t_buff[HA_MAX_KEY_BUFF], *end;
DBUG_ENTER("_ma_seq_search");
LINT_INIT(flag);
@@ -257,7 +266,7 @@ int _ma_seq_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *page,
_ma_get_used_and_nod(info, page, used_length, nod_flag);
end= page + used_length;
page+= info->s->keypage_header + nod_flag;
- *ret_pos=page;
+ *ret_pos= (uchar*) page;
t_buff[0]=0; /* Avoid bugs */
while (page < end)
{
@@ -290,8 +299,9 @@ int _ma_seq_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, uchar *page,
int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
- uchar *page, uchar *key, uint key_len, uint nextflag,
- uchar **ret_pos, uchar *buff, my_bool *last_key)
+ uchar *page, const uchar *key, uint key_len,
+ uint nextflag, uchar **ret_pos, uchar *buff,
+ my_bool *last_key)
{
/*
my_flag is raw comparison result to be changed according to
@@ -303,10 +313,11 @@ int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
uint prefix_len,suffix_len;
int key_len_skip, seg_len_pack, key_len_left;
uchar *end;
- uchar *kseg, *vseg, *saved_vseg, *saved_from;
+ uchar *vseg, *saved_vseg, *saved_from;
uchar *sort_order= keyinfo->seg->charset->sort_order;
uchar tt_buff[HA_MAX_KEY_BUFF+2], *t_buff=tt_buff+2;
uchar *saved_to;
+ const uchar *kseg;
uint saved_length=0, saved_prefix_len=0;
uint length_pack;
DBUG_ENTER("_ma_prefix_search");
@@ -435,7 +446,7 @@ int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
{
/* We have to compare. But we can still skip part of the key */
uint left;
- uchar *k= kseg+prefix_len;
+ const uchar *k= kseg+prefix_len;
/*
If prefix_len > cmplen then we are in the end-space comparison
@@ -481,7 +492,7 @@ int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
else
{
/* We have to compare k and vseg as if they were space extended */
- uchar *k_end= k+ (cmplen - len);
+ const uchar *k_end= k+ (cmplen - len);
for ( ; k < k_end && *k == ' '; k++) ;
if (k == k_end)
goto cmp_rest; /* should never happen */
@@ -631,8 +642,8 @@ void _ma_kpointer(register MARIA_HA *info, register uchar *buff, my_off_t pos)
/* Calc pos to a data-record from a key */
-
-my_off_t _ma_dpos(MARIA_HA *info, uint nod_flag, const uchar *after_key)
+MARIA_RECORD_POS _ma_dpos(MARIA_HA *info, uint nod_flag,
+ const uchar *after_key)
{
my_off_t pos;
after_key-=(nod_flag + info->s->rec_reflength);
@@ -654,15 +665,16 @@ my_off_t _ma_dpos(MARIA_HA *info, uint nod_flag, const uchar *after_key)
default:
pos=0L; /* Shut compiler up */
}
- return ((info->s->data_file_type == STATIC_RECORD) ?
- pos * info->s->base.pack_reclength : pos);
+ return info->s->keypos_to_recpos(info, pos);
}
/* Calc position from a record pointer ( in delete link chain ) */
-my_off_t _ma_rec_pos(MARIA_SHARE *s, uchar *ptr)
+MARIA_RECORD_POS _ma_rec_pos(MARIA_HA *info, uchar *ptr)
{
+ MARIA_SHARE *s= info->s;
+
my_off_t pos;
switch (s->rec_reflength) {
#if SIZEOF_OFF_T > 4
@@ -711,18 +723,16 @@ my_off_t _ma_rec_pos(MARIA_SHARE *s, uchar *ptr)
break;
default: abort(); /* Impossible */
}
- return ((s->data_file_type == STATIC_RECORD) ?
- pos * s->base.pack_reclength : pos);
+ return (*s->keypos_to_recpos)(info, pos);
}
- /* save position to record */
+/* save position to record */
void _ma_dpointer(MARIA_HA *info, uchar *buff, my_off_t pos)
{
- if (info->s->data_file_type == STATIC_RECORD &&
- pos != HA_OFFSET_ERROR)
- pos/= info->s->base.pack_reclength;
+ if (pos != HA_OFFSET_ERROR)
+ pos= (*info->s->recpos_to_keypos)(info, pos);
switch (info->s->rec_reflength) {
#if SIZEOF_OFF_T > 4
@@ -748,16 +758,53 @@ void _ma_dpointer(MARIA_HA *info, uchar *buff, my_off_t pos)
} /* _ma_dpointer */
- /* Get key from key-block */
- /* page points at previous key; its advanced to point at next key */
- /* key should contain previous key */
- /* Returns length of found key + pointers */
- /* nod_flag is a flag if we are on nod */
+my_off_t _ma_static_keypos_to_recpos(MARIA_HA *info, my_off_t pos)
+{
+ return pos * info->s->base.pack_reclength;
+}
+
+
+my_off_t _ma_static_recpos_to_keypos(MARIA_HA *info, my_off_t pos)
+{
+ return pos / info->s->base.pack_reclength;
+}
+
+my_off_t _ma_transparent_recpos(MARIA_HA *info __attribute__((unused)),
+ my_off_t pos)
+{
+ return pos;
+}
+
+my_off_t _ma_transaction_keypos_to_recpos(MARIA_HA *info
+ __attribute__((unused)),
+ my_off_t pos)
+{
+ /* We need one bit to store whether there are transids after the position */
+ return pos >> 1;
+}
+
+my_off_t _ma_transaction_recpos_to_keypos(MARIA_HA *info
+ __attribute__((unused)),
+ my_off_t pos)
+{
+ return pos << 1;
+}
+
+/*
+ @brief Get key from key-block
+
+ @param nod_flag Is set to nod length if we are on a nod
+ @param page Points at previous key; it is advanced to point at next key
+ @param key Should contain previous key
+
+ @notes
+ Same as _ma_get_key but used with fixed length keys
- /* same as _ma_get_key but used with fixed length keys */
+ @retval Returns length of found key + pointers
+ */
uint _ma_get_static_key(register MARIA_KEYDEF *keyinfo, uint nod_flag,
- register uchar **page, register uchar *key)
+ register uchar **page, uchar *key)
{
memcpy((uchar*) key,(uchar*) *page,
(size_t) (keyinfo->keylength+nod_flag));
@@ -1290,8 +1337,9 @@ int _ma_search_next(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
if (info->keyread_buff_used)
{
- if (!_ma_fetch_keypage(info,keyinfo,info->last_search_keypage,
- DFLT_INIT_HITS,info->keyread_buff,0))
+ if (!_ma_fetch_keypage(info, keyinfo, info->last_search_keypage,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ DFLT_INIT_HITS, info->keyread_buff, 0, 0))
DBUG_RETURN(-1);
info->keyread_buff_used=0;
}
@@ -1360,8 +1408,8 @@ int _ma_search_first(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
do
{
- if (!_ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,
- info->keyread_buff,0))
+ if (!_ma_fetch_keypage(info, keyinfo, pos, PAGECACHE_LOCK_LEFT_UNLOCKED,
+ DFLT_INIT_HITS, info->keyread_buff, 0, 0))
{
info->cur_row.lastpos= HA_OFFSET_ERROR;
DBUG_RETURN(-1);
@@ -1409,7 +1457,8 @@ int _ma_search_last(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
do
{
uint used_length;
- if (!_ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,buff,0))
+ if (!_ma_fetch_keypage(info, keyinfo, pos, PAGECACHE_LOCK_LEFT_UNLOCKED,
+ DFLT_INIT_HITS, buff, 0, 0))
{
info->cur_row.lastpos= HA_OFFSET_ERROR;
DBUG_RETURN(-1);
@@ -1865,7 +1914,8 @@ void _ma_store_static_key(MARIA_KEYDEF *keyinfo __attribute__((unused)),
register uchar *key_pos,
register MARIA_KEY_PARAM *s_temp)
{
- memcpy((uchar*) key_pos,(uchar*) s_temp->key,(size_t) s_temp->totlength);
+ memcpy(key_pos, s_temp->key,(size_t) s_temp->totlength);
+ s_temp->changed_length= s_temp->totlength;
}
@@ -1881,9 +1931,7 @@ void _ma_store_var_pack_key(MARIA_KEYDEF *keyinfo __attribute__((unused)),
register MARIA_KEY_PARAM *s_temp)
{
uint length;
- uchar *start;
-
- start=key_pos;
+ uchar *org_key_pos= key_pos;
if (s_temp->ref_length)
{
@@ -1898,12 +1946,13 @@ void _ma_store_var_pack_key(MARIA_KEYDEF *keyinfo __attribute__((unused)),
/* Not packed against previous key */
store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->key_length);
}
- bmove((uchar*) key_pos,(uchar*) s_temp->key,
- (length=s_temp->totlength-(uint) (key_pos-start)));
+ bmove(key_pos, s_temp->key,
+ (length= s_temp->totlength - (uint) (key_pos-org_key_pos)));
+
+ key_pos+= length;
if (!s_temp->next_key_pos) /* No following key */
- return;
- key_pos+=length;
+ goto end;
if (s_temp->prev_length)
{
@@ -1921,19 +1970,25 @@ void _ma_store_var_pack_key(MARIA_KEYDEF *keyinfo __attribute__((unused)),
s_temp->n_length);
}
memcpy(key_pos, s_temp->prev_key, s_temp->prev_length);
+ key_pos+= s_temp->prev_length;
}
else if (s_temp->n_ref_length)
{
store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->n_ref_length);
- if (s_temp->n_ref_length == s_temp->pack_marker)
- return; /* Identical key */
- store_key_length(key_pos,s_temp->n_length);
+ if (s_temp->n_ref_length != s_temp->pack_marker)
+ {
+ /* Not identical key */
+ store_key_length_inc(key_pos,s_temp->n_length);
+ }
}
else
{
s_temp->n_length+= s_temp->store_not_null;
store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->n_length);
}
+
+end:
+ s_temp->changed_length= (uint) (key_pos - org_key_pos);
}
@@ -1943,17 +1998,21 @@ void _ma_store_bin_pack_key(MARIA_KEYDEF *keyinfo __attribute__((unused)),
register uchar *key_pos,
register MARIA_KEY_PARAM *s_temp)
{
+ uchar *org_key_pos= key_pos;
+ size_t length= s_temp->totlength - s_temp->ref_length;
+
store_key_length_inc(key_pos,s_temp->ref_length);
- memcpy((char*) key_pos,(char*) s_temp->key+s_temp->ref_length,
- (size_t) s_temp->totlength-s_temp->ref_length);
+ memcpy(key_pos, s_temp->key+s_temp->ref_length, length);
+ key_pos+= length;
if (s_temp->next_key_pos)
{
- key_pos+=(uint) (s_temp->totlength-s_temp->ref_length);
store_key_length_inc(key_pos,s_temp->n_ref_length);
if (s_temp->prev_length) /* If we must extend key */
{
memcpy(key_pos,s_temp->prev_key,s_temp->prev_length);
+ key_pos+= s_temp->prev_length;
}
}
+ s_temp->changed_length= (uint) (key_pos - org_key_pos);
}
diff --git a/storage/maria/ma_sort.c b/storage/maria/ma_sort.c
index 2b4a4b16923..e61019aeb83 100644
--- a/storage/maria/ma_sort.c
+++ b/storage/maria/ma_sort.c
@@ -138,8 +138,8 @@ int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages,
while (memavl >= MIN_SORT_MEMORY)
{
- if ((records < UINT_MAX32) &&
- ((my_off_t) (records + 1) *
+ if ((records < UINT_MAX32) &&
+ ((my_off_t) (records + 1) *
(sort_length + sizeof(char*)) <= (my_off_t) memavl))
keys= (uint)records+1;
else
@@ -1055,4 +1055,3 @@ static int flush_maria_ft_buf(MARIA_SORT_PARAM *info)
}
return err;
}
-
diff --git a/storage/maria/ma_statrec.c b/storage/maria/ma_statrec.c
index 03ebc781104..b0422f067f5 100644
--- a/storage/maria/ma_statrec.c
+++ b/storage/maria/ma_statrec.c
@@ -30,7 +30,7 @@ my_bool _ma_write_static_record(MARIA_HA *info, const uchar *record)
info->s->state.dellink+1,
MYF(MY_NABP)))
goto err;
- info->s->state.dellink= _ma_rec_pos(info->s,temp);
+ info->s->state.dellink= _ma_rec_pos(info, temp);
info->state->del--;
info->state->empty-=info->s->base.pack_reclength;
if (info->s->file_write(info, record, info->s->base.reclength,
diff --git a/storage/maria/ma_test1.c b/storage/maria/ma_test1.c
index 1f487e01cd3..1a8a55e7f90 100644
--- a/storage/maria/ma_test1.c
+++ b/storage/maria/ma_test1.c
@@ -75,7 +75,7 @@ int main(int argc,char *argv[])
maria_data_root= ".";
/* Maria requires that we always have a page cache */
if (maria_init() ||
- (init_pagecache(maria_pagecache, IO_SIZE*16, 0, 0,
+ (init_pagecache(maria_pagecache, maria_block_size * 16, 0, 0,
maria_block_size) == 0) ||
ma_control_file_create_or_open() ||
(init_pagecache(maria_log_pagecache,
@@ -86,7 +86,7 @@ int main(int argc,char *argv[])
TRANSLOG_DEFAULT_FLAGS) ||
(transactional && (trnman_init(0) || ma_checkpoint_init(0))))
{
- fprintf(stderr, "Error in initialization");
+ fprintf(stderr, "Error in initialization\n");
exit(1);
}
@@ -214,7 +214,13 @@ static int run_test(const char *filename)
row_count=deleted=0;
for (i=49 ; i>=1 ; i-=2 )
{
- if (insert_count-- == 0) { VOID(maria_close(file)) ; exit(0) ; }
+ if (insert_count-- == 0)
+ {
+ if (testflag)
+ break;
+ VOID(maria_close(file));
+ exit(0);
+ }
j=i%25 +1;
create_record(record,j);
error=maria_write(file,record);
@@ -712,7 +718,7 @@ static struct my_option my_long_options[] =
(uchar**) &pagecacheing, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
{"key-length", 'k', "Undocumented", (uchar**) &key_length,
(uchar**) &key_length, 0, GET_UINT, REQUIRED_ARG, 6, 0, 0, 0, 0, 0},
- {"key-multiple", 'm', "Undocumented",
+ {"key-multiple", 'm', "Don't use unique keys",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"key-prefix_pack", 'P', "Undocumented",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
diff --git a/storage/maria/ma_test2.c b/storage/maria/ma_test2.c
index 6bc09fb1aee..d9a24eb7dff 100644
--- a/storage/maria/ma_test2.c
+++ b/storage/maria/ma_test2.c
@@ -51,7 +51,7 @@ static int verbose=0,testflag=0,
die_in_middle_of_transaction= 0;
static int pack_seg=HA_SPACE_PACK,pack_type=HA_PACK_KEY,remove_count=-1;
static int create_flag= 0, srand_arg= 0, checkpoint= 0;
-static ulong pagecache_size=IO_SIZE*16;
+static ulong pagecache_size=8192*32;
static enum data_file_type record_type= DYNAMIC_RECORD;
static uint keys=MARIA_KEYS,recant=1000;
@@ -253,14 +253,6 @@ int main(int argc, char *argv[])
for (i=0 ; i < recant ; i++)
{
ulong blob_length;
-#if 0
- /*
- Starting from i==72, there was a difference between runtime and
- log-applying. This is now fixed, by not using non_header_data_len in
- log-applying.
- */
- if (i == 72) goto end;
-#endif
n1=rnd(1000); n2=rnd(100); n3=rnd(5000);
sprintf((char*) record,"%6d:%4d:%8d:Pos: %4d ",n1,n2,n3,write_count);
int4store(record+STANDARD_LENGTH-4,(long) i);
@@ -1160,6 +1152,7 @@ static void put_blob_in_record(uchar *blob_pos, char **blob_buffer,
ulong *blob_length)
{
ulong i,length;
+ *blob_length= 0;
if (use_blob)
{
if (rnd(10) == 0)
@@ -1180,7 +1173,6 @@ static void put_blob_in_record(uchar *blob_pos, char **blob_buffer,
else
{
int4store(blob_pos,0);
- *blob_length= 0;
}
}
return;
diff --git a/storage/maria/ma_test_all.sh b/storage/maria/ma_test_all.sh
index f62dff8b09b..2c1cf659c94 100755
--- a/storage/maria/ma_test_all.sh
+++ b/storage/maria/ma_test_all.sh
@@ -119,7 +119,7 @@ run_tests()
$maria_path/maria_chk$suffix -sm test2
$maria_path/ma_test2$suffix $silent -m10000 -e16384 -E16384 -K -L $row_type
$maria_path/maria_chk$suffix -sm test2
- $maria_path/ma_test2$suffix $silent -M -T -c -b65000
+ $maria_path/ma_test2$suffix $silent -c -b65000 $row_type
$maria_path/maria_chk$suffix -se test2
}
diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c
index 12f29c8ee1d..311597314b3 100644
--- a/storage/maria/ma_write.c
+++ b/storage/maria/ma_write.c
@@ -17,27 +17,69 @@
#include "ma_fulltext.h"
#include "ma_rt_index.h"
+#include "trnman.h"
+#include "ma_key_recover.h"
+#include "ma_blockrec.h"
#define MAX_POINTER_LENGTH 8
/* Functions declared in this file */
-static int w_search(MARIA_HA *info,MARIA_KEYDEF *keyinfo,
- uint comp_flag, uchar *key,
- uint key_length, my_off_t pos, uchar *father_buff,
- uchar *father_keypos, my_off_t father_page,
+static int w_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ uint comp_flag, uchar *key, uint key_length, my_off_t page,
+ my_off_t father_page, uchar *father_buff,
+ MARIA_PINNED_PAGE *father_page_link, uchar *father_keypos,
my_bool insert_last);
-static int _ma_balance_page(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uchar *key,
- uchar *curr_buff,uchar *father_buff,
- uchar *father_keypos,my_off_t father_page);
+static int _ma_balance_page(MARIA_HA *info,MARIA_KEYDEF *keyinfo,
+ uchar *key, uchar *curr_buff, my_off_t page,
+ my_off_t father_page, uchar *father_buff,
+ uchar *father_keypos,
+ MARIA_KEY_PARAM *s_temp);
static uchar *_ma_find_last_pos(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
uchar *page, uchar *key,
uint *return_key_length, uchar **after_key);
-int _ma_ck_write_tree(register MARIA_HA *info, uint keynr,uchar *key,
- uint key_length);
-int _ma_ck_write_btree(register MARIA_HA *info, uint keynr,uchar *key,
- uint key_length);
+static int _ma_ck_write_tree(register MARIA_HA *info, uint keynr,uchar *key,
+ uint key_length);
+static int _ma_ck_write_btree(register MARIA_HA *info, uint keynr,uchar *key,
+ uint key_length);
+static int _ma_ck_write_btree_with_log(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *key, uint key_length,
+ my_off_t *root, uint comp_flag);
+static my_bool _ma_log_new(MARIA_HA *info, my_off_t page, uchar *buff,
+ uint page_length, uint key_nr, my_bool root_page);
+static my_bool _ma_log_add(MARIA_HA *info, my_off_t page, uchar *buff,
+ uchar *end_buff, uchar *key_pos,
+ uint changed_length, int move_length);
+static my_bool _ma_log_change(MARIA_HA *info, my_off_t page, uchar *buff,
+ uchar *key_pos, uint length);
+static my_bool _ma_log_split(MARIA_HA *info, my_off_t page, uchar *buff,
+ uint org_length, uint new_length,
+ uchar *key_pos,
+ uint key_length, int move_length,
+ enum en_key_op prefix_or_suffix,
+ uchar *data, uint data_length,
+ uint change_length);
+static my_bool _ma_log_del_prefix(MARIA_HA *info, my_off_t page, uchar *buff,
+ uint org_length, uint new_length,
+ uchar *key_pos, uint key_length,
+ int move_length);
+static my_bool _ma_log_key_middle(MARIA_HA *info, my_off_t page, uchar *buff,
+ uint new_length,
+ uint data_added_first,
+ uint data_changed_first,
+ uint data_deleted_last,
+ uchar *key_pos,
+ uint key_length, int move_length);
+static my_bool _ma_log_prefix(MARIA_HA *info, my_off_t page,
+ uchar *buff, uint changed_length,
+ int move_length);
+static my_bool _ma_log_suffix(MARIA_HA *info, my_off_t page,
+ uchar *buff, uint org_length,
+ uint new_length);
+/*
+ @brief Default handler for returning position to new row
+*/
MARIA_RECORD_POS _ma_write_init_default(MARIA_HA *info,
const uchar *record
@@ -300,31 +342,19 @@ int _ma_ck_write(MARIA_HA *info, uint keynr, uchar *key, uint key_length)
/**********************************************************************
- * Normal insert code *
- **********************************************************************/
+ Insert key into btree (normal case)
+**********************************************************************/
-int _ma_ck_write_btree(register MARIA_HA *info, uint keynr, uchar *key,
- uint key_length)
+static int _ma_ck_write_btree(register MARIA_HA *info, uint keynr, uchar *key,
+ uint key_length)
{
int error;
- uint comp_flag;
MARIA_KEYDEF *keyinfo=info->s->keyinfo+keynr;
my_off_t *root=&info->s->state.key_root[keynr];
DBUG_ENTER("_ma_ck_write_btree");
- if (keyinfo->flag & HA_SORT_ALLOWS_SAME)
- comp_flag=SEARCH_BIGGER; /* Put after same key */
- else if (keyinfo->flag & (HA_NOSAME|HA_FULLTEXT))
- {
- comp_flag=SEARCH_FIND | SEARCH_UPDATE; /* No duplicates */
- if (keyinfo->flag & HA_NULL_ARE_EQUAL)
- comp_flag|= SEARCH_NULL_ARE_EQUAL;
- }
- else
- comp_flag=SEARCH_SAME; /* Keys in rec-pos order */
-
- error= _ma_ck_real_write_btree(info, keyinfo, key, key_length,
- root, comp_flag);
+ error= _ma_ck_write_btree_with_log(info, keyinfo, key, key_length,
+ root, keyinfo->write_comp_flag);
if (info->ft1_to_ft2)
{
if (!error)
@@ -337,30 +367,124 @@ int _ma_ck_write_btree(register MARIA_HA *info, uint keynr, uchar *key,
} /* _ma_ck_write_btree */
+/**
+ @brief Write a key to the b-tree
+
+ @retval -1 error
+ @retval 0 ok
+*/
+
+static int _ma_ck_write_btree_with_log(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *key, uint key_length,
+ my_off_t *root, uint comp_flag)
+{
+ LSN lsn= LSN_IMPOSSIBLE;
+ int error;
+ my_off_t new_root= *root;
+#ifdef NOT_YET
+ DBUG_ENTER("_ma_ck_write_btree_with_log");
+#endif
+
+ error= _ma_ck_real_write_btree(info, keyinfo, key, key_length, &new_root,
+ comp_flag);
+ if (!error && info->s->now_transactional)
+ {
+ uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE +
+ KEY_NR_STORE_SIZE];
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
+ struct st_msg_to_write_hook_for_undo_key msg;
+
+ lsn_store(log_data, info->trn->undo_lsn);
+ key_nr_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE,
+ keyinfo->key_nr);
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char*) key;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= key_length;
+
+ msg.root= root;
+ msg.value= new_root;
+
+ if (translog_write_record(&lsn, LOGREC_UNDO_KEY_INSERT,
+ info->trn, info,
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length +
+ key_length,
+ TRANSLOG_INTERNAL_PARTS + 2, log_array,
+ log_data + LSN_STORE_SIZE, &msg))
+ error= -1;
+ }
+ else
+ {
+ *root= new_root;
+ _ma_fast_unlock_key_del(info);
+ }
+ _ma_unpin_all_pages_and_finalize_row(info, lsn);
+
+#ifdef NOT_YET
+ DBUG_RETURN(error);
+#else
+ return(error);
+#endif
+} /* _ma_ck_write_btree_with_log */
+
+
+
+/**
+ @brief Write a key to the b-tree
+
+ @retval -1 error
+ @retval 0 ok
+*/
+
int _ma_ck_real_write_btree(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
uchar *key, uint key_length, my_off_t *root,
uint comp_flag)
{
int error;
DBUG_ENTER("_ma_ck_real_write_btree");
+
/* key_length parameter is used only if comp_flag is SEARCH_FIND */
if (*root == HA_OFFSET_ERROR ||
- (error=w_search(info, keyinfo, comp_flag, key, key_length,
- *root, (uchar*) 0, (uchar*) 0,
- (my_off_t) 0, 1)) > 0)
- error= _ma_enlarge_root(info,keyinfo,key,root);
+ (error= w_search(info, keyinfo, comp_flag, key, key_length,
+ *root, (my_off_t) 0, (uchar*) 0,
+ (MARIA_PINNED_PAGE *) 0, (uchar*) 0, 1)) > 0)
+ error= _ma_enlarge_root(info, keyinfo, key, root);
DBUG_RETURN(error);
} /* _ma_ck_real_write_btree */
- /* Make a new root with key as only pointer */
+/*
+ @brief write hook for undo key insert
+*/
+
+my_bool write_hook_for_undo_key(enum translog_record_type type,
+ TRN *trn, MARIA_HA *tbl_info,
+ LSN *lsn, void *hook_arg)
+{
+ struct st_msg_to_write_hook_for_undo_key *msg=
+ (struct st_msg_to_write_hook_for_undo_key *) hook_arg;
+
+ *msg->root= msg->value;
+ _ma_fast_unlock_key_del(tbl_info);
+ return write_hook_for_undo(type, trn, tbl_info, lsn, 0);
+}
+
+
+/**
+ @brief Make a new root with key as only pointer
+
+ @retval -1 error
+ @retval 0 ok
+*/
-int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key,
+int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo, const uchar *key,
my_off_t *root)
{
- uint t_length,nod_flag;
+ uint t_length, nod_flag, page_length;
MARIA_KEY_PARAM s_temp;
MARIA_SHARE *share=info->s;
+ MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
+ int res= 0;
DBUG_ENTER("_ma_enlarge_root");
nod_flag= (*root != HA_OFFSET_ERROR) ? share->base.key_reflength : 0;
@@ -368,18 +492,36 @@ int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key,
_ma_kpointer(info, info->buff + info->s->keypage_header, *root);
t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,(uchar*) 0,
(uchar*) 0, (uchar*) 0, key,&s_temp);
+ page_length= info->s->keypage_header + t_length + nod_flag;
- _ma_store_keynr(info, info->buff, (keyinfo - info->s->keyinfo));
- _ma_store_page_used(info, info->buff, info->s->keypage_header +
- t_length + nod_flag, nod_flag);
+ bzero(info->buff, info->s->keypage_header);
+ _ma_store_keynr(info, info->buff, keyinfo->key_nr);
+ _ma_store_page_used(info, info->buff, page_length, nod_flag);
(*keyinfo->store_key)(keyinfo, info->buff + info->s->keypage_header +
nod_flag, &s_temp);
+
/* Mark that info->buff was used */
info->keyread_buff_used= info->page_changed= 1;
- if ((*root= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR ||
- _ma_write_keypage(info,keyinfo,*root,DFLT_INIT_HITS,info->buff))
+ if ((*root= _ma_new(info, PAGECACHE_PRIORITY_HIGH, &page_link)) ==
+ HA_OFFSET_ERROR)
DBUG_RETURN(-1);
- DBUG_RETURN(0);
+
+ /*
+ Clear uninitialized part of page to avoid valgrind/purify warnings
+ and to get a clean page that is easier to compress and compare with
+ pages generated with redo
+ */
+ bzero(info->buff + page_length, share->block_size - page_length);
+
+
+ if (info->s->now_transactional &&
+ _ma_log_new(info, *root, info->buff, page_length, keyinfo->key_nr, 1))
+ res= -1;
+ if (_ma_write_keypage(info, keyinfo, *root, page_link->write_lock,
+ PAGECACHE_PRIORITY_HIGH, info->buff))
+ res= -1;
+
+ DBUG_RETURN(res);
} /* _ma_enlarge_root */
@@ -389,13 +531,14 @@ int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key,
@return
@retval -1 error
@retval 0 ok
- @retval 1 key should be stored in higher tree
+ @retval > 0 Key should be stored in higher tree
*/
static int w_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
uint comp_flag, uchar *key, uint key_length, my_off_t page,
- uchar *father_buff, uchar *father_keypos,
- my_off_t father_page, my_bool insert_last)
+ my_off_t father_page, uchar *father_buff,
+ MARIA_PINNED_PAGE *father_page_link, uchar *father_keypos,
+ my_bool insert_last)
{
int error,flag;
uint nod_flag, search_key_length;
@@ -403,6 +546,7 @@ static int w_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
uchar keybuff[HA_MAX_KEY_BUFF];
my_bool was_last_key;
my_off_t next_page, dup_key_pos;
+ MARIA_PINNED_PAGE *page_link;
DBUG_ENTER("w_search");
DBUG_PRINT("enter",("page: %ld", (long) page));
@@ -410,7 +554,8 @@ static int w_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
if (!(temp_buff= (uchar*) my_alloca((uint) keyinfo->block_length+
HA_MAX_KEY_BUFF*2)))
DBUG_RETURN(-1);
- if (!_ma_fetch_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff,0))
+ if (!_ma_fetch_keypage(info, keyinfo, page, PAGECACHE_LOCK_WRITE,
+ DFLT_INIT_HITS, temp_buff, 0, &page_link))
goto err;
flag=(*keyinfo->bin_search)(info,keyinfo,temp_buff,key,search_key_length,
@@ -457,13 +602,19 @@ static int w_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
DBUG_ASSERT(subkeys < 0);
ft_intXstore(keypos, subkeys);
if (!error)
- error= _ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff);
+ {
+ page_link->changed= 1;
+ error= _ma_write_keypage(info, keyinfo, page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, temp_buff);
+ }
my_afree((uchar*) temp_buff);
DBUG_RETURN(error);
}
}
else /* not HA_FULLTEXT, normal HA_NOSAME key */
{
+ DBUG_PRINT("warning", ("Duplicate key"));
info->dup_key_pos= dup_key_pos;
my_afree((uchar*) temp_buff);
my_errno=HA_ERR_FOUND_DUPP_KEY;
@@ -476,12 +627,15 @@ static int w_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
insert_last=0;
next_page= _ma_kpos(nod_flag,keypos);
if (next_page == HA_OFFSET_ERROR ||
- (error=w_search(info, keyinfo, comp_flag, key, key_length, next_page,
- temp_buff, keypos, page, insert_last)) >0)
+ (error= w_search(info, keyinfo, comp_flag, key, key_length, next_page,
+ page, temp_buff, page_link, keypos, insert_last)) > 0)
{
- error= _ma_insert(info,keyinfo,key,temp_buff,keypos,keybuff,father_buff,
- father_keypos,father_page, insert_last);
- if (_ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff))
+ error= _ma_insert(info, keyinfo, key, temp_buff, keypos, page, keybuff,
+ father_page, father_buff, father_page_link,
+ father_keypos, insert_last);
+ page_link->changed= 1;
+ if (_ma_write_keypage(info, keyinfo, page, PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS,temp_buff))
goto err;
}
my_afree((uchar*) temp_buff);
@@ -500,9 +654,10 @@ err:
_ma_insert()
info Open table information.
keyinfo Key definition information.
- key New key.
+ key New key
anc_buff Key page (beginning).
key_pos Position in key page where to insert.
+ anc_page Page number for anc_buff
key_buff Copy of previous key.
father_buff parent key page for balancing.
father_key_pos position in parent key page for balancing.
@@ -511,20 +666,23 @@ err:
DESCRIPTION
Insert new key at right of key_pos.
+ Note that caller must save anc_buff
RETURN
- 2 if key contains key to upper level.
- 0 OK.
< 0 Error.
+ 0 OK
+ 1 If key contains key to upper level (from balance page)
+ 2 If key contains key to upper level (from split space)
*/
int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
- uchar *key, uchar *anc_buff, uchar *key_pos, uchar *key_buff,
- uchar *father_buff, uchar *father_key_pos, my_off_t father_page,
- my_bool insert_last)
+ uchar *key, uchar *anc_buff, uchar *key_pos, my_off_t anc_page,
+ uchar *key_buff, my_off_t father_page, uchar *father_buff,
+ MARIA_PINNED_PAGE *father_page_link, uchar *father_key_pos,
+ my_bool insert_last)
{
- uint a_length,nod_flag;
- int t_length;
+ uint a_length, nod_flag, org_anc_length;
+ int t_length, res;
uchar *endpos, *prev_key;
MARIA_KEY_PARAM s_temp;
DBUG_ENTER("_ma_insert");
@@ -533,6 +691,7 @@ int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
USE_WHOLE_KEY););
_ma_get_used_and_nod(info, anc_buff, a_length, nod_flag);
+ org_anc_length= a_length;
endpos= anc_buff+ a_length;
prev_key= (key_pos == anc_buff + info->s->keypage_header + nod_flag ?
(uchar*) 0 : key_buff);
@@ -577,6 +736,11 @@ int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
(*keyinfo->store_key)(keyinfo,key_pos,&s_temp);
a_length+=t_length;
_ma_store_page_used(info, anc_buff, a_length, nod_flag);
+
+ /*
+ Check if the new key fits totally into the page
+ (anc_buff is big enough to contain a full page + one key)
+ */
if (a_length <= (uint) keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE)
{
if (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE - a_length < 32 &&
@@ -588,8 +752,8 @@ int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
Normal word. One-level tree. Page is almost full.
Let's consider converting.
We'll compare 'key' and the first key at anc_buff
- */
- uchar *a= key, *b= anc_buff + info->s->keypage_header + nod_flag;
+ */
+ const uchar *a= key, *b= anc_buff + info->s->keypage_header + nod_flag;
uint alen, blen, ft2len=info->s->ft2_keyinfo.keylength;
/* the very first key on the page is always unpacked */
DBUG_ASSERT((*b & 128) == 0);
@@ -621,7 +785,7 @@ int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
*/
b+=blen+ft2len+2;
for (a=anc_buff+a_length ; b < a ; b+=ft2len+2)
- insert_dynamic(info->ft1_to_ft2, b);
+ insert_dynamic(info->ft1_to_ft2, (uchar*) b);
/* fixing the page's length - it contains only one key now */
_ma_store_page_used(info, anc_buff, info->s->keypage_header + blen +
@@ -630,6 +794,13 @@ int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
/* the rest will be done when we're back from recursion */
}
}
+ else
+ {
+ if (info->s->now_transactional &&
+ _ma_log_add(info, anc_page, anc_buff, endpos, key_pos,
+ s_temp.changed_length, t_length))
+ DBUG_RETURN(-1);
+ }
DBUG_RETURN(0); /* There is room on page */
}
/* Page is full */
@@ -637,29 +808,59 @@ int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
insert_last=0;
if (!(keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY)) &&
father_buff && !insert_last)
- DBUG_RETURN(_ma_balance_page(info,keyinfo,key,anc_buff,father_buff,
- father_key_pos,father_page));
- DBUG_RETURN(_ma_split_page(info,keyinfo,key,anc_buff,key_buff, insert_last));
+ {
+ s_temp.key_pos= key_pos;
+ s_temp.move_length= t_length;
+ father_page_link->changed= 1;
+ DBUG_RETURN(_ma_balance_page(info, keyinfo, key, anc_buff, anc_page,
+ father_page, father_buff, father_key_pos,
+ &s_temp));
+ }
+
+ res= _ma_split_page(info,keyinfo,key,anc_buff,key_buff, insert_last);
+ if (res < 0)
+ DBUG_RETURN(res); /* Error */
+
+ if (info->s->now_transactional)
+ {
+ if (_ma_log_split(info, anc_page, anc_buff, org_anc_length,
+ _ma_get_page_used(info, anc_buff),
+ key_pos,
+ s_temp.changed_length,
+ t_length, KEY_OP_NONE, (uchar*) 0, 0, 0))
+ res= -1;
+ }
+ DBUG_RETURN(res);
} /* _ma_insert */
- /* split a full page in two and assign emerging item to key */
+/**
+ @brief split a full page in two and assign emerging item to key
+
+ RETURN
+ @retval 0 ok
+ @retval -1 error
+*/
int _ma_split_page(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
uchar *key, uchar *buff, uchar *key_buff,
my_bool insert_last_key)
{
uint length,a_length,key_ref_length,t_length,nod_flag,key_length;
- uchar *key_pos,*pos, *after_key;
+ uint page_length;
+ uchar *key_pos,*pos, *after_key, *new_buff;
my_off_t new_pos;
MARIA_KEY_PARAM s_temp;
+ MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
+ int res;
DBUG_ENTER("maria_split_page");
+
LINT_INIT(after_key);
DBUG_DUMP("buff", buff, _ma_get_page_used(info, buff));
- if (info->s->keyinfo+info->lastinx == keyinfo)
- info->page_changed=1; /* Info->buff is used */
+ info->page_changed=1; /* Info->buff is used */
info->keyread_buff_used=1;
+ new_buff= info->buff;
nod_flag=_ma_test_if_nod(info, buff);
key_ref_length= info->s->keypage_header + nod_flag;
if (insert_last_key)
@@ -680,12 +881,13 @@ int _ma_split_page(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
{
DBUG_PRINT("test",("Splitting nod"));
pos=key_pos-nod_flag;
- memcpy((uchar*) info->buff + info->s->keypage_header, (uchar*) pos,
+ memcpy((uchar*) new_buff + info->s->keypage_header, (uchar*) pos,
(size_t) nod_flag);
}
- /* Move middle item to key and pointer to new page */
- if ((new_pos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR)
+ /* Move middle item to key and pointer to new page */
+ if ((new_pos= _ma_new(info, PAGECACHE_PRIORITY_HIGH, &page_link)) ==
+ HA_OFFSET_ERROR)
DBUG_RETURN(-1);
_ma_kpointer(info, _ma_move_key(keyinfo,key,key_buff),new_pos);
@@ -697,21 +899,30 @@ int _ma_split_page(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
(uchar*) 0, (uchar*) 0,
key_buff, &s_temp);
length=(uint) ((buff+a_length)-key_pos);
- memcpy((uchar*) info->buff+key_ref_length+t_length,(uchar*) key_pos,
+ memcpy((uchar*) new_buff+key_ref_length+t_length,(uchar*) key_pos,
(size_t) length);
- (*keyinfo->store_key)(keyinfo,info->buff+key_ref_length,&s_temp);
- _ma_store_page_used(info, info->buff, length + t_length + key_ref_length,
- nod_flag);
+ (*keyinfo->store_key)(keyinfo,new_buff+key_ref_length,&s_temp);
+ page_length= length + t_length + key_ref_length;
+ bzero(new_buff, info->s->keypage_header);
+ _ma_store_page_used(info, new_buff, page_length, nod_flag);
/* Copy key number */
- info->buff[info->s->keypage_header - KEYPAGE_USED_SIZE - KEYPAGE_KEYID_SIZE -
+ new_buff[info->s->keypage_header - KEYPAGE_USED_SIZE - KEYPAGE_KEYID_SIZE -
KEYPAGE_FLAG_SIZE]=
buff[info->s->keypage_header - KEYPAGE_USED_SIZE - KEYPAGE_KEYID_SIZE -
KEYPAGE_FLAG_SIZE];
- if (_ma_write_keypage(info,keyinfo,new_pos,DFLT_INIT_HITS,info->buff))
- DBUG_RETURN(-1);
+
+ res= 2; /* Middle key up */
+ if (info->s->now_transactional &&
+ _ma_log_new(info, new_pos, new_buff, page_length, keyinfo->key_nr, 0))
+ res= -1;
+ bzero(new_buff + page_length, info->s->block_size - page_length);
+
+ if (_ma_write_keypage(info, keyinfo, new_pos, page_link->write_lock,
+ DFLT_INIT_HITS, new_buff))
+ res= -1;
DBUG_DUMP("key",(uchar*) key, _ma_keylength(keyinfo,key));
- DBUG_RETURN(2); /* Middle key up */
+ DBUG_RETURN(res);
} /* _ma_split_page */
@@ -821,18 +1032,33 @@ static uchar *_ma_find_last_pos(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
} /* _ma_find_last_pos */
- /* Balance page with not packed keys with page on right/left */
- /* returns 0 if balance was done */
+/**
+ @brief Balance page with static size keys with page on right/left
+
+ @param key Middle key will be stored here
+
+ @notes
+ Father_buff will always be changed
+
+ @return
+ @retval 0 Balance was done
+ @retval 1 Middle key up
+ @retval -1 Error
+*/
static int _ma_balance_page(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
- uchar *key, uchar *curr_buff, uchar *father_buff,
- uchar *father_key_pos, my_off_t father_page)
+ uchar *key, uchar *curr_buff,
+ my_off_t curr_page,
+ my_off_t father_page, uchar *father_buff,
+ uchar *father_key_pos, MARIA_KEY_PARAM *s_temp)
{
+ MARIA_PINNED_PAGE *next_page_link;
+ MARIA_PINNED_PAGE tmp_page_link, *new_page_link= &tmp_page_link;
my_bool right;
- uint k_length,father_length,father_keylength,nod_flag,curr_keylength,
- right_length,left_length,new_right_length,new_left_length,extra_length,
- length,keys;
- uchar *pos,*buff,*extra_buff;
+ uint k_length,father_length,father_keylength,nod_flag,curr_keylength;
+ uint right_length,left_length,new_right_length,new_left_length,extra_length;
+ uint keys, tmp_length, extra_buff_length;
+ uchar *pos,*buff,*extra_buff, *parting_key;
my_off_t next_page,new_pos;
uchar tmp_part_key[HA_MAX_KEY_BUFF];
DBUG_ENTER("_ma_balance_page");
@@ -853,24 +1079,26 @@ static int _ma_balance_page(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
next_page= _ma_kpos(info->s->base.key_reflength,
father_key_pos+father_keylength);
buff=info->buff;
- DBUG_PRINT("test",("use right page: %lu", (ulong) next_page));
+ DBUG_PRINT("info", ("use right page: %lu", (ulong) next_page));
}
else
{
right=0;
father_key_pos-=father_keylength;
next_page= _ma_kpos(info->s->base.key_reflength,father_key_pos);
- /* Fix that curr_buff is to left */
- buff=curr_buff; curr_buff=info->buff;
- DBUG_PRINT("test",("use left page: %lu", (ulong) next_page));
+ /* Move curr_buff so that it's on the left */
+ buff= curr_buff;
+ curr_buff= info->buff;
+ DBUG_PRINT("info", ("use left page: %lu", (ulong) next_page));
} /* father_key_pos ptr to parting key */
- if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,info->buff,0))
+ if (!_ma_fetch_keypage(info,keyinfo, next_page, PAGECACHE_LOCK_WRITE,
+ DFLT_INIT_HITS, info->buff, 0, &next_page_link))
goto err;
+ next_page_link->changed= 1;
DBUG_DUMP("next", info->buff, _ma_get_page_used(info, info->buff));
- /* Test if there is room to share keys */
-
+ /* Test if there is room to share keys */
left_length= _ma_get_page_used(info, curr_buff);
right_length= _ma_get_page_used(info, buff);
keys= ((left_length+right_length-info->s->keypage_header*2-nod_flag*2)/
@@ -878,15 +1106,23 @@ static int _ma_balance_page(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
if ((right ? right_length : left_length) + curr_keylength <=
(uint) keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE)
- { /* Merge buffs */
+ {
+ /* Enough space to hold all keys in the two buffers; balance the buffers */
new_left_length= info->s->keypage_header+nod_flag+(keys/2)*curr_keylength;
new_right_length=info->s->keypage_header+nod_flag+(((keys+1)/2)*
curr_keylength);
- _ma_store_page_used(info, curr_buff,new_left_length,nod_flag);
- _ma_store_page_used(info, buff,new_right_length,nod_flag);
+ _ma_store_page_used(info, curr_buff, new_left_length, nod_flag);
+ _ma_store_page_used(info, buff, new_right_length, nod_flag);
+ DBUG_PRINT("info", ("left_length: %u -> %u right_length: %u -> %u",
+ left_length, new_left_length,
+ right_length, new_right_length));
if (left_length < new_left_length)
- { /* Move keys buff -> leaf */
+ {
+ uint length;
+ DBUG_PRINT("info", ("move keys to end of buff"));
+
+ /* Move keys buff -> curr_buff */
pos=curr_buff+left_length;
memcpy(pos,father_key_pos, (size_t) k_length);
memcpy(pos+k_length, buff + info->s->keypage_header,
@@ -894,30 +1130,150 @@ static int _ma_balance_page(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
pos= buff + info->s->keypage_header + length;
memcpy(father_key_pos, pos, (size_t) k_length);
bmove(buff + info->s->keypage_header, pos + k_length, new_right_length);
+
+ if (info->s->now_transactional)
+ {
+ if (right)
+ {
+ /*
+ Log changes to page on left
+ The original page is on the left and stored in curr_buff
+ We have on the page the newly inserted key and data
+ from buff added last on the page
+ */
+ if (_ma_log_split(info, curr_page, curr_buff,
+ left_length - s_temp->move_length,
+ new_left_length,
+ s_temp->key_pos, s_temp->changed_length,
+ s_temp->move_length,
+ KEY_OP_ADD_SUFFIX,
+ curr_buff + left_length,
+ new_left_length - left_length,
+ new_left_length - left_length+ k_length))
+ goto err;
+ /*
+ Log changes to page on right
+ This contains the original data with some keys deleted from
+ start of page
+ */
+ if (_ma_log_prefix(info, next_page, buff, 0,
+ ((int) new_right_length - (int) right_length)))
+ goto err;
+ }
+ else
+ {
+ /*
+ Log changes to page on right (the original page) which is in buff
+ Data is removed from start of page
+ The inserted key may be in buff or moved to curr_buff
+ */
+ if (_ma_log_del_prefix(info, curr_page, buff,
+ right_length - s_temp->changed_length,
+ new_right_length,
+ s_temp->key_pos, s_temp->changed_length,
+ s_temp->move_length))
+ goto err;
+ /*
+ Log changes to page on left, which has new data added last
+ */
+ if (_ma_log_suffix(info, next_page, curr_buff,
+ left_length, new_left_length))
+ goto err;
+ }
+ }
}
else
- { /* Move keys -> buff */
+ {
+ uint length;
+ DBUG_PRINT("info", ("move keys to start of buff"));
bmove_upp(buff + new_right_length, buff + right_length,
right_length - info->s->keypage_header);
- length=new_right_length-right_length-k_length;
+ length= new_right_length -right_length - k_length;
memcpy(buff + info->s->keypage_header + length, father_key_pos,
(size_t) k_length);
pos=curr_buff+new_left_length;
memcpy(father_key_pos, pos, (size_t) k_length);
- memcpy(buff + info->s->keypage_header, pos+k_length,
- (size_t) length);
+ memcpy(buff + info->s->keypage_header, pos+k_length, (size_t) length);
+
+ if (info->s->now_transactional)
+ {
+ if (right)
+ {
+ /*
+ Log changes to page on left
+ The original page is on the left and stored in curr_buff
+ The page is shortened from end and the key may be on the page
+ */
+ if (_ma_log_split(info, curr_page, curr_buff,
+ left_length - s_temp->move_length,
+ new_left_length,
+ s_temp->key_pos, s_temp->changed_length,
+ s_temp->move_length,
+ KEY_OP_NONE, (uchar*) 0, 0, 0))
+ goto err;
+ /*
+ Log changes to page on right
+ This contains the original data, with some data from cur_buff
+ added first
+ */
+ if (_ma_log_prefix(info, next_page, buff,
+ (uint) (new_right_length - right_length),
+ (int) (new_right_length - right_length)))
+ goto err;
+ }
+ else
+ {
+ /*
+ Log changes to page on right (the original page) which is in buff
+ We have on the page the newly inserted key and data
+ from buff added first on the page
+ */
+ uint diff_length= new_right_length - right_length;
+ if (_ma_log_split(info, curr_page, buff,
+ left_length - s_temp->move_length,
+ new_right_length,
+ s_temp->key_pos + diff_length,
+ s_temp->changed_length,
+ s_temp->move_length,
+ KEY_OP_ADD_PREFIX,
+ buff + info->s->keypage_header,
+ diff_length, diff_length + k_length))
+ goto err;
+ /*
+ Log changes to page on left, which is shortened from end
+ */
+ if (_ma_log_suffix(info, next_page, curr_buff,
+ left_length, new_left_length))
+ goto err;
+ }
+ }
}
- if (_ma_write_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,info->buff) ||
- _ma_write_keypage(info,keyinfo,father_page,DFLT_INIT_HITS,father_buff))
+ /* Log changes to father (one level up) page */
+
+ if (info->s->now_transactional &&
+ _ma_log_change(info, father_page, father_buff, father_key_pos,
+ k_length))
+ goto err;
+
+ /*
+ next_page_link->changed is marked as true above and fathers
+ page_link->changed is marked as true in caller
+ */
+ if (_ma_write_keypage(info, keyinfo, next_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED,
+ DFLT_INIT_HITS, info->buff) ||
+ _ma_write_keypage(info, keyinfo, father_page,
+ PAGECACHE_LOCK_LEFT_WRITELOCKED, DFLT_INIT_HITS,
+ father_buff))
goto err;
DBUG_RETURN(0);
}
- /* curr_buff[] and buff[] are full, lets split and make new nod */
+ /* curr_buff[] and buff[] are full, lets split and make new nod */
- extra_buff=info->buff+info->s->base.max_key_block_length;
+ extra_buff= info->buff+info->s->base.max_key_block_length;
new_left_length= new_right_length= (info->s->keypage_header + nod_flag +
(keys+1) / 3 * curr_keylength);
/*
@@ -927,51 +1283,141 @@ static int _ma_balance_page(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
*/
if (keys == 5) /* Too few keys to balance */
new_left_length-=curr_keylength;
- extra_length=nod_flag+left_length+right_length-
- new_left_length-new_right_length-curr_keylength;
+ extra_length= (nod_flag + left_length + right_length -
+ new_left_length - new_right_length - curr_keylength);
+ extra_buff_length= extra_length + info->s->keypage_header;
DBUG_PRINT("info",("left_length: %d right_length: %d new_left_length: %d new_right_length: %d extra_length: %d",
- left_length, right_length,
- new_left_length, new_right_length,
- extra_length));
+ left_length, right_length,
+ new_left_length, new_right_length,
+ extra_length));
_ma_store_page_used(info, curr_buff,new_left_length,nod_flag);
_ma_store_page_used(info, buff,new_right_length,nod_flag);
- /* Copy key number */
+
bzero(extra_buff, info->s->keypage_header);
extra_buff[info->s->keypage_header - KEYPAGE_USED_SIZE - KEYPAGE_KEYID_SIZE -
KEYPAGE_FLAG_SIZE]=
buff[info->s->keypage_header - KEYPAGE_USED_SIZE - KEYPAGE_KEYID_SIZE -
KEYPAGE_FLAG_SIZE];
- _ma_store_page_used(info, extra_buff, extra_length + info->s->keypage_header,
- nod_flag);
+ _ma_store_page_used(info, extra_buff, extra_buff_length, nod_flag);
/* move first largest keys to new page */
pos=buff+right_length-extra_length;
- memcpy(extra_buff + info->s->keypage_header, pos,
- (size_t) extra_length);
- /* Save new parting key */
+ memcpy(extra_buff + info->s->keypage_header, pos, extra_length);
+ /* Zero old data from buffer */
+ bzero(extra_buff + extra_buff_length,
+ info->s->block_size - extra_buff_length);
+
+ /* Save new parting key between buff and extra_buff */
memcpy(tmp_part_key, pos-k_length,k_length);
/* Make place for new keys */
bmove_upp(buff+ new_right_length, pos - k_length,
- right_length - extra_length - k_length - info->s->keypage_header);
+ right_length - extra_length - k_length - info->s->keypage_header);
/* Copy keys from left page */
pos= curr_buff+new_left_length;
memcpy(buff + info->s->keypage_header, pos + k_length,
- (size_t) (length=left_length-new_left_length-k_length));
+ (size_t) (tmp_length= left_length - new_left_length - k_length));
/* Copy old parting key */
- memcpy(buff + info->s->keypage_header + length,
- father_key_pos, (size_t) k_length);
+ parting_key= buff + info->s->keypage_header + tmp_length;
+ memcpy(parting_key, father_key_pos, (size_t) k_length);
/* Move new parting keys up to caller */
memcpy((right ? key : father_key_pos),pos,(size_t) k_length);
memcpy((right ? father_key_pos : key),tmp_part_key, k_length);
- if ((new_pos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR)
+ if ((new_pos= _ma_new(info, DFLT_INIT_HITS, &new_page_link))
+ == HA_OFFSET_ERROR)
goto err;
_ma_kpointer(info,key+k_length,new_pos);
- if (_ma_write_keypage(info,keyinfo,(right ? new_pos : next_page),
- DFLT_INIT_HITS,info->buff) ||
- _ma_write_keypage(info,keyinfo,(right ? next_page : new_pos),
- DFLT_INIT_HITS,extra_buff))
+
+ if (info->s->now_transactional)
+ {
+ if (right)
+ {
+ /*
+ Page order according to key values:
+ original_page (curr_buff), next_page (buff), extra_buff
+
+ cur_buff is shortened,
+ buff is getting new keys at start and shortened from end.
+ extra_buff is new page
+
+ Note that extra_buff (largest key parts) will be stored at the
+ place of the original 'right' page (next_page) and right page (buff)
+ will be stored at new_pos.
+
+ This makes the log entries smaller as right_page contains all
+ data to generate the data extra_buff
+ */
+
+ /*
+ Log changes to page on left (page shortened at end)
+ */
+ if (_ma_log_split(info, curr_page, curr_buff,
+ left_length, new_left_length,
+ s_temp->key_pos, s_temp->changed_length,
+ s_temp->move_length,
+ KEY_OP_NONE, (uchar*) 0, 0, 0))
+ goto err;
+ /*
+ Log changes to right page (stored at next page)
+ This contains the last 'extra_buff' from 'buff'
+ */
+ if (_ma_log_prefix(info, next_page, extra_buff,
+ 0, (int) (extra_length - right_length)))
+ goto err;
+
+ /*
+ Log changes to middle page, which is stored at the new page
+ position
+ */
+ if (_ma_log_new(info, new_pos, buff, new_right_length,
+ keyinfo->key_nr, 0))
+ goto err;
+ }
+ else
+ {
+ /*
+ Log changes to page on right (the original page) which is in buff
+ This contains the original data, with some data from curr_buff
+ added first and shortened at end
+ */
+ int data_added_first= left_length - new_left_length;
+ if (_ma_log_key_middle(info, curr_page, buff,
+ new_right_length,
+ data_added_first,
+ data_added_first,
+ extra_length,
+ s_temp->key_pos,
+ s_temp->changed_length,
+ s_temp->move_length))
+ goto err;
+
+ /* Log changes to page on left, which is shortened from end */
+ if (_ma_log_suffix(info, next_page, curr_buff,
+ left_length, new_left_length))
+ goto err;
+
+ /* Log change to rightmost (new) page */
+ if (_ma_log_new(info, new_pos, extra_buff,
+ extra_buff_length, keyinfo->key_nr, 0))
+ goto err;
+ }
+
+ /* Log changes to father (one level up) page */
+ if (info->s->now_transactional &&
+ _ma_log_change(info, father_page, father_buff, father_key_pos,
+ k_length))
+ goto err;
+ }
+
+ if (_ma_write_keypage(info, keyinfo, (right ? new_pos : next_page),
+ (right ? new_page_link->write_lock :
+ PAGECACHE_LOCK_LEFT_WRITELOCKED),
+ DFLT_INIT_HITS, info->buff) ||
+ _ma_write_keypage(info, keyinfo, (right ? next_page : new_pos),
+ (!right ? new_page_link->write_lock :
+ PAGECACHE_LOCK_LEFT_WRITELOCKED),
+ DFLT_INIT_HITS, extra_buff))
goto err;
DBUG_RETURN(1); /* Middle key up */
@@ -980,6 +1426,7 @@ err:
DBUG_RETURN(-1);
} /* _ma_balance_page */
+
/**********************************************************************
* Bulk insert code *
**********************************************************************/
@@ -990,15 +1437,16 @@ typedef struct {
} bulk_insert_param;
-int _ma_ck_write_tree(register MARIA_HA *info, uint keynr, uchar *key,
- uint key_length)
+/*
+ Insert a key into the bulk insert tree of index 'keynr'.
+
+ The element length given to tree_insert() is key_length plus
+ info->s->rec_reflength.
+
+ Returns 0 when tree_insert() returns a non-zero value and
+ HA_ERR_OUT_OF_MEM otherwise.
+*/
+static int _ma_ck_write_tree(register MARIA_HA *info, uint keynr, uchar *key,
+ uint key_length)
{
int error;
DBUG_ENTER("_ma_ck_write_tree");
- error= tree_insert(&info->bulk_insert[keynr], key,
- key_length + info->s->rec_reflength,
- info->bulk_insert[keynr].custom_arg) ? 0 : HA_ERR_OUT_OF_MEM ;
+ error= (tree_insert(&info->bulk_insert[keynr], key,
+ key_length + info->s->rec_reflength,
+ info->bulk_insert[keynr].custom_arg) ? 0 :
+ HA_ERR_OUT_OF_MEM) ;
DBUG_RETURN(error);
} /* _ma_ck_write_tree */
@@ -1037,8 +1485,8 @@ static int keys_free(uchar *key, TREE_FREE mode, bulk_insert_param *param)
keyinfo=param->info->s->keyinfo+param->keynr;
keylen= _ma_keylength(keyinfo, key);
memcpy(lastkey, key, keylen);
- return _ma_ck_write_btree(param->info,param->keynr,lastkey,
- keylen - param->info->s->rec_reflength);
+ return _ma_ck_write_btree(param->info, param->keynr, lastkey,
+ keylen - param->info->s->rec_reflength);
case free_end:
if (param->info->s->concurrent_insert)
rw_unlock(&param->info->s->key_root_lock[param->keynr]);
@@ -1128,12 +1576,670 @@ void maria_end_bulk_insert(MARIA_HA *info)
for (i=0 ; i < info->s->base.keys ; i++)
{
if (is_tree_inited(& info->bulk_insert[i]))
- {
- delete_tree(& info->bulk_insert[i]);
- }
+ delete_tree(&info->bulk_insert[i]);
}
- my_free((void *)info->bulk_insert, MYF(0));
+ my_free(info->bulk_insert, MYF(0));
info->bulk_insert=0;
}
DBUG_VOID_RETURN;
}
+
+
+/****************************************************************************
+ Dedicated functions that generate log entries
+****************************************************************************/
+
+/**
+ @brief Log creation of new page
+
+ @param info Maria handler
+ @param page Position of the page in the index file, in bytes.
+ It is divided by the block size before being logged
+ @param buff Page buffer. The first LSN_STORE_SIZE bytes are
+ not logged
+ @param page_length Used length of buff, including the LSN_STORE_SIZE
+ bytes at the start
+ @param key_nr Key number stored in the log entry
+ @param root_page Stored as the last byte of the log header
+
+ @note
+ We don't have to store the page_length into the log entry as we can
+ calculate this from the length of the log entry
+
+ @retval 1 error
+ @retval 0 ok
+*/
+
+static my_bool _ma_log_new(MARIA_HA *info, my_off_t page, uchar *buff,
+ uint page_length, uint key_nr, my_bool root_page)
+{
+ LSN lsn;
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2 + KEY_NR_STORE_SIZE
+ +1];
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
+ MARIA_SHARE *share=info->s;
+ DBUG_ENTER("_ma_log_new");
+ DBUG_PRINT("enter", ("page: %lu", (ulong) page));
+
+ DBUG_ASSERT(info->s->now_transactional);
+
+ /*
+ Store page number of the new page. The first FILEID_STORE_SIZE bytes of
+ log_data are not filled in here (presumably the log handler does that;
+ TODO confirm)
+ */
+ page/= info->s->block_size;
+ page_store(log_data + FILEID_STORE_SIZE, page);
+
+ /* Store link to next unused page */
+ if (info->used_key_del == 2)
+ page= 0; /* key_del not changed */
+ else
+ page= ((share->current_key_del == HA_OFFSET_ERROR) ? IMPOSSIBLE_PAGE_NO :
+ share->current_key_del / info->s->block_size);
+
+ page_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE, page);
+ key_nr_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE*2, key_nr);
+ log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE*2 + KEY_NR_STORE_SIZE]=
+ (uchar) root_page;
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
+
+ /* Second part is the page content after the LSN_STORE_SIZE first bytes */
+ page_length-= LSN_STORE_SIZE;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= buff + LSN_STORE_SIZE;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= page_length;
+
+ if (translog_write_record(&lsn, LOGREC_REDO_INDEX_NEW_PAGE,
+ info->trn, info, sizeof(log_data) + page_length,
+ TRANSLOG_INTERNAL_PARTS + 2, log_array,
+ log_data, NULL))
+ DBUG_RETURN(1);
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief
+ Log that a key was added to the page
+
+ @param info Maria handler
+ @param page Position of the page in the index file, in bytes
+ @param buff Page buffer
+ @param end_buff If key_pos equals this pointer, the change is logged
+ as KEY_OP_ADD_SUFFIX
+ @param key_pos Where on the page the changed data starts
+ @param changed_length Number of bytes logged from key_pos
+ @param move_length Logged with KEY_OP_SHIFT if not zero (only when
+ key_pos != end_buff)
+
+ @return
+ @retval 0 ok
+ @retval != 0 error
+
+ @note
+ NOTE(review): the error path returns -1 from a my_bool function while
+ the other log functions in this file return 1; confirm callers only
+ test the result for non-zero.
+*/
+
+static my_bool _ma_log_add(MARIA_HA *info, my_off_t page, uchar *buff,
+ uchar *end_buff, uchar *key_pos,
+ uint changed_length, int move_length)
+{
+ LSN lsn;
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 3 + 3 + 3], *log_pos;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
+ DBUG_ENTER("_ma_log_add");
+ DBUG_PRINT("enter", ("page: %lu", (ulong) page));
+
+ DBUG_ASSERT(info->s->now_transactional);
+
+ /*
+ Write REDO entry that contains the logical operations we need
+ to do the page
+ */
+ log_pos= log_data + FILEID_STORE_SIZE;
+ page/= info->s->block_size;
+ page_store(log_pos, page);
+ log_pos+= PAGE_STORE_SIZE;
+
+ if (key_pos == end_buff)
+ log_pos[0]= KEY_OP_ADD_SUFFIX;
+ else
+ {
+ /* Key not at end_buff: log offset, optional shift and then the change */
+ uint offset= (uint) (key_pos - buff);
+ log_pos[0]= KEY_OP_OFFSET;
+ int2store(log_pos+1, offset);
+ log_pos+= 3;
+ if (move_length)
+ {
+ log_pos[0]= KEY_OP_SHIFT;
+ int2store(log_pos+1, move_length);
+ log_pos+= 3;
+ }
+ log_pos[0]= KEY_OP_CHANGE;
+ }
+ /* Length argument of the KEY_OP_ADD_SUFFIX / KEY_OP_CHANGE set above */
+ int2store(log_pos+1, changed_length);
+ log_pos+= 3;
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos -
+ log_data);
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= key_pos;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= changed_length;
+
+ if (translog_write_record(&lsn, LOGREC_REDO_INDEX,
+ info->trn, info,
+ log_array[TRANSLOG_INTERNAL_PARTS +
+ 0].length + changed_length,
+ TRANSLOG_INTERNAL_PARTS + 2, log_array,
+ log_data, NULL))
+ DBUG_RETURN(-1);
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief
+ Log when some part of the key page changes
+
+ @param info Maria handler
+ @param page Position of the page in the index file, in bytes
+ @param buff Page buffer
+ @param key_pos Start of the changed area; must point into buff
+ @param length Number of changed bytes logged from key_pos
+
+ @return
+ @retval 0 ok
+ @retval 1 error
+*/
+
+static my_bool _ma_log_change(MARIA_HA *info, my_off_t page, uchar *buff,
+ uchar *key_pos, uint length)
+{
+ LSN lsn;
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 6], *log_pos;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
+ uint offset= (uint) (key_pos - buff);
+ DBUG_ENTER("_ma_log_change");
+ DBUG_PRINT("enter", ("page: %lu", (ulong) page));
+
+ DBUG_ASSERT(info->s->now_transactional);
+
+ /* Store page number of the changed page */
+ page/= info->s->block_size;
+ page_store(log_data + FILEID_STORE_SIZE, page);
+ /* Log entry is KEY_OP_OFFSET (offset) followed by KEY_OP_CHANGE (length) */
+ log_pos= log_data+ FILEID_STORE_SIZE + PAGE_STORE_SIZE;
+ log_pos[0]= KEY_OP_OFFSET;
+ int2store(log_pos+1, offset);
+ log_pos[3]= KEY_OP_CHANGE;
+ int2store(log_pos+4, length);
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= buff + offset;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= length;
+
+ if (translog_write_record(&lsn, LOGREC_REDO_INDEX,
+ info->trn, info, sizeof(log_data) + length,
+ TRANSLOG_INTERNAL_PARTS + 2, log_array,
+ log_data, NULL))
+ DBUG_RETURN(1);
+ DBUG_RETURN(0);
+}
+
+
+/**
+ @brief
+ Write log entry for page that has got a key added to the page under
+ one and only one of the following scenarios:
+ - Page is shortened from end
+ - Data is added to end of page
+ - Data added at front of page
+
+ @param info Maria handler
+ @param page Position of the page in the index file, in bytes
+ @param buff Page buffer
+ @param org_length Length of page before the key was added
+ @param new_length Final length of page
+ @param key_pos Where on the page the key was added
+ @param key_length Number of bytes changed at key_pos
+ @param move_length Logged with KEY_OP_SHIFT if not zero
+ @param prefix_or_suffix KEY_OP_NONE Ignored
+ KEY_OP_ADD_PREFIX Add data to start of page
+ KEY_OP_ADD_SUFFIX Add data to end of page
+ @param data Start of the prefix/suffix data to log
+ @param data_length Number of bytes added as prefix/suffix. If 0,
+ no prefix/suffix operation is logged
+ @param change_length Only used with KEY_OP_ADD_PREFIX: number of
+ bytes logged from 'data'
+
+ @return
+ @retval 0 ok
+ @retval != 0 error (result of translog_write_record())
+*/
+
+static my_bool _ma_log_split(MARIA_HA *info, my_off_t page, uchar *buff,
+ uint org_length, uint new_length,
+ uchar *key_pos, uint key_length, int move_length,
+ enum en_key_op prefix_or_suffix,
+ uchar *data, uint data_length,
+ uint change_length)
+{
+ LSN lsn;
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 3+3+3+3+3+2];
+ uchar *log_pos;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 3];
+ uint offset= (uint) (key_pos - buff);
+ uint translog_parts, extra_length;
+ DBUG_ENTER("_ma_log_split");
+ DBUG_PRINT("enter", ("page: %lu org_length: %u new_length: %u",
+ (ulong) page, org_length, new_length));
+
+ log_pos= log_data + FILEID_STORE_SIZE;
+ page/= info->s->block_size;
+ page_store(log_pos, page);
+ log_pos+= PAGE_STORE_SIZE;
+
+ if (new_length <= offset)
+ {
+ /*
+ Page was split before inserted key. Write redo entry where
+ we just cut current page at page_length
+ */
+ uint length_offset= org_length - new_length;
+ log_pos[0]= KEY_OP_DEL_SUFFIX;
+ int2store(log_pos+1, length_offset);
+ log_pos+= 3;
+ translog_parts= 1;
+ extra_length= 0;
+ }
+ else
+ {
+ /* Key was added to page which was split after the inserted key */
+ uint max_key_length;
+
+ /*
+ Handle case when split happened directly after the newly inserted key.
+ */
+ max_key_length= new_length - offset;
+ extra_length= min(key_length, max_key_length);
+
+ if ((int) new_length < (int) (org_length + move_length + data_length))
+ {
+ /* Shorten page */
+ uint diff= org_length + move_length + data_length - new_length;
+ log_pos[0]= KEY_OP_DEL_SUFFIX;
+ int2store(log_pos + 1, diff);
+ log_pos+= 3;
+ }
+ else
+ {
+ DBUG_ASSERT(new_length == org_length + move_length + data_length);
+ }
+
+ log_pos[0]= KEY_OP_OFFSET;
+ int2store(log_pos+1, offset);
+ log_pos+= 3;
+
+ if (move_length)
+ {
+ log_pos[0]= KEY_OP_SHIFT;
+ int2store(log_pos+1, move_length);
+ log_pos+= 3;
+ }
+
+ log_pos[0]= KEY_OP_CHANGE;
+ int2store(log_pos+1, extra_length);
+ log_pos+= 3;
+
+ /* Point to original inserted key data */
+ if (prefix_or_suffix == KEY_OP_ADD_PREFIX)
+ key_pos+= data_length;
+
+ translog_parts= 2;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char*) key_pos;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= extra_length;
+ }
+
+ if (data_length)
+ {
+ /* Add prefix or suffix */
+ log_pos[0]= prefix_or_suffix;
+ int2store(log_pos+1, data_length);
+ log_pos+= 3;
+ if (prefix_or_suffix == KEY_OP_ADD_PREFIX)
+ {
+ /*
+ KEY_OP_ADD_PREFIX has a second 2 byte argument that directly follows
+ the first one (same layout as written by _ma_log_prefix() and
+ _ma_log_key_middle()). log_pos already points past the operation
+ byte and first argument, so store at log_pos, not log_pos+1;
+ otherwise the value ends up one byte outside the logged header and
+ can be written past the end of log_data.
+ */
+ int2store(log_pos, change_length);
+ log_pos+= 2;
+ data_length= change_length;
+ }
+ log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= (char*) data;
+ log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= data_length;
+ translog_parts++;
+ extra_length+= data_length;
+ }
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos -
+ log_data);
+ DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX,
+ info->trn, info,
+ log_array[TRANSLOG_INTERNAL_PARTS +
+ 0].length +
+ extra_length,
+ TRANSLOG_INTERNAL_PARTS + translog_parts,
+ log_array, log_data, NULL));
+}
+
+
+/**
+ @brief
+ Write log entry for page that has got a key added to the page
+ and page is shortened from start of page
+
+ @fn _ma_log_del_prefix()
+ @param info Maria handler
+ @param page Position of the page in the index file, in bytes
+ @param buff Page buffer
+ @param org_length Length of buffer when read
+ @param new_length Final length
+ @param key_pos Where on page buffer key was added. This is position
+ before prefix was removed
+ @param key_length How many bytes was changed at 'key_pos'
+ @param move_length How many bytes was moved up when key was added
+
+ @return
+ @retval 0 ok
+ @retval 1 error
+*/
+
+static my_bool _ma_log_del_prefix(MARIA_HA *info, my_off_t page, uchar *buff,
+ uint org_length, uint new_length,
+ uchar *key_pos, uint key_length,
+ int move_length)
+{
+ LSN lsn;
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 12], *log_pos;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
+ uint offset= (uint) (key_pos - buff);
+ /* How much shorter the final page is than the page with the key added */
+ uint diff_length= org_length + move_length - new_length;
+ uint translog_parts, extra_length;
+ DBUG_ENTER("_ma_log_del_prefix");
+ DBUG_PRINT("enter", ("page: %lu org_length: %u new_length: %u",
+ (ulong) page, org_length, new_length));
+
+ DBUG_ASSERT((int) diff_length > 0);
+
+ log_pos= log_data + FILEID_STORE_SIZE;
+ page/= info->s->block_size;
+ page_store(log_pos, page);
+ log_pos+= PAGE_STORE_SIZE;
+
+ /* Defaults for the case where only the header part is logged */
+ translog_parts= 1;
+ extra_length= 0;
+
+ if (offset <= diff_length)
+ {
+ /*
+ Key is not anymore on page. Move data down, but take into account that
+ the original page had grown with 'move_length bytes'
+ */
+ log_pos[0]= KEY_OP_DEL_PREFIX;
+ int2store(log_pos+1, diff_length - move_length);
+ log_pos+= 3;
+ }
+ else
+ {
+ /*
+ Correct position to key, as data before key has been delete and key
+ has thus been moved down
+ */
+ offset-= diff_length;
+ key_pos-= diff_length;
+
+ /* Move data down */
+ log_pos[0]= KEY_OP_DEL_PREFIX;
+ int2store(log_pos+1, diff_length);
+ log_pos+= 3;
+
+ log_pos[0]= KEY_OP_OFFSET;
+ int2store(log_pos+1, offset);
+ log_pos+= 3;
+
+ if (move_length)
+ {
+ log_pos[0]= KEY_OP_SHIFT;
+ int2store(log_pos+1, move_length);
+ log_pos+= 3;
+ }
+ log_pos[0]= KEY_OP_CHANGE;
+ int2store(log_pos+1, key_length);
+ log_pos+= 3;
+ /* The changed key bytes are logged as a second part */
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char*) key_pos;
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= key_length;
+ translog_parts= 2;
+ extra_length= key_length;
+ }
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos -
+ log_data);
+ DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX,
+ info->trn, info,
+ log_array[TRANSLOG_INTERNAL_PARTS +
+ 0].length +
+ extra_length,
+ TRANSLOG_INTERNAL_PARTS + translog_parts,
+ log_array, log_data, NULL));
+}
+
+
+/**
+ @brief
+ Write log entry for page that has got data added first and
+ data deleted last. Old changed key may be part of page
+
+ @param info Maria handler
+ @param page Position of the page in the index file, in bytes
+ @param buff Page buffer
+ @param new_length Final length of page
+ @param data_added_first First argument of the logged KEY_OP_ADD_PREFIX.
+ key_pos is moved up by this many bytes
+ @param data_changed_first Second argument of KEY_OP_ADD_PREFIX; number of
+ bytes logged from buff + keypage_header
+ @param data_deleted_last Argument of the logged KEY_OP_DEL_SUFFIX
+ (increased by move_length if the key is
+ still on the page)
+ @param key_pos Where the key was on the page before data was
+ added first
+ @param key_length Number of bytes changed at key_pos
+ @param move_length Logged with KEY_OP_SHIFT if not zero
+
+ @return
+ @retval 0 ok
+ @retval != 0 error (result of translog_write_record())
+*/
+
+static my_bool _ma_log_key_middle(MARIA_HA *info, my_off_t page, uchar *buff,
+ uint new_length,
+ uint data_added_first,
+ uint data_changed_first,
+ uint data_deleted_last,
+ uchar *key_pos,
+ uint key_length, int move_length)
+{
+ LSN lsn;
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 3+5+3+3+3];
+ uchar *log_pos;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 4];
+ uint key_offset;
+ uint translog_parts, extra_length;
+ DBUG_ENTER("_ma_log_key_middle");
+ DBUG_PRINT("enter", ("page: %lu", (ulong) page));
+
+ /* new place of key after changes */
+ key_pos+= data_added_first;
+ key_offset= (uint) (key_pos - buff);
+ if (key_offset < new_length)
+ {
+ /* key is on page; Calculate how much of the key is there */
+ uint max_key_length= new_length - key_offset;
+ if (max_key_length < key_length)
+ {
+ /* Key is last on page */
+ key_length= max_key_length;
+ move_length= 0;
+ }
+ /*
+ Take into account that new data was added as part of original key
+ that also needs to be removed from page
+ */
+ data_deleted_last+= move_length;
+ }
+
+ page/= info->s->block_size;
+
+ /* First log changes to page */
+ log_pos= log_data + FILEID_STORE_SIZE;
+ page_store(log_pos, page);
+ log_pos+= PAGE_STORE_SIZE;
+
+ log_pos[0]= KEY_OP_DEL_SUFFIX;
+ int2store(log_pos+1, data_deleted_last);
+ log_pos+= 3;
+
+ /* KEY_OP_ADD_PREFIX takes two 2 byte arguments */
+ log_pos[0]= KEY_OP_ADD_PREFIX;
+ int2store(log_pos+1, data_added_first);
+ int2store(log_pos+3, data_changed_first);
+ log_pos+= 5;
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos -
+ log_data);
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= ((char*) buff +
+ info->s->keypage_header);
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= data_changed_first;
+ translog_parts= 2;
+ extra_length= data_changed_first;
+
+ /* If changed key is on page, log those changes too */
+
+ if (key_offset < new_length)
+ {
+ /*
+ These operations continue in log_data after the first header part, but
+ are logged as a separate part (part 2) as the prefix data (part 1)
+ has to come between them and the first header part
+ */
+ uchar *start_log_pos= log_pos;
+
+ log_pos[0]= KEY_OP_OFFSET;
+ int2store(log_pos+1, key_offset);
+ log_pos+= 3;
+ if (move_length)
+ {
+ log_pos[0]= KEY_OP_SHIFT;
+ int2store(log_pos+1, move_length);
+ log_pos+= 3;
+ }
+ log_pos[0]= KEY_OP_CHANGE;
+ int2store(log_pos+1, key_length);
+ log_pos+= 3;
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 2].str= (char*) start_log_pos;
+ log_array[TRANSLOG_INTERNAL_PARTS + 2].length= (uint) (log_pos -
+ start_log_pos);
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 3].str= (char*) key_pos;
+ log_array[TRANSLOG_INTERNAL_PARTS + 3].length= key_length;
+ translog_parts+=2;
+ extra_length+= log_array[TRANSLOG_INTERNAL_PARTS + 2].length + key_length;
+ }
+
+ DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX,
+ info->trn, info,
+ log_array[TRANSLOG_INTERNAL_PARTS +
+ 0].length + extra_length,
+ TRANSLOG_INTERNAL_PARTS + translog_parts,
+ log_array, log_data, NULL));
+}
+
+
+/**
+ @brief
+ Write log entry for page that has got data added or deleted at start of page
+
+ @param info Maria handler
+ @param page Position of the page in the index file, in bytes
+ @param buff Page buffer
+ @param changed_length Number of bytes logged from buff + keypage_header
+ when adding a prefix. Expected to be 0 when
+ deleting a prefix
+ @param move_length < 0: -move_length bytes are deleted from start of
+ page (KEY_OP_DEL_PREFIX)
+ >= 0: move_length bytes are added to start of
+ page (KEY_OP_ADD_PREFIX)
+
+ @return
+ @retval 0 ok
+ @retval != 0 error (result of translog_write_record())
+*/
+
+static my_bool _ma_log_prefix(MARIA_HA *info, my_off_t page,
+ uchar *buff, uint changed_length,
+ int move_length)
+{
+ uint translog_parts;
+ LSN lsn;
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 7], *log_pos;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
+ DBUG_ENTER("_ma_log_prefix");
+ DBUG_PRINT("enter", ("page: %lu change_length: %u move_length: %d",
+ (ulong) page, changed_length, move_length));
+
+ page/= info->s->block_size;
+ log_pos= log_data + FILEID_STORE_SIZE;
+ page_store(log_pos, page);
+ log_pos+= PAGE_STORE_SIZE;
+
+ if (move_length < 0)
+ {
+ /* Delete prefix */
+ DBUG_ASSERT(changed_length == 0);
+ log_pos[0]= KEY_OP_DEL_PREFIX;
+ int2store(log_pos+1, -move_length);
+ log_pos+= 3;
+ translog_parts= 1;
+ }
+ else
+ {
+ /* Add prefix */
+ DBUG_ASSERT(changed_length >0 && (int) changed_length >= move_length);
+ log_pos[0]= KEY_OP_ADD_PREFIX;
+ int2store(log_pos+1, move_length);
+ int2store(log_pos+3, changed_length);
+ log_pos+= 5;
+ /* The changed data at start of page is logged as a second part */
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= ((char*) buff +
+ info->s->keypage_header);
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= changed_length;
+ translog_parts= 2;
+ }
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos -
+ log_data);
+
+ DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX,
+ info->trn, info,
+ log_array[TRANSLOG_INTERNAL_PARTS +
+ 0].length + changed_length,
+ TRANSLOG_INTERNAL_PARTS + translog_parts,
+ log_array, log_data, NULL));
+}
+
+
+/**
+ @brief
+ Log a LOGREC_REDO_INDEX record for a key page whose data has grown or
+ shrunk at the end of the page
+
+ @param info Maria handler; supplies block size and the transaction
+ the record is written for
+ @param page Position of the page; it is divided by the block size
+ before being stored in the record
+ @param buff Page image; when the page grew, the bytes starting at
+ buff + org_length are appended to the record
+ @param org_length Length before the change
+ @param new_length Length after the change
+
+ @return Result of translog_write_record()
+*/
+
+static my_bool _ma_log_suffix(MARIA_HA *info, my_off_t page,
+ uchar *buff, uint org_length,
+ uint new_length)
+{
+ LSN written_lsn;
+ LEX_STRING parts[TRANSLOG_INTERNAL_PARTS + 2];
+ uchar header[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 10], *op;
+ int length_change;
+ uint parts_used, data_length;
+ DBUG_ENTER("_ma_log_suffix");
+ DBUG_PRINT("enter", ("page: %lu org_length: %u new_length: %u",
+ (ulong) page, org_length, new_length));
+
+ /* The record identifies the page by number, not by file position */
+ page/= info->s->block_size;
+
+ op= header + FILEID_STORE_SIZE;
+ page_store(op, page);
+ op+= PAGE_STORE_SIZE;
+
+ length_change= (int) (new_length - org_length);
+ if (length_change >= 0)
+ {
+ /* Page grew: log the size of the growth followed by the new bytes */
+ op[0]= KEY_OP_ADD_SUFFIX;
+ int2store(op+1, length_change);
+ parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*) buff + org_length;
+ parts[TRANSLOG_INTERNAL_PARTS + 1].length= (uint) length_change;
+ parts_used= 2;
+ data_length= (uint) length_change;
+ }
+ else
+ {
+ /* Page shrank: only the number of removed bytes is logged */
+ op[0]= KEY_OP_DEL_SUFFIX;
+ int2store(op+1, -length_change);
+ parts_used= 1;
+ data_length= 0;
+ }
+ /* Both operations occupy one opcode byte plus a value stored by int2store */
+ op+= 3;
+
+ parts[TRANSLOG_INTERNAL_PARTS + 0].str= header;
+ parts[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (op - header);
+
+ DBUG_RETURN(translog_write_record(&written_lsn, LOGREC_REDO_INDEX,
+ info->trn, info,
+ parts[TRANSLOG_INTERNAL_PARTS +
+ 0].length + data_length,
+ TRANSLOG_INTERNAL_PARTS + parts_used,
+ parts, header, NULL));
+}
+
+
+#ifdef NOT_NEEDED
+
+/**
+ @brief
+ Write log entry for page that has got data added first and
+ data deleted last
+
+ @param info Maria handler; supplies block size, key page
+ header size and the transaction to log for
+ @param page Position of the page; it is divided by the block
+ size before being stored in the record
+ @param buff Page image; data_changed_first bytes found after
+ the key page header are appended to the record
+ @param data_added_first First value stored with KEY_OP_ADD_PREFIX
+ @param data_changed_first Second value stored with KEY_OP_ADD_PREFIX; also
+ the number of page bytes appended to the record
+ @param data_deleted_last Number of bytes removed from the end of the page
+
+ @return Result of translog_write_record()
+*/
+
+static my_bool _ma_log_middle(MARIA_HA *info, my_off_t page,
+ uchar *buff,
+ uint data_added_first, uint data_changed_first,
+ uint data_deleted_last)
+{
+ LSN lsn;
+ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
+ uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 3 + 5], *log_pos;
+ DBUG_ENTER("_ma_log_middle");
+ DBUG_PRINT("enter", ("page: %lu", (ulong) page));
+
+ /* The record identifies the page by number, not by file position */
+ page/= info->s->block_size;
+
+ log_pos= log_data + FILEID_STORE_SIZE;
+ page_store(log_pos, page);
+ log_pos+= PAGE_STORE_SIZE;
+
+ /*
+ Data removed from the end of the page is a suffix delete. It must not be
+ logged as KEY_OP_DEL_PREFIX, which _ma_log_prefix() uses for data removed
+ from the start of the page.
+ */
+ log_pos[0]= KEY_OP_DEL_SUFFIX;
+ int2store(log_pos+1, data_deleted_last);
+ log_pos+= 3;
+
+ /* Then the data added at the start of the page */
+ log_pos[0]= KEY_OP_ADD_PREFIX;
+ int2store(log_pos+1, data_added_first);
+ int2store(log_pos+3, data_changed_first);
+ log_pos+= 5;
+
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
+ log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos -
+ log_data);
+
+ /* Changed bytes are taken from just after the key page header */
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].str= ((char*) buff +
+ info->s->keypage_header);
+ log_array[TRANSLOG_INTERNAL_PARTS + 1].length= data_changed_first;
+ DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX,
+ info->trn, info,
+ log_array[TRANSLOG_INTERNAL_PARTS +
+ 0].length + data_changed_first,
+ TRANSLOG_INTERNAL_PARTS + 2,
+ log_array, log_data, NULL));
+}
+#endif
diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c
index 60b83caa2f1..d36b440541b 100644
--- a/storage/maria/maria_chk.c
+++ b/storage/maria/maria_chk.c
@@ -66,7 +66,7 @@ static const char *field_pack[]=
};
static const char *record_formats[]=
-{
+{
"Fixed length", "Packed", "Compressed", "Block", "?"
};
@@ -1255,6 +1255,8 @@ static void descript(HA_CHECK *param, register MARIA_HA *info, char *name)
printf("\nMARIA file: %s\n",name);
printf("Record format: %s\n", record_formats[share->data_file_type]);
+ printf("Crashsafe: %s\n",
+ share->base.born_transactional ? "yes" : "no");
printf("Character set: %s (%d)\n",
get_charset_name(share->state.header.language),
share->state.header.language);
diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h
index 5351ffdab16..98f076970d4 100644
--- a/storage/maria/maria_def.h
+++ b/storage/maria/maria_def.h
@@ -289,10 +289,14 @@ typedef struct st_maria_share
/* Compare a row in memory with a row on disk */
my_bool (*compare_unique)(MARIA_HA *, MARIA_UNIQUEDEF *,
const uchar *record, MARIA_RECORD_POS pos);
+ my_off_t (*keypos_to_recpos)(MARIA_HA *info, my_off_t pos);
+ my_off_t (*recpos_to_keypos)(MARIA_HA *info, my_off_t pos);
+
/* Mapings to read/write the data file */
size_t (*file_read)(MARIA_HA *, uchar *, size_t, my_off_t, myf);
size_t (*file_write)(MARIA_HA *, const uchar *, size_t, my_off_t, myf);
invalidator_by_filename invalidator; /* query cache invalidator */
+ my_off_t current_key_del; /* delete links for index pages */
ulong this_process; /* processid */
ulong last_process; /* For table-change-check */
ulong last_version; /* Version on start */
@@ -333,9 +337,11 @@ typedef struct st_maria_share
(FALSE, TRUE) is impossible.
*/
my_bool now_transactional;
+ my_bool used_key_del; /* != 0 if key_del is locked */
#ifdef THREAD
THR_LOCK lock;
pthread_mutex_t intern_lock; /* Locking for use with _locking */
+ pthread_cond_t intern_cond;
rw_lock_t *key_root_lock;
#endif
my_off_t mmaped_length;
@@ -382,6 +388,7 @@ typedef struct st_maria_row
MARIA_RECORD_POS lastpos, nextpos;
MARIA_RECORD_POS *tail_positions;
ha_checksum checksum;
+ LSN orig_undo_lsn; /* Lsn at start of row insert */
uchar *empty_bits, *field_lengths;
uint *null_field_lengths; /* All null field lengths */
ulong *blob_lengths; /* Length for each blob */
@@ -464,7 +471,7 @@ struct st_maria_handler
enum ha_rkey_function last_key_func; /* CONTAIN, OVERLAP, etc */
uint save_lastkey_length;
uint pack_key_length; /* For MARIAMRG */
- uint16 last_used_keyseg; /* For MARIAMRG */
+ myf lock_wait; /* is 0 or MY_DONT_WAIT */
int errkey; /* Got last error on this key */
int lock_type; /* How database was locked */
int tmp_lock_type; /* When locked by readinfo */
@@ -472,10 +479,12 @@ struct st_maria_handler
uint save_update; /* When using KEY_READ */
int save_lastinx;
uint preload_buff_size; /* When preloading indexes */
- myf lock_wait; /* is 0 or MY_DONT_WAIT */
+ uint16 last_used_keyseg; /* For MARIAMRG */
+ uint8 used_key_del; /* != 0 if key_del is used */
my_bool was_locked; /* Was locked in panic */
my_bool append_insert_at_end; /* Set if concurrent insert */
my_bool quick_mode;
+ /* Marker if key_del_changed */
/* If info->keyread_buff can't be used for rnext */
my_bool page_changed;
/* If info->keyread_buff has to be re-read for rnext */
@@ -625,6 +634,9 @@ struct st_maria_handler
#define MARIA_MIN_ROWS_TO_DISABLE_INDEXES 100
#define MARIA_MIN_ROWS_TO_USE_WRITE_CACHE 10
+/* Marker for impossible delete link */
+#define IMPOSSIBLE_PAGE_NO LL(0xFFFFFFFFFF)
+
/* The UNIQUE check is done with a hashed long key */
#define MARIA_UNIQUE_HASH_TYPE HA_KEYTYPE_ULONG_INT
@@ -654,10 +666,13 @@ extern my_bool maria_inited;
/* This is used by _ma_calc_xxx_key_length och _ma_store_key */
typedef struct st_maria_s_param
{
- uint ref_length, key_length, n_ref_length;
- uint n_length, totlength, part_of_prev_key, prev_length, pack_marker;
const uchar *key;
uchar *prev_key, *next_key_pos;
+ uchar *key_pos; /* For balance page */
+ uint ref_length, key_length, n_ref_length;
+ uint n_length, totlength, part_of_prev_key, prev_length, pack_marker;
+ uint changed_length;
+ int move_length; /* For balance_page */
bool store_not_null;
} MARIA_KEY_PARAM;
@@ -666,7 +681,8 @@ typedef struct st_maria_s_param
typedef struct st_pinned_page
{
PAGECACHE_BLOCK_LINK *link;
- enum pagecache_page_lock unlock;
+ enum pagecache_page_lock unlock, write_lock;
+ my_bool changed;
} MARIA_PINNED_PAGE;
@@ -692,15 +708,16 @@ extern my_bool _ma_delete_static_record(MARIA_HA *info, const uchar *record);
extern my_bool _ma_cmp_static_record(MARIA_HA *info, const uchar *record);
extern int _ma_ck_write(MARIA_HA *info, uint keynr, uchar *key,
uint length);
+extern int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ const uchar *key, MARIA_RECORD_POS *root);
+extern int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
+ uchar *key, uchar *anc_buff, uchar *key_pos,
+ my_off_t anc_page, uchar *key_buff, my_off_t father_page,
+ uchar *father_buff, MARIA_PINNED_PAGE *father_page_link,
+ uchar *father_key_pos, my_bool insert_last);
extern int _ma_ck_real_write_btree(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
uchar *key, uint key_length,
MARIA_RECORD_POS *root, uint comp_flag);
-extern int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
- uchar *key, MARIA_RECORD_POS *root);
-extern int _ma_insert(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key,
- uchar *anc_buff, uchar *key_pos, uchar *key_buff,
- uchar *father_buff, uchar *father_keypos,
- my_off_t father_page, my_bool insert_last);
extern int _ma_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
uchar *key, uchar *buff, uchar *key_buff,
my_bool insert_last);
@@ -709,6 +726,9 @@ extern uchar *_ma_find_half_pos(MARIA_HA *info, uint nod_flag,
uchar *page, uchar *key,
uint *return_key_length,
uchar ** after_key);
+extern my_bool write_hook_for_undo_key(enum translog_record_type type,
+ TRN *trn, MARIA_HA *tbl_info,
+ LSN *lsn, void *hook_arg);
extern int _ma_calc_static_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag,
uchar *key_pos, uchar *org_key,
uchar *key_buff, const uchar *key,
@@ -727,19 +747,22 @@ extern int _ma_calc_bin_pack_key_length(MARIA_KEYDEF *keyinfo,
uchar *org_key, uchar *prev_key,
const uchar *key,
MARIA_KEY_PARAM *s_temp);
-void _ma_store_static_key(MARIA_KEYDEF *keyinfo, uchar *key_pos,
- MARIA_KEY_PARAM *s_temp);
-void _ma_store_var_pack_key(MARIA_KEYDEF *keyinfo, uchar *key_pos,
- MARIA_KEY_PARAM *s_temp);
+extern void _ma_store_static_key(MARIA_KEYDEF *keyinfo, uchar *key_pos,
+ MARIA_KEY_PARAM *s_temp);
+extern void _ma_store_var_pack_key(MARIA_KEYDEF *keyinfo, uchar *key_pos,
+ MARIA_KEY_PARAM *s_temp);
#ifdef NOT_USED
-void _ma_store_pack_key(MARIA_KEYDEF *keyinfo, uchar *key_pos,
- MARIA_KEY_PARAM *s_temp);
+extern void _ma_store_pack_key(MARIA_KEYDEF *keyinfo, uchar *key_pos,
+ MARIA_KEY_PARAM *s_temp);
#endif
-void _ma_store_bin_pack_key(MARIA_KEYDEF *keyinfo, uchar *key_pos,
- MARIA_KEY_PARAM *s_temp);
+extern void _ma_store_bin_pack_key(MARIA_KEYDEF *keyinfo, uchar *key_pos,
+ MARIA_KEY_PARAM *s_temp);
extern int _ma_ck_delete(MARIA_HA *info, uint keynr, uchar *key,
uint key_length);
+extern int _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
+ uchar *key, uint key_length,
+ my_off_t *root);
extern int _ma_readinfo(MARIA_HA *info, int lock_flag, int check_keybuffer);
extern int _ma_writeinfo(MARIA_HA *info, uint options);
extern int _ma_test_if_changed(MARIA_HA *info);
@@ -749,22 +772,22 @@ extern int _ma_check_index(MARIA_HA *info, int inx);
extern int _ma_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uchar *key,
uint key_len, uint nextflag, my_off_t pos);
extern int _ma_bin_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
- uchar *page, uchar *key, uint key_len,
+ uchar *page, const uchar *key, uint key_len,
uint comp_flag, uchar **ret_pos, uchar *buff,
my_bool *was_last_key);
extern int _ma_seq_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
- uchar *page, uchar *key, uint key_len,
+ uchar *page, const uchar *key, uint key_len,
uint comp_flag, uchar ** ret_pos, uchar *buff,
my_bool *was_last_key);
extern int _ma_prefix_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
- uchar *page, uchar *key, uint key_len,
+ uchar *page, const uchar *key, uint key_len,
uint comp_flag, uchar ** ret_pos, uchar *buff,
my_bool *was_last_key);
extern my_off_t _ma_kpos(uint nod_flag, uchar *after_key);
extern void _ma_kpointer(MARIA_HA *info, uchar *buff, my_off_t pos);
extern MARIA_RECORD_POS _ma_dpos(MARIA_HA *info, uint nod_flag,
const uchar *after_key);
-extern MARIA_RECORD_POS _ma_rec_pos(MARIA_SHARE *info, uchar *ptr);
+extern MARIA_RECORD_POS _ma_rec_pos(MARIA_HA *info, uchar *ptr);
extern void _ma_dpointer(MARIA_HA *info, uchar *buff, MARIA_RECORD_POS pos);
extern uint _ma_get_static_key(MARIA_KEYDEF *keyinfo, uint nod_flag,
uchar **page, uchar *key);
@@ -789,14 +812,22 @@ extern int _ma_search_first(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
my_off_t pos);
extern int _ma_search_last(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
my_off_t pos);
+extern my_off_t _ma_static_keypos_to_recpos(MARIA_HA *info, my_off_t pos);
+extern my_off_t _ma_static_recpos_to_keypos(MARIA_HA *info, my_off_t pos);
+extern my_off_t _ma_transparent_recpos(MARIA_HA *info, my_off_t pos);
+extern my_off_t _ma_transaction_keypos_to_recpos(MARIA_HA *info, my_off_t pos);
+extern my_off_t _ma_transaction_recpos_to_keypos(MARIA_HA *info, my_off_t pos);
+
extern uchar *_ma_fetch_keypage(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
- my_off_t page, int level, uchar *buff,
- int return_buffer);
+ my_off_t page, enum pagecache_page_lock lock,
+ int level, uchar *buff, int return_buffer,
+ MARIA_PINNED_PAGE **page_link);
extern int _ma_write_keypage(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
- my_off_t page, int level, uchar *buff);
-extern int _ma_dispose(MARIA_HA *info, MARIA_KEYDEF *keyinfo, my_off_t pos,
- int level);
-extern my_off_t _ma_new(MARIA_HA *info, MARIA_KEYDEF *keyinfo, int level);
+ my_off_t page, enum pagecache_page_lock lock,
+ int level, uchar *buff);
+extern int _ma_dispose(MARIA_HA *info, my_off_t pos, my_bool page_not_read);
+extern my_off_t _ma_new(register MARIA_HA *info, int level,
+ MARIA_PINNED_PAGE **page_link);
extern uint _ma_make_key(MARIA_HA *info, uint keynr, uchar *key,
const uchar *record, MARIA_RECORD_POS filepos);
extern uint _ma_pack_key(MARIA_HA *info, uint keynr, uchar *key,
@@ -881,6 +912,21 @@ typedef struct st_maria_block_info
#define SORT_BUFFER_INIT (2048L*1024L-MALLOC_OVERHEAD)
#define MIN_SORT_BUFFER (4096-MALLOC_OVERHEAD)
+/*
+ Struct for clr_end
+
+ NOTE(review): judging by the name, this is the message handed to the
+ write hook used when a CLR_END record is logged. The hook is not visible
+ here, so confirm how each field is consumed.
+*/
+
+struct st_msg_to_write_hook_for_clr_end
+{
+ LSN previous_undo_lsn; /* presumably LSN of the undo before the undone one - confirm */
+ enum translog_record_type undone_record_type; /* presumably type of the record that was undone - confirm */
+ ha_checksum checksum_delta; /* presumably checksum change caused by the undo - confirm */
+};
+/*
+ Struct for undo_key
+
+ NOTE(review): presumably passed as hook_arg to write_hook_for_undo_key();
+ the hook body is not visible here, so confirm how the fields are used.
+*/
+struct st_msg_to_write_hook_for_undo_key
+{
+ my_off_t *root; /* presumably the key root the hook updates - confirm */
+ my_off_t value; /* presumably the value to store in *root - confirm */
+};
+
#define fast_ma_writeinfo(INFO) if (!(INFO)->s->tot_locks) (void) _ma_writeinfo((INFO),0)
#define fast_ma_readinfo(INFO) ((INFO)->lock_type == F_UNLCK) && _ma_readinfo((INFO),F_RDLCK,1)
@@ -987,7 +1033,6 @@ int _ma_update_create_rename_lsn(MARIA_SHARE *share,
int _ma_update_create_rename_lsn_sub(MARIA_SHARE *share,
LSN lsn, my_bool do_sync);
-void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn);
#define _ma_tmp_disable_logging_for_table(S) \
{ (S)->now_transactional= FALSE; (S)->page_type= PAGECACHE_PLAIN_PAGE; }
#define _ma_reenable_logging_for_table(S) \
diff --git a/storage/maria/unittest/ma_pagecache_single.c b/storage/maria/unittest/ma_pagecache_single.c
index 8add95e8a36..48f088aed7d 100644
--- a/storage/maria/unittest/ma_pagecache_single.c
+++ b/storage/maria/unittest/ma_pagecache_single.c
@@ -236,7 +236,7 @@ int simple_pin_test()
0,
PAGECACHE_LOCK_READ_UNLOCK,
PAGECACHE_UNPIN,
- 0, 0);
+ 0, 0, 0);
if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE))
{
diag("Got error in flush_pagecache_blocks\n");
diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c
index b41f06a5fb8..152ffd3bb55 100644
--- a/storage/myisam/mi_check.c
+++ b/storage/myisam/mi_check.c
@@ -1008,7 +1008,7 @@ int chk_data_link(HA_CHECK *param, MI_INFO *info,int extend)
del_length+=info->s->base.pack_reclength;
continue; /* Record removed */
}
- param->glob_crc+= mi_static_checksum(info,record);
+ param->glob_crc+= (*info->s->calc_check_checksum)(info,record);
used+=info->s->base.pack_reclength;
break;
case DYNAMIC_RECORD:
@@ -1162,7 +1162,7 @@ int chk_data_link(HA_CHECK *param, MI_INFO *info,int extend)
}
else
{
- info->checksum=mi_checksum(info,record);
+ info->checksum= (*info->s->calc_check_checksum)(info,record);
if (param->testflag & (T_EXTEND | T_MEDIUM | T_VERBOSE))
{
if (_mi_rec_check(info,record, info->rec_buff,block_info.rec_len,
@@ -1208,10 +1208,7 @@ int chk_data_link(HA_CHECK *param, MI_INFO *info,int extend)
llstr(start_recpos,llbuff));
got_error=1;
}
- if (static_row_size)
- param->glob_crc+= mi_static_checksum(info,record);
- else
- param->glob_crc+= mi_checksum(info,record);
+ param->glob_crc+= (*info->s->calc_check_checksum)(info,record);
link_used+= (block_info.filepos - start_recpos);
used+= (pos-start_recpos);
} /* switch */
@@ -3164,7 +3161,9 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
{
if (sort_param->calc_checksum)
param->glob_crc+= (info->checksum=
- mi_static_checksum(info,sort_param->record));
+ (*info->s->calc_check_checksum)(info,
+ sort_param->
+ record));
DBUG_RETURN(0);
}
if (!sort_param->fix_datafile && sort_param->master)
@@ -3440,7 +3439,8 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
if (sort_param->read_cache.error < 0)
DBUG_RETURN(1);
if (sort_param->calc_checksum)
- info->checksum= mi_checksum(info, sort_param->record);
+ info->checksum= (*info->s->calc_check_checksum)(info,
+ sort_param->record);
if ((param->testflag & (T_EXTEND | T_REP)) || searching)
{
if (_mi_rec_check(info, sort_param->record, sort_param->rec_buff,
@@ -3525,7 +3525,9 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param)
info->packed_length=block_info.rec_len;
if (sort_param->calc_checksum)
param->glob_crc+= (info->checksum=
- mi_checksum(info, sort_param->record));
+ (*info->s->calc_check_checksum)(info,
+ sort_param->
+ record));
DBUG_RETURN(0);
}
}
@@ -3576,7 +3578,6 @@ int sort_write_record(MI_SORT_PARAM *sort_param)
}
sort_param->filepos+=share->base.pack_reclength;
info->s->state.split++;
- /* sort_info->param->glob_crc+=mi_static_checksum(info, sort_param->record); */
break;
case DYNAMIC_RECORD:
if (! info->blobs)
@@ -3599,10 +3600,9 @@ int sort_write_record(MI_SORT_PARAM *sort_param)
from= sort_info->buff+ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER);
}
/* We can use info->checksum here as only one thread calls this. */
- info->checksum=mi_checksum(info,sort_param->record);
+ info->checksum= (*info->s->calc_check_checksum)(info,sort_param->record);
reclength=_mi_rec_pack(info,from,sort_param->record);
flag=0;
- /* sort_info->param->glob_crc+=info->checksum; */
do
{
diff --git a/storage/myisam/mi_checksum.c b/storage/myisam/mi_checksum.c
index 1aa56e571e3..8c408ef7ff5 100644
--- a/storage/myisam/mi_checksum.c
+++ b/storage/myisam/mi_checksum.c
@@ -19,27 +19,34 @@
ha_checksum mi_checksum(MI_INFO *info, const uchar *buf)
{
- uint i;
ha_checksum crc=0;
- MI_COLUMNDEF *rec=info->s->rec;
+ const uchar *record= buf;
+ MI_COLUMNDEF *column= info->s->rec;
+ MI_COLUMNDEF *column_end= column+ info->s->base.fields;
+ my_bool skip_null_bits= test(info->s->options & HA_OPTION_NULL_FIELDS);
- for (i=info->s->base.fields ; i-- ; buf+=(rec++)->length)
+ for ( ; column != column_end ; buf+= column++->length)
{
const uchar *pos;
ulong length;
- switch (rec->type) {
+
+ if ((record[column->null_pos] & column->null_bit) &&
+ skip_null_bits)
+ continue; /* Null field */
+
+ switch (column->type) {
case FIELD_BLOB:
{
- length=_mi_calc_blob_length(rec->length-
- portable_sizeof_char_ptr,
- buf);
- memcpy((char*) &pos, buf+rec->length- portable_sizeof_char_ptr,
+ length=_mi_calc_blob_length(column->length-
+ portable_sizeof_char_ptr,
+ buf);
+ memcpy((char*) &pos, buf+column->length- portable_sizeof_char_ptr,
sizeof(char*));
break;
}
case FIELD_VARCHAR:
{
- uint pack_length= HA_VARCHAR_PACKLENGTH(rec->length-1);
+ uint pack_length= HA_VARCHAR_PACKLENGTH(column->length-1);
if (pack_length == 1)
length= (ulong) *(uchar*) buf;
else
@@ -48,7 +55,7 @@ ha_checksum mi_checksum(MI_INFO *info, const uchar *buf)
break;
}
default:
- length=rec->length;
+ length=column->length;
pos=buf;
break;
}
diff --git a/storage/myisam/mi_create.c b/storage/myisam/mi_create.c
index fc5b31e7689..38e518fd823 100644
--- a/storage/myisam/mi_create.c
+++ b/storage/myisam/mi_create.c
@@ -108,6 +108,9 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
rec++,fields++)
{
reclength+=rec->length;
+ if (rec->null_bit)
+ options|= HA_OPTION_NULL_FIELDS;
+
if ((type=(enum en_fieldtype) rec->type) != FIELD_NORMAL &&
type != FIELD_CHECK)
{
@@ -142,6 +145,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
long_varchar_count++;
pack_reclength+= 2; /* May be packed on 3 bytes */
}
+ options|= HA_OPTION_NULL_FIELDS; /* Use of mi_checksum() */
}
else if (type != FIELD_SKIP_ZERO)
{
diff --git a/storage/myisam/mi_open.c b/storage/myisam/mi_open.c
index b0cc2e54ca7..5ce8ec0275a 100644
--- a/storage/myisam/mi_open.c
+++ b/storage/myisam/mi_open.c
@@ -144,7 +144,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA |
HA_OPTION_TEMP_COMPRESS_RECORD | HA_OPTION_CHECKSUM |
HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE |
- HA_OPTION_RELIES_ON_SQL_LAYER))
+ HA_OPTION_RELIES_ON_SQL_LAYER | HA_OPTION_NULL_FIELDS))
{
DBUG_PRINT("error",("wrong options: 0x%lx", share->options));
my_errno=HA_ERR_OLD_FILE;
@@ -737,12 +737,14 @@ void mi_setup_functions(register MYISAM_SHARE *share)
{
share->read_record=_mi_read_pack_record;
share->read_rnd=_mi_read_rnd_pack_record;
- if (!(share->options & HA_OPTION_TEMP_COMPRESS_RECORD))
- share->calc_checksum=0; /* No checksum */
- else if (share->options & HA_OPTION_PACK_RECORD)
+ if ((share->options &
+ (HA_OPTION_PACK_RECORD | HA_OPTION_NULL_FIELDS)))
share->calc_checksum= mi_checksum;
else
share->calc_checksum= mi_static_checksum;
+ share->calc_check_checksum= share->calc_checksum;
+ if (!(share->options & HA_OPTION_TEMP_COMPRESS_RECORD))
+ share->calc_checksum=0; /* No checksum */
}
else if (share->options & HA_OPTION_PACK_RECORD)
{
@@ -752,6 +754,7 @@ void mi_setup_functions(register MYISAM_SHARE *share)
share->compare_record=_mi_cmp_dynamic_record;
share->compare_unique=_mi_cmp_dynamic_unique;
share->calc_checksum= mi_checksum;
+ share->calc_check_checksum= share->calc_checksum;
/* add bits used to pack data to pack_reclength for faster allocation */
share->base.pack_reclength+= share->base.pack_bits;
@@ -775,7 +778,11 @@ void mi_setup_functions(register MYISAM_SHARE *share)
share->update_record=_mi_update_static_record;
share->write_record=_mi_write_static_record;
share->compare_unique=_mi_cmp_static_unique;
- share->calc_checksum= mi_static_checksum;
+ if (share->options & HA_OPTION_NULL_FIELDS)
+ share->calc_checksum= mi_checksum;
+ else
+ share->calc_checksum= mi_static_checksum;
+ share->calc_check_checksum= share->calc_checksum;
}
share->file_read= mi_nommap_pread;
share->file_write= mi_nommap_pwrite;
diff --git a/storage/myisam/mi_test2.c b/storage/myisam/mi_test2.c
index fd8adeed1c5..6fb71feb1e7 100644
--- a/storage/myisam/mi_test2.c
+++ b/storage/myisam/mi_test2.c
@@ -657,10 +657,10 @@ int main(int argc, char *argv[])
sprintf((char*) key2,"%6d",k);
min_key.key= key;
- min_key.length= USE_WHOLE_KEY;
+ min_key.keypart_map= HA_WHOLE_KEY;
min_key.flag= HA_READ_AFTER_KEY;
max_key.key= key2;
- max_key.length= USE_WHOLE_KEY;
+ max_key.keypart_map= HA_WHOLE_KEY;
max_key.flag= HA_READ_BEFORE_KEY;
range_records= mi_records_in_range(file, 0, &min_key, &max_key);
records=0;
diff --git a/storage/myisam/myisamdef.h b/storage/myisam/myisamdef.h
index fde80172643..59d54bdc542 100644
--- a/storage/myisam/myisamdef.h
+++ b/storage/myisam/myisamdef.h
@@ -182,7 +182,9 @@ typedef struct st_mi_isam_share
int(*delete_record) (struct st_myisam_info *);
int(*read_rnd) (struct st_myisam_info *, uchar*, my_off_t, my_bool);
int(*compare_record) (struct st_myisam_info *, const uchar*);
- ha_checksum(*calc_checksum) (struct st_myisam_info *, const uchar*);
+ ha_checksum(*calc_checksum) (struct st_myisam_info *, const uchar*);
+ /* calculate checksum for a row during check table */
+ ha_checksum(*calc_check_checksum)(struct st_myisam_info *, const uchar *);
int(*compare_unique) (struct st_myisam_info *, MI_UNIQUEDEF *,
const uchar *record, my_off_t pos);
size_t (*file_read) (MI_INFO *, uchar *, size_t, my_off_t, myf);
@@ -518,8 +520,6 @@ extern void _mi_kpointer(MI_INFO *info, uchar *buff, my_off_t pos);
extern my_off_t _mi_dpos(MI_INFO *info, uint nod_flag, uchar *after_key);
extern my_off_t _mi_rec_pos(MYISAM_SHARE *info, uchar *ptr);
extern void _mi_dpointer(MI_INFO *info, uchar *buff, my_off_t pos);
-extern int ha_key_cmp(HA_KEYSEG *keyseg, uchar *a, uchar *b,
- uint key_length, uint nextflag, uint *diff_length);
extern uint _mi_get_static_key(MI_KEYDEF *keyinfo, uint nod_flag,
uchar **page, uchar *key);
extern uint _mi_get_pack_key(MI_KEYDEF *keyinfo, uint nod_flag, uchar **page,