Diffstat (limited to 'storage/maria/ma_bitmap.c')
-rw-r--r--  storage/maria/ma_bitmap.c | 216
 1 file changed, 199 insertions(+), 17 deletions(-)
diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c
index f1a2e4a1b80..b632fe0a662 100644
--- a/storage/maria/ma_bitmap.c
+++ b/storage/maria/ma_bitmap.c
@@ -132,6 +132,8 @@ uchar maria_bitmap_marker[4]=
{(uchar) 255, (uchar) 255, (uchar) 255, (uchar) 254};
uchar maria_normal_page_marker[4]=
{(uchar) 255, (uchar) 255, (uchar) 255, (uchar) 255};
+/* #define WRONG_BITMAP_FLUSH 1 */  /* Define only to provoke bugs */
+#undef WRONG_BITMAP_FLUSH
static my_bool _ma_read_bitmap_page(MARIA_SHARE *share,
MARIA_FILE_BITMAP *bitmap,
@@ -143,14 +145,48 @@ static my_bool _ma_read_bitmap_page(MARIA_SHARE *share,
static inline my_bool write_changed_bitmap(MARIA_SHARE *share,
MARIA_FILE_BITMAP *bitmap)
{
+ DBUG_ENTER("write_changed_bitmap");
DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size);
- return (pagecache_write(share->pagecache,
- &bitmap->file, bitmap->page, 0,
- (uchar*) bitmap->map, PAGECACHE_PLAIN_PAGE,
- PAGECACHE_LOCK_LEFT_UNLOCKED,
- PAGECACHE_PIN_LEFT_UNPINNED,
- PAGECACHE_WRITE_DELAY, 0,
- LSN_IMPOSSIBLE));
+ DBUG_PRINT("info", ("bitmap->flushable: %d", bitmap->flushable));
+ if (bitmap->flushable
+#ifdef WRONG_BITMAP_FLUSH
+ || 1
+#endif
+ )
+ {
+ my_bool res= pagecache_write(share->pagecache,
+ &bitmap->file, bitmap->page, 0,
+ (uchar*) bitmap->map, PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE);
+ DBUG_RETURN(res);
+ }
+ else
+ {
+    /**
+       @todo RECOVERY BUG
+       Not flushable: the bitmap's content is not reflected by the log, so
+       to honour WAL we must keep the bitmap page pinned. INSERT scenario:
+       - the REDO-UNDO group is written to the log but not forced to disk;
+       - the bitmap goes to the page cache (because another INSERT needs
+         it) and then to disk (pagecache eviction);
+       - crash: Recovery will not find the REDO-UNDO group, and the table
+         is corrupted.
+       Possible solutions: give LSNs to bitmap pages, change the pagecache
+       to flush the whole log when flushing a bitmap page, or keep bitmap
+       pages pinned until checkpoint.
+    */
+ MARIA_PINNED_PAGE page_link;
+    my_bool res= pagecache_write(share->pagecache,
+                                 &bitmap->file, bitmap->page, 0,
+                                 (uchar*) bitmap->map, PAGECACHE_PLAIN_PAGE,
+                                 PAGECACHE_LOCK_WRITE, PAGECACHE_PIN,
+                                 PAGECACHE_WRITE_DELAY, &page_link.link,
+                                 LSN_IMPOSSIBLE);
+ page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
+ push_dynamic(&bitmap->pinned_pages, (void*) &page_link);
+ DBUG_RETURN(res);
+ }
}
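For reference, the pinning path above relies on the MARIA_PINNED_PAGE
bookkeeping. A minimal sketch of the shape this code assumes (the real
definition lives elsewhere in the Maria sources; only the two fields used
above are shown, and the name is suffixed to mark it as an illustration):

    typedef struct st_maria_pinned_page_sketch
    {
      PAGECACHE_BLOCK_LINK *link;      /* filled in by pagecache_write() */
      enum pagecache_page_lock unlock; /* lock mode to apply when unpinning */
    } MARIA_PINNED_PAGE_SKETCH;

Each pinned bitmap page is pushed onto bitmap->pinned_pages and later
released in _ma_bitmap_unpin_all() via pagecache_unlock_by_link().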
/*
@@ -180,7 +216,9 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file)
size*= 2;
#endif
- if (!(bitmap->map= (uchar*) my_malloc(size, MYF(MY_WME))))
+ if (((bitmap->map= (uchar*) my_malloc(size, MYF(MY_WME))) == NULL) ||
+ my_init_dynamic_array(&bitmap->pinned_pages,
+ sizeof(MARIA_PINNED_PAGE), 1, 1))
return 1;
bitmap->file.file= file;
@@ -193,6 +231,7 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file)
The +1 is to add the bitmap page, as this doesn't have to be covered
*/
bitmap->pages_covered= aligned_bit_blocks * 16 + 1;
+ bitmap->flushable= TRUE;
/* Update size for bits */
/* TODO: Make this dependent on the row size */
@@ -207,6 +246,7 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file)
bitmap->sizes[7]= 0;
pthread_mutex_init(&share->bitmap.bitmap_lock, MY_MUTEX_INIT_SLOW);
+ pthread_cond_init(&share->bitmap.bitmap_cond, 0);
_ma_bitmap_reset_cache(share);
@@ -231,6 +271,8 @@ my_bool _ma_bitmap_end(MARIA_SHARE *share)
{
my_bool res= _ma_bitmap_flush(share);
pthread_mutex_destroy(&share->bitmap.bitmap_lock);
+ pthread_cond_destroy(&share->bitmap.bitmap_cond);
+ delete_dynamic(&share->bitmap.pinned_pages);
my_free((uchar*) share->bitmap.map, MYF(MY_ALLOW_ZERO_PTR));
share->bitmap.map= 0;
return res;
@@ -273,6 +315,104 @@ my_bool _ma_bitmap_flush(MARIA_SHARE *share)
}
+/**
+  Dirty-page filtering criterion for bitmap pages
+
+  @param type     Page's type
+  @param pageno   Page's number
+  @param rec_lsn  Page's rec_lsn
+  @param arg      Pointer to the bitmap's pages_covered
+*/
+
+static enum pagecache_flush_filter_result
+filter_flush_bitmap_pages(enum pagecache_page_type type
+ __attribute__ ((unused)),
+ pgcache_page_no_t pageno,
+ LSN rec_lsn __attribute__ ((unused)),
+ void *arg)
+{
+ return ((pageno % (*(ulong*)arg)) == 0);
+}
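Bitmap pages sit at fixed intervals: page 0, then every pages_covered
pages. The filter above therefore selects exactly those page numbers. A
self-contained sketch of the criterion, using a hypothetical interval:

    #include <stdio.h>

    int main(void)
    {
      unsigned long pages_covered= 981;  /* hypothetical interval */
      unsigned long pageno;
      /* Prints 0, 981, 1962, ...: the page numbers the filter passes. */
      for (pageno= 0; pageno < 3000; pageno++)
        if ((pageno % pages_covered) == 0)
          printf("bitmap page: %lu\n", pageno);
      return 0;
    }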
+
+
+/**
+  Flushes the current bitmap page to the pagecache, and then all bitmap
+  pages from the pagecache to the file. Used by Checkpoint.
+
+  @param share Table's share
+*/
+
+my_bool _ma_bitmap_flush_all(MARIA_SHARE *share)
+{
+ my_bool res= 0;
+ MARIA_FILE_BITMAP *bitmap= &share->bitmap;
+ DBUG_ENTER("_ma_bitmap_flush_all");
+ pthread_mutex_lock(&bitmap->bitmap_lock);
+ if (bitmap->changed)
+ {
+#ifndef WRONG_BITMAP_FLUSH
+ while (!bitmap->flushable)
+ {
+ DBUG_PRINT("info", ("waiting for bitmap to be flushable"));
+ pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
+ }
+#endif
+ /*
+ Bitmap is in a flushable state: its contents in memory are reflected by
+ log records (complete REDO-UNDO groups) and all bitmap pages are
+ unpinned. We keep the mutex to preserve this situation, and flush to the
+ file.
+ */
+ res= write_changed_bitmap(share, bitmap);
+ bitmap->changed= FALSE;
+    /*
+      We do NOT use FLUSH_KEEP_LAZY because we must be sure that the bitmap
+      pages have been flushed; that is a correctness condition for
+      Recovery. If all data pages have been flushed and we write the
+      checkpoint record, Recovery will start from after their REDOs; if a
+      bitmap page was not flushed, the REDOs concerning it will be skipped
+      and it will wrongly not be recovered. If bitmap pages had a rec_lsn
+      this would be different.
+      There should be no pinned pages, as bitmap->flushable is true.
+    */
+ if (flush_pagecache_blocks_with_filter(share->pagecache,
+ &bitmap->file, FLUSH_KEEP,
+ filter_flush_bitmap_pages,
+ &bitmap->pages_covered) &
+ PCFLUSH_PINNED_AND_ERROR)
+ res= TRUE;
+ }
+ pthread_mutex_unlock(&bitmap->bitmap_lock);
+ DBUG_RETURN(res);
+}
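A hedged sketch of how a checkpoint pass might drive this function per
table (the helper below is hypothetical; the real Checkpoint code lives in
ma_checkpoint.c, and in_checkpoint is shown as a plain flag only for
illustration):

    static my_bool checkpoint_flush_all_bitmaps(MARIA_SHARE **shares,
                                                uint count)
    {
      uint i;
      my_bool error= FALSE;
      for (i= 0; i < count; i++)
      {
        /* Must be set before _ma_bitmap_flush_all() so that writers know
           to broadcast bitmap_cond when the bitmap becomes flushable
           again (see _ma_bitmap_flushable()). */
        shares[i]->in_checkpoint= TRUE;
        if (_ma_bitmap_flush_all(shares[i]))
          error= TRUE;
        shares[i]->in_checkpoint= FALSE;
      }
      return error;
    }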
+
+
+/**
+  @brief Unpin all pinned bitmap pages
+
+  @param share Table's share
+*/
+
+static void _ma_bitmap_unpin_all(MARIA_SHARE *share)
+{
+ MARIA_FILE_BITMAP *bitmap= &share->bitmap;
+ MARIA_PINNED_PAGE *page_link= ((MARIA_PINNED_PAGE*)
+ dynamic_array_ptr(&bitmap->pinned_pages, 0));
+ MARIA_PINNED_PAGE *pinned_page= page_link + bitmap->pinned_pages.elements;
+ DBUG_ENTER("_ma_bitmap_unpin_all");
+ DBUG_PRINT("info", ("pinned: %u", bitmap->pinned_pages.elements));
+ while (pinned_page-- != page_link)
+ pagecache_unlock_by_link(share->pagecache, pinned_page->link,
+ pinned_page->unlock, PAGECACHE_UNPIN,
+ LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, TRUE);
+ bitmap->pinned_pages.elements= 0;
+ DBUG_VOID_RETURN;
+}
+
+
/*
  Initialize bitmap in memory to a zero bitmap
@@ -684,12 +824,6 @@ static my_bool _ma_change_bitmap_page(MARIA_HA *info,
if (bitmap->changed)
{
- /**
- @todo RECOVERY BUG this is going to flush the bitmap page possibly to
- disk even though it could be over-allocated with not yet any REDO-UNDO
- complete group (WAL violation: no way to undo the over-allocation if
- crash). See also collect_tables().
- */
if (write_changed_bitmap(info->s, bitmap))
DBUG_RETURN(1);
bitmap->changed= 0;
@@ -1973,6 +2107,46 @@ my_bool _ma_bitmap_set_full_page_bits(MARIA_HA *info,
}
+/**
+  Changes the state of MARIA_FILE_BITMAP::flushable.
+
+  If the bitmap becomes flushable (which requires that the REDO-UNDO group
+  has been logged and that all bitmap pages touched by the thread have a
+  correct allocation), all bitmap pages are unpinned and, if a checkpoint
+  is waiting, it is woken up.
+  If the bitmap becomes unflushable, the new state is simply recorded.
+
+  @param share      Table's share
+  @param flushable  New state
+*/
+
+void _ma_bitmap_flushable(MARIA_SHARE *share, my_bool flushable)
+{
+ MARIA_FILE_BITMAP *bitmap= &share->bitmap;
+ if (flushable)
+ {
+ pthread_mutex_lock(&bitmap->bitmap_lock);
+ _ma_bitmap_unpin_all(share);
+ bitmap->flushable= TRUE;
+ pthread_mutex_unlock(&bitmap->bitmap_lock);
+ /*
+ Ok to read in_checkpoint without mutex, as it is set before Checkpoint
+ calls _ma_bitmap_flush_all().
+ */
+ if (share->in_checkpoint)
+ {
+ DBUG_PRINT("info", ("bitmap ready waking up checkpoint"));
+ pthread_cond_broadcast(&bitmap->bitmap_cond);
+ }
+ return;
+ }
+ /*
+ Ok to set without mutex: we didn't touch the bitmap yet; when we touch it
+ we will take the mutex.
+ */
+ bitmap->flushable= FALSE;
+}
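The intended calling pattern, as a hedged sketch (the two helpers below
are hypothetical stand-ins for the real block-record write and logging
paths):

    my_bool hypothetical_write_row(MARIA_HA *info)
    {
      my_bool error;
      /* From here on, any bitmap page we change is pinned in
         write_changed_bitmap() and must not reach disk. */
      _ma_bitmap_flushable(info->s, FALSE);

      error= allocate_blocks_and_write_row(info) ||   /* hypothetical */
             log_complete_redo_undo_group(info);      /* hypothetical */

      /* The REDO-UNDO group is complete (or rolled back): unpin all
         bitmap pages and wake up a waiting checkpoint, if any. */
      _ma_bitmap_flushable(info->s, TRUE);
      return error;
    }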
+
+
/*
Correct bitmap pages to reflect the true allocation
@@ -2015,7 +2189,7 @@ my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks)
*/
current_bitmap_value= FULL_HEAD_PAGE;
- pthread_mutex_lock(&info->s->bitmap.bitmap_lock);
+ pthread_mutex_lock(&bitmap->bitmap_lock);
/* First handle head block */
if (block->used & BLOCKUSED_USED)
@@ -2065,11 +2239,19 @@ my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks)
block->page, page_count))
goto err;
}
- pthread_mutex_unlock(&info->s->bitmap.bitmap_lock);
+
+ _ma_bitmap_unpin_all(info->s);
+ bitmap->flushable= TRUE;
+ pthread_mutex_unlock(&bitmap->bitmap_lock);
+ if (info->s->in_checkpoint)
+ {
+ DBUG_PRINT("info", ("bitmap ready waking up checkpoint"));
+ pthread_cond_broadcast(&bitmap->bitmap_cond);
+ }
DBUG_RETURN(0);
err:
- pthread_mutex_unlock(&info->s->bitmap.bitmap_lock);
+ pthread_mutex_unlock(&bitmap->bitmap_lock);
DBUG_RETURN(1);
}