summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorunknown <bell@desktop.sanja.is.com.ua>2007-02-02 09:41:32 +0200
committerunknown <bell@desktop.sanja.is.com.ua>2007-02-02 09:41:32 +0200
commit025400922118f11a15be54c66455f20e2f72c0b4 (patch)
treef3e73bf1a50802f9f1fd8f4bcee5361654350e32
parent5c7960965c4c178d3a02f9893ea65b2802b38b8f (diff)
downloadmariadb-git-025400922118f11a15be54c66455f20e2f72c0b4.tar.gz
postreview changes for page cache and pre review commit for loghandler
storage/maria/unittest/test_file.c: Rename: unittest/mysys/test_file.c -> storage/maria/unittest/test_file.c storage/maria/unittest/test_file.h: Rename: unittest/mysys/test_file.h -> storage/maria/unittest/test_file.h include/pagecache.h: A waiting queue mechanism moved to separate file wqueue.* Pointer name changed for compatibility mysys/Makefile.am: A waiting queue mechanism moved to separate file wqueue.* mysys/mf_keycache.c: fixed unsigned comparison mysys/mf_pagecache.c: A waiting queue mechanism moved to separate file wqueue.* Fixed bug in unregistering block during write storage/maria/Makefile.am: The loghandler files added storage/maria/ma_control_file.h: Now we have loghandler and can compile control file storage/maria/maria_def.h: Including files need for compilation of maria storage/maria/unittest/Makefile.am: unit tests of loghandler storage/maria/unittest/ma_control_file-t.c: Used maria def storage/maria/unittest/mf_pagecache_consist.c: fixed memory overrun storage/maria/unittest/mf_pagecache_single.c: fixed used uninitialized memory unittest/mysys/Makefile.am: unittests of pagecache moved to maria becase pagecache need loghandler include/wqueue.h: New BitKeeper file ``include/wqueue.h'' mysys/wqueue.c: New BitKeeper file ``mysys/wqueue.c'' storage/maria/ma_loghandler.c: New BitKeeper file ``storage/maria/ma_loghandler.c'' storage/maria/ma_loghandler.h: New BitKeeper file ``storage/maria/ma_loghandler.h'' storage/maria/ma_loghandler_lsn.h: New BitKeeper file ``storage/maria/ma_loghandler_lsn.h'' storage/maria/unittest/ma_test_loghandler-t.c: New BitKeeper file ``storage/maria/unittest/ma_test_loghandler-t.c'' storage/maria/unittest/ma_test_loghandler_multigroup-t.c: New BitKeeper file ``storage/maria/unittest/ma_test_loghandler_multigroup-t.c'' storage/maria/unittest/ma_test_loghandler_multithread-t.c: New BitKeeper file ``storage/maria/unittest/ma_test_loghandler_multithread-t.c'' storage/maria/unittest/ma_test_loghandler_pagecache-t.c: New BitKeeper file ``storage/maria/unittest/ma_test_loghandler_pagecache-t.c''
-rw-r--r--include/pagecache.h65
-rw-r--r--include/wqueue.h26
-rw-r--r--mysys/Makefile.am2
-rw-r--r--mysys/mf_keycache.c2
-rwxr-xr-xmysys/mf_pagecache.c776
-rw-r--r--mysys/wqueue.c167
-rw-r--r--storage/maria/Makefile.am2
-rw-r--r--storage/maria/ma_control_file.h22
-rw-r--r--storage/maria/ma_loghandler.c5417
-rw-r--r--storage/maria/ma_loghandler.h314
-rw-r--r--storage/maria/ma_loghandler_lsn.h39
-rw-r--r--storage/maria/maria_def.h5
-rw-r--r--storage/maria/unittest/Makefile.am60
-rw-r--r--storage/maria/unittest/ma_control_file-t.c2
-rw-r--r--storage/maria/unittest/ma_test_loghandler-t.c540
-rw-r--r--storage/maria/unittest/ma_test_loghandler_multigroup-t.c570
-rw-r--r--storage/maria/unittest/ma_test_loghandler_multithread-t.c468
-rw-r--r--storage/maria/unittest/ma_test_loghandler_pagecache-t.c140
-rwxr-xr-xstorage/maria/unittest/mf_pagecache_consist.c (renamed from unittest/mysys/mf_pagecache_consist.c)33
-rw-r--r--storage/maria/unittest/mf_pagecache_single.c (renamed from unittest/mysys/mf_pagecache_single.c)6
-rw-r--r--storage/maria/unittest/test_file.c (renamed from unittest/mysys/test_file.c)0
-rw-r--r--storage/maria/unittest/test_file.h (renamed from unittest/mysys/test_file.h)0
-rw-r--r--unittest/mysys/Makefile.am43
23 files changed, 8160 insertions, 539 deletions
diff --git a/include/pagecache.h b/include/pagecache.h
index 4d64070ad62..2f745eae0b3 100644
--- a/include/pagecache.h
+++ b/include/pagecache.h
@@ -20,11 +20,13 @@
#define _pagecache_h
C_MODE_START
+#include "../storage/maria/ma_loghandler_lsn.h"
+
/* Type of the page */
enum pagecache_page_type
{
#ifndef DBUG_OFF
- /* used only for control page type chenging during debugging */
+ /* used only for control page type changing during debugging */
PAGECACHE_EMPTY_PAGE,
#endif
/* the page does not contain LSN */
@@ -34,7 +36,7 @@ enum pagecache_page_type
};
/*
- This enum describe lock status changing. every typr of page cache will
+ This enum describe lock status changing. every type of page cache will
interpret WRITE/READ lock as it need.
*/
enum pagecache_page_lock
@@ -71,9 +73,7 @@ enum pagecache_write_mode
typedef void *PAGECACHE_PAGE_LINK;
-/* TODO: move to loghandler emulator */
-typedef void LOG_HANDLER;
-typedef void *LSN;
+typedef void *LSN_PTR;
/* file descriptor for Maria */
typedef struct st_pagecache_file
@@ -82,7 +82,7 @@ typedef struct st_pagecache_file
} PAGECACHE_FILE;
/* page number for maria */
-typedef uint32 maria_page_no_t;
+typedef uint32 pgcache_page_no_t;
/* declare structures that is used by st_pagecache */
@@ -93,11 +93,9 @@ typedef struct st_pagecache_page PAGECACHE_PAGE;
struct st_pagecache_hash_link;
typedef struct st_pagecache_hash_link PAGECACHE_HASH_LINK;
-/* info about requests in a waiting queue */
-typedef struct st_pagecache_wqueue
-{
- struct st_my_thread_var *last_thread; /* circular list of waiting threads */
-} PAGECACHE_WQUEUE;
+#include <wqueue.h>
+
+typedef my_bool (*pagecache_disk_read_validator)(byte *page, gptr data);
#define PAGECACHE_CHANGED_BLOCKS_HASH 128 /* must be power of 2 */
@@ -136,16 +134,14 @@ typedef struct st_pagecache
PAGECACHE_BLOCK_LINK *used_last;/* ptr to the last block of the LRU chain */
PAGECACHE_BLOCK_LINK *used_ins;/* ptr to the insertion block in LRU chain */
pthread_mutex_t cache_lock; /* to lock access to the cache structure */
- PAGECACHE_WQUEUE resize_queue; /* threads waiting during resize operation */
- PAGECACHE_WQUEUE waiting_for_hash_link;/* waiting for a free hash link */
- PAGECACHE_WQUEUE waiting_for_block; /* requests waiting for a free block */
+ WQUEUE resize_queue; /* threads waiting during resize operation */
+ WQUEUE waiting_for_hash_link;/* waiting for a free hash link */
+ WQUEUE waiting_for_block; /* requests waiting for a free block */
/* hash for dirty file bl.*/
PAGECACHE_BLOCK_LINK *changed_blocks[PAGECACHE_CHANGED_BLOCKS_HASH];
/* hash for other file bl.*/
PAGECACHE_BLOCK_LINK *file_blocks[PAGECACHE_CHANGED_BLOCKS_HASH];
- LOG_HANDLER *loghandler; /* loghandler structure */
-
/*
The following variables are and variables used to hold parameters for
initializing the key cache.
@@ -169,24 +165,29 @@ typedef struct st_pagecache
extern int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem,
uint division_limit, uint age_threshold,
- uint block_size,
- LOG_HANDLER *loghandler);
+ uint block_size);
extern int resize_pagecache(PAGECACHE *pagecache,
my_size_t use_mem, uint division_limit,
uint age_threshold);
extern void change_pagecache_param(PAGECACHE *pagecache, uint division_limit,
uint age_threshold);
-extern byte *pagecache_read(PAGECACHE *pagecache,
- PAGECACHE_FILE *file,
- maria_page_no_t pageno,
- uint level,
- byte *buff,
- enum pagecache_page_type type,
- enum pagecache_page_lock lock,
- PAGECACHE_PAGE_LINK *link);
+
+#define pagecache_read(P,F,N,L,B,T,K,I) \
+ pagecache_valid_read(P,F,N,L,B,T,K,I,0,0)
+
+extern byte *pagecache_valid_read(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ uint level,
+ byte *buff,
+ enum pagecache_page_type type,
+ enum pagecache_page_lock lock,
+ PAGECACHE_PAGE_LINK *link,
+ pagecache_disk_read_validator validator,
+ gptr validator_data);
extern my_bool pagecache_write(PAGECACHE *pagecache,
PAGECACHE_FILE *file,
- maria_page_no_t pageno,
+ pgcache_page_no_t pageno,
uint level,
byte *buff,
enum pagecache_page_type type,
@@ -196,20 +197,20 @@ extern my_bool pagecache_write(PAGECACHE *pagecache,
PAGECACHE_PAGE_LINK *link);
extern void pagecache_unlock_page(PAGECACHE *pagecache,
PAGECACHE_FILE *file,
- maria_page_no_t pageno,
+ pgcache_page_no_t pageno,
enum pagecache_page_lock lock,
enum pagecache_page_pin pin,
my_bool stamp_this_page,
- LSN first_REDO_LSN_for_page);
+ LSN_PTR first_REDO_LSN_for_page);
extern void pagecache_unlock(PAGECACHE *pagecache,
PAGECACHE_PAGE_LINK *link,
enum pagecache_page_lock lock,
enum pagecache_page_pin pin,
my_bool stamp_this_page,
- LSN first_REDO_LSN_for_page);
+ LSN_PTR first_REDO_LSN_for_page);
extern void pagecache_unpin_page(PAGECACHE *pagecache,
PAGECACHE_FILE *file,
- maria_page_no_t pageno);
+ pgcache_page_no_t pageno);
extern void pagecache_unpin(PAGECACHE *pagecache,
PAGECACHE_PAGE_LINK *link);
extern int flush_pagecache_blocks(PAGECACHE *keycache,
@@ -217,7 +218,7 @@ extern int flush_pagecache_blocks(PAGECACHE *keycache,
enum flush_type type);
extern my_bool pagecache_delete_page(PAGECACHE *pagecache,
PAGECACHE_FILE *file,
- maria_page_no_t pageno,
+ pgcache_page_no_t pageno,
enum pagecache_page_lock lock,
my_bool flush);
extern void end_pagecache(PAGECACHE *keycache, my_bool cleanup);
diff --git a/include/wqueue.h b/include/wqueue.h
new file mode 100644
index 00000000000..bacabb8c401
--- /dev/null
+++ b/include/wqueue.h
@@ -0,0 +1,26 @@
+
+#ifndef _wqueue_h
+#define _wqueue_h
+
+#include <my_global.h>
+#include <my_pthread.h>
+
+/* info about requests in a waiting queue */
+typedef struct st_pagecache_wqueue
+{
+ struct st_my_thread_var *last_thread; /* circular list of waiting
+ threads */
+} WQUEUE;
+
+#ifdef THREAD
+void wqueue_link_into_queue(WQUEUE *wqueue, struct st_my_thread_var *thread);
+void wqueue_unlink_from_queue(WQUEUE *wqueue, struct st_my_thread_var *thread);
+void wqueue_add_to_queue(WQUEUE *wqueue, struct st_my_thread_var *thread);
+void wqueue_add_and_wait(WQUEUE *wqueue,
+ struct st_my_thread_var *thread,
+ pthread_mutex_t *lock);
+void wqueue_release_queue(WQUEUE *wqueue);
+
+#endif
+
+#endif
diff --git a/mysys/Makefile.am b/mysys/Makefile.am
index 4d9570febbd..612411404c4 100644
--- a/mysys/Makefile.am
+++ b/mysys/Makefile.am
@@ -56,7 +56,7 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \
my_handler.c my_netware.c my_largepage.c \
my_memmem.c \
my_windac.c my_access.c base64.c my_libwrap.c \
- mf_pagecache.c
+ mf_pagecache.c wqueue.c
EXTRA_DIST = thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \
thr_mutex.c thr_rwlock.c \
CMakeLists.txt mf_soundex.c \
diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c
index 9a99a278bc5..9cb428ab200 100644
--- a/mysys/mf_keycache.c
+++ b/mysys/mf_keycache.c
@@ -1008,12 +1008,12 @@ static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block)
KEYCACHE_THREAD_TRACE("unlink_block");
#if defined(KEYCACHE_DEBUG)
+ KEYCACHE_DBUG_ASSERT(keycache->blocks_available != 0);
keycache->blocks_available--;
KEYCACHE_DBUG_PRINT("unlink_block",
("unlinked block %u status=%x #requests=%u #available=%u",
BLOCK_NUMBER(block), block->status,
block->requests, keycache->blocks_available));
- KEYCACHE_DBUG_ASSERT(keycache->blocks_available >= 0);
#endif
}
diff --git a/mysys/mf_pagecache.c b/mysys/mf_pagecache.c
index 4b92f68d9bf..97cb542f329 100755
--- a/mysys/mf_pagecache.c
+++ b/mysys/mf_pagecache.c
@@ -26,7 +26,7 @@
When a new block is required it is first tried to pop one from the stack.
If the stack is empty, it is tried to get a never-used block from the pool.
If this is empty too, then a block is taken from the LRU ring, flushing it
- to disk, if necessary. This is handled in find_key_block().
+ to disk, if necessary. This is handled in find_block().
With the new free list, the blocks can have three temperatures:
hot, warm and cold (which is free). This is remembered in the block header
by the enum BLOCK_TEMPERATURE temperature variable. Remembering the
@@ -91,13 +91,16 @@
/*
In key cache we have external raw locking here we use
SERIALIZED_READ_FROM_CACHE to avoid problem of reading
- not consistent data from te page
+ not consistent data from the page.
+ (keycache functions (key_cache_read(), key_cache_insert() and
+ key_cache_write()) rely on external MyISAM lock, we don't)
*/
#define SERIALIZED_READ_FROM_CACHE yes
#define BLOCK_INFO(B) \
DBUG_PRINT("info", \
- ("block 0x%lx, file %lu, page %lu, s %0x, hshL 0x%lx, req %u/%u", \
+ ("block 0x%lx file %lu page %lu s %0x hshL 0x%lx req %u/%u " \
+ "wrlock: %c", \
(ulong)(B), \
(ulong)((B)->hash_link ? \
(B)->hash_link->file.file : \
@@ -110,7 +113,8 @@
(uint) (B)->requests, \
(uint)((B)->hash_link ? \
(B)->hash_link->requests : \
- 0)))
+ 0), \
+ ((block->status & BLOCK_WRLOCK)?'Y':'N')))
/* TODO: put it to my_static.c */
my_bool my_disable_flush_pagecache_blocks= 0;
@@ -138,7 +142,7 @@ typedef pthread_cond_t KEYCACHE_CONDVAR;
struct st_pagecache_page
{
PAGECACHE_FILE file; /* file to which the page belongs to */
- maria_page_no_t pageno; /* number of the page in the file */
+ pgcache_page_no_t pageno; /* number of the page in the file */
};
/* element in the chain of a hash table bucket */
@@ -149,7 +153,7 @@ struct st_pagecache_hash_link
struct st_pagecache_block_link
*block; /* reference to the block for the page: */
PAGECACHE_FILE file; /* from such a file */
- maria_page_no_t pageno; /* this page */
+ pgcache_page_no_t pageno; /* this page */
uint requests; /* number of requests for the page */
};
@@ -162,7 +166,7 @@ struct st_pagecache_hash_link
#define BLOCK_CHANGED 32 /* block buffer contains a dirty page */
#define BLOCK_WRLOCK 64 /* write locked block */
-/* page status, returned by find_key_block */
+/* page status, returned by find_block */
#define PAGE_READ 0
#define PAGE_TO_BE_READ 1
#define PAGE_WAIT_TO_BE_READ 2
@@ -232,7 +236,7 @@ typedef struct st_pagecache_lock_info
node the node which should be linked
*/
-void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node)
+static void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node)
{
if ((node->next= *list))
node->next->prev= &(node->next);
@@ -249,7 +253,7 @@ void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node)
node the node which should be unlinked
*/
-void info_unlink(PAGECACHE_PIN_INFO *node)
+static void info_unlink(PAGECACHE_PIN_INFO *node)
{
if ((*node->prev= node->next))
node->next->prev= node->prev;
@@ -271,8 +275,8 @@ void info_unlink(PAGECACHE_PIN_INFO *node)
pointer to the information node of the thread in the list
*/
-PAGECACHE_PIN_INFO *info_find(PAGECACHE_PIN_INFO *list,
- struct st_my_thread_var *thread)
+static PAGECACHE_PIN_INFO *info_find(PAGECACHE_PIN_INFO *list,
+ struct st_my_thread_var *thread)
{
register PAGECACHE_PIN_INFO *i= list;
for(; i != 0; i= i->next)
@@ -291,7 +295,7 @@ struct st_pagecache_block_link
*next_changed, **prev_changed; /* for lists of file dirty/clean blocks */
struct st_pagecache_hash_link
*hash_link; /* backward ptr to referring hash_link */
- PAGECACHE_WQUEUE
+ WQUEUE
wqueue[COND_SIZE]; /* queues on waiting requests for new/old pages */
uint requests; /* number of requests for the block */
byte *buffer; /* buffer for the block page */
@@ -310,8 +314,8 @@ struct st_pagecache_block_link
#ifdef PAGECACHE_DEBUG
/* debug checks */
-my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block,
- enum pagecache_page_pin mode)
+static my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block,
+ enum pagecache_page_pin mode)
{
struct st_my_thread_var *thread= my_thread_var;
DBUG_ENTER("info_check_pin");
@@ -367,9 +371,9 @@ my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block,
1 - Error
*/
-my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block,
- enum pagecache_page_lock lock,
- enum pagecache_page_pin pin)
+static my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block,
+ enum pagecache_page_lock lock,
+ enum pagecache_page_pin pin)
{
struct st_my_thread_var *thread= my_thread_var;
DBUG_ENTER("info_check_lock");
@@ -379,47 +383,47 @@ my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block,
switch(lock)
{
case PAGECACHE_LOCK_LEFT_UNLOCKED:
- DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED);
- if (info)
+ if (pin != PAGECACHE_PIN_LEFT_UNPINNED ||
+ info)
goto error;
break;
case PAGECACHE_LOCK_LEFT_READLOCKED:
- DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED ||
- pin == PAGECACHE_PIN_LEFT_PINNED);
- if (info == 0 || info->write_lock)
+ if ((pin != PAGECACHE_PIN_LEFT_UNPINNED &&
+ pin != PAGECACHE_PIN_LEFT_PINNED) ||
+ info == 0 || info->write_lock)
goto error;
break;
case PAGECACHE_LOCK_LEFT_WRITELOCKED:
- DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_PINNED);
- if (info == 0 || !info->write_lock)
+ if (pin != PAGECACHE_PIN_LEFT_PINNED ||
+ info == 0 || !info->write_lock)
goto error;
break;
case PAGECACHE_LOCK_READ:
- DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED ||
- pin == PAGECACHE_PIN);
- if (info != 0)
+ if ((pin != PAGECACHE_PIN_LEFT_UNPINNED &&
+ pin != PAGECACHE_PIN) ||
+ info != 0)
goto error;
break;
case PAGECACHE_LOCK_WRITE:
- DBUG_ASSERT(pin == PAGECACHE_PIN);
- if (info != 0)
+ if (pin != PAGECACHE_PIN ||
+ info != 0)
goto error;
break;
case PAGECACHE_LOCK_READ_UNLOCK:
- DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_UNPINNED ||
- pin == PAGECACHE_UNPIN);
- if (info == 0 || info->write_lock)
+ if ((pin != PAGECACHE_PIN_LEFT_UNPINNED &&
+ pin != PAGECACHE_UNPIN) ||
+ info == 0 || info->write_lock)
goto error;
break;
case PAGECACHE_LOCK_WRITE_UNLOCK:
- DBUG_ASSERT(pin == PAGECACHE_UNPIN);
- if (info == 0 || !info->write_lock)
+ if (pin != PAGECACHE_UNPIN ||
+ info == 0 || !info->write_lock)
goto error;
break;
case PAGECACHE_LOCK_WRITE_TO_READ:
- DBUG_ASSERT(pin == PAGECACHE_PIN_LEFT_PINNED ||
- pin == PAGECACHE_UNPIN);
- if (info == 0 || !info->write_lock)
+ if ((pin != PAGECACHE_PIN_LEFT_PINNED &&
+ pin != PAGECACHE_UNPIN) ||
+ info == 0 || !info->write_lock)
goto error;
break;
}
@@ -439,12 +443,6 @@ error:
#define FLUSH_CACHE 2000 /* sort this many blocks at once */
static int flush_all_key_blocks(PAGECACHE *pagecache);
-#ifdef THREAD
-static void link_into_queue(PAGECACHE_WQUEUE *wqueue,
- struct st_my_thread_var *thread);
-static void unlink_from_queue(PAGECACHE_WQUEUE *wqueue,
- struct st_my_thread_var *thread);
-#endif
static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block);
static void test_key_cache(PAGECACHE *pagecache,
const char *where, my_bool lock);
@@ -551,6 +549,7 @@ static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond);
#define pagecache_pthread_cond_signal pthread_cond_signal
#endif /* defined(PAGECACHE_DEBUG) */
+extern my_bool translog_flush(LSN *lsn);
/*
Write page to the disk
@@ -567,18 +566,28 @@ static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond);
0 - OK
!=0 - Error
*/
-uint pagecache_fwrite(PAGECACHE *pagecache,
- PAGECACHE_FILE *filedesc,
- byte *buffer,
- maria_page_no_t pageno,
- enum pagecache_page_type type,
- myf flags)
+
+static uint pagecache_fwrite(PAGECACHE *pagecache,
+ PAGECACHE_FILE *filedesc,
+ byte *buffer,
+ pgcache_page_no_t pageno,
+ enum pagecache_page_type type,
+ myf flags)
{
DBUG_ENTER("pagecache_fwrite");
if (type == PAGECACHE_LSN_PAGE)
{
+ LSN lsn;
DBUG_PRINT("info", ("Log handler call"));
- /* TODO: put here loghandler call */
+ /* TODO: integrate with page format */
+#define PAGE_LSN_OFFSET 0
+ lsn7korr(&lsn, buffer + PAGE_LSN_OFFSET);
+ /*
+ check CONTROL_FILE_IMPOSSIBLE_FILENO &
+ CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET
+ */
+ DBUG_ASSERT(lsn.file_no != 0 && lsn.rec_offset != 0);
+ translog_flush(&lsn);
}
DBUG_RETURN(my_pwrite(filedesc->file, buffer, pagecache->block_size,
(pageno)<<(pagecache->shift), flags));
@@ -628,8 +637,6 @@ static uint next_power(uint value)
division_limit division limit (may be zero)
age_threshold age threshold (may be zero)
block_size size of block (should be power of 2)
- loghandler logfandler pointer to call it in case of
- pages with LSN
RETURN VALUE
number of blocks in the key cache, if successful,
@@ -647,12 +654,11 @@ static uint next_power(uint value)
int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem,
uint division_limit, uint age_threshold,
- uint block_size,
- LOG_HANDLER *loghandler)
+ uint block_size)
{
- int blocks, hash_links, length;
+ uint blocks, hash_links, length;
int error;
- DBUG_ENTER("init_key_cache");
+ DBUG_ENTER("init_pagecache");
DBUG_ASSERT(block_size >= 512);
PAGECACHE_DEBUG_OPEN;
@@ -662,8 +668,6 @@ int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem,
DBUG_RETURN(0);
}
- pagecache->loghandler= loghandler;
-
pagecache->global_cache_w_requests= pagecache->global_cache_r_requests= 0;
pagecache->global_cache_read= pagecache->global_cache_write= 0;
pagecache->disk_blocks= -1;
@@ -692,8 +696,8 @@ int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem,
for ( ; ; )
{
/* Set my_hash_entries to the next bigger 2 power */
- if ((pagecache->hash_entries= next_power((uint)blocks)) <
- ((uint)blocks) * 5/4)
+ if ((pagecache->hash_entries= next_power(blocks)) <
+ (blocks) * 5/4)
pagecache->hash_entries<<= 1;
hash_links= 2 * blocks;
#if defined(MAX_THREADS)
@@ -704,7 +708,7 @@ int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem,
ALIGN_SIZE(hash_links * sizeof(PAGECACHE_HASH_LINK)) +
ALIGN_SIZE(sizeof(PAGECACHE_HASH_LINK*) *
pagecache->hash_entries))) +
- ((ulong) blocks << pagecache->shift) > use_mem)
+ (((ulong) blocks) << pagecache->shift) > use_mem)
blocks--;
/* Allocate memory for cache page buffers */
if ((pagecache->block_mem=
@@ -760,10 +764,10 @@ int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem,
pagecache->warm_blocks= 0;
pagecache->min_warm_blocks= (division_limit ?
blocks * division_limit / 100 + 1 :
- (ulong)blocks);
+ blocks);
pagecache->age_threshold= (age_threshold ?
blocks * age_threshold / 100 :
- (ulong)blocks);
+ blocks);
pagecache->cnt_for_resize_op= 0;
pagecache->resize_in_flush= 0;
@@ -842,7 +846,8 @@ int resize_pagecache(PAGECACHE *pagecache,
{
int blocks;
struct st_my_thread_var *thread;
- PAGECACHE_WQUEUE *wqueue;
+ WQUEUE *wqueue;
+
DBUG_ENTER("resize_pagecache");
if (!pagecache->inited)
@@ -859,7 +864,7 @@ int resize_pagecache(PAGECACHE *pagecache,
#ifdef THREAD
wqueue= &pagecache->resize_queue;
thread= my_thread_var;
- link_into_queue(wqueue, thread);
+ wqueue_link_into_queue(wqueue, thread);
while (wqueue->last_thread->next != thread)
{
@@ -892,12 +897,11 @@ int resize_pagecache(PAGECACHE *pagecache,
end_pagecache(pagecache, 0); /* Don't free mutex */
/* The following will work even if use_mem is 0 */
blocks= init_pagecache(pagecache, pagecache->block_size, use_mem,
- division_limit, age_threshold,
- pagecache->loghandler);
+ division_limit, age_threshold);
finish:
#ifdef THREAD
- unlink_from_queue(wqueue, thread);
+ wqueue_unlink_from_queue(wqueue, thread);
/* Signal for the next resize request to proceeed if any */
if (wqueue->last_thread)
{
@@ -1027,146 +1031,6 @@ void end_pagecache(PAGECACHE *pagecache, my_bool cleanup)
} /* end_pagecache */
-#ifdef THREAD
-/*
- Link a thread into double-linked queue of waiting threads.
-
- SYNOPSIS
- link_into_queue()
- wqueue pointer to the queue structure
- thread pointer to the thread to be added to the queue
-
- RETURN VALUE
- none
-
- NOTES.
- Queue is represented by a circular list of the thread structures
- The list is double-linked of the type (**prev,*next), accessed by
- a pointer to the last element.
-*/
-
-static void link_into_queue(PAGECACHE_WQUEUE *wqueue,
- struct st_my_thread_var *thread)
-{
- struct st_my_thread_var *last;
- if (! (last= wqueue->last_thread))
- {
- /* Queue is empty */
- thread->next= thread;
- thread->prev= &thread->next;
- }
- else
- {
- thread->prev= last->next->prev;
- last->next->prev= &thread->next;
- thread->next= last->next;
- last->next= thread;
- }
- wqueue->last_thread= thread;
-}
-
-/*
- Unlink a thread from double-linked queue of waiting threads
-
- SYNOPSIS
- unlink_from_queue()
- wqueue pointer to the queue structure
- thread pointer to the thread to be removed from the queue
-
- RETURN VALUE
- none
-
- NOTES.
- See NOTES for link_into_queue
-*/
-
-static void unlink_from_queue(PAGECACHE_WQUEUE *wqueue,
- struct st_my_thread_var *thread)
-{
- KEYCACHE_DBUG_PRINT("unlink_from_queue", ("thread %ld", thread->id));
- if (thread->next == thread)
- /* The queue contains only one member */
- wqueue->last_thread= NULL;
- else
- {
- thread->next->prev= thread->prev;
- *thread->prev=thread->next;
- if (wqueue->last_thread == thread)
- wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next,
- thread->prev);
- }
- thread->next= NULL;
-}
-
-
-/*
- Add a thread to single-linked queue of waiting threads
-
- SYNOPSIS
- add_to_queue()
- wqueue pointer to the queue structure
- thread pointer to the thread to be added to the queue
-
- RETURN VALUE
- none
-
- NOTES.
- Queue is represented by a circular list of the thread structures
- The list is single-linked of the type (*next), accessed by a pointer
- to the last element.
-*/
-
-static inline void add_to_queue(PAGECACHE_WQUEUE *wqueue,
- struct st_my_thread_var *thread)
-{
- struct st_my_thread_var *last;
- if (! (last= wqueue->last_thread))
- thread->next= thread;
- else
- {
- thread->next= last->next;
- last->next= thread;
- }
- wqueue->last_thread= thread;
-}
-
-
-/*
- Remove all threads from queue signaling them to proceed
-
- SYNOPSIS
- realease_queue()
- wqueue pointer to the queue structure
- thread pointer to the thread to be added to the queue
-
- RETURN VALUE
- none
-
- NOTES.
- See notes for add_to_queue
- When removed from the queue each thread is signaled via condition
- variable thread->suspend.
-*/
-
-static void release_queue(PAGECACHE_WQUEUE *wqueue)
-{
- struct st_my_thread_var *last= wqueue->last_thread;
- struct st_my_thread_var *next= last->next;
- struct st_my_thread_var *thread;
- do
- {
- thread=next;
- KEYCACHE_DBUG_PRINT("release_queue: signal", ("thread %ld", thread->id));
- pagecache_pthread_cond_signal(&thread->suspend);
- next=thread->next;
- thread->next= NULL;
- }
- while (thread != last);
- wqueue->last_thread= NULL;
-}
-#endif
-
-
/*
Unlink a block from the chain of dirty/clean blocks
*/
@@ -1273,6 +1137,7 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
PAGECACHE_BLOCK_LINK *ins;
PAGECACHE_BLOCK_LINK **ptr_ins;
+ BLOCK_INFO(block);
KEYCACHE_DBUG_ASSERT(! (block->hash_link && block->hash_link->requests));
#ifdef THREAD
if (!hot && pagecache->waiting_for_block.last_thread)
@@ -1297,7 +1162,7 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
{
KEYCACHE_DBUG_PRINT("link_block: signal", ("thread %ld", thread->id));
pagecache_pthread_cond_signal(&thread->suspend);
- unlink_from_queue(&pagecache->waiting_for_block, thread);
+ wqueue_unlink_from_queue(&pagecache->waiting_for_block, thread);
block->requests++;
}
}
@@ -1363,6 +1228,8 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
{
+ DBUG_ENTER("unlink_block");
+ DBUG_PRINT("unlink_block", ("unlink 0x%lx", (ulong)block));
if (block->next_used == block)
/* The list contains only one member */
pagecache->used_last= pagecache->used_ins= NULL;
@@ -1381,14 +1248,15 @@ static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
KEYCACHE_THREAD_TRACE("unlink_block");
#if defined(PAGECACHE_DEBUG)
+ KEYCACHE_DBUG_ASSERT(pagecache->blocks_available != 0);
pagecache->blocks_available--;
KEYCACHE_DBUG_PRINT("unlink_block",
("unlinked block 0x%lx (%u) status=%x #requests=%u #available=%u",
(ulong)block, BLOCK_NUMBER(pagecache, block), block->status,
block->requests, pagecache->blocks_available));
BLOCK_INFO(block);
- KEYCACHE_DBUG_ASSERT(pagecache->blocks_available >= 0);
#endif
+ DBUG_VOID_RETURN;
}
@@ -1591,7 +1459,7 @@ static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link)
{
KEYCACHE_DBUG_PRINT("unlink_hash: signal", ("thread %ld", thread->id));
pagecache_pthread_cond_signal(&thread->suspend);
- unlink_from_queue(&pagecache->waiting_for_hash_link, thread);
+ wqueue_unlink_from_queue(&pagecache->waiting_for_hash_link, thread);
}
}
while (thread != last_thread);
@@ -1618,7 +1486,7 @@ static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link)
pagecache Pagecache reference
file file ID
pageno page number in the file
- start where to put pointer to found hash link (for
+ start where to put pointer to found hash bucket (for
direct referring it)
RETURN
@@ -1627,7 +1495,7 @@ static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link)
static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache,
PAGECACHE_FILE *file,
- maria_page_no_t pageno,
+ pgcache_page_no_t pageno,
PAGECACHE_HASH_LINK ***start)
{
reg1 PAGECACHE_HASH_LINK *hash_link;
@@ -1670,6 +1538,12 @@ static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache,
KEYCACHE_DBUG_ASSERT(cnt <= pagecache->hash_links_used);
#endif
}
+ if (hash_link)
+ {
+ /* Register the request for the page */
+ hash_link->requests++;
+ }
+
DBUG_RETURN(hash_link);
}
@@ -1680,7 +1554,7 @@ static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache,
static PAGECACHE_HASH_LINK *get_hash_link(PAGECACHE *pagecache,
PAGECACHE_FILE *file,
- maria_page_no_t pageno)
+ pgcache_page_no_t pageno)
{
reg1 PAGECACHE_HASH_LINK *hash_link;
PAGECACHE_HASH_LINK **start;
@@ -1693,7 +1567,7 @@ restart:
/* try to find the page in the cache */
hash_link= get_present_hash_link(pagecache, file, pageno,
&start);
- if (! hash_link)
+ if (!hash_link)
{
/* There is no hash link in the hash table for the pair (file, pageno) */
if (pagecache->free_hash_list)
@@ -1714,7 +1588,7 @@ restart:
page.file= *file;
page.pageno= pageno;
thread->opt_info= (void *) &page;
- link_into_queue(&pagecache->waiting_for_hash_link, thread);
+ wqueue_link_into_queue(&pagecache->waiting_for_hash_link, thread);
KEYCACHE_DBUG_PRINT("get_hash_link: wait",
("suspend thread %ld", thread->id));
pagecache_pthread_cond_wait(&thread->suspend,
@@ -1723,14 +1597,15 @@ restart:
#else
KEYCACHE_DBUG_ASSERT(0);
#endif
+ DBUG_PRINT("info", ("restarting..."));
goto restart;
}
hash_link->file= *file;
hash_link->pageno= pageno;
link_hash(start, hash_link);
+ /* Register the request for the page */
+ hash_link->requests++;
}
- /* Register the request for the page */
- hash_link->requests++;
return hash_link;
}
@@ -1743,7 +1618,7 @@ restart:
SYNOPSIS
- find_key_block()
+ find_block()
pagecache pointer to a page cache data structure
file handler for the file to read page from
pageno number of the page in the file
@@ -1773,29 +1648,29 @@ restart:
waits until first of this operations links any block back.
*/
-static PAGECACHE_BLOCK_LINK *find_key_block(PAGECACHE *pagecache,
- PAGECACHE_FILE *file,
- maria_page_no_t pageno,
- int init_hits_left,
- my_bool wrmode,
- my_bool reg_req,
- int *page_st)
+static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ int init_hits_left,
+ my_bool wrmode,
+ my_bool reg_req,
+ int *page_st)
{
PAGECACHE_HASH_LINK *hash_link;
PAGECACHE_BLOCK_LINK *block;
int error= 0;
int page_status;
- DBUG_ENTER("find_key_block");
- KEYCACHE_THREAD_TRACE("find_key_block:begin");
+ DBUG_ENTER("find_block");
+ KEYCACHE_THREAD_TRACE("find_block:begin");
DBUG_PRINT("enter", ("fd: %u pos %lu wrmode: %lu",
(uint) file->file, (ulong) pageno, (uint) wrmode));
- KEYCACHE_DBUG_PRINT("find_key_block", ("fd: %u pos: %lu wrmode: %lu",
- (uint) file->file, (ulong) pageno,
- (uint) wrmode));
+ KEYCACHE_DBUG_PRINT("find_block", ("fd: %u pos: %lu wrmode: %lu",
+ (uint) file->file, (ulong) pageno,
+ (uint) wrmode));
#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
DBUG_EXECUTE("check_pagecache",
- test_key_cache(pagecache, "start of find_key_block", 0););
+ test_key_cache(pagecache, "start of find_block", 0););
#endif
restart:
@@ -1840,10 +1715,10 @@ restart:
{
#ifdef THREAD
struct st_my_thread_var *thread= my_thread_var;
- add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
+ wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
do
{
- KEYCACHE_DBUG_PRINT("find_key_block: wait",
+ KEYCACHE_DBUG_PRINT("find_block: wait",
("suspend thread %ld", thread->id));
pagecache_pthread_cond_wait(&thread->suspend,
&pagecache->cache_lock);
@@ -1871,7 +1746,7 @@ restart:
{
/* This is a request for a page to be removed from cache */
- KEYCACHE_DBUG_PRINT("find_key_block",
+ KEYCACHE_DBUG_PRINT("find_block",
("request for old page in block %u "
"wrmode: %d block->status: %d",
BLOCK_NUMBER(pagecache, block), wrmode,
@@ -1888,17 +1763,17 @@ restart:
else
{
hash_link->requests--;
- KEYCACHE_DBUG_PRINT("find_key_block",
+ KEYCACHE_DBUG_PRINT("find_block",
("request waiting for old page to be saved"));
{
#ifdef THREAD
struct st_my_thread_var *thread= my_thread_var;
/* Put the request into the queue of those waiting for the old page */
- add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
+ wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
/* Wait until the request can be resubmitted */
do
{
- KEYCACHE_DBUG_PRINT("find_key_block: wait",
+ KEYCACHE_DBUG_PRINT("find_block: wait",
("suspend thread %ld", thread->id));
pagecache_pthread_cond_wait(&thread->suspend,
&pagecache->cache_lock);
@@ -1909,11 +1784,13 @@ restart:
/* No parallel requests in single-threaded case */
#endif
}
- KEYCACHE_DBUG_PRINT("find_key_block",
+ KEYCACHE_DBUG_PRINT("find_block",
("request for old page resubmitted"));
+ DBUG_PRINT("info", ("restarting..."));
/* Resubmit the request */
goto restart;
}
+ block->status&= ~BLOCK_IN_SWITCH;
}
else
{
@@ -1941,7 +1818,8 @@ restart:
pagecache->blocks_used++;
}
pagecache->blocks_unused--;
- DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0);
+ DBUG_ASSERT((block->status & BLOCK_WRLOCK));
+ DBUG_ASSERT(block->pins > 0);
block->status= 0;
#ifndef DBUG_OFF
block->type= PAGECACHE_EMPTY_PAGE;
@@ -1954,7 +1832,9 @@ restart:
block->hash_link= hash_link;
hash_link->block= block;
page_status= PAGE_TO_BE_READ;
- KEYCACHE_DBUG_PRINT("find_key_block",
+ DBUG_PRINT("info", ("page to be read set for page 0x%lx",
+ (ulong)block));
+ KEYCACHE_DBUG_PRINT("find_block",
("got free or never used block %u",
BLOCK_NUMBER(pagecache, block)));
}
@@ -1973,10 +1853,10 @@ restart:
{
struct st_my_thread_var *thread= my_thread_var;
thread->opt_info= (void *) hash_link;
- link_into_queue(&pagecache->waiting_for_block, thread);
+ wqueue_link_into_queue(&pagecache->waiting_for_block, thread);
do
{
- KEYCACHE_DBUG_PRINT("find_key_block: wait",
+ KEYCACHE_DBUG_PRINT("find_block: wait",
("suspend thread %ld", thread->id));
pagecache_pthread_cond_wait(&thread->suspend,
&pagecache->cache_lock);
@@ -2001,19 +1881,18 @@ restart:
reg_requests(pagecache, block,1);
hash_link->block= block;
}
- else
- {
- DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0);
- }
+ DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0);
+ DBUG_ASSERT(block->pins > 0);
if (block->hash_link != hash_link &&
! (block->status & BLOCK_IN_SWITCH) )
{
/* this is a primary request for a new page */
DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0);
+ DBUG_ASSERT(block->pins > 0);
block->status|= (BLOCK_IN_SWITCH | BLOCK_WRLOCK);
- KEYCACHE_DBUG_PRINT("find_key_block",
+ KEYCACHE_DBUG_PRINT("find_block",
("got block %u for new page",
BLOCK_NUMBER(pagecache, block)));
@@ -2021,7 +1900,7 @@ restart:
{
/* The block contains a dirty page - push it out of the cache */
- KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty"));
+ KEYCACHE_DBUG_PRINT("find_block", ("block is dirty"));
pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
/*
@@ -2054,7 +1933,7 @@ restart:
unlink_hash(pagecache, block->hash_link);
/* All pending requests for this page must be resubmitted */
if (block->wqueue[COND_FOR_SAVED].last_thread)
- release_queue(&block->wqueue[COND_FOR_SAVED]);
+ wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
}
link_to_file_list(pagecache, block, file,
(my_bool)(block->hash_link ? 1 : 0));
@@ -2065,6 +1944,8 @@ restart:
#endif
block->hash_link= hash_link;
page_status= PAGE_TO_BE_READ;
+ DBUG_PRINT("info", ("page to be read set for page 0x%lx",
+ (ulong)block));
KEYCACHE_DBUG_ASSERT(block->hash_link->block == block);
KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link);
@@ -2072,7 +1953,7 @@ restart:
else
{
/* This is for secondary requests for a new page only */
- KEYCACHE_DBUG_PRINT("find_key_block",
+ KEYCACHE_DBUG_PRINT("find_block",
("block->hash_link: %p hash_link: %p "
"block->status: %u", block->hash_link,
hash_link, block->status ));
@@ -2087,7 +1968,7 @@ restart:
{
if (reg_req)
reg_requests(pagecache, block, 1);
- KEYCACHE_DBUG_PRINT("find_key_block",
+ KEYCACHE_DBUG_PRINT("find_block",
("block->hash_link: %p hash_link: %p "
"block->status: %u", block->hash_link,
hash_link, block->status ));
@@ -2098,12 +1979,12 @@ restart:
}
KEYCACHE_DBUG_ASSERT(page_status != -1);
- *page_st=page_status;
+ *page_st= page_status;
DBUG_PRINT("info",
("block: 0x%lx fd: %u pos %lu block->status %u page_status %lu",
(ulong) block, (uint) file->file,
(ulong) pageno, block->status, (uint) page_status));
- KEYCACHE_DBUG_PRINT("find_key_block",
+ KEYCACHE_DBUG_PRINT("find_block",
("block: 0x%lx fd: %u pos %lu block->status %u page_status %lu",
(ulong) block,
(uint) file->file, (ulong) pageno, block->status,
@@ -2111,16 +1992,16 @@ restart:
#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
DBUG_EXECUTE("check_pagecache",
- test_key_cache(pagecache, "end of find_key_block",0););
+ test_key_cache(pagecache, "end of find_block",0););
#endif
- KEYCACHE_THREAD_TRACE("find_key_block:end");
+ KEYCACHE_THREAD_TRACE("find_block:end");
DBUG_RETURN(block);
}
-void pagecache_add_pin(PAGECACHE_BLOCK_LINK *block)
+static void add_pin(PAGECACHE_BLOCK_LINK *block)
{
- DBUG_ENTER("pagecache_add_pin");
+ DBUG_ENTER("add_pin");
DBUG_PRINT("enter", ("block 0x%lx pins: %u",
(ulong) block,
block->pins));
@@ -2137,9 +2018,9 @@ void pagecache_add_pin(PAGECACHE_BLOCK_LINK *block)
DBUG_VOID_RETURN;
}
-void pagecache_remove_pin(PAGECACHE_BLOCK_LINK *block)
+static void remove_pin(PAGECACHE_BLOCK_LINK *block)
{
- DBUG_ENTER("pagecache_remove_pin");
+ DBUG_ENTER("remove_pin");
DBUG_PRINT("enter", ("block 0x%lx pins: %u",
(ulong) block,
block->pins));
@@ -2157,7 +2038,7 @@ void pagecache_remove_pin(PAGECACHE_BLOCK_LINK *block)
DBUG_VOID_RETURN;
}
#ifdef PAGECACHE_DEBUG
-void pagecache_add_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl)
+static void info_add_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl)
{
PAGECACHE_LOCK_INFO *info=
(PAGECACHE_LOCK_INFO *)my_malloc(sizeof(PAGECACHE_LOCK_INFO), MYF(0));
@@ -2166,7 +2047,7 @@ void pagecache_add_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl)
info_link((PAGECACHE_PIN_INFO **)&block->lock_list,
(PAGECACHE_PIN_INFO *)info);
}
-void pagecache_remove_lock(PAGECACHE_BLOCK_LINK *block)
+static void info_remove_lock(PAGECACHE_BLOCK_LINK *block)
{
PAGECACHE_LOCK_INFO *info=
(PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list,
@@ -2175,7 +2056,7 @@ void pagecache_remove_lock(PAGECACHE_BLOCK_LINK *block)
info_unlink((PAGECACHE_PIN_INFO *)info);
my_free((gptr)info, MYF(0));
}
-void pagecache_change_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl)
+static void info_change_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl)
{
PAGECACHE_LOCK_INFO *info=
(PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list,
@@ -2184,40 +2065,47 @@ void pagecache_change_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl)
info->write_lock= wl;
}
#else
-#define pagecache_add_lock(B,W)
-#define pagecache_remove_lock(B)
-#define pagecache_change_lock(B,W)
+#define info_add_lock(B,W)
+#define info_remove_lock(B)
+#define info_change_lock(B,W)
#endif
/*
- Put on the block "update" type lock
+ Put on the block write lock
SYNOPSIS
- pagecache_lock_block()
+ get_wrlock()
pagecache pointer to a page cache data structure
block the block to work with
RETURN
0 - OK
- 1 - Try to lock the block failed
+ 1 - Can't lock this block, need retry
*/
-my_bool pagecache_lock_block(PAGECACHE *pagecache,
- PAGECACHE_BLOCK_LINK *block)
-{
- DBUG_ENTER("pagecache_lock_block");
+static my_bool get_wrlock(PAGECACHE *pagecache,
+ PAGECACHE_BLOCK_LINK *block)
+{
+ PAGECACHE_FILE file= block->hash_link->file;
+ pgcache_page_no_t pageno= block->hash_link->pageno;
+ DBUG_ENTER("get_wrlock");
+ DBUG_PRINT("info", ("the block 0x%lx "
+ "files %d(%d) pages %d(%d)",
+ (ulong)block,
+ file.file, block->hash_link->file.file,
+ pageno, block->hash_link->pageno));
BLOCK_INFO(block);
while (block->status & BLOCK_WRLOCK)
{
- DBUG_PRINT("info", ("fail to lock, waiting..."));
+ DBUG_PRINT("info", ("fail to lock, waiting... 0x%lx", (ulong)block));
/* Lock failed we will wait */
#ifdef THREAD
struct st_my_thread_var *thread= my_thread_var;
- add_to_queue(&block->wqueue[COND_FOR_WRLOCK], thread);
+ wqueue_add_to_queue(&block->wqueue[COND_FOR_WRLOCK], thread);
dec_counter_for_resize_op(pagecache);
do
{
- KEYCACHE_DBUG_PRINT("pagecache_lock_block: wait",
+ KEYCACHE_DBUG_PRINT("get_wrlock: wait",
("suspend thread %ld", thread->id));
pagecache_pthread_cond_wait(&thread->suspend,
&pagecache->cache_lock);
@@ -2227,35 +2115,61 @@ my_bool pagecache_lock_block(PAGECACHE *pagecache,
DBUG_ASSERT(0);
#endif
BLOCK_INFO(block);
- DBUG_RETURN(1);
+ if ((block->status & (BLOCK_REASSIGNED | BLOCK_IN_SWITCH)) ||
+ file.file != block->hash_link->file.file ||
+ pageno != block->hash_link->pageno)
+ {
+ DBUG_PRINT("info", ("the block 0x%lx changed => need retry"
+ "status %x files %d != %d or pages %d !=%d",
+ (ulong)block, block->status,
+ file.file, block->hash_link->file.file,
+ pageno, block->hash_link->pageno));
+ DBUG_RETURN(1);
+ }
}
- /* we are doing it by global cache mutex protectio, so it is OK */
+ DBUG_ASSERT(block->pins == 0);
+ /* we are doing it by global cache mutex protection, so it is OK */
block->status|= BLOCK_WRLOCK;
DBUG_PRINT("info", ("WR lock set, block 0x%lx", (ulong)block));
DBUG_RETURN(0);
}
-void pagecache_unlock_block(PAGECACHE_BLOCK_LINK *block)
+
+/*
+ Remove write lock from the block
+
+ SYNOPSIS
+ release_wrlock()
+ pagecache pointer to a page cache data structure
+ block the block to work with
+
+ RETURN
+ 0 - OK
+*/
+
+static void release_wrlock(PAGECACHE_BLOCK_LINK *block)
{
- DBUG_ENTER("pagecache_unlock_block");
+ DBUG_ENTER("release_wrlock");
BLOCK_INFO(block);
DBUG_ASSERT(block->status & BLOCK_WRLOCK);
+ DBUG_ASSERT(block->pins > 0);
block->status&= ~BLOCK_WRLOCK;
DBUG_PRINT("info", ("WR lock reset, block 0x%lx", (ulong)block));
#ifdef THREAD
/* release all threads waiting for write lock */
if (block->wqueue[COND_FOR_WRLOCK].last_thread)
- release_queue(&block->wqueue[COND_FOR_WRLOCK]);
+ wqueue_release_queue(&block->wqueue[COND_FOR_WRLOCK]);
#endif
BLOCK_INFO(block);
DBUG_VOID_RETURN;
}
+
/*
- Try to lock/uplock and pin/unpin the block
+ Try to lock/unlock and pin/unpin the block
SYNOPSIS
- pagecache_make_lock_and_pin()
+ make_lock_and_pin()
pagecache pointer to a page cache data structure
block the block to work with
lock lock change mode
@@ -2266,12 +2180,12 @@ void pagecache_unlock_block(PAGECACHE_BLOCK_LINK *block)
1 - Try to lock the block failed
*/
-my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache,
- PAGECACHE_BLOCK_LINK *block,
- enum pagecache_page_lock lock,
- enum pagecache_page_pin pin)
+static my_bool make_lock_and_pin(PAGECACHE *pagecache,
+ PAGECACHE_BLOCK_LINK *block,
+ enum pagecache_page_lock lock,
+ enum pagecache_page_pin pin)
{
- DBUG_ENTER("pagecache_make_lock_and_pin");
+ DBUG_ENTER("make_lock_and_pin");
DBUG_PRINT("enter", ("block: 0x%lx (%u), wrlock: %c pins: %u, lock %s, pin: %s",
(ulong)block, BLOCK_NUMBER(pagecache, block),
((block->status & BLOCK_WRLOCK)?'Y':'N'),
@@ -2287,53 +2201,47 @@ my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache,
{
case PAGECACHE_LOCK_WRITE: /* free -> write */
/* Writelock and pin the buffer */
- if (pagecache_lock_block(pagecache, block))
+ if (get_wrlock(pagecache, block))
{
- DBUG_PRINT("info", ("restart"));
- /* in case of fail pagecache_lock_block unlock cache */
- DBUG_RETURN(1);
+ /* can't lock => need retry */
+ goto retry;
}
- /* The cache is locked so nothing afraid off */
- pagecache_add_pin(block);
- pagecache_add_lock(block, 1);
+
+ /* The cache is locked so nothing afraid of */
+ add_pin(block);
+ info_add_lock(block, 1);
break;
case PAGECACHE_LOCK_WRITE_TO_READ: /* write -> read */
case PAGECACHE_LOCK_WRITE_UNLOCK: /* write -> free */
/*
- Removes writelog and puts read lock (which is nothing in our
+ Removes write lock and puts read lock (which is nothing in our
implementation)
*/
- pagecache_unlock_block(block);
+ release_wrlock(block);
case PAGECACHE_LOCK_READ_UNLOCK: /* read -> free */
case PAGECACHE_LOCK_LEFT_READLOCKED: /* read -> read */
-#ifndef DBUG_OFF
if (pin == PAGECACHE_UNPIN)
{
- pagecache_remove_pin(block);
+ remove_pin(block);
}
-#endif
-#ifdef PAGECACHE_DEBUG
if (lock == PAGECACHE_LOCK_WRITE_TO_READ)
{
- pagecache_change_lock(block, 0);
+ info_change_lock(block, 0);
}
else if (lock == PAGECACHE_LOCK_WRITE_UNLOCK ||
lock == PAGECACHE_LOCK_READ_UNLOCK)
{
- pagecache_remove_lock(block);
+ info_remove_lock(block);
}
-#endif
break;
case PAGECACHE_LOCK_READ: /* free -> read */
-#ifndef DBUG_OFF
if (pin == PAGECACHE_PIN)
{
/* The cache is locked so nothing afraid off */
- pagecache_add_pin(block);
+ add_pin(block);
}
- pagecache_add_lock(block, 0);
+ info_add_lock(block, 0);
break;
-#endif
case PAGECACHE_LOCK_LEFT_UNLOCKED: /* free -> free */
case PAGECACHE_LOCK_LEFT_WRITELOCKED: /* write -> write */
break; /* do nothing */
@@ -2343,6 +2251,16 @@ my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache,
BLOCK_INFO(block);
DBUG_RETURN(0);
+retry:
+ DBUG_PRINT("INFO", ("Retry block 0x%lx", (ulong)block));
+ BLOCK_INFO(block);
+ DBUG_ASSERT(block->hash_link->requests != 0);
+ block->hash_link->requests--;
+ DBUG_ASSERT(block->requests != 0);
+ unreg_request(pagecache, block, 1);
+ BLOCK_INFO(block);
+ DBUG_RETURN(1);
+
}
@@ -2355,6 +2273,8 @@ my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache,
pagecache pointer to a page cache data structure
block block to which buffer the data is to be read
primary <-> the current thread will read the data
+ validator validator of read from the disk data
+ validator_data pointer to the data need by the validator
RETURN VALUE
None
@@ -2368,13 +2288,15 @@ my_bool pagecache_make_lock_and_pin(PAGECACHE *pagecache,
static void read_block(PAGECACHE *pagecache,
PAGECACHE_BLOCK_LINK *block,
- my_bool primary)
+ my_bool primary,
+ pagecache_disk_read_validator validator,
+ gptr validator_data)
{
uint got_length;
/* On entry cache_lock is locked */
- KEYCACHE_THREAD_TRACE("read_block");
+ DBUG_ENTER("read_block");
if (primary)
{
/*
@@ -2382,8 +2304,8 @@ static void read_block(PAGECACHE *pagecache,
that submitted primary requests
*/
- KEYCACHE_DBUG_PRINT("read_block",
- ("page to be read by primary request"));
+ DBUG_PRINT("read_block",
+ ("page to be read by primary request"));
/* Page is not in buffer yet, is to be read from disk */
pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
@@ -2400,11 +2322,15 @@ static void read_block(PAGECACHE *pagecache,
else
block->status= (BLOCK_READ | (block->status & BLOCK_WRLOCK));
- KEYCACHE_DBUG_PRINT("read_block",
- ("primary request: new page in cache"));
+ if (validator != NULL &&
+ (*validator)(block->buffer, validator_data))
+ block->status|= BLOCK_ERROR;
+
+ DBUG_PRINT("read_block",
+ ("primary request: new page in cache"));
/* Signal that all pending requests for this page now can be processed */
if (block->wqueue[COND_FOR_REQUESTED].last_thread)
- release_queue(&block->wqueue[COND_FOR_REQUESTED]);
+ wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]);
}
else
{
@@ -2412,17 +2338,17 @@ static void read_block(PAGECACHE *pagecache,
This code is executed only by threads
that submitted secondary requests
*/
- KEYCACHE_DBUG_PRINT("read_block",
- ("secondary request waiting for new page to be read"));
+ DBUG_PRINT("read_block",
+ ("secondary request waiting for new page to be read"));
{
#ifdef THREAD
struct st_my_thread_var *thread= my_thread_var;
/* Put the request into a queue and wait until it can be processed */
- add_to_queue(&block->wqueue[COND_FOR_REQUESTED], thread);
+ wqueue_add_to_queue(&block->wqueue[COND_FOR_REQUESTED], thread);
do
{
- KEYCACHE_DBUG_PRINT("read_block: wait",
- ("suspend thread %ld", thread->id));
+ DBUG_PRINT("read_block: wait",
+ ("suspend thread %ld", thread->id));
pagecache_pthread_cond_wait(&thread->suspend,
&pagecache->cache_lock);
}
@@ -2432,9 +2358,10 @@ static void read_block(PAGECACHE *pagecache,
/* No parallel requests in single-threaded case */
#endif
}
- KEYCACHE_DBUG_PRINT("read_block",
- ("secondary request: new page in cache"));
+ DBUG_PRINT("read_block",
+ ("secondary request: new page in cache"));
}
+ DBUG_VOID_RETURN;
}
@@ -2454,11 +2381,11 @@ static void read_block(PAGECACHE *pagecache,
void pagecache_unlock_page(PAGECACHE *pagecache,
PAGECACHE_FILE *file,
- maria_page_no_t pageno,
+ pgcache_page_no_t pageno,
enum pagecache_page_lock lock,
enum pagecache_page_pin pin,
my_bool stamp_this_page,
- LSN first_REDO_LSN_for_page)
+ LSN_PTR first_REDO_LSN_for_page)
{
PAGECACHE_BLOCK_LINK *block;
int page_st;
@@ -2471,24 +2398,6 @@ void pagecache_unlock_page(PAGECACHE *pagecache,
DBUG_ASSERT(pin != PAGECACHE_PIN &&
lock != PAGECACHE_LOCK_READ &&
lock != PAGECACHE_LOCK_WRITE);
- if (pin == PAGECACHE_PIN_LEFT_UNPINNED &&
- lock == PAGECACHE_LOCK_READ_UNLOCK)
- {
-#ifndef DBUG_OFF
- if (
-#endif
- /* block do not need here so we do not provide it */
- pagecache_make_lock_and_pin(pagecache, 0, lock, pin)
-#ifndef DBUG_OFF
- )
- {
- DBUG_ASSERT(0); /* should not happend */
- }
-#else
- ;
-#endif
- DBUG_VOID_RETURN;
- }
pagecache_pthread_mutex_lock(&pagecache->cache_lock);
/*
@@ -2498,7 +2407,7 @@ void pagecache_unlock_page(PAGECACHE *pagecache,
DBUG_ASSERT(pagecache->can_be_used);
inc_counter_for_resize_op(pagecache);
- block= find_key_block(pagecache, file, pageno, 0, 0, 0, &page_st);
+ block= find_block(pagecache, file, pageno, 0, 0, 0, &page_st);
BLOCK_INFO(block);
DBUG_ASSERT(block != 0 && page_st == PAGE_READ);
if (stamp_this_page)
@@ -2511,7 +2420,7 @@ void pagecache_unlock_page(PAGECACHE *pagecache,
#ifndef DBUG_OFF
if (
#endif
- pagecache_make_lock_and_pin(pagecache, block, lock, pin)
+ make_lock_and_pin(pagecache, block, lock, pin)
#ifndef DBUG_OFF
)
{
@@ -2549,7 +2458,7 @@ void pagecache_unlock_page(PAGECACHE *pagecache,
void pagecache_unpin_page(PAGECACHE *pagecache,
PAGECACHE_FILE *file,
- maria_page_no_t pageno)
+ pgcache_page_no_t pageno)
{
PAGECACHE_BLOCK_LINK *block;
int page_st;
@@ -2565,7 +2474,7 @@ void pagecache_unpin_page(PAGECACHE *pagecache,
DBUG_ASSERT(pagecache->can_be_used);
inc_counter_for_resize_op(pagecache);
- block= find_key_block(pagecache, file, pageno, 0, 0, 0, &page_st);
+ block= find_block(pagecache, file, pageno, 0, 0, 0, &page_st);
DBUG_ASSERT(block != 0 && page_st == PAGE_READ);
#ifndef DBUG_OFF
@@ -2576,9 +2485,9 @@ void pagecache_unpin_page(PAGECACHE *pagecache,
a) we can't pin without any lock
b) we can't unpin keeping write lock
*/
- pagecache_make_lock_and_pin(pagecache, block,
- PAGECACHE_LOCK_LEFT_READLOCKED,
- PAGECACHE_UNPIN)
+ make_lock_and_pin(pagecache, block,
+ PAGECACHE_LOCK_LEFT_READLOCKED,
+ PAGECACHE_UNPIN)
#ifndef DBUG_OFF
)
{
@@ -2622,7 +2531,7 @@ void pagecache_unlock(PAGECACHE *pagecache,
enum pagecache_page_lock lock,
enum pagecache_page_pin pin,
my_bool stamp_this_page,
- LSN first_REDO_LSN_for_page)
+ LSN_PTR first_REDO_LSN_for_page)
{
PAGECACHE_BLOCK_LINK *block= (PAGECACHE_BLOCK_LINK *)link;
DBUG_ENTER("pagecache_unlock");
@@ -2643,7 +2552,7 @@ void pagecache_unlock(PAGECACHE *pagecache,
if (
#endif
/* block do not need here so we do not provide it */
- pagecache_make_lock_and_pin(pagecache, 0, lock, pin)
+ make_lock_and_pin(pagecache, 0, lock, pin)
#ifndef DBUG_OFF
)
{
@@ -2673,7 +2582,7 @@ void pagecache_unlock(PAGECACHE *pagecache,
#ifndef DBUG_OFF
if (
#endif
- pagecache_make_lock_and_pin(pagecache, block, lock, pin)
+ make_lock_and_pin(pagecache, block, lock, pin)
#ifndef DBUG_OFF
)
{
@@ -2736,9 +2645,9 @@ void pagecache_unpin(PAGECACHE *pagecache,
a) we can't pin without any lock
b) we can't unpin keeping write lock
*/
- pagecache_make_lock_and_pin(pagecache, block,
- PAGECACHE_LOCK_LEFT_READLOCKED,
- PAGECACHE_UNPIN)
+ make_lock_and_pin(pagecache, block,
+ PAGECACHE_LOCK_LEFT_READLOCKED,
+ PAGECACHE_UNPIN)
#ifndef DBUG_OFF
)
{
@@ -2767,7 +2676,7 @@ void pagecache_unpin(PAGECACHE *pagecache,
Read a block of data from a cached file into a buffer;
SYNOPSIS
- pagecache_read()
+ pagecache_valid_read()
pagecache pointer to a page cache data structure
file handler for the file for the block of data to be read
pageno number of the block of data in the file
@@ -2776,16 +2685,12 @@ void pagecache_unpin(PAGECACHE *pagecache,
type type of the page
lock lock change
link link to the page if we pin it
+ validator validator of read from the disk data
+ validator_data pointer to the data need by the validator
RETURN VALUE
Returns address from where the data is placed if sucessful, 0 - otherwise.
- NOTES.
-
- The function ensures that a block of data of size length from file
- positioned at pageno is in the buffers for some key cache blocks.
- Then the function copies the data into the buffer buff.
-
Pin will be choosen according to lock parameter (see lock_to_pin)
*/
static enum pagecache_page_pin lock_to_pin[]=
@@ -2800,19 +2705,21 @@ static enum pagecache_page_pin lock_to_pin[]=
PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_TO_READ*/
};
-byte *pagecache_read(PAGECACHE *pagecache,
- PAGECACHE_FILE *file,
- maria_page_no_t pageno,
- uint level,
- byte *buff,
- enum pagecache_page_type type,
- enum pagecache_page_lock lock,
- PAGECACHE_PAGE_LINK *link)
+byte *pagecache_valid_read(PAGECACHE *pagecache,
+ PAGECACHE_FILE *file,
+ pgcache_page_no_t pageno,
+ uint level,
+ byte *buff,
+ enum pagecache_page_type type,
+ enum pagecache_page_lock lock,
+ PAGECACHE_PAGE_LINK *link,
+ pagecache_disk_read_validator validator,
+ gptr validator_data)
{
int error= 0;
enum pagecache_page_pin pin= lock_to_pin[lock];
PAGECACHE_PAGE_LINK fake_link;
- DBUG_ENTER("page_cache_read");
+ DBUG_ENTER("pagecache_valid_read");
DBUG_PRINT("enter", ("fd: %u page: %lu level: %u t:%s l%s p%s",
(uint) file->file, (ulong) pageno, level,
page_cache_page_type_str[type],
@@ -2829,7 +2736,7 @@ restart:
if (pagecache->can_be_used)
{
/* Key cache is used */
- reg1 PAGECACHE_BLOCK_LINK *block;
+ PAGECACHE_BLOCK_LINK *block;
uint status;
int page_st;
@@ -2842,29 +2749,33 @@ restart:
inc_counter_for_resize_op(pagecache);
pagecache->global_cache_r_requests++;
- block= find_key_block(pagecache, file, pageno, level,
- ((lock == PAGECACHE_LOCK_WRITE) ? 1 : 0),
- (((pin == PAGECACHE_PIN_LEFT_PINNED) ||
- (pin == PAGECACHE_UNPIN)) ? 0 : 1),
- &page_st);
+ block= find_block(pagecache, file, pageno, level,
+ test(lock == PAGECACHE_LOCK_WRITE),
+ test((pin == PAGECACHE_PIN_LEFT_PINNED) ||
+ (pin == PAGECACHE_UNPIN)),
+ &page_st);
DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE ||
block->type == type);
block->type= type;
- if (pagecache_make_lock_and_pin(pagecache, block, lock, pin))
+ if (block->status != BLOCK_ERROR && page_st != PAGE_READ)
+ {
+ DBUG_PRINT("info", ("read block 0x%lx", (ulong)block));
+ /* The requested page is to be read into the block buffer */
+ read_block(pagecache, block,
+ (my_bool)(page_st == PAGE_TO_BE_READ),
+ validator, validator_data);
+ DBUG_PRINT("info", ("read is done"));
+ }
+ if (make_lock_and_pin(pagecache, block, lock, pin))
{
/*
- We failed to write lock the block, cache is unlocked, and last write
- lock is released, we will try to get the block again.
+ We failed to write lock the block, cache is unlocked,
+ we will try to get the block again.
*/
pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_PRINT("info", ("restarting..."));
goto restart;
}
- if (block->status != BLOCK_ERROR && page_st != PAGE_READ)
- {
- /* The requested page is to be read into the block buffer */
- read_block(pagecache, block,
- (my_bool)(page_st == PAGE_TO_BE_READ));
- }
if (! ((status= block->status) & BLOCK_ERROR))
{
@@ -2933,7 +2844,7 @@ no_key_cache: /* Key cache is not used */
*/
my_bool pagecache_delete_page(PAGECACHE *pagecache,
PAGECACHE_FILE *file,
- maria_page_no_t pageno,
+ pgcache_page_no_t pageno,
enum pagecache_page_lock lock,
my_bool flush)
{
@@ -2969,13 +2880,14 @@ restart:
}
block= link->block;
DBUG_ASSERT(block != 0);
- if (pagecache_make_lock_and_pin(pagecache, block, lock, pin))
+ if (make_lock_and_pin(pagecache, block, lock, pin))
{
/*
We failed to writelock the block, cache is unlocked, and last write
lock is released, we will try to get the block again.
*/
pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_PRINT("info", ("restarting..."));
goto restart;
}
@@ -2983,7 +2895,7 @@ restart:
{
/* The block contains a dirty page - push it out of the cache */
- KEYCACHE_DBUG_PRINT("find_key_block", ("block is dirty"));
+ KEYCACHE_DBUG_PRINT("find_block", ("block is dirty"));
pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
/*
@@ -3015,9 +2927,10 @@ restart:
}
/* Cache is locked, so we can relese page before freeing it */
- pagecache_make_lock_and_pin(pagecache, block,
- PAGECACHE_LOCK_WRITE_UNLOCK,
- PAGECACHE_UNPIN);
+ make_lock_and_pin(pagecache, block,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN);
+ link->requests--;
if (pin == PAGECACHE_PIN_LEFT_PINNED)
unreg_request(pagecache, block, 1);
free_block(pagecache, block);
@@ -3053,11 +2966,12 @@ end:
0 if a success, 1 - otherwise.
*/
+/* description of how to change lock before and after write */
struct write_lock_change
{
- int need_lock_change;
- enum pagecache_page_lock new_lock;
- enum pagecache_page_lock unlock_lock;
+ int need_lock_change; /* need changing of lock at the end of write */
+ enum pagecache_page_lock new_lock; /* lock at the beginning */
+ enum pagecache_page_lock unlock_lock; /* lock at the end */
};
static struct write_lock_change write_lock_change_table[]=
@@ -3084,10 +2998,11 @@ static struct write_lock_change write_lock_change_table[]=
PAGECACHE_LOCK_WRITE_TO_READ}/*PAGECACHE_LOCK_WRITE_TO_READ*/
};
+/* description of how to change pin before and after write */
struct write_pin_change
{
- enum pagecache_page_pin new_pin;
- enum pagecache_page_pin unlock_pin;
+ enum pagecache_page_pin new_pin; /* pin status at the beginning */
+ enum pagecache_page_pin unlock_pin; /* pin status at the end */
};
static struct write_pin_change write_pin_change_table[]=
@@ -3104,7 +3019,7 @@ static struct write_pin_change write_pin_change_table[]=
my_bool pagecache_write(PAGECACHE *pagecache,
PAGECACHE_FILE *file,
- maria_page_no_t pageno,
+ pgcache_page_no_t pageno,
uint level,
byte *buff,
enum pagecache_page_type type,
@@ -3113,7 +3028,7 @@ my_bool pagecache_write(PAGECACHE *pagecache,
enum pagecache_write_mode write_mode,
PAGECACHE_PAGE_LINK *link)
{
- reg1 PAGECACHE_BLOCK_LINK *block;
+ reg1 PAGECACHE_BLOCK_LINK *block= NULL;
PAGECACHE_PAGE_LINK fake_link;
int error= 0;
int need_lock_change= write_lock_change_table[lock].need_lock_change;
@@ -3133,7 +3048,7 @@ my_bool pagecache_write(PAGECACHE *pagecache,
if (write_mode == PAGECACHE_WRITE_NOW)
{
- /* we allow direct write if wwe do not use long term lockings */
+ /* we allow direct write if we do not use long term lockings */
DBUG_ASSERT(lock == PAGECACHE_LOCK_LEFT_UNLOCKED);
/* Force writing from buff into disk */
pagecache->global_cache_write++;
@@ -3167,10 +3082,10 @@ restart:
lock != PAGECACHE_LOCK_LEFT_WRITELOCKED &&
lock != PAGECACHE_LOCK_WRITE_UNLOCK &&
lock != PAGECACHE_LOCK_WRITE_TO_READ);
- block= find_key_block(pagecache, file, pageno, level,
- (need_wrlock ? 1 : 0),
- (need_wrlock ? 1 : 0),
- &page_st);
+ block= find_block(pagecache, file, pageno, level,
+ (need_wrlock ? 1 : 0),
+ (need_wrlock ? 1 : 0),
+ &page_st);
}
if (!block)
{
@@ -3186,24 +3101,25 @@ restart:
block->type == type);
block->type= type;
- if (pagecache_make_lock_and_pin(pagecache, block,
- write_lock_change_table[lock].new_lock,
- (need_lock_change ?
- write_pin_change_table[pin].new_pin :
- pin)))
+ if (make_lock_and_pin(pagecache, block,
+ write_lock_change_table[lock].new_lock,
+ (need_lock_change ?
+ write_pin_change_table[pin].new_pin :
+ pin)))
{
/*
We failed to writelock the block, cache is unlocked, and last write
lock is released, we will try to get the block again.
*/
pagecache_pthread_mutex_unlock(&pagecache->cache_lock);
+ DBUG_PRINT("info", ("restarting..."));
goto restart;
}
if (write_mode == PAGECACHE_WRITE_DONE)
{
- if (block->status != BLOCK_ERROR && page_st != PAGE_READ)
+ if ((block->status & BLOCK_ERROR) && page_st != PAGE_READ)
{
/* Copy data from buff */
bmove512(block->buffer, buff, pagecache->block_size);
@@ -3212,7 +3128,7 @@ restart:
("primary request: new page in cache"));
/* Signal that all pending requests for this now can be processed. */
if (block->wqueue[COND_FOR_REQUESTED].last_thread)
- release_queue(&block->wqueue[COND_FOR_REQUESTED]);
+ wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]);
}
}
else
@@ -3220,7 +3136,8 @@ restart:
if (write_mode == PAGECACHE_WRITE_NOW)
{
/* buff has been written to disk at start */
- if (block->status & BLOCK_CHANGED)
+ if ((block->status & BLOCK_CHANGED) &&
+ !(block->status & BLOCK_ERROR))
link_to_file_list(pagecache, block, &block->hash_link->file, 1);
}
else
@@ -3231,8 +3148,8 @@ restart:
if (! (block->status & BLOCK_ERROR))
{
bmove512(block->buffer, buff, pagecache->block_size);
+ block->status|= BLOCK_READ;
}
- block->status|= BLOCK_READ;
}
@@ -3242,9 +3159,9 @@ restart:
int rc=
#endif
#warning we are doing an unlock here, so need to give the page its rec_lsn!
- pagecache_make_lock_and_pin(pagecache, block,
- write_lock_change_table[lock].unlock_lock,
- write_pin_change_table[pin].unlock_pin);
+ make_lock_and_pin(pagecache, block,
+ write_lock_change_table[lock].unlock_lock,
+ write_pin_change_table[pin].unlock_pin);
#ifndef DBUG_OFF
DBUG_ASSERT(rc == 0);
#endif
@@ -3255,10 +3172,7 @@ restart:
block->hash_link->requests--;
if (pin != PAGECACHE_PIN_LEFT_PINNED && pin != PAGECACHE_PIN)
{
- if (write_mode != PAGECACHE_WRITE_DONE)
- {
- unreg_request(pagecache, block, 1);
- }
+ unreg_request(pagecache, block, 1);
}
else
*link= (PAGECACHE_PAGE_LINK)block;
@@ -3290,6 +3204,7 @@ end:
DBUG_EXECUTE("exec",
test_key_cache(pagecache, "end of key_cache_write", 1););
#endif
+ BLOCK_INFO(block);
DBUG_RETURN(error);
}
@@ -3321,6 +3236,7 @@ static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
unlink_changed(block);
DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0);
+ DBUG_ASSERT(block->pins > 0);
block->status= 0;
#ifndef DBUG_OFF
block->type= PAGECACHE_EMPTY_PAGE;
@@ -3344,7 +3260,7 @@ static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block)
/* All pending requests for this page must be resubmitted. */
if (block->wqueue[COND_FOR_SAVED].last_thread)
- release_queue(&block->wqueue[COND_FOR_SAVED]);
+ wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
}
@@ -3398,12 +3314,13 @@ static int flush_cached_blocks(PAGECACHE *pagecache,
}
/* if the block is not pinned then it is not write locked */
DBUG_ASSERT((block->status & BLOCK_WRLOCK) == 0);
+ DBUG_ASSERT(block->pins > 0);
#ifndef DBUG_OFF
{
int rc=
#endif
- pagecache_make_lock_and_pin(pagecache, block,
- PAGECACHE_LOCK_WRITE, PAGECACHE_PIN);
+ make_lock_and_pin(pagecache, block,
+ PAGECACHE_LOCK_WRITE, PAGECACHE_PIN);
#ifndef DBUG_OFF
DBUG_ASSERT(rc == 0);
}
@@ -3427,9 +3344,9 @@ static int flush_cached_blocks(PAGECACHE *pagecache,
MYF(MY_NABP | MY_WAIT_IF_FULL));
pagecache_pthread_mutex_lock(&pagecache->cache_lock);
- pagecache_make_lock_and_pin(pagecache, block,
- PAGECACHE_LOCK_WRITE_UNLOCK,
- PAGECACHE_UNPIN);
+ make_lock_and_pin(pagecache, block,
+ PAGECACHE_LOCK_WRITE_UNLOCK,
+ PAGECACHE_UNPIN);
pagecache->global_cache_write++;
if (error)
@@ -3443,7 +3360,7 @@ static int flush_cached_blocks(PAGECACHE *pagecache,
It might happen only during an operation to resize the key cache.
*/
if (block->wqueue[COND_FOR_SAVED].last_thread)
- release_queue(&block->wqueue[COND_FOR_SAVED]);
+ wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]);
/* type will never be FLUSH_IGNORE_CHANGED here */
if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE))
{
@@ -3577,6 +3494,7 @@ restart:
if ((error= flush_cached_blocks(pagecache, file, cache,
end,type)))
last_errno=error;
+ DBUG_PRINT("info", ("restarting..."));
/*
Restart the scan as some other thread might have changed
the changed blocks chain: the blocks that were in switch
@@ -3622,7 +3540,7 @@ removes a page from the list of dirty pages, while it's still dirty. A \
{
#ifdef THREAD
struct st_my_thread_var *thread= my_thread_var;
- add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
+ wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread);
do
{
KEYCACHE_DBUG_PRINT("flush_pagecache_blocks_int: wait",
@@ -3761,7 +3679,7 @@ static int flush_all_key_blocks(PAGECACHE *pagecache)
0 on success (always because it can't fail)
*/
-int reset_key_cache_counters(const char *name, PAGECACHE *key_cache)
+static int reset_key_cache_counters(const char *name, PAGECACHE *key_cache)
{
DBUG_ENTER("reset_key_cache_counters");
if (!key_cache->inited)
diff --git a/mysys/wqueue.c b/mysys/wqueue.c
new file mode 100644
index 00000000000..28e044ff606
--- /dev/null
+++ b/mysys/wqueue.c
@@ -0,0 +1,167 @@
+
+#include <wqueue.h>
+
+#define STRUCT_PTR(TYPE, MEMBER, a) \
+ (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER))
+/*
+ Link a thread into double-linked queue of waiting threads.
+
+ SYNOPSIS
+ wqueue_link_into_queue()
+ wqueue pointer to the queue structure
+ thread pointer to the thread to be added to the queue
+
+ RETURN VALUE
+ none
+
+ NOTES.
+ Queue is represented by a circular list of the thread structures
+ The list is double-linked of the type (**prev,*next), accessed by
+ a pointer to the last element.
+*/
+
+void wqueue_link_into_queue(WQUEUE *wqueue, struct st_my_thread_var *thread)
+{
+ struct st_my_thread_var *last;
+ if (!(last= wqueue->last_thread))
+ {
+ /* Queue is empty */
+ thread->next= thread;
+ thread->prev= &thread->next;
+ }
+ else
+ {
+ thread->prev= last->next->prev;
+ last->next->prev= &thread->next;
+ thread->next= last->next;
+ last->next= thread;
+ }
+ wqueue->last_thread= thread;
+}
+
+
+/*
+ Add a thread to single-linked queue of waiting threads
+
+ SYNOPSIS
+ wqueue_add_to_queue()
+ wqueue pointer to the queue structure
+ thread pointer to the thread to be added to the queue
+
+ RETURN VALUE
+ none
+
+ NOTES.
+ Queue is represented by a circular list of the thread structures
+ The list is single-linked of the type (*next), accessed by a pointer
+ to the last element.
+*/
+
+void wqueue_add_to_queue(WQUEUE *wqueue, struct st_my_thread_var *thread)
+{
+ struct st_my_thread_var *last;
+ if (!(last= wqueue->last_thread))
+ thread->next= thread;
+ else
+ {
+ thread->next= last->next;
+ last->next= thread;
+ }
+ wqueue->last_thread= thread;
+}
+
+/*
+ Unlink a thread from double-linked queue of waiting threads
+
+ SYNOPSIS
+ wqueue_unlink_from_queue()
+ wqueue pointer to the queue structure
+ thread pointer to the thread to be removed from the queue
+
+ RETURN VALUE
+ none
+
+ NOTES.
+ See NOTES for link_into_queue
+*/
+
+void wqueue_unlink_from_queue(WQUEUE *wqueue, struct st_my_thread_var *thread)
+{
+ if (thread->next == thread)
+ /* The queue contains only one member */
+ wqueue->last_thread= NULL;
+ else
+ {
+ thread->next->prev= thread->prev;
+ *thread->prev= thread->next;
+ if (wqueue->last_thread == thread)
+ wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next,
+ thread->prev);
+ }
+ thread->next= NULL;
+}
+
+
+/*
+ Remove all threads from queue signaling them to proceed
+
+ SYNOPSIS
+ wqueue_realease_queue()
+ wqueue pointer to the queue structure
+ thread pointer to the thread to be added to the queue
+
+ RETURN VALUE
+ none
+
+ NOTES.
+ See notes for add_to_queue
+ When removed from the queue each thread is signaled via condition
+ variable thread->suspend.
+*/
+
+void wqueue_release_queue(WQUEUE *wqueue)
+{
+ struct st_my_thread_var *last= wqueue->last_thread;
+ struct st_my_thread_var *next= last->next;
+ struct st_my_thread_var *thread;
+ do
+ {
+ thread= next;
+ pthread_cond_signal(&thread->suspend);
+ next= thread->next;
+ thread->next= NULL;
+ }
+ while (thread != last);
+ wqueue->last_thread= NULL;
+}
+
+
+/*
+ Add thread and wait
+
+ SYNOPSYS
+ wqueue_add_and_wait()
+ wqueue queue to add to
+ thread thread which is waiting
+ lock mutex need for the operation
+*/
+
+void wqueue_add_and_wait(WQUEUE *wqueue,
+ struct st_my_thread_var *thread, pthread_mutex_t *lock)
+{
+ DBUG_ENTER("wqueue_add_and_wait");
+ DBUG_PRINT("enter", ("thread ox%lxcond 0x%lx, mutex 0x%lx",
+ (ulong) thread, (ulong) &thread->suspend, (ulong) lock));
+ wqueue_add_to_queue(wqueue, thread);
+ do
+ {
+ DBUG_PRINT("info", ("wait... cond 0x%lx, mutex 0x%lx",
+ (ulong) &thread->suspend, (ulong) lock));
+ pthread_cond_wait(&thread->suspend, lock);
+ DBUG_PRINT("info", ("wait done cond 0x%lx, mutex 0x%lx, next 0x%lx",
+ (ulong) &thread->suspend, (ulong) lock,
+ (ulong) thread->next));
+ }
+ while (thread->next);
+ DBUG_VOID_RETURN;
+}
diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am
index 24636f139ab..2aa9a8a36cb 100644
--- a/storage/maria/Makefile.am
+++ b/storage/maria/Makefile.am
@@ -110,7 +110,7 @@ libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \
ma_ft_nlq_search.c ft_maria.c ma_sort.c \
ha_maria.cc trnman.c lockman.c tablockman.c \
ma_rt_index.c ma_rt_key.c ma_rt_mbr.c ma_rt_split.c \
- ma_sp_key.c ma_control_file.c
+ ma_sp_key.c ma_control_file.c ma_loghandler.c
CLEANFILES = test?.MA? FT?.MA? isam.log ma_test_all ma_rt_test.MA? sp_test.MA?
SUFFIXES = .sh
diff --git a/storage/maria/ma_control_file.h b/storage/maria/ma_control_file.h
index 9a99a721469..4b5ddd006c1 100644
--- a/storage/maria/ma_control_file.h
+++ b/storage/maria/ma_control_file.h
@@ -17,28 +17,10 @@
/*
WL#3234 Maria control file
First version written by Guilhem Bichot on 2006-04-27.
- Does not compile yet.
*/
-#ifndef _control_file_h
-#define _control_file_h
-
-/*
- Not everybody needs to call the control file that's why control_file.h is
- not in maria_def.h. However, policy or habit may want to change this.
-*/
-
-#ifndef REMOVE_WHEN_SANJA_PUSHES_LOG_HANDLER
-/*
- this is to get the control file to compile, until Sanja pushes the log
- handler which will supersede those definitions.
-*/
-typedef struct st_lsn {
- uint32 file_no;
- uint32 rec_offset;
-} LSN;
-#define maria_data_root "."
-#endif
+#ifndef _ma_control_file_h
+#define _ma_control_file_h
#define CONTROL_FILE_BASE_NAME "maria_control"
/*
diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c
new file mode 100644
index 00000000000..d5c19e29ce2
--- /dev/null
+++ b/storage/maria/ma_loghandler.c
@@ -0,0 +1,5417 @@
+#include "maria_def.h"
+#include <time.h>
+
+/* number of opened log files in the pagecache (should be at lesst 2) */
+#define OPENED_FILES_NUM 3
+
+/* records buffer size (should be LOG_PAGE_SIZE * n) */
+#define TRANSLOG_WRITE_BUFFER (1024*1024)
+/* min chunk length */
+#define TRANSLOG_MIN_CHUNK 3
+/*
+ Number of buffers used by loghandler
+
+ Should be at least 4, because one thread can block up to 2 buffers in
+ normal circumstances (less then half of one and full other, or just
+ switched one and other), But if we met end of the file in the middle and
+ have to switch buffer it will be 3. + 1 or 2 buffer for flushing/writing.
+*/
+#define TRANSLOG_BUFFERS_NO 5
+/* number of bytes which is worth to be left on first page */
+#define TRANSLOG_MINCHUNK_CONTENT 1
+/* length of transaction log file name maria_log.XXXXXXXX*/
+#define TRANSLOG_FILE_NAME_LENGTH 18
+/* version of log file */
+#define TRANSLOG_VERSION_ID 10000
+
+#define UNRECOVERABLE_ERROR(E) \
+ do { \
+ DBUG_PRINT("error", E); \
+ printf E; \
+ putchar('\n'); \
+ } while(0);
+
+
+/* record part descriptor */
+struct st_translog_part
+{
+ translog_size_t len;
+ uchar *buff;
+};
+
+/* record parts descriptor */
+struct st_translog_parts
+{
+ /* full record length */
+ translog_size_t record_length;
+ /* full record length with chunk headers */
+ translog_size_t total_record_length;
+ /* array of parts (st_translog_part) */
+ DYNAMIC_ARRAY parts;
+ /* current part index */
+ uint current;
+};
+
+/* log write buffer descriptor */
+struct st_translog_buffer
+{
+ LSN last_lsn;
+ /* This buffer offset in the file */
+ TRANSLOG_ADDRESS offset;
+ /*
+ How much written (or will be written when copy_to_buffer_in_progress
+ become 0) to this buffer
+ */
+ uint32 size;
+ /* This Buffer File */
+ File file;
+ /* Threads which are waiting for buffer filling/freeing */
+ WQUEUE waiting_filling_buffer;
+ /* Number of record which are in copy progress */
+ int16 copy_to_buffer_in_progress;
+ /* list of waiting buffer ready threads */
+ struct st_my_thread_var *waiting_flush;
+ /* lock for the buffer. Current buffer also lock the handler */
+ pthread_mutex_t mutex;
+ struct st_translog_buffer *overlay;
+#ifndef DBUG_OFF
+ struct st_my_thread_var *locked_by;
+ uint8 buffer_no;
+#endif
+ /* IO cache for current log */
+ uchar buffer[TRANSLOG_WRITE_BUFFER];
+};
+
+
+struct st_buffer_cursor
+{
+ /* pointer on the buffer */
+ uchar *ptr;
+ /* current page fill */
+ uint16 current_page_size;
+ /* how many times we finish this page to write it */
+ uint16 write_counter;
+ /* previous write offset */
+ uint16 previous_offset;
+ /* current buffer and its number */
+ struct st_translog_buffer *buffer;
+ uint8 buffer_no;
+ my_bool chaser, protected;
+};
+
+
+struct st_translog_descriptor
+{
+ /* *** Parameters of the log handler *** */
+
+ /* Directory to store files */
+ char directory[FN_REFLEN];
+ /* max size of one log size (for new logs creation) */
+ uint32 log_file_max_size;
+ /* server version */
+ uint32 server_version;
+ /* server ID */
+ uint32 server_id;
+ /* Page cache for the log reads */
+ PAGECACHE *pagecache;
+ /* Flags */
+ uint flags;
+ /* Page overhead calculated by flags */
+ uint16 page_overhead;
+ /* Page capacity calculated by flags (TRANSLOG_PAGE_SIZE-page_overhead-1) */
+ uint16 page_capacity_chunk_2;
+ /* Loghandler's buffer capacity in case of chunk 2 filling */
+ uint32 buffer_capacity_chunk_2;
+ /* Half of the buffer capacity in case of chunk 2 filling */
+ uint32 half_buffer_capacity_chunk_2;
+
+ /* *** Current state of the log handler *** */
+ /* Current and (OPENED_FILES_NUM-1) last logs number in page cache */
+ File log_file_num[OPENED_FILES_NUM];
+ /* buffers for log writing */
+ struct st_translog_buffer buffers[TRANSLOG_BUFFERS_NO];
+ /*
+ horizon - visible end of the log (here is absolute end of the log:
+ position where next chunk can start
+ */
+ TRANSLOG_ADDRESS horizon;
+ /* horizon buffer cursor */
+ struct st_buffer_cursor bc;
+
+ /* Last flushed LSN */
+ LSN flushed;
+ LSN sent_to_file;
+ pthread_mutex_t sent_to_file_lock;
+ File directory_fd;
+};
+
+static struct st_translog_descriptor log_descriptor;
+
+static uchar end_of_log= 0;
+
+/* record classes */
+enum record_class
+{
+ LOGRECTYPE_NOT_ALLOWED,
+ LOGRECTYPE_VARIABLE_LENGTH,
+ LOGRECTYPE_PSEUDOFIXEDLENGTH,
+ LOGRECTYPE_FIXEDLENGTH
+};
+
+/* chunk types */
+#define TRANSLOG_CHUNK_LSN 0x00 /* 0 chunk refer as LSN (head
+ or tail */
+#define TRANSLOG_CHUNK_FIXED 0x40 /* 1 (pseudo)fixed record (also
+ LSN) */
+#define TRANSLOG_CHUNK_NOHDR 0x80 /* 2 no header chunk (till page
+ end) */
+#define TRANSLOG_CHUNK_LNGTH 0xC0 /* 3 chunk with chunk length */
+#define TRANSLOG_CHUNK_TYPE 0xC0 /* Mask to get chunk type */
+#define TRANSLOG_REC_TYPE 0x3F /* Mask to get record type */
+
+/* compressed (relative) LSN constants */
+#define TRANSLOG_CLSN_LEN_BITS 0xC0 /* Mask to get compressed LSN
+ length */
+#define TRANSLOG_CLSN_MAX_LEN 5 /* Maximum length of compressed
+ LSN */
+
+typedef my_bool(*prewrite_rec_hook) (enum translog_record_type type,
+ void *tcb,
+ struct st_translog_parts *parts);
+
+typedef my_bool(*inwrite_rec_hook) (enum translog_record_type type,
+ void *tcb,
+ LSN *lsn,
+ struct st_translog_parts *parts);
+
+typedef int16(*read_rec_hook) (enum translog_record_type type,
+ int16 read_length, uchar *read_buff,
+ uchar *decoded_buff);
+
+/* Descriptor of log record type */
+struct st_log_record_type_descriptor
+{
+ /* internal class of the record */
+ enum record_class class;
+ /* length for fixed-size record, or maximum length of pseudo-fixed */
+ uint16 fixed_length;
+ /* how much record body (belonged to headers too) read with headers */
+ uint16 read_header_len;
+ /* HOOK for writing the record called before lock */
+ prewrite_rec_hook prewrite_hook;
+ /* HOOK for writing the record called when LSN is known */
+ inwrite_rec_hook inwrite_hook;
+ /* HOOK for reading headers */
+ read_rec_hook read_hook;
+ /*
+ For pseudo fixed records number of compressed LSNs followed by
+ system header
+ */
+ int16 compresed_LSN;
+};
+
+static struct st_log_record_type_descriptor
+ log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES]=
+{
+ /*LOGREC_RESERVED_FOR_CHUNKS23= 0 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_INSERT_ROW_HEAD= 1 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_INSERT_ROW_TAIL= 2 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_INSERT_ROW_BLOB= 3 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 8, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_INSERT_ROW_BLOBS= 4 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 10, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_PURGE_ROW= 5 */
+ {LOGRECTYPE_FIXEDLENGTH, 9, 9, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_PURGE_BLOCKS= 6 */
+ {LOGRECTYPE_FIXEDLENGTH, 10, 10, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_DELETE_ROW= 7 */
+ {LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_UPDATE_ROW_HEAD= 8 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_INDEX= 9 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_UNDELETE_ROW= 10 */
+ {LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, NULL, NULL, 0},
+ /*LOGREC_CLR_END= 11 */
+ {LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1},
+ /*LOGREC_PURGE_END= 12 */
+ {LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1},
+ /*LOGREC_UNDO_ROW_INSERT= 13 */
+ {LOGRECTYPE_PSEUDOFIXEDLENGTH, 14, 14, NULL, NULL, NULL, 1},
+ /*LOGREC_UNDO_ROW_DELETE= 14 */
+ {LOGRECTYPE_PSEUDOFIXEDLENGTH, 19, 19, NULL, NULL, NULL, 2},
+ /*LOGREC_UNDO_ROW_UPDATE= 15 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 14, NULL, NULL, NULL, 2},
+ /*LOGREC_UNDO_KEY_INSERT= 16 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 10, NULL, NULL, NULL, 1},
+ /*LOGREC_UNDO_KEY_DELETE= 17 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 15, NULL, NULL, NULL, 2},
+ /*LOGREC_PREPARE= 18 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0},
+ /*LOGREC_PREPARE_WITH_UNDO_PURGE= 19 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 5, NULL, NULL, NULL, 1},
+ /*LOGREC_COMMIT= 20 */
+ {LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL, NULL, NULL, 0},
+ /*LOGREC_COMMIT_WITH_UNDO_PURGE= 21 */
+ {LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1},
+ /*LOGREC_CHECKPOINT_PAGE= 22 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 6, NULL, NULL, NULL, 0},
+ /*LOGREC_CHECKPOINT_TRAN= 23 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0},
+ /*LOGREC_CHECKPOINT_TABL= 24 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 8, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_CREATE_TABLE= 25 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_RENAME_TABLE= 26 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_DROP_TABLE= 27 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0},
+ /*LOGREC_REDO_TRUNCATE_TABLE= 28 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0},
+ /*LOGREC_FILE_ID= 29 */
+ {LOGRECTYPE_VARIABLE_LENGTH, 0, 4, NULL, NULL, NULL, 0},
+ /*LOGREC_LONG_TRANSACTION_ID= 30 */
+ {LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0},
+ /*31 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*32 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*33 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*34 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*35 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*36 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*37 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*38 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*39 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*40 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*41 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*42 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*43 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*44 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*45 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*46 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*47 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*48 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*49 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*50 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*51 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*52 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*53 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*54 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*55 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*56 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*57 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*58 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*59 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*60 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*61 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*62 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0},
+ /*LOGREC_RESERVED_FUTURE_EXTENSION= 63 */
+ {LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0}
+};
+
+
+typedef struct st_translog_validator_data
+{
+ TRANSLOG_ADDRESS *addr;
+ my_bool was_recovered;
+} TRANSLOG_VALIDATOR_DATA;
+
+
+const char *maria_data_root;
+
+
+/*
+ Get file name of the log by log number
+
+ SYNOPSIS
+ translog_filename_by_fileno()
+ file_no Number of the log we want to open
+ path Pointer to buffer where file name will be
+ stored (must be FN_REFLEN bytes at least
+ RETURN
+ pointer to path
+*/
+
+char *translog_filename_by_fileno(uint32 file_no, char *path)
+{
+ char file_name[10 + 8 + 1];
+ char *res;
+ DBUG_ENTER("translog_filename_by_fileno");
+ my_sprintf(file_name, (file_name, "maria_log.%08u", file_no));
+ res= fn_format(path, file_name, log_descriptor.directory, "", MYF(MY_WME));
+ DBUG_PRINT("info", ("Path '%s', path: 0x%lx, res: 0x%lx",
+ res, (ulong) path, (ulong) res));
+ DBUG_RETURN(res);
+}
+
+
+/*
+ Open log file with given number without cache
+
+ SYNOPSIS
+ open_logfile_by_number_no_cache()
+ file_no Number of the log we want to open
+
+ RETURN
+ 0 error
+ file descriptor number
+*/
+
+File open_logfile_by_number_no_cache(uint32 file_no)
+{
+ File file;
+ char path[FN_REFLEN];
+ DBUG_ENTER("open_logfile_by_number_no_cache");
+
+ if ((file= my_open(translog_filename_by_fileno(file_no, path), O_CREAT | O_BINARY | /* O_DIRECT
+ |
+ */ O_RDWR,
+ MYF(MY_WME))) < 0)
+ {
+ UNRECOVERABLE_ERROR(("Error %d during opening file '%s'", errno, path));
+ DBUG_RETURN(0);
+ }
+ DBUG_PRINT("info", ("File '%s', handler %d", path, file));
+ DBUG_RETURN(file);
+}
+
+
+/*
+ Write log file page header in the just opened new log file
+
+ SYNOPSIS
+ translog_write_file_header();
+
+ RETURN
+ 0 OK
+ 1 ERROR
+*/
+
+my_bool translog_write_file_header()
+{
+ ulonglong timestamp;
+ char page[TRANSLOG_PAGE_SIZE];
+ DBUG_ENTER("translog_write_file_header");
+
+ /* file tag */
+ strnmov(page, "MARIALOG", 8);
+ /* timestamp */
+ timestamp= my_getsystime();
+ int8store(page + 8, timestamp);
+ /* maria version */
+ int4store(page + (8 + 8), TRANSLOG_VERSION_ID);
+ /* mysql version (MYSQL_VERSION_ID) */
+ int4store(page + (8 + 8 + 4), log_descriptor.server_version);
+ /* server ID */
+ int4store(page + (8 + 8 + 4 + 4), log_descriptor.server_id);
+ /* loghandler page size/512 */
+ int2store(page + (8 + 8 + 4 + 4 + 4), TRANSLOG_PAGE_SIZE / 512);
+ /* file number */
+ int3store(page + (8 + 8 + 4 + 4 + 4 + 2), log_descriptor.horizon.file_no);
+
+ bzero(page + (8 + 8 + 4 + 4 + 4 + 2 + 3),
+ TRANSLOG_PAGE_SIZE - (8 + 8 + 4 + 4 + 4 + 2 + 3));
+
+ if (my_pwrite(log_descriptor.log_file_num[0], page,
+ TRANSLOG_PAGE_SIZE, 0, MYF(MY_WME)) != TRANSLOG_PAGE_SIZE)
+ DBUG_RETURN(1);
+
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Initialize transaction log file buffer
+
+ SYNOPSIS
+ translog_buffer_init()
+ buffer The buffer to initialize
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+my_bool translog_buffer_init(struct st_translog_buffer *buffer)
+{
+ DBUG_ENTER("translog_buffer_init");
+ /* This buffer offset */
+ buffer->last_lsn.file_no= buffer->offset.file_no= 0;
+ buffer->last_lsn.rec_offset= buffer->offset.rec_offset= 0;
+ /* This Buffer File */
+ buffer->file= 0;
+ buffer->overlay= 0;
+ /* IO cache for current log */
+ bzero(buffer->buffer, TRANSLOG_WRITE_BUFFER);
+ /* Buffer size */
+ buffer->size= 0;
+ /* cond of thread which is waiting for buffer filling */
+ buffer->waiting_filling_buffer.last_thread= 0;
+ /* Number of record which are in copy progress */
+ buffer->copy_to_buffer_in_progress= 0;
+ /* list of waiting buffer ready threads */
+ buffer->waiting_flush= 0;
+ /* lock for the buffer. Current buffer also lock the handler */
+ if (pthread_mutex_init(&buffer->mutex, MY_MUTEX_INIT_FAST))
+ DBUG_RETURN(1);
+ DBUG_PRINT("info", ("Init buffer #%u: 0x%lx",
+ (uint) buffer->buffer_no, (ulong) buffer));
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Close transaction log file by descriptor
+
+ SYNOPSIS
+ translog_close_log_file()
+ file file descriptor
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+static my_bool translog_close_log_file(File file)
+{
+ PAGECACHE_FILE fl=
+ {
+ file
+ };
+ flush_pagecache_blocks(log_descriptor.pagecache, &fl, FLUSH_RELEASE);
+ return test(my_close(file, MYF(MY_WME)));
+}
+
+
+/*
+ Create and fill header of new file
+
+ SYNOPSIS
+ translog_create_new_file()
+
+ RETURN
+ 0 OK
+ 1 Error
+*/
+
+my_bool translog_create_new_file()
+{
+ int i;
+
+ DBUG_ENTER("translog_create_new_file");
+
+ if (log_descriptor.log_file_num[OPENED_FILES_NUM - 1] &&
+ translog_close_log_file(log_descriptor.log_file_num[OPENED_FILES_NUM -
+ 1]))
+ DBUG_RETURN(1);
+ for (i= OPENED_FILES_NUM - 1; i > 0; i--)
+ {
+ log_descriptor.log_file_num[i]= log_descriptor.log_file_num[i - 1];
+ }
+
+ if ((log_descriptor.log_file_num[0]=
+ open_logfile_by_number_no_cache(log_descriptor.horizon.file_no)) <= 0 ||
+ translog_write_file_header())
+ DBUG_RETURN(1);
+
+ if (ma_control_file_write_and_force(NULL, log_descriptor.horizon.file_no,
+ CONTROL_FILE_UPDATE_ONLY_LOGNO))
+ DBUG_RETURN(1);
+
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Lock the loghandler buffer
+
+ SYNOPSIS
+ translog_buffer_lock()
+ buffer This buffer which should be locked
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+#ifndef DBUG_OFF
+static my_bool translog_buffer_lock(struct st_translog_buffer *buffer)
+{
+ int res;
+ DBUG_ENTER("translog_buffer_lock");
+ DBUG_PRINT("enter", ("Lock buffer #%u (0x%lx): locked by:0x%lx, mutex: 0x%lx",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (ulong) buffer->locked_by, (ulong) &buffer->mutex));
+ res= (pthread_mutex_lock(&buffer->mutex) != 0);
+#ifndef DBUG_OFF
+ if (res == 0)
+ {
+ DBUG_ASSERT(buffer->locked_by == 0);
+ buffer->locked_by= my_thread_var;
+ }
+ else
+ DBUG_PRINT("error", ("Can't lock mutex 0x%lx (locked by0x%lx) errno: %d",
+ (ulong) &buffer->mutex,
+ (ulong) buffer->locked_by, res));
+#endif
+ DBUG_RETURN(res);
+}
+#else
+#define translog_buffer_lock(B) \
+ pthread_mutex_lock(&B->mutex);
+#endif
+
+
+/*
+ Unlock the loghandler buffer
+
+ SYNOPSIS
+ translog_buffer_unlock()
+ buffer This buffer which should be unlocked
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+#ifndef DBUG_OFF
+static my_bool translog_buffer_unlock(struct st_translog_buffer *buffer)
+{
+ int res;
+ DBUG_ENTER("translog_buffer_unlock");
+ DBUG_PRINT("enter", ("Unlock buffer... #%u (0x%lx) :locked by:0x%lx (0x%lx),"
+ " mutex: 0x%lx",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (ulong) buffer->locked_by, (ulong) my_thread_var,
+ (ulong) &buffer->mutex));
+ DBUG_ASSERT(buffer->locked_by == my_thread_var);
+
+ buffer->locked_by= 0;
+ res= (pthread_mutex_unlock(&buffer->mutex) != 0);
+ DBUG_PRINT("enter", ("Unlocked buffer... #%u: 0x%lx, mutex: 0x%lx",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (ulong) &buffer->mutex));
+ DBUG_RETURN(res);
+}
+#else
+#define translog_buffer_unlock(B) \
+ pthread_mutex_unlock(&B->mutex);
+#endif
+
+
+/*
+ Write page header.
+
+ SYNOPSIS
+ translog_new_page_header()
+ horizon Where to write the page
+ cursor Where to write the page
+
+ NOTE
+ - space for page header should be checked before
+*/
+
+static void translog_new_page_header(TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor)
+{
+ uchar *ptr;
+
+ DBUG_ENTER("translog_new_page_header");
+ DBUG_ASSERT(cursor->ptr !=NULL);
+
+ cursor->protected= 0;
+
+ ptr= cursor->ptr;
+ /* Page number */
+ int3store(ptr, horizon->rec_offset / TRANSLOG_PAGE_SIZE);
+ ptr +=3;
+ /* File number */
+ int3store(ptr, horizon->file_no);
+ ptr +=3;
+ *(ptr ++)= (uchar) log_descriptor.flags;
+ if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
+ {
+#ifndef DBUG_OFF
+ DBUG_PRINT("info", ("write 0x11223344 CRC to (%lu,0x%lx)",
+ (ulong) horizon->file_no, (ulong) horizon->rec_offset));
+ int4store(ptr, 0x11223344);
+#endif
+ ptr +=4; /* CRC will be put when page
+ will be finished */
+ }
+ if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
+ {
+ time_t tm;
+ int2store(ptr, time(&tm) & 0xFFFF);
+ ptr +=(TRANSLOG_PAGE_SIZE / 512) * 2;
+ }
+ {
+ uint len= (ptr -cursor->ptr);
+ horizon->rec_offset+= len;
+ cursor->current_page_size= len;
+ if (!cursor->chaser)
+ cursor->buffer->size+= len;
+ }
+ cursor->ptr= ptr;
+ DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx, chaser: %d, Size: %lu (%lu)",
+ (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
+ cursor->chaser, (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr -cursor->buffer->buffer)));
+ DBUG_ASSERT(cursor->chaser ||
+ ((ulong) (cursor->ptr -cursor->buffer->buffer) ==
+ cursor->buffer->size));
+ DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no);
+ DBUG_ASSERT(cursor->current_page_size <= TRANSLOG_PAGE_SIZE);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Put sector protection on the page image
+
+ SYNOPSIS
+ translog_put_sector_protection()
+ page reference on the page content
+ cursor cursor of the buffer
+*/
+
+static void translog_put_sector_protection(uchar *page,
+ struct st_buffer_cursor *cursor)
+{
+ uchar *table= page + log_descriptor.page_overhead -
+ (TRANSLOG_PAGE_SIZE / 512) * 2;
+ uint16 value= uint2korr(table) + cursor->write_counter;
+ uint16 last_protected_sector= (cursor->previous_offset - 1) / 512;
+ uint16 start_sector= cursor->previous_offset / 512;
+ uint i, offset;
+
+ DBUG_ENTER("translog_put_sector_protection");
+ if (start_sector == 0)
+ start_sector= 1;
+
+ DBUG_PRINT("enter", ("Write counter %u, value %u, offset %u, "
+ "last protected %u, start sector %u",
+ (uint) cursor->write_counter,
+ (uint) value,
+ (uint) cursor->previous_offset,
+ (uint) last_protected_sector, (uint) start_sector));
+ if (last_protected_sector == start_sector)
+ {
+ i= last_protected_sector * 2;
+ offset= last_protected_sector * 512;
+ /* restore data, because we modified sector which was protected */
+ if (offset < cursor->previous_offset)
+ page[offset]= table[i];
+ offset++;
+ if (offset < cursor->previous_offset)
+ page[offset]= table[i + 1];
+ }
+ for (i= start_sector * 2, offset= start_sector * 512;
+ i < (TRANSLOG_PAGE_SIZE / 512) * 2; (i+= 2), (offset+= 512))
+ {
+ DBUG_PRINT("info", ("sector %u, offset %u, data 0x%x%x",
+ i / 2, offset, (uint) page[offset],
+ (uint) page[offset + 1]));
+ table[i]= page[offset];
+ table[i + 1]= page[offset + 1];
+ /**((uint16 *)(table + i))= *((uint16* )(page + offset));*/
+ int2store(page + offset, value);
+ DBUG_PRINT("info", ("sector %u, offset %u, data 0x%x%x",
+ i / 2, offset, (uint) page[offset],
+ (uint) page[offset + 1]));
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Calculate adler CRC of given area
+
+ SYNOPSIS
+ translog_adler_crc()
+ area Pointer of the area beginning
+ length The Area length
+
+ RETURN
+ Adler CRC32
+*/
+
+uint32 translog_adler_crc(uchar *area, uint length)
+{
+ uint32 a= 1, b= 0;
+#define MOD_ADLER 65521
+
+ while (length)
+ {
+ uint tlen= length > 5550 ? 5550 : length;
+ length-= tlen;
+ do
+ {
+ a+= *area++;
+ b+= a;
+ } while (--tlen);
+ a= (a & 0xffff) + (a >> 16) * (65536 - MOD_ADLER);
+ b= (b & 0xffff) + (b >> 16) * (65536 - MOD_ADLER);
+ }
+ /* It can be shown that a <= 0x1013a here, so a single subtract will do. */
+ if (a >= MOD_ADLER)
+ a-= MOD_ADLER;
+ /* It can be shown that b can reach 0xffef1 here. */
+ b= (b & 0xffff) + (b >> 16) * (65536 - MOD_ADLER);
+ if (b >= MOD_ADLER)
+ b-= MOD_ADLER;
+ return (b << 16) | a;
+}
+
+
+/*
+ Finish current page with zeros
+
+ SYNOPSIS
+ translog_finish_page()
+ horizon \ horizon & buffer pointers
+ cursor /
+*/
+
+static void translog_finish_page(TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor)
+{
+ uint16 left= TRANSLOG_PAGE_SIZE - cursor->current_page_size;
+ uchar *page= cursor->ptr -cursor->current_page_size;
+ DBUG_ENTER("translog_finish_page");
+
+ DBUG_PRINT("enter", ("Buffer #%u 0x%lx, "
+ "Buffer addr (%lu,0x%lx), "
+ "Page addr: (%lu,0x%lx), "
+ "size %lu (%lu), Pg: %u, left: %u",
+ (uint) cursor->buffer_no, (ulong) cursor->buffer,
+ (ulong) cursor->buffer->offset.file_no,
+ (ulong) cursor->buffer->offset.rec_offset,
+ (ulong) horizon->file_no,
+ (ulong) (horizon->rec_offset -
+ cursor->current_page_size),
+ (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr -cursor->buffer->buffer),
+ (uint) cursor->current_page_size, (uint) left));
+ DBUG_ASSERT(cursor->ptr !=NULL);
+ DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE ==
+ cursor->current_page_size % TRANSLOG_PAGE_SIZE);
+ DBUG_ASSERT(horizon->file_no == cursor->buffer->offset.file_no);
+ DBUG_ASSERT(cursor->buffer->offset.rec_offset +
+ (cursor->ptr -cursor->buffer->buffer) == horizon->rec_offset);
+ if (cursor->protected)
+ {
+ DBUG_PRINT("info", ("Already protected and finished"));
+ DBUG_VOID_RETURN;
+ }
+ if (left != TRANSLOG_PAGE_SIZE && left != 0)
+ {
+ DBUG_PRINT("info", ("left %u", (uint) left));
+ bzero(cursor->ptr, left);
+ cursor->ptr +=left;
+ horizon->rec_offset+= left;
+ if (!cursor->chaser)
+ cursor->buffer->size+= left;
+ cursor->current_page_size= 0;
+ DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx, "
+ "chaser: %d, Size: %lu (%lu)",
+ (uint) cursor->buffer->buffer_no,
+ (ulong) cursor->buffer, cursor->chaser,
+ (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr -cursor->buffer->buffer)));
+ DBUG_ASSERT(cursor->chaser
+ || ((ulong) (cursor->ptr -cursor->buffer->buffer) ==
+ cursor->buffer->size));
+ DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no);
+ }
+ if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
+ {
+ translog_put_sector_protection(page, cursor);
+ DBUG_PRINT("info", ("drop write_counter"));
+ cursor->write_counter= 0;
+ cursor->previous_offset= 0;
+ }
+ if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
+ {
+ uint32 crc= translog_adler_crc(page + log_descriptor.page_overhead,
+ TRANSLOG_PAGE_SIZE -
+ log_descriptor.page_overhead);
+ DBUG_PRINT("info", ("CRC: 0x%lx", (ulong) crc));
+ int4store(page + 3 + 3 + 1, crc);
+ }
+ cursor->protected= 1;
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Wait until all thread finish filling this buffer
+
+ SYNOPSIS
+ translog_wait_for_writers()
+ buffer This buffer should be check
+
+ NOTE
+ This buffer should be locked
+*/
+static void translog_wait_for_writers(struct st_translog_buffer *buffer)
+{
+ struct st_my_thread_var *thread;
+ DBUG_ENTER("translog_wait_for_writers");
+ DBUG_PRINT("enter", ("Buffer #%u 0x%lx, copies in progress: %u",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (int) buffer->copy_to_buffer_in_progress));
+
+ if (!buffer->copy_to_buffer_in_progress)
+ DBUG_VOID_RETURN;
+
+ thread= my_thread_var;
+
+ DBUG_ASSERT(buffer->file != 0);
+
+ do
+ {
+ DBUG_PRINT("info", ("wait for writers... , thread 0x%lx, "
+ "buffer #%u 0x%lx, locked by 0x%lx (0x%lx), "
+ "mutex: 0x%lx",
+ thread, (uint) buffer->buffer_no, (ulong) buffer,
+ (ulong) buffer->locked_by, (ulong) thread,
+ (ulong) &buffer->mutex));
+#ifndef DBUG_OFF
+ DBUG_ASSERT(buffer->locked_by == thread);
+ buffer->locked_by= 0;
+#endif
+ wqueue_add_and_wait(&buffer->waiting_filling_buffer, thread,
+ &buffer->mutex);
+ DBUG_PRINT("info", ("wait for writers done, thread 0x%lx, "
+ "buffer #%u 0x%lx, locked by 0x%lx (0x%lx), "
+ "mutex: 0x%lx",
+ thread, (uint) buffer->buffer_no, (ulong) buffer,
+ (ulong) buffer->locked_by, (ulong) thread,
+ (ulong) &buffer->mutex));
+#ifndef DBUG_OFF
+ DBUG_ASSERT(buffer->locked_by == 0);
+ buffer->locked_by= thread;
+#endif
+ } while (buffer->copy_to_buffer_in_progress != 0);
+
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+
+ Wait for this buffer become free
+
+ SYNOPSIS
+ translog_wait_for_buffer_free()
+ buffer The buffer to initialize
+
+ NOTE
+ - this buffer should be locked
+*/
+
+static void translog_wait_for_buffer_free(struct st_translog_buffer *buffer)
+{
+ struct st_my_thread_var *thread= my_thread_var;
+ DBUG_ENTER("translog_wait_for_buffer_free");
+ DBUG_PRINT("enter", ("Buffer #%u 0x%lx, copies in progress: %u size 0x%lu",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (int) buffer->copy_to_buffer_in_progress,
+ (ulong) buffer->size));
+
+ translog_wait_for_writers(buffer);
+
+ if (!buffer->file)
+ DBUG_VOID_RETURN;
+
+ thread= my_thread_var;
+
+ do
+ {
+ DBUG_PRINT("info", ("wait for writers... , thread 0x%lx, "
+ "buffer #%u 0x%lx, locked by 0x%lx (0x%lx), "
+ "mutex: 0x%lx",
+ thread, (uint) buffer->buffer_no, (ulong) buffer,
+ (ulong) buffer->locked_by, (ulong) thread,
+ (ulong) &buffer->mutex));
+#ifndef DBUG_OFF
+ DBUG_ASSERT(buffer->locked_by == thread);
+ buffer->locked_by= 0;
+#endif
+ wqueue_add_and_wait(&buffer->waiting_filling_buffer, thread,
+ &buffer->mutex);
+ DBUG_PRINT("info", ("wait for writers done, thread 0x%lx, "
+ "buffer #%u 0x%lx, locked by 0x%lx (0x%lx), "
+ "mutex: 0x%lx",
+ thread, (uint) buffer->buffer_no, (ulong) buffer,
+ (ulong) buffer->locked_by, (ulong) thread,
+ (ulong) &buffer->mutex));
+#ifndef DBUG_OFF
+ DBUG_ASSERT(buffer->locked_by == 0);
+ buffer->locked_by= thread;
+#endif
+ } while (buffer->copy_to_buffer_in_progress != 0);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Set cursor on the buffer beginning
+
+ SYNOPSIS
+ translog_cursor_init()
+ buffer The buffer
+ cursor It's cursor
+ buffer_no Number of buffer
+*/
+
+static void translog_cursor_init(struct st_buffer_cursor *cursor,
+ struct st_translog_buffer *buffer,
+ uint8 buffer_no)
+{
+ DBUG_ENTER("translog_cursor_init");
+ cursor->ptr= buffer->buffer;
+ cursor->buffer= buffer;
+ cursor->buffer_no= buffer_no;
+ cursor->current_page_size= 0;
+ cursor->chaser= (cursor != &log_descriptor.bc);
+ DBUG_PRINT("info", ("drop write_counter"));
+ cursor->write_counter= 0;
+ cursor->previous_offset= 0;
+ cursor->protected= 0;
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Initialize buffer for current file
+
+ SYNOPSIS
+ translog_start_buffer()
+ buffer The buffer
+ cursor It's cursor
+ buffer_no Number of buffer
+*/
+static void translog_start_buffer(struct st_translog_buffer *buffer,
+ struct st_buffer_cursor *cursor,
+ uint8 buffer_no)
+{
+ DBUG_ENTER("translog_start_buffer");
+ DBUG_PRINT("enter",
+ ("Assign buffer #%u (0x%lx) to file %u, offset 0x%lx(%lu)",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (uint) log_descriptor.log_file_num[0],
+ (ulong) log_descriptor.horizon.rec_offset,
+ (ulong) log_descriptor.horizon.rec_offset));
+ DBUG_ASSERT(buffer_no == buffer->buffer_no);
+ buffer->last_lsn.file_no= 0;
+ buffer->last_lsn.rec_offset= 0;
+ buffer->offset= log_descriptor.horizon;
+ buffer->file= log_descriptor.log_file_num[0];
+ buffer->overlay= 0;
+ buffer->size= 0;
+ translog_cursor_init(cursor, buffer, buffer_no);
+ DBUG_PRINT("info", ("init cursor #%u: 0x%lx, chaser: %d, Size: %lu (%lu)",
+ (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
+ cursor->chaser, (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr -cursor->buffer->buffer)));
+ DBUG_ASSERT(cursor->chaser ||
+ ((ulong) (cursor->ptr -cursor->buffer->buffer) ==
+ cursor->buffer->size));
+ DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Switch to the next buffer in a chain
+
+ SYNOPSIS
+ translog_buffer_next()
+ horizon \ Pointers on current position in file and buffer
+ cursor /
+ next_file Also start new file
+
+ NOTE:
+ - loghandler should be locked
+ - after return new and old buffer still are locked
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool translog_buffer_next(TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor,
+ my_bool new_file)
+{
+ uint8 old_buffer_no= cursor->buffer_no;
+ uint8 new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
+ struct st_translog_buffer *new_buffer= log_descriptor.buffers + new_buffer_no;
+ my_bool chasing= cursor->chaser;
+ DBUG_ENTER("translog_buffer_next");
+
+ DBUG_PRINT("info", ("horizon (%u,0x%lx), chasing: %d",
+ (uint) log_descriptor.horizon.file_no,
+ (ulong) log_descriptor.horizon.rec_offset, chasing));
+
+ DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, *horizon) >= 0);
+
+ translog_finish_page(horizon, cursor);
+
+ if (!chasing)
+ {
+ translog_buffer_lock(new_buffer);
+ translog_wait_for_buffer_free(new_buffer);
+ }
+#ifndef DBUG_OFF
+ else
+ DBUG_ASSERT(new_buffer->file != 0);
+#endif
+ if (new_file)
+ {
+ horizon->file_no++;
+ horizon->rec_offset= TRANSLOG_PAGE_SIZE; /* header page */
+ if (!chasing && translog_create_new_file())
+ {
+ DBUG_RETURN(1);
+ }
+ }
+
+ /* prepare next page */
+ if (chasing)
+ translog_cursor_init(cursor, new_buffer, new_buffer_no);
+ else
+ translog_start_buffer(new_buffer, cursor, new_buffer_no);
+ translog_new_page_header(horizon, cursor);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Set max LSN send to file
+
+ SYNOPSIS
+ translog_set_sent_to_file()
+ lsn LSN to assign
+*/
+
+static void translog_set_sent_to_file(LSN *lsn)
+{
+ DBUG_ENTER("translog_set_sent_to_file");
+ pthread_mutex_lock(&log_descriptor.sent_to_file_lock);
+ DBUG_ASSERT(cmp_translog_addr(*lsn, log_descriptor.sent_to_file) >= 0);
+ log_descriptor.sent_to_file= *lsn;
+ pthread_mutex_unlock(&log_descriptor.sent_to_file_lock);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Get max LSN send to file
+
+ SYNOPSIS
+ translog_get_sent_to_file()
+ lsn LSN to value
+*/
+
+static void translog_get_sent_to_file(LSN *lsn)
+{
+ DBUG_ENTER("translog_get_sent_to_file");
+ pthread_mutex_lock(&log_descriptor.sent_to_file_lock);
+ *lsn= log_descriptor.sent_to_file;
+ pthread_mutex_unlock(&log_descriptor.sent_to_file_lock);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Get first chunk address on the given page
+
+ SYNOPSIS
+ translog_get_first_chunk_offset()
+ page The page where to find first chunk
+
+ RETURN
+ first chunk offset
+ 0 - Error
+*/
+
+static my_bool translog_get_first_chunk_offset(uchar *page)
+{
+ uint16 page_header= 7;
+ DBUG_ENTER("translog_get_first_chunk_offset");
+
+ if (page[6] & TRANSLOG_PAGE_CRC)
+ {
+ page_header+= 4;
+ }
+ if (page[6] & TRANSLOG_SECTOR_PROTECTION)
+ {
+ page_header+= (TRANSLOG_PAGE_SIZE / 512) * 2;
+ }
+ DBUG_RETURN(page_header);
+}
+
+
+/*
+ Write coded length of record
+
+ SYNOPSIS
+ translog_write_variable_record_1group_code_len
+ dst Destination buffer pointer
+ length Length which should be coded
+ header_len Calculated total header length
+*/
+
+static void
+translog_write_variable_record_1group_code_len(uchar *dst,
+ translog_size_t length,
+ uint16 header_len)
+{
+ switch (header_len) {
+ case 6: /* (5 + 1) */
+ DBUG_ASSERT(length <= 250);
+ *dst= (uint8) length;
+ return;
+ case 8: /* (5 + 3) */
+ DBUG_ASSERT(length <= 0xFFFF);
+ *dst= 251;
+ int2store(dst + 1, length);
+ return;
+ case 9: /* (5 + 4) */
+ DBUG_ASSERT(length <= 0xFFFFFF);
+ *dst= 252;
+ int3store(dst + 1, length);
+ return;
+ case 10: /* (5 + 5) */
+ *dst= 253;
+ int4store(dst + 1, length);
+ return;
+ default:
+ DBUG_ASSERT(0);
+ }
+ return;
+}
+
+
+/*
+ Decode record data length and advance given pointer to the next field
+
+ SYNOPSIS
+ translog_variable_record_1group_decode_len()
+ src The pointer to the pointer to the length beginning
+
+ RETURN
+ decoded length
+*/
+
+static translog_size_t translog_variable_record_1group_decode_len(uchar **src)
+{
+ uint8 first= (uint8) (**src);
+ switch (first) {
+ case 251:
+ *src+= 3;
+ return (uint2korr((*src) - 2));
+ case 252:
+ *src+= 4;
+ return (uint3korr((*src) - 3));
+ case 253:
+ *src+= 5;
+ return (uint4korr((*src) - 4));
+ case 254:
+ case 255:
+ DBUG_ASSERT(0); /* reserved for future use */
+ return (0);
+ default:
+ (*src)++;
+ return (first);
+ }
+}
+
+
+/*
+ Get total length of this chunk (not only body)
+
+ SYNOPSIS
+ translog_get_total_chunk_length()
+ page The page where chunk placed
+ offset Offset of the chunk on this place
+
+ RETURN
+ total length of the chunk
+ 0 - Error
+*/
+
+uint16 translog_get_total_chunk_length(uchar *page, uint16 offset)
+{
+ DBUG_ENTER("translog_get_total_chunk_length");
+ switch (page[offset] & TRANSLOG_CHUNK_TYPE) {
+ case TRANSLOG_CHUNK_LSN: /* 0 chunk referred as LSN
+ (head or tail) */
+ {
+ translog_size_t rec_len;
+ uchar *start= page + offset;
+ uchar *ptr= start + 1 + 2;
+ uint16 chunk_len, header_len, page_rest;
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
+ rec_len= translog_variable_record_1group_decode_len(&ptr);
+ chunk_len= uint2korr(ptr);
+ header_len= (ptr -start) +2;
+ DBUG_PRINT("info", ("rec len: %lu, chunk len: %u, header len: %u",
+ (ulong) rec_len, (uint) chunk_len, (uint) header_len));
+ if (chunk_len)
+ {
+ DBUG_PRINT("info", ("chunk len: %u + %u = %u",
+ (uint) header_len, (uint) chunk_len,
+ (uint) (chunk_len + header_len)));
+ DBUG_RETURN(chunk_len + header_len);
+ }
+ page_rest= TRANSLOG_PAGE_SIZE - offset;
+ DBUG_PRINT("info", ("page_rest %u", (uint) page_rest));
+ if (rec_len + header_len < page_rest)
+ DBUG_RETURN(rec_len + header_len);
+ DBUG_RETURN(page_rest);
+ break;
+ }
+ case TRANSLOG_CHUNK_FIXED: /* 1 (pseudo)fixed record (also
+ LSN) */
+ {
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED"));
+ uint type= page[offset] & TRANSLOG_REC_TYPE;
+ DBUG_ASSERT(log_record_type_descriptor[type].class ==
+ LOGRECTYPE_FIXEDLENGTH ||
+ log_record_type_descriptor[type].class ==
+ LOGRECTYPE_PSEUDOFIXEDLENGTH);
+ if (log_record_type_descriptor[type].class == LOGRECTYPE_FIXEDLENGTH)
+ {
+ DBUG_PRINT("info",
+ ("Fixed length: %u",
+ (uint) (log_record_type_descriptor[type].fixed_length + 3)));
+ DBUG_RETURN(log_record_type_descriptor[type].fixed_length + 3);
+ }
+ {
+ uchar *ptr= page + offset + 3; /* first compressed LSN */
+ int i= 0;
+ uint length= log_record_type_descriptor[type].fixed_length + 3;
+ for (; i < log_record_type_descriptor[type].compresed_LSN; i++)
+ {
+ /* first 2 bits is length - 2 */
+ uint len= ((((uint8) (*ptr)) & TRANSLOG_CLSN_LEN_BITS) >> 6) + 2;
+ ptr+= len;
+ length-= (TRANSLOG_CLSN_MAX_LEN - len); /* subtract economized
+ bytes */
+ }
+ DBUG_PRINT("info", ("Pseudo-fixed length: %u", length));
+ DBUG_RETURN(length);
+ }
+ break;
+ }
+ case TRANSLOG_CHUNK_NOHDR: /* 2 no header chunk (till page
+ end) */
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR, length: %u",
+ (uint) (TRANSLOG_PAGE_SIZE - offset)));
+ DBUG_RETURN(TRANSLOG_PAGE_SIZE - offset);
+ break;
+ case TRANSLOG_CHUNK_LNGTH: /* 3 chunk with chunk length */
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH"));
+ DBUG_ASSERT(TRANSLOG_PAGE_SIZE - offset >= 3);
+ DBUG_PRINT("info", ("Length %u", uint2korr(page + offset + 1) + 3));
+ DBUG_RETURN(uint2korr(page + offset + 1) + 3);
+ break;
+ default:
+ DBUG_ASSERT(0);
+ }
+}
+
+
+/*
+ Flush given buffer
+
+ SYNOPSIS
+ translog_buffer_flush()
+ buffer This buffer should be flushed
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool translog_buffer_flush(struct st_translog_buffer *buffer)
+{
+ uint32 i;
+ DBUG_ENTER("translog_buffer_flush");
+ DBUG_PRINT("enter",
+ ("Buffer #%u 0x%lx: locked by 0x%lx (0x%lx), "
+ "file: %u, offset (%lu,0x%lx), size %lu",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (ulong) buffer->locked_by, my_thread_var,
+ (uint) buffer->file,
+ (ulong) buffer->offset.file_no, (ulong) buffer->offset.rec_offset,
+ (ulong) buffer->size));
+
+ DBUG_ASSERT(buffer->locked_by == my_thread_var);
+ DBUG_ASSERT(buffer->file != 0);
+
+ translog_wait_for_writers(buffer);
+ if (buffer->overlay && buffer->overlay->file)
+ {
+ struct st_translog_buffer *overlay= buffer->overlay;
+ translog_buffer_unlock(buffer);
+ translog_buffer_lock(overlay);
+ translog_wait_for_buffer_free(overlay);
+ translog_buffer_unlock(overlay);
+ translog_buffer_lock(buffer);
+ }
+
+ for (i= 0; i < buffer->size; i+= TRANSLOG_PAGE_SIZE)
+ {
+ PAGECACHE_FILE file=
+ {
+ buffer->file
+ };
+ if (pagecache_write(log_descriptor.pagecache,
+ &file,
+ (buffer->offset.rec_offset + i) / TRANSLOG_PAGE_SIZE,
+ 3,
+ buffer->buffer + i,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED, PAGECACHE_WRITE_DONE, 0))
+ {
+ UNRECOVERABLE_ERROR(("Cant't write page (%lu,0x%lx) to pagecacte",
+ (ulong) buffer->file,
+ (ulong) (buffer->offset.rec_offset + i)));
+ }
+ }
+ if (my_pwrite(buffer->file, (char*) buffer->buffer,
+ buffer->size, buffer->offset.rec_offset,
+ MYF(MY_WME)) != buffer->size)
+ {
+ UNRECOVERABLE_ERROR(("Cant't buffer (%lu,0x%lx) size %lu to the disk (%d)",
+ (ulong) buffer->file,
+ (ulong) buffer->offset.rec_offset,
+ (ulong) buffer->size, errno));
+ DBUG_RETURN(1);
+ }
+ if (buffer->last_lsn.rec_offset != 0) /* if buffer->last_lsn is set */
+ translog_set_sent_to_file(&buffer->last_lsn);
+ /* Free buffer */
+ buffer->file= 0;
+ buffer->overlay= 0;
+ if (buffer->waiting_filling_buffer.last_thread != NULL)
+ {
+ wqueue_release_queue(&buffer->waiting_filling_buffer);
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Recover page with sector protection (wipe out failed chunks)
+
+ SYNOPSYS
+ translog_recover_page_up_to_sector()
+ page reference on the page
+ offset offset of failed sector
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool translog_recover_page_up_to_sector(uchar *page, uint16 offset)
+{
+ uint16 chunk_offset= translog_get_first_chunk_offset(page), valid_chunk_end;
+ DBUG_ENTER("translog_recover_page_up_to_sector");
+ DBUG_PRINT("enter", ("offset %u, first chunk %u",
+ (uint) offset, (uint) chunk_offset));
+
+ if (chunk_offset == 0)
+ DBUG_RETURN(1);
+
+ while (page[chunk_offset] != '\0' && chunk_offset < offset)
+ {
+ uint16 chunk_length;
+ if ((chunk_length=
+ translog_get_total_chunk_length(page, chunk_offset)) == 0)
+ {
+ UNRECOVERABLE_ERROR(("cant get chunk length (offset %u)",
+ (uint) chunk_offset));
+ DBUG_RETURN(1);
+ }
+ DBUG_PRINT("info", ("chunk: offset: %u, length %u",
+ (uint) chunk_offset, (uint) chunk_length));
+ if (((ulong) chunk_offset) + ((ulong) chunk_length) > TRANSLOG_PAGE_SIZE)
+ {
+ UNRECOVERABLE_ERROR(("demaged chunk (offset %u) in trusted area",
+ (uint) chunk_offset));
+ DBUG_RETURN(1);
+ }
+ chunk_offset+= chunk_length;
+ }
+
+ valid_chunk_end= chunk_offset;
+ /*end of trusted area - sector parsing */
+ while (page[chunk_offset] != '\0')
+ {
+ uint16 chunk_length;
+ if ((chunk_length=
+ translog_get_total_chunk_length(page, chunk_offset)) == 0)
+ {
+ break;
+ }
+ DBUG_PRINT("info", ("chunk: offset: %u, length %u",
+ (uint) chunk_offset, (uint) chunk_length));
+ if (((ulong) chunk_offset) + ((ulong) chunk_length) > (uint) (offset + 512))
+ {
+ break;
+ }
+ chunk_offset+= chunk_length;
+ valid_chunk_end= chunk_offset;
+ }
+ DBUG_PRINT("info", ("valid chunk end offset: %u", (uint) valid_chunk_end));
+
+ bzero(page + valid_chunk_end, TRANSLOG_PAGE_SIZE - valid_chunk_end);
+
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Log page validator
+
+ SYNOPSIS
+ translog_page_validator()
+ page_addr The page to check
+ data data, need for validation (address in this case)
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+static my_bool translog_page_validator(byte *page_addr, gptr data)
+{
+ uint8 flags;
+ uchar *page= (uchar*) page_addr;
+ DBUG_ENTER("translog_page_validator");
+ TRANSLOG_ADDRESS *addr= ((TRANSLOG_VALIDATOR_DATA*) data)->addr;
+
+ ((TRANSLOG_VALIDATOR_DATA*) data)->was_recovered= 0;
+
+ if (uint3korr(page) != addr->rec_offset / TRANSLOG_PAGE_SIZE ||
+ uint3korr(page + 3) != addr->file_no)
+ {
+ UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): "
+ "page address written in the page is incorrect :"
+ "File %lu instead of %lu or page %lu instead of %lu",
+ (ulong) addr->file_no, (ulong) addr->rec_offset,
+ (ulong) uint3korr(page + 3), (ulong) addr->file_no,
+ (ulong) uint3korr(page),
+ (ulong) addr->rec_offset / TRANSLOG_PAGE_SIZE));
+ DBUG_RETURN(1);
+ }
+ flags= page[3 + 3];
+ if (flags & ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
+ TRANSLOG_RECORD_CRC))
+ {
+ UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): "
+ "Garbage in the page flags field detected : %x",
+ (ulong) addr->file_no, (ulong) addr->rec_offset,
+ (uint) flags));
+ DBUG_RETURN(1);
+ }
+ if (flags & TRANSLOG_PAGE_CRC)
+ {
+ uint32 crc= translog_adler_crc(page + log_descriptor.page_overhead,
+ TRANSLOG_PAGE_SIZE -
+ log_descriptor.page_overhead);
+ if (crc != uint4korr(page + 3 + 3 + 1))
+ {
+ UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): "
+ "CRC mismatch: calculated: %lx on the page %lx",
+ (ulong) addr->file_no, (ulong) addr->rec_offset,
+ (ulong) crc, (ulong) uint4korr(page + 3 + 3 + 1)));
+ DBUG_RETURN(1);
+ }
+ }
+ if (flags & TRANSLOG_SECTOR_PROTECTION)
+ {
+ uint i, offset;
+ uchar *table= (page + 3 + 3 + 1 + ((flags & TRANSLOG_PAGE_CRC) ? 4 : 0));
+ uint16 current= uint2korr(table);
+ for (i= 2, offset= 512;
+ i < (TRANSLOG_PAGE_SIZE / 512) * 2; i+= 2, offset+= 512)
+ {
+ /*
+ TODO: add cunk counting for "suspecting" sectors (difference is
+ more that 1-2)
+ */
+ uint16 test= uint2korr(page + offset);
+ DBUG_PRINT("info", ("sector #%u offset %u current %lx "
+ "read 0x%lx stored 0x%x%x",
+ i / 2, offset, current,
+ (uint) uint2korr(page + offset), (uint) table[i],
+ (uint) table[i + 1]));
+ if (test < current)
+ {
+ if (0xFFFFLL - current + test > 512 / 3)
+ {
+ /* it is not overflow */
+ if (translog_recover_page_up_to_sector(page, offset))
+ DBUG_RETURN(1);
+ ((TRANSLOG_VALIDATOR_DATA*) data)->was_recovered= 1;
+ DBUG_RETURN(0);
+ }
+ }
+ else if (test - current > 512 / 3)
+ {
+ if (translog_recover_page_up_to_sector(page, offset))
+ DBUG_RETURN(1);
+ ((TRANSLOG_VALIDATOR_DATA*) data)->was_recovered= 1;
+ DBUG_RETURN(0);
+ }
+
+ /* Return value on the page */
+ page[offset]= table[i];
+ page[offset + 1]= table[i + 1];
+ /**((uint16*)page + offset)= *((uint16*)(table + i));*/
+
+ current= test;
+ DBUG_PRINT("info", ("sector #%u offset %u current %lx "
+ "read 0x%lx stored 0x%x%x",
+ i / 2, offset, current,
+ (uint) uint2korr(page + offset), (uint) table[i],
+ (uint) table[i + 1]));
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+/*
+ Get log page by file number and offset of the beginning of the page
+
+ SYNOPSIS
+ translog_get_page()
+ data validator data, which contains the page address
+ buffer buffer for page placing
+ (might not be used in some cache implementations)
+
+ RETURN
+ pointer to the page cache which should be used to read this page
+ NULL - Error
+*/
+
+uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer)
+{
+ uint cache_index;
+ DBUG_ENTER("translog_get_page");
+ DBUG_PRINT("enter", ("File %lu, Offset %lu(0x%lx)",
+ (ulong) data->addr->file_no,
+ (ulong) data->addr->rec_offset,
+ (ulong) data->addr->rec_offset));
+
+ /* it is really page address */
+ DBUG_ASSERT(data->addr->rec_offset % TRANSLOG_PAGE_SIZE == 0);
+
+ if ((cache_index= log_descriptor.horizon.file_no - data->addr->file_no) <
+ OPENED_FILES_NUM)
+ {
+ PAGECACHE_FILE file;
+ /* file in the cache */
+ if (log_descriptor.log_file_num[cache_index] == 0)
+ {
+ if ((log_descriptor.log_file_num[cache_index]=
+ open_logfile_by_number_no_cache(data->addr->file_no)) == 0)
+ {
+ DBUG_RETURN(NULL);
+ }
+ }
+ file.file= log_descriptor.log_file_num[cache_index];
+
+ buffer= (uchar*)
+ pagecache_valid_read(log_descriptor.pagecache, &file,
+ data->addr->rec_offset / TRANSLOG_PAGE_SIZE,
+ 3, (char*) buffer,
+ PAGECACHE_PLAIN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED, 0,
+ &translog_page_validator, (gptr) data);
+ }
+ else
+ {
+ File file= open_logfile_by_number_no_cache(data->addr->file_no);
+ if (my_pread(file, (char*) buffer, TRANSLOG_PAGE_SIZE,
+ data->addr->rec_offset, MYF(MY_FNABP | MY_WME)))
+ buffer= NULL;
+ else if (translog_page_validator((byte*) buffer, (gptr) data))
+ buffer= NULL;
+ my_close(file, MYF(MY_WME));
+ }
+ DBUG_RETURN(buffer);
+}
+
+
+/*
+ Finds last page of the given log file
+
+ SYNOPSIS
+ translog_get_last_page_addr()
+ addr address structure to fill with data, which contain
+ file number of the log file
+ last_page_ok assigned 1 if last page was OK
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr,
+ my_bool *last_page_ok)
+{
+ MY_STAT stat_buff, *stat;
+ char path[FN_REFLEN];
+ DBUG_ENTER("translog_get_last_page_addr");
+
+ if ((stat= my_stat (translog_filename_by_fileno(addr->file_no,
+ path),
+ &stat_buff, MYF(MY_WME))) == NULL)
+ DBUG_RETURN(1);
+ DBUG_PRINT("info", ("File size %lu", (ulong) stat->st_size));
+ if (stat->st_size > TRANSLOG_PAGE_SIZE)
+ {
+ addr->rec_offset= (((stat->st_size / TRANSLOG_PAGE_SIZE) - 1) *
+ TRANSLOG_PAGE_SIZE);
+ *last_page_ok= (stat->st_size == addr->rec_offset + TRANSLOG_PAGE_SIZE);
+ }
+ else
+ {
+ *last_page_ok= 0;
+ addr->rec_offset= 0;
+ }
+ DBUG_PRINT("info", ("Last page: 0x%lx, ok %d", (ulong) addr->rec_offset,
+ *last_page_ok));
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Get number bytes for record length storing
+
+ SYNOPSIS
+ translog_variable_record_length_bytes()
+ length Record length wich will be codded
+
+ RETURN
+ 1,3,4,5 - number of bytes to store given length
+*/
+static uint translog_variable_record_length_bytes(translog_size_t length)
+{
+ if (length < 250)
+ return 1;
+ else if (length < 0xFFFF)
+ return 3;
+ else if (length < 0xFFFFFF)
+ return 4;
+ return 5;
+}
+
+
+/*
+ Get header of this chunk
+
+ SYNOPSIS
+ translog_get_chunk_header_length()
+ page The page where chunk placed
+ offset Offset of the chunk on this place
+
+ RETURN
+ total length of the chunk
+ 0 - Error
+*/
+
+uint16 translog_get_chunk_header_length(uchar *page, uint16 offset)
+{
+ DBUG_ENTER("translog_get_chunk_header_length");
+ switch (page[offset] & TRANSLOG_CHUNK_TYPE) {
+ case TRANSLOG_CHUNK_LSN: /* 0 chunk referred as LSN
+ (head or tail) */
+ {
+ translog_size_t rec_len;
+ uchar *start= page + offset;
+ uchar *ptr= start + 1 + 2;
+ uint16 chunk_len, header_len;
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
+ rec_len= translog_variable_record_1group_decode_len(&ptr);
+ chunk_len= uint2korr(ptr);
+ header_len= (ptr -start) +2;
+ DBUG_PRINT("info", ("rec len: %lu, chunk len: %u, header len: %u",
+ (ulong) rec_len, (uint) chunk_len, (uint) header_len));
+ if (chunk_len)
+ {
+ /*TODO: fine header end */
+ DBUG_ASSERT(0);
+ }
+ DBUG_RETURN(header_len);
+ break;
+ }
+ case TRANSLOG_CHUNK_FIXED: /* 1 (pseudo)fixed record (also
+ LSN) */
+ {
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED = 3"));
+ DBUG_RETURN(3);
+ }
+ case TRANSLOG_CHUNK_NOHDR: /* 2 no header chunk (till page
+ end) */
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR = 1"));
+ DBUG_RETURN(1);
+ break;
+ case TRANSLOG_CHUNK_LNGTH: /* 3 chunk with chunk length */
+ DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH = 3"));
+ DBUG_RETURN(3);
+ break;
+ default:
+ DBUG_ASSERT(0);
+ }
+}
+
+
+/*
+ Initialize transaction log
+
+ SYNOPSIS
+ translog_init()
+ directory Directory where log files are put
+ log_file_max_size max size of one log size (for new logs creation)
+ server_version version of MySQL server (MYSQL_VERSION_ID)
+ server_id server ID (replication & Co)
+ pagecache Page cache for the log reads
+ flags flags (TRANSLOG_PAGE_CRC, TRANSLOG_SECTOR_PROTECTION
+ TRANSLOG_RECORD_CRC)
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+my_bool translog_init(const char *directory,
+ uint32 log_file_max_size,
+ uint32 server_version,
+ uint32 server_id, PAGECACHE *pagecache, uint flags)
+{
+ int i;
+ int old_log_was_recovered= 0, logs_found= 0;
+ TRANSLOG_ADDRESS sure_page, last_page, last_valid_page;
+ DBUG_ENTER("translog_init");
+
+
+ if (pthread_mutex_init(&log_descriptor.sent_to_file_lock, MY_MUTEX_INIT_FAST))
+ DBUG_RETURN(1);
+
+ /* Directory to store files */
+ unpack_dirname(log_descriptor.directory, directory);
+
+ if ((log_descriptor.directory_fd= my_open(log_descriptor.directory,
+ O_RDONLY, MYF(MY_WME))) < 0)
+ {
+ UNRECOVERABLE_ERROR(("Error %d during opening directory '%s'",
+ errno, log_descriptor.directory));
+ DBUG_RETURN(1);
+ }
+
+ /* max size of one log size (for new logs creation) */
+ log_descriptor.log_file_max_size=
+ log_file_max_size - (log_file_max_size % TRANSLOG_PAGE_SIZE);
+ /* server version */
+ log_descriptor.server_version= server_version;
+ /* server ID */
+ log_descriptor.server_id= server_id;
+ /* Page cache for the log reads */
+ log_descriptor.pagecache= pagecache;
+ /* Flags */
+ DBUG_ASSERT((flags &
+ ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
+ TRANSLOG_RECORD_CRC)) == 0);
+ log_descriptor.flags= flags;
+ log_descriptor.page_overhead= 7;
+ if (flags & TRANSLOG_PAGE_CRC)
+ log_descriptor.page_overhead+= 4;
+ if (flags & TRANSLOG_SECTOR_PROTECTION)
+ log_descriptor.page_overhead+= (TRANSLOG_PAGE_SIZE / 512) * 2;
+ log_descriptor.page_capacity_chunk_2=
+ TRANSLOG_PAGE_SIZE - log_descriptor.page_overhead - 1;
+ DBUG_ASSERT(TRANSLOG_WRITE_BUFFER % TRANSLOG_PAGE_SIZE == 0);
+ log_descriptor.buffer_capacity_chunk_2=
+ (TRANSLOG_WRITE_BUFFER / TRANSLOG_PAGE_SIZE) *
+ log_descriptor.page_capacity_chunk_2;
+ log_descriptor.half_buffer_capacity_chunk_2=
+ log_descriptor.buffer_capacity_chunk_2 / 2;
+ DBUG_PRINT("info",
+ ("Overhead: %u, pc2: %u, bc2: %u, bc2/2: %u",
+ log_descriptor.page_overhead,
+ log_descriptor.page_capacity_chunk_2,
+ log_descriptor.buffer_capacity_chunk_2,
+ log_descriptor.half_buffer_capacity_chunk_2));
+
+ /* *** Current state of the log handler *** */
+
+ /* Init log handler file handlers cache */
+ for (i= 0; i < OPENED_FILES_NUM; i++)
+ {
+ log_descriptor.log_file_num[i]= 0;
+ }
+
+ /* just to init it somehow */
+ translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
+
+ /* Buffers for log writing */
+ for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
+ {
+#ifndef DBUG_OFF
+ log_descriptor.buffers[i].buffer_no= (uint8) i;
+ log_descriptor.buffers[i].locked_by= NULL;
+#endif
+ if (translog_buffer_init(log_descriptor.buffers + i))
+ DBUG_RETURN(1);
+ }
+
+ logs_found= (last_logno != CONTROL_FILE_IMPOSSIBLE_FILENO);
+
+ if (logs_found)
+ {
+ my_bool pageok;
+ /*
+ TODO: scan directory for maria_log.XXXXXXXX files and find
+ highest XXXXXXXX & set logs_found
+ */
+
+ /* TODO: check that last checkpoint within present log addresses space */
+ /* find the log end */
+ if (last_checkpoint_lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO)
+ {
+ DBUG_ASSERT(last_checkpoint_lsn.rec_offset == 0);
+ /* there was no checkpoints we will read from the beginning */
+ sure_page.file_no= 1;
+ sure_page.rec_offset= TRANSLOG_PAGE_SIZE;
+ }
+ else
+ {
+ sure_page= last_checkpoint_lsn;
+ DBUG_ASSERT(sure_page.rec_offset % TRANSLOG_PAGE_SIZE != 0);
+ sure_page.rec_offset-= sure_page.rec_offset % TRANSLOG_PAGE_SIZE;
+ }
+ log_descriptor.horizon.file_no= last_page.file_no= last_logno;
+ if (translog_get_last_page_addr(&last_page, &pageok))
+ DBUG_RETURN(1);
+ if (last_page.rec_offset == 0)
+ {
+ if (last_page.file_no == 1)
+ {
+ logs_found= 0; /* file #1 has no pages */
+ }
+ else
+ {
+ last_page.file_no--;
+ if (translog_get_last_page_addr(&last_page, &pageok))
+ DBUG_RETURN(1);
+ }
+ }
+ }
+ if (logs_found)
+ {
+ TRANSLOG_ADDRESS current_page= sure_page;
+ my_bool pageok;
+
+ DBUG_ASSERT(sure_page.file_no < last_page.file_no ||
+ (sure_page.file_no == last_page.file_no &&
+ sure_page.rec_offset <= last_page.rec_offset));
+
+ /* TODO: check page size */
+
+ last_valid_page.file_no= CONTROL_FILE_IMPOSSIBLE_FILENO;
+ last_valid_page.rec_offset= 0;
+ /* scan and validate pages */
+ do
+ {
+ TRANSLOG_ADDRESS current_file_last_page;
+ current_file_last_page.file_no= current_page.file_no;
+ if (translog_get_last_page_addr(&current_file_last_page, &pageok))
+ DBUG_RETURN(1);
+ if (!pageok)
+ {
+ DBUG_PRINT("error", ("File %u have no complete last page",
+ (uint) current_file_last_page.file_no));
+ old_log_was_recovered= 1;
+ /* This file is not written till the end so it should be last */
+ last_page= current_file_last_page;
+ /* TODO: issue warning */
+ }
+ do
+ {
+ TRANSLOG_VALIDATOR_DATA data=
+ {
+ &current_page, 0
+ };
+ uchar buffer[TRANSLOG_PAGE_SIZE], *page;
+ if ((page= translog_get_page(&data, buffer)) == NULL)
+ DBUG_RETURN(1);
+ if (data.was_recovered)
+ {
+ DBUG_PRINT("error", ("file no %u (%d), rec_offset 0x%lx (%lu) (%d)",
+ (uint) current_page.file_no,
+ (uint3korr(page + 3) != current_page.file_no),
+ (ulong) current_page.rec_offset,
+ (ulong) (current_page.rec_offset /
+ TRANSLOG_PAGE_SIZE),
+ (uint3korr(page) !=
+ current_page.rec_offset / TRANSLOG_PAGE_SIZE)));
+ old_log_was_recovered= 1;
+ break;
+ }
+ last_valid_page= current_page;
+ current_page.rec_offset+= TRANSLOG_PAGE_SIZE;
+ } while (current_page.rec_offset <= current_file_last_page.rec_offset);
+ current_page.file_no++;
+ current_page.rec_offset= TRANSLOG_PAGE_SIZE;
+ } while (current_page.file_no <= last_page.file_no &&
+ !old_log_was_recovered);
+ if (last_valid_page.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO)
+ {
+ DBUG_ASSERT(last_valid_page.rec_offset == 0);
+
+ /* Panic!!! Even page which should be valid is invalid */
+ /* TODO: issue error */
+ DBUG_RETURN(1);
+ }
+ DBUG_PRINT("info", ("Last valid page is in file %lu offset %lu (0x%lx), "
+ "Logs found: %d, was recovered: %d",
+ (ulong) last_valid_page.file_no,
+ (ulong) last_valid_page.rec_offset,
+ (ulong) last_valid_page.rec_offset,
+ logs_found, old_log_was_recovered));
+
+ /* TODO: check server ID */
+ if (logs_found && !old_log_was_recovered)
+ {
+ TRANSLOG_VALIDATOR_DATA data=
+ {
+ &last_valid_page, 0
+ };
+ uchar buffer[TRANSLOG_PAGE_SIZE], *page;
+ uint16 chunk_offset;
+ /* continue old log */
+ DBUG_ASSERT(last_valid_page.file_no == log_descriptor.horizon.file_no);
+ if ((page= translog_get_page(&data,
+ buffer)) == NULL ||
+ (chunk_offset= translog_get_first_chunk_offset(page)) == 0)
+ DBUG_RETURN(1);
+
+ /* Puts filled part of old page in the buffer */
+ log_descriptor.horizon= last_valid_page;
+ translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
+ /*
+ Free space if filled with 0 and first byte of
+ real chunk can't be 0
+ */
+ while (chunk_offset < TRANSLOG_PAGE_SIZE && page[chunk_offset] != '\0')
+ {
+ uint16 chunk_length;
+ if ((chunk_length=
+ translog_get_total_chunk_length(page, chunk_offset)) == 0)
+ DBUG_RETURN(1);
+ DBUG_PRINT("info", ("chunk: offset: %u, length %u",
+ (uint) chunk_offset, (uint) chunk_length));
+ chunk_offset+= chunk_length;
+
+ /* chunk can't cross the page border */
+ DBUG_ASSERT(chunk_offset <= TRANSLOG_PAGE_SIZE);
+ }
+ memmove(log_descriptor.buffers->buffer, page, chunk_offset);
+ log_descriptor.bc.buffer->size+= chunk_offset;
+ log_descriptor.bc.ptr+= chunk_offset;
+ log_descriptor.bc.current_page_size= chunk_offset;
+ log_descriptor.horizon.rec_offset=
+ chunk_offset + last_valid_page.rec_offset;
+ DBUG_PRINT("info", ("Move Page #%u: 0x%lx, chaser: %d, Size: %lu (%lu)",
+ (uint) log_descriptor.bc.buffer_no,
+ (ulong) log_descriptor.bc.buffer,
+ log_descriptor.bc.chaser,
+ (ulong) log_descriptor.bc.buffer->size,
+ (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
+ buffer->buffer)));
+ DBUG_ASSERT(log_descriptor.bc.chaser
+ ||
+ ((ulong)
+ (log_descriptor.bc.ptr -log_descriptor.bc.buffer->buffer) ==
+ log_descriptor.bc.buffer->size));
+ DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no ==
+ log_descriptor.bc.buffer_no);
+ DBUG_ASSERT(log_descriptor.bc.current_page_size <= TRANSLOG_PAGE_SIZE);
+ }
+ }
+ DBUG_PRINT("info", ("Logs found: %d, was recovered %d",
+ logs_found, old_log_was_recovered));
+ if (!logs_found)
+ {
+ /* Start new log system from scratch */
+ /* Current log number */
+ log_descriptor.horizon.file_no= 1;
+ /* Used space */
+ log_descriptor.horizon.rec_offset= TRANSLOG_PAGE_SIZE; // header page
+ /* Current logs file number in page cache */
+ log_descriptor.log_file_num[0]=
+ open_logfile_by_number_no_cache(log_descriptor.horizon.file_no);
+ if (translog_write_file_header())
+ DBUG_RETURN(1);
+ if (ma_control_file_write_and_force(NULL, log_descriptor.horizon.file_no,
+ CONTROL_FILE_UPDATE_ONLY_LOGNO))
+ DBUG_RETURN(1);
+ /* assign buffer 0 */
+ translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
+ translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
+ }
+ else if (old_log_was_recovered)
+ {
+ int buffer_touched= log_descriptor.bc.buffer->file;
+ if (buffer_touched)
+ {
+ struct st_translog_buffer *buffer= log_descriptor.bc.buffer;
+ /*
+ We are in initialization so we can use translog_buffer_lock instead
+ of translog_lock, because there is no other threads which can lock
+ the loghandler.
+ */
+ if (translog_buffer_lock(buffer) ||
+ translog_buffer_next(&log_descriptor.horizon, &log_descriptor.bc,
+ 1) ||
+ translog_buffer_unlock(log_descriptor.bc.buffer) ||
+ translog_buffer_flush(buffer) || translog_buffer_unlock(buffer))
+ DBUG_RETURN(1);
+ }
+ else
+ {
+ log_descriptor.horizon.file_no++; /* leave the demaged file
+ untouched */
+ log_descriptor.horizon.rec_offset= TRANSLOG_PAGE_SIZE; /* header page */
+ if (translog_create_new_file())
+ DBUG_RETURN(1);
+ /*
+ Buffer system left untouched after recovery => we should init it
+ (starting from buffer 0)
+ */
+ translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
+ translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
+ }
+ }
+
+ /* all LSNs that are on disk are flushed */
+ log_descriptor.sent_to_file= log_descriptor.flushed= log_descriptor.horizon;
+ log_descriptor.flushed.rec_offset--;
+ log_descriptor.sent_to_file.rec_offset--;
+
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Free transaction log file buffer
+
+ SYNOPSIS
+ translog_buffer_destroy()
+ buffer_no The buffer to free
+
+ NOTE
+ This buffer should be locked;
+*/
+
+static void translog_buffer_destroy(struct st_translog_buffer *buffer)
+{
+ DBUG_ENTER("translog_buffer_destroy");
+ DBUG_PRINT("enter",
+ ("Buffer #%u: 0x%lx, file: %u, offset (%u,0x%lx), size %lu",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ (uint) buffer->file,
+ (ulong) buffer->offset.file_no, (ulong) buffer->offset.rec_offset,
+ (ulong) buffer->size));
+ DBUG_ASSERT(buffer->waiting_filling_buffer.last_thread == 0);
+ if (buffer->file)
+ {
+ /*
+ We ignore error here, because we can't do something about it
+ (it is shutting down)
+ */
+ translog_buffer_flush(buffer);
+ }
+ DBUG_PRINT("info", ("Unlock mutex 0x%lx", (ulong) &buffer->mutex));
+ pthread_mutex_unlock(&buffer->mutex);
+ DBUG_PRINT("info", ("Destroy mutex 0x%lx", (ulong) &buffer->mutex));
+ pthread_mutex_destroy(&buffer->mutex);
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Free log handler resources
+
+ SYNOPSIS
+ translog_destroy()
+*/
+
+void translog_destroy()
+{
+ int i;
+ DBUG_ENTER("translog_destroy");
+ if (log_descriptor.bc.buffer->file != 0)
+ translog_finish_page(&log_descriptor.horizon, &log_descriptor.bc);
+
+ for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
+ {
+ struct st_translog_buffer *buffer= log_descriptor.buffers + i;
+ translog_buffer_lock(buffer);
+ translog_buffer_destroy(buffer);
+ }
+ /* close files */
+ for (i= 0; i < OPENED_FILES_NUM; i++)
+ {
+ if (log_descriptor.log_file_num[i])
+ translog_close_log_file(log_descriptor.log_file_num[i]);
+ }
+ pthread_mutex_destroy(&log_descriptor.sent_to_file_lock);
+ my_close(log_descriptor.directory_fd, MYF(MY_WME));
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Lock the loghandler
+
+ SYNOPSIS
+ translog_lock()
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool translog_lock()
+{
+ struct st_translog_buffer *current_buffer;
+ DBUG_ENTER("translog_lock");
+
+ /*
+ locking the loghandler mean locking current buffer, but it can change
+ during locking, so we should check it
+ */
+ for (;;)
+ {
+ current_buffer= log_descriptor.bc.buffer;
+ if (translog_buffer_lock(current_buffer))
+ DBUG_RETURN(1);
+ if (log_descriptor.bc.buffer == current_buffer)
+ break;
+ translog_buffer_unlock(current_buffer);
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Unlock the loghandler
+
+ SYNOPSIS
+ translog_unlock()
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+#ifndef DBUG_OFF
+static my_bool translog_unlock()
+{
+ DBUG_ENTER("translog_unlock");
+ translog_buffer_unlock(log_descriptor.bc.buffer);
+
+ DBUG_RETURN(0);
+}
+#else
+#define translog_unlock() \
+ translog_buffer_unlock(log_descriptor.bc.buffer);
+#endif
+
+/*
+ Start new page
+
+ SYNOPSIS
+ translog_page_next()
+ horizon \ Position in file and buffer where we are
+ cursor /
+ prev_buffer Buffer which should be flushed will be assigned
+ here if it is need
+
+ NOTE
+ handler should be locked
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor,
+ struct st_translog_buffer **prev_buffer)
+{
+ struct st_translog_buffer *buffer= cursor->buffer;
+ DBUG_ENTER("translog_page_next");
+
+ if ((cursor->ptr +TRANSLOG_PAGE_SIZE >
+ cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER) ||
+ (horizon->rec_offset + TRANSLOG_PAGE_SIZE >
+ log_descriptor.log_file_max_size))
+ {
+ DBUG_PRINT("info", ("Switch to next buffer, Buffer Size %lu (%lu) => %d, "
+ "File size %lu max %lu => %d",
+ (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr -cursor->buffer->buffer),
+ (cursor->ptr +TRANSLOG_PAGE_SIZE >
+ cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER),
+ (ulong) horizon->rec_offset,
+ (ulong) log_descriptor.log_file_max_size,
+ (horizon->rec_offset + TRANSLOG_PAGE_SIZE >
+ log_descriptor.log_file_max_size)));
+ if (translog_buffer_next(horizon, cursor,
+ (horizon->rec_offset +
+ TRANSLOG_PAGE_SIZE) >
+ log_descriptor.log_file_max_size))
+ DBUG_RETURN(1);
+ *prev_buffer= buffer;
+ DBUG_PRINT("info", ("Buffer #%u (0x%lu) have to be flushed",
+ (uint) buffer->buffer_no, (ulong) buffer));
+ }
+ else
+ {
+ DBUG_PRINT("info", ("Use the same buffer #%u (0x%lu), "
+ "Buffer Size %lu (%lu)",
+ (uint) buffer->buffer_no,
+ (ulong) buffer,
+ (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr -cursor->buffer->buffer)));
+ translog_finish_page(horizon, cursor);
+ translog_new_page_header(horizon, cursor);
+ *prev_buffer= NULL;
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Write data of given length to the current page
+
+ SYNOPSIS
+ translog_write_data_on_page()
+ horizon \ Pointers on file and buffer
+ cursor /
+ length IN length of the chunk
+ buffer buffer with data
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor,
+ translog_size_t length, uchar *buffer)
+{
+ DBUG_ENTER("translog_write_data_on_page");
+ DBUG_PRINT("enter", ("Chunk length: %lu Page size %u",
+ (ulong) length, (uint) cursor->current_page_size));
+ DBUG_ASSERT(length > 0);
+ DBUG_ASSERT(length + cursor->current_page_size <= TRANSLOG_PAGE_SIZE);
+ DBUG_ASSERT(length + cursor->ptr <=cursor->buffer->buffer +
+ TRANSLOG_WRITE_BUFFER);
+
+ memmove(cursor->ptr, buffer, length);
+ cursor->ptr+= length;
+ horizon->rec_offset+= length;
+ cursor->current_page_size+= length;
+ if (!cursor->chaser)
+ cursor->buffer->size+= length;
+ DBUG_PRINT("info", ("Write data buffer #%u: 0x%lx,"
+ "chaser: %d, Size: %lu (%lu)",
+ (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
+ cursor->chaser, (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr -cursor->buffer->buffer)));
+ DBUG_ASSERT(cursor->chaser ||
+ ((ulong) (cursor->ptr -cursor->buffer->buffer) ==
+ cursor->buffer->size));
+ DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no);
+ DBUG_ASSERT(cursor->current_page_size <= TRANSLOG_PAGE_SIZE);
+
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Write data from parts of given length to the current page
+
+ SYNOPSIS
+ translog_write_parts_on_page()
+ horizon \ Pointers on file and buffer
+ cursor /
+ length IN length of the chunk
+ parts IN/OUT chunk source
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor,
+ translog_size_t length,
+ struct st_translog_parts *parts)
+{
+ translog_size_t left= length;
+ uint cur= (uint) parts->current;
+ DBUG_ENTER("translog_write_parts_on_page");
+ DBUG_PRINT("enter", ("Chunk length: %lu, parts %u of %u. Page size %u, "
+ "Buffer size: %lu (%lu)",
+ (ulong) length,
+ (uint) (cur + 1), (uint) parts->parts.elements,
+ (uint) cursor->current_page_size,
+ (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr -cursor->buffer->buffer)));
+ DBUG_ASSERT(length > 0);
+ DBUG_ASSERT(length + cursor->current_page_size <= TRANSLOG_PAGE_SIZE);
+ DBUG_ASSERT(length + cursor->ptr <=cursor->buffer->buffer +
+ TRANSLOG_WRITE_BUFFER);
+
+ do
+ {
+ translog_size_t len;
+ struct st_translog_part part;
+ uchar *buff;
+
+ DBUG_ASSERT(cur < parts->parts.elements);
+ get_dynamic(&parts->parts, (gptr) &part, cur);
+ buff= part.buff;
+ DBUG_PRINT("info", ("Part %u, Length: %lu, left: %lu",
+ (uint) (cur + 1), (ulong) part.len, (ulong) left));
+
+ if (part.len > left)
+ {
+ /* we should write less then the current part */
+ len= left;
+ part.len-= len;
+ part.buff+= len;
+ if (set_dynamic(&parts->parts, (gptr) &part, cur))
+ DBUG_RETURN(1);
+ DBUG_PRINT("info", ("Set new part %u, Length: %lu",
+ (uint) (cur + 1), (ulong) part.len));
+ }
+ else
+ {
+ len= part.len;
+ cur++;
+ DBUG_PRINT("info", ("moved to next part (len: %lu)", (ulong) len));
+ }
+ DBUG_PRINT("info", ("copy: 0x%lx <- 0x%lx %u",
+ (ulong) cursor->ptr, (ulong)buff, (uint)len));
+ memmove(cursor->ptr, buff, len);
+ left-= len;
+ cursor->ptr+= len;
+ } while (left);
+
+ parts->current= cur;
+ horizon->rec_offset+= length;
+ cursor->current_page_size+= length;
+ if (!cursor->chaser)
+ cursor->buffer->size+= length;
+ DBUG_PRINT("info", ("Write parts buffer #%u: 0x%lx, "
+ "chaser: %d, Size: %lu (%lu)",
+ (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer,
+ cursor->chaser, (ulong) cursor->buffer->size,
+ (ulong) (cursor->ptr -cursor->buffer->buffer)));
+ DBUG_ASSERT(cursor->chaser ||
+ ((ulong) (cursor->ptr -cursor->buffer->buffer) ==
+ cursor->buffer->size));
+ DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no);
+ DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE ==
+ cursor->current_page_size % TRANSLOG_PAGE_SIZE);
+ DBUG_ASSERT(cursor->current_page_size <= TRANSLOG_PAGE_SIZE);
+
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Put 1 group chunk type 0 header into parts array
+
+ SYNOPSIS
+ translog_write_variable_record_1group_header()
+ parts Descriptor of record source parts
+ type the log record type
+ short_trid Sort transaction ID or 0 if it has no sense
+ header_length Calculated header length of chunk type 0
+ chunk0_header Buffer for the chunk header writing
+*/
+
+static void
+translog_write_variable_record_1group_header(struct st_translog_parts *parts,
+ enum translog_record_type type,
+ SHORT_TRANSACTION_ID short_trid,
+ uint16 header_length,
+ uchar *chunk0_header)
+{
+ struct st_translog_part part;
+ DBUG_ASSERT(parts->current != 0); /* first part is left for
+ header */
+ parts->total_record_length+= (part.len= header_length);
+ part.buff= chunk0_header;
+ *chunk0_header= (uchar) (type |TRANSLOG_CHUNK_LSN);
+ int2store(chunk0_header + 1, short_trid);
+ translog_write_variable_record_1group_code_len(chunk0_header + 3,
+ parts->record_length,
+ header_length);
+ int2store(chunk0_header + header_length - 2, 0);
+ parts->current--;
+ set_dynamic(&parts->parts, (gptr) &part, parts->current);
+}
+
+
+/*
+ Increase number of writers for this buffer
+
+ SYNOPSIS
+ translog_buffer_increase_writers()
+ buffer target buffer
+*/
+
+#ifndef DBUG_OFF
+static void translog_buffer_increase_writers(struct st_translog_buffer *buffer)
+{
+ DBUG_ENTER("translog_buffer_increase_writers");
+ buffer->copy_to_buffer_in_progress++;
+ DBUG_PRINT("info", ("copy_to_buffer_in_progress, buffer #%u 0x%lx: %d",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ buffer->copy_to_buffer_in_progress));
+ DBUG_VOID_RETURN;
+}
+#else
+#define translog_buffer_increase_writers(B) \
+ (B)->copy_to_buffer_in_progress++;
+#endif
+
+
+/*
+ Decrease number of writers for this buffer
+
+ SYNOPSIS
+ translog_buffer_decrease_writers()
+ buffer target buffer
+*/
+
+
+static void translog_buffer_decrease_writers(struct st_translog_buffer *buffer)
+{
+ DBUG_ENTER("translog_buffer_decrease_writers");
+ buffer->copy_to_buffer_in_progress--;
+ DBUG_PRINT("info", ("copy_to_buffer_in_progress, buffer #%u 0x%lx: %d",
+ (uint) buffer->buffer_no, (ulong) buffer,
+ buffer->copy_to_buffer_in_progress));
+ if (buffer->copy_to_buffer_in_progress == 0 &&
+ buffer->waiting_filling_buffer.last_thread != NULL)
+ {
+ wqueue_release_queue(&buffer->waiting_filling_buffer);
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Put chunk 2 from new page beginning
+
+ SYNOPSIS
+ translog_write_variable_record_chunk2_page()
+ parts Descriptor of record source parts
+ horizon \ Pointers on file position and buffer
+ cursor /
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool
+translog_write_variable_record_chunk2_page(struct st_translog_parts *parts,
+ TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor)
+{
+ struct st_translog_buffer *buffer_to_flush= 0;
+ int rc;
+ uchar chunk2_header[1]=
+ {
+ TRANSLOG_CHUNK_NOHDR
+ };
+
+ DBUG_ENTER("translog_write_variable_record_chunk2_page");
+
+ rc= translog_page_next(horizon, cursor, &buffer_to_flush);
+ if (buffer_to_flush != NULL)
+ {
+ rc|= translog_buffer_lock(buffer_to_flush);
+ translog_buffer_decrease_writers(buffer_to_flush);
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ }
+ if (rc)
+ DBUG_RETURN(1);
+
+ translog_write_data_on_page(horizon, cursor, 1, chunk2_header);
+ translog_write_parts_on_page(horizon, cursor,
+ log_descriptor.page_capacity_chunk_2, parts);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Put chunk 3 of requested length in the buffer from new page beginning
+
+ SYNOPSIS
+ translog_write_variable_record_chunk3_page()
+ parts Descriptor of record source parts
+ length Length of this chunk
+ horizon \ Pointers on file position and buffer
+ cursor /
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool
+translog_write_variable_record_chunk3_page(struct st_translog_parts *parts,
+ uint16 length,
+ TRANSLOG_ADDRESS *horizon,
+ struct st_buffer_cursor *cursor)
+{
+ struct st_translog_buffer *buffer_to_flush= 0;
+ struct st_translog_part part;
+ int rc;
+ uchar chunk3_header[1 + 2];
+
+ DBUG_ENTER("translog_write_variable_record_chunk3_page");
+
+ rc= translog_page_next(horizon, cursor, &buffer_to_flush);
+ if (buffer_to_flush != NULL)
+ {
+ rc|= translog_buffer_lock(buffer_to_flush);
+ translog_buffer_decrease_writers(buffer_to_flush);
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ }
+ if (rc)
+ DBUG_RETURN(1);
+ if (length == 0)
+ {
+ /* It was call to write page header only (no data for chunk 3) */
+ DBUG_PRINT("info", ("It is a call to make page header only"));
+ DBUG_RETURN(0);
+ }
+
+ DBUG_ASSERT(parts->current != 0); /* first part is left for
+ header */
+ parts->total_record_length+= (part.len= 1 + 2);
+ part.buff= chunk3_header;
+ *chunk3_header= (uchar) (TRANSLOG_CHUNK_LNGTH);
+ int2store(chunk3_header + 1, length);
+ parts->current--;
+ set_dynamic(&parts->parts, (gptr) &part, parts->current);
+
+ translog_write_parts_on_page(horizon, cursor, length + 1 + 2, parts);
+ DBUG_RETURN(0);
+}
+
+/*
+ Move log pointer (horizon) on given number pages starting from next page,
+ and given offset on the last page
+
+ SYNOPSIS
+ translog_advance_pointer()
+ pages Number of full pages starting from the next one
+ last_page_data Plus this data on the last page
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool translog_advance_pointer(uint pages, uint16 last_page_data)
+{
+ translog_size_t last_page_offset=
+ log_descriptor.page_overhead + last_page_data;
+ translog_size_t offset= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_size /* next
+ page
+ */ +
+ pages * TRANSLOG_PAGE_SIZE + last_page_offset;
+ translog_size_t buffer_end_offset, file_end_offset, min_offset;
+ DBUG_ENTER("translog_advance_pointer");
+ DBUG_PRINT("enter", ("Pointer: (%u, 0x%lx) + %u + %u pages + %u + %u",
+ (uint) log_descriptor.horizon.file_no,
+ (ulong) log_descriptor.horizon.rec_offset,
+ (uint) (TRANSLOG_PAGE_SIZE -
+ log_descriptor.bc.current_page_size),
+ pages, (uint) log_descriptor.page_overhead,
+ (uint) last_page_data));
+
+ for (;;)
+ {
+ uint8 new_buffer_no=
+ (log_descriptor.bc.buffer_no + 1) % TRANSLOG_BUFFERS_NO;
+ struct st_translog_buffer *new_buffer;
+ struct st_translog_buffer *old_buffer;
+ buffer_end_offset= TRANSLOG_WRITE_BUFFER - log_descriptor.bc.buffer->size;
+ file_end_offset=
+ log_descriptor.log_file_max_size - log_descriptor.horizon.rec_offset;
+ DBUG_PRINT("info", ("offset: %lu, buffer_end_offs: %lu, "
+ "file_end_offs: %lu",
+ (ulong) offset, (ulong) buffer_end_offset,
+ (ulong) file_end_offset));
+ DBUG_PRINT("info", ("Buff #%u %u (0x%lx) offset 0x%lx + size 0x%lx = "
+ "0x%lx (0x%lx)",
+ (uint) log_descriptor.bc.buffer->buffer_no,
+ (uint) log_descriptor.bc.buffer_no,
+ (ulong) log_descriptor.bc.buffer,
+ (ulong) log_descriptor.bc.buffer->offset.rec_offset,
+ (ulong) log_descriptor.bc.buffer->size,
+ (ulong) (log_descriptor.bc.buffer->offset.rec_offset +
+ log_descriptor.bc.buffer->size),
+ (ulong) log_descriptor.horizon.rec_offset));
+ DBUG_ASSERT(log_descriptor.bc.buffer->offset.rec_offset +
+ log_descriptor.bc.buffer->size ==
+ log_descriptor.horizon.rec_offset);
+
+ if (offset <= buffer_end_offset && offset <= file_end_offset)
+ break;
+ old_buffer= log_descriptor.bc.buffer;
+ new_buffer_no= (log_descriptor.bc.buffer_no + 1) % TRANSLOG_BUFFERS_NO;
+ new_buffer= log_descriptor.buffers + new_buffer_no;
+
+ translog_buffer_lock(new_buffer);
+ translog_wait_for_buffer_free(new_buffer);
+
+ min_offset= (buffer_end_offset < file_end_offset ?
+ buffer_end_offset : file_end_offset);
+ log_descriptor.bc.buffer->size+= min_offset;
+ log_descriptor.bc.ptr +=min_offset;
+ DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx, chaser: %d, Size: %lu (%lu)",
+ (uint) log_descriptor.bc.buffer->buffer_no,
+ (ulong) log_descriptor.bc.buffer,
+ log_descriptor.bc.chaser,
+ (ulong) log_descriptor.bc.buffer->size,
+ (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
+ buffer->buffer)));
+ DBUG_ASSERT((ulong)
+ (log_descriptor.bc.ptr -log_descriptor.bc.buffer->buffer) ==
+ log_descriptor.bc.buffer->size);
+ DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no ==
+ log_descriptor.bc.buffer_no);
+ translog_buffer_increase_writers(log_descriptor.bc.buffer);
+
+ if (file_end_offset <= buffer_end_offset)
+ {
+ log_descriptor.horizon.file_no++;
+ log_descriptor.horizon.rec_offset= TRANSLOG_PAGE_SIZE;
+ DBUG_PRINT("info", ("New file %d", log_descriptor.horizon.file_no));
+ if (translog_create_new_file())
+ {
+ DBUG_RETURN(1);
+ }
+ }
+ else
+ {
+ DBUG_PRINT("info", ("The same file"));
+ log_descriptor.horizon.rec_offset+= min_offset;
+ }
+ translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no);
+ if (translog_buffer_unlock(old_buffer))
+ {
+ DBUG_RETURN(1);
+ }
+ offset-= min_offset;
+ }
+ log_descriptor.bc.ptr+= offset;
+ log_descriptor.bc.buffer->size+= offset;
+ translog_buffer_increase_writers(log_descriptor.bc.buffer);
+ log_descriptor.horizon.rec_offset+= offset;
+ log_descriptor.bc.current_page_size= last_page_offset;
+ DBUG_PRINT("info", ("drop write_counter"));
+ log_descriptor.bc.write_counter= 0;
+ log_descriptor.bc.previous_offset= 0;
+ DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx, chaser: %d, Size: %lu (%lu), "
+ "offset: %u last page: %u",
+ (uint) log_descriptor.bc.buffer->buffer_no,
+ (ulong) log_descriptor.bc.buffer,
+ log_descriptor.bc.chaser,
+ (ulong) log_descriptor.bc.buffer->size,
+ (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.buffer->
+ buffer), (uint) offset,
+ (uint) last_page_offset));
+ DBUG_ASSERT(log_descriptor.bc.chaser
+ ||
+ ((ulong) (log_descriptor.bc.ptr -log_descriptor.bc.buffer->buffer)
+ == log_descriptor.bc.buffer->size));
+ DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no ==
+ log_descriptor.bc.buffer_no);
+ DBUG_PRINT("info",
+ ("pointer moved to: (%u, 0x%lx)",
+ (uint) log_descriptor.horizon.file_no,
+ (ulong) log_descriptor.horizon.rec_offset));
+ DBUG_ASSERT((log_descriptor.bc.ptr -log_descriptor.bc.buffer->
+ buffer) %TRANSLOG_PAGE_SIZE ==
+ log_descriptor.bc.current_page_size % TRANSLOG_PAGE_SIZE);
+ DBUG_ASSERT(log_descriptor.bc.current_page_size <= TRANSLOG_PAGE_SIZE);
+ log_descriptor.bc.protected= 0;
+ DBUG_RETURN(0);
+}
+
+
+
+/*
+ Get page rest
+
+ SYNOPSIS
+ translog_get_current_page_rest()
+
+ NOTE loghandler should be locked
+
+ RETURN
+ number of bytes left on the current page
+*/
+
+#define translog_get_current_page_rest() \
+ (TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_size)
+
+/*
+ Get buffer rest in full pages
+
+ SYNOPSIS
+ translog_get_current_buffer_rest()
+
+ NOTE loghandler should be locked
+
+ RETURN
+ number of full pages left on the current buffer
+*/
+
+#define translog_get_current_buffer_rest() \
+ ((log_descriptor.bc.buffer->buffer + TRANSLOG_WRITE_BUFFER - \
+ log_descriptor.bc.ptr) / \
+ TRANSLOG_PAGE_SIZE)
+
+/*
+ Calculate possible group size without first (current) page
+
+ SYNOPSIS
+ translog_get_current_group_size()
+
+ NOTE loghandler should be locked
+
+ RETURN
+ group size without first (current) page
+*/
+
+static translog_size_t translog_get_current_group_size()
+{
+ /* buffer rest in full pages */
+ translog_size_t buffer_rest= translog_get_current_buffer_rest();
+
+ DBUG_ENTER("translog_get_current_group_size");
+
+ DBUG_PRINT("info", ("buffer_rest in pages %lu", buffer_rest));
+ buffer_rest*= log_descriptor.page_capacity_chunk_2;
+ /* in case of only half of buffer free we can write this and next buffer */
+ if (buffer_rest < log_descriptor.half_buffer_capacity_chunk_2)
+ {
+ DBUG_PRINT("info", ("buffer_rest %lu -> add %lu",
+ buffer_rest,
+ (ulong) log_descriptor.buffer_capacity_chunk_2));
+ buffer_rest+= log_descriptor.buffer_capacity_chunk_2;
+ }
+
+ DBUG_PRINT("info", ("buffer_rest %lu", buffer_rest));
+
+ DBUG_RETURN(buffer_rest);
+}
+
+
+/*
+ Write variable record in 1 group
+
+ SYNOPSIS
+ translog_write_variable_record_1group()
+ lsn LSN of the record will be written here
+ type the log record type
+ short_trid Sort transaction ID or 0 if it has no sense
+ parts Descriptor of record source parts
+ buffer_to_flush Buffer which have to be flushed if it is not 0
+ header_length Calculated header length of chunk type 0
+ tcb Transaction control block pointer for hooks by
+ record log type
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool
+translog_write_variable_record_1group(LSN *lsn,
+ enum translog_record_type type,
+ SHORT_TRANSACTION_ID short_trid,
+ struct st_translog_parts *parts,
+ struct st_translog_buffer
+ *buffer_to_flush, uint16 header_length,
+ void *tcb)
+{
+ TRANSLOG_ADDRESS horizon;
+ struct st_buffer_cursor cursor;
+ int rc= 0;
+ uint i;
+ translog_size_t record_rest, full_pages, first_page;
+ uint additional_chunk3_page= 0;
+ uchar chunk0_header[1 + 2 + 5 + 2];
+
+ DBUG_ENTER("translog_write_variable_record_1group");
+
+ *lsn= horizon= log_descriptor.horizon;
+ if (log_record_type_descriptor[type].inwrite_hook &&
+ (*log_record_type_descriptor[type].inwrite_hook)(type, tcb,
+ lsn, parts))
+ {
+ DBUG_RETURN(1);
+ }
+ cursor= log_descriptor.bc;
+ cursor.chaser= 1;
+
+ /*
+ Advance pointer To be able unlock the loghandler
+ */
+ first_page= translog_get_current_page_rest();
+ record_rest= parts->record_length - (first_page - header_length);
+ full_pages= record_rest / log_descriptor.page_capacity_chunk_2;
+ record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);
+
+ if (record_rest + 1 == log_descriptor.page_capacity_chunk_2)
+ {
+ DBUG_PRINT("info", ("2 chunks type 3 is needed"));
+ /* We will write 2 chunks type 3 at the end of this group */
+ additional_chunk3_page= 1;
+ record_rest= 1;
+ }
+
+ DBUG_PRINT("info", ("first_page: %u (%u), full_pages: %u (%lu), "
+ "additional: %u (%u), rest %u = %u",
+ first_page, first_page - header_length,
+ full_pages,
+ (ulong) full_pages *
+ log_descriptor.page_capacity_chunk_2,
+ additional_chunk3_page,
+ additional_chunk3_page *
+ (log_descriptor.page_capacity_chunk_2 - 1),
+ record_rest, parts->record_length));
+ /* record_rest + 3 is chunk type 3 overhead + record_rest */
+ translog_advance_pointer(full_pages + additional_chunk3_page,
+ (record_rest ? record_rest + 3 : 0));
+ log_descriptor.bc.buffer->last_lsn= *lsn;
+
+ rc|= translog_unlock();
+
+ /*
+ check if we switched buffer and need process it (current buffer is
+ unlocked already => we will not delay other threads
+ */
+ if (buffer_to_flush != NULL)
+ {
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ }
+
+ if (rc)
+ DBUG_RETURN(1);
+
+ translog_write_variable_record_1group_header(parts, type, short_trid,
+ header_length, chunk0_header);
+
+ /* fill the pages */
+ translog_write_parts_on_page(&horizon, &cursor, first_page, parts);
+
+
+ DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx)",
+ (uint) log_descriptor.horizon.file_no,
+ (ulong) log_descriptor.horizon.rec_offset,
+ (uint) horizon.file_no, (ulong) horizon.rec_offset));
+
+ for (i= 0; i < full_pages; i++)
+ {
+ if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
+ DBUG_RETURN(1);
+
+ DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx)",
+ (uint) log_descriptor.horizon.file_no,
+ (ulong) log_descriptor.horizon.rec_offset,
+ (uint) horizon.file_no, (ulong) horizon.rec_offset));
+ }
+
+ if (additional_chunk3_page)
+ {
+ if (translog_write_variable_record_chunk3_page(parts,
+ log_descriptor.
+ page_capacity_chunk_2 - 2,
+ &horizon, &cursor))
+ DBUG_RETURN(1);
+ DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx)",
+ (uint) log_descriptor.horizon.file_no,
+ (ulong) log_descriptor.horizon.rec_offset,
+ (uint) horizon.file_no, (ulong) horizon.rec_offset));
+ DBUG_ASSERT(cursor.current_page_size == TRANSLOG_PAGE_SIZE);
+ }
+
+ if (translog_write_variable_record_chunk3_page(parts,
+ record_rest,
+ &horizon, &cursor))
+ DBUG_RETURN(1);
+ DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx)",
+ (uint) log_descriptor.horizon.file_no,
+ (ulong) log_descriptor.horizon.rec_offset,
+ (uint) horizon.file_no, (ulong) horizon.rec_offset));
+
+ rc= translog_buffer_lock(cursor.buffer);
+ if (!rc)
+ {
+ /*
+ check if we wrote something on lst not full page and need to reconstruct
+ CRC and sector protection
+ if (buffer->offset.rec_offset + buffer->size - horizon->rec_offset >
+ */
+ translog_buffer_decrease_writers(cursor.buffer);
+ }
+ rc|= translog_buffer_unlock(cursor.buffer);
+ DBUG_RETURN(rc);
+}
+
+
+/*
+ Write variable record in 1 chunk
+
+ SYNOPSIS
+ translog_write_variable_record_1chunk()
+ lsn LSN of the record will be written here
+ type the log record type
+ short_trid Sort transaction ID or 0 if it has no sense
+ parts Descriptor of record source parts
+ buffer_to_flush Buffer which have to be flushed if it is not 0
+ header_length Calculated header length of chunk type 0
+ tcb Transaction control block pointer for hooks by
+ record log type
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool
+translog_write_variable_record_1chunk(LSN *lsn,
+ enum translog_record_type type,
+ SHORT_TRANSACTION_ID short_trid,
+ struct st_translog_parts *parts,
+ struct st_translog_buffer
+ *buffer_to_flush, uint16 header_length,
+ void *tcb)
+{
+ int rc;
+ uchar chunk0_header[1 + 2 + 5 + 2];
+ DBUG_ENTER("translog_write_variable_record_1chunk");
+
+ translog_write_variable_record_1group_header(parts, type, short_trid,
+ header_length, chunk0_header);
+
+ *lsn= log_descriptor.horizon;
+ if (log_record_type_descriptor[type].inwrite_hook &&
+ (*log_record_type_descriptor[type].inwrite_hook) (type, tcb,
+ lsn, parts))
+ {
+ DBUG_RETURN(1);
+ }
+
+ rc= translog_write_parts_on_page(&log_descriptor.horizon,
+ &log_descriptor.bc,
+ parts->total_record_length, parts);
+ log_descriptor.bc.buffer->last_lsn= *lsn;
+ rc|= translog_unlock();
+
+ /*
+ check if we switched buffer and need process it (current buffer is
+ unlocked already => we will not delay other threads
+ */
+ if (buffer_to_flush != NULL)
+ {
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ }
+
+ DBUG_RETURN(rc);
+}
+
+
+/*
+ Calculate and write LSN difference (compressed LSN)
+
+ SYNOPSIS
+ translog_put_LSN_diff()
+ base_lsn LSN from which we calculate difference
+ lsn LSN for codding
+ dst pointer before which result should be written
+
+ NOTE:
+ to store an LSN in a compact way we will use the following compression:
+
+ if a log record has LSN1, and it contains the lSN2 as a back reference,
+ instead of LSN2 we write LSN1-LSN2, encoded as:
+
+ two bits the number N (see below)
+ 14 bits
+ N bytes
+
+ that is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
+ is stored in the first two bits.
+
+ RETURN
+ pointer on coded LSN
+ NULL - error
+*/
+
+static uchar *translog_put_LSN_diff(LSN *base_lsn, LSN *lsn, uchar *dst)
+{
+ DBUG_ENTER("translog_put_LSN_diff");
+ DBUG_PRINT("enter", ("Base: (0x%lx,0x%lx), val: (0x%lx,0x%lx), dst 0x%lx",
+ (ulong) base_lsn->file_no,
+ (ulong) base_lsn->rec_offset,
+ (ulong) lsn->file_no,
+ (ulong) lsn->rec_offset, (ulong) dst));
+ if (base_lsn->file_no == lsn->file_no)
+ {
+ uint32 diff;
+ DBUG_ASSERT(base_lsn->rec_offset > lsn->rec_offset);
+ diff= base_lsn->rec_offset - lsn->rec_offset;
+ if (diff <= 0x3FFF)
+ {
+ dst-= 2;
+ dst[0]= diff >> 8;
+ dst[1]= (diff & 0xFF);
+ }
+ else if (diff <= 0x3FFFFF)
+ {
+ dst-= 3;
+ dst[0]= 0x40 | (diff >> 16);
+ int2store(dst + 1, diff & 0xFFFF);
+ }
+ else if (diff <= 0x3FFFFFFF)
+ {
+ dst-= 4;
+ dst[0]= 0x80 | (diff >> 24);
+ int3store(dst + 1, diff & 0xFFFFFF);
+ }
+ else
+ {
+ dst-= 5;
+ dst[0]= 0xC0;
+ int4store(dst + 1, diff);
+ }
+ }
+ else
+ {
+ uint32 diff;
+ uint32 offset_diff;
+ ulonglong base_offset= base_lsn->rec_offset;
+ DBUG_ASSERT(base_lsn->file_no > lsn->file_no);
+ diff= base_lsn->file_no - lsn->file_no;
+ if (base_offset < lsn->rec_offset)
+ {
+ /* take 1 from file offset */
+ diff--;
+ base_offset+= 0x100000000LL;
+ }
+ offset_diff= base_offset - lsn->rec_offset;
+ if (diff > 0x3f)
+ {
+ /*TODO: error - too long transaction - panic!!! */
+ UNRECOVERABLE_ERROR(("Too big file diff: %lu", (ulong) diff));
+ DBUG_RETURN(NULL);
+ }
+ dst-= 5;
+ *dst= (0xC0 | diff);
+ int4store(dst + 1, offset_diff);
+ }
+ DBUG_PRINT("info", ("new dst: 0x%lx", (ulong) dst));
+ DBUG_RETURN(dst);
+}
+
+
+/*
+ Get LSN from LSN-difference (compressed LSN)
+
+ SYNOPSIS
+ translog_get_LSN_from_diff()
+ base_lsn LSN from which we calculate difference
+ src pointer to coded lsn
+ dst pointer to buffer where to write 7byte LSN
+
+ NOTE:
+ to store an LSN in a compact way we use the following compression:
+
+ If a log record has LSN1, and it contains the lSN2 as a back reference,
+ instead of LSN2 we write LSN1-LSN2, encoded as:
+
+ two bits the number N (see below)
+ 14 bits
+ N bytes
+
+ That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
+ is stored in the first two bits.
+
+ RETURN
+ pointer to buffer after decoded LSN
+*/
+
+static uchar *translog_get_LSN_from_diff(LSN *base_lsn, uchar *src, uchar *dst)
+{
+ LSN lsn;
+ uint32 diff;
+ uint32 first_byte;
+ uint8 code;
+ DBUG_ENTER("translog_get_LSN_from_diff");
+ DBUG_PRINT("enter", ("Base: (0x%lx,0x%lx), src: 0x%lx, dst 0x%lx",
+ (ulong) base_lsn->file_no,
+ (ulong) base_lsn->rec_offset, (ulong) src, (ulong) dst));
+ first_byte= *((uint8*) src);
+ code= first_byte & 0xC0;
+ first_byte &= 0x3F;
+ switch (code) {
+ case 0x00:
+ lsn.file_no= base_lsn->file_no;
+ lsn.rec_offset=
+ base_lsn->rec_offset - ((first_byte << 8) + *((uint8*) (src + 1)));
+ src+= 2;
+ break;
+ case 0x40:
+ lsn.file_no= base_lsn->file_no;
+ diff= uint2korr(src + 1);
+ lsn.rec_offset= base_lsn->rec_offset - ((first_byte << 16) + diff);
+ src+= 3;
+ break;
+ case 0x80:
+ lsn.file_no= base_lsn->file_no;
+ diff= uint3korr(src + 1);
+ lsn.rec_offset= base_lsn->rec_offset - ((first_byte << 24) + diff);
+ src+= 4;
+ break;
+ case 0xC0:
+ {
+ ulonglong base_offset= base_lsn->rec_offset;
+ diff= uint4korr(src + 1);
+ if (diff > base_lsn->rec_offset)
+ {
+ /* take 1 from file offset */
+ first_byte++;
+ base_offset+= 0x100000000LL;
+ }
+ lsn.file_no= base_lsn->file_no - first_byte;
+ lsn.rec_offset= base_offset - diff;
+ src+= 5;
+ break;
+ }
+ default:
+ DBUG_ASSERT(0);
+ DBUG_RETURN(NULL);
+ }
+ lsn7store(dst, &lsn);
+ DBUG_PRINT("info", ("new src: 0x%lx", (ulong) dst));
+ DBUG_RETURN(src);
+}
+
+
+/*
+ Encode relative LSNs listed in the parameters
+
+ SYNOPSIS
+ translog_relative_LSN_encode()
+ parts Parts list with encoded LSN(s)
+ base_lsn LSN which is base for encoding
+ lsns number of LSN(s) to encode
+ compressed_LSNs buffer which can be used for storing compressed LSN(s)
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool translog_relative_LSN_encode(struct st_translog_parts *parts,
+ LSN *base_lsn,
+ uint lsns, uchar *compressed_LSNs)
+{
+ struct st_translog_part part;
+ uint lsns_len= lsns * 7;
+
+ DBUG_ENTER("translog_relative_LSN_encode");
+
+ get_dynamic(&parts->parts, (gptr) &part, parts->current);
+ /* collect all LSN(s) in one chunk if it (they) is (are) divided */
+ if (part.len < lsns_len)
+ {
+ uint copied= part.len;
+ DBUG_PRINT("info", ("Using buffer 0x%lx", (ulong) compressed_LSNs));
+ memmove(compressed_LSNs, part.buff, part.len);
+ do
+ {
+ get_dynamic(&parts->parts, (gptr) &part, parts->current + 1);
+ if ((part.len + copied) < lsns_len)
+ {
+ memmove(compressed_LSNs + copied, part.buff, part.len);
+ copied+= part.len;
+ delete_dynamic_element(&parts->parts, parts->current + 1);
+ }
+ else
+ {
+ uint len= lsns_len - copied;
+ memmove(compressed_LSNs + copied, part.buff, len);
+ copied= lsns_len;
+ part.buff+= len;
+ part.len-= len;
+ /*
+ We do not check result of set_dynamic, because we are sure that
+ it will not grow
+ */
+ set_dynamic(&parts->parts, (gptr) &part, parts->current + 1);
+ }
+ } while (copied < lsns_len);
+ part.len= lsns_len;
+ part.buff= compressed_LSNs;
+ }
+ {
+ /* Compress */
+ LSN ref;
+ uint economy;
+ uchar *ref_ptr= part.buff + lsns_len - 7;
+ uchar *dst_ptr= part.buff + lsns_len;
+ uint i;
+ for (i= 0; i < lsns; i++, ref_ptr-= 7)
+ {
+ lsn7korr(&ref, ref_ptr);
+ if ((dst_ptr= translog_put_LSN_diff(base_lsn, &ref, dst_ptr)) == NULL)
+ DBUG_RETURN(1);
+ }
+ economy= (dst_ptr - part.buff);
+ DBUG_PRINT("info", ("Economy %u", economy));
+ part.len-= economy;
+ parts->record_length-= economy;
+ parts->total_record_length-= economy;
+ part.buff= dst_ptr;
+ }
+ /*
+ We do not check result of set_dynamic, because we are sure that
+ it will not grow
+ */
+ set_dynamic(&parts->parts, (gptr) &part, parts->current);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Write multi-group variable-size record
+
+ SYNOPSIS
+ translog_write_variable_record_mgroup()
+ lsn LSN of the record will be written here
+ type the log record type
+ short_trid Sort transaction ID or 0 if it has no sense
+ parts Descriptor of record source parts
+ buffer_to_flush Buffer which have to be flushed if it is not 0
+ header_length Header length calculated for 1 group
+ buffer_rest Beginning from which we plan to write in full pages
+ tcb Transaction control block pointer for hooks by
+ record log type
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool
+translog_write_variable_record_mgroup(LSN *lsn,
+ enum translog_record_type type,
+ SHORT_TRANSACTION_ID short_trid,
+ struct st_translog_parts *parts,
+ struct st_translog_buffer
+ *buffer_to_flush,
+ uint16 header_length,
+ translog_size_t buffer_rest,
+ void *tcb)
+{
+ TRANSLOG_ADDRESS horizon;
+ struct st_buffer_cursor cursor;
+ int rc= 0;
+ uint i, chunk2_page, full_pages;
+ uint curr_group= 0;
+ translog_size_t record_rest, first_page, chunk3_pages, chunk0_pages= 1;
+ translog_size_t done= 0;
+ struct st_translog_group_descriptor group;
+ DYNAMIC_ARRAY groups;
+ uint16 chunk3_size;
+ uint16 page_capacity= log_descriptor.page_capacity_chunk_2 + 1;
+ uint16 last_page_capacity;
+ my_bool new_page_before_chunk0= 1, first_chunk0= 1;
+ uchar chunk0_header[1 + 2 + 5 + 2 + 2], group_desc[7 + 1];
+ uchar chunk2_header[1]=
+ {
+ TRANSLOG_CHUNK_NOHDR
+ };
+ uint header_fixed_part= header_length + 2;
+ uint groups_per_page= (page_capacity - header_fixed_part) / (7 + 1);
+
+ DBUG_ENTER("translog_write_variable_record_mgroup");
+
+ if (init_dynamic_array(&groups, sizeof(struct st_translog_group_descriptor),
+ 10, 10 CALLER_INFO))
+ {
+ UNRECOVERABLE_ERROR(("init array failed"));
+ DBUG_RETURN(1);
+ }
+
+ first_page= translog_get_current_page_rest();
+ record_rest= parts->record_length - (first_page - 1);
+ DBUG_PRINT("info", ("Record Rest: %lu", (ulong) record_rest));
+
+ if (record_rest < buffer_rest)
+ {
+ DBUG_PRINT("info", ("too many free space because changing header"));
+ buffer_rest-= log_descriptor.page_capacity_chunk_2;
+ DBUG_ASSERT(record_rest >= buffer_rest);
+ }
+
+ do
+ {
+ group.addr= horizon= log_descriptor.horizon;
+ cursor= log_descriptor.bc;
+ cursor.chaser= 1;
+ if ((full_pages= buffer_rest / log_descriptor.page_capacity_chunk_2) > 255)
+ {
+ /* suzeof(uint8) == 256 is max number of chunk in multi-chunks group */
+ full_pages= 255;
+ buffer_rest= full_pages * log_descriptor.page_capacity_chunk_2;
+ }
+ /*
+ group chunks =
+ full pages + first page (which actually can be full, too.
+ But here we assign number of chunks - 1
+ */
+ group.num= full_pages;
+ if (insert_dynamic(&groups, (gptr) &group))
+ {
+ translog_unlock();
+ delete_dynamic(&groups);
+ UNRECOVERABLE_ERROR(("insert into array failed"));
+ DBUG_RETURN(1);
+ }
+
+ DBUG_PRINT("info", ("chunk #%u first_page: %u (%u), full_pages: %u (%lu), "
+ "Left %lu",
+ groups.elements,
+ first_page, first_page - 1,
+ full_pages,
+ (ulong) full_pages *
+ log_descriptor.page_capacity_chunk_2,
+ parts->record_length - (first_page - 1 + buffer_rest) -
+ done));
+ translog_advance_pointer(full_pages, 0);
+
+ rc|= translog_unlock();
+
+ if (buffer_to_flush != NULL)
+ {
+ rc|= translog_buffer_lock(buffer_to_flush);
+ translog_buffer_decrease_writers(buffer_to_flush);
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ buffer_to_flush= NULL;
+ }
+ if (rc)
+ {
+ delete_dynamic(&groups);
+ UNRECOVERABLE_ERROR(("flush of unlock buffer failed"));
+ DBUG_RETURN(1);
+ }
+
+ translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
+ translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
+ DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx) "
+ "Left: %lu",
+ (uint) log_descriptor.horizon.file_no,
+ (ulong) log_descriptor.horizon.rec_offset,
+ (uint) horizon.file_no,
+ (ulong) horizon.rec_offset,
+ (ulong) (parts->record_length - (first_page - 1) -
+ done)));
+
+ for (i= 0; i < full_pages; i++)
+ {
+ if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
+ {
+ delete_dynamic(&groups);
+ DBUG_RETURN(1);
+ }
+
+ DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx)"
+ "Left: %lu",
+ (uint) log_descriptor.horizon.file_no,
+ (ulong) log_descriptor.horizon.rec_offset,
+ (uint) horizon.file_no,
+ (ulong) horizon.rec_offset,
+ (ulong) (parts->record_length - (first_page - 1) -
+ i * log_descriptor.page_capacity_chunk_2 -
+ done)));
+ }
+
+ done+= (first_page - 1 + buffer_rest);
+
+ /* TODO: made separate function for following */
+ rc= translog_page_next(&horizon, &cursor, &buffer_to_flush);
+ if (buffer_to_flush != NULL)
+ {
+ rc|= translog_buffer_lock(buffer_to_flush);
+ translog_buffer_decrease_writers(buffer_to_flush);
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ buffer_to_flush= NULL;
+ }
+ if (rc)
+ {
+ delete_dynamic(&groups);
+ UNRECOVERABLE_ERROR(("flush of unlock buffer failed"));
+ DBUG_RETURN(1);
+ }
+ rc= translog_buffer_lock(cursor.buffer);
+ if (!rc)
+ translog_buffer_decrease_writers(cursor.buffer);
+ rc|= translog_buffer_unlock(cursor.buffer);
+ if (rc)
+ {
+ delete_dynamic(&groups);
+ DBUG_RETURN(1);
+ }
+
+ translog_lock();
+
+ first_page= translog_get_current_page_rest();
+ buffer_rest= translog_get_current_group_size();
+ } while (first_page + buffer_rest < (uint) (parts->record_length - done));
+
+ group.addr= horizon= log_descriptor.horizon;
+ cursor= log_descriptor.bc;
+ cursor.chaser= 1;
+ group.num= 0; /* 0 because it does not matter
+ */
+ if (insert_dynamic(&groups, (gptr) &group))
+ {
+ delete_dynamic(&groups);
+ translog_unlock();
+ UNRECOVERABLE_ERROR(("insert into array failed"));
+ DBUG_RETURN(1);
+ }
+ record_rest= parts->record_length - done;
+ DBUG_PRINT("info", ("Record rest: %lu", (ulong) record_rest));
+ if (first_page <= record_rest + 1)
+ {
+ chunk2_page= 1;
+ record_rest-= (first_page - 1);
+ full_pages= record_rest / log_descriptor.page_capacity_chunk_2;
+ record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);
+ last_page_capacity= page_capacity;
+ }
+ else
+ {
+ chunk2_page= full_pages= 0;
+ last_page_capacity= first_page;
+ }
+ chunk3_size= 0;
+ chunk3_pages= 0;
+ if (last_page_capacity > record_rest + 1 && record_rest != 0)
+ {
+ if (last_page_capacity >
+ record_rest + header_fixed_part + groups.elements * (7 + 1))
+ {
+ /* 1 record of type 0 */
+ chunk3_pages= 0;
+ }
+ else
+ {
+ chunk3_pages= 1;
+ if (record_rest + 2 == last_page_capacity)
+ {
+ chunk3_size= record_rest - 1;
+ record_rest= 1;
+ }
+ else
+ {
+ chunk3_size= record_rest;
+ record_rest= 0;
+ }
+ }
+ }
+ /*
+ A first non-full page will hold type 0 chunk only if it fit in it with
+ all its headers
+ */
+ while (page_capacity <
+ record_rest + header_fixed_part +
+ (groups.elements - groups_per_page * (chunk0_pages - 1)) * (7 + 1))
+ chunk0_pages++;
+ DBUG_PRINT("info", ("chunk0_pages %u, groups %u, groups per full page %u, "
+ "Group on last page %u",
+ chunk0_pages, groups.elements,
+ groups_per_page,
+ (groups.elements -
+ ((page_capacity - header_fixed_part) / (7 + 1)) *
+ (chunk0_pages - 1))));
+ DBUG_PRINT("info", ("first_page: %u, chunk2 %u full_pages: %u (%lu), "
+ "chunk3 %u (%u), rest %u",
+ first_page,
+ chunk2_page, full_pages,
+ (ulong) full_pages *
+ log_descriptor.page_capacity_chunk_2,
+ chunk3_pages, (uint) chunk3_size, (uint) record_rest));
+ translog_advance_pointer(full_pages + chunk3_pages +
+ (chunk0_pages - 1),
+ record_rest + header_fixed_part +
+ (groups.elements -
+ ((page_capacity - header_fixed_part) / (7 + 1)) *
+ (chunk0_pages - 1)) * (7 + 1));
+ translog_unlock();
+
+ if (chunk2_page)
+ {
+ DBUG_PRINT("info", ("chunk 2 to finish first page"));
+ translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
+ translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
+ DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx) "
+ "Left: %lu",
+ (uint) log_descriptor.horizon.file_no,
+ (ulong) log_descriptor.horizon.rec_offset,
+ (uint) horizon.file_no,
+ (ulong) horizon.rec_offset,
+ (ulong) (parts->record_length - (first_page - 1) -
+ done)));
+ }
+ else if (chunk3_pages)
+ {
+ DBUG_PRINT("info", ("chunk 3"));
+ DBUG_ASSERT(full_pages == 0);
+ uchar chunk3_header[3];
+ chunk3_header[0]= TRANSLOG_CHUNK_LNGTH;
+ int2store(chunk3_header + 1, chunk3_size);
+ translog_write_data_on_page(&horizon, &cursor, 3, chunk3_header);
+ translog_write_parts_on_page(&horizon, &cursor, chunk3_size, parts);
+ DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx) "
+ "Left: %lu",
+ (uint) log_descriptor.horizon.file_no,
+ (ulong) log_descriptor.horizon.rec_offset,
+ (uint) horizon.file_no,
+ (ulong) horizon.rec_offset,
+ (ulong) (parts->record_length - chunk3_size - done)));
+ chunk3_pages= 0;
+ }
+ else
+ {
+ DBUG_PRINT("info", ("no new_page_before_chunk0"));
+ new_page_before_chunk0= 0;
+ }
+
+ for (i= 0; i < full_pages; i++)
+ {
+ DBUG_ASSERT(chunk2_page != 0);
+ if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
+ {
+ delete_dynamic(&groups);
+ DBUG_RETURN(1);
+ }
+
+ DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx) "
+ "Left: %lu",
+ (uint) log_descriptor.horizon.file_no,
+ (ulong) log_descriptor.horizon.rec_offset,
+ (uint) horizon.file_no,
+ (ulong) horizon.rec_offset,
+ (ulong) (parts->record_length - (first_page - 1) -
+ i * log_descriptor.page_capacity_chunk_2 -
+ done)));
+ }
+
+ if (chunk3_pages &&
+ translog_write_variable_record_chunk3_page(parts,
+ chunk3_size,
+ &horizon, &cursor))
+ {
+ delete_dynamic(&groups);
+ DBUG_RETURN(1);
+ }
+ DBUG_PRINT("info", ("absolute horizon (%u,0x%lx), local (%u,0x%lx)",
+ (uint) log_descriptor.horizon.file_no,
+ (ulong) log_descriptor.horizon.rec_offset,
+ (uint) horizon.file_no, (ulong) horizon.rec_offset));
+
+
+ *chunk0_header= (uchar) (type |TRANSLOG_CHUNK_LSN);
+ int2store(chunk0_header + 1, short_trid);
+ translog_write_variable_record_1group_code_len(chunk0_header + 3,
+ parts->record_length,
+ header_length);
+ do
+ {
+ int limit;
+ if (new_page_before_chunk0)
+ {
+ rc= translog_page_next(&horizon, &cursor, &buffer_to_flush);
+ if (buffer_to_flush != NULL)
+ {
+ rc|= translog_buffer_lock(buffer_to_flush);
+ translog_buffer_decrease_writers(buffer_to_flush);
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ buffer_to_flush= NULL;
+ }
+ if (rc)
+ {
+ delete_dynamic(&groups);
+ UNRECOVERABLE_ERROR(("flush of unlock buffer failed"));
+ DBUG_RETURN(1);
+ }
+ }
+ new_page_before_chunk0= 1;
+
+ if (first_chunk0)
+ {
+ *lsn= horizon;
+ if (log_record_type_descriptor[type].inwrite_hook &&
+ (*log_record_type_descriptor[type].inwrite_hook) (type, tcb,
+ lsn, parts))
+ {
+ DBUG_RETURN(1);
+ }
+
+ first_chunk0= 0;
+ }
+
+ /*
+ A first non-full page will hold type 0 chunk only if it fit in it with
+ all its headers => the fist page is full or number of groups less then
+ possible number of full page.
+ */
+ limit= (groups_per_page < groups.elements - curr_group ?
+ groups_per_page : groups.elements - curr_group);
+ DBUG_PRINT("info", ("Groups: %u curr %u, limit %u",
+ (uint) groups.elements, (uint) curr_group,
+ (uint) limit));
+
+ if (chunk0_pages == 1)
+ {
+ DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) + %u = %u",
+ (uint) limit, (uint) record_rest,
+ (uint) (2 + limit * (7 + 1) + record_rest)));
+ int2store(chunk0_header + header_length - 2,
+ 2 + limit * (7 + 1) + record_rest);
+ }
+ else
+ {
+ DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) = %u",
+ (uint) limit, (uint) (2 + limit * (7 + 1))));
+ int2store(chunk0_header + header_length - 2, 2 + limit * (7 + 1));
+ }
+ int2store(chunk0_header + header_length, groups.elements - curr_group);
+ translog_write_data_on_page(&horizon, &cursor, header_fixed_part,
+ chunk0_header);
+ for (i= curr_group; i < limit + curr_group; i++)
+ {
+ get_dynamic(&groups, (gptr) &group, i);
+ lsn7store(group_desc, &group.addr);
+ group_desc[7]= group.num;
+ translog_write_data_on_page(&horizon, &cursor, (7 + 1), group_desc);
+ }
+
+ if (chunk0_pages == 1 && record_rest != 0)
+ translog_write_parts_on_page(&horizon, &cursor, record_rest, parts);
+
+ chunk0_pages--;
+ curr_group+= limit;
+
+ } while (chunk0_pages != 0);
+ rc= translog_buffer_lock(cursor.buffer);
+ if (cmp_translog_addr(cursor.buffer->last_lsn, *lsn) < 0)
+ cursor.buffer->last_lsn= *lsn;
+ translog_buffer_decrease_writers(cursor.buffer);
+ rc|= translog_buffer_unlock(cursor.buffer);
+
+ delete_dynamic(&groups);
+ DBUG_RETURN(rc);
+}
+
+
+/*
+ Write the variable length log record
+
+ SYNOPSIS
+ translog_write_variable_record()
+ lsn LSN of the record will be written here
+ type the log record type
+ short_trid Sort transaction ID or 0 if it has no sense
+ parts Descriptor of record source parts
+ tcb Transaction control block pointer for hooks by
+ record log type
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool translog_write_variable_record(LSN *lsn,
+ enum translog_record_type type,
+ SHORT_TRANSACTION_ID short_trid,
+ struct st_translog_parts *parts,
+ void *tcb)
+{
+ struct st_translog_buffer *buffer_to_flush= NULL;
+ uint header_length1= 1 + 2 + 2 +
+ translog_variable_record_length_bytes(parts->record_length);
+ ulong buffer_rest;
+ uint page_rest;
+ uchar compressed_LSNs[2 * 7]; /* Max number of such LSNs per
+ record is 2 */
+
+ DBUG_ENTER("translog_write_variable_record");
+
+ translog_lock();
+ DBUG_PRINT("info", ("horizon (%u,0x%lx)",
+ (uint) log_descriptor.horizon.file_no,
+ (ulong) log_descriptor.horizon.rec_offset));
+ page_rest= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_size;
+ DBUG_PRINT("info", ("header length %u, page_rest: %u",
+ header_length1, page_rest));
+
+ /*
+ header and part which we should read have to fit in one chunk
+ TODO: allow to divide readable header
+ */
+ if (page_rest <
+ (header_length1 + log_record_type_descriptor[type].read_header_len))
+ {
+ DBUG_PRINT("info",
+ ("Next page, size: %u, header: %u + %u",
+ log_descriptor.bc.current_page_size,
+ header_length1,
+ log_record_type_descriptor[type].read_header_len));
+ translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
+ &buffer_to_flush);
+ page_rest= log_descriptor.page_capacity_chunk_2 + 1;
+ DBUG_PRINT("info", ("page_rest: %u", page_rest));
+ }
+
+ /*
+ To minimize compressed size we will compress always relative to
+ very first chunk address (log_descriptor.horizon for now)
+ */
+ if (log_record_type_descriptor[type].compresed_LSN > 0)
+ {
+ if (translog_relative_LSN_encode(parts, &log_descriptor.horizon,
+ log_record_type_descriptor[type].
+ compresed_LSN, compressed_LSNs))
+ {
+ int rc= translog_unlock();
+ if (buffer_to_flush != NULL)
+ {
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ }
+ DBUG_RETURN(1);
+ }
+ /* recalculate header length after compression */
+ header_length1= 1 + 2 + 2 +
+ translog_variable_record_length_bytes(parts->record_length);
+ DBUG_PRINT("info", ("after compressing LSN(s) header length %u, "
+ "record length %lu",
+ header_length1, parts->record_length));
+ }
+
+ /* TODO: check space on current page for header + few bytes */
+ if (page_rest >= parts->record_length + header_length1)
+ {
+ /* following function makes translog_unlock(); */
+ DBUG_RETURN(translog_write_variable_record_1chunk(lsn, type, short_trid,
+ parts, buffer_to_flush,
+ header_length1, tcb));
+ }
+
+ buffer_rest= translog_get_current_group_size();
+
+ if (buffer_rest >= parts->record_length + header_length1 - page_rest)
+ {
+ /* following function makes translog_unlock(); */
+ DBUG_RETURN(translog_write_variable_record_1group(lsn, type, short_trid,
+ parts, buffer_to_flush,
+ header_length1, tcb));
+ }
+ /* following function makes translog_unlock(); */
+ DBUG_RETURN(translog_write_variable_record_mgroup(lsn, type, short_trid,
+ parts, buffer_to_flush,
+ header_length1,
+ buffer_rest, tcb));
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Write the fixed and pseudo-fixed log record
+
+ SYNOPSIS
+ translog_write_fixed_record()
+ lsn LSN of the record will be written here
+ type the log record type
+ short_trid Sort transaction ID or 0 if it has no sense
+ parts Descriptor of record source parts
+ tcb Transaction control block pointer for hooks by
+ record log type
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool translog_write_fixed_record(LSN *lsn,
+ enum translog_record_type type,
+ SHORT_TRANSACTION_ID short_trid,
+ struct st_translog_parts *parts,
+ void *tcb)
+{
+ struct st_translog_buffer *buffer_to_flush= NULL;
+ uchar chunk1_header[1 + 2];
+ uchar compressed_LSNs[2 * 7]; /* Max number of such LSNs per
+ record is 2 */
+ struct st_translog_part part;
+ int rc;
+ DBUG_ENTER("translog_write_fixed_record");
+ DBUG_ASSERT((log_record_type_descriptor[type].class ==
+ LOGRECTYPE_FIXEDLENGTH &&
+ parts->record_length ==
+ log_record_type_descriptor[type].fixed_length) ||
+ (log_record_type_descriptor[type].class ==
+ LOGRECTYPE_PSEUDOFIXEDLENGTH &&
+ (parts->record_length -
+ log_record_type_descriptor[type].compresed_LSN * 2) <=
+ log_record_type_descriptor[type].fixed_length));
+
+ translog_lock();
+ DBUG_PRINT("info", ("horizon (%u,0x%lx)",
+ (uint) log_descriptor.horizon.file_no,
+ (ulong) log_descriptor.horizon.rec_offset));
+
+ DBUG_ASSERT(log_descriptor.bc.current_page_size <= TRANSLOG_PAGE_SIZE);
+ DBUG_PRINT("info",
+ ("Page size: %u, record %u, next cond %d",
+ log_descriptor.bc.current_page_size,
+ (parts->record_length -
+ log_record_type_descriptor[type].compresed_LSN * 2 + 3),
+ ((((uint) log_descriptor.bc.current_page_size) +
+ (parts->record_length -
+ log_record_type_descriptor[type].compresed_LSN * 2 + 3)) >
+ TRANSLOG_PAGE_SIZE)));
+ /*
+ check that there is enough place on current page:
+ (log_record_type_descriptor[type].fixed_length - economized on compressed
+ LSNs) bytes
+ */
+ if ((((uint) log_descriptor.bc.current_page_size) +
+ (parts->record_length -
+ log_record_type_descriptor[type].compresed_LSN * 2 + 3)) >
+ TRANSLOG_PAGE_SIZE)
+ {
+ DBUG_PRINT("info", ("Next page"));
+ translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
+ &buffer_to_flush);
+ }
+
+ *lsn= log_descriptor.horizon;
+ if (log_record_type_descriptor[type].inwrite_hook &&
+ (*log_record_type_descriptor[type].inwrite_hook) (type, tcb,
+ lsn, parts))
+ {
+ DBUG_RETURN(1);
+ }
+
+
+ /* compress LSNs */
+ if (log_record_type_descriptor[type].class == LOGRECTYPE_PSEUDOFIXEDLENGTH)
+ {
+ DBUG_ASSERT(log_record_type_descriptor[type].compresed_LSN > 0);
+ if (translog_relative_LSN_encode(parts, lsn,
+ log_record_type_descriptor[type].
+ compresed_LSN, compressed_LSNs))
+ {
+ rc= 1;
+ goto err;
+ }
+ }
+
+ /*
+ Write the whole record at once (we sure that there is enough place on
+ the destination page
+ */
+ DBUG_ASSERT(parts->current != 0); /* first part is left for
+ header */
+ parts->total_record_length+= (part.len= 1 + 2);
+ part.buff= chunk1_header;
+ *chunk1_header= (uchar) (type |TRANSLOG_CHUNK_FIXED);
+ int2store(chunk1_header + 1, short_trid);
+ parts->current--;
+ set_dynamic(&parts->parts, (gptr) &part, parts->current);
+
+ rc= translog_write_parts_on_page(&log_descriptor.horizon,
+ &log_descriptor.bc,
+ parts->total_record_length, parts);
+
+ log_descriptor.bc.buffer->last_lsn= *lsn;
+err:
+ rc|= translog_unlock();
+
+ /*
+ check if we switched buffer and need process it (current buffer is
+ unlocked already => we will not delay other threads
+ */
+ if (buffer_to_flush != NULL)
+ {
+ if (!rc)
+ rc= translog_buffer_flush(buffer_to_flush);
+ rc|= translog_buffer_unlock(buffer_to_flush);
+ }
+
+ DBUG_RETURN(rc);
+}
+
+
+/*
+ Write the log record
+
+ SYNOPSIS
+ translog_write_record()
+ lsn LSN of the record will be written here
+ type the log record type
+ short_trid Sort transaction ID or 0 if it has no sense
+ tcb Transaction control block pointer for hooks by
+ record log type
+ partN_length length of Ns part of the log
+ partN_buffer pointer on Ns part buffer
+ 0 sign of the end of parts
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+my_bool translog_write_record(LSN *lsn,
+ enum translog_record_type type,
+ SHORT_TRANSACTION_ID short_trid,
+ void *tcb,
+ translog_size_t part1_length,
+ uchar *part1_buff, ...)
+{
+ struct st_translog_parts parts;
+ va_list pvar;
+ int rc;
+ DBUG_ENTER("translog_write_record");
+ DBUG_PRINT("enter", ("type %u, ShortTrID %u", (uint) type, (uint)short_trid));
+
+ /* move information about parts into dynamic array */
+ if (init_dynamic_array(&parts.parts, sizeof(struct st_translog_part),
+ 10, 10 CALLER_INFO))
+ {
+ UNRECOVERABLE_ERROR(("init array failed"));
+ DBUG_RETURN(1);
+ }
+ {
+ struct st_translog_part part;
+
+ /* reserve place for header */
+ parts.current= 1;
+ part.len= 0;
+ part.buff= 0;
+ if (insert_dynamic(&parts.parts, (gptr) &part))
+ {
+ UNRECOVERABLE_ERROR(("insert into array failed"));
+ DBUG_RETURN(1);
+ }
+
+ parts.record_length= part.len= part1_length;
+ part.buff= part1_buff;
+ if (insert_dynamic(&parts.parts, (gptr) &part))
+ {
+ UNRECOVERABLE_ERROR(("insert into array failed"));
+ DBUG_RETURN(1);
+ }
+ DBUG_PRINT("info", ("record length: %lu, %lu ...",
+ (ulong) parts.record_length,
+ (ulong) parts.total_record_length));
+
+ /* count record length */
+ va_start(pvar, part1_buff);
+ for (;;)
+ {
+ part.len= va_arg(pvar, translog_size_t);
+ if (part.len == 0)
+ break;
+ parts.record_length+= part.len;
+ part.buff= va_arg(pvar, uchar*);
+ if (insert_dynamic(&parts.parts, (gptr) &part))
+ {
+ UNRECOVERABLE_ERROR(("insert into array failed"));
+ DBUG_RETURN(1);
+ }
+ DBUG_PRINT("info", ("record length: %lu, %lu ...",
+ (ulong) parts.record_length,
+ (ulong) parts.total_record_length));
+ }
+ va_end(pvar);
+
+ /*
+ start total_record_length from record_length then overhead will
+ be add
+ */
+ parts.total_record_length= parts.record_length;
+ }
+ va_end(pvar);
+ DBUG_PRINT("info", ("record length: %lu, %lu",
+ (ulong) parts.record_length,
+ (ulong) parts.total_record_length));
+
+ /* process this parts */
+ if (!(rc= (log_record_type_descriptor[type].prewrite_hook &&
+ (*log_record_type_descriptor[type].prewrite_hook) (type, tcb,
+ &parts))))
+ {
+ switch (log_record_type_descriptor[type].class)
+ {
+ case LOGRECTYPE_VARIABLE_LENGTH:
+ {
+ rc= translog_write_variable_record(lsn, type, short_trid, &parts, tcb);
+ break;
+ }
+ case LOGRECTYPE_PSEUDOFIXEDLENGTH:
+ case LOGRECTYPE_FIXEDLENGTH:
+ {
+ rc= translog_write_fixed_record(lsn, type, short_trid, &parts, tcb);
+ break;
+ }
+ case LOGRECTYPE_NOT_ALLOWED:
+ default:
+ DBUG_ASSERT(0);
+ rc= 1;
+ }
+ }
+
+ delete_dynamic(&parts.parts);
+ DBUG_RETURN(rc);
+}
+
+
+/*
+ Decode compressed (relative) LSN(s)
+
+ SYNOPSIS
+ translog_relative_lsn_decode()
+ base_lsn LSN for encoding
+ src Decode LSN(s) from here
+ dst Put decoded LSNs here
+ lsns number of LSN(s)
+
+ RETURN
+ position in sources after decoded LSN(s)
+*/
+
+static uchar *translog_relative_LSN_decode(LSN *base_lsn,
+ uchar *src, uchar *dst, uint lsns)
+{
+ uint i;
+ for (i= 0; i < lsns; i++, dst+= 7)
+ {
+ src= translog_get_LSN_from_diff(base_lsn, src, dst);
+ }
+ return src;
+}
+
+/*
+ Get header of fixed/pseudo length record and call hook for it processing
+
+ SYNOPSIS
+ translog_fixed_length_header()
+ page Pointer to the buffer with page where LSN chunk is
+ placed
+ page_offset Offset of the first chunk in the page
+ buff Buffer to be filled with header data
+
+ RETURN
+ 0 - error
+ number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded
+ part of the header
+*/
+
+translog_size_t translog_fixed_length_header(uchar *page,
+ translog_size_t page_offset,
+ TRANSLOG_HEADER_BUFFER *buff)
+{
+ struct st_log_record_type_descriptor *desc=
+ log_record_type_descriptor + buff->type;
+ uchar *src= page + page_offset + 3;
+ uchar *dst= buff->header;
+ uchar *start= src;
+ uint lsns= desc->compresed_LSN;
+ uint length= desc->fixed_length + (lsns * 2);
+
+ DBUG_ENTER("translog_fixed_length_header");
+
+ buff->record_length= length;
+
+ if (desc->class == LOGRECTYPE_PSEUDOFIXEDLENGTH)
+ {
+ DBUG_ASSERT(lsns > 0);
+ src= translog_relative_LSN_decode(&buff->lsn, src, dst, lsns);
+ lsns*= 7;
+ dst+= lsns;
+ length-= lsns;
+ buff->compressed_LSN_economy= (uint16) (lsns - (src - start));
+ }
+ else
+ buff->compressed_LSN_economy= 0;
+
+ memmove(dst, src, length);
+ buff->non_header_data_start_offset= page_offset +
+ ((src + length) - (page + page_offset));
+ buff->non_header_data_len= 0;
+ DBUG_RETURN(buff->record_length);
+}
+
+
+/*
+ Free resources used by TRANSLOG_HEADER_BUFFER
+
+ SYNOPSIS
+ translog_free_record_header();
+*/
+
+void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff)
+{
+ DBUG_ENTER("translog_free_record_header");
+ if (buff->groups_no != 0)
+ {
+ my_free((gptr) buff->groups, MYF(0));
+ buff->groups_no= 0;
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+/*
+ Set current horizon in the scanner data structure
+
+ SYNOPSIS
+ translog_scanner_set_horizon()
+ scanner Information about current chunk during scanning
+*/
+
+static void translog_scanner_set_horizon(struct st_translog_scanner_data
+ *scanner)
+{
+ translog_lock();
+ scanner->horizon= log_descriptor.horizon;
+ translog_unlock();
+}
+
+
+/*
+ Set last page in the scanner data structure
+
+ SYNOPSIS
+ translog_scanner_set_last_page()
+ scanner Information about current chunk during scanning
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool translog_scanner_set_last_page(struct st_translog_scanner_data
+ *scanner)
+{
+ my_bool page_ok;
+ scanner->last_file_page= scanner->page_addr;
+ if (translog_get_last_page_addr(&scanner->last_file_page, &page_ok))
+ return (1);
+ return (0);
+}
+
+
+/*
+ Init scanner
+
+ SYNOPSIS
+ translog_init_scanner()
+ lsn LSN with which it have to be inited
+ fixed_horizon true if it is OK do not read records which was written
+ after scanning beginning
+ scanner scanner which have to be inited
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+static my_bool translog_init_scanner(LSN *lsn,
+ my_bool fixed_horizon,
+ struct st_translog_scanner_data *scanner)
+{
+ TRANSLOG_VALIDATOR_DATA data=
+ {
+ &scanner->page_addr, 0
+ };
+
+ DBUG_ENTER("translog_init_scanner");
+ DBUG_PRINT("enter", ("LSN: (0x%lx,0x%lx)",
+ (ulong) lsn->file_no, (ulong) lsn->rec_offset));
+ DBUG_ASSERT(lsn->rec_offset % TRANSLOG_PAGE_SIZE != 0);
+ scanner->page_offset= lsn->rec_offset % TRANSLOG_PAGE_SIZE;
+
+ scanner->fixed_horizon= fixed_horizon;
+
+ translog_scanner_set_horizon(scanner);
+ DBUG_PRINT("info", ("Horizon: (0x%lx,0x%lx)",
+ (ulong) scanner->horizon.file_no,
+ (ulong) scanner->horizon.rec_offset));
+
+ /* lsn < horizon */
+ DBUG_ASSERT(lsn->file_no < scanner->horizon.file_no ||
+ (lsn->file_no == scanner->horizon.file_no &&
+ lsn->rec_offset < scanner->horizon.rec_offset));
+
+ scanner->page_addr= *lsn;
+ scanner->page_addr.rec_offset-= scanner->page_offset;
+
+ if (translog_scanner_set_last_page(scanner))
+ DBUG_RETURN(1);
+
+ if ((scanner->page= translog_get_page(&data, scanner->buffer)) == NULL)
+ DBUG_RETURN(1);
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Checks End of the Log
+
+ SYNOPSIS
+ translog_scanner_eol()
+ scanner Information about current chunk during scanning
+
+ RETURN
+ 1 - End of the Log
+ 0 - OK
+*/
+static my_bool translog_scanner_eol(struct st_translog_scanner_data *scanner)
+{
+ DBUG_ENTER("translog_scanner_eol");
+ DBUG_PRINT("enter",
+ ("Horizon: (%lu, 0x%lx), Current: (%lu, 0x%lx+0x%x=0x%lx)",
+ (ulong) scanner->horizon.file_no,
+ (ulong) scanner->horizon.rec_offset,
+ (ulong) scanner->page_addr.file_no,
+ (ulong) scanner->page_addr.rec_offset,
+ (uint) scanner->page_offset,
+ (ulong) (scanner->page_addr.rec_offset + scanner->page_offset)));
+ if (scanner->horizon.file_no > scanner->page_addr.file_no ||
+ (scanner->horizon.file_no == scanner->page_addr.file_no &&
+ scanner->horizon.rec_offset > (scanner->page_addr.rec_offset +
+ scanner->page_offset)))
+ {
+ DBUG_PRINT("info", ("Horizon is not reached"));
+ DBUG_RETURN(0);
+ }
+ if (scanner->fixed_horizon)
+ {
+ DBUG_PRINT("info", ("Horizon is fixed and reached"));
+ DBUG_RETURN(1);
+ }
+ translog_scanner_set_horizon(scanner);
+ DBUG_PRINT("info",
+ ("Horizon is re-read, EOL: %d",
+ scanner->horizon.file_no <= scanner->page_addr.file_no &&
+ (scanner->horizon.file_no != scanner->page_addr.file_no ||
+ scanner->horizon.rec_offset <= (scanner->page_addr.rec_offset +
+ scanner->page_offset))));
+ DBUG_RETURN(scanner->horizon.file_no <= scanner->page_addr.file_no &&
+ (scanner->horizon.file_no != scanner->page_addr.file_no ||
+ scanner->horizon.rec_offset <= (scanner->page_addr.rec_offset +
+ scanner->page_offset)));
+}
+
+
+/*
+ Cheks End of the Page
+
+ SYNOPSIS
+ translog_scanner_eop()
+ scanner Information about current chunk during scanning
+
+ RETURN
+ 1 - End of the Page
+ 0 - OK
+*/
+static my_bool translog_scanner_eop(struct st_translog_scanner_data *scanner)
+{
+ DBUG_ENTER("translog_scanner_eop");
+ DBUG_RETURN(scanner->page_offset >= TRANSLOG_PAGE_SIZE ||
+ scanner->page[scanner->page_offset] == 0);
+}
+
+
+/*
+ Checks End of the File (I.e. we are scanning last page, which do not
+ mean end of this page)
+
+ SYNOPSIS
+ translog_scanner_eof()
+ scanner Information about current chunk during scanning
+
+ RETURN
+ 1 - End of the File
+ 0 - OK
+*/
+static my_bool translog_scanner_eof(struct st_translog_scanner_data *scanner)
+{
+ DBUG_ENTER("translog_scanner_eof");
+ DBUG_ASSERT(scanner->page_addr.file_no == scanner->last_file_page.file_no);
+ DBUG_PRINT("enter", ("curr Page 0x%lx, last page 0x%lx, "
+ "normal EOF %d",
+ scanner->page_addr.rec_offset,
+ scanner->last_file_page.rec_offset,
+ scanner->page_addr.rec_offset ==
+ scanner->last_file_page.rec_offset));
+ /*
+ TODO: detect damaged file EOF,
+ TODO: issue warning if damaged file EOF detected
+ */
+ DBUG_RETURN(scanner->page_addr.rec_offset ==
+ scanner->last_file_page.rec_offset);
+}
+
+
+/*
+ Move scanner to the next chunk
+
+ SYNOPSIS
+ translog_get_next_chunk()
+ scanner Information about current chunk during scanning
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool translog_get_next_chunk(struct st_translog_scanner_data *scanner)
+{
+ DBUG_ENTER("translog_get_next_chunk");
+ uint16 len= translog_get_total_chunk_length(scanner->page,
+ scanner->page_offset);
+ if (len == 0)
+ DBUG_RETURN(1);
+ scanner->page_offset+= len;
+
+ if (translog_scanner_eol(scanner))
+ {
+ scanner->page= &end_of_log;
+ scanner->page_offset= 0;
+ DBUG_RETURN(0);
+ }
+ if (translog_scanner_eop(scanner))
+ {
+ if (translog_scanner_eof(scanner))
+ {
+ DBUG_PRINT("info", ("horizon (%lu,0x%lx) pageaddr (%lu,0x%lx)",
+ (ulong) scanner->horizon.file_no,
+ (ulong) scanner->horizon.rec_offset,
+ (ulong) scanner->page_addr.file_no,
+ (ulong) scanner->page_addr.rec_offset));
+ /* if it is log end it have to be caught before */
+ DBUG_ASSERT(scanner->horizon.file_no > scanner->page_addr.file_no);
+ scanner->page_addr.file_no++;
+ scanner->page_addr.rec_offset= TRANSLOG_PAGE_SIZE;
+ if (translog_scanner_set_last_page(scanner))
+ DBUG_RETURN(1);
+ }
+ else
+ {
+ scanner->page_addr.rec_offset+= TRANSLOG_PAGE_SIZE;
+ }
+ {
+ TRANSLOG_VALIDATOR_DATA data=
+ {
+ &scanner->page_addr, 0
+ };
+ if ((scanner->page= translog_get_page(&data, scanner->buffer)) == NULL)
+ DBUG_RETURN(1);
+ }
+ scanner->page_offset= translog_get_first_chunk_offset(scanner->page);
+ if (translog_scanner_eol(scanner))
+ {
+ scanner->page= &end_of_log;
+ scanner->page_offset= 0;
+ DBUG_RETURN(0);
+ }
+ DBUG_ASSERT(scanner->page[scanner->page_offset] != 0);
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Get header of variable length record and call hook for it processing
+
+ SYNOPSIS
+ translog_variable_length_header()
+ page Pointer to the buffer with page where LSN chunk is
+ placed
+ page_offset Offset of the first chunk in the page
+ buff Buffer to be filled with header data
+ scanner If present should be moved to the header page if
+ it differ from LSN page
+
+ RETURN
+ 0 - error
+ number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded
+ part of the header
+*/
+
+translog_size_t translog_variable_length_header(uchar *page,
+ translog_size_t page_offset,
+ TRANSLOG_HEADER_BUFFER *buff,
+ struct
+ st_translog_scanner_data
+ *scanner)
+{
+ struct st_log_record_type_descriptor *desc=
+ log_record_type_descriptor + buff->type;
+ uchar *src= page + page_offset + 1 + 2;
+ uchar *dst= buff->header;
+ LSN base_lsn;
+ uint lsns= desc->compresed_LSN;
+ uint16 chunk_len;
+ uint16 length= desc->read_header_len + (lsns * 2);
+ uint16 buffer_length= length;
+ uint16 body_len;
+ struct st_translog_scanner_data internal_scanner;
+
+ DBUG_ENTER("translog_variable_length_header");
+
+ buff->record_length= translog_variable_record_1group_decode_len(&src);
+ chunk_len= uint2korr(src);
+ DBUG_PRINT("info", ("rec len: %lu, chunk len: %u, length %u, bufflen %u",
+ (ulong) buff->record_length, (uint) chunk_len,
+ (uint) length, (uint) buffer_length));
+ if (chunk_len == 0)
+ {
+ uint16 page_rest;
+ DBUG_PRINT("info", ("1 group"));
+ src+= 2;
+ page_rest= TRANSLOG_PAGE_SIZE - (src - page);
+
+ base_lsn= buff->lsn;
+ body_len= (page_rest < buff->record_length ?
+ page_rest : buff->record_length);
+ }
+ else
+ {
+ uint grp_no, curr;
+ uint header_to_skip;
+ uint16 page_rest;
+
+ DBUG_PRINT("info", ("multi-group"));
+ grp_no= buff->groups_no= uint2korr(src + 2);
+ if ((buff->groups=
+ (TRANSLOG_GROUP*) my_malloc(sizeof(TRANSLOG_GROUP) * buff->groups_no,
+ MYF(0))) == 0)
+ DBUG_RETURN(0);
+ DBUG_PRINT("info", ("Groups: %u", (uint) grp_no));
+ src+= (2 + 2);
+ page_rest= TRANSLOG_PAGE_SIZE - (src - page);
+ curr= 0;
+ header_to_skip= src - (page + page_offset);
+ buff->chunk0_pages= 0;
+
+ for (;;)
+ {
+ uint i;
+ uint read= grp_no;
+
+ buff->chunk0_pages++;
+ if (page_rest < grp_no * (7 + 1))
+ read= page_rest / (7 + 1);
+ DBUG_PRINT("info", ("Read chunk0 page#%u read %u left %u start from %u",
+ buff->chunk0_pages, read, grp_no, curr));
+ for (i= 0; i < read; i++, curr++)
+ {
+ DBUG_ASSERT(curr < buff->groups_no);
+ lsn7korr(&buff->groups[curr].addr, src + i * (7 + 1));
+ buff->groups[curr].num= src[i * (7 + 1) + 7];
+ DBUG_PRINT("info", ("group #%u (%u,0x%lx) chunks %u",
+ curr,
+ (uint) buff->groups[curr].addr.file_no,
+ (ulong) buff->groups[curr].addr.rec_offset,
+ (uint) buff->groups[curr].num));
+ }
+ grp_no-= read;
+ if (grp_no == 0)
+ {
+ if (scanner)
+ {
+ buff->chunk0_data_addr= scanner->page_addr;
+ buff->chunk0_data_addr.rec_offset+= (page_offset + header_to_skip +
+ i * (7 + 1));
+ }
+ else
+ {
+ buff->chunk0_data_addr= buff->lsn;
+ buff->chunk0_data_addr.rec_offset+= (header_to_skip + i * (7 + 1));
+ }
+ buff->chunk0_data_len= chunk_len - 2 - i * (7 + 1);
+ DBUG_PRINT("info", ("Data address (%u,0x%lx), len: %u",
+ (uint) buff->chunk0_data_addr.file_no,
+ (ulong) buff->chunk0_data_addr.rec_offset,
+ buff->chunk0_data_len));
+ break;
+ }
+ if (scanner == NULL)
+ {
+ DBUG_PRINT("info", ("use internal scanner for header reding"));
+ scanner= &internal_scanner;
+ translog_init_scanner(&buff->lsn, 1, scanner);
+ }
+ translog_get_next_chunk(scanner);
+ page= scanner->page;
+ page_offset= scanner->page_offset;
+ src= page + page_offset + header_to_skip;
+ chunk_len= uint2korr(src - 2 - 2);
+ DBUG_PRINT("info", ("Chunk len: %u", (uint) chunk_len));
+ page_rest= TRANSLOG_PAGE_SIZE - (src - page);
+ }
+
+ if (scanner == NULL)
+ {
+ DBUG_PRINT("info", ("use internal scanner"));
+ scanner= &internal_scanner;
+ }
+
+ base_lsn= buff->groups[0].addr;
+ translog_init_scanner(&base_lsn, 1, scanner);
+ /* first group chunk is always chunk type 2 */
+ page= scanner->page;
+ page_offset= scanner->page_offset;
+ src= page + page_offset + 1;
+ page_rest= TRANSLOG_PAGE_SIZE - (src - page);
+ body_len= page_rest;
+ }
+ if (lsns)
+ {
+ uchar *start= src;
+ src= translog_relative_LSN_decode(&base_lsn, src, dst, lsns);
+ lsns*= 7;
+ dst+= lsns;
+ length-= lsns;
+ buff->record_length+= (buff->compressed_LSN_economy=
+ (uint16) (lsns - (src - start)));
+ DBUG_PRINT("info", ("lsns: %u, length %u, economy %u, new length %lu",
+ lsns / 7, (uint) length,
+ (uint) buff->compressed_LSN_economy,
+ (ulong) buff->record_length));
+ body_len-= (src - start);
+ }
+ else
+ buff->compressed_LSN_economy= 0;
+
+ DBUG_ASSERT(body_len >= length);
+ body_len-= length;
+ memmove(dst, src, length);
+ buff->non_header_data_start_offset= src + length - page;
+ buff->non_header_data_len= body_len;
+ DBUG_PRINT("info", ("non_header_data_start_offset %u len %u buffer %u",
+ buff->non_header_data_start_offset,
+ buff->non_header_data_len, buffer_length));
+ DBUG_RETURN(buffer_length);
+}
+
+
+/*
+ Read record header from the given buffer
+
+ SYNOPSIS
+ translog_read_record_header_from_buffer()
+ page page content buffer
+ page_offset offset of the chunk in the page
+ buff destination buffer
+ scanner if it is need this scanner will be moved to the
+ record header page (differ from LSN page in case of
+ multi-group records
+*/
+
+translog_size_t
+translog_read_record_header_from_buffer(uchar *page,
+ uint16 page_offset,
+ TRANSLOG_HEADER_BUFFER *buff,
+ struct
+ st_translog_scanner_data *scanner)
+{
+ DBUG_ENTER("translog_read_record_header_from_buffer");
+ DBUG_ASSERT((page[page_offset] & TRANSLOG_CHUNK_TYPE) ==
+ TRANSLOG_CHUNK_LSN ||
+ (page[page_offset] & TRANSLOG_CHUNK_TYPE) ==
+ TRANSLOG_CHUNK_FIXED);
+ buff->type= (page[page_offset] & TRANSLOG_REC_TYPE);
+ buff->short_trid= uint2korr(page + page_offset + 1);
+ DBUG_PRINT("info", ("Type %u, Sort TrID %u, LSN (%u,0x%lx)",
+ (uint) buff->type, (uint)buff->short_trid,
+ buff->lsn.file_no, buff->lsn.rec_offset));
+ /* Read required bytes from the header and call hook */
+ switch (log_record_type_descriptor[buff->type].class)
+ {
+ case LOGRECTYPE_VARIABLE_LENGTH:
+ DBUG_RETURN(translog_variable_length_header(page, page_offset, buff,
+ scanner));
+ case LOGRECTYPE_PSEUDOFIXEDLENGTH:
+ case LOGRECTYPE_FIXEDLENGTH:
+ DBUG_RETURN(translog_fixed_length_header(page, page_offset, buff));
+ default:
+ DBUG_ASSERT(0);
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Read record header and some fixed part of a record (the part depend on
+ record type).
+
+ SYNOPSIS
+ translog_read_record_header()
+ lsn log record serial number (address of the record)
+ buff log record header buffer
+
+ NOTE
+ - lsn can point to TRANSLOG_HEADER_BUFFER::lsn and it will be processed
+ correctly.
+ - Some type of record can be read completely by this call
+ - "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
+ LSN can be translated to absolute one), some fields can be added
+ (like actual header length in the record if the header has variable
+ length)
+
+ RETURN
+ 0 - error
+ number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded
+ part of the header
+*/
+
+translog_size_t translog_read_record_header(LSN *lsn,
+ TRANSLOG_HEADER_BUFFER *buff)
+{
+ uchar buffer[TRANSLOG_PAGE_SIZE], *page;
+ translog_size_t page_offset= lsn->rec_offset % TRANSLOG_PAGE_SIZE;
+
+ DBUG_ENTER("translog_read_record_header");
+ DBUG_PRINT("enter", ("LSN: (0x%lx,0x%lx)",
+ (ulong) lsn->file_no, (ulong) lsn->rec_offset));
+ DBUG_ASSERT(lsn->rec_offset % TRANSLOG_PAGE_SIZE != 0);
+
+ buff->lsn= *lsn;
+ buff->groups_no= 0;
+ {
+ TRANSLOG_ADDRESS addr= *lsn;
+ TRANSLOG_VALIDATOR_DATA data=
+ {
+ &addr, 0
+ };
+ addr.rec_offset-= page_offset;
+ if ((page= translog_get_page(&data, buffer)) == NULL)
+ DBUG_RETURN(0);
+ }
+
+ DBUG_RETURN(translog_read_record_header_from_buffer(page, page_offset,
+ buff, 0));
+}
+
+
+/*
+ Read record header and some fixed part of a record (the part depend on
+ record type).
+
+ SYNOPSIS
+ translog_read_record_header_scan()
+ scan scanner position to read
+ buff log record header buffer
+ move_scanner request to move scanner to the header position
+
+ NOTE
+ - Some type of record can be read completely by this call
+ - "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
+ LSN can be translated to absolute one), some fields can be added
+ (like actual header length in the record if the header has variable
+ length)
+
+ RETURN
+ 0 - error
+ number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded
+ part of the header
+*/
+
+translog_size_t
+translog_read_record_header_scan(struct st_translog_scanner_data
+ *scanner,
+ TRANSLOG_HEADER_BUFFER *buff,
+ my_bool move_scanner)
+{
+ DBUG_ENTER("translog_read_record_header_scan");
+ DBUG_PRINT("enter", ("Scanner: Cur: (%u, 0x%lx), Hrz: (%u, 0x%lx), "
+ "Lst: (%u, 0x%lx), Offset: %u(%x), fixed %d",
+ (uint) scanner->page_addr.file_no,
+ (ulong) scanner->page_addr.rec_offset,
+ (uint) scanner->horizon.file_no,
+ (ulong) scanner->horizon.rec_offset,
+ (uint) scanner->last_file_page.file_no,
+ (ulong) scanner->last_file_page.rec_offset,
+ (uint) scanner->page_offset,
+ (uint) scanner->page_offset, scanner->fixed_horizon));
+ buff->groups_no= 0;
+ buff->lsn= scanner->page_addr;
+ buff->lsn.rec_offset+= scanner->page_offset;
+ DBUG_RETURN(translog_read_record_header_from_buffer(scanner->page,
+ scanner->page_offset,
+ buff,
+ (move_scanner ?
+ scanner : 0)));
+}
+
+
+/*
+ Read record header and some fixed part of the next record (the part
+ depend on record type).
+
+ SYNOPSIS
+ translog_read_next_record_header()
+ lsn log record serial number (address of the record)
+ previous to the record which will be read
+ If LSN present scanner will be initialized from it,
+ do not use LSN after initialization for fast scanning.
+ buff log record header buffer
+ fixed_horizon true if it is OK do not read records which was written
+ after scanning beginning
+ scanner data for scanning if lsn is NULL scanner data
+ will be used for continue scanning.
+ The scanner can be NULL.
+
+ NOTE
+ - lsn can point to TRANSLOG_HEADER_BUFFER::lsn and it will be processed
+ correctly (lsn in buffer will be replaced by next record, but initial
+ lsn will be read correctly).
+ - it is like translog_read_record_header, but read next record, so see
+ its NOTES.
+ - in case of end of the log buff->lsn will be set to
+ (CONTROL_FILE_IMPOSSIBLE_FILENO, 0)
+ RETURN
+ 0 - error
+ TRANSLOG_RECORD_HEADER_MAX_SIZE + 1 - End of the log
+ number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded
+ part of the header
+*/
+translog_size_t translog_read_next_record_header(LSN *lsn,
+ TRANSLOG_HEADER_BUFFER *buff,
+ my_bool fixed_horizon,
+ struct
+ st_translog_scanner_data
+ *scanner)
+{
+ struct st_translog_scanner_data internal_scanner;
+ uint8 chunk_type;
+
+ buff->groups_no= 0; /* to be sure that we will free
+ it right */
+
+ DBUG_ENTER("translog_read_next_record_header");
+ DBUG_PRINT("enter", ("scanner: 0x%lx", (ulong) scanner));
+ if (scanner == NULL)
+ {
+ DBUG_ASSERT(lsn != NULL);
+ scanner= &internal_scanner;
+ }
+ if (lsn)
+ {
+ if (translog_init_scanner(lsn, fixed_horizon, scanner))
+ DBUG_RETURN(0);
+ DBUG_ASSERT(lsn->rec_offset % TRANSLOG_PAGE_SIZE != 0);
+ }
+ DBUG_PRINT("info", ("Scanner: Cur: (%u, 0x%lx), Hrz: (%u, 0x%lx), "
+ "Lst: (%u, 0x%lx), Offset: %u(%x), fixed %d",
+ (uint) scanner->page_addr.file_no,
+ (ulong) scanner->page_addr.rec_offset,
+ (uint) scanner->horizon.file_no,
+ (ulong) scanner->horizon.rec_offset,
+ (uint) scanner->last_file_page.file_no,
+ (ulong) scanner->last_file_page.rec_offset,
+ (uint) scanner->page_offset,
+ (uint) scanner->page_offset, scanner->fixed_horizon));
+
+ do
+ {
+ if (translog_get_next_chunk(scanner))
+ DBUG_RETURN(0);
+ chunk_type= scanner->page[scanner->page_offset] & TRANSLOG_CHUNK_TYPE;
+ DBUG_PRINT("info", ("type %x, byte %x", (uint) chunk_type,
+ (uint) scanner->page[scanner->page_offset]));
+ } while (chunk_type != TRANSLOG_CHUNK_LSN && chunk_type !=
+ TRANSLOG_CHUNK_FIXED && scanner->page[scanner->page_offset] != 0);
+
+ if (scanner->page[scanner->page_offset] == 0)
+ {
+ /* Last record was read */
+ buff->lsn.file_no= CONTROL_FILE_IMPOSSIBLE_FILENO;
+ buff->lsn.rec_offset= 0;
+ DBUG_RETURN(TRANSLOG_RECORD_HEADER_MAX_SIZE + 1); /* just it is not error
+ */
+ }
+ DBUG_RETURN(translog_read_record_header_scan(scanner, buff, 0));
+}
+
+
+/*
+ Moves record data reader to the next chunk and fill the data reader
+ information about that chunk.
+
+ SYNOPSIS
+ translog_record_read_next_chunk()
+ data data cursor
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+static my_bool translog_record_read_next_chunk(struct st_translog_reader_data
+ *data)
+{
+ translog_size_t new_current_offset= data->current_offset + data->chunk_size;
+ uint16 chunk_header_len, chunk_len;
+ uint8 type;
+
+ DBUG_ENTER("translog_record_read_next_chunk");
+
+ if (data->eor)
+ {
+ DBUG_PRINT("info", ("end of the record flag set"));
+ DBUG_RETURN(1);
+ }
+
+ if (data->header.groups_no &&
+ data->header.groups_no - 1 != data->current_group &&
+ data->header.groups[data->current_group].num == data->current_chunk)
+ {
+ /* Goto next group */
+ data->current_group++;
+ data->current_chunk= 0;
+ DBUG_PRINT("info", ("skip to group #%u", data->current_group));
+ translog_init_scanner(&data->header.groups[data->current_group].addr,
+ 1, &data->scanner);
+ }
+ else
+ {
+ data->current_chunk++;
+ if (translog_get_next_chunk(&data->scanner))
+ DBUG_RETURN(1);
+ }
+ type= data->scanner.page[data->scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
+
+ if (type == TRANSLOG_CHUNK_LSN && data->header.groups_no)
+ {
+ DBUG_PRINT("info",
+ ("Last chunk: data len %u, offset %u group %u of %u",
+ data->header.chunk0_data_len, data->scanner.page_offset,
+ data->current_group, data->header.groups_no - 1));
+ DBUG_ASSERT(data->header.groups_no - 1 == data->current_group);
+ DBUG_ASSERT(data->header.lsn.file_no == data->scanner.page_addr.file_no &&
+ data->header.lsn.rec_offset ==
+ data->scanner.page_addr.rec_offset + data->scanner.page_offset);
+ translog_init_scanner(&data->header.chunk0_data_addr, 1, &data->scanner);
+ data->chunk_size= data->header.chunk0_data_len;
+ data->body_offset= data->scanner.page_offset;
+ data->current_offset= new_current_offset;
+ data->eor= 1;
+ DBUG_RETURN(0);
+ }
+
+ if (type == TRANSLOG_CHUNK_LSN || type == TRANSLOG_CHUNK_FIXED)
+ {
+ data->eor= 1;
+ DBUG_RETURN(1); /* End of record */
+ }
+
+ chunk_header_len=
+ translog_get_chunk_header_length(data->scanner.page,
+ data->scanner.page_offset);
+ chunk_len= translog_get_total_chunk_length(data->scanner.page,
+ data->scanner.page_offset);
+ data->chunk_size= chunk_len - chunk_header_len;
+ data->body_offset= data->scanner.page_offset + chunk_header_len;
+ data->current_offset= new_current_offset;
+ DBUG_PRINT("info", ("grp: %u chunk %u body_offset %u, chunk_size %u, "
+ "current_offset %lu",
+ (uint) data->current_group,
+ (uint) data->current_chunk,
+ (uint) data->body_offset,
+ (uint) data->chunk_size, (ulong) data->current_offset));
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Initialize record reader data from LSN
+
+ SYNOPSIS
+ translog_init_reader_data()
+ lsn reference to LSN we should start from
+ data reader data to initialize
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool translog_init_reader_data(LSN *lsn,
+ struct st_translog_reader_data *data)
+{
+ DBUG_ENTER("translog_init_reader_data");
+ if (translog_init_scanner(lsn, 1, &data->scanner) ||
+ (data->read_header=
+ translog_read_record_header_scan(&data->scanner, &data->header, 1)) == 0)
+ {
+ DBUG_RETURN(1);
+ }
+ data->body_offset= data->header.non_header_data_start_offset;
+ data->chunk_size= data->header.non_header_data_len;
+ data->current_offset= data->read_header;
+ data->current_group= 0;
+ data->current_chunk= 0;
+ data->eor= 0;
+ DBUG_PRINT("info", ("read_header %u, "
+ "body_offset %u, chunk_size %u, current_offset %lu",
+ (uint) data->read_header,
+ (uint) data->body_offset,
+ (uint) data->chunk_size, (ulong) data->current_offset));
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Read a part of the record.
+
+ SYNOPSIS
+ translog_read_record_header()
+ lsn log record serial number (address of the record)
+ offset from the beginning of the record beginning (read
+ by translog_read_record_header).
+ length length of record part which have to be read.
+ buffer buffer where to read the record part (have to be at
+ least 'length' bytes length)
+
+ RETURN
+ length of data actually read
+*/
+
+translog_size_t translog_read_record(LSN *lsn,
+ translog_size_t offset,
+ translog_size_t length,
+ uchar *buffer,
+ struct st_translog_reader_data *data)
+{
+ translog_size_t requested_length= length;
+ translog_size_t end= offset + length;
+ struct st_translog_reader_data internal_data;
+
+ DBUG_ENTER("translog_read_record");
+
+ if (data == NULL)
+ {
+ DBUG_ASSERT(lsn != NULL);
+ data= &internal_data;
+ }
+ if (lsn ||
+ (offset < data->current_offset &&
+ !(offset < data->read_header && offset + length < data->read_header)))
+ {
+ if (translog_init_reader_data(lsn, data))
+ DBUG_RETURN(0);
+ }
+ DBUG_PRINT("info", ("Offset %lu, length %lu "
+ "Scanner: Cur: (%u, 0x%lx), Hrz: (%u, 0x%lx), "
+ "Lst: (%u, 0x%lx), Offset: %u(%x), fixed %d",
+ (ulong) offset, (ulong) length,
+ (uint) data->scanner.page_addr.file_no,
+ (ulong) data->scanner.page_addr.rec_offset,
+ (uint) data->scanner.horizon.file_no,
+ (ulong) data->scanner.horizon.rec_offset,
+ (uint) data->scanner.last_file_page.file_no,
+ (ulong) data->scanner.last_file_page.rec_offset,
+ (uint) data->scanner.page_offset,
+ (uint) data->scanner.page_offset,
+ data->scanner.fixed_horizon));
+ if (offset < data->read_header)
+ {
+ DBUG_PRINT("info",
+ ("enter header offset %lu, length %lu",
+ (ulong) offset, (ulong) length));
+ uint16 len= (data->read_header < end ? data->read_header : end) - offset;
+ memmove(buffer, data->header.header + offset, len);
+ length-= len;
+ if (length == 0)
+ DBUG_RETURN(requested_length);
+ offset+= len;
+ buffer+= len;
+ DBUG_PRINT("info",
+ ("len: %u, offset %lu, curr %lu, length %lu",
+ len, (ulong) offset, (ulong) data->current_offset,
+ (ulong) length));
+ }
+ /* TODO: find first page which we should read by offset */
+
+ /* read the record chunk by chunk */
+ do
+ {
+ uint page_end= data->current_offset + data->chunk_size;
+ DBUG_PRINT("info",
+ ("enter body offset %lu, curr %lu, length %lu page_end %lu",
+ (ulong) offset, (ulong) data->current_offset, (ulong) length,
+ (ulong) page_end));
+ if (offset < page_end)
+ {
+ DBUG_ASSERT(offset >= data->current_offset);
+ uint len= page_end - offset;
+ memmove(buffer,
+ data->scanner.page + data->body_offset +
+ (offset - data->current_offset), len);
+ length-= len;
+ if (length == 0)
+ DBUG_RETURN(requested_length);
+ offset+= len;
+ buffer+= len;
+ DBUG_PRINT("info",
+ ("len: %u, offset %lu, curr %lu, length %lu",
+ len, (ulong) offset, (ulong) data->current_offset,
+ (ulong) length));
+ }
+ if (translog_record_read_next_chunk(data))
+ DBUG_RETURN(requested_length - length);
+ } while (length != 0);
+
+ DBUG_RETURN(requested_length);
+}
+
+
+/*
+ Force skipping to the next buffer
+
+ SYNOPSIS
+ translog_force_current_buffer_to_finish()
+*/
+
+static void translog_force_current_buffer_to_finish()
+{
+ TRANSLOG_ADDRESS new_buff_begunning;
+ uint8 old_buffer_no= log_descriptor.bc.buffer_no;
+ uint8 new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
+ struct st_translog_buffer *new_buffer= log_descriptor.buffers + new_buffer_no;
+ struct st_translog_buffer *old_buffer= log_descriptor.bc.buffer;
+ uchar *data= log_descriptor.bc.ptr -log_descriptor.bc.current_page_size;
+ uint16 left= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_size;
+ uint16 current_page_size;
+
+ new_buff_begunning= log_descriptor.bc.buffer->offset;
+ new_buff_begunning.rec_offset+= log_descriptor.bc.buffer->size;
+
+ DBUG_ENTER("translog_force_current_buffer_to_finish");
+ DBUG_PRINT("enter", ("Buffer #%u 0x%lx, "
+ "Buffer addr (%lu,0x%lx), "
+ "Page addr: (%lu,0x%lx), "
+ "New Buff: (%lu,0x%lx), "
+ "size %lu (%lu), Pg: %u, left: %u",
+ (uint) log_descriptor.bc.buffer_no,
+ (ulong) log_descriptor.bc.buffer,
+ (ulong) log_descriptor.bc.buffer->offset.file_no,
+ (ulong) log_descriptor.bc.buffer->offset.rec_offset,
+ (ulong) log_descriptor.horizon.file_no,
+ (ulong) (log_descriptor.horizon.rec_offset -
+ log_descriptor.bc.current_page_size),
+ (ulong) new_buff_begunning.file_no,
+ (ulong) new_buff_begunning.rec_offset,
+ (ulong) log_descriptor.bc.buffer->size,
+ (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
+ buffer->buffer),
+ (uint) log_descriptor.bc.current_page_size,
+ (uint) left));
+ DBUG_ASSERT(log_descriptor.bc.ptr !=NULL);
+ DBUG_ASSERT((log_descriptor.bc.ptr -log_descriptor.bc.buffer->buffer)
+ %TRANSLOG_PAGE_SIZE ==
+ log_descriptor.bc.current_page_size % TRANSLOG_PAGE_SIZE);
+ DBUG_ASSERT(log_descriptor.horizon.file_no ==
+ log_descriptor.bc.buffer->offset.file_no);
+ DBUG_ASSERT(log_descriptor.bc.buffer->offset.rec_offset +
+ (log_descriptor.bc.ptr -log_descriptor.bc.buffer->buffer) ==
+ log_descriptor.horizon.rec_offset);
+ if (left != TRANSLOG_PAGE_SIZE && left != 0)
+ {
+ /*
+ TODO: if 'left' is so small that can't hold any other record
+ then do not move the page
+ */
+ DBUG_PRINT("info", ("left %u", (uint) left));
+
+ new_buff_begunning.rec_offset-= log_descriptor.bc.current_page_size;
+ current_page_size= log_descriptor.bc.current_page_size;
+
+ bzero(log_descriptor.bc.ptr, left);
+ log_descriptor.bc.buffer->size+= left;
+ DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx, "
+ "Size: %lu",
+ (uint) log_descriptor.bc.buffer->buffer_no,
+ (ulong) log_descriptor.bc.buffer,
+ (ulong) log_descriptor.bc.buffer->size));
+ DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no ==
+ log_descriptor.bc.buffer_no);
+ }
+ else
+ {
+ left= 0;
+ log_descriptor.bc.current_page_size= 0;
+ }
+
+ translog_buffer_lock(new_buffer);
+ translog_wait_for_buffer_free(new_buffer);
+
+ {
+ uint16 write_counter= log_descriptor.bc.write_counter;
+ uint16 previous_offset= log_descriptor.bc.previous_offset;
+ translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no);
+ log_descriptor.bc.buffer->offset= new_buff_begunning;
+ log_descriptor.bc.write_counter= write_counter;
+ log_descriptor.bc.previous_offset= previous_offset;
+ }
+
+ if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
+ {
+ translog_put_sector_protection(data, &log_descriptor.bc);
+ if (left)
+ {
+ log_descriptor.bc.write_counter++;
+ log_descriptor.bc.previous_offset= current_page_size;
+ }
+ else
+ {
+ DBUG_PRINT("info", ("drop write_counter"));
+ log_descriptor.bc.write_counter= 0;
+ log_descriptor.bc.previous_offset= 0;
+ }
+ }
+
+ if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
+ {
+ uint32 crc= translog_adler_crc(data + log_descriptor.page_overhead,
+ TRANSLOG_PAGE_SIZE -
+ log_descriptor.page_overhead);
+ DBUG_PRINT("info", ("CRC: 0x%lx", (ulong) crc));
+ int4store(data + 3 + 3 + 1, crc);
+ }
+
+ if (left)
+ {
+ memmove(new_buffer->buffer, data, current_page_size);
+ log_descriptor.bc.ptr +=current_page_size;
+ log_descriptor.bc.buffer->size= log_descriptor.bc.current_page_size=
+ current_page_size;
+ new_buffer->overlay= old_buffer;
+ }
+ else
+ translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
+
+ DBUG_VOID_RETURN;
+}
+
+/*
+ Flush the log up to given LSN (included)
+
+ SYNOPSIS
+ translog_flush()
+ lsn log record serial number up to which (inclusive)
+ the log have to be flushed
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+my_bool translog_flush(LSN *lsn)
+{
+ LSN old_flushed, sent_to_file;
+ int rc= 0;
+ uint i;
+ my_bool full_circle= 0;
+
+ DBUG_ENTER("translog_flush");
+ DBUG_PRINT("enter", ("Flush up to LSN (%u,0x%lx)",
+ (uint) lsn->file_no, (ulong) lsn->rec_offset));
+
+ translog_lock();
+ old_flushed= log_descriptor.flushed;
+ for (;;)
+ {
+ uint8 buffer_no= log_descriptor.bc.buffer_no;
+ uint8 buffer_start= buffer_no;
+ struct st_translog_buffer *buffer_unlock= log_descriptor.bc.buffer;
+
+ struct st_translog_buffer *buffer= log_descriptor.bc.buffer;
+ /* we can't flush in future */
+ DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, *lsn) >= 0);
+ if (cmp_translog_addr(log_descriptor.flushed, *lsn) >= 0)
+ {
+ DBUG_PRINT("info", ("already flushed (%u,0x%lx)",
+ (uint) log_descriptor.flushed.file_no,
+ (ulong) log_descriptor.flushed.rec_offset));
+ translog_unlock();
+ DBUG_RETURN(0);
+ }
+ /* send to the file if it is not sent */
+ translog_get_sent_to_file(&sent_to_file);
+ if (cmp_translog_addr(sent_to_file, *lsn) >= 0)
+ break;
+
+ do
+ {
+ buffer_no= (buffer_no + 1) % TRANSLOG_BUFFERS_NO;
+ buffer= log_descriptor.buffers + buffer_no;
+ translog_buffer_lock(buffer);
+ translog_buffer_unlock(buffer_unlock);
+ buffer_unlock= buffer;
+ if (buffer->file)
+ {
+ buffer_unlock= NULL;
+ if (buffer_start == buffer_no)
+ {
+ /* we made a circle */
+ full_circle= 1;
+ translog_force_current_buffer_to_finish();
+ }
+ break;
+ }
+ } while ((buffer_start != buffer_no) &&
+ cmp_translog_addr(log_descriptor.flushed, *lsn) < 0);
+ if (buffer_unlock != NULL)
+ translog_buffer_unlock(buffer_unlock);
+ if (translog_buffer_flush(buffer))
+ {
+ translog_buffer_unlock(buffer);
+ DBUG_RETURN(1);
+ }
+ translog_buffer_unlock(buffer);
+ if (!full_circle)
+ translog_lock();
+ }
+
+ for (i= old_flushed.file_no; i <= lsn->file_no; i++)
+ {
+ uint cache_index;
+ File file;
+
+ if ((cache_index= log_descriptor.horizon.file_no - i) < OPENED_FILES_NUM)
+ {
+ /* file in the cache */
+ if (log_descriptor.log_file_num[cache_index] == 0)
+ {
+ if ((log_descriptor.log_file_num[cache_index]=
+ open_logfile_by_number_no_cache(i)) == 0)
+ {
+ translog_unlock();
+ DBUG_RETURN(1);
+ }
+ }
+ file= log_descriptor.log_file_num[cache_index];
+ rc|= my_sync(file, MYF(MY_WME));
+ }
+ else
+ {
+ /* very unlike situation with extremely small file size */
+ File file= open_logfile_by_number_no_cache(i);
+ rc|= my_sync(file, MYF(MY_WME));
+ my_close(file, MYF(MY_WME));
+ }
+ }
+ log_descriptor.flushed= sent_to_file;
+ rc|= my_sync(log_descriptor.directory_fd, MYF(MY_WME));
+ translog_unlock();
+ DBUG_RETURN(rc);
+}
diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h
new file mode 100644
index 00000000000..f4d939786fc
--- /dev/null
+++ b/storage/maria/ma_loghandler.h
@@ -0,0 +1,314 @@
+
+#ifndef _ma_loghandler_h
+#define _ma_loghandler_h
+
+/* Transaction log flags */
+#define TRANSLOG_PAGE_CRC 1
+#define TRANSLOG_SECTOR_PROTECTION (1<<1)
+#define TRANSLOG_RECORD_CRC (1<<2)
+
+/* page size in transaction log */
+#define TRANSLOG_PAGE_SIZE (8*1024)
+
+#include "ma_loghandler_lsn.h"
+
+/* short transaction ID type */
+typedef uint16 SHORT_TRANSACTION_ID;
+
+/* types of records in the transaction log */
+enum translog_record_type
+{
+ LOGREC_RESERVED_FOR_CHUNKS23= 0,
+ LOGREC_REDO_INSERT_ROW_HEAD= 1,
+ LOGREC_REDO_INSERT_ROW_TAIL= 2,
+ LOGREC_REDO_INSERT_ROW_BLOB= 3,
+ LOGREC_REDO_INSERT_ROW_BLOBS= 4,
+ LOGREC_REDO_PURGE_ROW= 5,
+ eLOGREC_REDO_PURGE_BLOCKS= 6,
+ LOGREC_REDO_DELETE_ROW= 7,
+ LOGREC_REDO_UPDATE_ROW_HEAD= 8,
+ LOGREC_REDO_INDEX= 9,
+ LOGREC_REDO_UNDELETE_ROW= 10,
+ LOGREC_CLR_END= 11,
+ LOGREC_PURGE_END= 12,
+ LOGREC_UNDO_ROW_INSERT= 13,
+ LOGREC_UNDO_ROW_DELETE= 14,
+ LOGREC_UNDO_ROW_UPDATE= 15,
+ LOGREC_UNDO_KEY_INSERT= 16,
+ LOGREC_UNDO_KEY_DELETE= 17,
+ LOGREC_PREPARE= 18,
+ LOGREC_PREPARE_WITH_UNDO_PURGE= 19,
+ LOGREC_COMMIT= 20,
+ LOGREC_COMMIT_WITH_UNDO_PURGE= 21,
+ LOGREC_CHECKPOINT_PAGE= 22,
+ LOGREC_CHECKPOINT_TRAN= 23,
+ LOGREC_CHECKPOINT_TABL= 24,
+ LOGREC_REDO_CREATE_TABLE= 25,
+ LOGREC_REDO_RENAME_TABLE= 26,
+ LOGREC_REDO_DROP_TABLE= 27,
+ LOGREC_REDO_TRUNCATE_TABLE= 28,
+ LOGREC_FILE_ID= 29,
+ LOGREC_LONG_TRANSACTION_ID= 30,
+ LOGREC_RESERVED_FUTURE_EXTENSION= 63
+};
+#define LOGREC_NUMBER_OF_TYPES 64
+
+typedef uint32 translog_size_t;
+
+#define TRANSLOG_RECORD_HEADER_MAX_SIZE 1024
+
+typedef struct st_translog_group_descriptor
+{
+ TRANSLOG_ADDRESS addr;
+ uint8 num;
+} TRANSLOG_GROUP;
+
+
+typedef struct st_translog_header_buffer
+{
+ /* LSN of the read record */
+ LSN lsn;
+ /* type of the read record */
+ enum translog_record_type type;
+ /* short transaction ID or 0 if it has no sense for the record */
+ SHORT_TRANSACTION_ID short_trid;
+ /*
+ The Record length in buffer (including read header, but excluding
+ hidden part of record (type, short TrID, length)
+ */
+ translog_size_t record_length;
+ /*
+ Real compressed LSN(s) size economy (<number of LSN(s)>*7 - <real_size>)
+ */
+ uint16 compressed_LSN_economy;
+ /*
+ Buffer for write decoded header of the record (depend on the record
+ type)
+ */
+ uchar header[TRANSLOG_RECORD_HEADER_MAX_SIZE];
+ /* non read body data offset on the page */
+ uint16 non_header_data_start_offset;
+ /* non read body data length in this first chunk */
+ uint16 non_header_data_len;
+ /* number of groups listed in */
+ uint groups_no;
+ /* array of groups descriptors, can be used only if groups_no > 0 */
+ TRANSLOG_GROUP *groups;
+ /* in multi-group number of chunk0 pages (valid only if groups_no > 0) */
+ uint chunk0_pages;
+ /* chunk 0 data address (valid only if groups_no > 0) */
+ TRANSLOG_ADDRESS chunk0_data_addr;
+ /* chunk 0 data size (valid only if groups_no > 0) */
+ uint16 chunk0_data_len;
+} TRANSLOG_HEADER_BUFFER;
+
+
+struct st_translog_scanner_data
+{
+ uchar buffer[TRANSLOG_PAGE_SIZE]; /* buffer for page content */
+ TRANSLOG_ADDRESS page_addr; /* current page address */
+ TRANSLOG_ADDRESS horizon; /* end of the log which we saw
+ last time */
+ TRANSLOG_ADDRESS last_file_page; /* Last page on in this file */
+ uchar *page; /* page content pointer */
+ translog_size_t page_offset; /* offset of the chunk in the
+ page */
+ my_bool fixed_horizon; /* set horizon only once at
+ init */
+};
+
+
+struct st_translog_reader_data
+{
+ TRANSLOG_HEADER_BUFFER header; /* Header */
+ struct st_translog_scanner_data scanner; /* chunks scanner */
+ translog_size_t body_offset; /* current chunk body offset */
+ translog_size_t current_offset; /* data offset from the record
+ beginning */
+ uint16 read_header; /* number of bytes read in
+ header */
+ uint16 chunk_size; /* current chunk size */
+ uint current_group; /* current group */
+ uint current_chunk; /* current chunk in the group */
+ my_bool eor; /* end of the record */
+};
+
+
+/*
+ Initialize transaction log
+
+ SYNOPSIS
+ translog_init()
+ directory Directory where log files are put
+ log_file_max_size max size of one log size (for new logs creation)
+ server_version version of MySQL servger (MYSQL_VERSION_ID)
+ server_id server ID (replication & Co)
+ pagecache Page cache for the log reads
+ flags flags (TRANSLOG_PAGE_CRC, TRANSLOG_SECTOR_PROTECTION
+ TRANSLOG_RECORD_CRC)
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+my_bool translog_init(const char *directory, uint32 log_file_max_size,
+ uint32 server_version,
+ uint32 server_id, PAGECACHE *pagecache, uint flags);
+
+
+/*
+ Write the log record
+
+ SYNOPSIS
+ translog_write_record()
+ lsn LSN of the record will be writen here
+ type the log record type
+ short_trid Sort transaction ID or 0 if it has no sense
+ tcb Transaction control block pointer for hooks by
+ record log type
+ partN_length length of Ns part of the log
+ partN_buffer pointer on Ns part buffer
+ 0 sign of the end of parts
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+my_bool translog_write_record(LSN *lsn,
+ enum translog_record_type type,
+ SHORT_TRANSACTION_ID short_trid,
+ void *tcb,
+ translog_size_t part1_length,
+ uchar *part1_buff, ...);
+
+
+/*
+ Free log handler resources
+
+ SYNOPSIS
+ translog_destroy()
+*/
+
+void translog_destroy();
+
+
+/*
+ Read record header and some fixed part of a record (the part depend on
+ record type).
+
+ SYNOPSIS
+ translog_read_record_header()
+ lsn log record serial number (address of the record)
+ buff log record header buffer
+
+ NOTE
+ - lsn can point to TRANSLOG_HEADER_BUFFER::lsn and it will be processed
+ correctly.
+ - Some type of record can be read completely by this call
+ - "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
+ LSN can be translated to absolute one), some fields can be added
+ (like actual header length in the record if the header has variable
+ length)
+
+ RETURN
+ 0 - error
+ number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded
+ part of the header
+*/
+
+translog_size_t translog_read_record_header(LSN *lsn,
+ TRANSLOG_HEADER_BUFFER *buff);
+
+
+/*
+ Free resources used by TRANSLOG_HEADER_BUFFER
+
+ SYNOPSIS
+ translog_free_record_header();
+*/
+
+void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff);
+
+
+/*
+ Read a part of the record.
+
+ SYNOPSIS
+ translog_read_record_header()
+ lsn log record serial number (address of the record)
+ offset from the beginning of the record beginning (read
+ by translog_read_record_header).
+ length length of record part which have to be read.
+ buffer buffer where to read the record part (have to be at
+ least 'length' bytes length)
+
+ RETURN
+ 0 - error (or read out of the record)
+ length of data actually read
+*/
+
+translog_size_t translog_read_record(LSN *lsn,
+ translog_size_t offset,
+ translog_size_t length,
+ uchar *buffer,
+ struct st_translog_reader_data *data);
+
+
+/*
+ Flush the log up to given LSN (included)
+
+ SYNOPSIS
+ translog_flush()
+ lsn log record serial number up to which (inclusive)
+ the log have to be flushed
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+my_bool translog_flush(LSN *lsn);
+
+
+/*
+ Read record header and some fixed part of the next record (the part
+ depend on record type).
+
+ SYNOPSIS
+ translog_read_next_record_header()
+ lsn log record serial number (address of the record)
+ previous to the record which will be read
+ If LSN present scanner will be initialized from it,
+ do not use LSN after initialization for fast scanning.
+ buff log record header buffer
+ fixed_horizon true if it is OK do not read records which was written
+ after scaning begining
+ scanner data for scaning if lsn is NULL scanner data
+ will be used for continue scaning.
+ scanner can be NULL.
+
+ NOTE
+ - lsn can point to TRANSLOG_HEADER_BUFFER::lsn and it will be processed
+ correctly (lsn in buffer will be replaced by next record, but initial
+ lsn will be read correctly).
+ - it is like translog_read_record_header, but read next record, so see
+ its NOTES.
+ - in case of end of the log buff->lsn will be set to
+ (CONTROL_FILE_IMPOSSIBLE_LOGNO, 0)
+ RETURN
+ 0 - error
+ TRANSLOG_RECORD_HEADER_MAX_SIZE + 1 - End of the log
+ number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded
+ part of the header
+*/
+
+translog_size_t translog_read_next_record_header(LSN *lsn,
+ TRANSLOG_HEADER_BUFFER *buff,
+ my_bool fixed_horizon,
+ struct
+ st_translog_scanner_data
+ *scanner);
+
+#endif
diff --git a/storage/maria/ma_loghandler_lsn.h b/storage/maria/ma_loghandler_lsn.h
new file mode 100644
index 00000000000..9576d4d734d
--- /dev/null
+++ b/storage/maria/ma_loghandler_lsn.h
@@ -0,0 +1,39 @@
+#ifndef _ma_loghandler_lsn_h
+#define _ma_loghandler_lsn_h
+
+/* Transaction log record address (file_no is int24 on the disk) */
+typedef struct st_translog_address
+{
+ uint32 file_no;
+ uint32 rec_offset;
+} TRANSLOG_ADDRESS;
+
+/*
+ Compare addresses
+ A1 > A2 -> result > 0
+ A1 == A2 -> 0
+ A1 < A2 -> result < 0
+*/
+#define cmp_translog_addr(A1,A2) \
+ ((A1).file_no == (A2).file_no ? \
+ ((int64)(A1).rec_offset) - (int64)(A2).rec_offset : \
+ ((int64)(A1).file_no - (int64)(A2).file_no))
+
+/* LSN type (address of certain log record chank */
+typedef TRANSLOG_ADDRESS LSN;
+
+/* Puts LSN into buffer (dst) */
+#define lsn7store(dst, lsn) \
+ do { \
+ int3store((dst), (lsn)->file_no); \
+ int4store((dst) + 3, (lsn)->rec_offset); \
+ } while (0)
+
+/* Unpacks LSN from the buffer (P) */
+#define lsn7korr(lsn, P) \
+ do { \
+ (lsn)->file_no= uint3korr(P); \
+ (lsn)->rec_offset= uint4korr((P) + 3); \
+ } while (0)
+
+#endif
diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h
index 506bdbc71ca..380c42c105e 100644
--- a/storage/maria/maria_def.h
+++ b/storage/maria/maria_def.h
@@ -26,6 +26,10 @@
#include <my_no_pthread.h>
#endif
+#include <pagecache.h>
+#include "ma_loghandler.h"
+#include "ma_control_file.h"
+
/* undef map from my_nosys; We need test-if-disk full */
#undef my_write
@@ -438,6 +442,7 @@ extern LIST *maria_open_list;
extern uchar NEAR maria_file_magic[], NEAR maria_pack_file_magic[];
extern uint NEAR maria_read_vec[], NEAR maria_readnext_vec[];
extern uint maria_quick_table_bits;
+extern const char *maria_data_root;
extern my_bool maria_inited;
/* This is used by _ma_calc_xxx_key_length och _ma_store_key */
diff --git a/storage/maria/unittest/Makefile.am b/storage/maria/unittest/Makefile.am
index d0b247d65e1..78b285edd70 100644
--- a/storage/maria/unittest/Makefile.am
+++ b/storage/maria/unittest/Makefile.am
@@ -14,8 +14,10 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-AM_CPPFLAGS = @ZLIB_INCLUDES@ -I$(top_builddir)/include
-AM_CPPFLAGS += -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap
+AM_CPPFLAGS = @ZLIB_INCLUDES@ -I$(top_builddir)/include \
+ -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap
+INCLUDES = @ZLIB_INCLUDES@ -I$(top_builddir)/include \
+ -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap
# Only reason to link with libmyisam.a here is that it's where some fulltext
# pieces are (but soon we'll remove fulltext dependencies from Maria).
@@ -24,6 +26,54 @@ LDADD= $(top_builddir)/unittest/mytap/libmytap.a \
$(top_builddir)/storage/myisam/libmyisam.a \
$(top_builddir)/mysys/libmysys.a \
$(top_builddir)/dbug/libdbug.a \
- $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@
-noinst_PROGRAMS = ma_control_file-t trnman-t lockman-t lockman1-t lockman2-t
-CLEANFILES = maria_control
+ $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ \
+ $(top_builddir)/storage/maria/ma_loghandler.o
+noinst_PROGRAMS = ma_control_file-t trnman-t lockman-t lockman1-t \
+ lockman2-t \
+ mf_pagecache_single_1k-t mf_pagecache_single_8k-t \
+ mf_pagecache_single_64k-t \
+ mf_pagecache_consist_1k-t mf_pagecache_consist_64k-t \
+ mf_pagecache_consist_1kHC-t \
+ mf_pagecache_consist_64kHC-t \
+ mf_pagecache_consist_1kRD-t \
+ mf_pagecache_consist_64kRD-t \
+ mf_pagecache_consist_1kWR-t \
+ mf_pagecache_consist_64kWR-t \
+ ma_test_loghandler-t \
+ ma_test_loghandler_multigroup-t \
+ ma_test_loghandler_multithread-t \
+ ma_test_loghandler_pagecache-t
+
+mf_pagecache_single_src = mf_pagecache_single.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c
+mf_pagecache_consist_src = mf_pagecache_consist.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c
+mf_pagecache_common_cppflags = -DEXTRA_DEBUG -DPAGECACHE_DEBUG -DMAIN
+
+mf_pagecache_single_1k_t_SOURCES = $(mf_pagecache_single_src)
+mf_pagecache_single_8k_t_SOURCES = $(mf_pagecache_single_src)
+mf_pagecache_single_64k_t_SOURCES = $(mf_pagecache_single_src)
+mf_pagecache_single_1k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024
+mf_pagecache_single_8k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=8192
+mf_pagecache_single_64k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536
+
+mf_pagecache_consist_1k_t_SOURCES = $(mf_pagecache_consist_src)
+mf_pagecache_consist_1k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024
+mf_pagecache_consist_64k_t_SOURCES = $(mf_pagecache_consist_src)
+mf_pagecache_consist_64k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536
+
+mf_pagecache_consist_1kHC_t_SOURCES = $(mf_pagecache_consist_src)
+mf_pagecache_consist_1kHC_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_HIGH_CONCURENCY
+mf_pagecache_consist_64kHC_t_SOURCES = $(mf_pagecache_consist_src)
+mf_pagecache_consist_64kHC_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_HIGH_CONCURENCY
+
+mf_pagecache_consist_1kRD_t_SOURCES = $(mf_pagecache_consist_src)
+mf_pagecache_consist_1kRD_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_READERS
+mf_pagecache_consist_64kRD_t_SOURCES = $(mf_pagecache_consist_src)
+mf_pagecache_consist_64kRD_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_READERS
+
+mf_pagecache_consist_1kWR_t_SOURCES = $(mf_pagecache_consist_src)
+mf_pagecache_consist_1kWR_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_WRITERS
+mf_pagecache_consist_64kWR_t_SOURCES = $(mf_pagecache_consist_src)
+mf_pagecache_consist_64kWR_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_WRITERS
+
+CLEANFILES = maria_control page_cache_test_file_1 \
+ maria_log.???????? maria_control
diff --git a/storage/maria/unittest/ma_control_file-t.c b/storage/maria/unittest/ma_control_file-t.c
index beb86843dd3..7b7e1454cc3 100644
--- a/storage/maria/unittest/ma_control_file-t.c
+++ b/storage/maria/unittest/ma_control_file-t.c
@@ -33,7 +33,7 @@
#endif
#include "maria.h"
-#include "../../../storage/maria/ma_control_file.h"
+#include "../../../storage/maria/maria_def.h"
#include <my_getopt.h>
char file_name[FN_REFLEN];
diff --git a/storage/maria/unittest/ma_test_loghandler-t.c b/storage/maria/unittest/ma_test_loghandler-t.c
new file mode 100644
index 00000000000..1cbfcac504e
--- /dev/null
+++ b/storage/maria/unittest/ma_test_loghandler-t.c
@@ -0,0 +1,540 @@
+#include "../maria_def.h"
+#include <stdio.h>
+#include <errno.h>
+
+#ifndef DBUG_OFF
+static const char *default_dbug_option;
+#endif
+
+#define PCACHE_SIZE (1024*1024*10)
+
+#define LONG_BUFFER_SIZE (100 * 1024)
+
+
+#define LOG_FLAGS TRANSLOG_SECTOR_PROTECTION | TRANSLOG_PAGE_CRC
+#define LOG_FILE_SIZE 1024L*1024L*3L
+#define ITERATIONS 1600
+
+/*
+#define LOG_FLAGS 0
+#define LOG_FILE_SIZE 1024L*1024L*1024L
+#define ITERATIONS 181000
+*/
+
+/*
+#define LOG_FLAGS 0
+#define LOG_FILE_SIZE 1024L*1024L*3L
+#define ITERATIONS 1600
+*/
+
+/*
+#define LOG_FLAGS 0
+#define LOG_FILE_SIZE 1024L*1024L*100L
+#define ITERATIONS 65000
+*/
+
+/*
+ Check that the buffer filled correctly
+
+ SYNOPSIS
+ check_content()
+ ptr Pointer to the buffer
+ length length of the buffer
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool check_content(uchar *ptr, ulong length)
+{
+ ulong i;
+ uchar buff[2];
+ for (i= 0; i < length; i++)
+ {
+ if (i % 2 == 0)
+ int2store(buff, i >> 1);
+ if (ptr[i] != buff[i % 2])
+ {
+ fprintf(stderr, "Byte # %lu is %x instead of %x",
+ i, (uint) ptr[i], (uint) buff[i % 2]);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+
+/*
+ Read whole record content, and check content (put with offset)
+
+ SYNOPSIS
+ read_and_check_content()
+ rec The record header buffer
+ buffer The buffer to read the record in
+ skip Skip this number of bytes ot the record content
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec,
+ uchar *buffer, uint skip)
+{
+ DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE * 2 + 7 * 2 + 2);
+ if (translog_read_record(&rec->lsn, 0, rec->record_length, buffer, NULL) !=
+ rec->record_length)
+ return 1;
+ return check_content(buffer + skip, rec->record_length - skip);
+}
+
+int main(int argc, char *argv[])
+{
+ uint32 i;
+ uint32 rec_len;
+ uint pagen;
+ uchar long_tr_id[6];
+ uchar lsn_buff[23]=
+ {
+ 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA,
+ 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA,
+ 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55
+ };
+ uchar long_buffer[LONG_BUFFER_SIZE * 2 + 7 * 2 + 2];
+ PAGECACHE pagecache;
+ LSN lsn, lsn_base, first_lsn, *lsn_ptr;
+ TRANSLOG_HEADER_BUFFER rec;
+ struct st_translog_scanner_data scanner;
+ int rc;
+
+ MY_INIT(argv[0]);
+
+ bzero(&pagecache, sizeof(pagecache));
+ maria_data_root= ".";
+
+ for (i= 0; i < (LONG_BUFFER_SIZE + 7 * 2 + 2); i+= 2)
+ {
+ int2store(long_buffer + i, (i >> 1));
+ /* long_buffer[i]= (i & 0xFF); */
+ }
+
+ bzero(long_tr_id, 6);
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+
+ if (ma_control_file_create_or_open())
+ {
+ fprintf(stderr, "Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ TRANSLOG_PAGE_SIZE)) == 0)
+ {
+ fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS))
+ {
+ fprintf(stderr, "Can't init loghandler (%d)\n", errno);
+ translog_destroy();
+ exit(1);
+ }
+
+ srandom(122334817L);
+
+ long_tr_id[5]= 0xff;
+
+ int4store(long_tr_id, 0);
+ if (translog_write_record(&lsn,
+ LOGREC_LONG_TRANSACTION_ID,
+ 0, NULL, 6, long_tr_id, 0))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ translog_destroy();
+ exit(1);
+ }
+ lsn_base= first_lsn= lsn;
+
+ for (i= 1; i < ITERATIONS; i++)
+ {
+ if (i % 1000 == 0)
+ printf("write %d\n", i);
+ if (i % 2)
+ {
+ lsn7store(lsn_buff, &lsn_base);
+ if (translog_write_record(&lsn,
+ LOGREC_CLR_END,
+ (i % 0xFFFF), NULL, 7, lsn_buff, 0))
+ {
+ fprintf(stderr, "1 Can't write reference defore record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ exit(1);
+ }
+ lsn7store(lsn_buff, &lsn_base);
+ if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 12)
+ rec_len= 12;
+ if (translog_write_record(&lsn,
+ LOGREC_UNDO_KEY_INSERT,
+ (i % 0xFFFF),
+ NULL, 7, lsn_buff, rec_len, long_buffer, 0))
+ {
+ fprintf(stderr, "1 Can't write var reference defore record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ exit(1);
+ }
+ }
+ else
+ {
+ lsn7store(lsn_buff, &lsn_base);
+ lsn7store(lsn_buff + 7, &first_lsn);
+ if (translog_write_record(&lsn,
+ LOGREC_UNDO_ROW_DELETE,
+ (i % 0xFFFF), NULL, 23, lsn_buff, 0))
+ {
+ fprintf(stderr, "0 Can't write reference defore record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ exit(1);
+ }
+ lsn7store(lsn_buff, &lsn_base);
+ lsn7store(lsn_buff + 7, &first_lsn);
+ if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 19)
+ rec_len= 19;
+ if (translog_write_record(&lsn,
+ LOGREC_UNDO_KEY_DELETE,
+ (i % 0xFFFF),
+ NULL, 14, lsn_buff, rec_len, long_buffer, 0))
+ {
+ fprintf(stderr, "0 Can't write var reference defore record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ exit(1);
+ }
+ }
+ int4store(long_tr_id, i);
+ if (translog_write_record(&lsn,
+ LOGREC_LONG_TRANSACTION_ID,
+ (i % 0xFFFF), NULL, 6, long_tr_id, 0))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) i);
+ translog_destroy();
+ exit(1);
+ }
+
+ lsn_base= lsn;
+
+ if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 9)
+ rec_len= 9;
+ if (translog_write_record(&lsn,
+ LOGREC_REDO_INSERT_ROW_HEAD,
+ (i % 0xFFFF), NULL, rec_len, long_buffer, 0))
+ {
+ fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i);
+ translog_destroy();
+ exit(1);
+ }
+ if (translog_flush(&lsn))
+ {
+ fprintf(stderr, "Can't flush #%lu\n", (ulong) i);
+ translog_destroy();
+ exit(1);
+ }
+ }
+
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+
+
+ if (ma_control_file_create_or_open())
+ {
+ fprintf(stderr, "pass2: Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ TRANSLOG_PAGE_SIZE)) == 0)
+ {
+ fprintf(stderr, "pass2: Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS))
+ {
+ fprintf(stderr, "pass2: Can't init loghandler (%d)\n", errno);
+ translog_destroy();
+ exit(1);
+ }
+ srandom(122334817L);
+
+
+ rc= 1;
+
+ {
+ translog_size_t len= translog_read_record_header(&first_lsn, &rec);
+ if (len == 0)
+ {
+ fprintf(stderr, "translog_read_record_header failed (%d)\n", errno);
+ goto err;
+ }
+ if (rec.type !=LOGREC_LONG_TRANSACTION_ID || rec.short_trid != 0 ||
+ rec.record_length != 6 || uint4korr(rec.header) != 0 ||
+ (uint)rec.header[4] != 0 || rec.header[5] != 0xFF ||
+ first_lsn.file_no != rec.lsn.file_no ||
+ first_lsn.rec_offset != rec.lsn.rec_offset)
+ {
+ fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(0)\n"
+ "type %u, strid %u, len %u, i: %u, 4: %u 5: %u, "
+ "lsn(0x%lx,0x%lx)\n",
+ (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length,
+ uint4korr(rec.header), (uint) rec.header[4], (uint) rec.header[5],
+ (ulong) rec.lsn.file_no, (ulong) rec.lsn.rec_offset);
+ goto err;
+ }
+ lsn= first_lsn;
+ lsn_ptr= &first_lsn;
+ for (i= 1;; i++)
+ {
+ if (i % 1000 == 0)
+ printf("read %d\n", i);
+ len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner);
+ if (len == 0)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n",
+ i, errno);
+ goto err;
+ }
+ if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO)
+ {
+ if (i != ITERATIONS)
+ {
+ fprintf(stderr, "EOL met at iteration %u instead of %u\n",
+ i, ITERATIONS);
+ goto err;
+ }
+ break;
+ }
+ lsn_ptr= NULL; /* use scanner after its
+ initialization */
+ if (i % 2)
+ {
+ LSN ref;
+ lsn7korr(&ref, rec.header);
+ if (rec.type !=LOGREC_CLR_END || rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != 7 || ref.file_no != lsn.file_no ||
+ ref.rec_offset != lsn.rec_offset)
+ {
+ fprintf(stderr, "Incorrect LOGREC_CLR_END data read(%d)"
+ "type %u, strid %u, len %u, ref(%u,0x%lx), lsn(%u,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (uint) rec.record_length,
+ (uint) ref.file_no, (ulong) ref.rec_offset,
+ (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset);
+ goto err;
+ }
+ }
+ else
+ {
+ LSN ref1, ref2;
+ lsn7korr(&ref1, rec.header);
+ lsn7korr(&ref2, rec.header + 7);
+ if (rec.type !=LOGREC_UNDO_ROW_DELETE ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != 23 ||
+ ref1.file_no != lsn.file_no ||
+ ref1.rec_offset != lsn.rec_offset ||
+ ref2.file_no != first_lsn.file_no ||
+ ref2.rec_offset != first_lsn.rec_offset ||
+ rec.header[22] != 0x55 || rec.header[21] != 0xAA ||
+ rec.header[20] != 0x55 || rec.header[19] != 0xAA ||
+ rec.header[18] != 0x55 || rec.header[17] != 0xAA ||
+ rec.header[16] != 0x55 || rec.header[15] != 0xAA ||
+ rec.header[14] != 0x55)
+ {
+ fprintf(stderr, "Incorrect LOGREC_UNDO_ROW_DELETE data read(%d)"
+ "type %u, strid %u, len %u, ref1(%u,0x%lx), "
+ "ref2(%u,0x%lx) %x%x%x%x%x%x%x%x%x "
+ "lsn(%u,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (uint) rec.record_length,
+ (uint) ref1.file_no, (ulong) ref1.rec_offset,
+ (uint) ref2.file_no, (ulong) ref2.rec_offset,
+ (uint) rec.header[14], (uint) rec.header[15],
+ (uint) rec.header[16], (uint) rec.header[17],
+ (uint) rec.header[18], (uint) rec.header[19],
+ (uint) rec.header[20], (uint) rec.header[21],
+ (uint) rec.header[22],
+ (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset);
+ goto err;
+ }
+ }
+ len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner);
+ if (len == 0)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header (var) "
+ "failed (%d)\n", i, errno);
+ goto err;
+ }
+ if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO)
+ {
+ fprintf(stderr, "EOL met at the middle of iteration (first var) %u "
+ "instead of beginning of %u\n", i, ITERATIONS);
+ goto err;
+ }
+ if (i % 2)
+ {
+ LSN ref;
+ lsn7korr(&ref, rec.header);
+ if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 12)
+ rec_len= 12;
+ if (rec.type !=LOGREC_UNDO_KEY_INSERT ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != rec_len + 7 ||
+ len != 12 || ref.file_no != lsn.file_no ||
+ ref.rec_offset != lsn.rec_offset ||
+ check_content(rec.header + 7, len - 7))
+ {
+ fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_INSERT data read(%d)"
+ "type %u (%d), strid %u (%d), len %lu, %lu + 7 (%d), "
+ "hdr len: %u (%d), "
+ "ref(%u,0x%lx), lsn(%u,0x%lx) (%d), content: %d\n",
+ i, (uint) rec.type,
+ rec.type !=LOGREC_UNDO_KEY_INSERT,
+ (uint) rec.short_trid,
+ rec.short_trid != (i % 0xFFFF),
+ (ulong) rec.record_length, (ulong) rec_len,
+ rec.record_length != rec_len + 7,
+ (uint) len,
+ len != 12,
+ (uint) ref.file_no, (ulong) ref.rec_offset,
+ (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset,
+ (len != 12 || ref.file_no != lsn.file_no ||
+ ref.rec_offset != lsn.rec_offset),
+ check_content(rec.header + 7, len - 7));
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, 7))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_UNDO_KEY_INSERT in whole rec read "
+ "lsn(%u,0x%lx)\n",
+ (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset);
+ goto err;
+ }
+ }
+ else
+ {
+ LSN ref1, ref2;
+ lsn7korr(&ref1, rec.header);
+ lsn7korr(&ref2, rec.header + 7);
+ if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 19)
+ rec_len= 19;
+ if (rec.type !=LOGREC_UNDO_KEY_DELETE ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != rec_len + 14 ||
+ len != 19 ||
+ ref1.file_no != lsn.file_no ||
+ ref1.rec_offset != lsn.rec_offset ||
+ ref2.file_no != first_lsn.file_no ||
+ ref2.rec_offset != first_lsn.rec_offset ||
+ check_content(rec.header + 14, len - 14))
+ {
+ fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_DELETE data read(%d)"
+ "type %u, strid %u, len %lu != %lu + 7, hdr len: %u, "
+ "ref1(%u,0x%lx), ref2(%u,0x%lx), "
+ "lsn(%u,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (ulong) rec.record_length, (ulong) rec_len,
+ (uint) len,
+ (uint) ref1.file_no, (ulong) ref1.rec_offset,
+ (uint) ref2.file_no, (ulong) ref2.rec_offset,
+ (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset);
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, 14))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read "
+ "lsn(%u,0x%lx)\n",
+ (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset);
+ goto err;
+ }
+ }
+
+ len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner);
+ if (len == 0)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n",
+ i, errno);
+ goto err;
+ }
+ if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO)
+ {
+ fprintf(stderr, "EOL met at the middle of iteration %u "
+ "instead of beginning of %u\n", i, ITERATIONS);
+ goto err;
+ }
+ if (rec.type !=LOGREC_LONG_TRANSACTION_ID ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != 6 || uint4korr(rec.header) != i ||
+ rec.header[4] != 0 || rec.header[5] != 0xFF)
+ {
+ fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(%d)\n"
+ "type %u, strid %u, len %u, i: %u, 4: %u 5: %u "
+ "lsn(%u,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (uint) rec.record_length,
+ uint4korr(rec.header), (uint) rec.header[4],
+ (uint) rec.header[5],
+ (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset);
+ goto err;
+ }
+
+ lsn= rec.lsn;
+
+ len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner);
+ if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 9)
+ rec_len= 9;
+ if (rec.type !=LOGREC_REDO_INSERT_ROW_HEAD ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != rec_len ||
+ len != 9 || check_content(rec.header, len))
+ {
+ fprintf(stderr, "Incorrect LOGREC_REDO_INSERT_ROW_HEAD data read(%d)"
+ "type %u, strid %u, len %lu != %lu, hdr len: %u, "
+ "lsn(%u,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (ulong) rec.record_length, (ulong) rec_len,
+ (uint) len, (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset);
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, 0))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read "
+ "lsn(%u,0x%lx)\n",
+ (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset);
+ goto err;
+ }
+ }
+ }
+
+ rc= 1;
+err:
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+
+ return(test(exit_status() || rc));
+}
diff --git a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c
new file mode 100644
index 00000000000..abb12faa015
--- /dev/null
+++ b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c
@@ -0,0 +1,570 @@
+#include "../maria_def.h"
+#include <stdio.h>
+#include <errno.h>
+
+#ifndef DBUG_OFF
+static const char *default_dbug_option;
+#endif
+
+#define PCACHE_SIZE (1024*1024*10)
+
+#define LONG_BUFFER_SIZE ((1024L*1024L*1024L) + (1024L*1024L*512))
+
+#define MIN_REC_LENGTH (1024L*1024L + 1024L*512L + 1)
+
+#define SHOW_DIVIDER 2
+
+#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512)
+#define ITERATIONS 2
+/*#define ITERATIONS 63 */
+
+/*
+#define LOG_FILE_SIZE 1024L*1024L*3L
+#define ITERATIONS 1600
+*/
+/*
+#define LOG_FILE_SIZE 1024L*1024L*100L
+#define ITERATIONS 65000
+*/
+
+
+/*
+ Check that the buffer filled correctly
+
+ SYNOPSIS
+ check_content()
+ ptr Pointer to the buffer
+ length length of the buffer
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool check_content(uchar *ptr, ulong length)
+{
+ ulong i;
+ uchar buff[4];
+ DBUG_ENTER("check_content");
+ for (i= 0; i < length; i++)
+ {
+ if (i % 4 == 0)
+ int4store(buff, (i >> 2));
+ if (ptr[i] != buff[i % 4])
+ {
+ fprintf(stderr, "Byte # %lu is %x instead of %x",
+ i, (uint) ptr[i], (uint) buff[i % 4]);
+ DBUG_DUMP("mem", ptr +(ulong) (i > 16 ? i - 16 : 0),
+ (i > 16 ? 16 : i) + (i + 16 < length ? 16 : length - i));
+ DBUG_RETURN(1);
+ }
+ }
+ DBUG_RETURN(0);
+}
+
+
+/*
+ Read whole record content, and check content (put with offset)
+
+ SYNOPSIS
+ read_and_check_content()
+ rec The record header buffer
+ buffer The buffer to read the record in
+ skip Skip this number of bytes ot the record content
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec,
+ uchar *buffer, uint skip)
+{
+ int res= 0;
+ translog_size_t len;
+ DBUG_ENTER("read_and_check_content");
+ DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE + 7 * 2 + 2);
+ if ((len= translog_read_record(&rec->lsn, 0, rec->record_length,
+ buffer, NULL)) != rec->record_length)
+ {
+ fprintf(stderr, "Requested %lu byte, read %lu\n",
+ (ulong) rec->record_length, (ulong) len);
+ res= 1;
+ }
+ res|= check_content(buffer + skip, rec->record_length - skip);
+ DBUG_RETURN(res);
+}
+
+
+static uint32 get_len()
+{
+ uint32 rec_len;
+ do
+ {
+ rec_len= random() /
+ (RAND_MAX / (LONG_BUFFER_SIZE - MIN_REC_LENGTH - 1)) + MIN_REC_LENGTH;
+ } while (rec_len >= LONG_BUFFER_SIZE);
+ return rec_len;
+}
+
+int main(int argc, char *argv[])
+{
+ uint32 i;
+ uint32 rec_len;
+ uint pagen;
+ uchar long_tr_id[6];
+ uchar lsn_buff[23]=
+ {
+ 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA,
+ 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA,
+ 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55
+ };
+ uchar *long_buffer= malloc(LONG_BUFFER_SIZE + 7 * 2 + 2);
+ PAGECACHE pagecache;
+ LSN lsn, lsn_base, first_lsn, *lsn_ptr;
+ TRANSLOG_HEADER_BUFFER rec;
+ struct st_translog_scanner_data scanner;
+ int rc;
+
+ MY_INIT(argv[0]);
+
+ bzero(&pagecache, sizeof(pagecache));
+ maria_data_root= ".";
+
+ {
+ uchar buff[4];
+ for (i= 0; i < (LONG_BUFFER_SIZE + 7 * 2 + 2); i++)
+ {
+ if (i % 4 == 0)
+ int4store(buff, (i >> 2));
+ long_buffer[i]= buff[i % 4];
+ }
+ }
+
+ bzero(long_tr_id, 6);
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+
+ if (ma_control_file_create_or_open())
+ {
+ fprintf(stderr, "Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ TRANSLOG_PAGE_SIZE)) == 0)
+ {
+ fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, 0))
+ {
+ fprintf(stderr, "Can't init loghandler (%d)\n", errno);
+ translog_destroy();
+ exit(1);
+ }
+
+ srandom(122334817L);
+
+ long_tr_id[5]= 0xff;
+
+ int4store(long_tr_id, 0);
+ if (translog_write_record(&lsn,
+ LOGREC_LONG_TRANSACTION_ID,
+ 0, NULL, 6, long_tr_id, 0))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ translog_destroy();
+ exit(1);
+ }
+ lsn_base= first_lsn= lsn;
+
+ for (i= 1; i < ITERATIONS; i++)
+ {
+ if (i % SHOW_DIVIDER == 0)
+ printf("write %d\n", i);
+ if (i % 2)
+ {
+ lsn7store(lsn_buff, &lsn_base);
+ if (translog_write_record(&lsn,
+ LOGREC_CLR_END,
+ (i % 0xFFFF), NULL, 7, lsn_buff, 0))
+ {
+ fprintf(stderr, "1 Can't write reference before record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ exit(1);
+ }
+ lsn7store(lsn_buff, &lsn_base);
+ rec_len= get_len();
+ if (translog_write_record(&lsn,
+ LOGREC_UNDO_KEY_INSERT,
+ (i % 0xFFFF),
+ NULL, 7, lsn_buff, rec_len, long_buffer, 0))
+ {
+ fprintf(stderr, "1 Can't write var reference before record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ exit(1);
+ }
+ }
+ else
+ {
+ lsn7store(lsn_buff, &lsn_base);
+ lsn7store(lsn_buff + 7, &first_lsn);
+ if (translog_write_record(&lsn,
+ LOGREC_UNDO_ROW_DELETE,
+ (i % 0xFFFF), NULL, 23, lsn_buff, 0))
+ {
+ fprintf(stderr, "0 Can't write reference before record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ exit(1);
+ }
+ lsn7store(lsn_buff, &lsn_base);
+ lsn7store(lsn_buff + 7, &first_lsn);
+ rec_len= get_len();
+ if (translog_write_record(&lsn,
+ LOGREC_UNDO_KEY_DELETE,
+ (i % 0xFFFF),
+ NULL, 14, lsn_buff, rec_len, long_buffer, 0))
+ {
+ fprintf(stderr, "0 Can't write var reference before record #%lu\n",
+ (ulong) i);
+ translog_destroy();
+ exit(1);
+ }
+ }
+ int4store(long_tr_id, i);
+ if (translog_write_record(&lsn,
+ LOGREC_LONG_TRANSACTION_ID,
+ (i % 0xFFFF), NULL, 6, long_tr_id, 0))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) i);
+ translog_destroy();
+ exit(1);
+ }
+
+ lsn_base= lsn;
+
+ rec_len= get_len();
+ if (translog_write_record(&lsn,
+ LOGREC_REDO_INSERT_ROW_HEAD,
+ (i % 0xFFFF), NULL, rec_len, long_buffer, 0))
+ {
+ fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i);
+ translog_destroy();
+ exit(1);
+ }
+ }
+
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+
+ if (ma_control_file_create_or_open())
+ {
+ fprintf(stderr, "pass2: Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ TRANSLOG_PAGE_SIZE)) == 0)
+ {
+ fprintf(stderr, "pass2: Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, 0))
+ {
+ fprintf(stderr, "pass2: Can't init loghandler (%d)\n", errno);
+ translog_destroy();
+ exit(1);
+ }
+
+ srandom(122334817L);
+
+ rc= 1;
+
+ {
+ translog_size_t len= translog_read_record_header(&first_lsn, &rec);
+ if (len == 0)
+ {
+ fprintf(stderr, "translog_read_record_header failed (%d)\n", errno);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (rec.type !=LOGREC_LONG_TRANSACTION_ID || rec.short_trid != 0 ||
+ rec.record_length != 6 || uint4korr(rec.header) != 0 ||
+ (uint)rec.header[4] != 0 || rec.header[5] != 0xFF ||
+ first_lsn.file_no != rec.lsn.file_no ||
+ first_lsn.rec_offset != rec.lsn.rec_offset)
+ {
+ fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(0)\n"
+ "type %u, strid %u, len %u, i: %u, 4: %u 5: %u, "
+ "lsn(0x%lx,0x%lx)\n",
+ (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length,
+ uint4korr(rec.header), (uint) rec.header[4], (uint) rec.header[5],
+ (ulong) rec.lsn.file_no, (ulong) rec.lsn.rec_offset);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ translog_free_record_header(&rec);
+ lsn= first_lsn;
+ lsn_ptr= &first_lsn;
+ for (i= 1;; i++)
+ {
+ if (i % SHOW_DIVIDER == 0)
+ printf("read %d\n", i);
+ len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner);
+ if (len == 0)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n",
+ i, errno);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO)
+ {
+ if (i != ITERATIONS)
+ {
+ fprintf(stderr, "EOL met at iteration %u instead of %u\n",
+ i, ITERATIONS);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ break;
+ }
+ lsn_ptr= NULL; /* use scanner after its
+ initialization */
+
+ if (i % 2)
+ {
+ LSN ref;
+ lsn7korr(&ref, rec.header);
+ if (rec.type !=LOGREC_CLR_END || rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != 7 || ref.file_no != lsn.file_no ||
+ ref.rec_offset != lsn.rec_offset)
+ {
+ fprintf(stderr, "Incorrect LOGREC_CLR_END data read(%d)"
+ "type %u, strid %u, len %u, ref(%u,0x%lx), lsn(%u,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (uint) rec.record_length,
+ (uint) ref.file_no, (ulong) ref.rec_offset,
+ (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ }
+ else
+ {
+ LSN ref1, ref2;
+ lsn7korr(&ref1, rec.header);
+ lsn7korr(&ref2, rec.header + 7);
+ if (rec.type !=LOGREC_UNDO_ROW_DELETE ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != 23 ||
+ ref1.file_no != lsn.file_no ||
+ ref1.rec_offset != lsn.rec_offset ||
+ ref2.file_no != first_lsn.file_no ||
+ ref2.rec_offset != first_lsn.rec_offset ||
+ rec.header[22] != 0x55 || rec.header[21] != 0xAA ||
+ rec.header[20] != 0x55 || rec.header[19] != 0xAA ||
+ rec.header[18] != 0x55 || rec.header[17] != 0xAA ||
+ rec.header[16] != 0x55 || rec.header[15] != 0xAA ||
+ rec.header[14] != 0x55)
+ {
+ fprintf(stderr, "Incorrect LOGREC_UNDO_ROW_DELETE data read(%d)"
+ "type %u, strid %u, len %u, ref1(%u,0x%lx), "
+ "ref2(%u,0x%lx) %x%x%x%x%x%x%x%x%x "
+ "lsn(%u,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (uint) rec.record_length,
+ (uint) ref1.file_no, (ulong) ref1.rec_offset,
+ (uint) ref2.file_no, (ulong) ref2.rec_offset,
+ (uint) rec.header[14], (uint) rec.header[15],
+ (uint) rec.header[16], (uint) rec.header[17],
+ (uint) rec.header[18], (uint) rec.header[19],
+ (uint) rec.header[20], (uint) rec.header[21],
+ (uint) rec.header[22],
+ (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ }
+ translog_free_record_header(&rec);
+
+ len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner);
+ if (len == 0)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header (var) "
+ "failed (%d)\n", i, errno);
+ goto err;
+ }
+ if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO)
+ {
+ fprintf(stderr, "EOL met at the middle of iteration (first var) %u "
+ "instead of beginning of %u\n", i, ITERATIONS);
+ goto err;
+ }
+ if (i % 2)
+ {
+ LSN ref;
+ lsn7korr(&ref, rec.header);
+ rec_len= get_len();
+ if (rec.type !=LOGREC_UNDO_KEY_INSERT ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != rec_len + 7 ||
+ len != 12 || ref.file_no != lsn.file_no ||
+ ref.rec_offset != lsn.rec_offset ||
+ check_content(rec.header + 7, len - 7))
+ {
+ fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_INSERT data read(%d)"
+ "type %u (%d), strid %u (%d), len %lu, %lu + 7 (%d), "
+ "hdr len: %u (%d), "
+ "ref(%u,0x%lx), lsn(%u,0x%lx) (%d), content: %d\n",
+ i, (uint) rec.type,
+ rec.type !=LOGREC_UNDO_KEY_INSERT,
+ (uint) rec.short_trid,
+ rec.short_trid != (i % 0xFFFF),
+ (ulong) rec.record_length, (ulong) rec_len,
+ rec.record_length != rec_len + 7,
+ (uint) len,
+ len != 12,
+ (uint) ref.file_no, (ulong) ref.rec_offset,
+ (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset,
+ (ref.file_no != lsn.file_no ||
+ ref.rec_offset != lsn.rec_offset),
+ check_content(rec.header + 7, len - 7));
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, 7))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_UNDO_KEY_INSERT in whole rec read "
+ "lsn(%u,0x%lx)\n",
+ (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ }
+ else
+ {
+ LSN ref1, ref2;
+ lsn7korr(&ref1, rec.header);
+ lsn7korr(&ref2, rec.header + 7);
+ rec_len= get_len();
+ if (rec.type !=LOGREC_UNDO_KEY_DELETE ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != rec_len + 14 ||
+ len != 19 ||
+ ref1.file_no != lsn.file_no ||
+ ref1.rec_offset != lsn.rec_offset ||
+ ref2.file_no != first_lsn.file_no ||
+ ref2.rec_offset != first_lsn.rec_offset ||
+ check_content(rec.header + 14, len - 14))
+ {
+ fprintf(stderr, "Incorrect LOGREC_UNDO_KEY_DELETE data read(%d)"
+ "type %u, strid %u, len %lu != %lu + 7, hdr len: %u, "
+ "ref1(%u,0x%lx), ref2(%u,0x%lx), "
+ "lsn(%u,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (ulong) rec.record_length, (ulong) rec_len,
+ (uint) len,
+ (uint) ref1.file_no, (ulong) ref1.rec_offset,
+ (uint) ref2.file_no, (ulong) ref2.rec_offset,
+ (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, 14))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read "
+ "lsn(%u,0x%lx)\n",
+ (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ }
+ translog_free_record_header(&rec);
+
+ len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner);
+ if (len == 0)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n",
+ i, errno);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO)
+ {
+ fprintf(stderr, "EOL met at the middle of iteration %u "
+ "instead of beginning of %u\n", i, ITERATIONS);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (rec.type !=LOGREC_LONG_TRANSACTION_ID ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != 6 || uint4korr(rec.header) != i ||
+ rec.header[4] != 0 || rec.header[5] != 0xFF)
+ {
+ fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(%d)\n"
+ "type %u, strid %u, len %u, i: %u, 4: %u 5: %u "
+ "lsn(%u,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (uint) rec.record_length,
+ uint4korr(rec.header), (uint) rec.header[4],
+ (uint) rec.header[5],
+ (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ translog_free_record_header(&rec);
+
+ lsn= rec.lsn;
+
+ len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner);
+ rec_len= get_len();
+ if (rec.type !=LOGREC_REDO_INSERT_ROW_HEAD ||
+ rec.short_trid != (i % 0xFFFF) ||
+ rec.record_length != rec_len ||
+ len != 9 || check_content(rec.header, len))
+ {
+ fprintf(stderr, "Incorrect LOGREC_REDO_INSERT_ROW_HEAD data read(%d)"
+ "type %u, strid %u, len %lu != %lu, hdr len: %u, "
+ "lsn(%u,0x%lx)\n",
+ i, (uint) rec.type, (uint) rec.short_trid,
+ (ulong) rec.record_length, (ulong) rec_len,
+ (uint) len, (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, 0))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_UNDO_KEY_DELETE in whole rec read "
+ "lsn(%u,0x%lx)\n",
+ (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ }
+ }
+
+ rc= 0;
+err:
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+
+ return (test(exit_status() || rc));
+}
diff --git a/storage/maria/unittest/ma_test_loghandler_multithread-t.c b/storage/maria/unittest/ma_test_loghandler_multithread-t.c
new file mode 100644
index 00000000000..794dc6dd255
--- /dev/null
+++ b/storage/maria/unittest/ma_test_loghandler_multithread-t.c
@@ -0,0 +1,468 @@
+#include "../maria_def.h"
+#include <stdio.h>
+#include <errno.h>
+
+#ifndef DBUG_OFF
+static const char *default_dbug_option;
+#endif
+
+#define PCACHE_SIZE (1024*1024*10)
+
+/*#define LOG_FLAGS TRANSLOG_SECTOR_PROTECTION | TRANSLOG_PAGE_CRC */
+#define LOG_FLAGS 0
+/*#define LONG_BUFFER_SIZE (1024L*1024L*1024L + 1024L*1024L*512)*/
+#define LONG_BUFFER_SIZE (1024L*1024L*1024L)
+#define MIN_REC_LENGTH 30
+#define SHOW_DIVIDER 10
+#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512)
+#define ITERATIONS 3
+#define WRITERS 3
+static uint number_of_writers= WRITERS;
+
+static pthread_cond_t COND_thread_count;
+static pthread_mutex_t LOCK_thread_count;
+static uint thread_count;
+
+static ulong lens[WRITERS][ITERATIONS];
+static LSN lsns1[WRITERS][ITERATIONS];
+static LSN lsns2[WRITERS][ITERATIONS];
+static uchar *long_buffer;
+
+/*
+ Get pseudo-random length of the field in
+ limits [MIN_REC_LENGTH..LONG_BUFFER_SIZE]
+
+ SYNOPSYS
+ get_len()
+
+ RETURN
+ length - length >= 0 length <= LONG_BUFFER_SIZE
+*/
+
+static uint32 get_len()
+{
+ uint32 rec_len;
+ do
+ {
+ rec_len= random() /
+ (RAND_MAX / (LONG_BUFFER_SIZE - MIN_REC_LENGTH - 1)) + MIN_REC_LENGTH;
+ } while (rec_len >= LONG_BUFFER_SIZE);
+ return rec_len;
+}
+
+
+/*
+ Check that the buffer filled correctly
+
+ SYNOPSIS
+ check_content()
+ ptr Pointer to the buffer
+ length length of the buffer
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+static my_bool check_content(uchar *ptr, ulong length)
+{
+ ulong i;
+ for (i= 0; i < length; i++)
+ {
+ if (ptr[i] != (i & 0xFF))
+ {
+ fprintf(stderr, "Byte # %lu is %x instead of %x",
+ i, (uint) ptr[i], (uint) (i & 0xFF));
+ return 1;
+ }
+ }
+ return 0;
+}
+
+
+/*
+ Read whole record content, and check content (put with offset)
+
+ SYNOPSIS
+ read_and_check_content()
+ rec The record header buffer
+ buffer The buffer to read the record in
+ skip Skip this number of bytes ot the record content
+
+ RETURN
+ 0 - OK
+ 1 - Error
+*/
+
+
+static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec,
+ uchar *buffer, uint skip)
+{
+ int res= 0;
+ translog_size_t len;
+ DBUG_ENTER("read_and_check_content");
+ DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE + 7 * 2 + 2);
+ if ((len= translog_read_record(&rec->lsn, 0, rec->record_length,
+ buffer, NULL)) != rec->record_length)
+ {
+ fprintf(stderr, "Requested %lu byte, read %lu\n",
+ (ulong) rec->record_length, (ulong) len);
+ res= 1;
+ }
+ res|= check_content(buffer + skip, rec->record_length - skip);
+ DBUG_RETURN(res);
+}
+
+void writer(int num)
+{
+ LSN lsn;
+ uchar long_tr_id[6];
+ uint i;
+ DBUG_ENTER("writer");
+
+ for (i= 0; i < ITERATIONS; i++)
+ {
+ uint len= get_len();
+ lens[num][i]= len;
+
+ int2store(long_tr_id, num);
+ int4store(long_tr_id + 2, i);
+ if (translog_write_record(&lsn,
+ LOGREC_LONG_TRANSACTION_ID,
+ num, NULL, 6, long_tr_id, 0))
+ {
+ fprintf(stderr, "Can't write LOGREC_LONG_TRANSACTION_ID record #%lu "
+ "thread %i\n", (ulong) i, num);
+ translog_destroy();
+ return;
+ }
+ lsns1[num][i]= lsn;
+ if (translog_write_record(&lsn,
+ LOGREC_REDO_INSERT_ROW_HEAD,
+ num, NULL, len, long_buffer, 0))
+ {
+ fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i);
+ translog_destroy();
+ return;
+ }
+ lsns2[num][i]= lsn;
+ DBUG_PRINT("info", ("thread: %u, iteration: %u, len: %lu, "
+ "lsn1 (%lu,0x%lx) lsn2 (%lu,0x%lx)",
+ num, i, (ulong) lens[num][i],
+ (ulong) lsns1[num][i].file_no,
+ (ulong) lsns1[num][i].rec_offset,
+ (ulong) lsns2[num][i].file_no,
+ (ulong) lsns2[num][i].rec_offset));
+ printf("thread: %u, iteration: %u, len: %lu, "
+ "lsn1 (%lu,0x%lx) lsn2 (%lu,0x%lx)\n",
+ num, i, (ulong) lens[num][i],
+ (ulong) lsns1[num][i].file_no,
+ (ulong) lsns1[num][i].rec_offset,
+ (ulong) lsns2[num][i].file_no, (ulong) lsns2[num][i].rec_offset);
+ }
+ DBUG_VOID_RETURN;
+}
+
+
+static void *test_thread_writer(void *arg)
+{
+ int param= *((int*) arg);
+
+ my_thread_init();
+ DBUG_ENTER("test_writer");
+ DBUG_PRINT("enter", ("param: %d", param));
+
+ writer(param);
+
+ DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name()));
+ pthread_mutex_lock(&LOCK_thread_count);
+ thread_count--;
+ VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are
+ ready */
+ pthread_mutex_unlock(&LOCK_thread_count);
+ free((gptr) arg);
+ my_thread_end();
+ DBUG_RETURN(0);
+}
+
+
+int main(int argc, char **argv __attribute__ ((unused)))
+{
+ uint32 i;
+ uint pagen;
+ PAGECACHE pagecache;
+ LSN first_lsn, *lsn_ptr;
+ TRANSLOG_HEADER_BUFFER rec;
+ struct st_translog_scanner_data scanner;
+ pthread_t tid;
+ pthread_attr_t thr_attr;
+ int *param, error;
+ int rc;
+
+ bzero(&pagecache, sizeof(pagecache));
+ maria_data_root= ".";
+ long_buffer= malloc(LONG_BUFFER_SIZE + 7 * 2 + 2);
+ if (long_buffer == 0)
+ {
+ fprintf(stderr, "End of memory\n");
+ exit(1);
+ }
+ for (i= 0; i < (LONG_BUFFER_SIZE + 7 * 2 + 2); i++)
+ long_buffer[i]= (i & 0xFF);
+
+
+ MY_INIT(argv[0]);
+
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+
+ DBUG_ENTER("main");
+ DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name()));
+
+ if ((error= pthread_cond_init(&COND_thread_count, NULL)))
+ {
+ fprintf(stderr, "COND_thread_count: %d from pthread_cond_init "
+ "(errno: %d)\n", error, errno);
+ exit(1);
+ }
+ if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST)))
+ {
+ fprintf(stderr, "LOCK_thread_count: %d from pthread_cond_init "
+ "(errno: %d)\n", error, errno);
+ exit(1);
+ }
+ if ((error= pthread_attr_init(&thr_attr)))
+ {
+ fprintf(stderr, "Got error: %d from pthread_attr_init "
+ "(errno: %d)\n", error, errno);
+ exit(1);
+ }
+ if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED)))
+ {
+ fprintf(stderr,
+ "Got error: %d from pthread_attr_setdetachstate (errno: %d)\n",
+ error, errno);
+ exit(1);
+ }
+#ifndef pthread_attr_setstacksize /* void return value */
+ if ((error= pthread_attr_setstacksize(&thr_attr, 65536L)))
+ {
+ fprintf(stderr, "Got error: %d from pthread_attr_setstacksize "
+ "(errno: %d)\n", error, errno);
+ exit(1);
+ }
+#endif
+#ifdef HAVE_THR_SETCONCURRENCY
+ VOID(thr_setconcurrency(2));
+#endif
+
+ my_thread_global_init();
+
+ if (ma_control_file_create_or_open())
+ {
+ fprintf(stderr, "Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ TRANSLOG_PAGE_SIZE)) == 0)
+ {
+ fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS))
+ {
+ fprintf(stderr, "Can't init loghandler (%d)\n", errno);
+ translog_destroy();
+ exit(1);
+ }
+
+ srandom(122334817L);
+ {
+ uchar long_tr_id[6]=
+ {
+ 0x11, 0x22, 0x33, 0x44, 0x55, 0x66
+ };
+
+ if (translog_write_record(&first_lsn,
+ LOGREC_LONG_TRANSACTION_ID,
+ 0, NULL, 6, long_tr_id, 0))
+ {
+ fprintf(stderr, "Can't write the first record\n");
+ translog_destroy();
+ exit(1);
+ }
+ }
+
+
+ if ((error= pthread_mutex_lock(&LOCK_thread_count)))
+ {
+ fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_lock "
+ "(errno: %d)\n", error, errno);
+ exit(1);
+ }
+
+ while (number_of_writers != 0)
+ {
+ param= (int*) malloc(sizeof(int));
+ *param= number_of_writers - 1;
+ if ((error= pthread_create(&tid, &thr_attr, test_thread_writer,
+ (void*) param)))
+ {
+ fprintf(stderr, "Got error: %d from pthread_create (errno: %d)\n",
+ error, errno);
+ exit(1);
+ }
+ thread_count++;
+ number_of_writers--;
+ }
+ DBUG_PRINT("info", ("All threads are started"));
+ pthread_mutex_unlock(&LOCK_thread_count);
+
+ pthread_attr_destroy(&thr_attr);
+
+ /* wait finishing */
+ if ((error= pthread_mutex_lock(&LOCK_thread_count)))
+ fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_lock\n", error);
+ while (thread_count)
+ {
+ if ((error= pthread_cond_wait(&COND_thread_count, &LOCK_thread_count)))
+ fprintf(stderr, "COND_thread_count: %d from pthread_cond_wait\n", error);
+ }
+ if ((error= pthread_mutex_unlock(&LOCK_thread_count)))
+ fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_unlock\n", error);
+ DBUG_PRINT("info", ("All threads ended"));
+
+ /* Find last LSN and flush up to it (all our log) */
+ {
+ LSN max=
+ {
+ 0, 0
+ };
+ for (i= 0; i < WRITERS; i++)
+ {
+ if (cmp_translog_addr(lsns2[i][ITERATIONS - 1], max) > 0)
+ max= lsns2[i][ITERATIONS - 1];
+ }
+ DBUG_PRINT("info", ("first lsn: (%lu,0x%lx), max lsn: (%lu,0x%lx)",
+ (ulong) first_lsn.file_no,
+ (ulong) first_lsn.rec_offset,
+ (ulong) max.file_no, (ulong) max.rec_offset));
+ translog_flush(&max);
+ }
+
+ rc= 1;
+
+ {
+ uint indeces[WRITERS];
+ uint index, len, stage;
+ bzero(indeces, sizeof(uint) * WRITERS);
+
+ bzero(indeces, sizeof(indeces));
+
+ lsn_ptr= &first_lsn;
+ for (i= 0;; i++)
+ {
+ len= translog_read_next_record_header(lsn_ptr, &rec, 1, &scanner);
+ lsn_ptr= NULL;
+
+ if (len == 0)
+ {
+ fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n",
+ i, errno);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (rec.lsn.file_no == CONTROL_FILE_IMPOSSIBLE_FILENO)
+ {
+ if (i != WRITERS * ITERATIONS * 2)
+ {
+ fprintf(stderr, "EOL met at iteration %u instead of %u\n",
+ i, ITERATIONS * WRITERS * 2);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ break;
+ }
+ index= indeces[rec.short_trid] / 2;
+ stage= indeces[rec.short_trid] % 2;
+ printf("read(%d) thread: %d, iteration %d, stage %d\n",
+ i, (uint) rec.short_trid, index, stage);
+ if (stage == 0)
+ {
+ if (rec.type !=LOGREC_LONG_TRANSACTION_ID ||
+ rec.record_length != 6 ||
+ uint2korr(rec.header) != rec.short_trid ||
+ index != uint4korr(rec.header + 2) ||
+ cmp_translog_addr(lsns1[rec.short_trid][index], rec.lsn) != 0)
+ {
+ fprintf(stderr, "Incorrect LOGREC_LONG_TRANSACTION_ID data read(%d)\n"
+ "type %u, strid %u %u, len %u, i: %u %u, "
+ "lsn(%lu,0x%lx) (%lu,0x%lx)\n",
+ i, (uint) rec.type,
+ (uint) rec.short_trid, (uint) uint2korr(rec.header),
+ (uint) rec.record_length,
+ (uint) index, (uint) uint4korr(rec.header + 2),
+ (ulong) rec.lsn.file_no, (ulong) rec.lsn.rec_offset,
+ (ulong) lsns1[rec.short_trid][index].file_no,
+ (ulong) lsns1[rec.short_trid][index].rec_offset);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ }
+ else
+ {
+ if (rec.type !=LOGREC_REDO_INSERT_ROW_HEAD ||
+ len != 9 ||
+ rec.record_length != lens[rec.short_trid][index] ||
+ cmp_translog_addr(lsns2[rec.short_trid][index], rec.lsn) != 0 ||
+ check_content(rec.header, len))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_REDO_INSERT_ROW_HEAD data read(%d) "
+ " thread: %d, iteration %d, stage %d\n"
+ "type %u (%d), len %u, length %lu %lu (%d) "
+ "lsn(%lu,0x%lx) (%lu,0x%lx)\n",
+ i, (uint) rec.short_trid, index, stage,
+ (uint) rec.type, (rec.type !=LOGREC_REDO_INSERT_ROW_HEAD),
+ (uint) len,
+ (ulong) rec.record_length, lens[rec.short_trid][index],
+ (rec.record_length != lens[rec.short_trid][index]),
+ (ulong) rec.lsn.file_no, (ulong) rec.lsn.rec_offset,
+ (ulong) lsns2[rec.short_trid][index].file_no,
+ (ulong) lsns2[rec.short_trid][index].rec_offset);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ if (read_and_check_content(&rec, long_buffer, 0))
+ {
+ fprintf(stderr,
+ "Incorrect LOGREC_REDO_INSERT_ROW_HEAD in whole rec read "
+ "lsn(%u,0x%lx)\n",
+ (uint) rec.lsn.file_no, (ulong) rec.lsn.rec_offset);
+ translog_free_record_header(&rec);
+ goto err;
+ }
+ }
+ translog_free_record_header(&rec);
+ indeces[rec.short_trid]++;
+ }
+ }
+
+ rc= 0;
+err:
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+
+ DBUG_RETURN(test(exit_status() || rc));
+}
diff --git a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c
new file mode 100644
index 00000000000..6771b5f888d
--- /dev/null
+++ b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c
@@ -0,0 +1,140 @@
+#include "../maria_def.h"
+#include <stdio.h>
+#include <errno.h>
+
+#ifndef DBUG_OFF
+static const char *default_dbug_option;
+#endif
+
+#define PCACHE_SIZE (1024*1024*10)
+#define PCACHE_PAGE TRANSLOG_PAGE_SIZE
+#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512)
+#define LOG_FLAGS 0
+
+static char *first_translog_file= (char*)"maria_log.00000001";
+static char *file1_name= (char*)"page_cache_test_file_1";
+static PAGECACHE_FILE file1;
+
+int main(int argc, char *argv[])
+{
+ uint pagen;
+ uchar long_tr_id[6];
+ PAGECACHE pagecache;
+ LSN lsn;
+ MY_STAT st, *stat;
+
+ MY_INIT(argv[0]);
+
+ bzero(&pagecache, sizeof(pagecache));
+ maria_data_root= ".";
+ /* be sure that we have no logs in the directory*/
+ if (my_stat(CONTROL_FILE_BASE_NAME, &st, MYF(0)))
+ my_delete(CONTROL_FILE_BASE_NAME, MYF(0));
+ if (my_stat(first_translog_file, &st, MYF(0)))
+ my_delete(first_translog_file, MYF(0));
+
+ bzero(long_tr_id, 6);
+#ifndef DBUG_OFF
+#if defined(__WIN__)
+ default_dbug_option= "d:t:i:O,\\ma_test_loghandler_pagecache.trace";
+#else
+ default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler_pagecache.trace";
+#endif
+ if (argc > 1)
+ {
+ DBUG_SET(default_dbug_option);
+ DBUG_SET_INITIAL(default_dbug_option);
+ }
+#endif
+
+ if (ma_control_file_create_or_open())
+ {
+ fprintf(stderr, "Can't init control file (%d)\n", errno);
+ exit(1);
+ }
+ if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
+ PCACHE_PAGE)) == 0)
+ {
+ fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno);
+ exit(1);
+ }
+ if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS))
+ {
+ fprintf(stderr, "Can't init loghandler (%d)\n", errno);
+ translog_destroy();
+ exit(1);
+ }
+
+ if ((stat= my_stat(first_translog_file, &st, MYF(0))) == 0)
+ {
+ fprintf(stderr, "There is no %s (%d)\n", first_translog_file, errno);
+ exit(1);
+ }
+ if (st.st_size != TRANSLOG_PAGE_SIZE)
+ {
+ fprintf(stderr,
+ "incorrect initial size of %s: %ld instead of %ld\n",
+ first_translog_file, (long)st.st_size, (long)TRANSLOG_PAGE_SIZE);
+ exit(1);
+ }
+ int4store(long_tr_id, 0);
+ if (translog_write_record(&lsn,
+ LOGREC_LONG_TRANSACTION_ID,
+ 0, NULL, 6, long_tr_id, 0))
+ {
+ fprintf(stderr, "Can't write record #%lu\n", (ulong) 0);
+ translog_destroy();
+ exit(1);
+ }
+
+ if ((file1.file= my_open(file1_name,
+ O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1)
+ {
+ fprintf(stderr, "Got error during file1 creation from open() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+ if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0)
+ {
+ fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n",
+ errno);
+ exit(1);
+ }
+
+ {
+ uchar page[PCACHE_PAGE];
+
+ bzero(page, PCACHE_PAGE);
+#define PAGE_LSN_OFFSET 0
+ lsn7store(page + PAGE_LSN_OFFSET, &lsn);
+ pagecache_write(&pagecache, &file1, 0, 3, (char*)page,
+ PAGECACHE_LSN_PAGE,
+ PAGECACHE_LOCK_LEFT_UNLOCKED,
+ PAGECACHE_PIN_LEFT_UNPINNED,
+ PAGECACHE_WRITE_DELAY,
+ 0);
+ flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE);
+ }
+ if ((stat= my_stat(first_translog_file, &st, MYF(0))) == 0)
+ {
+ fprintf(stderr, "can't stat %s (%d)\n", first_translog_file, errno);
+ exit(1);
+ }
+ if (st.st_size != TRANSLOG_PAGE_SIZE * 2)
+ {
+ fprintf(stderr,
+ "incorrect initial size of %s: %ld instead of %ld\n",
+ first_translog_file,
+ (long)st.st_size, (long)(TRANSLOG_PAGE_SIZE * 2));
+ exit(1);
+ }
+
+ translog_destroy();
+ end_pagecache(&pagecache, 1);
+ ma_control_file_end();
+ my_delete(CONTROL_FILE_BASE_NAME, MYF(0));
+ my_delete(first_translog_file, MYF(0));
+ my_delete(file1_name, MYF(0));
+
+ exit(0);
+}
diff --git a/unittest/mysys/mf_pagecache_consist.c b/storage/maria/unittest/mf_pagecache_consist.c
index f7724222a09..9389e5a093c 100755
--- a/unittest/mysys/mf_pagecache_consist.c
+++ b/storage/maria/unittest/mf_pagecache_consist.c
@@ -57,6 +57,29 @@ static uint flush_divider= 1000;
#endif /*TEST_HIGH_CONCURENCY*/
+/*
+ Get pseudo-random length of the field in (0;limit)
+
+ SYNOPSYS
+ get_len()
+ limit limit for generated value
+
+ RETURN
+ length where length >= 0 & length < limit
+*/
+
+static uint get_len(uint limit)
+{
+ uint32 rec_len;
+ do
+ {
+ rec_len= random() /
+ (RAND_MAX / limit);
+ } while (rec_len >= limit || rec_len == 0);
+ return rec_len;
+}
+
+
/* check page consistency */
uint check_page(uchar *buff, ulong offset, int page_locked, int page_no,
int tag)
@@ -70,7 +93,7 @@ uint check_page(uchar *buff, ulong offset, int page_locked, int page_no,
{
uint len= *((uint *)(buff + end));
uint j;
- end+= sizeof(uint)+ sizeof(uint);
+ end+= sizeof(uint) + sizeof(uint);
if (len + end > PAGE_SIZE)
{
diag("incorrect field header #%u by offset %lu\n", i, offset + end + j);
@@ -178,7 +201,7 @@ void reader(int num)
for (i= 0; i < number_of_tests; i++)
{
- uint page= rand()/(RAND_MAX/number_of_pages);
+ uint page= get_len(number_of_pages);
pagecache_read(&pagecache, &file1, page, 3, (char*)buffr,
PAGECACHE_PLAIN_PAGE,
PAGECACHE_LOCK_LEFT_UNLOCKED,
@@ -201,13 +224,13 @@ void writer(int num)
for (i= 0; i < number_of_tests; i++)
{
uint end;
- uint page= rand()/(RAND_MAX/number_of_pages);
+ uint page= get_len(number_of_pages);
pagecache_read(&pagecache, &file1, page, 3, (char*)buffr,
PAGECACHE_PLAIN_PAGE,
PAGECACHE_LOCK_WRITE,
0);
end= check_page(buffr, page * PAGE_SIZE, 1, page, num);
- put_rec(buffr, end, rand()/(RAND_MAX/record_length_limit), num);
+ put_rec(buffr, end, get_len(record_length_limit), num);
pagecache_write(&pagecache, &file1, page, 3, (char*)buffr,
PAGECACHE_PLAIN_PAGE,
PAGECACHE_LOCK_WRITE_UNLOCK,
@@ -348,7 +371,7 @@ int main(int argc, char **argv __attribute__((unused)))
if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
- PAGE_SIZE, 0)) == 0)
+ PAGE_SIZE)) == 0)
{
fprintf(stderr,"Got error: init_pagecache() (errno: %d)\n",
errno);
diff --git a/unittest/mysys/mf_pagecache_single.c b/storage/maria/unittest/mf_pagecache_single.c
index 49ecd2986ab..3c4a3642fe9 100644
--- a/unittest/mysys/mf_pagecache_single.c
+++ b/storage/maria/unittest/mf_pagecache_single.c
@@ -346,7 +346,7 @@ int simple_big_test()
unsigned char *buffw= (unsigned char *)malloc(PAGE_SIZE);
unsigned char *buffr= (unsigned char *)malloc(PAGE_SIZE);
struct file_desc *desc=
- (struct file_desc *)malloc((PCACHE_SIZE/(PAGE_SIZE/2)) *
+ (struct file_desc *)malloc((PCACHE_SIZE/(PAGE_SIZE/2) + 1) *
sizeof(struct file_desc));
int res, i;
DBUG_ENTER("simple_big_test");
@@ -363,6 +363,8 @@ int simple_big_test()
PAGECACHE_WRITE_DELAY,
0);
}
+ desc[i].length= 0;
+ desc[i].content= NULL;
ok(1, "Simple big file write");
/* check written pages sequentally read */
for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE/2); i++)
@@ -528,7 +530,7 @@ int main(int argc, char **argv __attribute__((unused)))
plan(12);
if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0,
- PAGE_SIZE, 0)) == 0)
+ PAGE_SIZE)) == 0)
{
fprintf(stderr,"Got error: init_pagecache() (errno: %d)\n",
errno);
diff --git a/unittest/mysys/test_file.c b/storage/maria/unittest/test_file.c
index 758d0bfa81b..758d0bfa81b 100644
--- a/unittest/mysys/test_file.c
+++ b/storage/maria/unittest/test_file.c
diff --git a/unittest/mysys/test_file.h b/storage/maria/unittest/test_file.h
index ea787c123ed..ea787c123ed 100644
--- a/unittest/mysys/test_file.h
+++ b/storage/maria/unittest/test_file.h
diff --git a/unittest/mysys/Makefile.am b/unittest/mysys/Makefile.am
index 9b230272329..a32dbb9b0c3 100644
--- a/unittest/mysys/Makefile.am
+++ b/unittest/mysys/Makefile.am
@@ -7,45 +7,4 @@ LDADD = $(top_builddir)/unittest/mytap/libmytap.a \
$(top_builddir)/dbug/libdbug.a \
$(top_builddir)/strings/libmystrings.a
-noinst_PROGRAMS = bitmap-t base64-t my_atomic-t \
- mf_pagecache_single_1k-t mf_pagecache_single_8k-t \
- mf_pagecache_single_64k-t \
- mf_pagecache_consist_1k-t mf_pagecache_consist_64k-t \
- mf_pagecache_consist_1kHC-t mf_pagecache_consist_64kHC-t \
- mf_pagecache_consist_1kRD-t mf_pagecache_consist_64kRD-t \
- mf_pagecache_consist_1kWR-t mf_pagecache_consist_64kWR-t
-
-# tests for mysys/mf_pagecache.c
-
-mf_pagecache_single_src = mf_pagecache_single.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c
-mf_pagecache_consist_src = mf_pagecache_consist.c $(top_srcdir)/mysys/mf_pagecache.c test_file.c
-mf_pagecache_common_cppflags = -DEXTRA_DEBUG -DPAGECACHE_DEBUG -DMAIN
-
-mf_pagecache_single_1k_t_SOURCES = $(mf_pagecache_single_src)
-mf_pagecache_single_8k_t_SOURCES = $(mf_pagecache_single_src)
-mf_pagecache_single_64k_t_SOURCES = $(mf_pagecache_single_src)
-mf_pagecache_single_1k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024
-mf_pagecache_single_8k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=8192
-mf_pagecache_single_64k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536
-
-mf_pagecache_consist_1k_t_SOURCES = $(mf_pagecache_consist_src)
-mf_pagecache_consist_1k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024
-mf_pagecache_consist_64k_t_SOURCES = $(mf_pagecache_consist_src)
-mf_pagecache_consist_64k_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536
-
-mf_pagecache_consist_1kHC_t_SOURCES = $(mf_pagecache_consist_src)
-mf_pagecache_consist_1kHC_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_HIGH_CONCURENCY
-mf_pagecache_consist_64kHC_t_SOURCES = $(mf_pagecache_consist_src)
-mf_pagecache_consist_64kHC_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_HIGH_CONCURENCY
-
-mf_pagecache_consist_1kRD_t_SOURCES = $(mf_pagecache_consist_src)
-mf_pagecache_consist_1kRD_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_READERS
-mf_pagecache_consist_64kRD_t_SOURCES = $(mf_pagecache_consist_src)
-mf_pagecache_consist_64kRD_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_READERS
-
-mf_pagecache_consist_1kWR_t_SOURCES = $(mf_pagecache_consist_src)
-mf_pagecache_consist_1kWR_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_WRITERS
-mf_pagecache_consist_64kWR_t_SOURCES = $(mf_pagecache_consist_src)
-mf_pagecache_consist_64kWR_t_CPPFLAGS = $(mf_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_WRITERS
-
-CLEANFILES = my_pagecache_debug.log page_cache_test_file_1
+noinst_PROGRAMS = bitmap-t base64-t my_atomic-t